diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 15:55:15 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 15:55:15 +0000 |
commit | 02ad08238d02c56e16fc99788c732ff5e77a1759 (patch) | |
tree | 61ad5e18868748f4705e487b65c847377bb7031c /src | |
parent | Initial commit. (diff) | |
download | parallel-upstream/20221122+ds.tar.xz parallel-upstream/20221122+ds.zip |
Adding upstream version 20221122+ds.upstream/20221122+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src')
34 files changed, 57205 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..cf7325c --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,416 @@ +# SPDX-FileCopyrightText: 2002-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# +# SPDX-License-Identifier: GPL-3.0-or-later + +bin_SCRIPTS = parallel sql niceload parcat parset parsort \ + env_parallel env_parallel.ash env_parallel.bash \ + env_parallel.csh env_parallel.dash env_parallel.fish \ + env_parallel.ksh env_parallel.mksh env_parallel.pdksh \ + env_parallel.sh env_parallel.tcsh env_parallel.zsh + +install-exec-hook: + rm "$(DESTDIR)$(bindir)"/sem || true + $(LN_S) parallel "$(DESTDIR)$(bindir)"/sem + +if DOCUMENTATION +man_MANS = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ + parallel_examples.7 parallel_tutorial.7 parallel_book.7 \ + parallel_design.7 parallel_alternatives.7 parcat.1 parset.1 \ + parsort.1 +doc_DATA = parallel.html env_parallel.html sem.html sql.html \ + niceload.html parallel_examples.html parallel_tutorial.html \ + parallel_book.html parallel_design.html \ + parallel_alternatives.html parcat.html parset.html \ + parsort.html \ + parallel.texi env_parallel.texi sem.texi sql.texi \ + niceload.texi parallel_examples.texi parallel_tutorial.texi \ + parallel_book.texi parallel_design.texi \ + parallel_alternatives.texi parcat.texi parset.texi \ + parsort.texi \ + parallel.rst env_parallel.rst sem.rst sql.rst niceload.rst \ + parallel_examples.rst parallel_tutorial.rst parallel_book.rst \ + parallel_design.rst parallel_alternatives.rst parcat.rst \ + parset.rst parsort.rst \ + parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ + parallel_examples.pdf parallel_tutorial.pdf parallel_book.pdf \ + parallel_design.pdf parallel_alternatives.pdf parcat.pdf \ + parset.pdf parsort.pdf parallel_cheat_bw.pdf \ + parallel_options_map.pdf +endif + +web: sphinx + true + +sphinx: *.rst + cd sphinx && make && cd .. + +# Build documentation file if the tool to build exists. +# Otherwise: Use the distributed version +parallel.1: parallel.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.1n \ + && mv "$(srcdir)"/parallel.1n "$(srcdir)"/parallel.1 \ + || echo "Warning: pod2man not found. Using old parallel.1" + +env_parallel.1: env_parallel.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.1n \ + && mv "$(srcdir)"/env_parallel.1n "$(srcdir)"/env_parallel.1 \ + || echo "Warning: pod2man not found. Using old env_parallel.1" + +parallel_examples.7: parallel_examples.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_examples.pod > "$(srcdir)"/parallel_examples.7n \ + && mv "$(srcdir)"/parallel_examples.7n "$(srcdir)"/parallel_examples.7 \ + || echo "Warning: pod2man not found. Using old parallel_examples.7" + +parallel_tutorial.7: parallel_tutorial.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.7n \ + && mv "$(srcdir)"/parallel_tutorial.7n "$(srcdir)"/parallel_tutorial.7 \ + || echo "Warning: pod2man not found. Using old parallel_tutorial.7" + +parallel_book.7: parallel_book.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.7n \ + && mv "$(srcdir)"/parallel_book.7n "$(srcdir)"/parallel_book.7 \ + || echo "Warning: pod2man not found. Using old parallel_book.7" + +parallel_design.7: parallel_design.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.7n \ + && mv "$(srcdir)"/parallel_design.7n "$(srcdir)"/parallel_design.7 \ + || echo "Warning: pod2man not found. Using old parallel_design.7" + +parallel_alternatives.7: parallel_alternatives.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.7n \ + && mv "$(srcdir)"/parallel_alternatives.7n "$(srcdir)"/parallel_alternatives.7 \ + || echo "Warning: pod2man not found. Using old parallel_alternatives.7" + +sem.1: sem.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/sem.pod > "$(srcdir)"/sem.1n \ + && mv "$(srcdir)"/sem.1n "$(srcdir)"/sem.1 \ + || echo "Warning: pod2man not found. Using old sem.1" + +sql.1: sql + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/sql > "$(srcdir)"/sql.1n \ + && mv "$(srcdir)"/sql.1n "$(srcdir)"/sql.1 \ + || echo "Warning: pod2man not found. Using old sql.1" + +niceload.1: niceload.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.1n \ + && mv "$(srcdir)"/niceload.1n "$(srcdir)"/niceload.1 \ + || echo "Warning: pod2man not found. Using old niceload.1" + +parcat.1: parcat.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.1n \ + && mv "$(srcdir)"/parcat.1n "$(srcdir)"/parcat.1 \ + || echo "Warning: pod2man not found. Using old parcat.1" + +parset.1: parset.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parset.pod > "$(srcdir)"/parset.1n \ + && mv "$(srcdir)"/parset.1n "$(srcdir)"/parset.1 \ + || echo "Warning: pod2man not found. Using old parset.1" + +parsort.1: parsort + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parsort > "$(srcdir)"/parsort.1n \ + && mv "$(srcdir)"/parsort.1n "$(srcdir)"/parsort.1 \ + || echo "Warning: pod2man not found. Using old parsort.1" + +parallel.html: parallel.pod + pod2html --title "GNU Parallel" "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.htmln \ + && mv "$(srcdir)"/parallel.htmln "$(srcdir)"/parallel.html \ + || echo "Warning: pod2html not found. Using old parallel.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel.html to avoid stupid pod2html race condition +env_parallel.html: env_parallel.pod parallel.html + pod2html --title "GNU Parallel with environment" "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.htmln \ + && mv "$(srcdir)"/env_parallel.htmln "$(srcdir)"/env_parallel.html \ + || echo "Warning: pod2html not found. Using old env_parallel.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on env_parallel.html to avoid stupid pod2html race condition +parallel_examples.html: parallel_examples.pod env_parallel.html + pod2html --title "GNU Parallel examples" "$(srcdir)"/parallel_examples.pod > "$(srcdir)"/parallel_examples.htmln \ + && mv "$(srcdir)"/parallel_examples.htmln "$(srcdir)"/parallel_examples.html \ + || echo "Warning: pod2html not found. Using old parallel_examples.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_examples.html to avoid stupid pod2html race condition +parallel_tutorial.html: parallel_tutorial.pod parallel_examples.html + pod2html --title "GNU Parallel tutorial" "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.htmln \ + && mv "$(srcdir)"/parallel_tutorial.htmln "$(srcdir)"/parallel_tutorial.html \ + || echo "Warning: pod2html not found. Using old parallel_tutorial.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_tutorial.html to avoid stupid pod2html race condition +parallel_book.html: parallel_book.pod parallel_tutorial.html + pod2html --title "GNU Parallel book" "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.htmln \ + && mv "$(srcdir)"/parallel_book.htmln "$(srcdir)"/parallel_book.html \ + || echo "Warning: pod2html not found. Using old parallel_book.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_book.html to avoid stupid pod2html race condition +parallel_design.html: parallel_design.pod parallel_book.html + pod2html --title "GNU Parallel design" "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.htmln \ + && mv "$(srcdir)"/parallel_design.htmln "$(srcdir)"/parallel_design.html \ + || echo "Warning: pod2html not found. Using old parallel_design.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_design.html to avoid stupid pod2html race condition +parallel_alternatives.html: parallel_alternatives.pod parallel_design.html + pod2html --title "GNU Parallel alternatives" "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.htmln \ + && mv "$(srcdir)"/parallel_alternatives.htmln "$(srcdir)"/parallel_alternatives.html \ + || echo "Warning: pod2html not found. Using old parallel_alternatives.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_alternatives.html to avoid stupid pod2html race condition +sem.html: sem.pod parallel_alternatives.html + pod2html --title "sem (GNU Parallel)" "$(srcdir)"/sem.pod > "$(srcdir)"/sem.htmln \ + && mv "$(srcdir)"/sem.htmln "$(srcdir)"/sem.html \ + || echo "Warning: pod2html not found. Using old sem.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on sem.html to avoid stupid pod2html race condition +sql.html: sql sem.html + pod2html --title "GNU SQL" "$(srcdir)"/sql > "$(srcdir)"/sql.htmln \ + && mv "$(srcdir)"/sql.htmln "$(srcdir)"/sql.html \ + || echo "Warning: pod2html not found. Using old sql.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on sql.html to avoid stupid pod2html race condition +niceload.html: niceload.pod sql.html + pod2html --title "GNU niceload" "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.htmln \ + && mv "$(srcdir)"/niceload.htmln "$(srcdir)"/niceload.html \ + || echo "Warning: pod2html not found. Using old niceload.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on niceload.html to avoid stupid pod2html race condition +parcat.html: parcat.pod niceload.html + pod2html --title "GNU parcat" "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.htmln \ + && mv "$(srcdir)"/parcat.htmln "$(srcdir)"/parcat.html \ + || echo "Warning: pod2html not found. Using old parcat.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parcat.html to avoid stupid pod2html race condition +parset.html: parset.pod parcat.html + pod2html --title "GNU parset" "$(srcdir)"/parset.pod > "$(srcdir)"/parset.htmln \ + && mv "$(srcdir)"/parset.htmln "$(srcdir)"/parset.html \ + || echo "Warning: pod2html not found. Using old parset.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parset.html to avoid stupid pod2html race condition +parsort.html: parsort parset.html + pod2html --title "GNU parsort" "$(srcdir)"/parsort > "$(srcdir)"/parsort.htmln \ + && mv "$(srcdir)"/parsort.htmln "$(srcdir)"/parsort.html \ + || echo "Warning: pod2html not found. Using old parsort.html" + rm -f "$(srcdir)"/pod2htm* + +parallel.texi: parallel.pod + pod2texi --output="$(srcdir)"/parallel.texi "$(srcdir)"/parallel.pod \ + || echo "Warning: pod2texi not found. Using old parallel.texi" + +env_parallel.texi: env_parallel.pod + pod2texi --output="$(srcdir)"/env_parallel.texi "$(srcdir)"/env_parallel.pod \ + || echo "Warning: pod2texi not found. Using old env_parallel.texi" + +parallel_examples.texi: parallel_examples.pod + pod2texi --output="$(srcdir)"/parallel_examples.texi "$(srcdir)"/parallel_examples.pod \ + || echo "Warning: pod2texi not found. Using old parallel_examples.texi" + +parallel_tutorial.texi: parallel_tutorial.pod + pod2texi --output="$(srcdir)"/parallel_tutorial.texi "$(srcdir)"/parallel_tutorial.pod \ + || echo "Warning: pod2texi not found. Using old parallel_tutorial.texi" + +parallel_book.texi: parallel_book.pod + pod2texi --output="$(srcdir)"/parallel_book.texi "$(srcdir)"/parallel_book.pod \ + || echo "Warning: pod2texi not found. Using old parallel_book.texi" + +parallel_design.texi: parallel_design.pod + pod2texi --output="$(srcdir)"/parallel_design.texi "$(srcdir)"/parallel_design.pod \ + || echo "Warning: pod2texi not found. Using old parallel_design.texi" + +parallel_alternatives.texi: parallel_alternatives.pod + pod2texi --output="$(srcdir)"/parallel_alternatives.texi "$(srcdir)"/parallel_alternatives.pod \ + || echo "Warning: pod2texi not found. Using old parallel_alternatives.texi" + +sem.texi: sem.pod + pod2texi --output="$(srcdir)"/sem.texi "$(srcdir)"/sem.pod \ + || echo "Warning: pod2texi not found. Using old sem.texi" + +sql.texi: sql + pod2texi --output="$(srcdir)"/sql.texi "$(srcdir)"/sql \ + || echo "Warning: pod2texi not found. Using old sql.texi" + +niceload.texi: niceload.pod + pod2texi --output="$(srcdir)"/niceload.texi "$(srcdir)"/niceload.pod \ + || echo "Warning: pod2texi not found. Using old niceload.texi" + +parcat.texi: parcat.pod + pod2texi --output="$(srcdir)"/parcat.texi "$(srcdir)"/parcat.pod \ + || echo "Warning: pod2texi not found. Using old parcat.texi" + +parset.texi: parset.pod + pod2texi --output="$(srcdir)"/parset.texi "$(srcdir)"/parset.pod \ + || echo "Warning: pod2texi not found. Using old parset.texi" + +parsort.texi: parsort + pod2texi --output="$(srcdir)"/parsort.texi "$(srcdir)"/parsort \ + || echo "Warning: pod2texi not found. Using old parsort.texi" + +parallel.rst: parallel.pod + ./pod2rst-fix < "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.rst \ + || echo "Warning: pod2rst not found. Using old parallel.rst" + +env_parallel.rst: env_parallel.pod + ./pod2rst-fix < "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.rst \ + || echo "Warning: pod2rst not found. Using old env_parallel.rst" + +parallel_examples.rst: parallel_examples.pod + ./pod2rst-fix < "$(srcdir)"/parallel_examples.pod > "$(srcdir)"/parallel_examples.rst \ + || echo "Warning: pod2rst not found. Using old parallel_examples.rst" + +parallel_tutorial.rst: parallel_tutorial.pod + ./pod2rst-fix < "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.rst \ + || echo "Warning: pod2rst not found. Using old parallel_tutorial.rst" + +parallel_book.rst: parallel_book.pod + ./pod2rst-fix < "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.rst \ + || echo "Warning: pod2rst not found. Using old parallel_book.rst" + +parallel_design.rst: parallel_design.pod + ./pod2rst-fix < "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.rst \ + || echo "Warning: pod2rst not found. Using old parallel_design.rst" + +parallel_alternatives.rst: parallel_alternatives.pod + ./pod2rst-fix < "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.rst \ + || echo "Warning: pod2rst not found. Using old parallel_alternatives.rst" + +sem.rst: sem.pod + ./pod2rst-fix < "$(srcdir)"/sem.pod > "$(srcdir)"/sem.rst \ + || echo "Warning: pod2rst not found. Using old sem.rst" + +sql.rst: sql + ./pod2rst-fix < "$(srcdir)"/sql > "$(srcdir)"/sql.rst \ + || echo "Warning: pod2rst not found. Using old sql.rst" + +niceload.rst: niceload.pod + ./pod2rst-fix < "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.rst \ + || echo "Warning: pod2rst not found. Using old niceload.rst" + +parcat.rst: parcat.pod + ./pod2rst-fix < "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.rst \ + || echo "Warning: pod2rst not found. Using old parcat.rst" + +parset.rst: parset.pod + ./pod2rst-fix < "$(srcdir)"/parset.pod > "$(srcdir)"/parset.rst \ + || echo "Warning: pod2rst not found. Using old parset.rst" + +parsort.rst: parsort + ./pod2rst-fix < "$(srcdir)"/parsort > "$(srcdir)"/parsort.rst \ + || echo "Warning: pod2rst not found. Using old parsort.rst" + +parallel.pdf: parallel.pod + pod2pdf --output-file "$(srcdir)"/parallel.pdf "$(srcdir)"/parallel.pod --title "GNU Parallel" \ + || echo "Warning: pod2pdf not found. Using old parallel.pdf" + +env_parallel.pdf: env_parallel.pod + pod2pdf --output-file "$(srcdir)"/env_parallel.pdf "$(srcdir)"/env_parallel.pod --title "GNU Parallel with environment" \ + || echo "Warning: pod2pdf not found. Using old env_parallel.pdf" + +parallel_examples.pdf: parallel_examples.pod + pod2pdf --output-file "$(srcdir)"/parallel_examples.pdf "$(srcdir)"/parallel_examples.pod --title "GNU Parallel Examples" \ + || echo "Warning: pod2pdf not found. Using old parallel_examples.pdf" + +parallel_tutorial.pdf: parallel_tutorial.pod + pod2pdf --output-file "$(srcdir)"/parallel_tutorial.pdf "$(srcdir)"/parallel_tutorial.pod --title "GNU Parallel Tutorial" \ + || echo "Warning: pod2pdf not found. Using old parallel_tutorial.pdf" + +parallel_book.pdf: parallel_book.pod + pod2pdf --output-file "$(srcdir)"/parallel_book.pdf "$(srcdir)"/parallel_book.pod --title "GNU Parallel Book" \ + || echo "Warning: pod2pdf not found. Using old parallel_book.pdf" + +parallel_design.pdf: parallel_design.pod + pod2pdf --output-file "$(srcdir)"/parallel_design.pdf "$(srcdir)"/parallel_design.pod --title "GNU Parallel Design" \ + || echo "Warning: pod2pdf not found. Using old parallel_design.pdf" + +parallel_alternatives.pdf: parallel_alternatives.pod + pod2pdf --output-file "$(srcdir)"/parallel_alternatives.pdf "$(srcdir)"/parallel_alternatives.pod --title "GNU Parallel alternatives" \ + || echo "Warning: pod2pdf not found. Using old parallel_alternatives.pdf" + +sem.pdf: sem.pod + pod2pdf --output-file "$(srcdir)"/sem.pdf "$(srcdir)"/sem.pod --title "GNU sem" \ + || echo "Warning: pod2pdf not found. Using old sem.pdf" + +sql.pdf: sql + pod2pdf --output-file "$(srcdir)"/sql.pdf "$(srcdir)"/sql --title "GNU SQL" \ + || echo "Warning: pod2pdf not found. Using old sql.pdf" + +niceload.pdf: niceload.pod + pod2pdf --output-file "$(srcdir)"/niceload.pdf "$(srcdir)"/niceload.pod --title "GNU niceload" \ + || echo "Warning: pod2pdf not found. Using old niceload.pdf" + +parcat.pdf: parcat.pod + pod2pdf --output-file "$(srcdir)"/parcat.pdf "$(srcdir)"/parcat.pod --title "GNU parcat" \ + || echo "Warning: pod2pdf not found. Using old parcat.pdf" + +parset.pdf: parset.pod + pod2pdf --output-file "$(srcdir)"/parset.pdf "$(srcdir)"/parset.pod --title "GNU parset" \ + || echo "Warning: pod2pdf not found. Using old parset.pdf" + +parsort.pdf: parsort + pod2pdf --output-file "$(srcdir)"/parsort.pdf "$(srcdir)"/parsort --title "GNU parsort" \ + || echo "Warning: pod2pdf not found. Using old parsort.pdf" + +parallel_cheat_bw.pdf: parallel_cheat_bw.fodt + libreoffice --headless --convert-to pdf parallel_cheat_bw.fodt \ + || echo "Warning: libreoffice failed. Using old parallel_cheat_bw.pdf" + +parallel_options_map.pdf: parallel.pod pod2graph + ./pod2graph parallel.pod > parallel_options_map.pdf \ + || echo "Warning: pod2graph failed. Using old parallel_options_map.pdf" + +sem: parallel + ln -fs parallel sem + +DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ + parallel_examples.7 parallel_tutorial.7 parallel_book.7 \ + parallel_design.7 parallel_alternatives.7 parcat.1 parset.1 \ + parsort.1 \ + parallel.html env_parallel.html sem.html sql.html \ + niceload.html parallel_examples.html parallel_tutorial.html \ + parallel_book.html parallel_design.html \ + parallel_alternatives.html parcat.html parset.html \ + parsort.html \ + parallel.texi env_parallel.texi sem.texi sql.texi \ + niceload.texi parallel_examples.texi parallel_tutorial.texi \ + parallel_book.texi parallel_design.texi \ + parallel_alternatives.texi parcat.texi parset.texi \ + parsort.texi \ + parallel.rst env_parallel.rst sem.rst sql.rst niceload.rst \ + parallel_examples.rst parallel_tutorial.rst parallel_book.rst \ + parallel_design.rst parallel_alternatives.rst parcat.rst \ + parset.rst parsort.rst \ + parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ + parallel_examples.pdf parallel_tutorial.pdf parallel_book.pdf \ + parallel_design.pdf parallel_alternatives.pdf parcat.pdf \ + parset.pdf parsort.pdf parallel_cheat_bw.pdf \ + parallel_options_map.pdf + +EXTRA_DIST = parallel sem sql niceload parcat parset parsort \ + env_parallel env_parallel.ash env_parallel.bash \ + env_parallel.csh env_parallel.dash env_parallel.fish \ + env_parallel.ksh env_parallel.mksh env_parallel.pdksh \ + env_parallel.sh env_parallel.tcsh env_parallel.zsh parcat.pod \ + parset.pod sem.pod parallel.pod env_parallel.pod niceload.pod \ + parallel_examples.pod parallel_tutorial.pod parallel_book.pod \ + parallel_design.pod parallel_alternatives.pod \ + parallel_cheat_bw.fodt pod2graph $(DISTCLEANFILES) diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..9956b5c --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,992 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# SPDX-FileCopyrightText: 2002-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +subdir = src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" \ + "$(DESTDIR)$(man7dir)" "$(DESTDIR)$(docdir)" +SCRIPTS = $(bin_SCRIPTS) +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +man1dir = $(mandir)/man1 +man7dir = $(mandir)/man7 +NROFF = nroff +MANS = $(man_MANS) +DATA = $(doc_DATA) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +ETAGS = @ETAGS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +am__leading_dot = @am__leading_dot@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build_alias = @build_alias@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host_alias = @host_alias@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +bin_SCRIPTS = parallel sql niceload parcat parset parsort \ + env_parallel env_parallel.ash env_parallel.bash \ + env_parallel.csh env_parallel.dash env_parallel.fish \ + env_parallel.ksh env_parallel.mksh env_parallel.pdksh \ + env_parallel.sh env_parallel.tcsh env_parallel.zsh + +@DOCUMENTATION_TRUE@man_MANS = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ +@DOCUMENTATION_TRUE@ parallel_examples.7 parallel_tutorial.7 parallel_book.7 \ +@DOCUMENTATION_TRUE@ parallel_design.7 parallel_alternatives.7 parcat.1 parset.1 \ +@DOCUMENTATION_TRUE@ parsort.1 + +@DOCUMENTATION_TRUE@doc_DATA = parallel.html env_parallel.html sem.html sql.html \ +@DOCUMENTATION_TRUE@ niceload.html parallel_examples.html parallel_tutorial.html \ +@DOCUMENTATION_TRUE@ parallel_book.html parallel_design.html \ +@DOCUMENTATION_TRUE@ parallel_alternatives.html parcat.html parset.html \ +@DOCUMENTATION_TRUE@ parsort.html \ +@DOCUMENTATION_TRUE@ parallel.texi env_parallel.texi sem.texi sql.texi \ +@DOCUMENTATION_TRUE@ niceload.texi parallel_examples.texi parallel_tutorial.texi \ +@DOCUMENTATION_TRUE@ parallel_book.texi parallel_design.texi \ +@DOCUMENTATION_TRUE@ parallel_alternatives.texi parcat.texi parset.texi \ +@DOCUMENTATION_TRUE@ parsort.texi \ +@DOCUMENTATION_TRUE@ parallel.rst env_parallel.rst sem.rst sql.rst niceload.rst \ +@DOCUMENTATION_TRUE@ parallel_examples.rst parallel_tutorial.rst parallel_book.rst \ +@DOCUMENTATION_TRUE@ parallel_design.rst parallel_alternatives.rst parcat.rst \ +@DOCUMENTATION_TRUE@ parset.rst parsort.rst \ +@DOCUMENTATION_TRUE@ parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ +@DOCUMENTATION_TRUE@ parallel_examples.pdf parallel_tutorial.pdf parallel_book.pdf \ +@DOCUMENTATION_TRUE@ parallel_design.pdf parallel_alternatives.pdf parcat.pdf \ +@DOCUMENTATION_TRUE@ parset.pdf parsort.pdf parallel_cheat_bw.pdf \ +@DOCUMENTATION_TRUE@ parallel_options_map.pdf + +DISTCLEANFILES = parallel.1 env_parallel.1 sem.1 sql.1 niceload.1 \ + parallel_examples.7 parallel_tutorial.7 parallel_book.7 \ + parallel_design.7 parallel_alternatives.7 parcat.1 parset.1 \ + parsort.1 \ + parallel.html env_parallel.html sem.html sql.html \ + niceload.html parallel_examples.html parallel_tutorial.html \ + parallel_book.html parallel_design.html \ + parallel_alternatives.html parcat.html parset.html \ + parsort.html \ + parallel.texi env_parallel.texi sem.texi sql.texi \ + niceload.texi parallel_examples.texi parallel_tutorial.texi \ + parallel_book.texi parallel_design.texi \ + parallel_alternatives.texi parcat.texi parset.texi \ + parsort.texi \ + parallel.rst env_parallel.rst sem.rst sql.rst niceload.rst \ + parallel_examples.rst parallel_tutorial.rst parallel_book.rst \ + parallel_design.rst parallel_alternatives.rst parcat.rst \ + parset.rst parsort.rst \ + parallel.pdf env_parallel.pdf sem.pdf sql.pdf niceload.pdf \ + parallel_examples.pdf parallel_tutorial.pdf parallel_book.pdf \ + parallel_design.pdf parallel_alternatives.pdf parcat.pdf \ + parset.pdf parsort.pdf parallel_cheat_bw.pdf \ + parallel_options_map.pdf + +EXTRA_DIST = parallel sem sql niceload parcat parset parsort \ + env_parallel env_parallel.ash env_parallel.bash \ + env_parallel.csh env_parallel.dash env_parallel.fish \ + env_parallel.ksh env_parallel.mksh env_parallel.pdksh \ + env_parallel.sh env_parallel.tcsh env_parallel.zsh parcat.pod \ + parset.pod sem.pod parallel.pod env_parallel.pod niceload.pod \ + parallel_examples.pod parallel_tutorial.pod parallel_book.pod \ + parallel_design.pod parallel_alternatives.pod \ + parallel_cheat_bw.fodt pod2graph $(DISTCLEANFILES) + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binSCRIPTS: $(bin_SCRIPTS) + @$(NORMAL_INSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n' \ + -e 'h;s|.*|.|' \ + -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) { files[d] = files[d] " " $$1; \ + if (++n[d] == $(am__install_max)) { \ + print "f", d, files[d]; n[d] = 0; files[d] = "" } } \ + else { print "f", d "/" $$4, $$1 } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binSCRIPTS: + @$(NORMAL_UNINSTALL) + @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 's,.*/,,;$(transform)'`; \ + dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir) +install-man1: $(man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +install-man7: $(man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(man_MANS)'; \ + test -n "$(man7dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man7dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man7dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.7[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^7][0-9a-z]*$$,7,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man7dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man7dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man7dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man7dir)" || exit $$?; }; \ + done; } + +uninstall-man7: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man7dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.7[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^7][0-9a-z]*$$,7,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man7dir)'; $(am__uninstall_files_from_dir) +install-docDATA: $(doc_DATA) + @$(NORMAL_INSTALL) + @list='$(doc_DATA)'; test -n "$(docdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(docdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(docdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(docdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(docdir)" || exit $$?; \ + done + +uninstall-docDATA: + @$(NORMAL_UNINSTALL) + @list='$(doc_DATA)'; test -n "$(docdir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(docdir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(SCRIPTS) $(MANS) $(DATA) +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man7dir)" "$(DESTDIR)$(docdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-docDATA install-man + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binSCRIPTS + @$(NORMAL_INSTALL) + $(MAKE) $(AM_MAKEFLAGS) install-exec-hook +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 install-man7 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binSCRIPTS uninstall-docDATA uninstall-man + +uninstall-man: uninstall-man1 uninstall-man7 + +.MAKE: install-am install-exec-am install-strip + +.PHONY: all all-am check check-am clean clean-generic cscopelist-am \ + ctags-am distclean distclean-generic distdir dvi dvi-am html \ + html-am info info-am install install-am install-binSCRIPTS \ + install-data install-data-am install-docDATA install-dvi \ + install-dvi-am install-exec install-exec-am install-exec-hook \ + install-html install-html-am install-info install-info-am \ + install-man install-man1 install-man7 install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ + pdf-am ps ps-am tags-am uninstall uninstall-am \ + uninstall-binSCRIPTS uninstall-docDATA uninstall-man \ + uninstall-man1 uninstall-man7 + +.PRECIOUS: Makefile + + +install-exec-hook: + rm "$(DESTDIR)$(bindir)"/sem || true + $(LN_S) parallel "$(DESTDIR)$(bindir)"/sem + +web: sphinx + true + +sphinx: *.rst + cd sphinx && make && cd .. + +# Build documentation file if the tool to build exists. +# Otherwise: Use the distributed version +parallel.1: parallel.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.1n \ + && mv "$(srcdir)"/parallel.1n "$(srcdir)"/parallel.1 \ + || echo "Warning: pod2man not found. Using old parallel.1" + +env_parallel.1: env_parallel.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.1n \ + && mv "$(srcdir)"/env_parallel.1n "$(srcdir)"/env_parallel.1 \ + || echo "Warning: pod2man not found. Using old env_parallel.1" + +parallel_examples.7: parallel_examples.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_examples.pod > "$(srcdir)"/parallel_examples.7n \ + && mv "$(srcdir)"/parallel_examples.7n "$(srcdir)"/parallel_examples.7 \ + || echo "Warning: pod2man not found. Using old parallel_examples.7" + +parallel_tutorial.7: parallel_tutorial.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.7n \ + && mv "$(srcdir)"/parallel_tutorial.7n "$(srcdir)"/parallel_tutorial.7 \ + || echo "Warning: pod2man not found. Using old parallel_tutorial.7" + +parallel_book.7: parallel_book.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.7n \ + && mv "$(srcdir)"/parallel_book.7n "$(srcdir)"/parallel_book.7 \ + || echo "Warning: pod2man not found. Using old parallel_book.7" + +parallel_design.7: parallel_design.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.7n \ + && mv "$(srcdir)"/parallel_design.7n "$(srcdir)"/parallel_design.7 \ + || echo "Warning: pod2man not found. Using old parallel_design.7" + +parallel_alternatives.7: parallel_alternatives.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=7 "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.7n \ + && mv "$(srcdir)"/parallel_alternatives.7n "$(srcdir)"/parallel_alternatives.7 \ + || echo "Warning: pod2man not found. Using old parallel_alternatives.7" + +sem.1: sem.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/sem.pod > "$(srcdir)"/sem.1n \ + && mv "$(srcdir)"/sem.1n "$(srcdir)"/sem.1 \ + || echo "Warning: pod2man not found. Using old sem.1" + +sql.1: sql + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/sql > "$(srcdir)"/sql.1n \ + && mv "$(srcdir)"/sql.1n "$(srcdir)"/sql.1 \ + || echo "Warning: pod2man not found. Using old sql.1" + +niceload.1: niceload.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.1n \ + && mv "$(srcdir)"/niceload.1n "$(srcdir)"/niceload.1 \ + || echo "Warning: pod2man not found. Using old niceload.1" + +parcat.1: parcat.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.1n \ + && mv "$(srcdir)"/parcat.1n "$(srcdir)"/parcat.1 \ + || echo "Warning: pod2man not found. Using old parcat.1" + +parset.1: parset.pod + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parset.pod > "$(srcdir)"/parset.1n \ + && mv "$(srcdir)"/parset.1n "$(srcdir)"/parset.1 \ + || echo "Warning: pod2man not found. Using old parset.1" + +parsort.1: parsort + pod2man --release='$(PACKAGE_VERSION)' --center='$(PACKAGE_NAME)' \ + --section=1 "$(srcdir)"/parsort > "$(srcdir)"/parsort.1n \ + && mv "$(srcdir)"/parsort.1n "$(srcdir)"/parsort.1 \ + || echo "Warning: pod2man not found. Using old parsort.1" + +parallel.html: parallel.pod + pod2html --title "GNU Parallel" "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.htmln \ + && mv "$(srcdir)"/parallel.htmln "$(srcdir)"/parallel.html \ + || echo "Warning: pod2html not found. Using old parallel.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel.html to avoid stupid pod2html race condition +env_parallel.html: env_parallel.pod parallel.html + pod2html --title "GNU Parallel with environment" "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.htmln \ + && mv "$(srcdir)"/env_parallel.htmln "$(srcdir)"/env_parallel.html \ + || echo "Warning: pod2html not found. Using old env_parallel.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on env_parallel.html to avoid stupid pod2html race condition +parallel_examples.html: parallel_examples.pod env_parallel.html + pod2html --title "GNU Parallel examples" "$(srcdir)"/parallel_examples.pod > "$(srcdir)"/parallel_examples.htmln \ + && mv "$(srcdir)"/parallel_examples.htmln "$(srcdir)"/parallel_examples.html \ + || echo "Warning: pod2html not found. Using old parallel_examples.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_examples.html to avoid stupid pod2html race condition +parallel_tutorial.html: parallel_tutorial.pod parallel_examples.html + pod2html --title "GNU Parallel tutorial" "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.htmln \ + && mv "$(srcdir)"/parallel_tutorial.htmln "$(srcdir)"/parallel_tutorial.html \ + || echo "Warning: pod2html not found. Using old parallel_tutorial.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_tutorial.html to avoid stupid pod2html race condition +parallel_book.html: parallel_book.pod parallel_tutorial.html + pod2html --title "GNU Parallel book" "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.htmln \ + && mv "$(srcdir)"/parallel_book.htmln "$(srcdir)"/parallel_book.html \ + || echo "Warning: pod2html not found. Using old parallel_book.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_book.html to avoid stupid pod2html race condition +parallel_design.html: parallel_design.pod parallel_book.html + pod2html --title "GNU Parallel design" "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.htmln \ + && mv "$(srcdir)"/parallel_design.htmln "$(srcdir)"/parallel_design.html \ + || echo "Warning: pod2html not found. Using old parallel_design.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_design.html to avoid stupid pod2html race condition +parallel_alternatives.html: parallel_alternatives.pod parallel_design.html + pod2html --title "GNU Parallel alternatives" "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.htmln \ + && mv "$(srcdir)"/parallel_alternatives.htmln "$(srcdir)"/parallel_alternatives.html \ + || echo "Warning: pod2html not found. Using old parallel_alternatives.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parallel_alternatives.html to avoid stupid pod2html race condition +sem.html: sem.pod parallel_alternatives.html + pod2html --title "sem (GNU Parallel)" "$(srcdir)"/sem.pod > "$(srcdir)"/sem.htmln \ + && mv "$(srcdir)"/sem.htmln "$(srcdir)"/sem.html \ + || echo "Warning: pod2html not found. Using old sem.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on sem.html to avoid stupid pod2html race condition +sql.html: sql sem.html + pod2html --title "GNU SQL" "$(srcdir)"/sql > "$(srcdir)"/sql.htmln \ + && mv "$(srcdir)"/sql.htmln "$(srcdir)"/sql.html \ + || echo "Warning: pod2html not found. Using old sql.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on sql.html to avoid stupid pod2html race condition +niceload.html: niceload.pod sql.html + pod2html --title "GNU niceload" "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.htmln \ + && mv "$(srcdir)"/niceload.htmln "$(srcdir)"/niceload.html \ + || echo "Warning: pod2html not found. Using old niceload.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on niceload.html to avoid stupid pod2html race condition +parcat.html: parcat.pod niceload.html + pod2html --title "GNU parcat" "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.htmln \ + && mv "$(srcdir)"/parcat.htmln "$(srcdir)"/parcat.html \ + || echo "Warning: pod2html not found. Using old parcat.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parcat.html to avoid stupid pod2html race condition +parset.html: parset.pod parcat.html + pod2html --title "GNU parset" "$(srcdir)"/parset.pod > "$(srcdir)"/parset.htmln \ + && mv "$(srcdir)"/parset.htmln "$(srcdir)"/parset.html \ + || echo "Warning: pod2html not found. Using old parset.html" + rm -f "$(srcdir)"/pod2htm* + +# Depending on parset.html to avoid stupid pod2html race condition +parsort.html: parsort parset.html + pod2html --title "GNU parsort" "$(srcdir)"/parsort > "$(srcdir)"/parsort.htmln \ + && mv "$(srcdir)"/parsort.htmln "$(srcdir)"/parsort.html \ + || echo "Warning: pod2html not found. Using old parsort.html" + rm -f "$(srcdir)"/pod2htm* + +parallel.texi: parallel.pod + pod2texi --output="$(srcdir)"/parallel.texi "$(srcdir)"/parallel.pod \ + || echo "Warning: pod2texi not found. Using old parallel.texi" + +env_parallel.texi: env_parallel.pod + pod2texi --output="$(srcdir)"/env_parallel.texi "$(srcdir)"/env_parallel.pod \ + || echo "Warning: pod2texi not found. Using old env_parallel.texi" + +parallel_examples.texi: parallel_examples.pod + pod2texi --output="$(srcdir)"/parallel_examples.texi "$(srcdir)"/parallel_examples.pod \ + || echo "Warning: pod2texi not found. Using old parallel_examples.texi" + +parallel_tutorial.texi: parallel_tutorial.pod + pod2texi --output="$(srcdir)"/parallel_tutorial.texi "$(srcdir)"/parallel_tutorial.pod \ + || echo "Warning: pod2texi not found. Using old parallel_tutorial.texi" + +parallel_book.texi: parallel_book.pod + pod2texi --output="$(srcdir)"/parallel_book.texi "$(srcdir)"/parallel_book.pod \ + || echo "Warning: pod2texi not found. Using old parallel_book.texi" + +parallel_design.texi: parallel_design.pod + pod2texi --output="$(srcdir)"/parallel_design.texi "$(srcdir)"/parallel_design.pod \ + || echo "Warning: pod2texi not found. Using old parallel_design.texi" + +parallel_alternatives.texi: parallel_alternatives.pod + pod2texi --output="$(srcdir)"/parallel_alternatives.texi "$(srcdir)"/parallel_alternatives.pod \ + || echo "Warning: pod2texi not found. Using old parallel_alternatives.texi" + +sem.texi: sem.pod + pod2texi --output="$(srcdir)"/sem.texi "$(srcdir)"/sem.pod \ + || echo "Warning: pod2texi not found. Using old sem.texi" + +sql.texi: sql + pod2texi --output="$(srcdir)"/sql.texi "$(srcdir)"/sql \ + || echo "Warning: pod2texi not found. Using old sql.texi" + +niceload.texi: niceload.pod + pod2texi --output="$(srcdir)"/niceload.texi "$(srcdir)"/niceload.pod \ + || echo "Warning: pod2texi not found. Using old niceload.texi" + +parcat.texi: parcat.pod + pod2texi --output="$(srcdir)"/parcat.texi "$(srcdir)"/parcat.pod \ + || echo "Warning: pod2texi not found. Using old parcat.texi" + +parset.texi: parset.pod + pod2texi --output="$(srcdir)"/parset.texi "$(srcdir)"/parset.pod \ + || echo "Warning: pod2texi not found. Using old parset.texi" + +parsort.texi: parsort + pod2texi --output="$(srcdir)"/parsort.texi "$(srcdir)"/parsort \ + || echo "Warning: pod2texi not found. Using old parsort.texi" + +parallel.rst: parallel.pod + ./pod2rst-fix < "$(srcdir)"/parallel.pod > "$(srcdir)"/parallel.rst \ + || echo "Warning: pod2rst not found. Using old parallel.rst" + +env_parallel.rst: env_parallel.pod + ./pod2rst-fix < "$(srcdir)"/env_parallel.pod > "$(srcdir)"/env_parallel.rst \ + || echo "Warning: pod2rst not found. Using old env_parallel.rst" + +parallel_examples.rst: parallel_examples.pod + ./pod2rst-fix < "$(srcdir)"/parallel_examples.pod > "$(srcdir)"/parallel_examples.rst \ + || echo "Warning: pod2rst not found. Using old parallel_examples.rst" + +parallel_tutorial.rst: parallel_tutorial.pod + ./pod2rst-fix < "$(srcdir)"/parallel_tutorial.pod > "$(srcdir)"/parallel_tutorial.rst \ + || echo "Warning: pod2rst not found. Using old parallel_tutorial.rst" + +parallel_book.rst: parallel_book.pod + ./pod2rst-fix < "$(srcdir)"/parallel_book.pod > "$(srcdir)"/parallel_book.rst \ + || echo "Warning: pod2rst not found. Using old parallel_book.rst" + +parallel_design.rst: parallel_design.pod + ./pod2rst-fix < "$(srcdir)"/parallel_design.pod > "$(srcdir)"/parallel_design.rst \ + || echo "Warning: pod2rst not found. Using old parallel_design.rst" + +parallel_alternatives.rst: parallel_alternatives.pod + ./pod2rst-fix < "$(srcdir)"/parallel_alternatives.pod > "$(srcdir)"/parallel_alternatives.rst \ + || echo "Warning: pod2rst not found. Using old parallel_alternatives.rst" + +sem.rst: sem.pod + ./pod2rst-fix < "$(srcdir)"/sem.pod > "$(srcdir)"/sem.rst \ + || echo "Warning: pod2rst not found. Using old sem.rst" + +sql.rst: sql + ./pod2rst-fix < "$(srcdir)"/sql > "$(srcdir)"/sql.rst \ + || echo "Warning: pod2rst not found. Using old sql.rst" + +niceload.rst: niceload.pod + ./pod2rst-fix < "$(srcdir)"/niceload.pod > "$(srcdir)"/niceload.rst \ + || echo "Warning: pod2rst not found. Using old niceload.rst" + +parcat.rst: parcat.pod + ./pod2rst-fix < "$(srcdir)"/parcat.pod > "$(srcdir)"/parcat.rst \ + || echo "Warning: pod2rst not found. Using old parcat.rst" + +parset.rst: parset.pod + ./pod2rst-fix < "$(srcdir)"/parset.pod > "$(srcdir)"/parset.rst \ + || echo "Warning: pod2rst not found. Using old parset.rst" + +parsort.rst: parsort + ./pod2rst-fix < "$(srcdir)"/parsort > "$(srcdir)"/parsort.rst \ + || echo "Warning: pod2rst not found. Using old parsort.rst" + +parallel.pdf: parallel.pod + pod2pdf --output-file "$(srcdir)"/parallel.pdf "$(srcdir)"/parallel.pod --title "GNU Parallel" \ + || echo "Warning: pod2pdf not found. Using old parallel.pdf" + +env_parallel.pdf: env_parallel.pod + pod2pdf --output-file "$(srcdir)"/env_parallel.pdf "$(srcdir)"/env_parallel.pod --title "GNU Parallel with environment" \ + || echo "Warning: pod2pdf not found. Using old env_parallel.pdf" + +parallel_examples.pdf: parallel_examples.pod + pod2pdf --output-file "$(srcdir)"/parallel_examples.pdf "$(srcdir)"/parallel_examples.pod --title "GNU Parallel Examples" \ + || echo "Warning: pod2pdf not found. Using old parallel_examples.pdf" + +parallel_tutorial.pdf: parallel_tutorial.pod + pod2pdf --output-file "$(srcdir)"/parallel_tutorial.pdf "$(srcdir)"/parallel_tutorial.pod --title "GNU Parallel Tutorial" \ + || echo "Warning: pod2pdf not found. Using old parallel_tutorial.pdf" + +parallel_book.pdf: parallel_book.pod + pod2pdf --output-file "$(srcdir)"/parallel_book.pdf "$(srcdir)"/parallel_book.pod --title "GNU Parallel Book" \ + || echo "Warning: pod2pdf not found. Using old parallel_book.pdf" + +parallel_design.pdf: parallel_design.pod + pod2pdf --output-file "$(srcdir)"/parallel_design.pdf "$(srcdir)"/parallel_design.pod --title "GNU Parallel Design" \ + || echo "Warning: pod2pdf not found. Using old parallel_design.pdf" + +parallel_alternatives.pdf: parallel_alternatives.pod + pod2pdf --output-file "$(srcdir)"/parallel_alternatives.pdf "$(srcdir)"/parallel_alternatives.pod --title "GNU Parallel alternatives" \ + || echo "Warning: pod2pdf not found. Using old parallel_alternatives.pdf" + +sem.pdf: sem.pod + pod2pdf --output-file "$(srcdir)"/sem.pdf "$(srcdir)"/sem.pod --title "GNU sem" \ + || echo "Warning: pod2pdf not found. Using old sem.pdf" + +sql.pdf: sql + pod2pdf --output-file "$(srcdir)"/sql.pdf "$(srcdir)"/sql --title "GNU SQL" \ + || echo "Warning: pod2pdf not found. Using old sql.pdf" + +niceload.pdf: niceload.pod + pod2pdf --output-file "$(srcdir)"/niceload.pdf "$(srcdir)"/niceload.pod --title "GNU niceload" \ + || echo "Warning: pod2pdf not found. Using old niceload.pdf" + +parcat.pdf: parcat.pod + pod2pdf --output-file "$(srcdir)"/parcat.pdf "$(srcdir)"/parcat.pod --title "GNU parcat" \ + || echo "Warning: pod2pdf not found. Using old parcat.pdf" + +parset.pdf: parset.pod + pod2pdf --output-file "$(srcdir)"/parset.pdf "$(srcdir)"/parset.pod --title "GNU parset" \ + || echo "Warning: pod2pdf not found. Using old parset.pdf" + +parsort.pdf: parsort + pod2pdf --output-file "$(srcdir)"/parsort.pdf "$(srcdir)"/parsort --title "GNU parsort" \ + || echo "Warning: pod2pdf not found. Using old parsort.pdf" + +parallel_cheat_bw.pdf: parallel_cheat_bw.fodt + libreoffice --headless --convert-to pdf parallel_cheat_bw.fodt \ + || echo "Warning: libreoffice failed. Using old parallel_cheat_bw.pdf" + +parallel_options_map.pdf: parallel.pod pod2graph + ./pod2graph parallel.pod > parallel_options_map.pdf \ + || echo "Warning: pod2graph failed. Using old parallel_options_map.pdf" + +sem: parallel + ln -fs parallel sem + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/env_parallel b/src/env_parallel new file mode 100755 index 0000000..3f968fc --- /dev/null +++ b/src/env_parallel @@ -0,0 +1,143 @@ +#!/usr/bin/env bash + +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +grepq() { + # grep -q for systems without -q + grep >/dev/null 2>/dev/null "$@" +} + +installer() { + source="$1" + script="$2" + into="$3" + if grepq $script "$into"; then + true already installed + else + echo $source \`which $script\` >> "$into" + fi +} + +while test $# -gt 0; do + key="$1" + + case $key in + -i|--install) + installer . env_parallel.bash "$HOME"/.bashrc + installer . env_parallel.sh "$HOME"/.shrc + installer . env_parallel.zsh "$HOME"/.zshenv + installer source env_parallel.ksh "$HOME"/.kshrc + installer source env_parallel.mksh "$HOME"/.mkshrc + echo $SHELL | grepq /pdksh && + installer . env_parallel.pdksh "$HOME"/.profile + echo $SHELL | grepq /ash && + installer . env_parallel.ash "$HOME"/.profile + echo $SHELL | grepq /dash && + installer . env_parallel.dash "$HOME"/.profile + installer source env_parallel.csh "$HOME"/.cshrc + installer source env_parallel.tcsh "$HOME"/.tcshrc + mkdir -p "$HOME"/.config/fish + grepq env_parallel.fish "$HOME"/.config/fish/config.fish || + echo '. (which env_parallel.fish)' >> "$HOME"/.config/fish/config.fish + echo 'Installed env_parallel in:' + echo " " "$HOME"/.bashrc + echo " " "$HOME"/.shrc + echo " " "$HOME"/.zshenv + echo " " "$HOME"/.config/fish/config.fish + echo " " "$HOME"/.kshrc + echo " " "$HOME"/.mkshrc + echo " " "$HOME"/.profile + echo " " "$HOME"/.cshrc + echo " " "$HOME"/.tcshrc + exit + ;; + *) + echo "Unknown option: $key" + ;; + esac + shift # past argument or value +done + + +cat <<'_EOS' +env_parallel only works if it is a function. + +Do this and restart your shell: + +bash: Put this in $HOME/.bashrc: . `which env_parallel.bash` + E.g. by doing: echo '. `which env_parallel.bash`' >> $HOME/.bashrc + Supports: variables, aliases, functions, arrays + +fish: Put this in $HOME/.config/fish/config.fish: . (which env_parallel.fish) + E.g. by doing: + echo '. (which env_parallel.fish)' >> $HOME/.config/fish/config.fish + Supports: variables, aliases, functions, arrays + +ksh: Put this in $HOME/.kshrc: source `which env_parallel.ksh` + E.g. by doing: echo 'source `which env_parallel.ksh`' >> $HOME/.kshrc + Supports: variables, aliases, functions, arrays + +mksh: Put this in $HOME/.mkshrc: source `which env_parallel.mksh` + E.g. by doing: echo 'source `which env_parallel.mksh`' >> $HOME/.mkshrc + Supports: variables, aliases, functions, arrays + +pdksh: Put this in $HOME/.profile: source `which env_parallel.pdksh` + E.g. by doing: echo '. `which env_parallel.pdksh`' >> $HOME/.profile + Supports: variables, aliases, functions, arrays + +zsh: Put this in $HOME/.zshrc: . `which env_parallel.zsh` + E.g. by doing: echo '. `which env_parallel.zsh`' >> $HOME/.zshenv + Supports: variables, functions, arrays + +ash: Put this in $HOME/.profile: . `which env_parallel.ash` + E.g. by doing: echo '. `which env_parallel.ash`' >> $HOME/.profile + Supports: variables, aliases + +dash: Put this in $HOME/.profile: . `which env_parallel.dash` + E.g. by doing: echo '. `which env_parallel.dash`' >> $HOME/.profile + Supports: variables, aliases + +csh: Put this in $HOME/.cshrc: source `which env_parallel.csh` + E.g. by doing: echo 'source `which env_parallel.csh`' >> $HOME/.cshrc + Supports: variables, aliases, arrays with no special chars + +tcsh: Put this in $HOME/.tcshrc: source `which env_parallel.tcsh` + E.g. by doing: echo 'source `which env_parallel.tcsh`' >> $HOME/.tcshrc + Supports: variables, aliases, arrays with no special chars + +To install in all shells run: + + env_parallel --install + +In a script you need to run this before using env_parallel: + +bash: . `which env_parallel.bash` +ksh: source `which env_parallel.ksh` +mksh: source `which env_parallel.mksh` +pdksh: source `which env_parallel.pdksh` +zsh: . `which env_parallel.zsh` +ash: . `which env_parallel.ash` +dash: . `which env_parallel.dash` + +For details: see man env_parallel + +_EOS diff --git a/src/env_parallel.ash b/src/env_parallel.ash new file mode 100755 index 0000000..a3a24d5 --- /dev/null +++ b/src/env_parallel.ash @@ -0,0 +1,430 @@ +#!/usr/bin/env ash + +# This file must be sourced in ash: +# +# . `which env_parallel.ash` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 shell=dash + +env_parallel() { + # based on env_parallel.sh + + _names_of_ALIASES() { + # alias fails on Unixware 5 + for _i in `alias 2>/dev/null | perl -ne 's/^alias //;s/^(\S+)=.*/$1/ && print' 2>/dev/null`; do + # Check if this name really is an alias + # or just part of a multiline alias definition + if alias "$_i" >/dev/null 2>/dev/null; then + echo "$_i" + fi + done + } + _bodies_of_ALIASES() { + # alias may return: + # myalias='definition' (GNU/Linux ash) + # alias myalias='definition' (FreeBSD ash) + # so remove 'alias ' from first line + for _i in "$@"; do + echo 'alias '"`alias "$_i" | perl -pe '1..1 and s/^alias //'`" + done + } + _names_of_maybe_FUNCTIONS() { + set | perl -ne '/^([A-Z_0-9]+)\s*\(\)\s*\{?$/i and print "$1\n"' + } + _names_of_FUNCTIONS() { + # myfunc is a function + # shellcheck disable=SC2046 + LANG=C type `_names_of_maybe_FUNCTIONS` | + perl -ne '/^(\S+) is a function$/ and not $seen{$1}++ and print "$1\n"' + } + _bodies_of_FUNCTIONS() { + LANG=C type "$@" | perl -ne '/^(\S+) is a function$/ or print' + } + _names_of_VARIABLES() { + # This may screw up if variables contain \n and = + set | perl -ne 's/^(\S+?)=.*/$1/ and print;' + } + _bodies_of_VARIABLES() { + # Crappy typeset -p + for _i in "$@" + do + perl -e 'print @ARGV' "$_i=" + eval echo "\"\$$_i\"" | perl -e '$/=undef; $a=<>; chop($a); print $a' | + perl -pe 's/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\202-\377]/\\$&/go;'"s/'/\\\'/g; s/[\n]/'\\n'/go;"; + echo + done + } + _ignore_HARDCODED() { + # These names cannot be detected + echo '(_|TIMEOUT|IFS)' + } + _ignore_READONLY() { + # shellcheck disable=SC1078,SC1079,SC2026 + readonly | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + if _which_PAR parallel >/dev/null; then + true parallel found in path + else + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + if perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + if perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # shellcheck disable=SC2006 + PARALLEL_ENV="` + $_list_alias_BODIES; + $_list_function_BODIES; + $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big by running 'true' + # shellcheck disable=SC2006,SC2092 + if `_which_PAR true` >/dev/null 2>/dev/null ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/env_parallel.bash b/src/env_parallel.bash new file mode 100755 index 0000000..1165a60 --- /dev/null +++ b/src/env_parallel.bash @@ -0,0 +1,432 @@ +#!/usr/bin/env bash + +# This file must be sourced in bash: +# +# source `which env_parallel.bash` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 + +env_parallel() { + # env_parallel.bash + + _names_of_ALIASES() { + # No aliases will return false. This error should be ignored. + compgen -a || true + } + _bodies_of_ALIASES() { + local _i + for _i in "$@"; do + # shellcheck disable=SC2046 + if [ $(alias "$_i" | wc -l) == 1 ] ; then + true Alias is a single line. Good. + else + _warning_PAR "Alias '$_i' contains newline." + _warning_PAR "Make sure the command has at least one newline after '$_i'." + _warning_PAR "See BUGS in 'man env_parallel'." + fi + done + alias "$@" + } + _names_of_FUNCTIONS() { + compgen -A function + } + _bodies_of_FUNCTIONS() { + typeset -f "$@" + } + _names_of_VARIABLES() { + compgen -A variable + } + _bodies_of_VARIABLES() { + typeset -p "$@" + } + _ignore_HARDCODED() { + # Copying $RANDOM will cause it not to be random + # The rest cannot be detected as read-only + echo '(RANDOM|_|TIMEOUT|GROUPS|FUNCNAME|DIRSTACK|PIPESTATUS|USERNAME|BASHPID|BASH_[A-Z_]+)' + } + _ignore_READONLY() { + # shellcheck disable=SC1078,SC1079,SC2026 + readonly | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _prefix_PARALLEL_ENV() { + shopt 2>/dev/null | + perl -pe 's:\s+off:;: and s/^/shopt -u /; + s:\s+on:;: and s/^/shopt -s /; + s:;$:&>/dev/null;:'; + echo 'shopt -s expand_aliases &>/dev/null'; + } + + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + # Bash is broken in version 3.2.25 and 4.2.39 + # The crazy '[ "`...`" == "" ]' is needed for the same reason + if [ "`_which_PAR parallel`" == "" ]; then + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + # Bash is broken in version 3.2.25 and 4.2.39 + # The crazy '[ "`...`" == 0 ]' is needed for the same reason + if [ "`perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; echo $?`" == 0 ] ; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + # Bash is broken in version 3.2.25 and 4.2.39 + # The crazy '[ "`...`" == 0 ]' is needed for the same reason + if [ "`perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; echo $?`" == 0 ] ; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + # Bash is broken in version 3.2.25 and 4.2.39 + # The crazy '[ "`...`" == 0 ]' is needed for the same reason + if [ "`perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; echo $?`" == 0 ] ; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + _which_TRUE="`_which_PAR true`" + # Copy shopt (so e.g. extended globbing works) + # But force expand_aliases as aliases otherwise do not work + PARALLEL_ENV="` + _prefix_PARALLEL_ENV + $_list_alias_BODIES; + $_list_function_BODIES; + $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big + if [ "`_which_PAR true`" == "$_which_TRUE" ] ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/env_parallel.csh b/src/env_parallel.csh new file mode 100755 index 0000000..b36e4be --- /dev/null +++ b/src/env_parallel.csh @@ -0,0 +1,142 @@ +#!/usr/bin/env csh + +# This file must be sourced in csh: +# +# source `which env_parallel.csh` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +set _parallel_exit_CODE=0 +if ("`alias env_parallel`" == '' || ! $?PARALLEL_CSH) then + # Activate alias + alias env_parallel '(setenv PARALLEL_CSH "\!*"; source `which env_parallel.csh`)' +else + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + + # simple 'tempfile': Return nonexisting filename: /tmp/parXXXXX + alias _tempfile 'perl -e do\{\$t\=\"/tmp/par\".join\"\",map\{\(0..9,\"a\"..\"z\",\"A\"..\"Z\"\)\[rand\(62\)\]\}\(1..5\)\;\}while\(-e\$t\)\;print\"\$t\\n\"' + set _tMpscRIpt=`_tempfile` + + cat <<'EOF' > $_tMpscRIpt + #!/usr/bin/perl + + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; +'EOF' + set _grep_REGEXP="`perl $_tMpscRIpt -- $PARALLEL_CSH`" + + # Deal with --env _ + cat <<'EOF' > $_tMpscRIpt + #!/usr/bin/perl + + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + } + } +'EOF' + set _ignore_UNDERSCORE="`perl $_tMpscRIpt -- $PARALLEL_CSH`" + rm $_tMpscRIpt + + # Get the scalar and array variable names + set _vARnAmES=(`set | perl -ne 's/\s.*//; /^(#|_|killring|prompt2|command|PARALLEL_ENV|PARALLEL_TMP)$/ and next; /^'"$_grep_REGEXP"'$/ or next; /^'"$_ignore_UNDERSCORE"'$/ and next; print'`) + + # Make a tmpfile for the variable definitions + set _tMpvARfILe=`_tempfile` + touch $_tMpvARfILe + # Make a tmpfile for the variable definitions + alias + set _tMpaLLfILe=`_tempfile` + foreach _vARnAmE ($_vARnAmES); + # These 3 lines break in csh ver. 20110502-3 + # if not defined: next + eval if'(! $?'$_vARnAmE') continue' + # if $#myvar <= 1 echo scalar_myvar=$var + eval if'(${#'$_vARnAmE'} <= 1) echo scalar_'$_vARnAmE'='\"\$$_vARnAmE\" >> $_tMpvARfILe; + # if $#myvar > 1 echo array_myvar=$var + eval if'(${#'$_vARnAmE'} > 1) echo array_'$_vARnAmE'="$'$_vARnAmE'"' >> $_tMpvARfILe; + end + unset _vARnAmE _vARnAmES + # shell quote variables (--plain needed due to ignore if $PARALLEL is set) + # Convert 'scalar_myvar=...' to 'set myvar=...' + # Convert 'array_myvar=...' to 'set array=(...)' + cat $_tMpvARfILe | parallel --plain --shellquote | perl -pe 's/^scalar_(\S+).=/set $1=/ or s/^array_(\S+).=(.*)/set $1=($2)/ && s/\\ / /g;' > $_tMpaLLfILe + # Cleanup + rm $_tMpvARfILe; unset _tMpvARfILe + +# ALIAS TO EXPORT ALIASES: + +# Quote ' by putting it inside " +# s/'/'"'"'/g; +# ' => \047 " => \042 +# s/\047/\047\042\047\042\047/g; +# Quoted: s/\\047/\\047\\042\\047\\042\\047/g\; + +# Remove () from second column +# s/^(\S+)(\s+)\((.*)\)/\1\2\3/; +# Quoted: s/\^\(\\S+\)\(\\s+\)\\\(\(.\*\)\\\)/\\1\\2\\3/\; + +# Add ' around second column +# s/^(\S+)(\s+)(.*)/\1\2'\3'/ +# \047 => ' +# s/^(\S+)(\s+)(.*)/\1\2\047\3\047/; +# Quoted: s/\^\(\\S+\)\(\\s+\)\(.\*\)/\\1\\2\\047\\3\\047/\; + +# Quote ! as \! +# s/\!/\\\!/g; +# Quoted: s/\\\!/\\\\\\\!/g; + +# Prepend with "\nalias " +# s/^/\001alias /; +# Quoted: s/\^/\\001alias\ /\; + alias | \ + perl -ne '/^'"$_grep_REGEXP"'/ or next; /^'"$_ignore_UNDERSCORE"'[^_a-zA-Z]/ and next; print' | \ + perl -pe s/\\047/\\047\\042\\047\\042\\047/g\;s/\^\(\\S+\)\(\\s+\)\\\(\(.\*\)\\\)/\\1\\2\\3/\;s/\^\(\\S+\)\(\\s+\)\(.\*\)/\\1\\2\\047\\3\\047/\;s/\^/\\001alias\ /\;s/\\\!/\\\\\\\!/g >> $_tMpaLLfILe + + setenv PARALLEL_ENV "`cat $_tMpaLLfILe; rm $_tMpaLLfILe`"; + unset _tMpaLLfILe; + # Use $PARALLEL_CSH set in calling alias + parallel + set _parallel_exit_CODE=$status + setenv PARALLEL_ENV + setenv PARALLEL_CSH +endif +(exit $_parallel_exit_CODE) diff --git a/src/env_parallel.dash b/src/env_parallel.dash new file mode 100755 index 0000000..e73991d --- /dev/null +++ b/src/env_parallel.dash @@ -0,0 +1,430 @@ +#!/usr/bin/env dash + +# This file must be sourced in dash: +# +# . `which env_parallel.dash` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 + +env_parallel() { + # based on env_parallel.sh + + _names_of_ALIASES() { + # alias fails on Unixware 5 + for _i in `alias 2>/dev/null | perl -ne 's/^alias //;s/^(\S+)=.*/$1/ && print' 2>/dev/null`; do + # Check if this name really is an alias + # or just part of a multiline alias definition + if alias "$_i" >/dev/null 2>/dev/null; then + echo "$_i" + fi + done + } + _bodies_of_ALIASES() { + # alias may return: + # myalias='definition' (GNU/Linux ash) + # alias myalias='definition' (FreeBSD ash) + # so remove 'alias ' from first line + for _i in "$@"; do + echo 'alias '"`alias "$_i" | perl -pe '1..1 and s/^alias //'`" + done + } + _names_of_maybe_FUNCTIONS() { + set | perl -ne '/^([A-Z_0-9]+)\s*\(\)\s*\{?$/i and print "$1\n"' + } + _names_of_FUNCTIONS() { + # myfunc is a function + # shellcheck disable=SC2046 + LANG=C type `_names_of_maybe_FUNCTIONS` | + perl -ne '/^(\S+) is a function$/ and not $seen{$1}++ and print "$1\n"' + } + _bodies_of_FUNCTIONS() { + LANG=C type "$@" | perl -ne '/^(\S+) is a function$/ or print' + } + _names_of_VARIABLES() { + # This may screw up if variables contain \n and = + set | perl -ne 's/^(\S+?)=.*/$1/ and print;' + } + _bodies_of_VARIABLES() { + # Crappy typeset -p + for _i in "$@" + do + perl -e 'print @ARGV' "$_i=" + eval echo "\"\$$_i\"" | perl -e '$/=undef; $a=<>; chop($a); print $a' | + perl -pe 's/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\202-\377]/\\$&/go;'"s/'/\\\'/g; s/[\n]/'\\n'/go;"; + echo + done + } + _ignore_HARDCODED() { + # These names cannot be detected + echo '(_|TIMEOUT|IFS)' + } + _ignore_READONLY() { + # shellcheck disable=SC1078,SC1079,SC2026 + readonly | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + if _which_PAR parallel >/dev/null; then + true parallel found in path + else + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + if perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + if perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # shellcheck disable=SC2006 + PARALLEL_ENV="` + $_list_alias_BODIES; + $_list_function_BODIES; + $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big by running 'true' + # shellcheck disable=SC2006,SC2092 + if `_which_PAR true` >/dev/null 2>/dev/null ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/env_parallel.fish b/src/env_parallel.fish new file mode 100755 index 0000000..f4568e7 --- /dev/null +++ b/src/env_parallel.fish @@ -0,0 +1,194 @@ +#!/usr/bin/env fish + +# This file must be sourced in fish: +# +# . (which env_parallel.fish) +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# If you are a fisherman feel free to improve the code +# +# The code needs to deal with variables like: +# set funky (perl -e 'print pack "c*", 2..254') +# +# Problem: +# Tell the difference between: +# set tmp "a' 'b' 'c" +# set tmparr1 "a' 'b" 'c' +# set tmparr2 'a' "b' 'c" +# The output from `set` is exactly the same. +# Solution: +# for-loop for each variable. Each value is separated with a +# separator. + +function env_parallel + # env_parallel.fish + + # --session + perl -e 'exit grep { /^--session/ } @ARGV' -- $argv; or begin; + setenv PARALLEL_IGNORED_NAMES ( + begin; + functions -n + set -n; + end | perl -pe 's/\n/,/g'; + ) + return 0 + end; + setenv PARALLEL_ENV ( + begin; + set _grep_REGEXP ( + begin; + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- $argv; + end; + ) + # Deal with --env _ + set _ignore_UNDERSCORE ( + begin; + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/,/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- $argv; + end; + ) + + # --record-env + perl -e 'exit grep { /^--record-env$/ } @ARGV' -- $argv; or begin; + begin; + functions -n | perl -pe 's/,/\n/g'; + set -n; + end | cat > $HOME/.parallel/ignored_vars; + end; + + # Export function definitions + # Keep the ones from --env + # Ignore the ones from ~/.parallel/ignored_vars + # Dump each function defition + # Replace \001 with \002 because \001 is used by env_parallel + # Convert \n to \001 + functions -n | perl -pe 's/,/\n/g' | \ + grep -Ev '^(PARALLEL_ENV|PARALLEL_TMP)$' | \ + grep -E "^$_grep_REGEXP"\$ | grep -vE "^$_ignore_UNDERSCORE"\$ | \ + while read d; functions $d; end | \ + perl -pe 's/\001/\002/g and not $printed++ and print STDERR + "env_parallel: Warning: ASCII value 1 in functions is not supported\n"; + s/\n/\001/g'; + # Convert scalar vars to fish \XX quoting + # Keep the ones from --env + # Ignore the ones from ~/.parallel/ignored_vars + # Ignore read only vars + # Execute 'set' of the content + eval (set -L | \ + grep -Ev '^(PARALLEL_TMP)$' | \ + grep -E "^$_grep_REGEXP " | grep -vE "^$_ignore_UNDERSCORE " | \ + perl -ne 'chomp; + ($name,$val)=split(/ /,$_,2); + $name=~/^(HOME|USER|COLUMNS|FISH_VERSION|LINES|PWD|SHLVL|_| + history|status|version)$|\./x and next; + if($val=~/^'"'"'/) { next; } + print "set $name \"\$$name\";\n"; + ') + # Generate commands to set scalar variables + # Keep the ones from --env + # Ignore the ones from ~/.parallel/ignored_vars + # + begin; + for v in (set -n | \ + grep -Ev '^(PARALLEL_TMP)$' | \ + grep -E "^$_grep_REGEXP\$" | grep -vE "^$_ignore_UNDERSCORE\$"); + # Separate variables with the string: \000 + # array_name1 val1\0 + # array_name1 val2\0 + # array_name2 val3\0 + # array_name2 val4\0 + eval "for i in \$$v; + echo -n $v \$i; + perl -e print\\\"\\\\0\\\"; + end;" + end; + # A final line to flush the last variable in Perl + perl -e print\"\\0\"; + end | perl -0 -ne ' + # Remove separator string \0 + chop; + # Split line into name and value + ($name,$val)=split(/ /,$_,2); + # Ignore read-only vars + $name=~/^(HOME|USER|COLUMNS|FISH_VERSION|LINES|PWD|SHLVL|_| + fish_pid|history|hostname|status|version)$/x and next; + # Single quote $val + if($val =~ /[^-_.+a-z0-9\/]/i) { + $val =~ s/\047/\047"\047"\047/g; # "-quote single quotes + $val = "\047$val\047"; # single-quote entire string + $val =~ s/^\047\047|\047\047$//g; # Remove unneeded '' at ends + } elsif ($val eq "") { + $val = "\047\047"; + } + + if($name ne $last and $last) { + # The $name is different, so this is a new variable. + # Print the last one. + # Separate list elements by 2 spaces + $"=" "; + print "set $last @qval;\n"; + @qval=(); + } + push @qval,$val; + $last=$name; + '| \ + perl -pe 's/\001/\002/g and not $printed++ and print STDERR + "env_parallel: Warning: ASCII value 1 in variables is not supported\n"; + s/\n/\001/g' + end; + ) + # If --record-env: exit + perl -e 'exit grep { /^--record-env$/ } @ARGV' -- $argv; and parallel $argv; + set _parallel_exit_CODE $status + set -e PARALLEL_ENV + return $_parallel_exit_CODE +end diff --git a/src/env_parallel.ksh b/src/env_parallel.ksh new file mode 100755 index 0000000..a6e3b53 --- /dev/null +++ b/src/env_parallel.ksh @@ -0,0 +1,413 @@ +#!/usr/bin/env ksh + +# This file must be sourced in ksh: +# +# source `which env_parallel.ksh` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 + +env_parallel() { + # env_parallel.ksh + + _names_of_ALIASES() { + alias | perl -pe 's/=.*//' + } + _bodies_of_ALIASES() { + alias "$@" | perl -pe 's/^/alias /; + sub warning { print STDERR "env_parallel: Warning: @_\n"; } + if(/^alias (\S+)=\$.*\\n/) { + warning("Alias \"$1\" contains newline."); + warning("Make sure the command has at least one newline after \"$1\"."); + warning("See BUGS in \"man env_parallel\"."); + }' + + } + _names_of_maybe_FUNCTIONS() { + true not used + } + _names_of_FUNCTIONS() { + typeset +p -f | perl -pe 's/\(\).*//' + } + _bodies_of_FUNCTIONS() { + functions "$@" + } + _names_of_VARIABLES() { + typeset +p | perl -pe 's/^typeset .. //' + } + _bodies_of_VARIABLES() { + typeset -p "$@" + } + _ignore_HARDCODED() { + # These names cannot be detected + echo '(_|TIMEOUT|IFS)' + } + _ignore_READONLY() { + # shellcheck disable=SC1078,SC1079,SC2026 + readonly | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + if _which_PAR parallel >/dev/null; then + true parallel found in path + else + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + if perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + if perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # shellcheck disable=SC2006 + PARALLEL_ENV="` + $_list_alias_BODIES; + $_list_function_BODIES; + $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big by running 'true' + # shellcheck disable=SC2006,SC2092 + if `_which_PAR true` >/dev/null 2>/dev/null ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/env_parallel.mksh b/src/env_parallel.mksh new file mode 100644 index 0000000..52cd1bb --- /dev/null +++ b/src/env_parallel.mksh @@ -0,0 +1,415 @@ +#!/usr/bin/env ksh + +# This file must be sourced in mksh: +# +# source `which env_parallel.mksh` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 + +env_parallel() { + # env_parallel.mksh + + _names_of_ALIASES() { + alias | perl -pe 's/=.*//' + } + _bodies_of_ALIASES() { + alias "$@" | perl -pe 's/^/alias /; + sub warning { print STDERR "env_parallel: Warning: @_\n"; } + if(/^alias (\S+)=\$.*\\n/) { + warning("Alias \"$1\" contains newline."); + warning("Make sure the command has at least one newline after \"$1\"."); + warning("See BUGS in \"man env_parallel\"."); + }' + + } + _names_of_maybe_FUNCTIONS() { + true not used + } + _names_of_FUNCTIONS() { + typeset +f + } + _bodies_of_FUNCTIONS() { + typeset -f "$@" + } + _names_of_VARIABLES() { + typeset +p | + perl -pe 's/^(type)?set( [-+][a-zA-Z0-9]*)* //; s/(\[\d+\])?=.*//' | + uniq + } + _bodies_of_VARIABLES() { + typeset -p "$@" + } + _ignore_HARDCODED() { + # These names cannot be detected + echo '(_)' + } + _ignore_READONLY() { + # shellcheck disable=SC1078,SC1079,SC2026 + readonly | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + if _which_PAR parallel >/dev/null; then + true parallel found in path + else + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + if perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + if perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # shellcheck disable=SC2006 + PARALLEL_ENV="` + $_list_alias_BODIES; + $_list_function_BODIES; + $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big by running 'true' + # shellcheck disable=SC2006,SC2092 + if `_which_PAR true` >/dev/null 2>/dev/null ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/env_parallel.pdksh b/src/env_parallel.pdksh new file mode 100755 index 0000000..b530ccf --- /dev/null +++ b/src/env_parallel.pdksh @@ -0,0 +1,183 @@ +#!/usr/bin/env pdksh + +# This file must be sourced in pdksh: +# +# source `which env_parallel.pdksh` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +env_parallel() { + # env_parallel.pdksh + + _names_of_ALIASES() { + compgen -a + } + _bodies_of_ALIASES() { + alias "$@" | perl -pe 's/^/alias /' + } + _names_of_FUNCTIONS() { + compgen -A function + } + _bodies_of_FUNCTIONS() { + typeset -f "$@" + } + _names_of_VARIABLES() { + compgen -A variable + } + _bodies_of_VARIABLES() { + typeset -p "$@" + } + _remove_bad_NAMES() { + _tmp_READONLY="$(mktemp)" + readonly > "$_tmp_READONLY" + # Do not transfer vars and funcs from env_parallel + grep -Ev '^(_names_of_ALIASES|_bodies_of_ALIASES|_names_of_maybe_FUNCTIONS|_names_of_FUNCTIONS|_bodies_of_FUNCTIONS|_names_of_VARIABLES|_bodies_of_VARIABLES|_remove_bad_NAMES|_prefix_PARALLEL_ENV|_get_ignored_VARS|_make_grep_REGEXP|_ignore_UNDERSCORE|_alias_NAMES|_list_alias_BODIES|_function_NAMES|_list_function_BODIES|_variable_NAMES|_list_variable_VALUES|_prefix_PARALLEL_ENV|PARALLEL_ENV|PARALLEL_TMP)$' | + # Filter names matching --env + grep -E "^$_grep_REGEXP"\$ | grep -vE "^$_ignore_UNDERSCORE"\$ | + grep -vFf $_tmp_READONLY | + grep -Ev '^(PIPESTATUS)' + rm $_tmp_READONLY + unset _tmp_READONLY + } + _prefix_PARALLEL_ENV() { + shopt 2>/dev/null | + perl -pe 's:\s+off:;: and s/^/shopt -u /; + s:\s+on:;: and s/^/shopt -s /; + s:;$:&>/dev/null;:'; + echo 'shopt -s expand_aliases &>/dev/null'; + } + + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + } + } + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + + if which parallel | grep 'no parallel in' >/dev/null; then + echo 'env_parallel: Error: parallel must be in $PATH.' >&2 + return 1 + fi + if which parallel >/dev/null; then + true which on linux + else + echo 'env_parallel: Error: parallel must be in $PATH.' >&2 + return 1 + fi + + # Grep regexp for vars given by --env + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > $HOME/.parallel/ignored_vars + return 0 + fi + + # Grep alias names + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # eval is needed for aliases - cannot explain why + export PARALLEL_ENV="` + eval $_list_alias_BODIES; + $_list_function_BODIES + $_list_variable_VALUES; + `"; + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + `which parallel` "$@" + _parallel_exit_CODE=$? + unset PARALLEL_ENV; + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" +} diff --git a/src/env_parallel.pod b/src/env_parallel.pod new file mode 100644 index 0000000..a198258 --- /dev/null +++ b/src/env_parallel.pod @@ -0,0 +1,935 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + +=head1 NAME + +env_parallel - export environment to GNU parallel + + +=head1 SYNOPSIS + +B<env_parallel> [--record-env|--session|--end-session] + [options for GNU Parallel] + + +=head1 DESCRIPTION + +B<env_parallel> is a shell function that exports the current +environment to GNU B<parallel>. + +If the shell function is not loaded, a dummy script will be run +instead that explains how to install the function. + +B<env_parallel> is 100 ms slower at startup than pure GNU +B<parallel>, and takes up to 30% longer to start a job (typically 15 ms). + +Due to the problem with environment space (see below) the recommended +usage is either: + + # Do --record-env into $PARALLEL_IGNORED_NAMES + env_parallel --session + + # Define whatever you want to use + myfunc() { myalias and functions $myvar work. $1.; } + alias myalias='echo Aliases' + myvar='and variables' + + # env_parallel will not export names in $PARALLEL_IGNORED_NAMES + env_parallel -S localhost myfunc ::: Hooray + +Or: + + # Record the "clean" environment (this only needs to be run once) + env_parallel --record-env + + # Optionally edit ~/.parallel/ignored_vars (only needed once) + + # Define whatever you want to use + myfunc() { myalias and functions $myvar work. $1.; } + alias myalias='echo Aliases' + myvar='and variables' + + # Use --env _ to only transfer the names not in the "empty" environment + env_parallel --env _ -S localhost myfunc ::: Hooray + +In B<csh> B<--session> is not supported: + + # Record the "clean" environment - this only needs to be run once + env_parallel --record-env + + # Optionally edit ~/.parallel/ignored_vars - only needed once + + # Define whatever you want to use + alias myalias 'echo Aliases $myvar \!*.' + set myvar='and variables' + + # Use --env _ to only transfer the names not in the "empty" environment + env_parallel --env _ -S localhost myalias ::: work + +=head2 Environment space + +By default B<env_parallel> will export all environment variables, +arrays, aliases, functions and shell options (see details for the +individual shells below). + +But this only works if the size of the current environment is smaller +than the maximal length of a command and smaller than half of the max +if running remotely. E.g. The max size of Bash's command is 128 KB, so +B<env_parallel> will fail if 'B<set | wc -c>' is bigger than 128 +KB. Technically the limit is in execve(1) which IPC::open3 uses. + +Bash completion functions are well-known for taking up well over 128 +KB of environment space and the primary reason for causing +B<env_parallel> to fail. + +Instead you can use B<--env> to specify which variables, arrays, +aliases and functions to export as this will only export those with +the given name. Or follow the recommended usage in shown in +DESCRIPTION. + + +=head1 OPTIONS + +Same as GNU B<parallel> in addition to these: + +=over 4 + +=item B<--end-session> + +Undo last B<--session> + + +=item B<--record-env> + +Record all names currently defined to be ignored every time running +B<env_parallel> in the future. + + +=item B<--session> + +Ignore all names currently defined. Aliases, variables, arrays, and +functions currently defined will not be transferred. + +But names defined I<after> running B<parallel --session> I<will> be +transferred. + +This is only valid in the running shell, and can be undone with +B<parallel --end-session>. + +You can run multiple B<--session> inside each other: + + env_parallel --session + var=not + # var is transferred + env_parallel -Slocalhost 'echo var is $var' ::: ignored + env_parallel --session + # var is not transferred + env_parallel -Slocalhost 'echo var is $var' ::: ignored + env_parallel --end-session + # var is transferred again + env_parallel -Slocalhost 'echo var is $var' ::: ignored + + + +=back + + +=head1 SUPPORTED SHELLS + +=head2 Ash + +=head3 Installation + +Put this in $HOME/.profile: + + . `which env_parallel.ash` + +E.g. by doing: + + echo '. `which env_parallel.ash`' >> $HOME/.profile + +=head3 Supported use + +B<--env> is supported to export only the variable, or alias with the +given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho='echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + + alias multiline='echo multiline + echo aliases' + env_parallel multiline ::: work + env_parallel -S server multiline ::: work + env_parallel --env multiline multiline ::: work + env_parallel --env multiline -S server multiline ::: work + +=item functions + + ash cannot list defined functions - thus is not supported. + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + +Arrays are not supported by Ash. + +=back + +=head2 Bash + +=head3 Installation + +Put this in $HOME/.bashrc: + + . `which env_parallel.bash` + +E.g. by doing: + + echo '. `which env_parallel.bash`' >> $HOME/.bashrc + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, function, or +array with the given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho='echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + + alias multiline='echo multiline + echo aliases' + env_parallel 'multiline {}; + echo but only when followed by a newline' ::: work + env_parallel -S server 'multiline {}; + echo but only when followed by a newline' ::: work + env_parallel --env multiline 'multiline {}; + echo but only when followed by a newline' ::: work + env_parallel --env multiline -S server 'multiline {}; + echo but only when followed by a newline' ::: work + +=item functions + + myfunc() { echo functions $*; } + env_parallel myfunc ::: work + env_parallel -S server myfunc ::: work + env_parallel --env myfunc myfunc ::: work + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + myarray=(arrays work, too) + env_parallel -k echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k -S server echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k --env myarray echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k --env myarray -S server \ + echo '${myarray[{}]}' ::: 0 1 2 + +=back + +=head3 BUGS + +Due to a bug in Bash, aliases containing newlines must be followed by +a newline in the command. Some systems are not affected by this bug, +but will print a warning anyway. + +=head2 csh + +B<env_parallel> for B<csh> breaks B<$PARALLEL>, so do not use +B<$PARALLEL>. + +=head3 Installation + +Put this in $HOME/.cshrc: + + source `which env_parallel.csh` + +E.g. by doing: + + echo 'source `which env_parallel.csh`' >> $HOME/.cshrc + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, or +array with the given name. Multiple B<--env>s can be given. + +=over 8 + +=item aliases + + alias myecho 'echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + +=item functions + +Not supported by B<csh>. + +=item variables + + set myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays with no special chars + + set myarray=(arrays work, too) + env_parallel -k echo \$'{myarray[{}]}' ::: 1 2 3 + env_parallel -k -S server echo \$'{myarray[{}]}' ::: 1 2 3 + env_parallel -k --env myarray echo \$'{myarray[{}]}' ::: 1 2 3 + env_parallel -k --env myarray -S server \ + echo \$'{myarray[{}]}' ::: 1 2 3 + +=back + + +=head2 Dash + +=head3 Installation + +Put this in $HOME/.profile: + + . `which env_parallel.dash` + +E.g. by doing: + + echo '. `which env_parallel.dash`' >> $HOME/.profile + +=head3 Supported use + +B<--env> is supported to export only the variable, or alias with the +given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho='echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + + alias multiline='echo multiline + echo aliases' + env_parallel multiline ::: work + env_parallel -S server multiline ::: work + env_parallel --env multiline multiline ::: work + env_parallel --env multiline -S server multiline ::: work + +=item functions + + dash cannot list defined functions - thus is not supported. + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + dash does not support arrays. + +=back + + +=head2 fish + +=head3 Installation + +Put this in $HOME/.config/fish/config.fish: + + source (which env_parallel.fish) + +E.g. by doing: + + echo 'source (which env_parallel.fish)' \ + >> $HOME/.config/fish/config.fish + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, function, or +array with the given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho 'echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + +=item functions + + function myfunc + echo functions $argv + end + env_parallel myfunc ::: work + env_parallel -S server myfunc ::: work + env_parallel --env myfunc myfunc ::: work + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + set myvar variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + set myarray arrays work, too + env_parallel -k echo '$myarray[{}]' ::: 1 2 3 + env_parallel -k -S server echo '$myarray[{}]' ::: 1 2 3 + env_parallel -k --env myarray echo '$myarray[{}]' ::: 1 2 3 + env_parallel -k --env myarray -S server \ + echo '$myarray[{}]' ::: 1 2 3 + +=back + + +=head2 ksh + +=head3 Installation + +Put this in $HOME/.kshrc: + + source `which env_parallel.ksh` + +E.g. by doing: + + echo 'source `which env_parallel.ksh`' >> $HOME/.kshrc + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, function, or +array with the given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho='echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + + alias multiline='echo multiline + echo aliases' + env_parallel multiline ::: work + env_parallel -S server multiline ::: work + env_parallel --env multiline multiline ::: work + env_parallel --env multiline -S server multiline ::: work + +=item functions + + myfunc() { echo functions $*; } + env_parallel myfunc ::: work + env_parallel -S server myfunc ::: work + env_parallel --env myfunc myfunc ::: work + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + myarray=(arrays work, too) + env_parallel -k echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k -S server echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k --env myarray echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k --env myarray -S server \ + echo '${myarray[{}]}' ::: 0 1 2 + +=back + + +=head2 mksh + +=head3 Installation + +Put this in $HOME/.mkshrc: + + source `which env_parallel.mksh` + +E.g. by doing: + + echo 'source `which env_parallel.mksh`' >> $HOME/.mkshrc + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, function, or +array with the given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho='echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + + alias multiline='echo multiline + echo aliases' + env_parallel multiline ::: work + env_parallel -S server multiline ::: work + env_parallel --env multiline multiline ::: work + env_parallel --env multiline -S server multiline ::: work + +=item functions + + myfunc() { echo functions $*; } + env_parallel myfunc ::: work + env_parallel -S server myfunc ::: work + env_parallel --env myfunc myfunc ::: work + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + myarray=(arrays work, too) + env_parallel -k echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k -S server echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k --env myarray echo '${myarray[{}]}' ::: 0 1 2 + env_parallel -k --env myarray -S server \ + echo '${myarray[{}]}' ::: 0 1 2 + +=back + + +=head2 pdksh + +=head3 Installation + +Put this in $HOME/.profile: + + source `which env_parallel.pdksh` + +E.g. by doing: + + echo 'source `which env_parallel.pdksh`' >> $HOME/.profile + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, function, or +array with the given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho="echo aliases"; + env_parallel myecho ::: work; + env_parallel -S server myecho ::: work; + env_parallel --env myecho myecho ::: work; + env_parallel --env myecho -S server myecho ::: work + +=item functions + + myfunc() { echo functions $*; }; + env_parallel myfunc ::: work; + env_parallel -S server myfunc ::: work; + env_parallel --env myfunc myfunc ::: work; + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + myvar=variables; + env_parallel echo "\$myvar" ::: work; + env_parallel -S server echo "\$myvar" ::: work; + env_parallel --env myvar echo "\$myvar" ::: work; + env_parallel --env myvar -S server echo "\$myvar" ::: work + +=item arrays + + myarray=(arrays work, too); + env_parallel -k echo "\${myarray[{}]}" ::: 0 1 2; + env_parallel -k -S server echo "\${myarray[{}]}" ::: 0 1 2; + env_parallel -k --env myarray echo "\${myarray[{}]}" ::: 0 1 2; + env_parallel -k --env myarray -S server \ + echo "\${myarray[{}]}" ::: 0 1 2 + +=back + + +=head2 sh + +=head3 Installation + +Put this in $HOME/.profile: + + . `which env_parallel.sh` + +E.g. by doing: + + echo '. `which env_parallel.sh`' >> $HOME/.profile + +=head3 Supported use + +B<--env> is supported to export only the variable, or alias with the +given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + sh does not support aliases. + +=item functions + + myfunc() { echo functions $*; } + env_parallel myfunc ::: work + env_parallel -S server myfunc ::: work + env_parallel --env myfunc myfunc ::: work + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + sh does not support arrays. + +=back + + +=head2 tcsh + +B<env_parallel> for B<tcsh> breaks B<$PARALLEL>, so do not use +B<$PARALLEL>. + +=head3 Installation + +Put this in $HOME/.tcshrc: + + source `which env_parallel.tcsh` + +E.g. by doing: + + echo 'source `which env_parallel.tcsh`' >> $HOME/.tcshrc + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, or +array with the given name. Multiple B<--env>s can be given. + +=over 8 + +=item aliases + + alias myecho 'echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + +=item functions + +Not supported by B<tcsh>. + +=item variables + + set myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays with no special chars + + set myarray=(arrays work, too) + env_parallel -k echo \$'{myarray[{}]}' ::: 1 2 3 + env_parallel -k -S server echo \$'{myarray[{}]}' ::: 1 2 3 + env_parallel -k --env myarray echo \$'{myarray[{}]}' ::: 1 2 3 + env_parallel -k --env myarray -S server \ + echo \$'{myarray[{}]}' ::: 1 2 3 + +=back + + +=head2 Zsh + +=head3 Installation + +Put this in $HOME/.zshrc: + + . `which env_parallel.zsh` + +E.g. by doing: + + echo '. `which env_parallel.zsh`' >> $HOME/.zshenv + +=head3 Supported use + +B<--env> is supported to export only the variable, alias, function, or +array with the given name. Multiple B<--env>s can be given. + +B<--session> is supported. + +=over 8 + +=item aliases + + alias myecho='echo aliases' + env_parallel myecho ::: work + env_parallel -S server myecho ::: work + env_parallel --env myecho myecho ::: work + env_parallel --env myecho -S server myecho ::: work + + alias multiline='echo multiline + echo aliases' + env_parallel multiline ::: work + env_parallel -S server multiline ::: work + env_parallel --env multiline multiline ::: work + env_parallel --env multiline -S server multiline ::: work + +=item functions + + myfunc() { echo functions $*; } + env_parallel myfunc ::: work + env_parallel -S server myfunc ::: work + env_parallel --env myfunc myfunc ::: work + env_parallel --env myfunc -S server myfunc ::: work + +=item variables + + myvar=variables + env_parallel echo '$myvar' ::: work + env_parallel -S server echo '$myvar' ::: work + env_parallel --env myvar echo '$myvar' ::: work + env_parallel --env myvar -S server echo '$myvar' ::: work + +=item arrays + + myarray=(arrays work, too) + env_parallel -k echo '${myarray[{}]}' ::: 1 2 3 + env_parallel -k -S server echo '${myarray[{}]}' ::: 1 2 3 + env_parallel -k --env myarray echo '${myarray[{}]}' ::: 1 2 3 + env_parallel -k --env myarray -S server \ + echo '${myarray[{}]}' ::: 1 2 3 + +=back + + +=head1 EXIT STATUS + +Same as GNU B<parallel>. + + +=head1 AUTHOR + +When using GNU B<env_parallel> for a publication please cite: + +O. Tange (2018): GNU Parallel 2018, March 2018, ISBN 9781387509881, +DOI: 10.5281/zenodo.1146014. + +This helps funding further development; and it won't cost you a cent. +If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 DEPENDENCIES + +B<env_parallel> uses GNU B<parallel>. + + +=head1 SEE ALSO + +B<parallel>(1), B<ash>(1), B<bash>(1), B<csh>(1), B<dash>(1), +B<fish>(1), B<ksh>(1), B<pdksh>(1) B<tcsh>(1), B<zsh>(1). + + +=cut diff --git a/src/env_parallel.sh b/src/env_parallel.sh new file mode 100755 index 0000000..666c9be --- /dev/null +++ b/src/env_parallel.sh @@ -0,0 +1,430 @@ +#!/usr/bin/env sh + +# This file must be sourced in sh: +# +# . `which env_parallel.sh` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 + +env_parallel() { + # env_parallel.sh + + _names_of_ALIASES() { + # alias fails on Unixware 5 + for _i in `alias 2>/dev/null | perl -ne 's/^alias //;s/^(\S+)=.*/$1/ && print' 2>/dev/null`; do + # Check if this name really is an alias + # or just part of a multiline alias definition + if alias "$_i" >/dev/null 2>/dev/null; then + echo "$_i" + fi + done + } + _bodies_of_ALIASES() { + # alias may return: + # myalias='definition' (GNU/Linux ash) + # alias myalias='definition' (FreeBSD ash) + # so remove 'alias ' from first line + for _i in "$@"; do + echo 'alias '"`alias "$_i" | perl -pe '1..1 and s/^alias //'`" + done + } + _names_of_maybe_FUNCTIONS() { + set | perl -ne '/^([A-Z_0-9]+)\s*\(\)\s*\{?$/i and print "$1\n"' + } + _names_of_FUNCTIONS() { + # myfunc is a function + # shellcheck disable=SC2046 + LANG=C type `_names_of_maybe_FUNCTIONS` | + perl -ne '/^(\S+) is a function$/ and not $seen{$1}++ and print "$1\n"' + } + _bodies_of_FUNCTIONS() { + LANG=C type "$@" | perl -ne '/^(\S+) is a function$/ or print' + } + _names_of_VARIABLES() { + # This may screw up if variables contain \n and = + set | perl -ne 's/^(\S+?)=.*/$1/ and print;' + } + _bodies_of_VARIABLES() { + # Crappy typeset -p + for _i in "$@" + do + perl -e 'print @ARGV' "$_i=" + eval echo "\"\$$_i\"" | perl -e '$/=undef; $a=<>; chop($a); print $a' | + perl -pe 's/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\202-\377]/\\$&/go;'"s/'/\\\'/g; s/[\n]/'\\n'/go;"; + echo + done + } + _ignore_HARDCODED() { + # These names cannot be detected + echo '(_|TIMEOUT|IFS)' + } + _ignore_READONLY() { + # shellcheck disable=SC1078,SC1079,SC2026 + readonly | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + if _which_PAR parallel >/dev/null; then + true parallel found in path + else + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + if perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + if perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # shellcheck disable=SC2006 + PARALLEL_ENV="` + $_list_alias_BODIES; + $_list_function_BODIES; + $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big by running 'true' + # shellcheck disable=SC2006,SC2092 + if `_which_PAR true` >/dev/null 2>/dev/null ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/env_parallel.tcsh b/src/env_parallel.tcsh new file mode 100755 index 0000000..8358ec1 --- /dev/null +++ b/src/env_parallel.tcsh @@ -0,0 +1,142 @@ +#!/usr/bin/env tcsh + +# This file must be sourced in tcsh: +# +# source `which env_parallel.tcsh` +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +set _parallel_exit_CODE=0 +if ("`alias env_parallel`" == '' || ! $?PARALLEL_CSH) then + # Activate alias + alias env_parallel '(setenv PARALLEL_CSH "\!*"; source `which env_parallel.csh`)' +else + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + + # simple 'tempfile': Return nonexisting filename: /tmp/parXXXXX + alias _tempfile 'perl -e do\{\$t\=\"/tmp/par\".join\"\",map\{\(0..9,\"a\"..\"z\",\"A\"..\"Z\"\)\[rand\(62\)\]\}\(1..5\)\;\}while\(-e\$t\)\;print\"\$t\\n\"' + set _tMpscRIpt=`_tempfile` + + cat <<'EOF' > $_tMpscRIpt + #!/usr/bin/perl + + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; +'EOF' + set _grep_REGEXP="`perl $_tMpscRIpt -- $PARALLEL_CSH`" + + # Deal with --env _ + cat <<'EOF' > $_tMpscRIpt + #!/usr/bin/perl + + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + } + } +'EOF' + set _ignore_UNDERSCORE="`perl $_tMpscRIpt -- $PARALLEL_CSH`" + rm $_tMpscRIpt + + # Get the scalar and array variable names + set _vARnAmES=(`set | perl -ne 's/\s.*//; /^(#|_|killring|prompt2|command|PARALLEL_ENV|PARALLEL_TMP)$/ and next; /^'"$_grep_REGEXP"'$/ or next; /^'"$_ignore_UNDERSCORE"'$/ and next; print'`) + + # Make a tmpfile for the variable definitions + set _tMpvARfILe=`_tempfile` + touch $_tMpvARfILe + # Make a tmpfile for the variable definitions + alias + set _tMpaLLfILe=`_tempfile` + foreach _vARnAmE ($_vARnAmES); + # These 3 lines break in csh ver. 20110502-3 + # if not defined: next + eval if'(! $?'$_vARnAmE') continue' + # if $#myvar <= 1 echo scalar_myvar=$var + eval if'(${#'$_vARnAmE'} <= 1) echo scalar_'$_vARnAmE'='\"\$$_vARnAmE\" >> $_tMpvARfILe; + # if $#myvar > 1 echo array_myvar=$var + eval if'(${#'$_vARnAmE'} > 1) echo array_'$_vARnAmE'="$'$_vARnAmE'"' >> $_tMpvARfILe; + end + unset _vARnAmE _vARnAmES + # shell quote variables (--plain needed due to ignore if $PARALLEL is set) + # Convert 'scalar_myvar=...' to 'set myvar=...' + # Convert 'array_myvar=...' to 'set array=(...)' + cat $_tMpvARfILe | parallel --plain --shellquote | perl -pe 's/^scalar_(\S+).=/set $1=/ or s/^array_(\S+).=(.*)/set $1=($2)/ && s/\\ / /g;' > $_tMpaLLfILe + # Cleanup + rm $_tMpvARfILe; unset _tMpvARfILe + +# ALIAS TO EXPORT ALIASES: + +# Quote ' by putting it inside " +# s/'/'"'"'/g; +# ' => \047 " => \042 +# s/\047/\047\042\047\042\047/g; +# Quoted: s/\\047/\\047\\042\\047\\042\\047/g\; + +# Remove () from second column +# s/^(\S+)(\s+)\((.*)\)/\1\2\3/; +# Quoted: s/\^\(\\S+\)\(\\s+\)\\\(\(.\*\)\\\)/\\1\\2\\3/\; + +# Add ' around second column +# s/^(\S+)(\s+)(.*)/\1\2'\3'/ +# \047 => ' +# s/^(\S+)(\s+)(.*)/\1\2\047\3\047/; +# Quoted: s/\^\(\\S+\)\(\\s+\)\(.\*\)/\\1\\2\\047\\3\\047/\; + +# Quote ! as \! +# s/\!/\\\!/g; +# Quoted: s/\\\!/\\\\\\\!/g; + +# Prepend with "\nalias " +# s/^/\001alias /; +# Quoted: s/\^/\\001alias\ /\; + alias | \ + perl -ne '/^'"$_grep_REGEXP"'/ or next; /^'"$_ignore_UNDERSCORE"'[^_a-zA-Z]/ and next; print' | \ + perl -pe s/\\047/\\047\\042\\047\\042\\047/g\;s/\^\(\\S+\)\(\\s+\)\\\(\(.\*\)\\\)/\\1\\2\\3/\;s/\^\(\\S+\)\(\\s+\)\(.\*\)/\\1\\2\\047\\3\\047/\;s/\^/\\001alias\ /\;s/\\\!/\\\\\\\!/g >> $_tMpaLLfILe + + setenv PARALLEL_ENV "`cat $_tMpaLLfILe; rm $_tMpaLLfILe`"; + unset _tMpaLLfILe; + # Use $PARALLEL_CSH set in calling alias + parallel + set _parallel_exit_CODE=$status + setenv PARALLEL_ENV + setenv PARALLEL_CSH +endif +(exit $_parallel_exit_CODE) diff --git a/src/env_parallel.zsh b/src/env_parallel.zsh new file mode 100755 index 0000000..1320142 --- /dev/null +++ b/src/env_parallel.zsh @@ -0,0 +1,405 @@ +#!/usr/bin/env zsh + +# This file must be sourced in zsh: +# +# source =env_parallel.zsh +# +# after which 'env_parallel' works +# +# +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# shellcheck disable=SC2006 + +env_parallel() { + # env_parallel.zsh + + _names_of_ALIASES() { + print -l ${(k)aliases} + } + _bodies_of_ALIASES() { + local _i + for _i ($@); do + echo 'alias '"$(alias $_i)" + done + } + _names_of_FUNCTIONS() { + print -l ${(k)functions} + } + _bodies_of_FUNCTIONS() { + typeset -f "$@" + } + _names_of_VARIABLES() { + print -l ${(k)parameters} + } + _bodies_of_VARIABLES() { + typeset -p "$@" + } + _ignore_HARDCODED() { + # These names cannot be detected + echo '([-\?\#\!\$\*\@\_0]|zsh_eval_context|ZSH_EVAL_CONTEXT|LINENO|IFS|commands|functions|options|aliases|EUID|EGID|UID|GID|dis_patchars|patchars|terminfo|galiases|keymaps|parameters|jobdirs|dirstack|functrace|funcsourcetrace|zsh_scheduled_events|dis_aliases|dis_reswords|dis_saliases|modules|reswords|saliases|widgets|userdirs|historywords|nameddirs|termcap|dis_builtins|dis_functions|jobtexts|funcfiletrace|dis_galiases|builtins|history|jobstates|funcstack|run-help)' + } + _ignore_READONLY() { + typeset -pr | perl -e '@r = map { + chomp; + # sh on UnixWare: readonly TIMEOUT + # ash: readonly var='val' + # ksh: var='val' + # mksh: PIPESTATUS[0] + s/^(readonly )?([^=\[ ]*?)(\[\d+\])?(=.*|)$/$2/ or + # bash: declare -ar BASH_VERSINFO=([0]="4" [1]="4") + # zsh: typeset -r var='val' + s/^\S+\s+\S+\s+(\S[^=]*)(=.*|$)/$1/; + $_ } <>; + $vars = join "|",map { quotemeta $_ } @r; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' + } + _remove_bad_NAMES() { + # Do not transfer vars and funcs from env_parallel + # shellcheck disable=SC2006 + _ignore_RO="`_ignore_READONLY`" + # shellcheck disable=SC2006 + _ignore_HARD="`_ignore_HARDCODED`" + # Macos-grep does not like long patterns + # Old Solaris grep does not support -E + # Perl Version of: + # grep -Ev '^(...)$' | + perl -ne '/^( + PARALLEL_ENV| + PARALLEL_TMP| + _alias_NAMES| + _bodies_of_ALIASES| + _bodies_of_FUNCTIONS| + _bodies_of_VARIABLES| + _error_PAR| + _function_NAMES| + _get_ignored_VARS| + _grep_REGEXP| + _ignore_HARD| + _ignore_HARDCODED| + _ignore_READONLY| + _ignore_RO| + _ignore_UNDERSCORE| + _list_alias_BODIES| + _list_function_BODIES| + _list_variable_VALUES| + _make_grep_REGEXP| + _names_of_ALIASES| + _names_of_FUNCTIONS| + _names_of_VARIABLES| + _names_of_maybe_FUNCTIONS| + _parallel_exit_CODE| + _prefix_PARALLEL_ENV| + _prefix_PARALLEL_ENV| + _remove_bad_NAMES| + _remove_readonly| + _variable_NAMES| + _warning_PAR| + _which_PAR)$/x and next; + # Filter names matching --env + /^'"$_grep_REGEXP"'$/ or next; + /^'"$_ignore_UNDERSCORE"'$/ and next; + # Remove readonly variables + /^'"$_ignore_RO"'$/ and next; + /^'"$_ignore_HARD"'$/ and next; + print;' + } + _get_ignored_VARS() { + perl -e ' + for(@ARGV){ + $next_is_env and push @envvar, split/,/, $_; + $next_is_env=/^--env$/; + } + if(grep { /^_$/ } @envvar) { + if(not open(IN, "<", "$ENV{HOME}/.parallel/ignored_vars")) { + print STDERR "parallel: Error: ", + "Run \"parallel --record-env\" in a clean environment first.\n"; + } else { + chomp(@ignored_vars = <IN>); + } + } + if($ENV{PARALLEL_IGNORED_NAMES}) { + push @ignored_vars, split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + chomp @ignored_vars; + } + $vars = join "|",map { quotemeta $_ } @ignored_vars; + print $vars ? "($vars)" : "(,,nO,,VaRs,,)"; + ' -- "$@" + } + + # Get the --env variables if set + # --env _ should be ignored + # and convert a b c to (a|b|c) + # If --env not set: Match everything (.*) + _make_grep_REGEXP() { + perl -e ' + for(@ARGV){ + /^_$/ and $next_is_env = 0; + $next_is_env and push @envvar, split/,/, $_; + $next_is_env = /^--env$/; + } + $vars = join "|",map { quotemeta $_ } @envvar; + print $vars ? "($vars)" : "(.*)"; + ' -- "$@" + } + _which_PAR() { + # type returns: + # ll is an alias for ls -l (in ash) + # bash is a tracked alias for /bin/bash + # true is a shell builtin (in bash) + # myfunc is a function (in bash) + # myfunc is a shell function (in zsh) + # which is /usr/bin/which (in sh, bash) + # which is hashed (/usr/bin/which) + # gi is aliased to `grep -i' (in bash) + # aliased to `alias | /usr/bin/which --tty-only --read-alias --show-dot --show-tilde' + # Return 0 if found, 1 otherwise + LANG=C type "$@" | + perl -pe '$exit += (s/ is an alias for .*// || + s/ is aliased to .*// || + s/ is a function// || + s/ is a shell function// || + s/ is a shell builtin// || + s/.* is hashed .(\S+).$/$1/ || + s/.* is (a tracked alias for )?//); + END { exit not $exit }' + } + _warning_PAR() { + echo "env_parallel: Warning: $*" >&2 + } + _error_PAR() { + echo "env_parallel: Error: $*" >&2 + } + + if _which_PAR parallel >/dev/null; then + true parallel found in path + else + # shellcheck disable=SC2016 + _error_PAR 'parallel must be in $PATH.' + return 255 + fi + + # Grep regexp for vars given by --env + # shellcheck disable=SC2006 + _grep_REGEXP="`_make_grep_REGEXP \"$@\"`" + unset _make_grep_REGEXP + + # Deal with --env _ + # shellcheck disable=SC2006 + _ignore_UNDERSCORE="`_get_ignored_VARS \"$@\"`" + unset _get_ignored_VARS + + # --record-env + if perl -e 'exit grep { /^--record-env$/ } @ARGV' -- "$@"; then + true skip + else + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | + cat > "$HOME"/.parallel/ignored_vars + return 0 + fi + + # --session + if perl -e 'exit grep { /^--session$/ } @ARGV' -- "$@"; then + true skip + else + # Insert ::: between each level of session + # so you can pop off the last ::: at --end-session + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`echo \"$PARALLEL_IGNORED_NAMES\"; + echo :::; + (_names_of_ALIASES; + _names_of_FUNCTIONS; + _names_of_VARIABLES) | perl -ne ' + BEGIN{ + map { $ignored_vars{$_}++ } + split/\s+/, $ENV{PARALLEL_IGNORED_NAMES}; + } + chomp; + for(split/\s+/) { + if(not $ignored_vars{$_}) { + print $_,\"\\n\"; + } + } + '`" + export PARALLEL_IGNORED_NAMES + return 0 + fi + if perl -e 'exit grep { /^--end.?session$/ } @ARGV' -- "$@"; then + true skip + else + # Pop off last ::: from PARALLEL_IGNORED_NAMES + # shellcheck disable=SC2006 + PARALLEL_IGNORED_NAMES="`perl -e ' + $ENV{PARALLEL_IGNORED_NAMES} =~ s/(.*):::.*?$/$1/s; + print $ENV{PARALLEL_IGNORED_NAMES} + '`" + return 0 + fi + # Grep alias names + # shellcheck disable=SC2006 + _alias_NAMES="`_names_of_ALIASES | _remove_bad_NAMES | xargs echo`" + _list_alias_BODIES="_bodies_of_ALIASES $_alias_NAMES" + if [ "$_alias_NAMES" = "" ] ; then + # no aliases selected + _list_alias_BODIES="true" + fi + unset _alias_NAMES + + # Grep function names + # shellcheck disable=SC2006 + _function_NAMES="`_names_of_FUNCTIONS | _remove_bad_NAMES | xargs echo`" + _list_function_BODIES="_bodies_of_FUNCTIONS $_function_NAMES" + if [ "$_function_NAMES" = "" ] ; then + # no functions selected + _list_function_BODIES="true" + fi + unset _function_NAMES + + # Grep variable names + # shellcheck disable=SC2006 + _variable_NAMES="`_names_of_VARIABLES | _remove_bad_NAMES | xargs echo`" + _list_variable_VALUES="_bodies_of_VARIABLES $_variable_NAMES" + if [ "$_variable_NAMES" = "" ] ; then + # no variables selected + _list_variable_VALUES="true" + fi + unset _variable_NAMES + + # shellcheck disable=SC2006 + PARALLEL_ENV="` + eval $_list_alias_BODIES; + eval $_list_function_BODIES; + eval $_list_variable_VALUES; + `" + export PARALLEL_ENV + unset _list_alias_BODIES _list_variable_VALUES _list_function_BODIES + unset _bodies_of_ALIASES _bodies_of_VARIABLES _bodies_of_FUNCTIONS + unset _names_of_ALIASES _names_of_VARIABLES _names_of_FUNCTIONS + unset _ignore_HARDCODED _ignore_READONLY _ignore_UNDERSCORE + unset _remove_bad_NAMES _grep_REGEXP + unset _prefix_PARALLEL_ENV + # Test if environment is too big by running 'true' + # shellcheck disable=SC2006,SC2092 + if `_which_PAR true` >/dev/null 2>/dev/null ; then + parallel "$@" + _parallel_exit_CODE=$? + # Clean up variables/functions + unset PARALLEL_ENV + unset _which_PAR _which_TRUE + unset _warning_PAR _error_PAR + # Unset _parallel_exit_CODE before return + eval "unset _parallel_exit_CODE; return $_parallel_exit_CODE" + else + unset PARALLEL_ENV; + _error_PAR "Your environment is too big." + _error_PAR "You can try 3 different approaches:" + _error_PAR "1. Run 'env_parallel --session' before you set" + _error_PAR " variables or define functions." + _error_PAR "2. Use --env and only mention the names to copy." + _error_PAR "3. Try running this in a clean environment once:" + _error_PAR " env_parallel --record-env" + _error_PAR " And then use '--env _'" + _error_PAR "For details see: man env_parallel" + return 255 + fi +} + +parset() { + _parset_PARALLEL_PRG=parallel + _parset_main "$@" +} + +env_parset() { + _parset_PARALLEL_PRG=env_parallel + _parset_main "$@" +} + +_parset_main() { + # If $1 contains ',' or space: + # Split on , to get the destination variable names + # If $1 is a single destination variable name: + # Treat it as the name of an array + # + # # Create array named myvar + # parset myvar echo ::: {1..10} + # echo ${myvar[5]} + # + # # Put output into $var_a $var_b $var_c + # varnames=(var_a var_b var_c) + # parset "${varnames[*]}" echo ::: {1..3} + # echo $var_c + # + # # Put output into $var_a4 $var_b4 $var_c4 + # parset "var_a4 var_b4 var_c4" echo ::: {1..3} + # echo $var_c4 + + _parset_NAME="$1" + if [ "$_parset_NAME" = "" ] ; then + echo parset: Error: No destination variable given. >&2 + echo parset: Error: Try: >&2 + echo parset: Error: ' ' parset myarray echo ::: foo bar >&2 + return 255 + fi + if [ "$_parset_NAME" = "--help" ] ; then + echo parset: Error: Usage: >&2 + echo parset: Error: ' ' parset varname GNU Parallel options and command >&2 + echo + parallel --help + return 255 + fi + if [ "$_parset_NAME" = "--version" ] ; then + # shellcheck disable=SC2006 + echo "parset 20221122 (GNU parallel `parallel --minversion 1`)" + echo "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software" + echo "Foundation, Inc." + echo "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>" + echo "This is free software: you are free to change and redistribute it." + echo "GNU parallel comes with no warranty." + echo + echo "Web site: https://www.gnu.org/software/parallel" + echo + echo "When using programs that use GNU Parallel to process data for publication" + echo "please cite as described in 'parallel --citation'." + echo + return 255 + fi + shift + + # Bash: declare -A myassoc=( ) + # Zsh: typeset -A myassoc=( ) + # Ksh: typeset -A myassoc=( ) + # shellcheck disable=SC2039,SC2169 + if (typeset -p "$_parset_NAME" 2>/dev/null; echo) | + perl -ne 'exit not (/^declare[^=]+-A|^typeset[^=]+-A/)' ; then + # This is an associative array + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset assoc,"$_parset_NAME" "$@"`" + # The eval returns the function! + else + # This is a normal array or a list of variable names + # shellcheck disable=SC2006 + eval "`$_parset_PARALLEL_PRG -k --_parset var,"$_parset_NAME" "$@"`" + # The eval returns the function! + fi +} diff --git a/src/niceload b/src/niceload new file mode 100755 index 0000000..2e59aee --- /dev/null +++ b/src/niceload @@ -0,0 +1,1173 @@ +#!/usr/bin/perl -w + +# Copyright (C) 2004-2010 Ole Tange, http://ole.tange.dk +# +# Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and +# Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +use strict; +use Getopt::Long; +$Global::progname="niceload"; +$Global::version = 20221122; +Getopt::Long::Configure("bundling","require_order"); +get_options_from_array(\@ARGV) || die_usage(); +if($opt::version) { + version(); + exit 0; +} +if($opt::help) { + help(); + exit 0; +} +if($opt::factor and $opt::suspend) { + # You cannot have --suspend and --factor + help(); + exit; +} + +if(not (defined $opt::start_io or defined $opt::run_io + or defined $opt::start_load or defined $opt::run_load + or defined $opt::start_mem or defined $opt::run_mem + or defined $opt::start_noswap or defined $opt::run_noswap + or defined $opt::io or defined $opt::load + or defined $opt::mem or defined $opt::noswap)) { + # Default is --runload=1 + $opt::run_load = 1; +} + +if(not defined $opt::start_io) { $opt::start_io = $opt::io; } +if(not defined $opt::run_io) { $opt::run_io = $opt::io; } +if(not defined $opt::start_load) { $opt::start_load = $opt::load; } +if(not defined $opt::run_load) { $opt::run_load = $opt::load; } +if(not defined $opt::start_mem) { $opt::start_mem = $opt::mem; } +if(not defined $opt::run_mem) { $opt::run_mem = $opt::mem; } +if(not defined $opt::start_noswap) { $opt::start_noswap = $opt::noswap; } +if(not defined $opt::run_noswap) { $opt::run_noswap = $opt::noswap; } + +if(defined $opt::load) { multiply_binary_prefix($opt::load); } +if(defined $opt::baseline) { collect_net_baseline(); } + +my $limit = Limit->new(); +my $process = Process->new($opt::nice,@ARGV); +$::exitstatus = 0; +if(@opt::prg) { + # Find all pids of prg + my($children_of, $parent_of, $name_of) = pid_table(); + my @exact_name_pids; + my @substr_name_pids; + for my $name (@opt::prg) { + push(@exact_name_pids, + grep { index($name_of->{$_},$name) == 0 and $_ } keys %$name_of); + push(@substr_name_pids, + grep { index($name_of->{$_},$name) != -1 and $_ } keys %$name_of); + } + # Remove current pid + @exact_name_pids = grep { $_ != $$ } @exact_name_pids; + @substr_name_pids = grep { $_ != $$ } @substr_name_pids; + my @pids; + if(@exact_name_pids) { + @pids = @exact_name_pids; + } elsif(@substr_name_pids) { + warning("@opt::prg no exact matches. Using substrings."); + my %name_pids; + for(sort @substr_name_pids) { + # If the process has run for long, then time column will + # enter the name, so remove leading digits + $name_of->{$_} =~ s/^\d+ //; + # Remove arguments + $name_of->{$_} =~ s/ .*//; + push @{$name_pids{$name_of->{$_}}},$_; + } + warning("Niceloading", + map { "$_ (".(join" ",sort @{$name_pids{$_}}).")" } keys %name_pids + ); + @pids = @substr_name_pids; + } else { + error("@opt::prg no matches."); + exit(1); + } + $process->set_pid(@pids); + $::resume_process = $process; + $SIG{TERM} = $SIG{INT} = \&resume; +} elsif(@opt::pid) { + # Support --pid 3567,25678 + @opt::pid = map { split /,/, $_ } @opt::pid; + $process->set_pid(@opt::pid); + $::resume_process = $process; + $SIG{TERM} = $SIG{INT} = \&resume; +} elsif (@ARGV) { + # Wait until limit is below start_limit and run_limit + while($limit->over_start_limit() + or + ($limit->hard() and $limit->over_run_limit())) { + $limit->sleep_for_recheck(); + } + $process->start(); +} + +while($process->is_alive()) { + if($limit->over_run_limit()) { + $process->suspend(); + $limit->sleep_for_recheck(); + if(not $limit->hard()) { + $process->resume(); + $limit->sleep_while_running(); + } + } else { + $process->resume(); + $limit->sleep_while_running(); + } +} + +exit($::exitstatus); + +{ + my %pid_parentpid_cmd; + + sub pid_table { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Crazy msys: ' is not accepted on the cmd line, but " are treated as ' + my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print qq{@F[1,2] $_}" ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'msys' => $msys, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <<empty on MacOSX running cubase>> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + $^O eq "darwin" and /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub resume { + $::resume_process->resume(); + exit(0); +} + +sub status { + my @w = @_; + my $fh = *STDERR; + print $fh @w; + flush $fh; +} + +sub warning { + my @w = @_; + my $prog = $Global::progname || "niceload"; + status(map { ($prog, ": Warning: ", $_, "\n"); } @w); +} + +sub error { + my @w = @_; + my $prog = $Global::progname || "niceload"; + status(map { ($prog, ": Error: ", $_, "\n"); } @w); +} + +sub uniq { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub multiply_binary_prefix { + # Evalualte numbers with binary prefix + # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80 + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # 13G = 13*1024*1024*1024 = 13958643712 + my $s = shift; + $s =~ s/k/*1000/g; + $s =~ s/M/*1000*1000/g; + $s =~ s/G/*1000*1000*1000/g; + $s =~ s/T/*1000*1000*1000*1000/g; + $s =~ s/P/*1000*1000*1000*1000*1000/g; + $s =~ s/E/*1000*1000*1000*1000*1000*1000/g; + $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $s =~ s/Ki?/*1024/gi; + $s =~ s/Mi?/*1024*1024/gi; + $s =~ s/Gi?/*1024*1024*1024/gi; + $s =~ s/Ti?/*1024*1024*1024*1024/gi; + $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi; + $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s = eval $s; + return $s; +} + +sub get_options_from_array { + # Run GetOptions on @array + # Returns: + # true if parsing worked + # false if parsing failed + # @array is changed + my $array_ref = shift; + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + my @retval = GetOptions + ("debug|D" => \$opt::debug, + "factor|f=s" => \$opt::factor, + "hard|H" => \$opt::hard, + "soft|S" => \$opt::soft, + "sensor=s" => \$opt::sensor, + + "si|sio|startio|start-io=s" => \$opt::start_io, + "ri|rio|runio|run-io=s" => \$opt::run_io, + "io|I=s" => \$opt::io, + + "sl|startload|start-load=s" => \$opt::start_load, + "rl|runload|run-load=s" => \$opt::run_load, + "load|L|l=s" => \$opt::load, + + "sm|startmem|start-mem=s" => \$opt::start_mem, + "rm|runmem|run-mem=s" => \$opt::run_mem, + "mem|M=s" => \$opt::mem, + + "sn|startnoswap|start-noswap|start-no-swap" => \$opt::start_noswap, + "rn|runnoswap|run-noswap|run-no-swap" => \$opt::run_noswap, + "noswap|N" => \$opt::noswap, + + "battery|B" => \$opt::battery, + "net" => \$opt::net, + "nethops=i" => \$opt::nethops, + "baseline" => \$opt::baseline, + + "nice|n=i" => \$opt::nice, + "program|prg=s" => \@opt::prg, + "process|pid|p=s" => \@opt::pid, + "suspend|s=s" => \$opt::suspend, + "recheck|t=s" => \$opt::recheck, + "quote|q" => \$opt::quote, + "help|h" => \$opt::help, + "verbose|v" => \$opt::verbose, + "version|V" => \$opt::version, + ); + if($opt::battery) { + # niceload -l -1 --sensor \ + # 'cat /sys/class/power_supply/BAT0/status \ + # /proc/acpi/battery/BAT0/state 2>/dev/null | + # grep -i -q discharging; echo $?' + $opt::sensor = ('cat /sys/class/power_supply/BAT0/status '. + '/proc/acpi/battery/BAT0/state 2>/dev/null | '. + 'grep -i -q discharging; echo $?'); + $opt::load = -1; + } + if($opt::net) { + $opt::nethops ||= 3; + } + if($opt::nethops) { + # niceload -l 0.01 --sensor 'netsensor_script' + $opt::sensor = netsensor_script($opt::nethops); + $opt::load ||= 0.01; + } + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return @retval; +} + +sub shell_quote_scalar { + # Quote for other shells + my $a = $_[0]; + if(defined $a) { + # zsh wants '=' quoted + # Solaris sh wants ^ quoted. + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline as '\n' + ($a =~ s/[\n]/'\n'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub die_usage { + help(); + exit 1; +} + + +sub help { + print q{ +Usage: + niceload [-v] [-n niceness] [-L loadavg] [-I io] [-N] [-M mem] + [-s suspend_sec|-f factor] [-H] [-S] + command or -p pid +}; +} + + +sub die_bug { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug.\n", + "Please contact <parallel\@gnu.org> and include:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + exit(255); +} + +sub now { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep { + # Sleep this many milliseconds. + my $ms = shift; + ::debug("Sleeping ",$ms," millisecs\n"); + my $start = now(); + my $now; + do { + # Something makes 'select' wake up too early + # when using --sensor + select(undef, undef, undef, $ms/1000); + $now = now(); + } while($now < $start + $ms/1000); +} + +sub debug { + if($opt::debug) { + print STDERR @_; + } +} + + +sub my_dump { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". + "Not dumping output\n"; + print STDERR $err; + return $err; + } else { + return Dumper(@dump_this); + } + } else { + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + + +sub version { + # Returns: N/A + print join("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2004,2005,2006,2007,2008,2009 Ole Tange", + "Copyright (C) 2010,2011 Ole Tange and Free Software Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: http://www.gnu.org/software/parallel\n" + ); +} + + +sub max { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub min { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub collect_net_baseline { + # Collect what a normal (unloaded) net connection looks line +} + + +sub netsensor_script { + # Script for --sensor when using --net + my $hops = shift; + my $perlscript = q{ + use Net::Traceroute; + use Net::Ping; + + my $medtrc = MedianTraceroute->new(shift); + $medtrc->set_remedian($medtrc->ping()); + $medtrc->set_remedian($medtrc->ping()); + while(1) { + my $ms = $medtrc->ping(); + my $m = $medtrc->remedian(); + if($m*1.5 < $ms) { + # Bad 1 = median*1.5 < current latency + } else { + # OK 0 = median*1.5 > current latency + $medtrc->set_remedian($ms); + } + printf("%d\n",$m*1.5 < $ms); + sleep(1); + } + + package MedianTraceroute; + + sub new { + my $class = shift; + my $hop = shift; + # Find router + my $tr = Net::Traceroute->new(host => "8.8.8.8", + max_ttl => $hop); + if($tr->found) { + $host = $tr->hop_query_host($hop, 0); + } else { + # ns1.censurfridns.dk + $tr = Net::Traceroute->new(host => "89.233.43.71", + max_ttl => $hop); + if($tr->found) { + $host = $tr->hop_query_host($hop, 0); + } else { + die("Cannot traceroute to 8.8.8.8 and 89.233.43.71"); + } + } + my $p = Net::Ping->new(); + $p->hires(); + + return bless { + 'hop' => $hop, + 'host' => $host, + 'pinger' => $p, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; + } + + sub ping { + my $self = shift; + for(1..3) { + # Ping should never take longer than 5.5 sec + my ($ret, $duration, $ip) = + $self->{'pinger'}->ping($self->{'host'}, 5.5); + if($ret) { + return $duration; + } + } + warn("Ping failed 3 times."); + } + + sub remedian { + my $self = shift; + return $self->{'remedian'}; + } + + sub set_remedian { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; + } + }; + return "perl -e ".shell_quote_scalar($perlscript)." $hops"; +} + + +package Process; + +sub new { + my $class = shift; + my $nice = shift; + my @ARGV = @_; + if($nice) { + unshift(@ARGV, "nice", "-n", $nice); + } + return bless { + 'running' => 0, # Is the process running now? + 'command' => [@ARGV], + }, ref($class) || $class; +} + +sub pgrp { + my $self = shift; + my @pgrp; + if(not $self->{'pgrp'}) { + for(@{$self->{'pids'}}) { + push @pgrp,-getpgrp($_); + } + @pgrp = ::uniq(@pgrp); + @{$self->{'pgrp'}} = @pgrp; + } + return @{$self->{'pgrp'}}; +} + +sub set_pid { + my $self = shift; + push(@{$self->{'pids'}},@_); + $self->{'running'} = 1; + $::exitstatus = 0; +} + +sub start { + # Start the program + my $self = shift; + ::debug("Starting @{$self->{'command'}}\n"); + $self->{'running'} = 1; + if($self->{'pid'} = fork) { + # set signal handler to kill children if parent is killed + push @{$self->{'pids'}}, $self->{'pid'}; + $Global::process = $self; + $SIG{CHLD} = \&REAPER; + $SIG{INT}=\&kill_child_INT; + $SIG{TSTP}=\&kill_child_TSTP; + $SIG{CONT}=\&kill_child_CONT; + sleep 1; # Give child time to setpgrp(0,0); + } else { + setpgrp(0,0); + ::debug("Child pid: $$, pgrp: ",getpgrp $$,"\n"); + ::debug("@{$self->{'command'}}\n"); + if($opt::quote) { + system(@{$self->{'command'}}); + } else { + system("@{$self->{'command'}}"); + } + $::exitstatus = $? >> 8; + $::exitsignal = $? & 127; + ::debug("Child exit $::exitstatus\n"); + exit($::exitstatus); + } +} + +use POSIX ":sys_wait_h"; +use POSIX qw(:sys_wait_h); + +sub REAPER { + my $stiff; + while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + # do something with $stiff if you want + $::exitstatus = $? >> 8; + $::exitsignal = $? & 127; + } + $SIG{CHLD} = \&REAPER; # install *after* calling waitpid +} + + +sub kill_child_CONT { + my $self = $Global::process; + ::debug("SIGCONT received. Killing @{$self->{'pgrp'}}\n"); + kill CONT => $self->pgrp(); +} + + +sub kill_child_TSTP { + my $self = $Global::process; + ::debug("SIGTSTP received. Killing $self->{'pid'} and self ($$)\n"); + kill TSTP => $self->pgrp(); + kill STOP => -$$; + kill STOP => $$; +} + + +sub kill_child_INT { + my $self = $Global::process; + ::debug("SIGINT received.\n"); + if(not @opt::pid) { + ::debug("Killing $self->{'pid'} Exit\n"); + kill INT => $self->pgrp(); + } else { + ::debug("Continue pids $self->{'pid'} Exit\n"); + kill CONT => $self->pgrp(); + } + exit; +} + + +sub resume { + my $self = shift; + ::debug("Resume @{$self->{'pids'}}\n"); + if(not $self->{'running'}) { + # - = PID group + map { kill "CONT", -$_ } @{$self->{'pids'}}; + # If using -p it is not in a group + map { kill "CONT", $_ } @{$self->{'pids'}}; + $self->{'running'} = 1; + } +} + + +sub suspend { + my $self = shift; + ::debug("Suspend @{$self->{'pids'}}\n"); + if($self->{'running'}) { + # - = PID group + map { kill "STOP", -$_ } @{$self->{'pids'}}; + # If using -p it is not in a group + map { kill "STOP", $_ } @{$self->{'pids'}}; + $self->{'running'} = 0; + } +} + + +sub is_alive { + # The process is dead if none of the pids exist + my $self = shift; + my ($exists) = 0; + for my $pid (@{$self->{'pids'}}) { + if(kill 0 => $pid) { $exists++ } + } + ::debug("is_alive: $exists\n"); + return $exists; +} + + +package Limit; + +sub new { + my $class = shift; + my %limits = @_; + my $hard = $opt::soft ? 0 : $opt::hard; + my $runio = $opt::run_io ? ::multiply_binary_prefix($opt::run_io) : 0; + my $startio = $opt::start_io ? ::multiply_binary_prefix($opt::start_io) : 0; + my $runload = $opt::run_load ? ::multiply_binary_prefix($opt::run_load) : 0; + my $startload = $opt::start_load ? ::multiply_binary_prefix($opt::start_load) : 0; + my $runmem = $opt::run_mem ? ::multiply_binary_prefix($opt::run_mem) : 0; + my $startmem = $opt::start_mem ? ::multiply_binary_prefix($opt::start_mem) : 0; + my $runnoswap = $opt::run_noswap ? ::multiply_binary_prefix($opt::run_noswap) : 0; + my $startnoswap = $opt::start_noswap ? ::multiply_binary_prefix($opt::start_noswap) : 0; + my $recheck = $opt::recheck ? ::multiply_binary_prefix($opt::recheck) : 1; # Default + my $runtime = $opt::suspend ? ::multiply_binary_prefix($opt::suspend) : 1; # Default + + return bless { + 'hard' => $hard, + 'recheck' => $recheck, + 'runio' => $runio, + 'startio' => $startio, + 'runload' => $runload, + 'startload' => $startload, + 'runmem' => $runmem, + 'startmem' => $startmem, + 'runnoswap' => $runnoswap, + 'startnoswap' => $startnoswap, + 'factor' => $opt::factor || 1, + 'recheck' => $recheck, + 'runtime' => $runtime, + 'over_run_limit' => 1, + 'over_start_limit' => 1, + 'verbose' => $opt::verbose, + }, ref($class) || $class; +} + + +sub over_run_limit { + my $self = shift; + my $status = 0; + if($self->{'runmem'}) { + # mem should be between 0-10ish + # 100% available => 0 (1-1) + # 50% available => 1 (2-1) + # 10% available => 9 (10-1) + my $mem = $self->mem_status(); + ::debug("Run memory: $self->{'runmem'}/$mem\n"); + $status += (::max(1,$self->{'runmem'}/$mem)-1); + } + if($self->{'runload'}) { + # If used with other limits load should be between 0-10ish + no warnings 'numeric'; + my $load = $self->load_status(); + if($self->{'runload'} > 0) { + # Stop if the load is above the limit + $status += ::max(0,$load - $self->{'runload'}); + } else { + # Stop if the load is below the limit (for sensor) + $status += ::max(0,-$load - $self->{'runload'}); + } + } + if($self->{'runnoswap'}) { + # swap should be between 0-10ish + # swap in or swap out or no swap = 0 + # else log(swapin*swapout) + my $swap = $self->swap_status(); + $status += log(::max(1, $swap - $self->{'runnoswap'})); + } + if($self->{'runio'}) { + my $io = $self->io_status(); + $status += ::max(0,$io - $self->{'runio'}); + } + $self->{'over_run_limit'} = $status; + if(not $opt::recheck) { + $self->{'recheck'} = $self->{'factor'} * $self->{'over_run_limit'}; + } + ::debug("over_run_limit: $status\n"); + return $self->{'over_run_limit'}; +} + +sub over_start_limit { + my $self = shift; + my $status = 0; + if($self->{'startmem'}) { + # mem should be between 0-10ish + # 100% available => 0 (1-1) + # 50% available => 1 (2-1) + # 10% available => 9 (10-1) + my $mem = $self->mem_status(); + ::debug("Start memory: $self->{'startmem'}/$mem\n"); + $status += (::max(1,$self->{'startmem'}/$mem)-1); + } + if($self->{'startload'}) { + # load should be between 0-10ish + # 0 load => 0 + no warnings 'numeric'; + my $load = $self->load_status(); + if($self->{'startload'} > 0) { + # Stop if the load is above the limit + $status += ::max(0,$load - $self->{'startload'}); + } else { + # Stop if the load is below the limit (for sensor) + $status += ::max(0,-$load - $self->{'startload'}); + } + } + if($self->{'startnoswap'}) { + # swap should be between 0-10ish + # swap in or swap out or no swap = 0 + # else log(swapin*swapout) + my $swap = $self->swap_status(); + $status += log(::max(1, $swap - $self->{'startnoswap'})); + } + if($self->{'startio'}) { + my $io = $self->io_status(); + $status += ::max(0,$io - $self->{'startio'}); + } + $self->{'over_start_limit'} = $status; + if(not $opt::recheck) { + $self->{'recheck'} = $self->{'factor'} * $self->{'over_start_limit'}; + } + ::debug("over_start_limit: $status\n"); + return $self->{'over_start_limit'}; +} + + +sub hard { + my $self = shift; + return $self->{'hard'}; +} + + +sub verbose { + my $self = shift; + return $self->{'verbose'}; +} + + +sub sleep_for_recheck { + my $self = shift; + if($self->{'recheck'} < 0.01) { + # Never sleep less than 0.01 sec + $self->{'recheck'} = 0.01; + } + if($self->verbose()) { + $self->{'recheck'} = int($self->{'recheck'}*100)/100; + print STDERR "Sleeping $self->{'recheck'}s\n"; + } + ::debug("recheck in $self->{'recheck'}s\n"); + ::usleep(1000*$self->{'recheck'}); +} + + +sub sleep_while_running { + my $self = shift; + ::debug("check in $self->{'runtime'}s\n"); + if($self->verbose()) { + $self->{'runtime'} = int($self->{'runtime'}*100)/100; + print STDERR "Running $self->{'runtime'}s\n"; + } + ::usleep(1000*$self->{'runtime'}); +} + + +sub nonblockGetLines { + # An non-blocking filehandle read that returns an array of lines read + # Returns: ($eof,@lines) + # Example: --sensor 'vmstat 1 | perl -ane '\''$|=1; 4..0 and print $F[8],"\n"'\' + my ($fh,$timeout) = @_; + + $timeout = 0 unless defined $timeout; + my $rfd = ''; + $::nonblockGetLines_last{$fh} = '' + unless defined $::nonblockGetLines_last{$fh}; + + vec($rfd,fileno($fh),1) = 1; + return unless select($rfd, undef, undef, $timeout)>=0; + # I'm not sure the following is necessary? + return unless vec($rfd,fileno($fh),1); + my $buf = ''; + my $n = sysread($fh,$buf,1024*1024); + + my $eof = eof($fh); + # If we're done, make sure to send the last unfinished line + return ($eof,$::nonblockGetLines_last{$fh}) unless $n; + # Prepend the last unfinished line + $buf = $::nonblockGetLines_last{$fh}.$buf; + # And save any newly unfinished lines + $::nonblockGetLines_last{$fh} = + (substr($buf,-1) !~ /[\r\n]/ && $buf =~ s/([^\r\n]*)$//) + ? $1 : ''; + $buf ? ($eof,split(/\n/,$buf)) : ($eof); +} + +sub read_sensor { + my $self = shift; + ::debug("read_sensor: "); + my $fh = $self->{'sensor_fh'}; + if(not $fh) { + # Start the sensor + $self->{'sensor_pid'} = + open($fh, "-|", $opt::sensor) || + ::die_bug("Cannot open: $opt::sensor"); + $self->{'sensor_fh'} = $fh; + } + # Read as much as we can (non_block) + my ($eof,@lines) = nonblockGetLines($fh); + + # new load = last full line + foreach my $line (@lines) { + if(defined $line) { + ::debug("Pipe saw: [$line] eof=$eof\n"); + $Global::last_sensor_reading = $line; + } + } + if($eof) { + # End of file => Restart the sensor + close $fh; +# waitpid($self->{'sensor_pid'},0); + $self->{'sensor_pid'} = + open($fh, "-|", $opt::sensor) || + ::die_bug("Cannot open: $opt::sensor"); + $self->{'sensor_fh'} = $fh; + } + + return $Global::last_sensor_reading; +} + +sub load_status { + # Returns: + # loadavg or sensor measurement + my $self = shift; + + if($opt::sensor) { + if(not defined $self->{'load_status'} or + $self->{'load_status_cache_time'} + $self->{'recheck'} < time) { + $self->{'load_status'} = $self->read_sensor(); + while (not defined $self->{'load_status'}) { + sleep 1; + $self->{'load_status'} = $self->read_sensor(); + } + $self->{'load_status_cache_time'} = time - 0.001; + } + } else { + # Normal load avg + # Cache for some seconds + if(not defined $self->{'load_status'} or + $self->{'load_status_cache_time'} + $self->{'recheck'} < time) { + $self->{'load_status'} = load_status_linux() if $^O ne 'darwin'; + $self->{'load_status'} = load_status_darwin() if $^O eq 'darwin'; + $self->{'load_status_cache_time'} = time; + } + } + ::debug("load_status: ".$self->{'load_status'}."\n"); + return $self->{'load_status'}; +} + +sub undef_as_zero { + my $a = shift; + return $a ? $a : 0; +} + + +sub load_status_linux { + my ($loadavg); + if(open(IN,"/proc/loadavg")) { + # Linux specific (but fast) + my $upString = <IN>; + if($upString =~ m/^(\d+\.\d+)/) { + $loadavg = $1; + } else { + ::die_bug("proc_loadavg"); + } + close IN; + } elsif (open(IN,"LANG=C uptime|")) { + my $upString = <IN>; + if($upString =~ m/averages?.\s*(\d+\.\d+)/) { + $loadavg = $1; + } else { + ::die_bug("uptime"); + } + close IN; + } + return $loadavg; +} + +sub load_status_darwin { + my $loadavg = `sysctl vm.loadavg`; + if($loadavg =~ /vm\.loadavg: \{ ([0-9.]+) ([0-9.]+) ([0-9.]+) \}/) { + $loadavg = $1; + } elsif (open(IN,"LANG=C uptime|")) { + my $upString = <IN>; + if($upString =~ m/averages?.\s*(\d+\.\d+)/) { + $loadavg = $1; + } else { + ::die_bug("uptime"); + } + close IN; + } + return $loadavg; +} + + +sub swap_status { + # Returns: + # (swap in)*(swap out) kb + my $self = shift; + my $status; + # Cache for some seconds + if(not defined $self->{'swap_status'} or + $self->{'swap_status_cache_time'}+$self->{'recheck'} < time) { + $status = swap_status_linux() if $^O ne 'darwin'; + $status = swap_status_darwin() if $^O eq 'darwin'; + $self->{'swap_status'} = ::max($status,0); + $self->{'swap_status_cache_time'} = time; + } + ::debug("swap_status: $self->{'swap_status'}\n"); + return $self->{'swap_status'}; +} + + +sub swap_status_linux { + my $swap_activity; + $swap_activity = "vmstat 1 2 | tail -n1 | awk '{print \$7*\$8}'"; + # Run swap_activity measuring. + return qx{ $swap_activity }; +} + +sub swap_status_darwin { + # Mach Virtual Memory Statistics: (page size of 4096 bytes, cache hits 0%) + # free active spec inactive wire faults copy 0fill reactive pageins pageout + # 298987 251463 162637 69437 265724 29730558 299022 2308237 1 110058 0 + # 298991 251479 162637 69437 265726 43 4 16 0 0 0 + my ($pagesize, $pageins, $pageouts); + my @vm_stat = `vm_stat 1 | head -n4`; + $pagesize = $1 if $vm_stat[0] =~ m/page size of (\d+) bytes/; + $pageins = (split(/\s+/,$vm_stat[3]))[9]; + $pageouts = (split(/\s+/,$vm_stat[3]))[10]; + return ($pageins*$pageouts*$pagesize)/1024; +} + + +sub mem_status { + # Returns: + # number of bytes (free+cache) + my $self = shift; + # Cache for one second + if(not defined $self->{'mem_status'} or + $self->{'mem_status_cache_time'}+$self->{'recheck'} < time) { + $self->{'mem_status'} = mem_status_linux() if $^O ne 'darwin'; + $self->{'mem_status'} = mem_status_darwin() if $^O eq 'darwin'; + $self->{'mem_status_cache_time'} = time; + } + ::debug("mem_status: $self->{'mem_status'}\n"); + return $self->{'mem_status'}; +} + + +sub mem_status_linux { + # total used free shared buffers cached + # Mem: 3366496 2901664 464832 0 179228 1850692 + # -/+ buffers/cache: 871744 2494752 + # Swap: 6445476 1396860 5048616 + my @free = `free`; + my $free = (split(/\s+/,$free[2]))[3]; + return $free*1024; +} + +sub mem_status_darwin { + # Mach Virtual Memory Statistics: (page size of 4096 bytes, cache hits 0%) + # free active spec inactive wire faults copy 0fill reactive pageins pageout + # 298987 251463 162637 69437 265724 29730558 299022 2308237 1 110058 0 + # 298991 251479 162637 69437 265726 43 4 16 0 0 0 + my ($pagesize, $pages_free, $pages_speculative); + my @vm_stat = `vm_stat 1 | head -n4`; + $pagesize = $1 if $vm_stat[0] =~ m/page size of (\d+) bytes/; + $pages_free = (split(/\s+/,$vm_stat[3]))[0]; + $pages_speculative = (split(/\s+/,$vm_stat[3]))[2]; + return ($pages_free+$pages_speculative)*$pagesize; +} + + +sub io_status { + # Returns: + # max percent for all devices + my $self = shift; + # Cache for one second + if(not defined $self->{'io_status'} or + $self->{'io_status_cache_time'}+$self->{'recheck'} < time) { + $self->{'io_status'} = io_status_linux() if $^O ne 'darwin'; + $self->{'io_status'} = io_status_darwin() if $^O eq 'darwin'; + $self->{'io_status_cache_time'} = time; + } + ::debug("io_status: $self->{'io_status'}\n"); + return $self->{'io_status'}; +} + + +sub io_status_linux { + # Device rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util + # sda 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + my @iostat_out = `LANG=C iostat -x 1 2`; + # throw away all execpt the last Device-section + my @iostat; + for(reverse @iostat_out) { + /Device/ and last; + my @col = (split(/\s+/,$_)); + # Util% is last column + push @iostat, pop @col; + } + my $io = ::max(@iostat); + return undef_as_zero($io)/10; +} + +sub io_status_darwin { + # disk0 disk1 disk2 + # KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s + # 14.95 15 0.22 11.18 35 0.38 2.00 0 0.00 + # 0.00 0 0.00 0.00 0 0.00 0.00 0 0.00 + my @iostat_out = `LANG=C iostat -d -w 1 -c 2`; + # return the MB/s of the last second (not the %util) + my @iostat = split(/\s+/, $iostat_out[3]); + my $io = $iostat[3] + $iostat[6] + $iostat[9]; + return ::min($io, 10); +} + +$::exitsignal = $::exitstatus = 0; # Dummy diff --git a/src/niceload.pod b/src/niceload.pod new file mode 100644 index 0000000..75656f3 --- /dev/null +++ b/src/niceload.pod @@ -0,0 +1,433 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=head1 NAME + +niceload - slow down a program when the load average is above a certain limit + +=head1 SYNOPSIS + +B<niceload> [-v] [-h] [-n nice] [-I io] [-L load] [-M mem] [-N] +[--sensor program] [-t time] [-s time|-f factor] +( command | -p PID [-p PID ...] | --prg program ) + + +=head1 DESCRIPTION + +GNU B<niceload> will slow down a program when the load average (or +other system activity) is above a certain limit. When the limit is +reached the program will be suspended for some time. Then resumed +again for some time. Then the load average is checked again and we +start over. + +Instead of load average B<niceload> can also look at disk I/O, amount +of free memory, or swapping activity. + +If the load is 3.00 then the default settings will run a program +like this: + +run 1 second, suspend (3.00-1.00) seconds, run 1 second, suspend +(3.00-1.00) seconds, run 1 second, ... + +=head1 OPTIONS + +=over 9 + +=item B<-B> + +=item B<--battery> + +Suspend if the system is running on battery. Shorthand for: +-l -1 --sensor 'cat /sys/class/power_supply/BAT0/status +/proc/acpi/battery/BAT0/state 2>/dev/null | grep -i -q discharging; +echo $?' + + +=item B<-f> I<FACTOR> + +=item B<--factor> I<FACTOR> + +Suspend time factor. Dynamically set B<-s> as amount over limit * +factor. Default is 1. + + +=item B<-H> + +=item B<--hard> + +Hard limit. B<--hard> will suspend the process until the system is +under the limits. The default is B<--soft>. + + +=item B<--io> I<iolimit> + +=item B<-I> I<iolimit> + +Limit for I/O. The amount of disk I/O will be computed as a value 0 - +10, where 0 is no I/O and 10 is at least one disk is 100% saturated. + +B<--io> will set both B<--start-io> and B<--run-io>. + + +=item B<--load> I<loadlimit> + +=item B<-L> I<loadlimit> + +Limit for load average. + +B<--load> will set both B<--start-load> and B<--run-load>. + + +=item B<--mem> I<memlimit> + +=item B<-M> I<memlimit> + +Limit for free memory. This is the amount of bytes available as free ++ cache. This limit is treated opposite other limits: If the system +is above the limit the program will run, if it is below the limit the +program will stop + +I<memlimit> can be postfixed with K, M, G, T, or P which would +multiply the size with 1024, 1048576, 1073741824, or 1099511627776 +respectively. + +B<--mem> will set both B<--start-mem> and B<--run-mem>. + + +=item B<--noswap> + +=item B<-N> + +No swapping. If the system is swapping both in and out it is a good +indication that the system is memory stressed. + +B<--noswap> is over limit if the system is swapping both in and out. + +B<--noswap> will set both B<--start-noswap> and B<--run-noswap>. + + +=item B<--net> + +Shorthand for B<--nethops 3>. + + +=item B<--nethops> I<h> + +Network nice. Pause if the internet connection is overloaded. + +B<niceload> finds a router I<h> hops closer to the internet. It +B<ping>s this every second. If the latency is more than 50% bigger +than the median, it is regarded as being over the limit. + +B<--nethops> can be combined with B<--hard>. Without B<--hard> the +program may be able to queue up so much traffic that it will take +longer than the B<--suspend> time to clear it. B<--hard> is useful for +traffic that does not break by being suspended for a longer time. + +B<--nethops> can be combined with a high B<--suspend>. This way a +program can be allowed to do a bit of traffic now and then. This is +useful to keep the connection alive. + + +=item B<-n> I<niceness> + +=item B<--nice> I<niceness> + +Sets niceness. See B<nice>(1). + + +=item B<-p> I<PID>[,I<PID>] + +=item B<--pid> I<PID>[,I<PID>] + +Process IDs of processes to suspend. You can specify multiple process +IDs with multiple B<-p> I<PID> or by separating the PIDs with comma. + + +=item B<--prg> I<program> + +=item B<--program> I<program> + +Name of running program to suspend. You can specify multiple programs +with multiple B<--prg> I<program>. If no processes with the name +I<program> is found, B<niceload> with search for substrings containing +I<program>. + + +=item B<--quote> + +=item B<-q> + +Quote the command line. Useful if the command contains chars like *, +$, >, and " that should not be interpreted by the shell. + + +=item B<--run-io> I<iolimit> + +=item B<--ri> I<iolimit> + +=item B<--run-load> I<loadlimit> + +=item B<--rl> I<loadlimit> + +=item B<--run-mem> I<memlimit> + +=item B<--rm> I<memlimit> + +Run limit. The running program will be slowed down if the system is +above the limit. See: B<--io>, B<--load>, B<--mem>, B<--noswap>. + + +=item B<--sensor> I<sensor program> + +Read sensor. Use I<sensor program> to read a sensor. + +This will keep the CPU temperature below 80 deg C on GNU/Linux: + + niceload -l 80000 -f 0.001 --sensor 'sort -n /sys/devices/platform/coretemp*/temp*_input' gzip * + +This will stop if the disk space < 100000. + + niceload -H -l -100000 --sensor "df . | awk '{ print \$4 }'" echo + + +=item B<--start-io> I<iolimit> + +=item B<--si> I<iolimit> + +=item B<--start-load> I<loadlimit> + +=item B<--sl> I<loadlimit> + +=item B<--start-mem> I<memlimit> + +=item B<--sm> I<memlimit> + +Start limit. The program will not start until the system is below the +limit. See: B<--io>, B<--load>, B<--mem>, B<--noswap>. + + +=item B<--soft> + +=item B<-S> + +Soft limit. B<niceload> will suspend a process for a while and then +let it run for a second thus only slowing down a process while the +system is over one of the given limits. This is the default. + + +=item B<--suspend> I<SEC> + +=item B<-s> I<SEC> + +Suspend time. Suspend the command this many seconds when the max load +average is reached. + + +=item B<--recheck> I<SEC> + +=item B<-t> I<SEC> + +Recheck load time. Sleep SEC seconds before checking load +again. Default is 1 second. + + +=item B<--verbose> + +=item B<-v> + +Verbose. Print some extra output on what is happening. Use B<-v> until +you know what your are doing. + +=back + +=head1 EXAMPLE: See niceload in action + +In terminal 1 run: top + +In terminal 2 run: + +B<niceload -q perl -e '$|=1;do{$l==$r or print "."; $l=$r}until(($r=time-$^T)>>B<50)'> + +This will print a '.' every second for 50 seconds and eat a lot of +CPU. When the load rises to 1.0 the process is suspended. + + +=head1 EXAMPLE: Run updatedb + +Running B<updatedb> can often starve the system for disk I/O and thus result in a high load. + +Run B<updatedb> but suspend B<updatedb> if the load is above 2.00: + +B<niceload -L 2 updatedb> + + +=head1 EXAMPLE: Run rsync + +B<rsync> can, just like B<updatedb>, starve the system for disk I/O +and thus result in a high load. + +Run B<rsync> but keep load below 3.4. If load reaches 7 sleep for +(7-3.4)*12 seconds: + +B<niceload -L 3.4 -f 12 rsync -Ha /home/ /backup/home/> + + +=head1 EXAMPLE: Ensure enough disk cache + +Assume the program B<foo> uses 2 GB files intensively. B<foo> will run +fast if the files are in disk cache and be slow as a crawl if they are +not in the cache. + +To ensure 2 GB are reserved for disk cache run: + +B<niceload --hard --run-mem 2g foo> + +This will not guarantee that the 2 GB memory will be used for the +files for B<foo>, but it will stop B<foo> if the memory for disk cache +is too low. + + +=head1 ENVIRONMENT VARIABLES + +None. In future versions $NICELOAD will be able to contain default settings. + +=head1 EXIT STATUS + +Exit status should be the same as the command being run (untested). + +=head1 REPORTING BUGS + +Report bugs to <bug-parallel@gnu.org>. + +=head1 AUTHOR + +Copyright (C) 2004-11-19 Ole Tange, http://ole.tange.dk + +Copyright (C) 2005-2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 DEPENDENCIES + +GNU B<niceload> uses Perl, and the Perl modules POSIX, and +Getopt::Long. + +=head1 SEE ALSO + +B<parallel>(1), B<nice>(1), B<uptime>(1) + +=cut diff --git a/src/parallel b/src/parallel new file mode 100755 index 0000000..e0b654e --- /dev/null +++ b/src/parallel @@ -0,0 +1,14979 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# open3 used in Job::start +use IPC::Open3; +use POSIX; +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +sub set_input_source_header($$) { + my ($command_ref,$input_source_fh_ref) = @_; + if(defined $opt::header and not $opt::pipe) { + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = defined $opt::colsep ? $opt::colsep : "\t"; + # regexp for {= + my $left = "\Q$Global::parensleft\E"; + my $l = $Global::parensleft; + # regexp for =} + my $right = "\Q$Global::parensright\E"; + my $r = $Global::parensright; + if($opt::header ne "0") { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + my $line = <$fh>; + chomp($line); + $line =~ s/\r$//; + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } + } + # Make it possible to do: + # parallel --header 0 echo {file2} {file1} :::: file1 file2 + my $id = 1; + for my $s (@opt::a) { + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + fileno $s and next; + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } else { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + $Global::input_source_header{$id} = $id; + $id++; + } + } +} + +sub max_jobs_running() { + # Compute $Global::max_jobs_running as the max number of jobs + # running on each sshlogin. + # Returns: + # $Global::max_jobs_running + if(not $Global::max_jobs_running) { + for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); + } + } + if(not $Global::max_jobs_running) { + ::error("Cannot run any jobs."); + wait_and_exit(255); + } + return $Global::max_jobs_running; +} + +sub halt() { + # Compute exit value, + # wait for children to complete + # and exit + if($opt::halt and $Global::halt_when ne "never") { + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + $Global::halt_exitstatus = + ::ceil($Global::total_failed / + ($Global::total_started || 1) * 100); + } elsif($Global::halt_count) { + $Global::halt_exitstatus = + ::min(undef_as_zero($Global::total_failed),101); + } + } + wait_and_exit($Global::halt_exitstatus); + } else { + if($Global::semaphore) { + # --semaphore runs a single job: + # Use exit value of that + wait_and_exit($Global::halt_exitstatus); + } else { + # 0 = all jobs succeeded + # 1-100 = n jobs failed + # 101 = >100 jobs failed + wait_and_exit(min(undef_as_zero($Global::exitstatus),101)); + } + } +} + + +sub __PIPE_MODE__() {} + + +sub pipepart_setup() { + # Compute the blocksize + # Generate the commands to extract the blocks + # Push the commands on queue + # Changes: + # @Global::cat_prepends + # $Global::JobQueue + if($opt::tee) { + # Prepend each command with + # < file + my $cat_string = "< ".Q($opt::a[0]); + for(1..$Global::JobQueue->total_jobs()) { + push @Global::cat_appends, $cat_string; + push @Global::cat_prepends, ""; + } + } else { + if(not $opt::blocksize) { + # --blocksize with 10 jobs per jobslot + $opt::blocksize = -10; + } + if($opt::roundrobin) { + # --blocksize with 1 job per jobslot + $opt::blocksize = -1; + } + if($opt::blocksize < 0) { + my $size = 0; + # Compute size of -a + for(@opt::a) { + if(-f $_) { + $size += -s $_; + } elsif(-b $_) { + $size += size_of_block_dev($_); + } elsif(-e $_) { + ::error("$_ is neither a file nor a block device"); + wait_and_exit(255); + } else { + ::error("File not found: $_"); + wait_and_exit(255); + } + } + # Run in total $job_slots*(- $blocksize) jobs + # Set --blocksize = size / no of proc / (- $blocksize) + $Global::dummy_jobs = 1; + $Global::blocksize = 1 + + int($size / max_jobs_running() / + -multiply_binary_prefix($opt::blocksize)); + } + @Global::cat_prepends = (map { pipe_part_files($_) } + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + grep { ! fileno $_ } @opt::a); + # Unget the empty arg as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget( + map { [Arg->new("\0noarg")] } @Global::cat_prepends + ); + } +} + +sub pipe_tee_setup() { + # Create temporary fifos + # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @fifos; + for(1..$Global::JobQueue->total_jobs()) { + push @fifos, tmpfifo(); + } + # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null + if(not fork()){ + # Test if tee supports --output-error=warn-nopipe + `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`; + my $opt = $? ? "" : "--output-error=warn-nopipe"; + ::debug("init","tee $opt"); + if($opt::dryrun) { + # This is not exactly what is run, but it gives the basic idea + print "mkfifo @fifos\n"; + print "tee $opt @fifos >/dev/null &\n"; + } else { + # Let tee inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + if($opt) { + exec "tee", $opt, @fifos; + } else { + exec "tee", @fifos; + } + } + exit(0); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + # Remove the tmpfifo as soon as it is open + @Global::cat_prepends = map { "(rm $_;" } @fifos; + @Global::cat_appends = map { ") < $_" } @fifos; +} + + +sub parcat_script() { + # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos + my $script = q'{ + use POSIX qw(:errno_h); + use IO::Select; + use strict; + use threads; + use Thread::Queue; + use Fcntl qw(:DEFAULT :flock); + + my $opened :shared; + my $q = Thread::Queue->new(); + my $okq = Thread::Queue->new(); + my @producers; + + if(not @ARGV) { + if(-t *STDIN) { + print "Usage:\n"; + print " parcat file(s)\n"; + print " cat argfile | parcat\n"; + } else { + # Read arguments from stdin + chomp(@ARGV = <STDIN>); + } + } + my $files_to_open = 0; + # Default: fd = stdout + my $fd = 1; + for (@ARGV) { + # --rm = remove file when opened + /^--rm$/ and do { $opt::rm = 1; next; }; + # -1 = output to fd 1, -2 = output to fd 2 + /^-(\d+)$/ and do { $fd = $1; next; }; + push @producers, threads->create("producer", $_, $fd); + $files_to_open++; + } + + sub producer { + # Open a file/fifo, set non blocking, enqueue fileno of the file handle + my $file = shift; + my $output_fd = shift; + open(my $fh, "<", $file) || do { + print STDERR "parcat: Cannot open $file\n"; + exit(1); + }; + # Remove file when it has been opened + if($opt::rm) { + unlink $file; + } + set_fh_non_blocking($fh); + $opened++; + # Pass the fileno to parent + $q->enqueue(fileno($fh),$output_fd); + # Get an OK that the $fh is opened and we can release the $fh + while(1) { + my $ok = $okq->dequeue(); + if($ok == fileno($fh)) { last; } + # Not ours - very unlikely to happen + $okq->enqueue($ok); + } + return; + } + + my $s = IO::Select->new(); + my %buffer; + + sub add_file { + my $infd = shift; + my $outfd = shift; + open(my $infh, "<&=", $infd) || die; + open(my $outfh, ">&=", $outfd) || die; + $s->add($infh); + # Tell the producer now opened here and can be released + $okq->enqueue($infd); + # Initialize the buffer + @{$buffer{$infh}{$outfd}} = (); + $Global::fh{$outfd} = $outfh; + } + + sub add_files { + # Non-blocking dequeue + my ($infd,$outfd); + do { + ($infd,$outfd) = $q->dequeue_nb(2); + if(defined($outfd)) { + add_file($infd,$outfd); + } + } while(defined($outfd)); + } + + sub add_files_block { + # Blocking dequeue + my ($infd,$outfd) = $q->dequeue(2); + add_file($infd,$outfd); + } + + + my $fd; + my (@ready,$infh,$rv,$buf); + do { + # Wait until at least one file is opened + add_files_block(); + while($q->pending or keys %buffer) { + add_files(); + while(keys %buffer) { + @ready = $s->can_read(0.01); + if(not @ready) { + add_files(); + } + for $infh (@ready) { + # There is only one key, namely the output file descriptor + for my $outfd (keys %{$buffer{$infh}}) { + # TODO test if 60800 is optimal (2^17 is used elsewhere) + $rv = sysread($infh, $buf, 60800); + if (!$rv) { + if($! == EAGAIN) { + # Would block: Nothing read + next; + } else { + # Nothing read, but would not block: + # This file is done + $s->remove($infh); + for(@{$buffer{$infh}{$outfd}}) { + syswrite($Global::fh{$outfd},$_); + } + delete $buffer{$infh}; + # Closing the $infh causes it to block + # close $infh; + add_files(); + next; + } + } + # Something read. + # Find \n or \r for full line + my $i = (rindex($buf,"\n")+1); + if($i) { + # Print full line + for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) { + syswrite($Global::fh{$outfd},$_); + } + # @buffer = remaining half line + $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)]; + } else { + # Something read, but not a full line + push @{$buffer{$infh}{$outfd}}, $buf; + } + redo; + } + } + } + } + } while($opened < $files_to_open); + + for (@producers) { + $_->join(); + } + + sub set_fh_non_blocking { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + my $flags; + fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &O_NONBLOCK; # Add non-blocking to the flags + fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + }'; + return ::spacefree(3, $script); +} + +sub sharder_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ hex(B::hash($_))%$bins }; + } + print $fh $_; + } + } else { + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ hex(B::hash($F[$col0]))%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub binner_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + # Let the last output fifo be the 0'th + open $fh{$t++}, ">", pop @ARGV; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ $_%$bins }; + } + print $fh $_; + } + } else { + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ $F[$col0]%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub pipe_shard_setup() { + # Create temporary fifos + # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @shardfifos; + my @parcatfifos; + # TODO $opt::jobs should be evaluated (100%) + # TODO $opt::jobs should be number of total_jobs if there are arguments + max_jobs_running(); + my $njobs = $Global::max_jobs_running; + for my $m (0..$njobs-1) { + for my $n (0..$njobs-1) { + # sharding to A B C D + # parcatting all As together + $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo(); + } + } + my $shardbin = ($opt::shard || $opt::bin); + my $script; + if($opt::bin) { + $script = binner_script(); + } else { + $script = sharder_script(); + } + + # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN + + if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # A full line, but nothing more (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread(STDIN,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + my ($col, $perlexpr, $subref) = + column_perlexpr($shardbin, $Global::header, $opt::colsep); + if(not fork()) { + # Let the sharder inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + # The PERL_HASH_SEED must be the same for all sharders + # so B::hash will return the same value for any given input + $ENV{'PERL_HASH_SEED'} = $$; + exec qw(parallel --block 100k -q --pipe -j), $njobs, + qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","), + $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + my $parcat = Q(parcat_script()); + if(not $parcat) { + ::error("'parcat' must be in path."); + ::wait_and_exit(255); + } + @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos; +} + +sub pipe_part_files(@) { + # Given the bigfile + # find header and split positions + # make commands that 'cat's the partial file + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + if(not -f $file and not -b $file) { + ::error("--pipepart only works on seekable files, not streams/pipes.", + "$file is not a seekable file."); + ::wait_and_exit(255); + } + + my $fh = open_or_exit($file); + my $firstlinelen = 0; + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while($firstlinelen += sysread($fh,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$fh); + # find positions + my @pos = find_split_positions($file,int($Global::blocksize), + $header,$firstlinelen); + # Make @cat_prepends + my @cat_prepends = (); + for(my $i=0; $i<$#pos; $i++) { + push(@cat_prepends, + cat_partial($file, $firstlinelen, $firstlinelen+length($header), + $pos[$i], $pos[$i+1])); + } + return @cat_prepends; +} + +sub find_header($$) { + # Compute the header based on $opt::header + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $Global::blocksize + # $Global::header + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + # $Global::header may be set in group_by_loop() + if($Global::header) { return $Global::header } + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(sysread($fh,$$buf_ref,int($Global::blocksize),length $$buf_ref)) { + if($$buf_ref =~ s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions($$$) { + # Find positions in bigfile where recend is followed by recstart + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $header = header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $header, $firstlinelen) = @_; + my $skiplen = $firstlinelen + length $header; + my $size = -s $file; + if(-b $file) { + # $file is a blockdevice + $size = size_of_block_dev($file); + } + $block = int $block; + if($opt::groupby) { + return split_positions_for_group_by($file,$size,$block, + $header,$firstlinelen); + } + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + # The optimal dd blocksize for ubuntu (AMD6376) = 2^16 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$skiplen); + for(my $pos = $block+$skiplen; $pos < $size; $pos += $block) { + my $buf; + if($recendrecstart eq "") { + # records ends anywhere + push(@pos,$pos); + } else { + # Seek the the block start + if(not sysseek($fh, $pos, 0)) { + ::error("Cannot seek to $pos in $file"); + edit(255); + } + while(sysread($fh,$buf,$dd_block_size,length $buf)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ m:^(.*$recend)$recstart:os) { + # Start looking for next record _after_ this match + $pos += length($1); + push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + # TODO optimize to only look at the appended + # $dd_block_size + len $recendrecstart + # TODO increase $dd_block_size to optimize for longer records + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recend); + push(@pos,$pos); + last; + } + } + } + } + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add $size as the last + push @pos, $size; + } + close $fh; + return @pos; +} + +sub split_positions_for_group_by($$$$) { + my($fh); + sub value_at($) { + my $pos = shift; + if($pos != 0) { + seek($fh, $pos-1, 0) || die; + # Read half line + <$fh>; + } + # Read full line + my $linepos = tell($fh); + $_ = <$fh>; + if(defined $_) { + # Not end of file + my @F; + if(defined $group_by::col) { + $opt::colsep ||= "\t"; + @F = split /$opt::colsep/, $_; + $_ = $F[$group_by::col]; + } + eval $group_by::perlexpr; + } + return ($_,$linepos); + } + + sub binary_search_end($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) == $s + # value_at($epos) != $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $spos = $vpos; + $posdif = $epos - $spos; + } else { + $epos = $vpos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + sub binary_search_start($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) != $s + # value_at($epos) == $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $epos = $vpos; + } else { + $spos = $vpos; + $posdif = $epos - $spos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + my ($file,$size,$block,$header,$firstlinelen) = @_; + my ($a,$b,$c,$apos,$bpos,$cpos); + my @pos; + $fh = open_or_exit($file); + # Set $Global::group_by_column $Global::group_by_perlexpr + group_by_loop($fh,$opt::recsep); + # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos + $apos = $firstlinelen + length $header; + for(($a,$apos) = value_at($apos); $apos < $size;) { + push @pos, $apos; + $bpos = $apos + $block; + ($b,$bpos) = value_at($bpos); + if(eof($fh)) { + push @pos, $size; last; + } + $cpos = $bpos + $block; + ($c,$cpos) = value_at($cpos); + if($a eq $b) { + while($b eq $c) { + # Move bpos, cpos a block forward until $a == $b != $c + $bpos = $cpos; + $cpos += $block; + ($c,$cpos) = value_at($cpos); + if($cpos >= $size) { + $cpos = $size; + last; + } + } + # $a == $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } else { + if($b eq $c) { + # $a != $b == $c + # Binary search for $b starting between ($apos,$bpos) + ($b,$bpos) = binary_search_start($b,$apos,$bpos); + } else { + # $a != $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } + } + ($a,$apos) = ($b,$bpos); + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add it + push @pos, $size; + } + return @pos; +} + +sub cat_partial($@) { + # Efficient command to copy from byte X to byte Y + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert (start,end) to (start,len) + my @start_len = map { + if(++$i % 2) { $start = $_; } else { $_-$start } + } @start_end; + # The optimal block size differs + # It has been measured on: + # AMD 6376: n*4k-1; small n + # AMD Neo N36L: 44k-200k + # Intel i7-3632QM: 55k- + # ARM Cortex A53: 4k-28k + # Intel i5-2410M: 36k-46k + # + # I choose 2^15-1 = 32767 + # q{ + # expseq() { + # perl -E ' + # $last = pop @ARGV; + # $first = shift || 1; + # $inc = shift || 1.03; + # for($i=$first; $i<=$last;$i*=$inc) { say int $i } + # ' "$@" + # } + # + # seq 111111111 > big; + # f() { ppar --test $1 -a big --pipepart --block -1 'md5sum > /dev/null'; } + # export -f f; + # expseq 1000 1.001 300000 | shuf | parallel -j1 --jl jl-md5sum f; + # }; + my $script = spacefree + (0, + q{ + while(@ARGV) { + sysseek(STDIN,shift,0) || die; + $left = shift; + while($read = + sysread(STDIN,$buf, $left > 32767 ? 32767 : $left)){ + $left -= $read; + syswrite(STDOUT,$buf); + } + } + }); + return "<". Q($file) . + " perl -e '$script' @start_len |"; +} + +sub column_perlexpr($$$) { + # Compute the column number (if any), perlexpression from combined + # string (such as --shard key, --groupby key, {=n perlexpr=} + # Input: + # $column_perlexpr = string with column and perl expression + # $header = header from input file (if column is column name) + # $colsep = column separator regexp + # Returns: + # $col = column number + # $perlexpr = perl expression + # $subref = compiled perl expression as sub reference + my ($column_perlexpr, $header, $colsep) = @_; + my ($col, $perlexpr, $subref); + if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) { + # Column name/number (possibly prefix) + if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) { + # Column number (possibly prefix) + $col = $1; + } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) { + # Column name (possibly prefix) + my $colname = $1; + # Split on --copsep pattern + my @headers = split /$colsep/, $header; + my %headers; + @headers{@headers} = (1..($#headers+1)); + $col = $headers{$colname}; + if(not defined $col) { + ::error("Column '$colname' $colsep not found in header",keys %headers); + ::wait_and_exit(255); + } + } + } + # What is left of $column_perlexpr is $perlexpr (possibly empty) + $perlexpr = $column_perlexpr; + $subref = eval("sub { no strict; no warnings; $perlexpr }"); + return($col, $perlexpr, $subref); +} + +sub group_by_loop($$) { + # Generate perl code for group-by loop + # Insert a $recsep when the column value changes + # The column value can be computed with $perlexpr + my($fh,$recsep) = @_; + my $groupby = $opt::groupby; + if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # Read a full line, but nothing more + # (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread($fh,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + $opt::colsep ||= "\t"; + ($group_by::col, $group_by::perlexpr, $group_by::subref) = + column_perlexpr($groupby, $Global::header, $opt::colsep); + # Numbered 0..n-1 due to being used by $F[n] + if($group_by::col) { $group_by::col--; } + + my $loop = ::spacefree(0,q{ + BEGIN{ $last = "RECSEP"; } + { + local $_=COLVALUE; + PERLEXPR; + if(($last) ne $_) { + print "RECSEP"; + $last = $_; + } + } + }); + if(defined $group_by::col) { + $loop =~ s/COLVALUE/\$F[$group_by::col]/g; + } else { + $loop =~ s/COLVALUE/\$_/g; + } + $loop =~ s/PERLEXPR/$group_by::perlexpr/g; + $loop =~ s/RECSEP/$recsep/g; + return $loop; +} + +sub pipe_group_by_setup() { + # Record separator with 119 bit random value + $opt::recend = ''; + $opt::recstart = + join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + $opt::remove_rec_sep = 1; + my @filter; + push @filter, "perl"; + if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) { + # This is column number/name + # Use -a (auto-split) + push @filter, "-a"; + $opt::colsep ||= "\t"; + my $sep = $opt::colsep; + $sep =~ s/\t/\\t/g; + $sep =~ s/\"/\\"/g; + # man perlrun: -Fpattern [...] You can't use literal whitespace + $sep =~ s/ /\\040{1}/g; + push @filter, "-F$sep"; + } + push @filter, "-pe"; + push @filter, group_by_loop(*STDIN,$opt::recstart); + ::debug("init", "@filter\n"); + open(STDIN, '-|', @filter) || die ("Cannot start @filter"); + if(which("mbuffer")) { + # You get a speed up of 30% by going through mbuffer + open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") || + die ("Cannot start mbuffer"); + } +} + +sub spreadstdin() { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $Global::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = int($Global::blocksize); + my $in = *STDIN; + my $timeout = $Global::blocktimeout; + + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while(sysread($in,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$in); + my $anything_written; + my $eof; + my $garbage_read; + + sub read_block() { + # Read a --blocksize from STDIN + # possibly interrupted by --blocktimeout + # Add up to the next full block + my $readsize = $blocksize - (length $buf) % $blocksize; + my ($nread,$alarm); + eval { + local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required + # --blocktimeout (or 0 if not set) + alarm $timeout; + if($] >= 5.026) { + do { + $nread = sysread $in, $buf, $readsize, length $buf; + $readsize -= $nread; + } while($readsize and $nread); + } else { + # Less efficient reading, but 32-bit sysread compatible + do { + $nread = sysread($in,substr($buf,length $buf,0),$readsize,0); + $readsize -= $nread; + } while($readsize and $nread); + } + alarm 0; + }; + if ($@) { + die unless $@ eq "alarm\n"; # propagate unexpected errors + $alarm = 1; + } else { + $alarm = 0; + } + $eof = not ($nread or $alarm); + } + + sub pass_n_line_records() { + # Pass records of N lines + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + # Go backwards until there are full n-line records + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n_regexps() { + # Pass records of N regexps + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + if(not $garbage_read) { + $garbage_read = 1; + if($buf !~ /^$recstart/o) { + # Buf does not start with $recstart => There is garbage. + # Make a single record of the garbage + if($buf =~ + /(?s:^)( + (?:(?:(?!$recend$recstart)(?s:.))*?$recend) + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + } + + my $n_records = + $Global::max_number_of_args * ($Global::max_lines || 1); + # (?!negative lookahead) is needed to avoid backtracking + # See: https://unix.stackexchange.com/questions/439356/ + # (?s:.) = (.|[\n]) but faster + while($buf =~ + /(?s:^)( + # n more times recstart.*recend + (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records} + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_regexp() { + # Find the last recend-recstart in $buf + $eof and return; + # (?s:.) = (.|[\n]) but faster + if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_csv_record() { + # Pass CVS record + # We define a CSV record as an even number of " + end of line + # This works if you use " as quoting character + my $last_newline_pos = length $buf; + # Go backwards from the last \n and search for a position + # where there is an even number of " + do { + # find last EOL + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + # While uneven " + } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2 + and $last_newline_pos >= 0); + # Chop at $last_newline_pos as that is where CSV record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n() { + # Pass n records of --recend/--recstart + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = + $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1 + and + length $buf) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub pass() { + # Pass records of --recend/--recstart + # Split record at fixed string + # Find the last recend+recstart in $buf + $eof and return; + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub increase_blocksize_maybe() { + if(not $anything_written + and not $opt::blocktimeout + and not $Global::no_autoexpand_block) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . + "Increasing to --blocksize $blocksize."); + } + } + } + + while(1) { + $anything_written = 0; + read_block(); + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + if($eof) { + last; + } else { + next; + } + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Pass n-line records + pass_n_line_records(); + } elsif($opt::csv) { + # Pass a full CSV record + pass_csv_record(); + } elsif($opt::regexp) { + # Split record at regexp + if($Global::max_number_of_args) { + pass_n_regexps(); + } else { + pass_regexp(); + } + } else { + # Pass normal --recend/--recstart record + if($Global::max_number_of_args) { + pass_n(); + } else { + pass(); + } + } + $eof and last; + increase_blocksize_maybe(); + ::debug("init", "Round\n"); + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart, + $recend, length $buf); + + if($opt::retries) { + $Global::no_more_input = 1; + # We need to start no more jobs: At most we need to retry some + # of the already running. + my @running = values %Global::running; + # Stop any virgins. + for my $job (@running) { + if(defined $job and $job->virgin()) { + close $job->fh(0,"w"); + } + } + # Wait for running jobs to be done + my $sleep = 1; + while($Global::total_running > 0) { + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + } + } + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + # Flush blocks to roundrobin procs + my $sleep = 1; + while(%Global::running) { + my $something_written = 0; + for my $job (values %Global::running) { + if($job->block_length()) { + $something_written += $job->non_blocking_write(); + } else { + close $job->fh(0,"w"); + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend() { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + if($opt::regexp and $recend eq '') { + # --regexp --recend '' + $recend = '(?s:.)'; + } + } + + if($opt::regexp) { + # Do not allow /x comments - to avoid having to quote space + $recstart = "(?-x:".$recstart.")"; + $recend = "(?-x:".$recend.")"; + # If $recstart/$recend contains '|' + # the | should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex($$) { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = index64($buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + my $sleep = 1; + + sub round_robin_write($$$$$) { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; + my $written = 0; + my $block_passed = 0; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + if(@robin_queue) { + # Rotate queue once so new blocks get a fair chance + # to be given to another slot + push @robin_queue, shift @robin_queue; + } else { + # Make a queue to spread the blocks evenly + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); + } + do { + $written = 0; + for my $job (@robin_queue) { + if($job->block_length() > 0) { + $written += $job->non_blocking_write(); + } else { + $job->set_block($header_ref, $buffer_ref, + $endpos, $recstart, $recend); + $block_passed = 1; + $written += $job->non_blocking_write(); + last; + } + } + if($written) { + $sleep = $sleep/1.5+0.001; + } + # Don't sleep if something is written + } while($written and not $block_passed); + $sleep = ::reap_usleep($sleep); + } + return $written; + } +} + +sub index64($$$) { + # Do index on strings > 2GB. + # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return index($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $block_size), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($block_size - $matchlen - 1); + } + return -1; +} + +sub rindex64($@) { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten($$) { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe($$$$$$) { + # Fork then + # Write record from pos 0 .. $endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $buffer_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $buffer_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number, $header_ref, $buffer_ref, + $recstart, $recend, $endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + # Write the block to one of the already running jobs + return round_robin_write($header_ref, $buffer_ref, + $recstart, $recend, $endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login + # or retrying failed jobs. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend); + $job->write_block(); + return 1; +} + + +sub __SEM_MODE__() {} + + +sub acquire_semaphore() { + # Acquires semaphore. If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name, + $Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + if(fork()) { + exit(0); + } else { + # If run in the background, the PID will change + $sem->pid_change(); + } + } + return $sem; +} + + +sub __PARSE_OPTIONS__() {} + +sub shell_completion() { + if($opt::shellcompletion eq "zsh") { + # if shell == zsh + zsh_competion(); + } elsif($opt::shellcompletion eq "bash") { + # if shell == bash + bash_competion(); + } elsif($opt::shellcompletion eq "auto") { + if($Global::shell =~ m:/zsh$|^zsh$:) { + # if shell == zsh + zsh_competion(); + } elsif($Global::shell =~ m:/bash$|^bash$:) { + # if shell == bash + bash_competion(); + } else { + ::error("--shellcompletion is not implemented for ". + "'$Global::shell'."); + wait_and_exit(255); + } + } else { + ::error("--shellcompletion is not implemented for ". + "'$opt::shellcompletion'."); + wait_and_exit(255); + } +} + +sub bash_competion() { + # Print: + # complete -F _comp_parallel parallel; + # _comp_parallel() { + # COMPREPLY=($(compgen -W "--options" -- + # "${COMP_WORDS[$COMP_CWORD]}")); + # }; + my @bash_completion = + ("complete -F _comp_parallel parallel;", + '_comp_parallel() { COMPREPLY=($(compgen -W "'); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + push @bash_completion, + (map { (length $_ == 1) ? "-$_ " : "--$_ " } + split /\|/, $opt); + } + } + shift @och; + } + push @bash_completion,'" -- "${COMP_WORDS[$COMP_CWORD]}")); };'."\n"; + print @bash_completion; +} + +sub zsh_competion() { + my @zsh_completion = + ("compdef _comp_parallel parallel; ", + "setopt localoptions extended_glob; ", + "local -a _comp_priv_prefix; ", + "_comp_parallel() { ", + "_arguments "); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + # {-o,--option} + my $zsh_opt = join(",", + (map { (length $_ == 1) ? "-$_" : "--$_" } + split /\|/, $opt)); + if($zsh_opt =~ /,/) { $zsh_opt = "{$zsh_opt}"; } + $desc =~ s/'/'"'"'/g; + $argdesc =~ s/'/'"'"'/g; + $func =~ s/'/'"'"'/g; + push @zsh_completion, $zsh_opt."'".$desc.$argdesc.$func."' "; + } + } + shift @och; + } + push @zsh_completion, + q{'(-)1:command: _command_names -e' }, + q{'*::arguments:{ _comp_priv_prefix=( '$words[1]' -n ${(kv)opt_args[(I)(-[ugHEP]|--(user|group|set-home|preserve-env|preserve-groups))]} ) ; _normal }'}, + "};\n"; + print @zsh_completion; +} + +sub options_hash() { + # Returns: + # %hash = for GetOptions + my %och = options_completion_hash(); + my %oh; + my ($k,$v); + while(($k,$v) = each %och) { + $k =~ s/\[.*//; + $oh{$k} = $v; + } + return %oh; +} + +sub options_completion_hash() { + # Returns: + # %hash = for GetOptions and shell completion + return + ("debug|D=s" => \$opt::D, + "xargs[Insert as many arguments as the command line length permits]" + => \$opt::xargs, + "m[Multiple arguments]" => \$opt::m, + ("X[Insert as many arguments with context as the command line ". + "length permits]" + => \$opt::X), + "v[Verbose]" => \@opt::v, + "sql=s[Use --sql-master instead (obsolete)]:DBURL" => \$opt::retired, + ("sql-master|sqlmaster=s". + "[Submit jobs via SQL server. DBURL must point to a table, which ". + "will contain --joblog, the values, and output]:DBURL" + => \$opt::sqlmaster), + ("sql-worker|sqlworker=s". + "[Execute jobs via SQL server. Read the input sources variables ". + "from the table pointed to by DBURL.]:DBURL" + => \$opt::sqlworker), + ("sql-and-worker|sqlandworker=s". + "[--sql-master DBURL --sql-worker DBURL]:DBURL" + => \$opt::sqlandworker), + ("joblog|jl=s[Logfile for executed jobs]:logfile:_files" + => \$opt::joblog), + ("results|result|res=s[Save the output into files]:name:_files" + => \$opt::results), + "resume[Resumes from the last unfinished job]" => \$opt::resume, + ("resume-failed|resumefailed". + "[Retry all failed and resume from the last unfinished job]" + => \$opt::resume_failed), + ("retry-failed|retryfailed[Retry all failed jobs in joblog]" + => \$opt::retry_failed), + "silent[Silent]" => \$opt::silent, + ("keep-order|keeporder|k". + "[Keep sequence of output same as the order of input]" + => \$opt::keeporder), + ("no-keep-order|nokeeporder|nok|no-k". + "[Overrides an earlier --keep-order (e.g. if set in ". + "~/.parallel/config)]" + => \$opt::nokeeporder), + "group[Group output]" => \$opt::group, + "g" => \$opt::retired, + ("ungroup|u". + "[Output is printed as soon as possible and bypasses GNU parallel ". + "internal processing]" + => \$opt::ungroup), + ("latest-line|latestline|ll". + "[Print latest line of each job]" + => \$opt::latestline), + ("line-buffer|line-buffered|linebuffer|linebuffered|lb". + "[Buffer output on line basis]" + => \$opt::linebuffer), + ("tmux". + "[Use tmux for output. Start a tmux session and run each job in a ". + "window in that session. No other output will be produced]" + => \$opt::tmux), + ("tmux-pane|tmuxpane". + "[Use tmux for output but put output into panes in the first ". + "window. Useful if you want to monitor the progress of less than ". + "100 concurrent jobs]" + => \$opt::tmuxpane), + "null|0[Use NUL as delimiter]" => \$opt::null, + "quote|q[Quote command]" => \$opt::quote, + # Replacement strings + ("parens=s[Use parensstring instead of {==}]:parensstring" + => \$opt::parens), + ('rpl=s[Define replacement string]:"tag perl expression"' + => \@opt::rpl), + "plus[Add more replacement strings]" => \$opt::plus, + ("I=s". + "[Use the replacement string replace-str instead of {}]:replace-str" + => \$opt::I), + ("extensionreplace|er=s". + "[Use the replacement string replace-str instead of {.} for input ". + "line without extension]:replace-str" + => \$opt::U), + "U=s" => \$opt::retired, + ("basenamereplace|bnr=s". + "[Use the replacement string replace-str instead of {/} for ". + "basename of input line]:replace-str" + => \$opt::basenamereplace), + ("dirnamereplace|dnr=s". + "[Use the replacement string replace-str instead of {//} for ". + "dirname of input line]:replace-str" + => \$opt::dirnamereplace), + ("basenameextensionreplace|bner=s". + "[Use the replacement string replace-str instead of {/.} for ". + "basename of input line without extension]:replace-str" + => \$opt::basenameextensionreplace), + ("seqreplace=s". + "[Use the replacement string replace-str instead of {#} for job ". + "sequence number]:replace-str" + => \$opt::seqreplace), + ("slotreplace=s". + "[Use the replacement string replace-str instead of {%} for job ". + "slot number]:replace-str" + => \$opt::slotreplace), + ("jobs|j=s". + "[(Add +N to/Subtract -N from/Multiply N%) the number of CPU ". + "threads or read parameter from file]:_files" + => \$opt::jobs), + ("delay=s". + "[Delay starting next job by duration]:duration" => \$opt::delay), + ("ssh-delay|sshdelay=f". + "[Delay starting next ssh by duration]:duration" + => \$opt::sshdelay), + ("load=s". + "[Only start jobs if load is less than max-load]:max-load" + => \$opt::load), + "noswap[Do not start job is computer is swapping]" => \$opt::noswap, + ("max-line-length-allowed|maxlinelengthallowed". + "[Print maximal command line length]" + => \$opt::max_line_length_allowed), + ("number-of-cpus|numberofcpus". + "[Print the number of physical CPU cores and exit (obsolete)]" + => \$opt::number_of_cpus), + ("number-of-sockets|numberofsockets". + "[Print the number of CPU sockets and exit]" + => \$opt::number_of_sockets), + ("number-of-cores|numberofcores". + "[Print the number of physical CPU cores and exit]" + => \$opt::number_of_cores), + ("number-of-threads|numberofthreads". + "[Print the number of hyperthreaded CPU cores and exit]" + => \$opt::number_of_threads), + ("use-sockets-instead-of-threads|usesocketsinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_sockets_instead_of_threads), + ("use-cores-instead-of-threads|usecoresinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cores_instead_of_threads), + ("use-cpus-instead-of-cores|usecpusinsteadofcores". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cpus_instead_of_cores), + ("shell-quote|shellquote|shell_quote". + "[Does not run the command but quotes it. Useful for making ". + "quoted composed commands for GNU parallel]" + => \@opt::shellquote), + ('nice=i[Run the command at this niceness]:niceness:($(seq -20 19))' + => \$opt::nice), + "tag[Tag lines with arguments]" => \$opt::tag, + ("tag-string|tagstring=s". + "[Tag lines with a string]:str" => \$opt::tagstring), + "ctag[Color tag]:str" => \$opt::ctag, + "ctag-string|ctagstring=s[Colour tagstring]:str" => \$opt::ctagstring, + "color|colour[Colourize output]" => \$opt::color, + ("color-failed|colour-failed|colorfailed|colourfailed|". + "color-fail|colour-fail|colorfail|colourfail|cf". + "[Colour failed jobs red]" + => \$opt::colorfailed), + ("onall[Run all the jobs on all computers given with --sshlogin]" + => \$opt::onall), + "nonall[--onall with no arguments]" => \$opt::nonall, + ("filter-hosts|filterhosts|filter-host[Remove down hosts]" + => \$opt::filter_hosts), + ('sshlogin|S=s'. + '[Distribute jobs to remote computers]'. + ':[@hostgroups/][ncpus/]sshlogin'. + '[,[@hostgroups/][ncpus/]sshlogin[,...]] or @hostgroup'. + ':_users') => \@opt::sshlogin, + ("sshloginfile|slf=s". + "[File with sshlogins on separate lines. Lines starting with '#' ". + "are ignored.]:filename:_files" + => \@opt::sshloginfile), + ("controlmaster|M". + "[Use ssh's ControlMaster to make ssh connections faster]" + => \$opt::controlmaster), + ("ssh=s". + "[Use this command instead of ssh for remote access]:sshcommand" + => \$opt::ssh), + ("transfer-file|transferfile|transfer-files|transferfiles|tf=s". + "[Transfer filename to remote computers]:filename:_files" + => \@opt::transfer_files), + ("return=s[Transfer files from remote computers]:filename:_files" + => \@opt::return), + ("trc=s[--transfer --return filename --cleanup]:filename:_files" + => \@opt::trc), + "transfer[Transfer files to remote computers]" => \$opt::transfer, + "cleanup[Remove transferred files]" => \$opt::cleanup, + ("basefile|bf=s". + "[Transfer file to each sshlogin before first job is started]". + ":file:_files" + => \@opt::basefile), + ("template|tmpl=s". + "[Replace replacement strings in file and save it in repl]". + ":file=repl:_files" + => \%opt::template), + "B=s" => \$opt::retired, + "ctrl-c|ctrlc" => \$opt::retired, + "no-ctrl-c|no-ctrlc|noctrlc" => \$opt::retired, + ("work-dir|workdir|wd=s". + "[Jobs will be run in the dir mydir. (default: the current dir ". + "for the local machine, the login dir for remote computers)]". + ":mydir:_cd" + => \$opt::workdir), + "W=s" => \$opt::retired, + ("rsync-opts|rsyncopts=s[Options to pass on to rsync]:options" + => \$opt::rsync_opts), + ("tmpdir|tempdir=s[Directory for temporary files]:dirname:_cd" + => \$opt::tmpdir), + ("use-compress-program|compress-program|". + "usecompressprogram|compressprogram=s". + "[Use prg for compressing temporary files]:prg:_commands" + => \$opt::compress_program), + ("use-decompress-program|decompress-program|". + "usedecompressprogram|decompressprogram=s". + "[Use prg for decompressing temporary files]:prg:_commands" + => \$opt::decompress_program), + "compress[Compress temporary files]" => \$opt::compress, + "open-tty|o[Open terminal tty]" => \$opt::open_tty, + "tty[Open terminal tty]" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + ("dry-run|dryrun|dr". + "[Print the job to run on stdout (standard output), but do not ". + "run the job]" + => \$opt::dryrun), + "progress[Show progress of computations]" => \$opt::progress, + ("eta[Show the estimated number of seconds before finishing]" + => \$opt::eta), + "bar[Show progress as a progress bar]" => \$opt::bar, + ("total-jobs|totaljobs|total=s". + "[Set total number of jobs]" => \$opt::totaljobs), + "shuf[Shuffle jobs]" => \$opt::shuf, + ("arg-sep|argsep=s". + "[Use sep-str instead of ::: as separator string]:sep-str" + => \$opt::arg_sep), + ("arg-file-sep|argfilesep=s". + "[Use sep-str instead of :::: as separator string ". + "between command and argument files]:sep-str" + => \$opt::arg_file_sep), + ('trim=s[Trim white space in input]:trim_method:'. + '((n\:"No trim" l\:"Left\ trim" r\:"Right trim" '. + 'lr\:"Both trim" rl\:"Both trim"))' + => \$opt::trim), + "env=s[Copy environment variable var]:var:_vars" => \@opt::env, + "recordenv|record-env[Record environment]" => \$opt::record_env, + ('session'. + '[Record names in current environment in $PARALLEL_IGNORED_NAMES '. + 'and exit. Only used with env_parallel. '. + 'Aliases, functions, and variables with names i]' + => \$opt::session), + ('plain[Ignore --profile, $PARALLEL, and ~/.parallel/config]' + => \$opt::plain), + ("profile|J=s". + "[Use profile profilename for options]:profilename:_files" + => \@opt::profile), + "tollef" => \$opt::tollef, + "gnu[Behave like GNU parallel]" => \$opt::gnu, + "link|xapply[Link input sources]" => \$opt::link, + "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource, + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines. + ("bibtex|citation". + "[Print the citation notice and BibTeX entry for GNU parallel, ". + "silence citation notice for all future runs, and exit. ". + "It will not run any commands]" + => \$opt::citation), + "will-cite|willcite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + ('halt-on-error|haltonerror|halt=s'. + '[When should GNU parallel terminate]'. + ':when:((now\:"kill all running jobs and halt immediately" '. + 'soon\:"wait for all running jobs to complete, start no new jobs"))' + => \$opt::halt), + 'limit=s[Dynamic job limit]:"command args"' => \$opt::limit, + ("memfree=s". + "[Minimum memory free when starting another job]:size" + => \$opt::memfree), + ("memsuspend=s". + "[Suspend jobs when there is less memory available]:size" + => \$opt::memsuspend), + "retries=s[Try failing jobs n times]:n" => \$opt::retries, + ("timeout=s". + "[Time out for command. If the command runs for longer than ". + "duration seconds it will get killed as per --term-seq]:duration" + => \$opt::timeout), + ("term-seq|termseq=s". + "[Termination sequence]:sequence" => \$opt::termseq), + # xargs-compatibility - implemented, man, testsuite + ("max-procs|maxprocs|P=s". + "[Add N to/Subtract N from/Multiply N% with/ the number of CPU ". + "threads or read parameter from file]:+N/-N/N%/N/procfile:_files" + => \$opt::jobs), + ("delimiter|d=s[Input items are terminated by delim]:delim" + => \$opt::d), + ("max-chars|maxchars|s=s[Limit length of command]:max-chars" + => \$opt::max_chars), + ("arg-file|argfile|a=s". + "[Use input-file as input source]:input-file:_files" => \@opt::a), + "no-run-if-empty|norunifempty|r[Do not run empty input]" => \$opt::r, + ("replace|i:s". + "[This option is deprecated; use -I instead]:replace-str" + => \$opt::i), + "E=s" => \$opt::eof, + ("eof|e:s[Set the end of file string to eof-str]:eof-str" + => \$opt::eof), + ("process-slot-var|processslotvar=s". + "[Set this variable to job slot number]:varname" + => \$opt::process_slot_var), + ("max-args|maxargs|n=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_args), + ("max-replace-args|maxreplaceargs|N=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_replace_args), + "col-sep|colsep|C=s[Column separator]:regexp" => \$opt::colsep, + "csv[Treat input as CSV-format]"=> \$opt::csv, + ("help|h[Print a summary of the options to GNU parallel and exit]" + => \$opt::help), + ("L=s[When used with --pipe: Read records of recsize]:recsize" + => \$opt::L), + ("max-lines|maxlines|l:f". + "[When used with --pipe: Read records of recsize lines]:recsize" + => \$opt::max_lines), + "interactive|p[Ask user before running a job]" => \$opt::interactive, + ("verbose|t[Print the job to be run on stderr (standard error)]" + => \$opt::verbose), + ("version|V[Print the version GNU parallel and exit]" + => \$opt::version), + ('min-version|minversion=i'. + '[Print the version GNU parallel and exit]'. + ':version:($(parallel --minversion 0))' + => \$opt::minversion), + ("show-limits|showlimits". + "[Display limits given by the operating system]" + => \$opt::show_limits), + ("exit|x[Exit if the size (see the -s option) is exceeded]" + => \$opt::x), + # Semaphore + "semaphore[Work as a counting semaphore]" => \$opt::semaphore, + ("semaphore-timeout|semaphoretimeout|st=s". + "[If secs > 0: If the semaphore is not released within secs ". + "seconds, take it anyway]:secs" + => \$opt::semaphoretimeout), + ("semaphore-name|semaphorename|id=s". + "[Use name as the name of the semaphore]:name" + => \$opt::semaphorename), + "fg[Run command in foreground]" => \$opt::fg, + "bg[Run command in background]" => \$opt::bg, + "wait[Wait for all commands to complete]" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + ("shebang|hashbang". + "[GNU parallel can be called as a shebang (#!) command as the ". + "first line of a script. The content of the file will be treated ". + "as inputsource]" + => \$opt::shebang), + ("_pipe-means-argfiles[internal]" + => \$opt::_pipe_means_argfiles), + "Y" => \$opt::retired, + ("skip-first-line|skipfirstline". + "[Do not use the first line of input]" + => \$opt::skip_first_line), + "bug" => \$opt::bug, + # --pipe + ("pipe|spreadstdin". + "[Spread input to jobs on stdin (standard input)]" => \$opt::pipe), + ("round-robin|roundrobin|round". + "[Distribute chunks of standard input in a round robin fashion]" + => \$opt::roundrobin), + "recstart=s" => \$opt::recstart, + ("recend=s". + "[Split record between endstring and startstring]:endstring" + => \$opt::recend), + ("regexp|regex". + "[Interpret --recstart and --recend as regular expressions]" + => \$opt::regexp), + ("remove-rec-sep|removerecsep|rrs". + "[Remove record separator]" => \$opt::remove_rec_sep), + ("output-as-files|outputasfiles|files[Save output to files]" + => \$opt::files), + ("block-size|blocksize|block=s". + "[Size of block in bytes to read at a time]:size" + => \$opt::blocksize), + ("block-timeout|blocktimeout|bt=s". + "[Timeout for reading block when using --pipe]:duration" + => \$opt::blocktimeout), + "header=s[Use regexp as header]:regexp" => \$opt::header, + "cat[Create a temporary file with content]" => \$opt::cat, + "fifo[Create a temporary fifo with content]" => \$opt::fifo, + ("pipe-part|pipepart[Pipe parts of a physical file]" + => \$opt::pipepart), + "tee[Pipe all data to all jobs]" => \$opt::tee, + ("shard=s". + "[Use shardexpr as shard key and shard input to the jobs]:shardexpr" + => \$opt::shard), + ("bin=s". + "[Use binexpr as binning key and bin input to the jobs]:binexpr" + => \$opt::bin), + "group-by|groupby=s[Group input by value]:val" => \$opt::groupby, + # + ("hgrp|hostgrp|hostgroup|hostgroups[Enable hostgroups on arguments]" + => \$opt::hostgroups), + "embed[Embed GNU parallel in a shell script]" => \$opt::embed, + ("filter=s[Only run jobs where filter is true]:filter" + => \@opt::filter), + "_parset=s[Generate shell code for parset]" => \$opt::_parset, + ("shell-completion|shellcompletion=s". + "[Generate shell code for shell completion]" + => \$opt::shellcompletion), + # Parameter for testing optimal values + "_test=s" => \$opt::_test, + ); +} + +sub get_options_from_array($@) { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_parset() { + $Global::progname = "parset"; + @Global::parset_vars = split /[ ,]/, $opt::_parset; + my $var_or_assoc = shift @Global::parset_vars; + # Legal names: var _v2ar arrayentry[2] + my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ } + @Global::parset_vars); + if(@illegal) { + ::error + ("@illegal is an invalid variable name.", + "Variable names must be letter followed by letters or digits.", + "Usage:", + " parset varname GNU Parallel options and command"); + wait_and_exit(255); + } + if($var_or_assoc eq "assoc") { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "assoc"; + $Global::parset_endstring=")\n"; + } elsif($var_or_assoc eq "var") { + if($#Global::parset_vars > 0) { + $Global::parset = "var"; + } else { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "array"; + $Global::parset_endstring=")\n"; + } + } else { + ::die_bug("parset: unknown '$opt::_parset'"); + } +} + +sub parse_options(@) { + # Returns: N/A + init_globals(); + my @argv_before = @ARGV; + @ARGV = read_options(); + + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be added to a public hall of shame by + # removing the lines. + if(defined $opt::citation) { + citation(\@argv_before,\@ARGV); + wait_and_exit(0); + } + # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + if($opt::bug) { ::die_bug("test-bug"); } + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) + || $ENV{'SHELL'} || "/bin/sh"; + if(not -x $Global::shell and not which($Global::shell)) { + ::error("Shell '$Global::shell' not found."); + wait_and_exit(255); + } + ::debug("init","Global::shell $Global::shell\n"); + $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:; + if(defined $opt::_parset) { parse_parset(); } + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::null) { $/ = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + parse_replacement_string_options(); + $opt::tag ||= $opt::ctag; + $opt::tagstring ||= $opt::ctagstring; + if(defined $opt::ctag or defined $opt::ctagstring + or defined $opt::color) { + $Global::color = 1; + } + if($opt::linebuffer or $opt::latestline) { + $Global::linebuffer = 1; + Job::latestline_init(); + } + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; + } + if(defined $opt::tagstring) { + $opt::tagstring = unquote_printf($opt::tagstring); + if($opt::tagstring =~ + /\Q$Global::parensleft\E.*\S+.*\Q$Global::parensright\E/ + and + $Global::linebuffer) { + # --tagstring contains {= ... =} and --linebuffer => + # recompute replacement string for each use (do not cache) + $Global::cache_replacement_eval = 0; + } + } + if(defined $opt::interactive) { $Global::interactive = $opt::interactive; } + if(defined $opt::quote) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { + $opt::max_args = multiply_binary_prefix($opt::max_args); + $Global::max_number_of_args = $opt::max_args; + } + if(defined $opt::blocktimeout) { + $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout)); + if($Global::blocktimeout < 1) { + ::error("--block-timeout must be at least 1"); + wait_and_exit(255); + } + } + if(defined $opt::timeout) { + $Global::timeoutq = TimeoutQueue->new($opt::timeout); + } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts || + $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR'; + # Default: Same nice level as GNU Parallel is started at + $opt::nice ||= eval { getpriority(0,0) } || 0; + if(defined $opt::help) { usage(); exit(0); } + if(defined $opt::shellcompletion) { shell_completion(); exit(0); } + if(defined $opt::embed) { embed(); exit(0); } + if(defined $opt::sqlandworker) { + $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker; + } + if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::csv) { + if(not $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;") { + ::error("The perl module Text::CSV is not installed."); + ::error("Try installing libtext-csv-perl or perl-Text-CSV."); + wait_and_exit(255); + } + $opt::colsep = defined $opt::colsep ? $opt::colsep : ","; + my $csv_setting = { binary => 1, sep_char => $opt::colsep }; + my $sep = $csv_setting->{sep_char}; + $Global::csv = Text::CSV->new($csv_setting) + or die "Cannot use CSV: ".Text::CSV->error_diag (); + } + if(defined $opt::header) { + $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; + } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { + $Global::arg_file_sep = $opt::arg_file_sep; + } + if(not defined $opt::process_slot_var) { + $opt::process_slot_var = 'PARALLEL_JOBSLOT0'; + } + if(defined $opt::number_of_sockets) { + print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cpus) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_threads) { + print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_chars) { + $opt::max_chars = multiply_binary_prefix($opt::max_chars); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if($opt::transfer) { + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + push @Global::transfer_files, @opt::transfer_files; + if(%opt::template) { + while (my ($source, $template_name) = each %opt::template) { + if(open(my $tmpl, "<", $source)) { + local $/; # $/ = undef => slurp whole file + my $content = <$tmpl>; + push @Global::template_names, $template_name; + push @Global::template_contents, $content; + ::debug("tmpl","Name: $template_name\n$content\n"); + } else { + ::error("Cannot open '$source'."); + wait_and_exit(255); + } + } + } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M"); + if($Global::blocksize > 2**31-1 and not $opt::pipepart) { + warning("--blocksize >= 2G causes problems. Using 2G-1."); + $Global::blocksize = 2**31-1; + } + if($^O eq "cygwin" and + ($opt::pipe or $opt::pipepart or $opt::roundrobin) + and $Global::blocksize > 65535) { + warning("--blocksize >= 64K causes problems on Cygwin."); + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + $opt::memsuspend = multiply_binary_prefix($opt::memsuspend); + $Global::memlimit = $opt::memsuspend + $opt::memfree; + check_invalid_option_combinations(); + if((defined $opt::fifo or defined $opt::cat) and not $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//; + $opt::sshdelay = multiply_time_units($opt::sshdelay); + $Global::delayauto = $opt::delay =~ s/auto$//; + $opt::delay = multiply_time_units($opt::delay); + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + + if(defined $opt::results) { + # Is the output a dir or CSV-file? + if($opt::results =~ /\.csv$/i) { + # CSV with , as separator + $Global::csvsep = ","; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.tsv$/i) { + # CSV with TAB as separator + $Global::csvsep = "\t"; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.json$/i) { + # JSON output + $Global::jsonout ||= 1; + $Global::membuffer ||= 1; + } + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + if(($opt::results and not $Global::csvsep) or $opt::files) { + # No need for decompressing + $opt::decompress_program = "cat >/dev/null"; + } + } + if(defined $opt::dryrun) { + # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks + $opt::ungroup = 0; + $opt::group = 1; + } + if(defined $opt::nonall) { + # Append a dummy empty argument if there are no arguments + # on the command line to avoid reading from STDIN. + # arg_sep = random 50 char + # \0noarg => nothing (not the empty string) + $Global::arg_sep = join "", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50); + push @ARGV, $Global::arg_sep, "\0noarg"; + } + if(defined $opt::tee) { + if(not defined $opt::jobs) { + $opt::jobs = 0; + } + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 1; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + if(not @Global::transfer_files) { + # Defaults to --transferfile {} + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::null = 1; + $/ = "\0"; + } else { + $opt::max_lines = multiply_binary_prefix($opt::max_lines); + if ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + } + + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $opt::L = multiply_binary_prefix($opt::L); + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $opt::max_replace_args = + multiply_binary_prefix($opt::max_replace_args); + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) { + # Deal with ::: :::+ :::: and ::::+ + @ARGV = read_args_from_command_line(); + } + parse_semaphore(); + + if(defined $opt::eta) { $opt::progress = $opt::eta; } + if(defined $opt::bar) { $opt::progress = $opt::bar; } + if(defined $opt::bar or defined $opt::latestline) { + my $fh = $Global::status_fd || *STDERR; + eval q{ + # Enable utf8 if possible + use utf8; + binmode $fh, "encoding(utf8)"; + *decode_utf8 = \&Encode::decode_utf8; + }; + if(eval { decode_utf8("x") }) { + # Great: decode works + } else { + # UTF8-decode not supported: Dummy decode + eval q{sub decode_utf8($;$) { $_[0]; }}; + } + } + + # Funding a free software project is hard. GNU Parallel is no + # exception. On top of that it seems the less visible a project + # is, the harder it is to get funding. And the nature of GNU + # Parallel is that it will never be seen by "the guy with the + # checkbook", but only by the people doing the actual work. + # + # This problem has been covered by others - though no solution has + # been found: + # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer + # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/ + # + # The FAQ tells you why the citation notice exists: + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # + # If you want GNU Parallel to be maintained in the future, and not + # just wither away like so many other free software tools, you + # need to help finance the development. + # + # The citation notice is a simple way of doing so, as citations + # makes it possible to me to get a job where I can maintain GNU + # Parallel as part of the job. + # + # This means you can help financing development + # + # WITHOUT PAYING A SINGLE CENT! + # + # Before implementing the citation notice it was discussed with + # the users: + # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html + # + # Having to spend 10 seconds on running 'parallel --citation' once + # is no doubt not an ideal solution, but no one has so far come up + # with an ideal solution - neither for funding GNU Parallel nor + # other free software. + # + # If you believe you have the perfect solution, you should try it + # out, and if it works, you should post it on the email + # list. Ideas that will cost work and which have not been tested + # are, however, unlikely to be prioritized. + # + # Please note that GPL version 3 gives you the right to fork GNU + # Parallel under a new name, but it does not give you the right to + # distribute modified copies with the citation notice disabled in + # a way where the software can be confused with GNU Parallel. To + # do that you need to be the owner of the GNU Parallel + # trademark. The xt:Commerce case shows this. + # + # Description of the xt:Commerce case in OLG Duesseldorf + # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx + # + # The verdict in German + # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # + # Other free software limiting derivates by the same name: + # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects + # https://tm.joomla.org/trademark-faq.html + # https://www.mozilla.org/en-US/foundation/trademarks/faq/ + # + # Running 'parallel --citation' one single time takes less than 10 + # seconds, and will silence the citation notice for future + # runs. If that is too much trouble for you, why not use one of + # the alternatives instead? + # See a list in: 'man parallel_alternatives' + # + # If you want GNU Parallel to be maintained in the future you + # should keep this line. + citation_notice(); + # *YOU* will be harming free software by removing the notice. You + # accept to be added to a public hall of shame by removing the + # line. This is because _YOU_ actively make it harder to justify + # spending time developing GNU Parallel. + + parse_halt(); + + if($ENV{'PARALLEL_ENV'}) { + # Read environment and set $Global::parallel_env + # Must be done before is_acceptable_command_line_length() + my $penv = $ENV{'PARALLEL_ENV'}; + # unset $PARALLEL_ENV: It should not be given to children + # because it takes up a lot of env space + delete $ENV{'PARALLEL_ENV'}; + if(-e $penv) { + # This is a file/fifo: Replace envvar with content of file + open(my $parallel_env, "<", $penv) || + ::die_bug("Cannot read parallel_env from $penv"); + local $/; # Put <> in slurp mode + $penv = <$parallel_env>; + close $parallel_env; + } + # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV + $penv =~ s/\001/\n/g; + if($penv =~ /\0/) { + ::warning('\0 (NUL) in environment is not supported'); + } + $Global::parallel_env = $penv; + } + + parse_sshlogin(); + if(defined $opt::show_limits) { show_limits(); } + + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail."); + } + + if(not defined $opt::jobs) { $opt::jobs = "100%"; } + open_joblog(); + open_json_csv(); + if($opt::sqlmaster or $opt::sqlworker) { + $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker); + } + if($opt::sqlworker) { $Global::membuffer ||= 1; } + # The sqlmaster groups the arguments, so the should just read one + if($opt::sqlworker and not $opt::sqlmaster) { + $Global::max_number_of_args = 1; + } + if($Global::color or $opt::colorfailed) { Job::init_color(); } +} + +sub check_invalid_option_combinations() { + if(defined $opt::timeout and + $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) { + ::error("--timeout must be seconds or percentage."); + wait_and_exit(255); + } + if(defined $opt::fifo and defined $opt::cat) { + ::error("--fifo cannot be combined with --cat."); + ::wait_and_exit(255); + } + if(defined $opt::retries and defined $opt::roundrobin) { + ::error("--retries cannot be combined with --roundrobin."); + ::wait_and_exit(255); + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ". + "--max-lines, and -L."); + wait_and_exit(255); + } + if(defined $opt::group and $opt::ungroup) { + ::error("--group cannot be combined with --ungroup."); + ::wait_and_exit(255); + } + if(defined $opt::group and $opt::linebuffer) { + ::error("--group cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::ungroup and $opt::linebuffer) { + ::error("--ungroup cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::tollef and not $opt::gnu) { + ::error("--tollef has been retired.", + "Remove --tollef or use --gnu to override --tollef."); + ::wait_and_exit(255); + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.", + "-B has been retired. Use --bf.", + "-T has been retired. Use --tty.", + "-U has been retired. Use --er.", + "-W has been retired. Use --wd.", + "-Y has been retired. Use --shebang.", + "-H has been retired. Use --halt.", + "--sql has been retired. Use --sqlmaster.", + "--ctrlc has been retired.", + "--noctrlc has been retired."); + ::wait_and_exit(255); + } + if($opt::groupby) { + if(not $opt::pipe and not $opt::pipepart) { + $opt::pipe = 1; + } + if($opt::remove_rec_sep) { + ::error("--remove-rec-sep is not compatible with --groupby"); + ::wait_and_exit(255); + } + if($opt::recstart) { + ::error("--recstart is not compatible with --groupby"); + ::wait_and_exit(255); + } + if($opt::recend ne "\n") { + ::error("--recend is not compatible with --groupby"); + ::wait_and_exit(255); + } + } +} + +sub init_globals() { + # Defaults: + $Global::version = 20221122; + $Global::progname = 'parallel'; + $::name = "GNU Parallel"; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + # Don't quote every part of the command line + $Global::quoting = 0; + # Quote replacement strings + $Global::quote_replace = 1; + $Global::total_completed = 0; + $Global::cache_replacement_eval = 1; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => + ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '. + '$_ = dirname($_);'), + '{/.}' => 's:.*/::; s:\.[^/.]*$::;', + '{.}' => 's:\.[^/.]*$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$:: || s:.*$::', + # a.b => b; a => '' + '{+.}' => 's:.*\.:: || s:.*$::', + # a.b.c => b.c; a.b => ''; a => '' + '{+..}' => 's:.*\.([^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{+...}' => 's:.*\.([^/.]*\.[^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{..}' => 's:\.[^/.]*\.[^/.]*$::', + '{...}' => 's:\.[^/.]*\.[^/.]*\.[^/.]*$::', + '{/..}' => 's:.*/::; s:\.[^/.]*\.[^/.]*$::', + '{/...}' => 's:.*/::; s:\.[^/.]*\.[^/.]*\.[^/.]*$::', + # n choose k = Binomial coefficient + '{choose_k}' => ('for $t (2..$#arg)'. + '{ if($arg[$t-1] ge $arg[$t]) { skip() } }'), + # unique values: Skip job if any args are the same + '{uniq}' => 'if(::uniq(@arg) != @arg) { skip(); }', + # {##} = number of jobs + '{##}' => '1 $_=total_jobs()', + # {0%} = 0-padded jobslot + '{0%}' => ('1 $f=1+int((log($Global::max_jobs_running||1)/log(10)));'. + '$_=sprintf("%0${f}d",slot())'), + # {0%} = 0-padded seq + '{0#}' => ('1 $f=1+int((log(total_jobs())/log(10)));'. + '$_=sprintf("%0${f}d",seq())'), + + ## Bash inspired replacement strings + # Bash ${a:-myval} + '{:-([^}]+?)}' => '$_ ||= $$1', + # Bash ${a:2} + '{:(\d+?)}' => 'substr($_,0,$$1) = ""', + # Bash ${a:2:3} + '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', + # echo {#z.*z.} ::: z.z.z.foo => z.foo + # echo {##z.*z.} ::: z.z.z.foo => foo + # Bash ${a#bc} + '{#([^#}][^}]*?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;', + # Bash ${a##bc} + '{##([^#}][^}]*?)}' => 's/^$$1//;', + # echo {%.z.*z} ::: foo.z.z.z => foo.z + # echo {%%.z.*z} ::: foo.z.z.z => foo + # Bash ${a%def} + '{%([^}]+?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;', + # Bash ${a%%def} + '{%%([^}]+?)}' => 's/$$1$//;', + # Bash ${a/def/ghi} ${a/def/} + '{/([^#%}/]+?)/([^}]*?)}' => 's/$$1/$$2/;', + # Bash ${a/#def/ghi} ${a/#def/} + '{/#([^}]+?)/([^}]*?)}' => 's/^$$1/$$2/g;', + # Bash ${a/%def/ghi} ${a/%def/} + '{/%([^}]+?)/([^}]*?)}' => 's/$$1$/$$2/g;', + # Bash ${a//def/ghi} ${a//def/} + '{//([^}]+?)/([^}]*?)}' => 's/$$1/$$2/g;', + # Bash ${a^a} + '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;', + # Bash ${a^^a} + '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;', + # Bash ${a,A} + '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;', + # Bash ${a,,A} + '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;', + + # {slot} = $PARALLEL_JOBSLOT + '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()', + # {host} = ssh host + '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()', + # {sshlogin} = sshlogin + '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()', + # {hgrp} = hostgroups of the host + '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()', + # {agrp} = hostgroups of the argument + '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + $ENV{'PARALLEL_REMOTE_TMPDIR'} ||= "/tmp"; + $ENV{'OLDPWD'} = $ENV{'PWD'}; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp."); + $ENV{HOME} = "/tmp"; + } + # no warnings to allow for undefined $XDG_* + no warnings 'uninitialized'; + # If $PARALLEL_HOME is set, but does not exist, try making it. + if(defined $ENV{'PARALLEL_HOME'}) { + eval { File::Path::mkpath($ENV{'PARALLEL_HOME'}); }; + } + # $xdg_config_home is needed to make env_parallel.fish stop complaining + my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'}; + # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel, + # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel + # Keep only dirs that exist + @Global::config_dirs = + (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, + (map { "$_/parallel" } + $xdg_config_home, + split /:/, $ENV{'XDG_CONFIG_DIRS'}), + $ENV{'HOME'} . "/.parallel"); + # Use first dir as config dir + $Global::config_dir = $Global::config_dirs[0] || + $ENV{'HOME'} . "/.parallel"; + if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) { + ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist."); + ::warning("Using $Global::config_dir"); + } + # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel, + # Keep only dirs that exist + @Global::cache_dirs = + (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel"); + $Global::cache_dir = $Global::cache_dirs[0] || + $ENV{'HOME'} . "/.parallel"; + Job::init_color(); +} + +sub parse_halt() { + # $opt::halt flavours + # Uses: + # $opt::halt + # $Global::halt_when + # $Global::halt_fail + # $Global::halt_success + # $Global::halt_pct + # $Global::halt_count + if(defined $opt::halt) { + my %halt_expansion = ( + "0" => "never", + "1" => "soon,fail=1", + "2" => "now,fail=1", + "-1" => "soon,success=1", + "-2" => "now,success=1", + ); + # Expand -2,-1,0,1,2 into long form + $opt::halt = $halt_expansion{$opt::halt} || $opt::halt; + # --halt 5% == --halt soon,fail=5% + $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/; + # Split: soon,fail=5% + my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt; + if(not grep { $when eq $_ } qw(never soon now)) { + ::error("--halt must have 'never', 'soon', or 'now'."); + ::wait_and_exit(255); + } + $Global::halt_when = $when; + if($when ne "never") { + if($fail_success eq "fail") { + $Global::halt_fail = 1; + } elsif($fail_success eq "success") { + $Global::halt_success = 1; + } elsif($fail_success eq "done") { + $Global::halt_done = 1; + } else { + ::error("--halt $when must be followed by ,success or ,fail."); + ::wait_and_exit(255); + } + if($pct_count =~ /^(\d+)%$/) { + $Global::halt_pct = $1/100; + } elsif($pct_count =~ /^(\d+)$/) { + $Global::halt_count = $1; + } else { + ::error("--halt $when,$fail_success ". + "must be followed by ,number or ,percent%."); + ::wait_and_exit(255); + } + } + } +} + +sub parse_replacement_string_options() { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl($$) { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/\s/,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore() { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + # $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) { + $Global::semaphore = 1; + } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait and not $opt::sqlmaster) { + $Global::semaphore = 1; @ARGV = "true"; + } + if($Global::semaphore) { + if(@opt::a) { + # Assign the first -a to STDIN + open(STDIN,"<",shift @opt::a); + if(@opt::a) { + # We currently have no way of dealing with more -a + ::error("A semaphore cannot take input from more files\n"); + ::wait_and_exit(255); + } + } + @opt::a = ("/dev/null"); + # Append a dummy empty argument + # \0 => nothing (not the empty string) + push(@Global::unget_argv, [Arg->new("\0noarg")]); + $Semaphore::timeout = int(multiply_time_units($opt::semaphoretimeout)) + || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + local $/ = "\n"; + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive."); + ::wait_and_exit(255); + } + } +} + +sub record_env() { + # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars + # Returns: N/A + my $ignore_filename = $Global::config_dir . "/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename."); + ::wait_and_exit(255); + } +} + +sub open_joblog() { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fh + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results."); + ::wait_and_exit(255); + } + if(defined $opt::joblog and $opt::joblog =~ s/^\+//) { + # --joblog +filename = append to filename + $append = 1; + } + if($opt::joblog + and + ($opt::sqlmaster + or + not $opt::sqlworker)) { + # Do not log if --sqlworker + if($opt::resume || $opt::resume_failed || $opt::retry_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + # Override $/ with \n because -d might be set + local $/ = "\n"; + # If there is a header: Open as append later + $append = <$joblog_fh>; + my $joblog_regexp; + if($opt::retry_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + my @group; + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + $group[$1-1] = "true"; + } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { + # Grab out the command + $group[$1-1] = $3; + } else { + chomp; + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + if(@group) { + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + if(grep /\0/, @group) { + # force --null to deal with \n in commandlines + ::warning("Command lines contain newline. ". + "Forcing --null."); + $opt::null = 1; + $/ = "\0"; + } + # Replace \0 with '\n' as used in print_joblog() + print $outfh (map { s/\0/\n/g; $_,$/ } + map { $_ } @group); + seek $outfh, 0, 0; + exit_if_disk_full(); + # Set filehandle to -a + @opt::a = ($outfh); + } + # Remove $command (so -a is run) + @ARGV = (); + } + if($opt::resume || $opt::resume_failed) { + if($opt::resume_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + } + close $joblog_fh; + } + # $opt::null may be set if the commands contain \n + if($opt::null) { $/ = "\0"; } + } + if($opt::dryrun) { + # Do not write to joblog in a dry-run + if(not open($Global::joblog, ">", "/dev/null")) { + ::error("Cannot write to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + } elsif($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fh{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). "\n"; + } + } +} + +sub open_json_csv() { + if($opt::results) { + # Output as JSON/CSV/TSV + if($opt::results eq "-.csv" + or + $opt::results eq "-.tsv" + or + $opt::results eq "-.json") { + # Output as JSON/CSV/TSV on stdout + open $Global::csv_fh, ">&", "STDOUT" or + ::die_bug("Can't dup STDOUT in csv: $!"); + # Do not print any other output to STDOUT + # by forcing all other output to /dev/null + open my $fd, ">", "/dev/null" or + ::die_bug("Can't >/dev/null in csv: $!"); + $Global::fh{1} = $fd; + $Global::fh{2} = $fd; + } elsif($Global::csvsep or $Global::jsonout) { + if(not open($Global::csv_fh,">",$opt::results)) { + ::error("Cannot open results file `$opt::results': ". + "$!."); + wait_and_exit(255); + } + } + } +} + +sub find_compression_program() { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed on 128 core + + # seq 120000000|shuf > 1gb & + # apt-get update + # apt install make g++ htop + # wget -O - pi.dk/3 | bash + # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz + # git clone https://github.com/facebook/zstd.git + # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin) + # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz + # chmod +x /usr/local/bin/lrz + # wait + # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" + # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz" + # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread + # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread + # sort -nk4 jl-? + + # 1-core: + # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip + # 4-cores: + # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip + # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 + # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip + # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip + # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip + + my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip + lrz pxz bzip2 lzma xz clzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + +sub read_options() { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = Q(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does + my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = Q(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --_pipe-means-argfiles @options @parser $script ". + "::: @ARGV"; + } + } + if($ARGV[0] =~ / --shebang(-?wrap)? /) { + ::warning("--shebang and --shebang-wrap must be the first ". + "argument.\n"); + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + my @ARGV_orig = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from $PARALLEL_HOME/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + (map { "$_/config" } @Global::config_dirs), + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if($profile =~ m:^\./|^/:) { + # Look for ./profile in . + # Look for /profile in / + push @profiles, grep { -r $_ } $profile; + } else { + # Look for the $profile in @Global::config_dirs + push @profiles, grep { -r $_ } + map { "$_/$profile" } @Global::config_dirs; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + ::debug("init","Read $profile\n"); + local $/ = "\n"; + open (my $in_fh, "<", $profile) || + ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shell_words($_); + } + close $in_fh; + } else { + if(grep /^\Q$profile\E$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable."); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL'}); + } + # Add options from env_parallel.csh via $PARALLEL_CSH + if($ENV{'PARALLEL_CSH'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + # What were the options given on the command line? + # Used to start --sqlworker + my $ai = arrayindex(\@ARGV_orig, \@ARGV); + @Global::options_in_argv = @ARGV_orig[0..$ai-1]; + # Prepend non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub arrayindex() { + # Similar to Perl's index function, but for arrays + # Input: + # $arr_ref1 = ref to @array1 to search in + # $arr_ref2 = ref to @array2 to search for + # Returns: + # $pos = position of @array1 in @array2, -1 if not found + my ($arr_ref1,$arr_ref2) = @_; + my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1; + my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2; + my $i = index($array1_as_string,$array2_as_string,0); + if($i == -1) { return -1 } + my @before = split /\0/, substr($array1_as_string,0,$i); + return $#before; +} + +sub read_args_from_command_line() { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # :::+ ($Global::arg_sep with --link) + # ::::+ ($Global::arg_file_sep with --link) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+ + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my @new_argv = (); + for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_sep."+" + or + $arg eq $Global::arg_file_sep + or + $arg eq $Global::arg_file_sep."+") { + my $group_sep = $arg; # This group of args is args or argfiles + my @group; + while(defined ($arg = shift @ARGV)) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_sep."+" + or + $arg eq $Global::arg_file_sep + or + $arg eq $Global::arg_file_sep."+") { + # exit while loop if finding new separator + last; + } else { + # If not hitting ::: :::+ :::: or ::::+ + # Append it to the group + push @group, $arg; + } + } + my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0; + my $is_file = ($group_sep eq $Global::arg_file_sep + or + $group_sep eq $Global::arg_file_sep."+"); + if($is_file) { + # :::: / ::::+ + push @opt::linkinputsource, map { $is_linked } @group; + } else { + # ::: / :::+ + push @opt::linkinputsource, $is_linked; + } + if($is_file + or ($opt::_pipe_means_argfiles and $opt::pipe) + ) { + # Group of file names on the command line. + # Append args into -a + push @opt::a, @group; + } else { + # Group of arguments on the command line. + # Put them into a file. + # Create argfile + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + exit_if_disk_full(); + # Append filehandle to -a + push @opt::a, $outfh; + } + if(defined($arg)) { + # $arg is ::: :::+ :::: or ::::+ + # so there is another group + redo; + } else { + # $arg is undef -> @ARGV empty + last; + } + } + push @new_argv, $arg; + } + # Output: @ARGV = command to run with options + return @new_argv; +} + +sub cleanup() { + # Returns: N/A + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; + } +} + + +sub __QUOTING_ARGUMENTS_FOR_SHELL__() {} + +sub shell_quote(@) { + # Input: + # @strings = strings to be quoted + # Returns: + # @shell_quoted_strings = string quoted as needed by the shell + return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_); +} + +sub shell_quote_scalar_rc($) { + # Quote for the rc-shell + my $a = $_[0]; + if(defined $a) { + if(($a =~ s/'/''/g) + + + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_csh($) { + # Quote for (t)csh + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline in csh as \\\n + ($a =~ s/[\n]/"\\\n"/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_default($) { + # Quote for other shells (Bourne compatibles) + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + local $_ = $_[0]; + if(/[^-_.+a-z0-9\/]/i) { + s/'/'"'"'/g; # "-quote '-quotes + $_ = "'$_'"; # '-quote entire string + s/^''//; # Remove unneeded '' at ends + s/''$//; # (faster than s/^''|''$//g) + return $_; + } elsif ($_ eq "") { + return "''"; + } else { + # No quoting needed + return $_; + } +} + +sub shell_quote_scalar($) { + # Quote the string so the shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + + # Speed optimization: Choose the correct shell_quote_scalar_* + # and call that directly from now on + no warnings 'redefine'; + if($Global::cshell) { + # (t)csh + *shell_quote_scalar = \&shell_quote_scalar_csh; + } elsif($Global::shell =~ m:(^|/)rc$:) { + # rc-shell + *shell_quote_scalar = \&shell_quote_scalar_rc; + } else { + # other shells + *shell_quote_scalar = \&shell_quote_scalar_default; + } + # The sub is now redefined. Call it + return shell_quote_scalar($_[0]); +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = shell_quote_scalar($_[0]); + no warnings 'redefine'; + *Q = \&::shell_quote_scalar; + return $ret; +} + +sub shell_quote_file($) { + # Quote the string so shell will not expand any special chars + # and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ and ./ if needed + my $a = shift; + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return Q($a); +} + +sub shell_words(@) { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + +sub perl_quote_scalar($) { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $perl_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; + } + return $a; +} + +# -w complains about prototype +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = perl_quote_scalar($_[0]); + *pQ = \&::perl_quote_scalar; + return $ret; +} + +sub unquote_printf() { + # Convert \t \n \r \000 \0 + # Inputs: + # $string = string with \t \n \r \num \0 + # Returns: + # $replaced = string with TAB NEWLINE CR <ascii-num> NUL + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; +} + + +sub __FILEHANDLES__() {} + + +sub save_stdin_stdout_stderr() { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fh + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # TODO Disabled until we have an open3 that will take n filehandles + # for my $fdno (1..61) { + # # /dev/fd/62 and above are used by bash for <(cmd) + # # Find file descriptors that are already opened (by the shell) + # Only focus on stdout+stderr for now + for my $fdno (1..2) { + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fh{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles() { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fh + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? + # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fh)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit($) { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Uses: + # $Global::original_stdin + # Returns: + # $fh = file handle to read-opened file + my $file = shift; + if($file eq "-") { + return ($Global::original_stdin || *STDIN); + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, "<", $file)) { + ::error("Cannot open input file `$file': No such file or directory."); + wait_and_exit(255); + } + return $fh; +} + +sub set_fh_blocking($) { + # Set filehandle as blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Remove non-blocking from the flags + $flags &= ~&O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + +sub set_fh_non_blocking($) { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Add non-blocking to the flags + $flags |= &O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {} + + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started +# $Global::max_procs_file = filename if --jobs is given a filename +# $Global::JobQueue = JobQueue object for the queue of jobs +# $Global::timeoutq = queue of times where jobs timeout +# $Global::newest_job = Job object of the most recent job started +# $Global::newest_starttime = timestamp of $Global::newest_job +# @Global::sshlogin +# $Global::minimal_command_line_length = min len supported by all sshlogins +# $Global::start_no_new_jobs = should more jobs be started? +# $Global::original_stderr = file handle for STDERR when the program started +# $Global::total_started = total number of jobs started +# $Global::joblog = filehandle of joblog +# $Global::debug = Is debugging on? +# $Global::exitstatus = status code of GNU Parallel +# $Global::quoting = quote the command to run + +sub init_run_jobs() { + # Set Global variables and progress signal handlers + # Do the copying of basefiles + # Returns: N/A + $Global::total_running = 0; + $Global::total_started = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + my $max_procs_file_last_mod; + + sub changed_procs_file { + # If --jobs is a file and it is modfied: + # Force recomputing of max_jobs_running for each $sshlogin + # Uses: + # $Global::max_procs_file + # %Global::host + # Returns: N/A + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + $max_procs_file_last_mod ||= 0; + if($mtime > $max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + } + + sub changed_sshloginfile { + # If --slf is changed: + # reload --slf + # filter_hosts + # setup_basefile + # Uses: + # @opt::sshloginfile + # @Global::sshlogin + # %Global::host + # $opt::filter_hosts + # Returns: N/A + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run", + "--sshloginfile $actual_file changed. reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + + sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # %Global::host + # $Global::start_no_new_jobs + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + changed_procs_file(); + changed_sshloginfile(); + } + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::delay + and + $opt::delay-0.008 > ::now()-$Global::newest_starttime) { + # It has been too short since last start + next; + } + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($opt::limit and $sshlogin->limit()) { + # Over limit + next; + } + if(($opt::memfree or $opt::memsuspend) + and + $sshlogin->memfree() < $Global::memlimit) { + # The server has not enough mem free + ::debug("mem", "Not starting job: not enough mem\n"); + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since last login + next; + } + debug("run", $sshlogin->string(), + " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", + $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + + return $jobs_started; + } +} + +{ + my $no_more_file_handles_warned; + + sub start_another_job() { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? + if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + if($job->is_already_in_joblog()) { + $job->free_slot(); + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume + and $job->is_already_in_results())); + debug("start", "Command to run on '", + $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + if($job->virgin()) { + push(@Global::virgin_jobs,$job); + } else { + # Block already set: This is a retry + $job->write_block(); + } + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + my @arg; + for my $record (@{$job->{'commandline'}{'arg_list'}}) { + push @arg, map { $_->orig() } @$record; + } + ::error("No more processes: cannot run a single job. ". + "Something is wrong at @arg."); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ". + "Decreasing number of running jobs to $max.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. ", + "Try running 'parallel -j0 -N 100 --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + debug("start", "No more file handles. "); + return 0; + } + } +} + +sub init_progress() { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my %progress = progress(); + return ("\nComputers / CPU cores / Max jobs to run\n", + $progress{'workerlist'}); +} + +sub drain_job_queue(@) { + # Uses: + # $opt::progress + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + my @command = @_; + if($opt::progress) { + ::status_no_nl(init_progress()); + } + my $last_header = ""; + my $sleep = 0.2; + my $sleepsum = 0; + do { + while($Global::total_running > 0) { + debug("init",$Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not + # closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + ::status("", $progress{'header'}); + $last_header = $progress{'header'}; + } + ::status_no_nl("\r",$progress{'status'}); + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. + if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Exponential back-off sleeping + $sleep = ::reap_usleep($sleep); + $sleepsum += $sleep; + if($sleepsum >= 1000) { + # At most do this every second + $sleepsum = 0; + changed_procs_file(); + changed_sshloginfile(); + start_more_jobs(); + } + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on."); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs."); + } + } + while($opt::sqlmaster and not $Global::sql->finished()) { + # SQL master + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::start_sqlworker) { + # Start an SQL worker as we are now sure there is work to do + $Global::start_sqlworker = 0; + if(my $pid = fork()) { + $Global::unkilled_sqlworker = $pid; + } else { + # Replace --sql/--sqlandworker with --sqlworker + my @ARGV = (map { s/^--sql(andworker)?$/--sqlworker/; $_ } + @Global::options_in_argv); + # exec the --sqlworker + exec($0,@ARGV,@command); + } + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty() + or + $opt::sqlmaster and not $Global::sql->finished()); + if($opt::progress) { + my %progress = progress(); + ::status("\r".$progress{'status'}); + } +} + +sub toggle_progress() { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status_no_nl(init_progress()); + } +} + +sub progress() { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $workerno{$w}.":".$sshlogin{$w} ." / ". + ($Global::host{$w}->ncpus() || "-")." / ". + $Global::host{$w}->max_jobs_running()."\n"; + } + $status = "c"x($termcols+1); + # Select an output format that will fit on a single line + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/". + "%of started jobs/Average seconds to complete"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = + ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $sshlogin{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/X.Xs 2:XX/XX/XX%/X.Xs 3:XX/XX/XX%/X.Xs 4:XX/XX/XX%/X.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = + ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) + } + } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) + } + } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d/%d", $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d", $sshlogin{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d", + $workerno{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, + "status" => $status); +} + +{ + + my ($first_completed, $smoothed_avg_time, $last_eta); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + my $completed = $Global::total_completed; + # In rare cases with -X will $completed > total_jobs() + my $total = ::max($Global::JobQueue->total_jobs(),$completed); + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = ::min($completed / $total,100); + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + if($eta*0.90 < $last_eta and $last_eta < $eta) { + # Eta jumped less that 10% up: Keep the last eta instead + $eta = $last_eta; + } else { + $last_eta = $eta; + } + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar() { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}-> + replace_placeholders(["\257<\257>"],0,0) : ""; + $arg = decode_utf8($arg); + my $eta_dhms = ::seconds_to_time_units($eta); + my $bar_text = + sprintf("%d%% %d:%d=%s %s", + $pctcomplete*100, $completed, $left, $eta_dhms, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + # Prefix with zenity header + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($rows,$columns,$last_update_time); + + sub compute_terminal_size() { + # && true is to force spawning a shell and not just exec'ing + my @tput = qx{ tput lines cols </dev/tty 2>/dev/null && true }; + $rows = 0 + $tput[0]; + $columns = 0 + $tput[1]; + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $stty = qx{ stty -a </dev/tty 2>/dev/null && true }; + # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS + # MacOSX/IRIX/AIX/Tru64 + $stty =~ /(\d+) columns/ and do { $columns = $1; }; + $stty =~ /(\d+) rows/ and do { $rows = $1; }; + # GNU/Linux/Solaris + $stty =~ /columns (\d+)/ and do { $columns = $1; }; + $stty =~ /rows (\d+)/ and do { $rows = $1; }; + # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana + $stty =~ /columns = (\d+)/ and do { $columns = $1; }; + $stty =~ /rows = (\d+)/ and do { $rows = $1; }; + # QNX + $stty =~ /rows=(\d+),(\d+)/ and do { ($rows,$columns) = ($1,$2); }; + } + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $resize = qx{ resize 2>/dev/null && true }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns ||= $1; }; + $resize =~ /LINES=(\d+);/ and do { $rows ||= $1; }; + } + $rows ||= 24; + $columns ||= 80; + } + + sub update_terminal_size() { + # Only update once per second. + if($last_update_time < time) { + $last_update_time = time; + compute_terminal_size(); + # Set signal WINdow CHange to force recompute + $SIG{WINCH} = \&compute_terminal_size; + } + } + + sub terminal_rows() { + # Get the number of rows of the terminal. + # Returns: + # number of rows of the screen + update_terminal_size(); + return $rows; + } + + sub terminal_columns() { + # Get the number of columns of the terminal. + # Returns: + # number of columns of the screen + update_terminal_size(); + return $columns; + } +} + +# Prototype forwarding +sub get_job_with_sshlogin($); +sub get_job_with_sshlogin($) { + # Input: + # $sshlogin = which host should the job be run on? + # Uses: + # $opt::hostgroups + # $Global::JobQueue + # Returns: + # $job = next job object for $sshlogin if any available + my $sshlogin = shift; + my $job; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job to be run on a hostgroup of this + # $sshlogin + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + if(not $job->suspended()) { + $job->set_sshlogin($sshlogin); + } + if(defined $opt::retries and $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == + grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + + +sub __REMOTE_SSH__() {} + + +sub read_sshloginfiles(@) { + # Read a list of --slf's + # Input: + # @files = files or symbolic file names to read + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand($) { + # Expand --slf shorthand into a read file name + # Input: + # $file = file or symbolic file name to read + # Returns: + # $file = actual file name to read + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $Global::config_dir."/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + for(@Global::config_dirs) { + if(not -r $_."/".$file) { + # Try prepending $PARALLEL_HOME + ::error("Cannot open $file."); + ::wait_and_exit(255); + } else { + $file = $_."/".$file; + last; + } + } + } + return $file; +} + +sub read_sshloginfile($) { + # Read sshloginfile into @Global::sshlogin + # Input: + # $file = file to read + # Uses: + # @Global::sshlogin + # Returns: N/A + local $/ = "\n"; + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + if(not open($in_fh, "<", $file)) { + # Try the filename + ::error("Cannot open $file."); + ::wait_and_exit(255); + } + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + /^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub parse_sshlogin() { + # Parse @Global::sshlogin into %Global::host. + # Keep only hosts that are in one of the given ssh hostgroups. + # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc + # Returns: N/A + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + # Parse ,, and \, as , but do not split on that + # -S "ssh -J jump1,,jump2 host1,host2" => + # ssh -J jump1,jump2 host1 + # host2 + # Protect \, and ,, as \0 + $sshlogin =~ s/\\,|,,/\0/g; + for my $s (split /,|\n/, $sshlogin) { + # Replace \0 => , + $s =~ s/\0/,/g; + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + $s =~ s/\s*$//; + push (@login, $s); + } + } + } + $Global::minimal_command_line_length = 100_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + $sshlogin->set_maxlength(Limits::Command::max_length()); + + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + $Global::usable_command_line_length = + # Usable len = maxlen - 3000 for wrapping, div 2 for hexing + int(($Global::minimal_command_line_length - 3000)/2); + if($opt::max_chars) { + if($opt::max_chars <= $Global::usable_command_line_length) { + $Global::usable_command_line_length = $opt::max_chars; + } else { + ::warning("Value for option -s should be < ". + $Global::usable_command_line_length."."); + } + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if(@Global::transfer_files or @opt::return + or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin."); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are ". + "no remote --sshlogin."); + } elsif (@opt::transfer_files) { + ::warning("--transferfile ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin."); + } elsif (defined $opt::cleanup and not %opt::template) { + ::warning("--cleanup ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there ". + "are no remote --sshlogin."); + } + } + } +} + +sub remote_hosts() { + # Return sshlogins that are not ':' + # Uses: + # %Global::host + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile() { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles."); + ::wait_and_exit(255); + } + if(not $workdir) { + my $dummycmdline = + CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir); + } + } + debug("init", "basesetup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Copying of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub cleanup_basefile() { + # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $workdir; + if(not $workdir) { + my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + push @cmd, $sshlogin->cleanup_cmd($file,$workdir); + } + } + debug("init", "basecleanup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Cleanup of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub run_gnu_parallel() { + my ($stdin,@args) = @_; + my $cmd = join "",map { " $_ & " } split /\n/, $stdin; + print $Global::original_stderr ` $cmd wait` ; + return 0 +} + +sub _run_gnu_parallel() { + # Run GNU Parallel + # This should ideally just fork an internal copy + # and not start it through a shell + # Input: + # $stdin = data to provide on stdin for GNU Parallel + # @args = command line arguments + # Returns: + # $exitstatus = exitcode of GNU Parallel run + # \@stdout = standard output + # \@stderr = standard error + my ($stdin,@args) = @_; + my ($exitstatus,@stdout,@stderr); + my ($stdin_fh,$stdout_fh)=(gensym(),gensym()); + my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par"); + unlink $stderrname; + + my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh, + $0,qw(--plain --shell /bin/sh --will-cite), @args); + if(my $writerpid = fork()) { + close $stdin_fh; + @stdout = <$stdout_fh>; + # Now stdout is closed: + # These pids should be dead or die very soon + while(kill 0, $writerpid) { ::usleep(1); } + die; +# reap $writerpid; +# while(kill 0, $pid) { ::usleep(1); } +# reap $writerpid; + $exitstatus = $?; + seek $stderr_fh, 0, 0; + @stderr = <$stderr_fh>; + close $stdout_fh; + close $stderr_fh; + } else { + close $stdout_fh; + close $stderr_fh; + print $stdin_fh $stdin; + close $stdin_fh; + exit(0); + } + return ($exitstatus,\@stdout,\@stderr); +} + +sub filter_hosts() { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. + # Uses: + # %Global::host + # $Global::minimal_command_line_length + # $opt::use_sockets_instead_of_threads + # $opt::use_cores_instead_of_threads + # $opt::use_cpus_instead_of_cores + # Returns: N/A + + my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref, + $maxlen_ref, $echo_ref, $down_hosts_ref) = + parse_host_filtering(parallelized_host_filtering()); + + delete @Global::host{@$down_hosts_ref}; + @$down_hosts_ref and ::warning("Removed @$down_hosts_ref."); + + $Global::minimal_command_line_length = 100_000_000; + while (my ($string, $sshlogin) = each %Global::host) { + if($sshlogin->local()) { next } + my ($nsockets,$ncores,$nthreads,$time_to_login,$maxlen) = + ($nsockets_ref->{$string},$ncores_ref->{$string}, + $nthreads_ref->{$string},$time_to_login_ref->{$string}, + $maxlen_ref->{$string}); + defined $nsockets or ::die_bug("nsockets missing: $string"); + defined $ncores or ::die_bug("ncores missing: $string"); + defined $nthreads or ::die_bug("nthreads missing: $string"); + defined $time_to_login or ::die_bug("time_to_login missing: $string"); + defined $maxlen or ::die_bug("maxlen missing: $string"); + # ncpus may be set by 4/hostname or may be undefined yet + my $ncpus = $sshlogin->{'ncpus'}; + # $nthreads may be 0 if GNU Parallel is not installed remotely + $ncpus = $nthreads || $ncpus || $sshlogin->ncpus(); + if($opt::use_cpus_instead_of_cores) { + $ncpus = $ncores || $ncpus; + } elsif($opt::use_sockets_instead_of_threads) { + $ncpus = $nsockets || $ncpus; + } elsif($opt::use_cores_instead_of_threads) { + $ncpus = $ncores || $ncpus; + } + $sshlogin->set_ncpus($ncpus); + $sshlogin->set_time_to_login($time_to_login); + $maxlen = $maxlen || Limits::Command::max_length(); + $sshlogin->set_maxlength($maxlen); + ::debug("init", "Timing from -S:$string ", + " ncpus:", $ncpus, + " nsockets:",$nsockets, + " ncores:", $ncores, + " nthreads:",$nthreads, + " time_to_login:", $time_to_login, + " maxlen:", $maxlen, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub parse_host_filtering() { + # Input: + # @lines = output from parallelized_host_filtering() + # Returns: + # \%nsockets = number of sockets of {host} + # \%ncores = number of cores of {host} + # \%nthreads = number of hyperthreaded cores of {host} + # \%time_to_login = time_to_login on {host} + # \%maxlen = max command len on {host} + # \%echo = echo received from {host} + # \@down_hosts = list of hosts with no answer + local $/ = "\n"; + my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo, + @down_hosts); + for (@_) { + ::debug("init","Read: ",$_); + chomp; + my @col = split /\t/, $_; + if($col[0] =~ /^parallel: Warning:/) { + # Timed out job: Ignore it + next; + } elsif(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval .?true.?\s([^\;]+);/ or + ::die_bug("col8 does not contain host: $col[8] in $_"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") { + # exit == 255 or exit == timeout (-1): ssh failed/timedout + # exit == 1: lsh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set nsockets, ncores, nthreads = 1 + ::warning("Could not figure out ". + "number of cpus on $host. Using 1."); + $nsockets{$host} = 1; + $ncores{$host} = 1; + $nthreads{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(/parallel: Warning: Cannot figure out number of/) { + next; + } + if(/\t(perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from)/ + or + /\tWarning: / + ) { + # Skip these (from perl): + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + # Disconnected from 127.0.0.1 port 22 + # + # Skip these (from ssh): + # Warning: Permanently added * to the list of known hosts. + # Warning: Identity file * not accessible: * + } elsif(not defined $nsockets{$col[0]}) { + $nsockets{$col[0]} = $col[1]; + } elsif(not defined $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not defined $nthreads{$col[0]}) { + $nthreads{$col[0]} = $col[1]; + } elsif(not defined $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not defined $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + @down_hosts = uniq(@down_hosts); + return(\%nsockets, \%ncores, \%nthreads, \%time_to_login, + \%maxlen, \%echo, \@down_hosts); +} + +sub parallelized_host_filtering() { + # Uses: + # %Global::host + # Returns: + # text entries with: + # * joblog line + # * hostname \t number of cores + # * hostname \t number of cpus + # * hostname \t max-line-length-allowed + # * hostname \t empty + + sub sshwrapped { + # Wrap with ssh and --env + # Return $default_value if command fails + my $sshlogin = shift; + my $command = shift; + # wrapper that returns output "0\n" if the command fails + # E.g. parallel not installed => "0\n" + my $wcmd = q(perl -e '$a=`).$command.q(`; print $? ? "0".v010 : $a'); + my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],[],[],{},{}); + my $job = Job->new($commandline); + $job->set_sshlogin($sshlogin); + $job->wrapped(); + return($job->{'wrapped'}); + } + + my(@sockets, @cores, @threads, @maxline, @echo); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + push(@sockets, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0"); + push(@cores, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0"); + push(@threads, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0"); + push(@maxline, $host."\t"."true $host; ". + sshwrapped($sshlogin, + "parallel --max-line-length-allowed")."\n\0"); + # 'echo' is used to get the fastest possible ssh login time + push(@echo, $host."\t"."true $host; ". + $sshlogin->wrap("echo $host")."\n\0"); + } + # --timeout 10: Setting up an SSH connection and running a simple + # command should never take > 10 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon is overloaded, try 3 times + my $cmd = + "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ". + "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true "; + $cmd = $Global::shell." -c ".Q($cmd); + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + + my ($host_fh,$in,$err); + open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd"); + ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo); + + if(not fork()) { + # Give the commands to run to the $cmd + close $host_fh; + print $in @sockets, @cores, @threads, @maxline, @echo; + close $in; + exit(); + } + close $in; + # If -0: $/ must be \n + local $/ = "\n"; + for(<$host_fh>) { + # TODO incompatible with '-quoting. Needs to be fixed differently + #if(/\'$/) { + # # if last char = ' then append next line + # # This may be due to quoting of \n in environment var + # $prepend .= $_; + # next; + #} + $_ = $prepend . $_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + return @out; +} + +sub onall($@) { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. + # Uses: + # $Global::debug + # $Global::exitstatus + # $Global::joblog + # $Global::quoting + # $opt::D + # $opt::arg_file_sep + # $opt::arg_sep + # $opt::colsep + # $opt::files + # $opt::group + # $opt::joblog + # $opt::jobs + # $opt::keeporder + # $opt::linebuffer + # $opt::max_chars + # $opt::plain + # $opt::retries + # $opt::tag + # $opt::tee + # $opt::timeout + # $opt::ungroup + # %Global::host + # @opt::basefile + # @opt::env + # @opt::v + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""), + ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::cleanup) ? "--cleanup" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::plus) ? "--plus" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""), + (@Global::transfer_files ? map { "--tf ".Q($_) } + @Global::transfer_files : ""), + (@Global::ret_files ? map { "--return ".Q($_) } + @Global::ret_files : ""), + (@opt::env ? map { "--env ".Q($_) } @opt::env : ""), + (map { "-v" } @opt::v), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + # If PARALLEL_ENV is set: Pass it on + my $penv=$Global::parallel_env ? + "PARALLEL_ENV=".Q($Global::parallel_env) : + ''; + ::debug("init", "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".Q($sshlogin->string()) : ""), + " -S ", Q($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".Q($sshlogin->string()) : ""), + " -S ", Q($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\0"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || + ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + + +sub __SIGNAL_HANDLING__() {} + + +sub sigtstp() { + # Send TSTP signal (Ctrl-Z) to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("TSTP"); +} + +sub sigpipe() { + # Send SIGPIPE signal to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("PIPE"); +} + +sub signal_children() { + # Send signal to all children process groups + # and GNU Parallel itself + # Uses: + # %SIG + # Returns: N/A + my $signal = shift; + debug("run", "Sending $signal "); + kill $signal, map { -$_ } keys %Global::running; + # Use default signal handler for GNU Parallel itself + $SIG{$signal} = undef; + kill $signal, $$; +} + +sub save_original_signal_handler() { + # Remember the original signal handler + # Uses: + # %Global::original_sig + # Returns: N/A + $SIG{INT} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; + # Allow Ctrl-Z to suspend and `fg` to continue + $SIG{TSTP} = \&sigtstp; + $SIG{PIPE} = \&sigpipe; + $SIG{CONT} = sub { + # Set $SIG{TSTP} again (it is undef'ed in sigtstp() ) + $SIG{TSTP} = \&sigtstp; + for my $job (values %Global::running) { + if($job->suspended()) { + # Force jobs to suspend, if they are marked as suspended. + # --memsupspend can suspend a job that will be resumed + # if the user presses CTRL-Z followed by `fg`. + $job->suspend(); + } else { + # Resume the rest of the jobs + $job->resume(); + } + } + }; +} + +sub list_running_jobs() { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ".$job->replaced()); + } +} + +sub start_no_new_jobs() { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs + # Returns: N/A + unlink keys %Global::unlink; + ::status + ("$Global::progname: SIGHUP received. No new jobs will be started.", + "$Global::progname: Waiting for these ".(keys %Global::running). + " jobs to finish. Send SIGTERM to stop now."); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reapers() { + # Run reaper until there are no more left + # Returns: + # @pids_reaped = pids of reaped processes + my @pids_reaped; + my $pid; + while($pid = reaper()) { + push @pids_reaped, $pid; + } + return @pids_reaped; +} + +sub reaper() { + # A job finished: + # * Set exitstatus, exitsignal, endtime. + # * Free ressources for new job + # * Update median runtime + # * Print output + # * If --halt = now: Kill children + # * Print progress + # Uses: + # %Global::running + # $opt::timeout + # $Global::timeoutq + # $opt::keeporder + # $Global::total_running + # Returns: + # $stiff = PID of child finished + my $stiff; + debug("run", "Reaper "); + if(($stiff = waitpid(-1, &WNOHANG)) <= 0) { + # No jobs waiting to be reaped + return 0; + } + + # $stiff = pid of dead process + my $job = $Global::running{$stiff}; + + # '-a <(seq 10)' will give us a pid not in %Global::running + # The same will one of the ssh -M: ignore + $job or return 0; + delete $Global::running{$stiff}; + $Global::total_running--; + if($job->{'commandline'}{'skip'}) { + # $job->skip() was called + $job->set_exitstatus(-2); + $job->set_exitsignal(0); + } else { + $job->set_exitstatus($? >> 8); + $job->set_exitsignal($? & 127); + } + + debug("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")"); + if($Global::delayauto or $Global::sshdelayauto) { + if($job->exitstatus()) { + # Job failed: Increase delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 1.3; + $opt::sshdelay &&= $opt::sshdelay * 1.3; + } else { + # Job succeeded: Decrease delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 0.9; + $opt::sshdelay &&= $opt::sshdelay * 0.9; + } + debug("run", "delay:$opt::delay ssh:$opt::sshdelay "); + } + $job->set_endtime(::now()); + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + if($job->should_be_retried()) { + # Free up file handles + $job->free_ressources(); + } else { + # The job is done + $sshlogin->inc_jobs_completed(); + # Free the jobslot + $job->free_slot(); + if($opt::timeout and not $job->exitstatus()) { + # Update average runtime for timeout only for successful jobs + $Global::timeoutq->update_median_runtime($job->runtime()); + } + if($opt::keeporder and not $opt::latestline) { + # --latestline fixes --keeporder in Job::row() + $job->print_earlier_jobs(); + } else { + $job->print(); + } + if($job->should_we_halt() eq "now") { + # Kill children + ::kill_sleep_seq($job->pid()); + ::killall(); + ::wait_and_exit($Global::halt_exitstatus); + } + } + $job->cleanup(); + + if($opt::progress) { + my %progress = progress(); + ::status_no_nl("\r",$progress{'status'}); + } + + debug("run", "jobdone \n"); + return $stiff; +} + + +sub __USAGE__() {} + + +sub killall() { + # Kill all jobs by killing their process groups + # Uses: + # $Global::start_no_new_jobs = we are stopping + # $Global::killall = Flag to not run reaper + $Global::start_no_new_jobs ||= 1; + # Do not reap killed children: Ignore them instead + $Global::killall ||= 1; + kill_sleep_seq(keys %Global::running); +} + +sub kill_sleep_seq(@) { + # Send jobs TERM,TERM,KILL to processgroups + # Input: + # @pids = list of pids that are also processgroups + # Convert pids to process groups ($processgroup = -$pid) + my @pgrps = map { -$_ } @_; + my @term_seq = split/,/,$opt::termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + # for each signal+waittime: kill process groups still not dead + while(@term_seq) { + @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps); + } +} + +sub kill_sleep() { + # Kill pids with a signal and wait a while for them to die + # Input: + # $signal = signal to send to @pids + # $sleep_max = number of ms to sleep at most before returning + # @pids = pids to kill (actually process groups) + # Uses: + # $Global::killall = set by killall() to avoid calling reaper + # Returns: + # @pids = pids still alive + my ($signal, $sleep_max, @pids) = @_; + ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n"); + kill $signal, @pids; + my $sleepsum = 0; + my $sleep = 0.001; + + while(@pids and $sleepsum < $sleep_max) { + if($Global::killall) { + # Killall => don't run reaper + while(waitpid(-1, &WNOHANG) > 0) { + $sleep = $sleep/2+0.001; + } + } elsif(reapers()) { + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + $sleepsum += $sleep; + # Keep only living children + @pids = grep { kill(0, $_) } @pids; + } + return @pids; +} + +sub wait_and_exit($) { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all jobs without printing + killall(); + } + for (keys %Global::unkilled_children) { + # Kill any (non-jobs) children (e.g. reserved processes) + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + if($Global::unkilled_sqlworker) { + waitpid($Global::unkilled_sqlworker,0); + } + # Avoid: Warning: unable to close filehandle properly: No space + # left on device during global destruction. + $SIG{__WARN__} = sub {}; + if($opt::_parset) { + # Make the shell script return $error + print "$Global::parset_endstring\nreturn $error"; + } + exit($error); +} + +sub die_usage() { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage() { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... | $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "GNU Parallel can do much more. See 'man $Global::progname' for details", + "", + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2022, November 22). GNU Parallel 20221122 ('Херсо́н').", + " Zenodo. https://doi.org/10.5281/zenodo.7347980", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines. + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "",); +} + +sub citation_notice() { + # if --will-cite or --plain: do nothing + # if stderr redirected: do nothing + # if $PARALLEL_HOME/will-cite: do nothing + # else: print citation notice to stderr + if($opt::willcite + or + $opt::plain + or + not -t $Global::original_stderr + or + grep { -e "$_/will-cite" } @Global::config_dirs) { + # skip + } else { + ::status + ("Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2022, November 22). GNU Parallel 20221122 ('Херсо́н').", + " Zenodo. https://doi.org/10.5281/zenodo.7347980", + "", + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by + # removing these lines. + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "", + "To silence this citation notice: run 'parallel --citation' once.", + "" + ); + mkdir $Global::config_dir; + # Number of times the user has run GNU Parallel without showing + # willingness to cite + my $runs = 0; + if(open (my $fh, "<", $Global::config_dir. + "/runs-without-willing-to-cite")) { + $runs = <$fh>; + close $fh; + } + $runs++; + if(open (my $fh, ">", $Global::config_dir. + "/runs-without-willing-to-cite")) { + print $fh $runs; + close $fh; + if($runs >= 10) { + ::status("Come on: You have run parallel $runs times. ". + "Isn't it about time ", + "you run 'parallel --citation' once to silence ". + "the citation notice?", + ""); + } + } + } +} + +sub status(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh map { ($_, "\n") } @w; + flush $fh; +} + +sub status_no_nl(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); +} + +{ + my %warnings; + sub warning_once(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + $warnings{@w}++ or + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); + } +} + +sub error(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status(map { ($prog.": Error: ". $_); } @w); +} + +sub die_bug($) { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug. ", + "Please follow\n", + "https://www.gnu.org/software/parallel/man.html#reporting-bugs\n", + "\n", + "Include this in the report:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + ::wait_and_exit(255); +} + +sub version() { + # Returns: N/A + print join + ("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software", + "Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: https://www.gnu.org/software/${Global::progname}\n", + "When using programs that use GNU Parallel to process data for publication", + "please cite as described in 'parallel --citation'.\n", + ); +} + +sub citation() { + # Returns: N/A + my ($all_argv_ref,$argv_options_removed_ref) = @_; + my $all_argv = "@$all_argv_ref"; + my $no_opts = "@$argv_options_removed_ref"; + $all_argv=~s/--citation//; + if($all_argv ne $no_opts) { + ::warning("--citation ignores all other options and arguments."); + ::status(""); + } + + ::status( + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + "\@software{tange_2022_7347980,", + " author = {Tange, Ole},", + " title = {GNU Parallel 20221122 ('Херсо́н')},", + " month = Nov,", + " year = 2022,", + " note = {{GNU Parallel is a general parallelizer to run", + " multiple serial command line programs in parallel", + " without changing them.}},", + " publisher = {Zenodo},", + " doi = {10.5281/zenodo.7347980},", + " url = {https://doi.org/10.5281/zenodo.7347980}", + "}", + "", + "(Feel free to use \\nocite{tange_2022_7347980})", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines. + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt", + "", + "If you send a copy of your published article to tange\@gnu.org, it will be", + "mentioned in the release notes of next version of GNU Parallel.", + "" + ); + while(not grep { -e "$_/will-cite" } @Global::config_dirs) { + print "\nType: 'will cite' and press enter.\n> "; + my $input = <STDIN>; + if(not defined $input) { + exit(255); + } + if($input =~ /will cite/i) { + mkdir $Global::config_dir; + if(open (my $fh, ">", $Global::config_dir."/will-cite")) { + close $fh; + ::status( + "", + "Thank you for your support: You are the reason why there is funding to", + "continue maintaining GNU Parallel. On behalf of future versions of", + "GNU Parallel, which would not exist without your support:", + "", + " THANK YOU SO MUCH", + "", + "It is really appreciated. The citation notice is now silenced.", + ""); + } else { + ::status( + "", + "Thank you for your support. It is much appreciated. The citation", + "cannot permanently be silenced. Use '--will-cite' instead.", + "", + "If you use '--will-cite' in scripts to be run by others you are making", + "it harder for others to see the citation notice. The development of", + "GNU Parallel is indirectly financed through citations, so if users", + "do not know they should cite then you are making it harder to finance", + "development. However, if you pay 10000 EUR, you should feel free to", + "use '--will-cite' in scripts.", + ""); + last; + } + } + } +} + +sub show_limits() { + # Returns: N/A + print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", + "Maximal usable size of command: ", + $Global::usable_command_line_length,"\n", + "\n", + "Execution will continue now, ", + "and it will try to read its input\n", + "and run commands; if this is not ", + "what you wanted to happen, please\n", + "press CTRL-D or CTRL-C\n"); +} + +sub embed() { + # Give an embeddable version of GNU Parallel + # Tested with: bash, zsh, ksh, ash, dash, sh + my $randomstring = "cut-here-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + if(not -f $0 or not -r $0) { + ::error("--embed only works if parallel is a readable file"); + exit(255); + } + if(open(my $fh, "<", $0)) { + # Read the source from $0 + my @source = <$fh>; + my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER}; + my @env_parallel_source = (); + my $shell = $Global::shell; + $shell =~ s:.*/::; + for(which("env_parallel.$shell")) { + -r $_ or next; + # Read the source of env_parallel.shellname + open(my $env_parallel_source_fh, $_) || die; + @env_parallel_source = <$env_parallel_source_fh>; + close $env_parallel_source_fh; + last; + } + print "#!$Global::shell + +# Copyright (C) 2007-2022 $user, Ole Tange, http://ole.tange.dk +# and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +"; + + print q! +# Embedded GNU Parallel created with --embed +parallel() { + # Start GNU Parallel without leaving temporary files + # + # Not all shells support 'perl <(cat ...)' + # This is a complex way of doing: + # perl <(cat <<'cut-here' + # [...] + # ) "$@" + # and also avoiding: + # [1]+ Done cat + + # Make a temporary fifo that perl can read from + _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo); + do { + $f = "/tmp/parallel-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $f); + mkfifo($f,0600); + print $f;'` + # Put source code into temporary file + # so it is easy to copy to the fifo + _file_with_GNU_Parallel_source=`mktemp`; +!, + "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n", + @source, + $randomstring,"\n", + q! + # Copy the source code from the file to the fifo + # and remove the file and fifo ASAP + # 'sh -c' is needed to avoid + # [1]+ Done cat + sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &" + + # Read the source from the fifo + perl $_fifo_with_GNU_Parallel_source "$@" +} +!, + @env_parallel_source, + q! + +# This will call the functions above +parallel -k echo ::: Put your code here +env_parallel --session +env_parallel -k echo ::: Put your code here +parset p,y,c,h -k echo ::: Put your code here +echo $p $y $c $h +echo You can also activate GNU Parallel for interactive use by: +echo . "$0" +!; + } else { + ::error("Cannot open $0"); + exit(255); + } + ::status("Redirect the output to a file and add your changes at the end:", + " $0 --embed > new_script"); +} + + +sub __GENERIC_COMMON_FUNCTION__() {} + + +sub mkdir_or_die($) { + # If dir is not executable: die + my $dir = shift; + # The eval is needed to catch exception from mkdir + eval { File::Path::mkpath($dir); }; + if(not -x $dir) { + ::error("Cannot change into non-executable dir $dir: $!"); + ::wait_and_exit(255); + } +} + +sub tmpfile(@) { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filehandle = opened file handle + # $filename = file name created + my($filehandle,$filename) = + ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); + if(wantarray) { + return($filehandle,$filename); + } else { + # Separate unlink due to NFS dealing badly with File::Temp + unlink $filename; + return $filehandle; + } +} + +sub tmpname($) { + # Select a name that does not exist + # Do not create the file as it may be used for creating a socket (by tmux) + # Remember the name in $Global::unlink to avoid hitting the same name twice + my $name = shift; + my($tmpname); + if(not -w $ENV{'TMPDIR'}) { + if(not -e $ENV{'TMPDIR'}) { + ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'"); + } else { + ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'"); + } + ::wait_and_exit(255); + } + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $tmpname or $Global::unlink{$tmpname}++); + return $tmpname; +} + +sub tmpfifo() { + # Find an unused name and mkfifo on it + my $tmpfifo = tmpname("fif"); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub rm(@) { + # Remove file and remove it from %Global::unlink + # Uses: + # %Global::unlink + delete @Global::unlink{@_}; + unlink @_; +} + +sub size_of_block_dev() { + # Like -s but for block devices + # Input: + # $blockdev = file name of block device + # Returns: + # $size = in bytes, undef if error + my $blockdev = shift; + if(open(my $fh, "<", $blockdev)) { + seek($fh,0,2) || ::die_bug("cannot seek $blockdev"); + my $size = tell($fh); + close $fh; + return $size; + } else { + ::error("cannot open $blockdev"); + wait_and_exit(255); + } +} + +sub qqx(@) { + # Like qx but with clean environment (except for @keep) + # and STDERR ignored + # This is needed if the environment contains functions + # that /bin/sh does not understand + my %env; + # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID + # ssh with Kerberos needs KRB5CCNAME + # sshpass needs SSHPASS + # tmux needs LC_CTYPE + # lsh needs HOME LOGNAME + my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE + HOME LOGNAME SSHPASS); + @env{@keep} = @ENV{@keep}; + local %ENV; + %ENV = %env; + if($Global::debug) { + # && true is to force spawning a shell and not just exec'ing + return qx{ @_ && true }; + } else { + # CygWin does not respect 2>/dev/null + # so we do that by hand + # This trick does not work: + # https://stackoverflow.com/q/13833088/363028 + # local *STDERR; + # open(STDERR, ">", "/dev/null"); + open(local *CHILD_STDIN, '<', '/dev/null') or die $!; + open(local *CHILD_STDERR, '>', '/dev/null') or die $!; + my $out; + # eval is needed if open3 fails (e.g. command line too long) + eval { + my $pid = open3( + '<&CHILD_STDIN', + $out, + '>&CHILD_STDERR', + # && true is to force spawning a shell and not just exec'ing + "@_ && true"); + my @arr = <$out>; + close $out; + # Make sure $? is set + waitpid($pid, 0); + return wantarray ? @arr : join "",@arr; + } or do { + # If eval fails, force $?=false + `false`; + }; + } +} + +sub uniq(@) { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min(@) { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max(@) { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum(@) { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero($) { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty($) { + my $a = shift; + return $a ? $a : ""; +} + +sub undef_if_empty($) { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; +} + +sub multiply_binary_prefix(@) { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my @v = @_; + for(@v) { + defined $_ or next; + s/ki/*1024/gi; + s/mi/*1024*1024/gi; + s/gi/*1024*1024*1024/gi; + s/ti/*1024*1024*1024*1024/gi; + s/pi/*1024*1024*1024*1024*1024/gi; + s/ei/*1024*1024*1024*1024*1024*1024/gi; + s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + s/K/*1024/g; + s/M/*1024*1024/g; + s/G/*1024*1024*1024/g; + s/T/*1024*1024*1024*1024/g; + s/P/*1024*1024*1024*1024*1024/g; + s/E/*1024*1024*1024*1024*1024*1024/g; + s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + s/k/*1000/g; + s/m/*1000*1000/g; + s/g/*1000*1000*1000/g; + s/t/*1000*1000*1000*1000/g; + s/p/*1000*1000*1000*1000*1000/g; + s/e/*1000*1000*1000*1000*1000*1000/g; + s/z/*1000*1000*1000*1000*1000*1000*1000/g; + s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $_ = eval $_; + } + return wantarray ? @v : $v[0]; +} + +sub multiply_time_units($) { + # Evalualte numbers with time units + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = string time units + # Returns: + # $value = int in seconds + my @v = @_; + for(@v) { + defined $_ or next; + if(/[dhms]/i) { + s/s/*1+/gi; + s/m/*60+/gi; + s/h/*3600+/gi; + s/d/*86400+/gi; + # 1m/3 => 1*60+/3 => 1*60/3 + s/\+(\D)/$1/gi; + } + $_ = eval $_."-0"; + } + return wantarray ? @v : $v[0]; +} + +sub seconds_to_time_units() { + # Convert seconds into ??d??h??m??s + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = int in seconds + # Returns: + # $str = string time units + my $s = shift; + my $str; + my $d = int($s/86400); + $s -= $d * 86400; + my $h = int($s/3600); + $s -= $h * 3600; + my $m = int($s/60); + $s -= $m * 60; + if($d) { + $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s); + } elsif($h) { + $str = sprintf("%dh%02dm%02ds",$h,$m,$s); + } elsif($m) { + $str = sprintf("%dm%02ds",$m,$s); + } else { + $str = sprintf("%ds",$s); + } + return $str; +} + +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full() { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. + # Returns: + # N/A + if(not $disk_full_fh) { + $disk_full_fh = ::tmpfile(SUFFIX => ".df"); + $b8193 = "b"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete.", + "Cannot append to buffer file in $ENV{'TMPDIR'}.", + "Is the disk full?", + "Change \$TMPDIR with --tmpdir or use --compress."); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + +sub spacefree($$) { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? + # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if(1 == $spaces) { + $s =~ s/\s+/ /mg; + } elsif(2 == $spaces) { + # Keep newlines + $s =~ s/\n\n+/\n/sg; + $s =~ s/[ \t]+/ /mg; + } elsif(3 == $spaces) { + # Keep perl code required space + $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg; + $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname() { + local $/ = "\n"; + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which(@) { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + if($prg =~ m:/:) { + # Test if program with full path exists + push(@which, grep { not -d $_ and -x $_ } $prg); + } + } + ::debug("which", "$which[0] in $ENV{'PATH'}\n"); + return wantarray ? @which : $which[0]; +} + +{ + my ($regexp,$shell,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + ::debug("init","Parent of $pid\n"); + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh + + my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh + ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh + static-sh tcsh yash zsh -sh -csh -bash), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh + $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'. + '(-?)('. $shell. '))( *$| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh + # but sh also disguises itself as -sh + # (TODO When does that happen?) + "-sh" => ["sh"], + "-csh" => ["tcsh", "csh"], + # ash disguises itself as -ash + "-ash" => ["ash", "dash", "sh"], + # dash disguises itself as -dash + "-dash" => ["dash", "ash", "sh"], + # bash disguises itself as -bash + "-bash" => ["bash", "sh"], + # ksh disguises itself as -ksh + "-ksh" => ["ksh", "sh"], + # zsh disguises itself as -zsh + "-zsh" => ["zsh", "sh"], + ); + } + if($^O eq "linux") { + # Optimized for GNU/Linux + my $testpid = $pid; + my $shellpath; + my $shellline; + while($testpid) { + if(open(my $fd, "<", "/proc/$testpid/cmdline")) { + local $/="\0"; + chomp($shellline = <$fd>); + if($shellline =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", + "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname, + @{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + return $shellpath; + } + } + # Get parent pid + if(open(my $fd, "<", "/proc/$testpid/stat")) { + my $line = <$fd>; + close $fd; + # Parent pid is field 4 + $testpid = (split /\s+/, $line)[3]; + } else { + # Something is wrong: fall back to old method + last; + } + } + } + # if -sh or -csh try readlink /proc/$$/exe + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + if($name_of_ref->{$testpid} =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname,@{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table() { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Minix uses cols 2,3 and can have newlines in the command + # so lines not having numbers in cols 2,3 must be ignored + my $minix = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'android' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'minix' => $minix, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or + ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <<empty on MacOSX running cubase>> + # must match: 24436 21224 <<empty on system running Viber>> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now() { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep($) { + # Sleep this many milliseconds. + # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug("timing",int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub make_regexp_ungreedy { + my $regexp = shift; + my $class_state = 0; + my $escape_state = 0; + my $found = 0; + my $ungreedy = ""; + my $c; + + for $c (split (//, $regexp)) { + if ($found) { + if($c ne "?") { $ungreedy .= "?"; } + $found = 0; + } + $ungreedy .= $c; + + if ($escape_state) { $escape_state = 0; next; } + if ($c eq "\\") { $escape_state = 1; next; } + if ($c eq '[') { $class_state = 1; next; } + if ($class_state) { + if($c eq ']') { $class_state = 0; } + next; + } + # Quantifiers: + * {...} + if ($c =~ /[*}+]/) { $found = 1; } + } + if($found) { $ungreedy .= '?'; } + return $ungreedy; +} + + +sub __KILLER_REAPER__() {} + +sub reap_usleep() { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reapers()) { + if(not $Global::total_completed % 100) { + if($opt::timeout) { + # Force cleaning the timeout queue for every 100 jobs + # Fixes potential memleak + $Global::timeoutq->process_timeouts(); + } + } + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem($opt::memfree*0.5); + } + if($opt::memsuspend) { + suspend_young_if_not_enough_mem($opt::memsuspend); + } + if($opt::limit) { + kill_youngest_if_over_limit(); + } + exit_if_disk_full(); + if($Global::linebuffer) { + my $something_printed = 0; + if($opt::keeporder and not $opt::latestline) { + for my $job (values %Global::running) { + $something_printed += $job->print_earlier_jobs(); + } + } else { + for my $job (values %Global::running) { + $something_printed += $job->print(); + } + } + if($something_printed) { $ms = $ms/2+0.001; } + } + if($ms > 0.002) { + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + if($opt::delay and not $Global::linebuffer) { + # The 0.004s is approximately the time it takes for one round + my $next_earliest_start = + $Global::newest_starttime + $opt::delay - 0.004; + my $remaining_ms = 1000 * ($next_earliest_start - ::now()); + # The next job can only start at $next_earliest_start + # so sleep until then (but sleep at least $ms) + usleep(::max($ms,$remaining_ms)); + } else { + usleep($ms); + } + # --compress needs $SIG{CHLD} unset + $SIG{CHLD} = 'DEFAULT'; + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub kill_youngest_if_over_limit() { + # Check each $sshlogin we are over limit + # If over limit: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->limit() == 2) { + $job->kill(); + last; + } + } + } +} + +sub suspend_young_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than $limit free mem: suspend some of the young children + # Else: Resume all jobs + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + my $free = $sshlogin->memfree(); + if($free < 2*$limit) { + # Suspend all jobs (resume some of them later) + map { $_->suspended() or $_->suspend(); } @{$jobs_of{$sshlogin}}; + my @jobs = (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}); + # how many should be running? + # limit*1 => 1; + # limit*1.5 => 2; + # limit*1.75 => 4; + # free < limit*(2-1/2^n); + # => + # 1/(2-free/limit) < 2^n; + my $run = int(1/(2-$free/$limit)); + $run = ::min($run,$#jobs); + # Resume the oldest running + for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) { + ::debug("mem","\nResume ",$run+1, " jobs. Seq ", + $job->seq(), " resumed ", + $sshlogin->memfree()," < ",2*$limit); + $job->resume(); + } + } else { + for my $job (@{$jobs_of{$sshlogin}}) { + if($job->suspended()) { + $job->resume(); + ::debug("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1, + " jobs. Seq ", $job->seq(), " resumed ", + $sshlogin->memfree()," > ",2*$limit); + last; + } + } + } + } +} + +sub kill_youngster_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->memfree() < $limit) { + ::debug("mem","\n",map { $_->seq()." " } + (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}})); + ::debug("mem","\n", $job->seq(), "killed ", + $sshlogin->memfree()," < ",$limit); + $job->kill(); + $sshlogin->memfree_recompute(); + } else { + last; + } + } + ::debug("mem","Free mem OK? ", + $sshlogin->memfree()," > ",$limit); + } +} + + +sub __DEBUGGING__() {} + + +sub debug(@) { + # Uses: + # $Global::debug + # %Global::fh + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fh{2}) { + # Original stderr was saved + my $stderr = $Global::fh{2}; + print $stderr @_[1..$#_]; + } else { + print STDERR @_[1..$#_]; + } + } +} + +sub my_memory_usage() { + # Returns: + # memory usage if found + # 0 otherwise + use strict; + use FileHandle; + + local $/ = "\n"; + my $pid = $$; + if(-e "/proc/$pid/stat") { + my $fh = FileHandle->new("</proc/$pid/stat"); + + my $data = <$fh>; + chomp $data; + $fh->close; + + my @procinfo = split(/\s+/,$data); + + return undef_as_zero($procinfo[22]); + } else { + return 0; + } +} + +sub my_size() { + # Returns: + # $size = size of object if Devel::Size is installed + # -1 otherwise + my @size_this = (@_); + eval "use Devel::Size qw(size total_size)"; + if ($@) { + return -1; + } else { + return total_size(@_); + } +} + +sub my_dump(@) { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". + "Not dumping output\n"; + ::status($err); + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak(@) { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp() { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + + +sub __OBJECT_ORIENTED_PARTS__() {} + + +package SSHLogin; + +sub new($$) { + my $class = shift; + my $s = shift; + my $origs = $s; + my %hostgroups; + my $ncpus; + my $sshcommand; + my $user; + my $password; + my $host; + my $port; + my $local; + my $string; + # SSHLogins can have these formats: + # @grp+grp/ncpu//usr/bin/ssh user@server + # ncpu//usr/bin/ssh user@server + # /usr/bin/ssh user@server + # user@server + # ncpu/user@server + # @grp+grp/user@server + # above with: user:password@server + # above with: user@server:port + # So: + # [@grp+grp][ncpu/][ssh command ][[user][:password]@][server[:port]] + + # [@grp+grp]/ncpu//usr/bin/ssh user:pass@server:port + if($s =~ s:^\@([^/]+)/?::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+/, $1); + } + # An SSHLogin is always in the hostgroup of its "numcpu/host" + $hostgroups{$s} = 1; + + # [ncpu/]/usr/bin/ssh user:pass@server:port + if ($s =~ s:^(\d+)/::) { $ncpus = $1; } + + # [/usr/bin/ssh ]user:pass@server:port + if($s =~ s/^(.*) //) { $sshcommand = $1; } + + # [user:pass@]server:port + if($s =~ s/^([^@]+)@//) { + my $userpw = $1; + # user[:pass] + if($userpw =~ s/:(.*)//) { + $password = $1; + if($password eq "") { $password = $ENV{'SSHPASS'} } + if(not ::which("sshpass")) { + ::error("--sshlogin with password requires sshpass installed"); + ::wait_and_exit(255); + } + } + $user = $userpw; + } + # [server]:port + if(not $s =~ /:.*:/ + and + $s =~ s/^([-a-z0-9._]+)//i) { + # Not IPv6 (IPv6 has 2 or more ':') + $host = $1; + } elsif($s =~ s/^(\\[\[\]box0-9a-f.]+)//i) { + # RFC2673 allows for: + # \[b11010000011101] \[o64072/14] \[xd074/14] \[208.116.0.0/14] + $host = $1; + } elsif($s =~ s/^\[([0-9a-f:]+)\]//i + or + $s =~ s/^([0-9a-f:]+)//i) { + # RFC5952 + # [2001:db8::1]:80 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + # 2001:db8::1:80 - not supported + # 2001:db8::1 port 80 - not supported + $host = $1; + } + + # [:port] + if($s =~ s/^:(\w+)//i) { + $port = $1; + } elsif($s =~ s/^[p\.\#](\w+)//i) { + # RFC5952 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + $port = $1; + } + + if($s and $s ne ':') { + ::die_bug("SSHLogin parser failed on '$origs' => '$s'"); + } + + $string = + # Only include the sshcommand in $string if it is set by user + ($sshcommand && $sshcommand." "). + ($user && $user."@"). + ($host && $host). + ($port && ":$port"); + if($host eq ':') { + $local = 1; + $string = ":"; + } else { + $sshcommand ||= $opt::ssh || $ENV{'PARALLEL_SSH'} || "ssh"; + } + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + # Used for file names for loadavg + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'sshcommand' => $sshcommand, + 'user' => $user, + 'password' => $password, + 'host' => $host, + 'port' => $port, + 'hostgroups' => \%hostgroups, + 'local' => $local, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/loadavg", + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/swap_activity", + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY($) { + my $self = shift; + # Remove temporary files if they are created. + ::rm($self->{'loadavg_file'}); + ::rm($self->{'swap_activity_file'}); +} + +sub string($) { + my $self = shift; + return $self->{'string'}; +} + +sub host($) { + my $self = shift; + return $self->{'host'}; +} + +sub sshcmd($) { + # Give the ssh command without hostname + # Returns: + # "sshpass -e ssh -p port -l user" + my $self = shift; + my @local; + # [sshpass -e] ssh -p port -l user + if($self->{'password'}) { push @local, "sshpass -e"; } + # [ssh] -p port -l user + push @local, $self->{'sshcommand'}; + # [-p port] -l user + if($self->{'port'}) { push @local, '-p',$self->{'port'}; } + # [-l user] + if($self->{'user'}) { push @local, '-l',$self->{'user'}; } + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + push @local, "-S", $control_path; + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + $SIG{'TERM'} = undef; + # Run a sleep that outputs data, so it will discover + # if the ssh connection closes. + my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}'); + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # STDERR >/dev/null to ignore + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + exec(@local, "-MT", $self->{'host'}, "--", + "perl", "-e", $sleep); + } + } + } + + return "@local"; +} + +sub wrap($@) { + # Input: + # @cmd = shell command to run on remote + # Returns: + # $sshwrapped = ssh remote @cmd + my $self = shift; + my @remote = @_; + return(join " ", + $self->sshcmd(), $self->{'host'}, "--", "exec", @remote); +} + +sub hexwrap($@) { + # Input: + # @cmd = perl expresion to eval + # Returns: + # $hexencoded = perl command that decodes hex and evals @cmd + my $self = shift; + my $cmd = join("",@_); + + # "#" is needed because Perl on MacOS X adds NULs + # when running pack q/H10000000/ + my $hex = unpack "H*", $cmd."#"; + # csh does not deal well with > 1000 chars in one word + # Insert space every 1000 char + $hex =~ s/\G.{1000}\K/ /sg; + # Explanation: + # Write this without special chars: eval pack 'H*', join '',@ARGV + # GNU_Parallel_worker = String so people can see this is from GNU Parallel + # eval+ = way to write 'eval ' without space (gives warning) + # pack+ = way to write 'pack ' without space + # q/H10000000/, = almost the same as "H*" but does not use * + # join+q//, = join '', + return('perl -X -e '. + 'GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV '. + $hex); +} + +sub jobs_running($) { + my $self = shift; + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength($$) { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength($) { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed() { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups() { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups() { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed($) { + my $self = shift; + $self->{'jobs_completed'}++; + $Global::total_completed++; +} + +sub set_max_jobs_running($$) { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree() { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + # Return 1 if not defined. + return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute() { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + # Run the script twice if it gives 0 (typically intermittent error) + $self->{'memfree'} = ::qqx($script) || ::qqx($script); + if(not $self->{'memfree'}) { + ::die_bug("Less than 1 byte memory free"); + } + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript() { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # /proc/meminfo + # MemFree: 7012 kB + # Buffers: 19876 kB + # Cached: 431192 kB + # SwapCached: 0 kB + "linux" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # Android uses same code as GNU/Linux + "android" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => ( + q{ + print (((reverse `vmstat 1 1`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The second free value is correct + "solaris" => ( + q{ + print (((reverse `vmstat 1 2`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # hw.pagesize: 4096 + # vm.stats.vm.v_cache_count: 0 + # vm.stats.vm.v_inactive_count: 79574 + # vm.stats.vm.v_free_count: 4507 + "freebsd" => ( + q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + ($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }), + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # Pages free: 198061. + # Pages active: 159701. + # Pages inactive: 47378. + # Pages speculative: 29707. + # Pages wired down: 89231. + # "Translation faults": 928901425. + # Pages copy-on-write: 156988239. + # Pages zero filled: 271267894. + # Pages reactivated: 48895. + # Pageins: 1798068. + # Pageouts: 257. + # Object cache: 6603 hits of 1713223 lookups (0% hit rate) + 'darwin' => ( + q{ + $vm = `vm_stat`; + print (($vm =~ /page size of (\d+)/)[0] * + (($vm =~ /Pages free:\s+(\d+)/)[0] + + ($vm =~ /Pages inactive:\s+(\d+)/)[0])); + }), + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $script = "perl -e " . ::Q(::spacefree(1,$perlscript)); + } + return $script; + } +} + +sub limit($) { + # Returns: + # 0 = Below limit. Start another job. + # 1 = Over limit. Start no jobs. + # 2 = Kill youngest job + my $self = shift; + + if(not defined $self->{'limitscript'}) { + my %limitscripts = + ("io" => q! + io() { + limit=$1; + io_file=$2; + # Do the measurement in the background + ((tmp=$(tempfile); + LANG=C iostat -x 1 2 > $tmp; + mv $tmp $io_file) </dev/null >/dev/null & ); + perl -e '-e $ARGV[0] or exit(1); + for(reverse <>) { + /Device/ and last; + /(\S+)$/ and $max = $max > $1 ? $max : $1; } + exit ('$limit' < $max)' $io_file; + }; + io %s %s + !, + "mem" => q! + mem() { + limit=$1; + awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2} + END { + if (sum*1024 < '$limit'/2) { exit 2; } + else { exit (sum*1024 < '$limit') } + }' /proc/meminfo; + }; + mem %s; + !, + "load" => q! + load() { + limit=$1; + ps ax -o state,command | + grep -E '^[DOR].[^[]' | + wc -l | + perl -ne 'exit ('$limit' < $_)'; + }; + load %s + !, + ); + my ($cmd,@args) = split /\s+/,$opt::limit; + if($limitscripts{$cmd}) { + my $tmpfile = ::tmpname("parlmt"); + ++$Global::unlink{$tmpfile}; + $self->{'limitscript'} = + ::spacefree(1, sprintf($limitscripts{$cmd}, + ::multiply_binary_prefix(@args),$tmpfile)); + } else { + $self->{'limitscript'} = $opt::limit; + } + } + + my %env = %ENV; + local %ENV = %env; + $ENV{'SSHLOGIN'} = $self->string(); + system($Global::shell,"-c",$self->{'limitscript'}); + #::qqx($self->{'limitscript'}); + ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n"); + return $?>>8; +} + + +sub swapping($) { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity($) { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap activity computed + my $self = shift; + # Should we update the swap_activity file? + my $update_swap_activity_file = 0; + # Test with (on 64 core machine): + # seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true' + if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) { + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + my $swap_activity; + $swap_activity = swapactivityscript(); + if(not $self->local()) { + $swap_activity = $self->wrap($swap_activity); + } + # Run swap_activity measuring. + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)"); + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript() { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $script = "perl -e " . ::Q($perlscript); + } + return $script; + } +} + +sub too_fast_remote_login($) { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? $too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at($) { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at($$) { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high($) { + my $self = shift; + my $loadavg = $self->loadavg(); + if(defined $loadavg) { + ::debug("load", "Load $loadavg > ",$self->max_loadavg()); + return $loadavg >= $self->max_loadavg(); + } else { + # Unknown load: Assume load is too high + return 1; + } +} + + + +sub loadavg($) { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting + # for disk or CPU right now. So it is the server load this instant + # and not averaged over several minutes. This is needed so GNU + # Parallel will at most start one job that will push the load over + # the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + + my $self = shift; + sub loadavg_cmd() { + if(not $Global::loadavg_cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = ::spacefree(1,q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + # Treat threads as processes + $bsd2="ps axH -o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + # TODO Find better code for AIX/Android + 'aix' => "uptime", + 'android' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd2, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd2, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $cygwin, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + 'MSWin32' => $sysv, + ); + print `$ps{$^O}`; + }); + # The command is too long for csh, so base64_wrap the command + $Global::loadavg_cmd = $self->hexwrap($ps); + } + return $Global::loadavg_cmd; + } + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/; # $/ = undef => slurp whole file + my $load_out = <$load_fh>; + close $load_fh; + if($load_out =~ /\S/) { + # Content can be empty if ~/ is on NFS + # due to reading being non-atomic. + # + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . "\n$load_out"); + } + } + $update_loadavg_file = 1; + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + -w $dir or ::die_bug("Cannot write to $dir"); + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->wrap(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + ::debug("load", "Update load\n"); + my $file = $self->{'loadavg_file'}; + # tmpfile on same filesystem as $file + my $tmpfile = $file.$$; + $ENV{'SSHPASS'} = $self->{'password'}; + ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )"); + } + return $self->{'loadavg'}; +} + +sub max_loadavg($) { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg($$) { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg($) { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. --load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. --load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + if(open(my $in_fh, "<", $Global::max_load_file)) { + my $opt_load_file = join("",<$in_fh>); + close $in_fh; + $load = $self->compute_max_loadavg($opt_load_file); + } else { + ::error("Cannot open $loadspec."); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --load failed."); + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login($) { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login($$) { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running($) { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running($) { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes($) { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +{ + my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; + + sub reserve_filehandles($) { + # Reserves filehandle + my $n = shift; + for (1..$n) { + $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); + } + } + + sub reserve_process() { + # Spawn a dummy process + my $child; + if($child = fork()) { + push @children, $child; + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # This is the child + # The child takes one process slot + # It will be killed later + $SIG{'TERM'} = $Global::original_sig{'TERM'}; + if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") { + # The exec does not work on Cygwin and QNX + sleep 10101010; + } else { + # 'exec sleep' takes less RAM than sleeping in perl + exec 'sleep', 10101; + } + exit(0); + } else { + # Failed to spawn + $max_system_proc_reached = 1; + } + } + + sub get_args_or_jobs() { + # Get an arg or a job (depending on mode) + if($Global::semaphore or ($opt::pipe and not $opt::tee)) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + # Replacement must happen here due to seq() + $job and $job->replaced(); + push(@jobs, $job); + return 1; + } + } + } + } + + sub cleanup() { + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args); + @args = (); + $Global::JobQueue->unget(@jobs); + @jobs = (); + } + + sub processes_available_by_system_limit($) { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawning_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? + reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + if($wanted_processes < $Global::infinity) { + $Global::dummy_jobs = 1; + } + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + if(!$Global::dummy_jobs) { + get_args_or_jobs() or last; + } + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: ". + $wait_time_for_getting_args, " ", $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning_once( + "Starting $system_limit processes took > $forktime sec.", + "Consider adjusting -j. Press CTRL-C to stop."); + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ". + $system_limit. " jobs in parallel.", + "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ". + $system_limit. " jobs in parallel.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; + } +} + +sub simultaneous_sshlogin_limit($) { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->string(); + ::warning("ssh to $serverlogin only allows ". + "for $ssh_limit simultaneous logins.", + "You may raise this by changing", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.", + "You can also try --sshdelay 0.1", + "Using only ".($ssh_limit-1)." connections ". + "to avoid race conditions."); + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + } + return $ssh_limit; +} + +sub simultaneous_sshlogin($) { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simul-login & ssh host echo simul-login & ...) | + # grep simul|wc -l + # Input: + # $wanted_processes = Try for this many logins in parallel + # Returns: + # $ssh_limit = Number of succesful parallel logins + local $/ = "\n"; + my $self = shift; + my $wanted_processes = shift; + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = ($sshdelay.$self->wrap("echo simultaneouslogin"). + "</dev/null 2>&1 &")x$wanted_processes; + ::debug("init","Trying $wanted_processes logins at ".$self->string()."\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus($$) { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes($) { + # Parse the number of processes that the user asked for using -j + # Input: + # $opt_P = string formatted as for -P + # Returns: + # $processes = the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if($opt_P =~ /^\+(\d+)$/) { + # E.g. -P +2 + my $j = $1; + $processes = + $self->ncpus() + $j; + } elsif ($opt_P =~ /^-(\d+)$/) { + # E.g. -P -2 + my $j = $1; + $processes = + $self->ncpus() - $j; + } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { + # E.g. -P 10.5% + my $j = $1; + $processes = + $self->ncpus() * $j / 100; + } elsif ($opt_P =~ /^(\d+)$/) { + $processes = $1; + if($processes == 0) { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } + } elsif (-f $opt_P) { + $Global::max_procs_file = $opt_P; + if(open(my $in_fh, "<", $Global::max_procs_file)) { + my $opt_P_file = join("",<$in_fh>); + close $in_fh; + $processes = $self->user_requested_processes($opt_P_file); + } else { + ::error("Cannot open $opt_P."); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --jobs/-j/--max-procs/-P failed."); + ::die_usage(); + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus($) { + # Number of CPU threads + # --use_sockets_instead_of_threads = count socket instead + # --use_cores_instead_of_threads = count physical cores instead + # Returns: + # $ncpus = number of cpu (threads) on this sshlogin + local $/ = "\n"; + my $self = shift; + if(not defined $self->{'ncpus'}) { + if($self->local()) { + if($opt::use_sockets_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'sockets'}; + } elsif($opt::use_cores_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'cores'}; + } else { + $self->{'ncpus'} = socket_core_thread()->{'threads'}; + } + } else { + my $ncpu; + $ENV{'SSHPASS'} = $self->{'password'}; + ::debug("init",("echo | ".$self->wrap("parallel --number-of-sockets"))); + if($opt::use_sockets_instead_of_threads + or + $opt::use_cpus_instead_of_cores) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-sockets")); + } elsif($opt::use_cores_instead_of_threads) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-cores")); + } else { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-threads")); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ". + "number of cpus on ".$self->string." ($ncpu). Using 1."); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + + +sub nproc() { + # Returns: + # Number of threads using `nproc` + my $no_of_threads = ::qqx("nproc"); + chomp $no_of_threads; + return $no_of_threads; +} + +sub no_of_sockets() { + return socket_core_thread()->{'sockets'}; +} + +sub no_of_cores() { + return socket_core_thread()->{'cores'}; +} + +sub no_of_threads() { + return socket_core_thread()->{'threads'}; +} + +sub socket_core_thread() { + # Returns: + # { + # 'sockets' => #sockets = number of socket with CPU present + # 'cores' => #cores = number of physical cores + # 'threads' => #threads = number of compute cores (hyperthreading) + # 'active' => #taskset_threads = number of taskset limited cores + # } + my $cpu; + if ($^O eq 'linux') { + $cpu = sct_gnu_linux($cpu); + } elsif ($^O eq 'android') { + $cpu = sct_android($cpu); + } elsif ($^O eq 'freebsd') { + $cpu = sct_freebsd($cpu); + } elsif ($^O eq 'netbsd') { + $cpu = sct_netbsd($cpu); + } elsif ($^O eq 'openbsd') { + $cpu = sct_openbsd($cpu); + } elsif ($^O eq 'gnu') { + $cpu = sct_hurd($cpu); + } elsif ($^O eq 'darwin') { + $cpu = sct_darwin($cpu); + } elsif ($^O eq 'solaris') { + $cpu = sct_solaris($cpu); + } elsif ($^O eq 'aix') { + $cpu = sct_aix($cpu); + } elsif ($^O eq 'hpux') { + $cpu = sct_hpux($cpu); + } elsif ($^O eq 'nto') { + $cpu = sct_qnx($cpu); + } elsif ($^O eq 'svr5') { + $cpu = sct_openserver($cpu); + } elsif ($^O eq 'irix') { + $cpu = sct_irix($cpu); + } elsif ($^O eq 'dec_osf') { + $cpu = sct_tru64($cpu); + } else { + # Try all methods until we find something that works + $cpu = (sct_gnu_linux($cpu) + || sct_android($cpu) + || sct_freebsd($cpu) + || sct_netbsd($cpu) + || sct_openbsd($cpu) + || sct_hurd($cpu) + || sct_darwin($cpu) + || sct_solaris($cpu) + || sct_aix($cpu) + || sct_hpux($cpu) + || sct_qnx($cpu) + || sct_openserver($cpu) + || sct_irix($cpu) + || sct_tru64($cpu) + ); + } + if(not $cpu) { + # Fall back: Set all to nproc + my $nproc = nproc(); + if($nproc) { + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + $nproc; + } + } + if(not $cpu) { + ::warning("Cannot figure out number of cpus. Using 1."); + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + 1 + } + $cpu->{'sockets'} ||= 1; + $cpu->{'threads'} ||= $cpu->{'cores'}; + $cpu->{'active'} ||= $cpu->{'threads'}; + chomp($cpu->{'sockets'}, + $cpu->{'cores'}, + $cpu->{'threads'}, + $cpu->{'active'}); + # Choose minimum of active and actual + my $mincpu; + $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'}); + $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'}); + $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'}); + return $mincpu; +} + +sub sct_gnu_linux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + my $cpu = shift; + + sub read_topology($) { + my $prefix = shift; + my %sibiling; + my %socket; + my $thread; + for($thread = 0; + -r "$prefix/cpu$thread/topology/physical_package_id"; + $thread++) { + open(my $fh,"<", + "$prefix/cpu$thread/topology/physical_package_id") + || die; + $socket{<$fh>}++; + close $fh; + } + for($thread = 0; + -r "$prefix/cpu$thread/topology/thread_siblings"; + $thread++) { + open(my $fh,"<", + "$prefix/cpu$thread/topology/thread_siblings") + || die; + $sibiling{<$fh>}++; + close $fh; + } + $cpu->{'sockets'} = keys %socket; + $cpu->{'cores'} = keys %sibiling; + $cpu->{'threads'} = $thread; + } + + sub read_cpuinfo(@) { + my @cpuinfo = @_; + $cpu->{'sockets'} = 0; + $cpu->{'cores'} = 0; + $cpu->{'threads'} = 0; + my %seen; + my %phy_seen; + my $physicalid; + for(@cpuinfo) { + # physical id : 0 + if(/^physical id.*[:](.*)/) { + $physicalid = $1; + if(not $phy_seen{$1}++) { + $cpu->{'sockets'}++; + } + } + # core id : 3 + if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) { + $cpu->{'cores'}++; + } + # processor : 2 + /^processor.*[:]\s*\d/i and $cpu->{'threads'}++; + } + $cpu->{'cores'} ||= $cpu->{'threads'}; + $cpu->{'cpus'} ||= $cpu->{'threads'}; + $cpu->{'sockets'} ||= 1; + } + + sub read_lscpu(@) { + my @lscpu = @_; + my $threads_per_core; + my $cores_per_socket; + for(@lscpu) { + /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1; + /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1; + /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1; + /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2; + } + if($threads_per_core and $cpu->{'threads'}) { + $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core; + } + $cpu->{'cpus'} ||= $cpu->{'threads'}; + } + + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my @cpuinfo; + my @lscpu; + if($ENV{'PARALLEL_CPUINFO'}) { + # Use CPUINFO from environment - used for testing only + read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'}); + } elsif($ENV{'PARALLEL_LSCPU'}) { + # Use LSCPU from environment - used for testing only + read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'}); + } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") { + # Use CPUPREFIX from environment - used for testing only + read_topology($ENV{'PARALLEL_CPUPREFIX'}); + } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) { + # Skip /proc/cpuinfo - already set + } else { + # Not debugging: Look at this computer + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "-|", "lscpu")) { + # Parse output from lscpu + read_lscpu(<$in_fh>); + close $in_fh; + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") { + read_topology("/sys/devices/system/cpu"); + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "<", "/proc/cpuinfo")) { + # Read /proc/cpuinfo + read_cpuinfo(<$in_fh>); + close $in_fh; + } + } + if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) { + # if 'taskset' is used to limit number of threads + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return $cpu; +} + +sub sct_android($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + # Use GNU/Linux + return sct_gnu_linux($_[0]); +} + +sub sct_freebsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }) + or + ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }) + or + ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })); + return $cpu; +} + +sub sct_netbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu"); + return $cpu; +} + +sub sct_openbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu'); + return $cpu; +} + +sub sct_hurd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("nproc"); + return $cpu; +} + +sub sct_darwin($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx('sysctl -n hw.physicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx('sysctl -n hw.logicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' })); + return $cpu; +} + +sub sct_solaris($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/bin/kstat") { + my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id"); + if($#chip_id >= 0) { + $cpu->{'sockets'} ||= $#chip_id +1; + } + my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq"); + if($#core_id >= 0) { + $cpu->{'cores'} ||= $#core_id +1; + } + } + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo -p"); + if($#psrinfo >= 0) { + $cpu->{'sockets'} ||= $psrinfo[0]; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + $cpu->{'cores'} ||= $#prtconf +1; + } + } + } + return $cpu; +} + +sub sct_aix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/lscfg") { + if(open(my $in_fh, "-|", + "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) { + $cpu->{'cores'} = <$in_fh>; + close $in_fh; + } + } + } + if(not $cpu->{'threads'}) { + if(-x "/usr/bin/vmstat") { + if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) { + while(<$in_fh>) { + /lcpu=([0-9]*) / and $cpu->{'threads'} = $1; + } + close $in_fh; + } + } + } + return $cpu; +} + +sub sct_hpux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'}); + $cpu->{'threads'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'}); + return $cpu; +} + +sub sct_qnx($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + # BUG: It is not known how to calculate this. + + return $cpu; +} + +sub sct_openserver($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + $cpu->{'cores'} = $#psrinfo +1; + } + } + } + $cpu->{'sockets'} ||= $cpu->{'cores'}; + return $cpu; +} + +sub sct_irix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); + return $cpu; +} + +sub sct_tru64($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sizer -pr"); + $cpu->{'sockets'} ||= $cpu->{'cores'}; + $cpu->{'threads'} ||= $cpu->{'cores'}; + + return $cpu; +} + +sub sshcommand($) { + # Returns: + # $sshcommand = the command (incl options) to run when using ssh + my $self = shift; + if (not defined $self->{'sshcommand'}) { + ::die_bug("sshcommand not set"); + } + return $self->{'sshcommand'}; +} + +sub local($) { + my $self = shift; + return $self->{'local'}; +} + +sub control_path_dir($) { + # Returns: + # $control_path_dir = dir of control path (for -M) + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + $self->{'control_path_dir'} = + # Use $ENV{'TMPDIR'} as that is typically not + # NFS mounted + File::Temp::tempdir($ENV{'TMPDIR'} + . "/control_path_dir-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd($) { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file. " is not readable and will not be transferred."); + return "true"; + } + my $rsync_destdir; + my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./? + if($relpath) { + $rsync_destdir = ::shell_quote_file($workdir); + } else { + # rsync /foo/bar / + $rsync_destdir = "/"; + } + $file = ::shell_quote_file($file); + # Make dir if it does not exist + return($self->wrap("mkdir -p $rsync_destdir") . " && " . + $self->rsync()." $file ".$self->{'host'}.":$rsync_destdir"); +} + +{ + my $rsync_protocol; + + sub rsync($) { + sub rsync_protocol { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + # Returns: + # $rsync = "rsync" or "rsync --protocol 30" + if(not $rsync_protocol) { + my @out = `rsync --version`; + if(not @out) { + if(::which("rsync")) { + ::die_bug("'rsync --version' gave no output."); + } else { + ::error("'rsync' is not in \$PATH."); + ::wait_and_exit(255); + } + } + for (@out) { + # rsync version 3.1.3 protocol version 31 + # rsync version v3.2.3 protocol version 31 + if(/version v?(\d+.\d+)(.\d+)?/) { + if($1 >= 3.1) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync_protocol = "rsync --protocol 30"; + } else { + $rsync_protocol = "rsync"; + } + } + } + $rsync_protocol or + ::die_bug("Cannot figure out version of rsync: @out"); + } + return $rsync_protocol; + } + my $self = shift; + + return rsync_protocol()." ".$ENV{'PARALLEL_RSYNC_OPTS'}. + " -e".::Q($self->sshcmd()); + } +} + +sub cleanup_cmd($$$) { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; + } + my $rmf = "sh -c ". + ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir); + return $self->wrap(::Q($rmf)); +} + +package JobQueue; + +sub new($) { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $transfer_files, $return_files, $template_names, $template_contents); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'this_job_no' => 0, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + + $self->{'this_job_no'}++; + if(@{$self->{'unget'}}) { + my $job = shift @{$self->{'unget'}}; + # {%} may have changed, so flush computed values + $job && $job->flush_cache(); + return $job; + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined $commandline) { + return Job->new($commandline); + } else { + $self->{'this_job_no'}--; + return undef; + } + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; + $self->{'this_job_no'} -= @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs($) { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + if($opt::pipe and not $opt::tee) { + ::error("--pipe is incompatible with --eta/--bar/--shuf"); + ::wait_and_exit(255); + } + if($opt::totaljobs) { + $self->{'total_jobs'} = $opt::totaljobs; + } elsif($opt::sqlworker) { + $self->{'total_jobs'} = $Global::sql->total_jobs(); + } else { + my $record; + my @arg_records; + my $record_queue = $self->{'commandlinequeue'}{'arg_queue'}; + my $start = time; + while($record = $record_queue->get()) { + push @arg_records, $record; + if(time - $start > 10) { + ::warning("Reading ".scalar(@arg_records). + " arguments took longer than 10 seconds."); + $opt::eta && ::warning("Consider removing --eta."); + $opt::bar && ::warning("Consider removing --bar."); + $opt::shuf && ::warning("Consider removing --shuf."); + last; + } + } + while($record = $record_queue->get()) { + push @arg_records, $record; + } + if($opt::shuf and @arg_records) { + my $i = @arg_records; + while (--$i) { + my $j = int rand($i+1); + @arg_records[$i,$j] = @arg_records[$j,$i]; + } + } + $record_queue->unget(@arg_records); + # $#arg_records = number of args - 1 + # We have read one @arg_record for this job (so add 1 more) + my $num_args = $#arg_records + 2; + # This jobs is not started so -1 + my $started_jobs = $self->{'this_job_no'} - 1; + my $max_args = ::max($Global::max_number_of_args,1); + $self->{'total_jobs'} = ::ceil($num_args / $max_args) + + $started_jobs; + ::debug("init","Total jobs: ".$self->{'total_jobs'}. + " ($num_args/$max_args + $started_jobs)\n"); + } + } + return $self->{'total_jobs'}; +} + +sub flush_total_jobs($) { + # Unset total_jobs to force recomputing + my $self = shift; + ::debug("init","flush Total jobs: "); + $self->{'total_jobs'} = undef; +} + +sub next_seq($) { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args($) { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new($) { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + # remaining data not sent to stdin (used for --pipe) + # tmpfiles to cleanup when job is done + 'unlink' => [], + # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + # Output used for SQL and CSV-output + 'output' => { 1 => [], 2 => [] }, + 'halfline' => { 1 => [], 2 => [] }, + }, ref($class) || $class; +} + +sub flush_cache($) { + my $self = shift; + $self->{'commandline'}->flush_cache(); +} + +sub replaced($) { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +{ + my $next_available_row; + + sub row($) { + my $self = shift; + if(not defined $self->{'row'}) { + if($opt::keeporder) { + $self->{'row'} = $self->seq(); + } else { + $self->{'row'} = ++$next_available_row; + } + } + return $self->{'row'}; + } +} + +sub seq($) { + my $self = shift; + return $self->{'commandline'}->seq(); +} + +sub set_seq($$) { + my $self = shift; + return $self->{'commandline'}->set_seq(shift); +} + +sub slot($) { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +sub free_slot($) { + my $self = shift; + push @Global::slots, $self->slot(); +} + +{ + my($cattail); + + sub cattail() { + # Returns: + # $cattail = perl program for: + # cattail "decomp-prg" wpid [file_stdin] [file_to_unlink] + # decomp-prg = decompress program + # wpid = pid of writer program + # file_stdin = file_to_decompress + # file_to_unlink = unlink this file + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,131072); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be sent more to the decompressor + close OUT; + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles($) { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + + if($opt::latestline) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$outfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$outfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($Global::linebuffer and not + ($opt::keeporder or $opt::files or $opt::results or + $opt::compress or $opt::compress_program or + $opt::decompress_program)) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + pipe($errfhr, $errfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$errfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($opt::results and not $Global::csvsep and not $Global::jsonout) { + # If --results, but not --results *.csv/*.tsv + my $out = $self->{'commandline'}->results_out(); + my $seqname; + if($out eq $opt::results or $out =~ m:/$:) { + # $opt::results = simple string or ending in / + # => $out is a dir/ + # prefix/name1/val1/name2/val2/seq + $seqname = $out."seq"; + # prefix/name1/val1/name2/val2/stdout + $outname = $out."stdout"; + # prefix/name1/val1/name2/val2/stderr + $errname = $out."stderr"; + } else { + # $opt::results = replacement string not ending in / + # => $out is a file + $outname = $out; + $errname = "$out.err"; + $seqname = "$out.seq"; + } + my $seqfhw; + if(not open($seqfhw, "+>", $seqname)) { + ::error("Cannot write to `$seqname'."); + ::wait_and_exit(255); + } + print $seqfhw $self->seq(); + close $seqfhw; + if(not open($outfhw, "+>", $outname)) { + ::error("Cannot write to `$outname'."); + ::wait_and_exit(255); + } + if(not open($errfhw, "+>", $errname)) { + ::error("Cannot write to `$errname'."); + ::wait_and_exit(255); + } + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + if($opt::sqlworker) { + # Save the filenames in SQL table + $Global::sql->update("SET Stdout = ?, Stderr = ? ". + "WHERE Seq = ". $self->seq(), + $outname, $errname); + } + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if($opt::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fh{1}) || die; + open($errfhw,">&",$Global::fh{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($Global::linebuffer) { + # Make it possible to read non-blocking from + # the buffer files + # Used for --linebuffer with -k, --files, --res, --compress* + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + } +} + +sub print_verbose_dryrun($) { + # If -v set: print command to stdout (possibly buffered) + # This must be done before starting the command + my $self = shift; + if($Global::verbose or $opt::dryrun) { + my $fh = $self->fh(1,"w"); + if($Global::verbose <= 1) { + print $fh $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $fh $self->wrapped(),"\n"; + } + } + if($opt::sqlworker) { + $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(), + $self->replaced()); + } +} + +sub add_rm($) { + # Files to remove when job is done + my $self = shift; + push @{$self->{'unlink'}}, @_; +} + +sub get_rm($) { + # Files to remove when job is done + my $self = shift; + return @{$self->{'unlink'}}; +} + +sub cleanup($) { + # Remove files when job is done + my $self = shift; + unlink $self->get_rm(); + delete @Global::unlink{$self->get_rm()}; +} + +sub grouped($) { + my $self = shift; + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed seperately + # and fdr can be seeked seperately (for --line-buffer) + open(my $fdr,"<", $self->fh($fdno,'name')) || + ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if not debugging + $Global::debug or ::rm($self->fh($fdno,"unlink")); + } +} + +sub empty_input_wrapper($) { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + # The optimal block size differs + # It has been measured on: + # AMD 6376: 59000 + # <big ppar --pipe --block 100M --test $1 -j1 'cat >/dev/null'; + my $script = + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", @ARGV) || die; + syswrite($fh, $buf); + while($read = sysread(STDIN, $buf, 59000)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n"); + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + return 'perl -e '.::Q($script)." ". + base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } else { + return 'perl -e '.::Q($script)." ". + $Global::shell." -c ".::Q($command); + } +} + +sub filter_through_compress($) { + my $self = shift; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file. + my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, + # it is known that output file is opened, + # thus output file can then be removed by the decompressor. + # empty_input_wrapper is needed for plzip + my $wpid = open(my $fdw,"|-", "(echo > $comfile; ". + empty_input_wrapper($opt::compress_program).") >". + ::Q($self->fh($fdno,'name'))) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; + # decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) + || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } +} + +sub set_fh($$$$) { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh($) { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write_block($) { + my $self = shift; + my $stdin_fh = $self->fh(0,"w"); + if(fork()) { + # Close in parent + close $stdin_fh; + } else { + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + + for my $part ( + grep { defined $_ } + $self->{'header'},$self->{'block'}) { + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + while($written = syswrite($stdin_fh,$$part)) { + substr($$part,0,$written) = ""; + } + } + close $stdin_fh; + exit(0); + } +} + +sub write($) { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + while($written = syswrite($stdin_fh,$$remaining_ref)){ + substr($$remaining_ref,0,$written) = ""; + } +} + +sub set_block($$$$$$) { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref if virgin (i.e. not --roundrobin) + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $buffer_ref = ref to $buffer containing the block + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; + $self->{'header'} = $header_ref; + if($opt::roundrobin or $opt::remove_rec_sep or defined $opt::retries) { + my $a = ""; + if(($opt::roundrobin or defined $opt::retries) and $self->virgin()) { + $a .= $$header_ref; + } + # Job is no longer virgin + $self->set_virgin(0); + # Make a full copy because $buffer will change + $a .= substr($$buffer_ref,0,$endpos); + $self->{'block'} = \$a; + if($opt::remove_rec_sep) { + remove_rec_sep($self->{'block'},$recstart,$recend); + } + $self->{'block_length'} = length ${$self->{'block'}}; + } else { + $self->set_virgin(0); + for(substr($$buffer_ref,0,$endpos)) { + $self->{'block'} = \$_; + } + $self->{'block_length'} = $endpos + length ${$self->{'header'}}; + } + $self->{'block_pos'} = 0; + $self->add_transfersize($self->{'block_length'}); +} + +sub block_ref($) { + my $self = shift; + return $self->{'block'}; +} + +sub block_length($) { + my $self = shift; + return $self->{'block_length'}; +} + +sub remove_rec_sep($) { + # Remove --recstart and --recend from $block + # Input: + # $block_ref = reference to $block to be modified + # $recstart = --recstart + # $recend = --recend + # Uses: + # $opt::regexp = Are --recstart/--recend regexp? + # Returns: + # N/A + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + if($opt::regexp) { + $$block_ref =~ s/$recend$recstart//gom; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; + } else { + $$block_ref =~ s/\Q$recend$recstart\E//gom; + $$block_ref =~ s/^\Q$recstart\E//os; + $$block_ref =~ s/\Q$recend\E$//os; + } +} + +sub non_blocking_write($) { + my $self = shift; + my $something_written = 0; + + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, + substr(${$self->{'block'}},$self->{'block_pos'})); + if (!defined($rv) && $! == ::EAGAIN()) { + # would block - but would have written + $something_written = 0; + # avoid triggering auto expanding block size + $Global::no_autoexpand_block ||= 1; + } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { + # incomplete write + # Remove the written part + $self->{'block_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + # Empty block to free memory + my $a = ""; + $self->set_block(\$a,\$a,0,"",""); + $something_written = $rv; + } + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin($) { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin($$) { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid($) { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid($$) { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime($) { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime($@) { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; + $opt::sqlworker and + $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(), + $starttime); +} + +sub runtime($) { + # Returns: + # Run time in seconds with 3 decimals + my $self = shift; + return sprintf("%.3f", + int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime($) { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime($$) { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; + $opt::sqlworker and + $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(), + $self->runtime()); +} + +sub is_timedout($) { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill($) { + my $self = shift; + $self->set_exitstatus(-1); + ::kill_sleep_seq($self->pid()); +} + +sub suspend($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "STOP", @pgrps; + $self->set_suspended(1); +} + +sub set_suspended($$) { + my $self = shift; + $self->{'suspended'} = shift; +} + +sub suspended($) { + my $self = shift; + return $self->{'suspended'}; +} + +sub resume($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "CONT", @pgrps; + $self->set_suspended(0); +} + +sub failed($) { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here($) { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + $self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here($) { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed($) { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed($) { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/(\d+)h/$1/) { + exit $bash; + } + exit $csh; + }). + # `echo \$?h` is needed to make fish not complain + "' ".'"`echo \\\\\\\\\$?h`" "$status" '; + } + return $script + } +} + +{ + my $script; + + sub fifo_wrap() { + # Script to create a fifo, run a command on the fifo + # while copying STDIN to the fifo, and finally + # remove the fifo and return the exit code of the command. + if(not $script) { + # {} == $PARALLEL_TMP for --fifo + # To make it csh compatible a wrapper needs to: + # * mkfifo + # * spawn $command & + # * cat > fifo + # * waitpid to get the exit code from $command + # * be less than 1000 chars long + + # The optimal block size differs + # It has been measured on: + # AMD 6376: 4095 + # ppar -a big --pipepart --block -1 --test $1 --fifo 'cat {} >/dev/null'; + $script = "perl -e '". + (::spacefree + (0, q{ + ($s,$c,$f) = @ARGV; + # mkfifo $PARALLEL_TMP + system "mkfifo", $f; + # spawn $shell -c $command & + $pid = fork || exec $s, "-c", $c; + open($o,">",$f) || die $!; + # cat > $PARALLEL_TMP + while(sysread(STDIN,$buf,4095)){ + syswrite $o, $buf; + } + close $o; + # waitpid to get the exit code from $command + waitpid $pid,0; + # Cleanup + unlink $f; + exit $?/256; + }))."'"; + } + return $script; + } +} + +sub wrapped($) { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_prepends) + # * --tee (@Global::cat_prepends) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # @opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_prepends + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + # Bug in Bash and Ksh when running multiline aliases + # This will force them to run correctly, but will fail in + # tcsh so we do not do it. + # $command .= "\n\n"; + if(@opt::shellquote) { + # Quote one time for each --shellquote + my $c = $command; + for(@opt::shellquote) { + $c = ::Q($c); + } + # Prepend "echo" (it is written in perl because + # quoting '-e' causes problem in some versions and + # csh's version does something wrong) + $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c); + } + if($Global::parallel_env) { + # If $PARALLEL_ENV set, put that in front of the command + # Used for env_parallel.* + if($Global::shell =~ /zsh/) { + # The extra 'eval' will make aliases work, too + $command = $Global::parallel_env."\n". + "eval ".::Q($command); + } else { + $command = $Global::parallel_env."\n".$command; + } + } + if($opt::cat) { + # In '--cat' and '--fifo' {} == $PARALLEL_TMP. + # This is to make it possible to compute $PARALLEL_TMP on + # the fly when running remotely. + # $ENV{PARALLEL_TMP} is set in the remote wrapper before + # the command is run. + # + # Prepend 'cat > $PARALLEL_TMP;' + # Append 'unlink $PARALLEL_TMP without affecting $?' + $command = + 'cat > $PARALLEL_TMP;'. + $command.";". postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } elsif($opt::fifo) { + # Prepend fifo-wrapper. In essence: + # mkfifo {} + # ( $command ) & + # # $command must read {}, otherwise this 'cat' will block + # cat > {}; + # wait; rm {} + # without affecting $? + $command = fifo_wrap(). " ". + $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';'; + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_prepends) { + # --pipepart: prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 60800 ? 60800 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + # + # --pipepart --tee: prepend: + # < dash-a-file + # + # --pipe --tee: wrap: + # (rm fifo; ... ) < fifo + # + # --pipe --shard X: + # (rm fifo; ... ) < fifo + $command = (shift @Global::cat_prepends). "($command)". + (shift @Global::cat_appends); + } elsif($opt::pipe and not $opt::roundrobin) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + $command = base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin($$) { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; + + if($opt::sqlworker) { + # Identify worker as --sqlworker often runs on different machines + # If local: Use hostname + my $host = $sshlogin->local() ? ::hostname() : $sshlogin->host(); + $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host); + } +} + +sub sshlogin($) { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub string_base64($) { + # Base64 encode strings into 1000 byte blocks. + # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + my @base64 = unpack("(A1000)*",encode_base64((join"",@_),"")); + return @base64; +} + +sub string_zip_base64($) { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. + # 1000 bytes is the largest word size csh supports + # Zipping will make exporting big environments work, too + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval() { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64"; + eval"@GNU_Parallel"; + $chld = $SIG{CHLD}; + $SIG{CHLD} = "IGNORE"; + # Search for bzip2. Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + # Forget my() to save chars for csh + # my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + $SIG{CHLD} = $chld; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub base64_wrap($) { + # base64 encode Perl code + # Split it into chunks of < 1000 bytes + # Prepend it with a decoder that eval's it + # Input: + # $eval_string = Perl code to run + # Returns: + # $shell_command = shell command that runs $eval_string + my $eval_string = shift; + return + "perl -e ". + ::Q(base64_zip_eval())." ". + join" ",::shell_quote(string_zip_base64($eval_string)); +} + +sub base64_eval($) { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * evals the result + # Reverse of string_base64 + eval + # Will be wrapped in ' so single quote is forbidden. + # Spaces are stripped so spaces cannot be significant. + # The funny 'use IPC::Open3'-syntax is to avoid spaces and + # to make it clear that this is a GNU Parallel command + # when looking at the process table. + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + my $eval = decode_base64(join"",@ARGV); + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub sshlogin_wrap($) { + # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree + (0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = $ENV{"TMPDIR"} || "'. + ::perl_quote_scalar($ENV{'PARALLEL_REMOTE_TMPDIR'}).'";'. + '$nice = '.$opt::nice.';'. + '$termseq = "'.$opt::termseq.'";'. + # } + q{ + # Check that $tmpdir is writable + -w $tmpdir || + die("$tmpdir\040is\040not\040writable.". + "\040Set\040PARALLEL_REMOTE_TMPDIR"); + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + # Set $script to a non-existent file name in $TMPDIR + do { + $script = $tmpdir."/par-job-$ENV{PARALLEL_SEQ}_". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $script); + # Create a script from the hex code + # that removes itself and runs the commands + open($fh,">",$script) || die; + # \040 = space - but we remove spaces in the script + # ' needed due to rc-shell + print($fh("rm\040\'$script\'\n",$bashfunc.$cmd)); + close $fh; + my $parent = getppid; + my $done = 0; + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + eval { setpgrp }; + # Set nice value + eval { setpriority(0,0,$nice) }; + # Run the script + exec($shell,$script); + die("exec\040failed: $!"); + } + while((not $done) and (getppid == $parent)) { + # Parent pid is not changed, so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } + if(not $done) { + # sshd is dead: User pressed Ctrl-C + # Kill as per --termseq + my @term_seq = split/,/,$termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + while(@term_seq && kill(0,-$pid)) { + kill(shift @term_seq, -$pid); + select(undef, undef, undef, (shift @term_seq)/1000); + } + } + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + my $fh = ::open_or_exit($_); + $Global::envdef = join("",<$fh>); + close $fh; + } + } + if(grep { /^_$/ } @vars) { + local $/ = "\n"; + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' ". + "in a clean environment first."); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%% + + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", + "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS", + "PARALLEL_JOBSLOT", $opt::process_slot_var, + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' + my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = (grep { !/PARALLEL_ENV/ } + grep { substr($ENV{$_},0,4) ne "() {" } @vars); + + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. + ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; + s/BASH_FUNC_(.*)(\(\)|%%)/$1/; + "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell."); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". + ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{ + if($shell=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{'parallel_bash_environment'}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh + if(not defined $self->{'sshlogin_wrap'}{$command}) { + my $sshlogin = $self->sshlogin(); + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $self->slot()); + $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string(); + $ENV{'PARALLEL_SSHHOST'} = $sshlogin->host(); + if ($opt::hostgroups) { + $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups(); + $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups(); + } + $ENV{'PARALLEL_PID'} = $$; + if($sshlogin->local()) { + if($opt::workdir) { + # Create workdir if needed. Then cd to it. + my $wd = $self->workdir(); + if($opt::workdir eq "." or $opt::workdir eq "...") { + # If $wd does not start with '/': Prepend $HOME + $wd =~ s:^([^/]):$ENV{'HOME'}/$1:; + } + ::mkdir_or_die($wd); + my $post = ""; + if($opt::workdir eq "...") { + $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";"); + + } + $command = "cd ".::Q($wd)." || exit 255; " . + $command . $post;; + } + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $perl_code = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. + "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $perl_code > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code); + } else { + $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code); + } + } else { + $self->{'sshlogin_wrap'}{$command} = $command; + } + } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = ::pQ($self->workdir()); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") &&}. + qq{exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $cmd = $command; + # q// does not quote \, so we must do that + $cmd =~ s/\\/\\\\/g; + + my $remote_command = $sshlogin->hexwrap + ($pwd.$envset.$bashfuncset.'$cmd='."q\0".$cmd."\0;". + monitor_parent_sshd_script()); + my ($pre,$post,$cleanup)=("","",""); + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = exitstatuswrapper($post); + } + $self->{'sshlogin_wrap'}{$command} = + ($pre + . $sshlogin->wrap($remote_command) + . ";" + . $post); + } + } + return $self->{'sshlogin_wrap'}{$command}; +} + +sub fill_templates($) { + # Replace replacement strings in template(s) + # Returns: + # @templates - File names of replaced templates + my $self = shift; + + if(%opt::template) { + my @template_name = + map { $self->{'commandline'}->replace_placeholders([$_],0,0) } + @{$self->{'commandline'}{'template_names'}}; + ::debug("tmpl","Names: @template_name\n"); + for(my $i = 0; $i <= $#template_name; $i++) { + open(my $fh, ">", $template_name[$i]) || die; + print $fh $self->{'commandline'}-> + replace_placeholders([$self->{'commandline'} + {'template_contents'}[$i]],0,0); + close $fh; + } + if($opt::cleanup) { + $self->add_rm(@template_name); + } + } +} + +sub filter($) { + # Replace replacement strings in filter(s) and evaluate them + # Returns: + # $run - 1=yes, undef=no + my $self = shift; + my $run = 1; + if(@opt::filter) { + for my $eval ($self->{'commandline'}-> + replace_placeholders(\@opt::filter,0,0)) { + $run &&= eval $eval; + } + $self->{'commandline'}{'skip'} ||= not $run; + } + return $run; +} + +sub transfer($) { + # Files to transfer + # Non-quoted and with {...} substituted + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + + my $transfersize = 0; + my @transfer = $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'transfer_files'},0,0); + for(@transfer) { + # filesize + if(-e $_) { + $transfersize += (stat($_))[7]; + } + } + $self->add_transfersize($transfersize); + return @transfer; +} + +sub transfersize($) { + my $self = shift; + return $self->{'transfersize'}; +} + +sub add_transfersize($) { + my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; + $opt::sqlworker and + $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(), + $self->{'transfersize'}); +} + +sub sshtransfer($) { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return($) { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize($) { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub add_returnsize($) { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; + $opt::sqlworker and + $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(), + $self->{'returnsize'}); +} + +sub sshreturn($) { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:) || + ($file =~ m:/\./:); # Is the path relative or /./? + my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync"); + my $basename = ::Q(::shell_quote_file(::basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR + # --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd" . " && " . + $sshlogin->rsync(). " $rsync_cd -- ".$sshlogin->host().':'. + $basename . " ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup($) { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->remote_cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= $sshlogin->wrap("rm -rf " . ::Q($workdir).';'); + } + return $cleancmd; +} + +sub remote_cleanup($) { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub exitstatuswrapper(@) { + # Input: + # @shellcode = shell code to execute + # Returns: + # shell script that returns current status after executing @shellcode + if($Global::cshell) { + return ('set _EXIT_status=$status; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } else { + return ('_EXIT_status=$?; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } +} + +sub workdir($) { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $self->{'commandline'}-> + replace_placeholders([$opt::workdir],0,0); + #$workdir = $opt::workdir; + # Rsync treats /./ special. We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:(.)/+$:$1:; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = $workdir; + } + return $self->{'workdir'}; +} + +sub parentdirs_of($) { + # Return: + # all parentdirs except . of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start($) { + # Setup STDOUT and STDERR for a job and start it. + # Returns: + # job-object or undef if job not to run + + sub open3_setpgrp_internal { + # Run open3+setpgrp followed by the command + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + my $pid; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + # The eval is needed to catch exception from open3 + eval { + if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) { + # Each child gets its own process group to make it safe to killall + eval{ setpgrp(0,0) }; + eval{ setpriority(0,0,$opt::nice) }; + exec($Global::shell,"-c",$command) + || ::die_bug("open3-$stdin_fh ".substr($command,0,200)); + } + }; + return $pid; + } + + sub open3_setpgrp_external { + # Run open3 on $command wrapped with a perl script doing setpgrp + # Works on systems that do not support open3(,,,"-") + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + + my $pid; + my @setpgrp_wrap = + ('perl','-e', + "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command) + || ::die_bug("open3-$stdin_fh"); + 1; + }; + return $pid; + } + + sub redefine_open3_setpgrp { + my $setgprp_cache = shift; + # Select and run open3_setpgrp_internal/open3_setpgrp_external + no warnings 'redefine'; + my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst"); + # Test to see if open3(x,x,x,"-") is fully supported + # Can an exported bash function be called via open3? + my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '. + 'else { exec("bash","-c","testfun && true"); }'; + my $bash = + ::shell_quote_scalar_default( + "testfun() { rm $name; }; export -f testfun; ". + "perl -MIPC::Open3 -e ". + ::shell_quote_scalar_default($script) + ); + my $redefine_eval; + # Redirect STDERR temporarily, + # so errors on MacOS X are ignored. + open my $saveerr, ">&STDERR"; + open STDERR, '>', "/dev/null"; + # Run the test + ::debug("init",qq{bash -c $bash 2>/dev/null}); + qx{ bash -c $bash 2>/dev/null }; + open STDERR, ">&", $saveerr; + + if(-e $name) { + # Does not support open3(x,x,x,"-") + # or does not have bash: + # Use (slow) external version + unlink($name); + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external'; + ::debug("init","open3_setpgrp_external chosen\n"); + } else { + # Supports open3(x,x,x,"-") + # This is 0.5 ms faster to run + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal'; + ::debug("init","open3_setpgrp_internal chosen\n"); + } + if(open(my $fh, ">", $setgprp_cache)) { + print $fh $redefine_eval; + close $fh; + } else { + ::debug("init","Cannot write to $setgprp_cache"); + } + eval $redefine_eval; + } + + sub open3_setpgrp { + my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" . + ::hostname() . "/setpgrp_func"; + sub read_cache() { + -e $setgprp_cache || return 0; + local $/ = undef; + open(my $fh, "<", $setgprp_cache) || return 0; + eval <$fh> || return 0; + close $fh; + return 1; + } + if(not read_cache()) { + redefine_open3_setpgrp($setgprp_cache); + } + # The sub is now redefined. Call it + return open3_setpgrp(@_); + } + + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + my $pid; + + if($Global::interactive or $Global::stderr_verbose) { + $job->interactive_start(); + } + # Must be run after $job->interactive_start(): + # $job->interactive_start() may call $job->skip() + if($job->{'commandline'}{'skip'} + or + not $job->filter()) { + # $job->skip() was called or job filtered + $command = "true"; + } + $job->openoutputfiles(); + $job->print_verbose_dryrun(); + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + if($opt::dryrun or $opt::sqlmaster) { $command = "true"; } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $job->slot()); + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + $job->add_rm($ENV{'PARALLEL_TMP'}); + $job->fill_templates(); + $ENV{'SSHPASS'} = $job->{'sshlogin'}->{'password'}; + ::debug("run", $Global::total_running, " processes . Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh) = ::gensym(); + $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command); + if($opt::roundrobin and not $opt::keeporder) { + # --keep-order will make sure the order will be reproducible + ::set_fh_non_blocking($stdin_fh); + } + $job->set_fh(0,"w",$stdin_fh); + if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); } + } elsif(($opt::tty or $opt::open_tty) and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + # The eval is needed to catch exception from open3 + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + my @wrap = ('perl','-e', + "eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command) + || ::die_bug("open3-/dev/tty"); + 1; + }; + close $devtty_fh; + $job->set_virgin(0); + } elsif($Global::semaphore) { + # Allow sem to read from stdin + $pid = open3_setpgrp("<&STDIN",$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } else { + $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub interactive_start($) { + my $self = shift; + my $command = $self->wrapped(); + if($Global::interactive) { + my $answer; + ::status_no_nl("$command ?..."); + do{ + open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); + $answer = <$tty_fh>; + close $tty_fh; + # Sometime we get an empty string (not even \n) + # Do not know why, so let us just ignore it and try again + } while(length $answer < 1); + if (not ($answer =~ /^\s*y/i)) { + $self->{'commandline'}->skip(); + } + } else { + print $Global::original_stderr "$command\n"; + } +} + +{ + my $tmuxsocket; + + sub tmux_wrap($) { + # Wrap command with tmux for session pPID + # Input: + # $actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo = ::tmpname("tmx"); + $self->add_rm($tmpfifo); + + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100."); + ::wait_and_exit(255); + } + if($opt::tmuxpane) { + # Move the command into a pane in window 0 + $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '. + $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '. + $actual_command; + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. tmux is confused by NUL."); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]/ /s; + $title = ::Q($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + my $quoted_space75 = ::Q(" ")x75; + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title .= $quoted_space75; + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + ::debug("tmux", "Start: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach"); + if($opt::fg) { + if(not fork) { + # Run tmux in the foreground + # Wait for the socket to appear + while (not -e $tmuxsocket) { } + `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`; + exit; + } + } + ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach"); + } + $tmux = "sh -c '". + $ENV{'PARALLEL_TMUX'}. + " -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" . + $ENV{'PARALLEL_TMUX'}. + " -S $tmuxsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + + return "mkfifo $tmpfifo && $tmux ". + # Run in tmux + ::Q + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ]. + $tmpfifo."&". + "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; }. + q{/(\d+)h/ and exit($1);exit$c' }.$tmpfifo; + } +} + +sub is_already_in_results($) { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + if($Global::csvsep) { + if($opt::joblog) { + # OK: You can look for job run in joblog + return 0 + } else { + ::warning_once( + "--resume --results .csv/.tsv/.json is not supported yet\n"); + # TODO read and parse the file + return 0 + } + } + my $out = $job->{'commandline'}->results_out(); + ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n"); + return(-e $out."stdout" or -f $out); +} + +sub is_already_in_joblog($) { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog($) { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub should_be_retried($) { + # Should this job be retried? + # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if (not defined $opt::retries) { return 0; } + if(not $self->exitstatus() and not $self->exitsignal()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0; + } else { + # The job failed. Should it be retried? + $self->add_failed_here(); + my $retries = $self->{'commandline'}-> + replace_placeholders([$opt::retries],0,0); + # 0 = Inf + if($retries == 0) { $retries = 2**31; } + # Ignore files already unlinked to avoid memory leak + $self->{'unlink'} = [ grep { -e $_ } @{$self->{'unlink'}} ]; + map { -e $_ or delete $Global::unlink{$_} } keys %Global::unlink; + if($self->total_failed() == $retries) { + # This has been retried enough + return 0; + } else { + # This command should be retried + $self->set_endtime(undef); + $self->reset_exitstatus(); + $Global::JobQueue->unget($self); + ::debug("run", "Retry ", $self->seq(), "\n"); + return 1; + } + } +} + +{ + my (%print_later,$job_seq_to_print); + + sub print_earlier_jobs($) { + # Print jobs whose output is postponed due to --keep-order + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_seq_to_print ||= 1; + my $returnsize = 0; + ::debug("run", "Looking for: $job_seq_to_print ", + "This: ", $job->seq(), "\n"); + for(;vec($Global::job_already_run,$job_seq_to_print,1); + $job_seq_to_print++) {} + while(my $j = $print_later{$job_seq_to_print}) { + $returnsize += $j->print(); + if($j->endtime()) { + # Job finished - look at the next + delete $print_later{$job_seq_to_print}; + $job_seq_to_print++; + next; + } else { + # Job not finished yet - look at it again next round + last; + } + } + return $returnsize; + } +} + +sub print($) { + # Print the output of the jobs + # Returns: N/A + my $self = shift; + + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + ::rm($self->fh(1,"name")); + } + if($opt::pipe and $self->virgin() and not $opt::tee) { + # Skip --joblog, --dryrun, --verbose + } else { + if($opt::ungroup) { + # NULL returnsize = 0 returnsize + $self->returnsize() or $self->add_returnsize(0); + if($Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + # Printing is only relevant for grouped/--line-buffer output. + $opt::ungroup and return; + } + } + # Check for disk full + ::exit_if_disk_full(); + } + + my $returnsize = $self->returnsize(); + my @fdno; + if($opt::latestline) { + @fdno = (1); + } else { + @fdno = (sort { $a <=> $b } keys %Global::fh); + } + for my $fdno (@fdno) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fh = $Global::fh{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; + } + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); + if($Global::linebuffer) { + # Line buffered print out + $self->print_linebuffer($fdno,$in_fh,$out_fh); + } elsif($opt::files) { + $self->print_files($fdno,$in_fh,$out_fh); + } elsif($opt::results) { + $self->print_results($fdno,$in_fh,$out_fh); + } else { + $self->print_normal($fdno,$in_fh,$out_fh); + } + flush $out_fh; + } + ::debug("print", "<<joboutput\n"); + if(defined $self->{'exitstatus'} + and not ($self->virgin() and $opt::pipe)) { + if($Global::joblog and not $opt::sqlworker) { + # Add to joblog when finished + $self->print_joblog(); + } + if($opt::sqlworker and not $opt::results) { + $Global::sql->output($self); + } + if($Global::csvsep) { + # Add output to CSV when finished + $self->print_csv(); + } + if($Global::jsonout) { + $self->print_json(); + } + } + return $returnsize - $self->returnsize(); +} + +{ + my %jsonmap; + + sub print_json($) { + my $self = shift; + sub jsonquote($) { + my $a = shift; + if(not $jsonmap{"\001"}) { + map { $jsonmap{sprintf("%c",$_)} = + sprintf '\u%04x', $_ } 0..31; + } + $a =~ s/\\/\\\\/g; + $a =~ s/\"/\\"/g; + $a =~ s/([\000-\037])/$jsonmap{$1}/g; + return $a; + } + + my $cmd; + if($Global::verbose <= 1) { + $cmd = jsonquote($self->replaced()); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = jsonquote(join " ", @{$self->{'commandline'}}); + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + # { + # "Seq": 12, + # "Host": "/usr/bin/ssh foo@lo", + # "Starttime": 1608344711.743, + # "JobRuntime": 0.01, + # "Send": 0, + # "Receive": 10, + # "Exitval": 0, + # "Signal": 0, + # "Command": "echo 1", + # "V": [ + # "1" + # ], + # "Stdout": "1\n", + # "Stderr": "" + # } + # + printf($Global::csv_fh + q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ). + q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ). + q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }). + "\n", + $self->seq(), + jsonquote($self->sshlogin()->string()), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd, + (join ",", + map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref], + ), + jsonquote($self->{'output'}{1}), + jsonquote($self->{'output'}{2}) + ); + } +} + +{ + my $header_printed; + + sub print_csv($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = join " ", @{$self->{'commandline'}}; + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + if(not $header_printed) { + # Variable headers + # Normal => V1..Vn + # --header : => first value from column + my @V; + if($opt::header) { + my $i = 1; + @V = (map { $Global::input_source_header{$i++} } + @$record_ref[1..$#$record_ref]); + } else { + my $V = "V1"; + @V = (map { $V++ } @$record_ref[1..$#$record_ref]); + } + print $Global::csv_fh + (map { $$_ } + combine_ref("Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command", + @V, + "Stdout","Stderr" + )),"\n"; + $header_printed++; + } + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + print $Global::csv_fh + (map { $$_ } + combine_ref + ($self->seq(), + $self->sshlogin()->string(), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), \$cmd, + \@$record_ref[1..$#$record_ref], + \$self->{'output'}{1}, + \$self->{'output'}{2})),"\n"; + } +} + +sub combine_ref($) { + # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu) + my @part = @_; + my $sep = $Global::csvsep; + my $quot = '"'; + my @out = (); + + my $must_be_quoted; + for my $column (@part) { + # Memory optimization: Content transferred as reference + if(ref $column ne "SCALAR") { + # Convert all columns to scalar references + my $v = $column; + $column = \$v; + } + if(not defined $$column) { + $$column = ''; + next; + } + + $must_be_quoted = 0; + + if($$column =~ s/$quot/$quot$quot/go){ + # Replace " => "" + $must_be_quoted ||=1; + } + if($$column =~ /[\s\Q$sep\E]/o){ + # Put quotes around if the column contains , + $must_be_quoted ||=1; + } + + $Global::use{"bytes"} ||= eval "use bytes; 1;"; + if ($$column =~ /\0/) { + # Contains \0 => put quotes around + $must_be_quoted ||=1; + } + if($must_be_quoted){ + push @out, \$sep, \$quot, $column, \$quot; + } else { + push @out, \$sep, $column; + } + } + # Remove the first $sep: ,val,"val" => val,"val" + shift @out; + return @out; +} + +sub print_files($) { + # Print the name of the file containing stdout on stdout + # Uses: + # $opt::pipe + # $opt::group = Print when job is done + # $opt::linebuffer = Print ASAP + # Returns: N/A + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files because --files was set + for my $fdno (1,2) { + ::rm($self->fh($fdno,"name")); + ::rm($self->fh($fdno,"unlink")); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + $self->tag(), $self->fh($fdno,"name"); + } + $self->add_returnsize(-s $self->fh($fdno,"name")); + # Mark as printed - do not print again + $self->set_fh($fdno,"name",undef); + } +} + + +# Different print types +# (--ll | --ll --bar | --lb | --group | --parset | --sql-worker) +# (--files | --results (.json|.csv|.tsv) ) +# --color-failed +# --color +# --keep-order +# --tag +# --bar +{ + my ($up,$currow,$maxrow); + my ($minvisible,%print_later,%notvisible); + my (%binmodeset,%tab); + + sub latestline_init() { + # cursor_up cuu1 = up one line + $up = `sh -c "tput cuu1 </dev/tty" 2>/dev/null`; + chomp($up); + $currow = 1; + $maxrow = 1; + $minvisible = 1; + for(0..8) { + $tab{$_} = " "x(8-($_%8)); + } + } + + sub print_latest_line($) { + my $self = shift; + my $out_fh = shift; + my $row = $self->row(); + # Is row visible? + if(not ($minvisible <= $row + and + $row < $minvisible + ::terminal_rows() - 1)) { + return; + } + if(not $binmodeset{$out_fh}++) { + # Enable utf8 if possible + eval q{ binmode $out_fh, "encoding(utf8)"; }; + } + my ($color,$reset_color) = $self->color(); + # Strings with TABs give the wrong length. Untabify strings + my $termcol = ::terminal_columns(); + my $untabify_tag = ::decode_utf8($self->untabtag()); + my $taglen = length $untabify_tag; + my $truncated_tag = ""; + my $strlen = $termcol - $taglen; + my $untabify_str = ::decode_utf8($self->{$out_fh,'latestline'}); + $untabify_str =~ s/\t/$tab{$-[0]%8}/g; + my $strspc = $strlen - length $untabify_str; + $strlen--; + if($strlen < 0) { $strlen = 0;} + # Line is shorter than terminal width: add " " + # Line is longer than terminal width: add ">" + my $truncated = ($strspc > 0) ? " " : ">"; + if($taglen > $termcol) { + # Tag is longer than terminal width: add ">" to tag + # Remove $truncated (it will not be shown at all) + $taglen = $termcol - 1; + $truncated_tag = ">"; + $truncated = ""; + } + + $maxrow = $row > $maxrow ? $row : $maxrow; + printf($out_fh + ("%s%s%s". # up down \r + "%.${taglen}s%s". # tag + "%s%.${strlen}s%s%s". # color + line + "%s" # down + ), + "$up"x($currow - $row), + "\n"x($row - $currow), + "\r", $untabify_tag,$truncated_tag, + $color, $untabify_str, $truncated, $reset_color, + "\n"x($maxrow - $row + 1)); + $currow = $maxrow + 1; + } + + sub print_linebuffer($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($opt::compress) { + if($?) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + # Blocked reading in final round + for my $fdno (1,2) { ::set_fh_blocking($self->fh($fdno,'r')); } + } + if($opt::latestline) { $print_later{$self->row()} = $self; } + } + if(not $self->virgin()) { + if($opt::files or ($opt::results and not $Global::csvsep)) { + # Print filename + if($fdno == 1 and not $self->fh($fdno,"printed")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, $self->tag(), + $self->fh($fdno,"name")); + } + $self->set_fh($fdno,"printed",1); + } + # No need for reading $in_fh, as it is from "cat >/dev/null" + } else { + # Read halflines and print full lines + my $outputlength = 0; + my $halfline_ref = $self->{'halfline'}{$fdno}; + my ($buf,$i,$rv); + # 1310720 gives 1.2 GB/s + # 131072 gives 0.9 GB/s + # The optimal block size differs + # It has been measured on: + # AMD 6376: 60800 (>70k is also reasonable) + # Intel i7-3632QM: 52-59k, 170-175k + # seq 64 | ppar --_test $1 --lb \ + # 'yes {} `seq 1000`|head -c 10000000' >/dev/null + while($rv = sysread($in_fh, $buf, 60800)) { + $outputlength += $rv; + # TODO --recend + # Treat both \n and \r as line end + # Only test for \r if there is no \n + # Test: + # perl -e '$a="x"x1000000; + # $b="$a\r$a\n$a\r$a\n"; + # map { print $b,$_ } 1..10' + $i = ((rindex($buf,"\n")+1) || (rindex($buf,"\r")+1)); + if($i) { + if($opt::latestline) { + # Keep the latest full line + my $l = join('', @$halfline_ref, + substr($buf,0,$i-1)); + my $j = ((rindex($l,"\n")+1) || + (rindex($l,"\r")+1)); + $self->{$out_fh,'latestline'} = substr($l,$j); + # Remove the processed part + # by keeping the unprocessed part + @$halfline_ref = (substr($buf,$i)); + } else { + # One or more complete lines were found + if($Global::color) { + my $print = join("",@$halfline_ref, + substr($buf,0,$i)); + chomp($print); + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # \n => reset \n color tag + $print =~ s{([\n\r])(?=.|$)} + {$reset_color$1$colortag}gs; + print($out_fh $colortag, $print, + $reset_color, "\n"); + } elsif($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + if($Global::cache_replacement_eval) { + # Replace with the same value for tag + my $tag = $self->tag(); + unshift @$halfline_ref, $tag; + # TODO --recend that can be partially in + # @$halfline_ref + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1$tag/gs; + } else { + # Replace with freshly computed tag-value + unshift @$halfline_ref, $self->tag(); + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1.$self->tag()/gse; + } + # The length changed, + # so find the new ending pos + $i = ::max((rindex($buf,"\n")+1), + (rindex($buf,"\r")+1)); + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } else { + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, + @$halfline_ref, substr($buf,0,$i)); + } + # Remove the printed part by keeping the unprinted + @$halfline_ref = (substr($buf,$i)); + } + } else { + # No newline, so append to the halfline + push @$halfline_ref, $buf; + } + } + $self->add_returnsize($outputlength); + if($opt::latestline) { $self->print_latest_line($out_fh); } + } + if(defined $self->{'exitstatus'}) { + if($opt::latestline) { + # Force re-computing color if --colorfailed + if($opt::colorfailed) { delete $self->{'color'}; } + $self->print_latest_line($out_fh); + # Print latest line from jobs that are already done + while($print_later{$minvisible}) { + $print_later{$minvisible}->print_latest_line($out_fh); + delete $print_later{$minvisible}; + $minvisible++; + } + # Print latest line from jobs that are on screen now + for(my $row = $minvisible; + $row < $minvisible -1 + ::terminal_rows(); + $row++) { + $print_later{$row} and + $print_later{$row}->print_latest_line($out_fh); + } + } + if($opt::files or ($opt::results and not $Global::csvsep)) { + $self->add_returnsize(-s $self->fh($fdno,"name")); + } else { + # If the job is dead: print the remaining partial line + # read remaining + my $halfline_ref = $self->{'halfline'}{$fdno}; + if(grep /./, @$halfline_ref) { + my $returnsize = 0; + for(@{$self->{'halfline'}{$fdno}}) { + $returnsize += length $_; + } + $self->add_returnsize($returnsize); + if($opt::tag or defined $opt::tagstring) { + # Prepend $tag the the remaining half line + unshift @$halfline_ref, $self->tag(); + } + # Print the partial line (halfline) + print $out_fh @{$self->{'halfline'}{$fdno}}; + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, @$halfline_ref); + } + @$halfline_ref = (); + } + } + if($self->fh($fdno,"rpid") and + CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } + } + } + } +} + +sub free_ressources() { + my $self = shift; + if(not $opt::ungroup) { + my $fh; + for my $fdno (sort { $a <=> $b } keys %Global::fh) { + $fh = $self->fh($fdno,"w"); + $fh and close $fh; + $fh = $self->fh($fdno,"r"); + $fh and close $fh; + } + } +} + +sub print_parset($) { + # Wrap output with shell script code to set as variables + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $outputlength = 0; + + ::debug("parset","print $Global::parset"); + if($Global::parset eq "assoc") { + # Start: (done in parse_parset()) + # eval "`echo 'declare -A myassoc; myassoc=( + # Each: (done here) + # [$'a\tb']=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + print '[',::Q($self->{'commandline'}-> + replace_placeholders(["\257<\257>"],0,0)),']='; + } elsif($Global::parset eq "array") { + # Start: (done in parse_parset()) + # eval "`echo 'myassoc=( + # Each: (done here) + # $'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + } elsif($Global::parset eq "var") { + # Start: (done in parse_parset()) + # <empty> + # Each: (done here) + # var=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # <empty> + if(not @Global::parset_vars) { + ::error("Too few named destination variables"); + ::wait_and_exit(255); + } + print shift @Global::parset_vars,"="; + } + local $/ = "\n"; + my $tag = $self->tag(); + my @out; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @out, $tag,$_; + } + # Remove last newline + # This often makes it easier to use the output in shell + @out and ${out[$#out]} =~ s/\n$//s; + print ::Q(join("",@out)),"\n"; + return $outputlength; +} + +sub print_normal($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::parset and $fdno == 1) { + $outputlength += $self->print_parset($fdno,$in_fh,$out_fh); + } elsif(defined $opt::tag or defined $opt::tagstring + or $Global::color or $opt::colorfailed) { + if($Global::color or $opt::colorfailed) { + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # Read line by line + local $/ = "\n"; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + chomp; + s{([\n\r])(?=.|$)}{$reset_color$1$colortag}gs; + print $out_fh $colortag,$_,$reset_color,"\n"; + } + } else { + my $tag = $self->tag(); + my $pretag = 1; + my $s; + while(sysread($in_fh,$buf,32767)) { + $outputlength += length $buf; + $buf =~ s/(?<=[\r\n])(?=.)/$tag/gs; + print $out_fh ($pretag ? $tag : ""),$buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + ($pretag ? $tag : ""),$buf; + } + # Should next print start with a tag? + $s = substr($buf, -1); + # This is faster than ($s eq "\n") || ($s eq "\r") + $pretag = ($s eq "\n") ? 1 : ($s eq "\r"); + } + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + # Intel i7-3632QM: 25k- + while(sysread($in_fh,$buf,32767)) { + print $out_fh $buf; + $outputlength += length $buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_results($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::membuffer) { + # Read data into membuffer + if($opt::tag or $opt::tagstring) { + # Read line by line + local $/ = "\n"; + my $tag = $self->tag(); + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @{$self->{'output'}{$fdno}}, $tag, $_; + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + while(sysread($in_fh,$buf,60000)) { + $outputlength += length $buf; + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } else { + # Not membuffer: No need to read the file + if($opt::compress) { + $outputlength = -1; + } else { + # Determine $outputlength = file length + seek($in_fh, 0, 2) || ::die_bug("cannot seek result"); + $outputlength = tell($in_fh); + } + } + if($fdno == 1) { $self->add_returnsize($outputlength); } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_joblog($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = $self->wrapped(); + } + # Newlines make it hard to parse the joblog + $cmd =~ s/\n/\0/g; + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag($) { + my $self = shift; + if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) { + if(defined $opt::tag or defined $opt::tagstring) { + $self->{'tag'} = + ($self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)). + "\t"; + } else { + # No tag + $self->{'tag'} = ""; + } + } + return $self->{'tag'}; +} + +sub untabtag($) { + # tag with \t replaced with spaces + my $self = shift; + my $tag = $self->tag(); + if(not defined $self->{'untab'}{$tag}) { + my $t = $tag; + $t =~ s/\t/" "x(8-($-[0]%8))/eg; + $self->{'untab'}{$tag} = $t; + } + return $self->{'untab'}{$tag}; +} + +{ + my (@color,$eol,$reset_color,$init); + + sub init_color() { + if(not $init) { + $init = 1; + # color combinations that are readable: black/white text + # on colored background, but not white on yellow + my @color_combinations = + # Force each color code to have the same length in chars + # This will make \t work as expected + ((map { [sprintf("%03d",$_),"000"] } + 6..7,9..11,13..15,40..51,75..87,113..123,147..159, + 171..182,185..231,249..254), + (map { [sprintf("%03d",$_),231] } + 1..9,12..13,16..45,52..81,88..114,124..149, + 160..178,180,182..184,196..214,232..250)); + # reorder list so adjacent colors are dissimilar + # %23 and %7 were found experimentally + @color_combinations = @color_combinations[ + sort { ($a%23 <=> $b%23) or ($b%7 <=> $a%7) } + 0..$#color_combinations + ]; + @color = map { + # TODO Can this be done with `tput` codes? + "\033[48;5;".$_->[0].";38;5;".$_->[1]."m" + } @color_combinations; + + # clr_eol el = clear to end of line + $eol = `sh -c "tput el </dev/tty" 2>/dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + # exit_attribute_mode sgr0 = turn off all attributes + $reset_color = `sh -c "tput sgr0 </dev/tty" 2>/dev/null`; + chomp($reset_color); + if($reset_color eq "") { $reset_color = "\033[m"; } + } + } + + sub color($) { + my $self = shift; + if(not defined $self->{'color'}) { + if($Global::color) { + # Choose a value based on the seq + $self->{'color'} = $color[$self->seq() % ($#color+1)].$eol; + $self->{'reset_color'} = $reset_color; + } else { + $self->{'color'} = ""; + $self->{'reset_color'} = ""; + } + if($opt::colorfailed) { + if($self->exitstatus()) { + # White on Red + # Can this be done more generally? + $self->{'color'} = + "\033[48;5;"."196".";38;5;"."231"."m".$eol; + $self->{'reset_color'} = $reset_color; + } + } + } + return ($self->{'color'},$self->{'reset_color'}); + } +} + +sub hostgroups($) { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = + $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus($) { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus($$) { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } + $opt::sqlworker and + $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(), + $exitstatus); +} + +sub reset_exitstatus($) { + my $self = shift; + undef $self->{'exitstatus'}; +} + +sub exitsignal($) { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal($$) { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; + $opt::sqlworker and + $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(), + $exitsignal); +} + +{ + my $total_jobs; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? + # Returns: N/A + my $job = shift; + my $limit; + if($Global::semaphore) { + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + if($job->exitstatus() or $job->exitsignal()) { + # Job failed + $Global::exitstatus++; + $Global::total_failed++; + if($Global::halt_fail) { + ::status("$Global::progname: This job failed:", + $job->replaced()); + $limit = $Global::total_failed; + } + } elsif($Global::halt_success) { + ::status("$Global::progname: This job succeeded:", + $job->replaced()); + $limit = $Global::total_completed - $Global::total_failed; + } + if($Global::halt_done) { + ::status("$Global::progname: This job finished:", + $job->replaced()); + $limit = $Global::total_completed; + } + if(not defined $limit) { + return "" + } + # --halt # => 1..100 (number of jobs failed, 101 means > 100) + # --halt % => 1..100 (pct of jobs failed) + if($Global::halt_pct and not $Global::halt_count) { + $total_jobs ||= $Global::JobQueue->total_jobs(); + # From the pct compute the number of jobs that must fail/succeed + $Global::halt_count = $total_jobs * $Global::halt_pct; + } + if($limit >= $Global::halt_count) { + # At least N jobs have failed/succeded/completed + # or at least N% have failed/succeded/completed + # So we should prepare for exit + if($Global::halt_fail or $Global::halt_done) { + # Set exit status + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + # --halt now,fail=X% or soon,fail=X% + # --halt now,done=X% or soon,done=X% + $Global::halt_exitstatus = + ::ceil($Global::total_failed / $total_jobs * 100); + } elsif($Global::halt_count) { + # --halt now,fail=X or soon,fail=X + # --halt now,done=X or soon,done=X + $Global::halt_exitstatus = + ::min($Global::total_failed,101); + } + if($Global::halt_count and $Global::halt_count == 1) { + # --halt now,fail=1 or soon,fail=1 + # --halt now,done=1 or soon,done=1 + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + } + ::debug("halt","Pct: ",$Global::halt_pct, + " count: ",$Global::halt_count, + " status: ",$Global::halt_exitstatus,"\n"); + } elsif($Global::halt_success) { + $Global::halt_exitstatus = 0; + } + if($Global::halt_when eq "soon") { + $Global::start_no_new_jobs ||= 1; + if(scalar(keys %Global::running) > 0) { + # Only warn if there are more jobs running + ::status + ("$Global::progname: Starting no more jobs. ". + "Waiting for ". (keys %Global::running). + " jobs to finish."); + } + } + return($Global::halt_when); + } + return ""; + } +} + + +package CommandLine; + +sub new($) { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_list_flat' => [], + 'arg_list_flat_orig' => [undef], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub flush_cache() { + my $self = shift; + for my $arglist (@{$self->{'arg_list'}}) { + for my $arg (@$arglist) { + $arg->flush_cache(); + } + } + $self->{'arg_queue'}->flush_cache(); + $self->{'replaced'} = undef; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub slot($) { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$Global::max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; +} + +{ + my $already_spread; + my $darwin_max_len; + + sub populate($) { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::usable_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::usable_command_line_length || die; + if($^O eq "darwin") { + # Darwin's limit is affected by: + # * number of environment names (variables+functions) + # * size of environment + # * the length of arguments: + # a one-char argument lowers the limit by 5 + # To be safe assume all arguments are one-char + # The max_len is cached between runs, but if the size of + # the environment is different we need to recompute the + # usable max length for this run of GNU Parallel + # See https://unix.stackexchange.com/a/604943/2972 + if(not $darwin_max_len) { + my $envc = (keys %ENV); + my $envn = length join"",(keys %ENV); + my $envv = length join"",(values %ENV); + $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10; + ::debug("init", + "length: $darwin_max_len ". + "3+($max_len - $envn - $envv)/5 - $envc*2"); + } + $max_len = $darwin_max_len; + } + if($opt::cat or $opt::fifo) { + # Get the empty arg added by --pipepart (if any) + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); + last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (". + $self->len(). " >= ". + $max_len. + ") at input ". + $self->{'arg_queue'}->arg_number(). + ": ". + ((length $args > 50) ? + (substr($args,0,50))."..." : + $args)); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + $Global::JobQueue->flush_total_jobs(); + } + + if($opt::sqlmaster) { + # Insert the V1..Vn for this $seq in SQL table + # instead of generating one + $Global::sql->insert_records($self->seq(), $self->{'command'}, + $self->{'arg_list_flat_orig'}); + } + } +} + +sub push($) { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record; + push @{$self->{'arg_list_flat'}}, @$record; + push @{$self->{'arg_list'}}, $record; + # Make @arg available for {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + my $col; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(-?\d+)(?:\D.*|)$/) { + # Positional replacement string + # Deal with negative positional replacement string + $col = ($1 < 0) ? $1 : $1-1; + if(defined($record->[$col])) { + $self->{'len'}{$perlexpr} += + length $record->[$col]->replace($perlexpr,$quote_arg,$self); + } + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} += + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } +} + +sub pop($) { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + # pop off arguments from @$record + splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1; + splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1; + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(\d+) /) { + # Positional + defined($record->[$1-1]) or next; + $self->{'len'}{$perlexpr} -= + length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } + return $record; +} + +sub pop_all($) { + # Remove all arguments and zeros the length of replacement perlexpr + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} = 0; + } + $self->{'arg_list'} = []; + $self->{'arg_list_flat_orig'} = [undef]; + $self->{'arg_list_flat'} = []; + return @popped; +} + +sub number_of_args($) { + # The number of records + # Returns: + # number of records + my $self = shift; + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs($) { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string($) { + # Returns: + # all unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub results_out($) { + sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + # Uses: + # $Global::max_file_length is set + # Returns: + # $Global::max_file_length + my $testdir = shift; + + my $upper = 100_000_000; + # Dir length of 8 chars is supported everywhere + my $len = 8; + my $dir = "d"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "d"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "d"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; + } + + my $self = shift; + my $out = $self->replace_placeholders([$opt::results],0,0); + if($out eq $opt::results) { + # $opt::results simple string: Append args_as_dirname + my $args_as_dirname = $self->args_as_dirname(); + # Output in: prefix/name1/val1/name2/val2/stdout + $out = $opt::results."/".$args_as_dirname; + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + # mkpath failed: Argument probably too long. + # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + $args_as_dirname = $self->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + $out = $opt::results."/".$args_as_dirname; + File::Path::mkpath($out); + } + $out .="/"; + } else { + if($out =~ m:/$:) { + # / = dir + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + ::error("Cannot make dir '$out'."); + ::wait_and_exit(255); + } + } else { + $out =~ m:(.*)/:; + File::Path::mkpath($1); + } + } + return $out; +} + +sub args_as_dirname($) { + # Returns: + # all unmodified arguments joined with '/' (similar to {}) + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my @res = (); + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + map { my $s = $_; + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + $s =~ s/\\/\\\\/g; + $s =~ s/\t/\\t/g; + $s =~ s/\0/\\0/g; + $s =~ s:/:\\_:g; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig()); + } + } + return join "/", @res; +} + +sub header_indexes_sorted($) { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. $max_col; + return @header_indexes_sorted; +} + +sub len($) { + # Uses: + # @opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + # MacOS has an overhead of 8 bytes per argument + my $darwin = ($^O eq "darwin") ? 8 : 0; + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + if($darwin) { + $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin; + } + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between recargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + if($darwin) { + $len += ($recargs * $self->{'replacecount'}{$replstring} + * $darwin); + } + } + } + if(defined $Global::parallel_env) { + # If we are using --env, add the prefix for that, too. + $len += length $Global::parallel_env; + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: ' => "'" + " => '"' + # TODO can we count the number of expanding chars? + # and count them in arguments, too? + $len *= 3; + } + if(@opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: ' => "'" + for(@opt::shellquote) { + $len *= 3; + } + $len *= 5; + } + if(@opt::sshlogin) { + # Pessimistic length if remote + # Worst case is BASE64 encoding 3 bytes -> 4 bytes + $len = int($len*4/3); + } + return $len; +} + +sub replaced($) { + # Uses: + # $Global::quote_replace + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +sub replace_placeholders($$$$) { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? + # Uses: + # @Arg::arg = arguments as strings to be use in {= =} + # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + + # Token description: + # \0spc = unquoted space + # \0end = last token element + # \0ign = dummy token to be ignored + # \257<...\257> = replacement expression + # " " = quoted space, that splits -X group + # text = normal text - possibly part of -X group + my $spacer = 0; + my @tokens = grep { length $_ > 0 } map { + if(/^\257<|^ $/) { + # \257<...\257> or space + $_ + } else { + # Split each space/tab into a token + split /(?=\s)|(?<=\s)/ + } + } + # Split \257< ... \257> into own token + map { split /(?=\257<)|(?<=\257>)/ } + # Insert "\0spc" between every element + # This space should never be quoted + map { $spacer++ ? ("\0spc",$_) : $_ } + map { $_ eq "" ? "\0empty" : $_ } + @$targetref; + + if(not @tokens) { + # @tokens is empty: Return empty array + return @tokens; + } + ::debug("replace", "Tokens ".join":",@tokens,"\n"); + # Make it possible to use $arg[2] in {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + # Flat list: + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ] + if(not @{$self->{'arg_list_flat'}}) { + @{$self->{'arg_list_flat'}} = Arg->new(""); + } + my $argref = $self->{'arg_list_flat'}; + # Number of arguments - used for positional arguments + my $n = $#$argref+1; + + # $self is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + # @replaced = tokens with \257< \257> replaced + my @replaced; + if($self->{'context_replace'}) { + my @ctxgroup; + for my $t (@tokens,"\0end") { + # \0end = last token was end of tokens. + if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") { + # Context group complete: Replace in it + if(grep { /^\257</ } @ctxgroup) { + # Context group contains a replacement string: + # Copy once per arg + my $space = "\0ign"; + for my $arg (@$argref) { + my $normal_replace; + # Push output + # Put unquoted space before each context group + # except the first + CORE::push @replaced, $space, map { + $a = $_; + if($a =~ + s{\257<(-?\d+)?(.*)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe) { + # Token is \257<..\257> + } else { + if($Global::escape_string_present) { + # Command line contains \257: + # Unescape it \257\256 => \257 + $a =~ s/\257\256/\257/g; + } + } + $a + } @ctxgroup; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # Context group has no a replacement string: Copy it once + CORE::push @replaced, map { + $Global::escape_string_present and s/\257\256/\257/g; $_; + } @ctxgroup; + } + # New context group + @ctxgroup=(); + } + if($t eq "\0spc" or $t eq " ") { + CORE::push @replaced,$t; + } else { + CORE::push @ctxgroup,$t; + } + } + } else { + # @group = @token + # Replace in group + # Push output + # repquote = no if {} first on line, no if $quote, yes otherwise + for my $t (@tokens) { + if($t =~ /^\257</) { + my $space = "\0ign"; + for my $arg (@$argref) { + my $normal_replace; + $a = $t; + $a =~ + s{\257<(-?\d+)?(.*)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? + # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe; + CORE::push @replaced, $space, $a; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # No replacement + CORE::push @replaced, map { + $Global::escape_string_present and s/\257\256/\257/g; $_; + } $t; + } + } + } + *Arg::arg = []; + ::debug("replace","Replaced: ".join":",@replaced,"\n"); + + # Put tokens into groups that may be quoted. + my @quotegroup; + my @quoted; + for (map { $_ eq "\0empty" ? "" : $_ } + grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" } + @replaced, "\0end") { + if($_ eq "\0spc" or $_ eq "\0end") { + # \0spc splits quotable groups + if($quote) { + if(@quotegroup) { + CORE::push @quoted, ::Q(join"",@quotegroup);; + } + } else { + CORE::push @quoted, join"",@quotegroup; + } + @quotegroup = (); + } else { + CORE::push @quotegroup, $_; + } + } + ::debug("replace","Quoted: ".join":",@quoted,"\n"); + return wantarray ? @quoted : "@quoted"; +} + +sub skip($) { + # Skip this job + my $self = shift; + $self->{'skip'} = 1; +} + + +package CommandLineQueue; + +sub new($) { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift || 0; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @unget = (); + my $posrpl; + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $seq = 1; + # Replace replacement strings with {= perl expr =} + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + @command = merge_rpl_parts(@command); + + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \257< and \257> + # in options that can contain replacement strings: + # @command, --transferfile, --return, + # --tagstring, --workdir, --results + for(@command, @$transfer_files, @$return_files, + @$template_names, @$template_contents, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries, + @opt::filter) { + # Skip if undefined + defined($_) or next; + # Escape \257 => \257\256 + $Global::escape_string_present += s/\257/\257\256/g; + # Needs to match rightmost left parens (Perl defaults to leftmost) + # to deal with: {={==} and {={==}=} + # Replace {= -> \257< and =} -> \257> + # + # Complex way to do: + # s/{=(.*)=}/\257<$1\257>/g + # which would not work + s[\Q$Global::parensleft\E # Match {= + # Match . unless the next string is {= or =} + # needed to force matching the shortest {= =} + ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?) + \Q$Global::parensright\E ] # Match =} + {\257<$1\257>}gxs; + for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { + # Replace long --rpl's before short ones, as a short may be a + # substring of a long: + # --rpl '% s/a/b/' --rpl '%% s/b/a/' + # + # Replace the shorthand string (--rpl) + # with the {= perl expr =} + # + # Avoid searching for shorthand strings inside existing {= perl expr =} + # + # Replace $$1 in {= perl expr =} with groupings in shorthand string + # + # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;' + # echo {/.tar/.gz} ::: UU.tar.gz + my ($prefix,$grp_regexp,$postfix) = + $rpl =~ /^( [^(]* ) # Prefix - e.g. {%% + ( \(.*\) )? # Group capture regexp - e.g (.*) + ( [^)]* )$ # Postfix - e.g } + /xs; + $grp_regexp ||= ''; + my $rplval = $Global::rpl{$rpl}; + while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? ) + # Don't replace after \257 unless \257> + \Q$prefix\E $grp_regexp \Q$postfix\E} + { + # The start remains the same + my $unchanged = $1; + # Dummy entry to start at 1. + my @grp = (1); + # $2 = first ()-group in $grp_regexp + # Put $2 in $grp[1], Put $3 in $grp[2] + # so first ()-group in $grp_regexp is $grp[1]; + for(my $i = 2; defined $grp[$#grp]; $i++) { + push @grp, eval '$'.$i; + } + my $rv = $rplval; + # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2 + # in the code to be executed + $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx; + # prepend with $_pAr_gRp1 = perlquote($1), + my $set_args = ""; + for(my $i = 1;defined $grp[$i]; $i++) { + $set_args .= "\$_pAr_gRp$i = \"" . + ::perl_quote_scalar($grp[$i]) . "\";"; + } + $unchanged . "\257<" . $set_args . $rv . "\257>" + }gxes) { + } + # Do the same for the positional replacement strings + $posrpl = $rpl; + if($posrpl =~ s/^\{//) { + # Only do this if the shorthand start with { + $prefix=~s/^\{//; + # Don't replace after \257 unless \257> + while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? ) + \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E} + { + # The start remains the same + my $unchanged = $1; + my $position = $2; + # Dummy entry to start at 1. + my @grp = (1); + # $3 = first ()-group in $grp_regexp + # Put $3 in $grp[1], Put $4 in $grp[2] + # so first ()-group in $grp_regexp is $grp[1]; + for(my $i = 3; defined $grp[$#grp]; $i++) { + push @grp, eval '$'.$i; + } + my $rv = $rplval; + # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2 + # in the code to be executed + $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx; + # prepend with $_pAr_gRp1 = perlquote($1), + my $set_args = ""; + for(my $i = 1;defined $grp[$i]; $i++) { + $set_args .= "\$_pAr_gRp$i = \"" . + ::perl_quote_scalar($grp[$i]) . "\";"; + } + $unchanged . "\257<" . $position . $set_args . $rv . "\257>" + }gxes) { + } + } + } + } + # Add {} if no replacement strings in @command + ($replacecount_ref, $len_ref, @command) = + replacement_counts_and_lengths($transfer_files, $return_files, + $template_names, $template_contents, + @command); + if("@command" =~ /^[^ \t\n=]*\257</) { + # Replacement string is (part of) the command (and not just + # argument or variable definition V1={}) + # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2} + # Do no quote (Otherwise it will fail if the input contains spaces) + $Global::quote_replace = 0; + } + + if($opt::sqlmaster and $Global::sql->append()) { + $seq = $Global::sql->max_seq() + 1; + } + + return bless { + ('unget' => \@unget, + 'command' => \@command, + 'replacecount' => $replacecount_ref, + 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), + 'context_replace' => $context_replace, + 'len' => $len_ref, + 'max_number_of_args' => $max_number_of_args, + 'size' => undef, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'seq' => $seq, + ) + }, ref($class) || $class; +} + +sub merge_rpl_parts($) { + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + # Input: + # @in = the @command as given by the user + # Uses: + # $Global::parensleft + # $Global::parensright + # Returns: + # @command with parts merged to keep {= and =} as one + my @in = @_; + my @out; + my $l = quotemeta($Global::parensleft); + my $r = quotemeta($Global::parensright); + + while(@in) { + my $s = shift @in; + $_ = $s; + # Remove matching (right most) parens + while(s/(.*)$l.*?$r/$1/os) {} + if(/$l/o) { + # Missing right parens + while(@in) { + $s .= " ".shift @in; + $_ = $s; + while(s/(.*)$l.*?$r/$1/os) {} + if(not /$l/o) { + last; + } + } + } + push @out, $s; + } + return @out; +} + +sub replacement_counts_and_lengths($$@) { + # Count the number of different replacement strings. + # Find the lengths of context for context groups and non-context + # groups. + # If no {} found in @command: add it to @command + # + # Input: + # \@transfer_files = array of filenames to transfer + # \@return_files = array of filenames to return + # \@template_names = array of names to copy to + # \@template_contents = array of contents to write + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \257< + while($c =~ s/ (\S*\000\S*) //xs) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$transfer_files, @$return_files, + @$template_names, @$template_contents, + @opt::filter, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) { + # Options that can contain replacement strings + defined($_) or next; + my $t = $_; + while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\257<\257>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\257<\257>"); + } + } + } + return(\%replacecount,\%len,@command); +} + +sub get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + if($opt::sqlworker) { + # Get the sequence number from the SQL table + $self->set_seq($SQL::next_seq); + # Get the command from the SQL table + $self->{'command'} = $SQL::command_ref; + my @command; + # Recompute replace counts based on the read command + ($self->{'replacecount'}, + $self->{'len'}, @command) = + replacement_counts_and_lengths($self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_name'}, + $self->{'template_contents'}, + @$SQL::command_ref); + if("@command" =~ /^[^ \t\n=]*\257</) { + # Replacement string is (part of) the command (and not just + # argument or variable definition V1={}) + # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2} + # Do no quote (Otherwise it will fail if the input contains spaces) + $Global::quote_replace = 0; + } + } + + my $cmd_line = CommandLine->new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_names'}, + $self->{'template_contents'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + $cmd_line->populate(); + ::debug("run","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe/--pipepart must have a command to pipe into ". + "(e.g. 'cat')."); + ::wait_and_exit(255); + } + } elsif($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? + return undef; + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args($) { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length($) { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() . + "/linelen"; + my $cached_limit; + local $/ = undef; + if(open(my $fh, "<", $len_cache)) { + $cached_limit = <$fh>; + $cached_limit || ::die_bug("Cannot read $len_cache"); + close $fh; + } + if(not $cached_limit) { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + my $dir = ::dirname($len_cache); + -d $dir or eval { File::Path::mkpath($dir); }; + open(my $fh, ">", $len_cache.$$); + print $fh $cached_limit; + close $fh; + rename $len_cache.$$, $len_cache || ::die_bug("rename cache file"); + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + } + return int($Limits::Command::line_max_len); +} + +sub real_max_length() { + # Find the max_length of a command line + # Returns: + # The maximal command line length with 1 byte arguments + # return find_max(" c"); + return find_max("c"); +} + +sub find_max($) { + my $string = shift; + # This is slow on Cygwin, so give Cygwin users a warning + if($^O eq "cygwin" or $^O eq "msys") { + ::warning("Finding the maximal command line length. ". + "This may take up to 1 minute.") + } + # Use an upper bound of 100 MB if the shell allows for infinite + # long lengths + my $upper = 100_000_000; + my $lower; + # 1000 is supported everywhere, so the search can start anywhere 1..999 + # 324 makes the search much faster on Cygwin, so let us use that + my $len = 324; + do { + if($len > $upper) { return $len }; + $lower = $len; + $len *= 16; + ::debug("init", "Maxlen: $lower<$len<$upper(".($upper-$lower)."): "); + } while (is_acceptable_command_line_length($len,$string)); + # Then search for the actual max length between + # last successful length ($len/16) and upper bound + return binary_find_max(int($len/16),$len,$string); +} + + +# Prototype forwarding +sub binary_find_max($$$); +sub binary_find_max($$$) { + # Given a lower and upper bound find the max (length or args) of a + # command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper, $string) = (@_); + if($lower == $upper + or $lower == $upper-1 + or $lower/$upper > 0.99) { + # $lower is +- 1 or within 1%: Don't search more + return $lower; + } + # Unevenly split binary search which is faster for Microsoft Windows. + # Guessing too high is cheap. Guessing too low is expensive. + my $split = ($^O eq "cygwin" or $^O eq "msys") ? 0.93 : 0.5; + my $middle = int (($upper-$lower)*$split + $lower); + ::debug("init", "Maxlen: $lower<$middle<$upper(".($upper-$lower)."): "); + if (is_acceptable_command_line_length($middle,$string)) { + return binary_find_max($middle,$upper,$string); + } else { + return binary_find_max($lower,$middle,$string); + } +} + +{ + my $prg; + + sub is_acceptable_command_line_length($$) { + # Test if a command line of this length can run + # in the current environment + # If the string is " x" it tests how many args are allowed + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + my $string = shift; + if($Global::parallel_env) { + $len += length $Global::parallel_env; + } + # Force using non-built-in command + $prg ||= ::which("echo"); + ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string)); + ::debug("init", "$len=$?\n"); + return not $?; + } +} + +sub tmux_length($) { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # tmux 2.0 has a 16kB limit + # tmux 2.1 has a 16kB limit + # tmux 2.2 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + local $/ = "\n"; + my $len = shift; + if($opt::tmux) { + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not ::which($ENV{'PARALLEL_TMUX'})) { + ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH."); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 30000, $len) { + my $tmpfile = ::tmpname("tms"); + my $tmuxcmd = $ENV{'PARALLEL_TMUX'}. + " -S $tmpfile new-session -d -n echo $l". + ("t"x$l). " && echo $l; rm -f $tmpfile"; + push @out, ::qqx($tmuxcmd); + ::rm($tmpfile); + } + ::debug("tmux","tmux-out ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the tmux wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = ::max(@out); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + + +package RecordQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($opt::sqlworker) { + # Open SQL table + $arg_sub_queue = SQLRecordQueue->new(); + } elsif(defined $colsep) { + # Open one file with colsep or CSV + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + my $ret = shift @{$self->{'unget'}}; + if($ret) { + map { $_->flush_cache() } @$ret; + } + return $ret; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if($ret) { + if(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Allow for \0 in position 0 because GNU Parallel uses "\0noarg" + # to mean no-string + ::warning("A NUL character in the input was replaced with \\0.", + "NUL cannot be passed through in the argument list.", + "Did you mean to use the --null option?"); + for(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Replace \0 with \\0 + my $a = $_->orig(); + $a =~ s/\0/\\0/g; + $_->set_orig($a); + } + } + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + # \0noarg => nothing (not the empty string) + map { $_->set_orig("\0noarg"); } @$ret; + } + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + map { $_->flush_cache() } @$ret; + } + return $ret; +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordQueue-unget\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } + $self->{'arg_sub_queue'}->flush_cache(); +} + +sub arg_number($) { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + if($opt::csv) { + # Parse CSV and put it into a record + chomp $line; + if(not $Global::csv->parse($line)) { + die "CSV has unexpected format: ^$line^"; + } + for($Global::csv->fields()) { + push @out_record, Arg->new($_); + } + } else { + # Split --colsep into record + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } + } else { + push @out_record, Arg->new(""); + } + } + return \@out_record; + } else { + return undef; + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $arg (@{$self->{'unget'}}) { + $arg->flush_cache(); + } + $self->{'arg_sub_queue'}->flush_cache(); +} + + +package SQLRecordQueue; + +sub new($) { + my $class = shift; + my @unget = (); + return bless { + 'unget' => \@unget, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + return $Global::sql->get_record(); +} + +sub unget($) { + my $self = shift; + ::debug("run", "SQLRecordQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + if(@{$self->{'unget'}}) { return 0; } + my $get = $self->get(); + if(defined $get) { + $self->unget($get); + } + my $empty = not $get; + ::debug("run", "SQLRecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new($$) { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh and -t ($Global::status_fd || *STDERR)) { + ::warning( + "Input is read from the terminal. You are either an expert", + "(in which case: YOU ARE AWESOME!) or maybe you forgot", + "::: or :::: or -a or to pipe data into parallel. If so", + "consider going through the tutorial: man parallel_tutorial", + "Press CTRL-D to exit."); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + if($opt::link) { + return $self->link_get(); + } else { + return $self->nest_get(); + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @Global::unget_argv) && + not @{$self->{'unget'}}; + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + +sub link_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + for my $i (0..$#{$self->{'fhs'}}) { + my $fh = $self->{'fhs'}[$i]; + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}[$i]}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}[$i]}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. + if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}], + # Is input source --link'ed to the next? + $opt::linkinputsource[$fhn+1]); + } + # Find only combinations with this new entry + $combarg[2*$fhno] = [$len,$len]; + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. $no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + if(@mapped) { + return shift @{$self->{'unget'}}; + } + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +{ + my $cr_count = 0; + my $nl_count = 0; + my $dos_crnl_determined; + sub read_arg_from_fh($) { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend; + my $arg; + my $half_record = 0; + do {{ + # This makes 10% faster + if(not defined ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(not $dos_crnl_determined and not defined $opt::d) { + # Warn if input has CR-NL and -d is not set + if($arg =~ /\r$/) { + $cr_count++; + } else { + $nl_count++; + } + if($cr_count == 3 or $nl_count == 3) { + $dos_crnl_determined = 1; + if($nl_count == 0 and $cr_count == 3) { + ::warning('The first three values end in CR-NL. '. + 'Consider using -d "\r\n"'); + } + } + } + if($opt::csv) { + # We need to read a full CSV line. + if(($arg =~ y/"/"/) % 2 ) { + # The number of " on the line is uneven: + # If we were in a half_record => we have a full record now + # If we were outside a half_record => + # we are in a half record now + $half_record = not $half_record; + } + if($half_record) { + # CSV half-record with quoting: + # col1,"col2 2""x3"" board newline <-this one + # cont",col3 + $prepend .= $arg; + redo; + } else { + # Now we have a full CSV record + } + } + # Remove delimiter + chomp $arg; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { + $arg = $prepend.$arg; # For line continuation + undef $prepend; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } + } +} + +# Prototype forwarding +sub expand_combinations(@); +sub expand_combinations(@) { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) + # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $link = shift; # This is linked to the next input source + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + my @rest = expand_combinations(@_); + if($link) { + # Linked to next col with --link/:::+/::::+ + # TODO BUG does not wrap values if not same number of vals + push(@p, map { [$$_[0], @$_] } + grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest); + } else { + # If there are more columns: Compute those recursively + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new($) { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + # This hostgroup is not defined using -S + # Add it + ::warning("Adding hostgroups: @hostgroups"); + # Add sshlogin + for(grep { not defined $Global::hostgroups{$_} } @hostgroups) { + my $sshlogin = SSHLogin->new($_); + my $sshlogin_string = $sshlogin->string(); + $Global::host{$sshlogin_string} = $sshlogin; + $Global::hostgroups{$sshlogin_string} = 1; + } + } + } else { + # No hostgroup on the arg => any hostgroup + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = ::Q($_[0]); + no warnings 'redefine'; + *Q = \&::Q; + return $ret; +} + +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = ::pQ($_[0]); + no warnings 'redefine'; + *pQ = \&::pQ; + return $ret; +} + +sub hash($) { + $Global::use{"DBI"} ||= eval "use B; 1;"; + B::hash(@_); +} + +sub total_jobs() { + return $Global::JobQueue->total_jobs(); +} + +{ + my %perleval; + my $job; + sub skip() { + # shorthand for $job->skip(); + $job->skip(); + } + sub slot() { + # shorthand for $job->slot(); + $job->slot(); + } + sub seq() { + # shorthand for $job->seq(); + $job->seq(); + } + sub uq() { + # Do not quote this arg + $Global::unquote_arg = 1; + } + sub yyyy_mm_dd_hh_mm_ss() { + # ISO8601 2038-01-19T03:14:08 + ::strftime("%Y-%m-%dT%H:%M:%S", localtime(time())); + } + sub yyyy_mm_dd_hh_mm() { + # ISO8601 2038-01-19T03:14 + ::strftime("%Y-%m-%dT%H:%M", localtime(time())); + } + sub yyyy_mm_dd() { + # ISO8601 2038-01-19 + ::strftime("%Y-%m-%d", localtime(time())); + } + sub hh_mm_ss() { + # ISO8601 03:14:08 + ::strftime("%H:%M:%S", localtime(time())); + } + sub hh_mm() { + # ISO8601 03:14 + ::strftime("%H:%M", localtime(time())); + } + sub yyyymmddhhmmss() { + # ISO8601 20380119 + ISO8601 031408 + ::strftime("%Y%m%d%H%M%S", localtime(time())); + } + sub yyyymmddhhmm() { + # ISO8601 20380119 + ISO8601 0314 + ::strftime("%Y%m%d%H%M", localtime(time())); + } + sub yyyymmdd() { + # ISO8601 20380119 + ::strftime("%Y%m%d", localtime(time())); + } + sub hhmmss() { + # ISO8601 031408 + ::strftime("%H%M%S", localtime(time())); + } + sub hhmm() { + # ISO8601 0314 + ::strftime("%H%M", localtime(time())); + } + + sub replace($$$$) { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = shift; # should the string be quoted? + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + $job = shift; + # Positional replace treated as normal replace + $perlexpr =~ s/^(-?\d+)? *//; + if(not $Global::cache_replacement_eval + or + not $self->{'cache'}{$perlexpr}) { + # Only compute the value once + # Use $_ as the variable to change + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + # Trim the input + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@"); + ::wait_and_exit(255); + } + } + # Execute the function + $perleval{$perlexpr}->($job); + $self->{'cache'}{$perlexpr} = $_; + if($Global::unquote_arg) { + # uq() was called in perlexpr + $self->{'cache'}{'unquote'}{$perlexpr} = 1; + # Reset for next perlexpr + $Global::unquote_arg = 0; + } + } + # Return the value quoted if needed + if($self->{'cache'}{'unquote'}{$perlexpr}) { + return($self->{'cache'}{$perlexpr}); + } else { + return($quote ? Q($self->{'cache'}{$perlexpr}) + : $self->{'cache'}{$perlexpr}); + } + } +} + +sub flush_cache($) { + # Flush cache of computed values + my $self = shift; + $self->{'cache'} = undef; +} + +sub orig($) { + my $self = shift; + return $self->{'orig'}; +} + +sub set_orig($$) { + my $self = shift; + $self->{'orig'} = shift; +} + +sub trim_of($) { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? $_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr."); + ::wait_and_exit(255); + } + return wantarray ? @strings : "@strings"; +} + + +package TimeoutQueue; + +sub new($) { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + $delta_time = ::multiply_time_units($delta_time); + + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time($) { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time($$) { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian($) { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian($$) { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime($) { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts($) { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->is_timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + ::warning("This job was killed because it timed out:", + $job->replaced()); + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert($) { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package SQL; + +sub new($) { + my $class = shift; + my $dburl = shift; + $Global::use{"DBI"} ||= eval "use DBI; 1;"; + # +DBURL = append to this DBURL + my $append = $dburl=~s/^\+//; + my %options = parse_dburl(get_alias($dburl)); + my %driveralias = ("sqlite" => "SQLite", + "sqlite3" => "SQLite", + "pg" => "Pg", + "postgres" => "Pg", + "postgresql" => "Pg", + "csv" => "CSV", + "oracle" => "Oracle", + "ora" => "Oracle"); + my $driver = $driveralias{$options{'databasedriver'}} || + $options{'databasedriver'}; + my $database = $options{'database'}; + my $host = $options{'host'} ? ";host=".$options{'host'} : ""; + my $port = $options{'port'} ? ";port=".$options{'port'} : ""; + my $dsn = "DBI:$driver:dbname=$database$host$port"; + my $userid = $options{'user'}; + my $password = $options{'password'};; + if(not grep /$driver/, DBI->available_drivers) { + ::error("$driver not supported. Are you missing a perl DBD::$driver module?"); + ::wait_and_exit(255); + } + my $dbh; + if($driver eq "CSV") { + # CSV does not use normal dsn + if(-d $database) { + $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", }) + or die $DBI::errstr; + } else { + ::error("$database is not a directory."); + ::wait_and_exit(255); + } + } else { + $dbh = DBI->connect($dsn, $userid, $password, + { RaiseError => 1, AutoInactiveDestroy => 1 }) + or die $DBI::errstr; + } + $dbh->{'PrintWarn'} = $Global::debug || 0; + $dbh->{'PrintError'} = $Global::debug || 0; + $dbh->{'RaiseError'} = 1; + $dbh->{'ShowErrorStatement'} = 1; + $dbh->{'HandleError'} = sub {}; + if(not defined $options{'table'}) { + ::error("The DBURL ($dburl) must contain a table."); + ::wait_and_exit(255); + } + + return bless { + 'dbh' => $dbh, + 'driver' => $driver, + 'max_number_of_args' => undef, + 'table' => $options{'table'}, + 'append' => $append, + }, ref($class) || $class; +} + +# Prototype forwarding +sub get_alias($); +sub get_alias($) { + my $alias = shift; + $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: + if ($alias !~ /^:/) { + return $alias; + } + + # Find the alias + my $path; + if (-l $0) { + ($path) = readlink($0) =~ m|^(.*)/|; + } else { + ($path) = $0 =~ m|^(.*)/|; + } + + my @deprecated = ("$ENV{HOME}/.dburl.aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for (@deprecated) { + if(-r $_) { + ::warning("$_ is deprecated. ". + "Use .sql/aliases instead (read man sql)."); + } + } + my @urlalias=(); + check_permissions("$ENV{HOME}/.sql/aliases"); + check_permissions("$ENV{HOME}/.dburl.aliases"); + my @search = ("$ENV{HOME}/.sql/aliases", + "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for my $alias_file (@search) { + # local $/ needed if -0 set + local $/ = "\n"; + if(-r $alias_file) { + open(my $in, "<", $alias_file) || die; + push @urlalias, <$in>; + close $in; + } + } + my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; + # If we saw this before: we have an alias loop + if(grep {$_ eq $alias_part } @Private::seen_aliases) { + ::error("$alias_part is a cyclic alias."); + exit -1; + } else { + push @Private::seen_aliases, $alias_part; + } + + my $dburl; + for (@urlalias) { + /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } + } + + if($dburl) { + return get_alias($dburl.$rest); + } else { + ::error("$alias is not defined in @search"); + exit(-1); + } +} + +sub check_permissions($) { + my $file = shift; + + if(-e $file) { + if(not -o $file) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be owned by $username: ". + "chown $username $file"); + } + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); + if($mode & 077) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be only be readable by $username: ". + "chmod 600 $file"); + } + } +} + +sub parse_dburl($) { + my $url = shift; + my %options = (); + # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]] + + if($url=~m!^(?:sql:)? # You can prefix with 'sql:' + ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)| + (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1) + (?: + ([^:@/][^:@]*|) # Username ($2) + (?: + :([^@]*) # Password ($3) + )? + @)? + ([^:/]*)? # Hostname ($4) + (?: + : + ([^/]*)? # Port ($5) + )? + (?: + / + ([^/?]*)? # Database ($6) + )? + (?: + / + ([^?]*)? # Table ($7) + )? + (?: + \? + (.*)? # Query ($8) + )? + $!ix) { + $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1))); + $options{user} = ::undef_if_empty(uri_unescape($2)); + $options{password} = ::undef_if_empty(uri_unescape($3)); + $options{host} = ::undef_if_empty(uri_unescape($4)); + $options{port} = ::undef_if_empty(uri_unescape($5)); + $options{database} = ::undef_if_empty(uri_unescape($6)); + $options{table} = ::undef_if_empty(uri_unescape($7)); + $options{query} = ::undef_if_empty(uri_unescape($8)); + ::debug("sql", "dburl $url\n"); + ::debug("sql", "databasedriver ", $options{databasedriver}, + " user ", $options{user}, + " password ", $options{password}, " host ", $options{host}, + " port ", $options{port}, " database ", $options{database}, + " table ", $options{table}, " query ", $options{query}, "\n"); + } else { + ::error("$url is not a valid DBURL"); + exit 255; + } + return %options; +} + +sub uri_unescape($) { + # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm + # to avoid depending on URI::Escape + # This section is (C) Gisle Aas. + # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + foreach (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +sub run($) { + my $self = shift; + my $stmt = shift; + if($self->{'driver'} eq "CSV") { + $stmt=~ s/;$//; + if($stmt eq "BEGIN" or + $stmt eq "COMMIT") { + return undef; + } + } + my @retval; + my $dbh = $self->{'dbh'}; + ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n"); + # Execute with the rest of the args - if any + my $rv; + my $sth; + my $lockretry = 0; + while($lockretry < 10) { + $sth = $dbh->prepare($stmt); + if($sth + and + eval { $rv = $sth->execute(@_) }) { + last; + } else { + if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/ + or + $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) { + # This is fine: + # It is just a worker that reported back too late - + # another worker had finished the job first + # and the table was then dropped + $rv = $sth = 0; + last; + } + if($DBI::errstr =~ /locked/) { + ::debug("sql", "Lock retry: $lockretry"); + $lockretry++; + ::usleep(rand()*300); + } elsif(not $sth) { + # Try again + $lockretry++; + } else { + ::error($DBI::errstr); + ::wait_and_exit(255); + } + } + } + if($lockretry >= 10) { + ::die_bug("retry > 10: $DBI::errstr"); + } + if($rv < 0 and $DBI::errstr){ + ::error($DBI::errstr); + ::wait_and_exit(255); + } + return $sth; +} + +sub get($) { + my $self = shift; + my $sth = $self->run(@_); + my @retval; + # If $sth = 0 it means the table was dropped by another process + while($sth) { + my @row = $sth->fetchrow_array(); + @row or last; + push @retval, \@row; + } + return \@retval; +} + +sub table($) { + my $self = shift; + return $self->{'table'}; +} + +sub append($) { + my $self = shift; + return $self->{'append'}; +} + +sub update($) { + my $self = shift; + my $stmt = shift; + my $table = $self->table(); + $self->run("UPDATE $table $stmt",@_); +} + +sub output($) { + my $self = shift; + my $commandline = shift; + + $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ". + $commandline->seq(), + join("",@{$commandline->{'output'}{1}}), + join("",@{$commandline->{'output'}{2}})); +} + +sub max_number_of_args($) { + # Maximal number of args for this table + my $self = shift; + if(not $self->{'max_number_of_args'}) { + # Read the number of args from the SQL table + my $table = $self->table(); + my $v = $self->get("SELECT * FROM $table LIMIT 1;"); + my @reserved_columns = qw(Seq Host Starttime JobRuntime Send + Receive Exitval _Signal Command Stdout Stderr); + if(not $v) { + ::error("$table contains no records"); + } + # Count the number of Vx columns + $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns; + } + return $self->{'max_number_of_args'}; +} + +sub set_max_number_of_args($$) { + my $self = shift; + $self->{'max_number_of_args'} = shift; +} + +sub create_table($) { + my $self = shift; + if($self->append()) { return; } + my $max_number_of_args = shift; + $self->set_max_number_of_args($max_number_of_args); + my $table = $self->table(); + $self->run(qq(DROP TABLE IF EXISTS $table;)); + # BIGINT and TEXT are not supported in these databases or are too small + my %vartype = ( + "Oracle" => { "BIGINT" => "NUMBER(19,0)", + "TEXT" => "CLOB", }, + "mysql" => { "TEXT" => "BLOB", }, + "CSV" => { "BIGINT" => "INT", + "FLOAT" => "REAL", }, + ); + my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT"; + my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT"; + my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)"; + my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args()); + $self->run(qq{CREATE TABLE $table + (Seq $BIGINT, + Host $TEXT, + Starttime $FLOAT, + JobRuntime $FLOAT, + Send $BIGINT, + Receive $BIGINT, + Exitval $BIGINT, + _Signal $BIGINT, + Command $TEXT,}. + $v_def. + qq{Stdout $TEXT, + Stderr $TEXT);}); +} + +sub insert_records($) { + my $self = shift; + my $seq = shift; + my $command_ref = shift; + my $record_ref = shift; + my $table = $self->table(); + # For SQL encode the command with \257 space as split points + my $command = join("\257 ",@$command_ref); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + # Two extra value due to $seq, Exitval, Send + my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4); + $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ". + "VALUES ($v_vals);", $seq, $command, -1000, + 0, @$record_ref[1..$#$record_ref]); +} + + +sub get_record($) { + my $self = shift; + my @retval; + my $table = $self->table(); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + my $rand = "Reserved-".$$.rand(); + my $v; + my $more_pending; + + do { + if($self->{'driver'} eq "CSV") { + # Sub SELECT is not supported in CSV + # So to minimize the race condition below select a job at random + my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Exitval = -1000 LIMIT 100;"); + $v = [ sort { rand() > 0.5 } @$r ]; + } else { + # Avoid race condition where multiple workers get the same job + # by setting Stdout to a unique string + # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL + $self->update("SET Stdout = ?,Exitval = ? ". + "WHERE Seq = (". + " SELECT * FROM (". + " SELECT min(Seq) FROM $table WHERE Exitval = -1000". + " ) AS dummy". + ") AND Exitval = -1000;", $rand, -1210); + # If a parallel worker overwrote the unique string this will get nothing + $v = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Stdout = ?;", $rand); + } + if($v->[0]) { + my $val_ref = $v->[0]; + # Mark record as taken + my $seq = shift @$val_ref; + # Save the sequence number to use when running the job + $SQL::next_seq = $seq; + $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220); + # Command is encoded with '\257 space' as splitting char + my @command = split /\257 /, shift @$val_ref; + $SQL::command_ref = \@command; + for (@$val_ref) { + push @retval, Arg->new($_); + } + } else { + # If the record was updated by another job in parallel, + # then we may not be done, so see if there are more jobs pending + $more_pending = + $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210); + } + } while (not $v->[0] and $more_pending->[0]); + + if(@retval) { + return \@retval; + } else { + return undef; + } +} + +sub total_jobs($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT count(*) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::total_jobs"); + } +} + +sub max_seq($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT max(Seq) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::max_seq"); + } +} + +sub finished($) { + # Check if there are any jobs left in the SQL table that do not + # have a "real" exitval + my $self = shift; + if($opt::wait or $Global::start_sqlworker) { + my $table = $self->table(); + my $rv = $self->get("select Seq,Exitval from $table ". + "where Exitval <= -1000 limit 1"); + return not $rv->[0]; + } else { + return 1; + } +} + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each +# file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new($) { + my $class = shift; + my $id = shift; + my $count = shift; + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id + my $parallel_locks = $Global::cache_dir . "/semaphores"; + -d $parallel_locks or ::mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if(-d $parallel_locks and -w $parallel_locks + and -r $parallel_locks and -x $parallel_locks) { + # skip + } else { + ::error("Semaphoredir must be writable: '$parallel_locks'"); + ::wait_and_exit(255); + } + + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub remove_dead_locks($) { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(kill 0, $pid) { + ::debug("sem", "Alive: $pid $d\n"); + } else { + ::debug("sem", "Dead: $d\n"); + ::rm($d); + } + } + } +} + +sub acquire($) { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + # Can we get a lock? + $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore."); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + exit(1); + last; + } + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release($) { + my $self = shift; + ::rm($self->{'pidfile'}); + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + ::rm($self->{'idfile'}); + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change($) { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + ::rm($old_pidfile); +} + +sub atomic_link_if_count_less_than($) { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks($) { + my $self = shift; + if(-e $self->{'idfile'}) { + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock($) { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!", + "Will wait for a random while."); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. ". + "Consider using --semaphoretimeout."); + } + } + } + ::debug("run", "locked $self->{'lockfile'}"); +} + +sub unlock($) { + my $self = shift; + ::rm($self->{'lockfile'}); + close $self->{'lockfh'}; + ::debug("run", "unlocked\n"); +} + +# Keep perl -w happy + +$opt::x = $Semaphore::timeout = $Semaphore::wait = +$Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg = +$Global::max_slot_number = $opt::session; + +package main; + +sub main() { + save_stdin_stdout_stderr(); + save_original_signal_handler(); + parse_options(); + ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fh), "\n"); + my $number_of_args; + if($Global::max_number_of_args) { + $number_of_args = $Global::max_number_of_args; + } elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; + } else { + $number_of_args = 1; + } + + my @command = @ARGV; + my @input_source_fh; + if($opt::pipepart) { + if($opt::tee) { + @input_source_fh = map { open_or_exit($_) } @opt::a; + # Remove the first: It will be the file piped. + shift @input_source_fh; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } else { + # -a is used for data - not for command line args + @input_source_fh = map { open_or_exit($_) } "/dev/null"; + } + } else { + @input_source_fh = map { open_or_exit($_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } + + if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $input_source_fh[0]; + <$fh>; + } + + set_input_source_header(\@command,\@input_source_fh); + if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. Remove hosts that are down + filter_hosts(); + } + + + if($opt::sqlmaster and $opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + + if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); + } + + $Global::JobQueue = JobQueue->new( + \@command, \@input_source_fh, $Global::ContextReplace, + $number_of_args, \@Global::transfer_files, \@Global::ret_files, + \@Global::template_names, \@Global::template_contents + ); + + if($opt::sqlmaster) { + # Create SQL table to hold joblog + output + # Figure out how many arguments are in a job + # (It is affected by --colsep, -N, $number_source_fh) + my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}; + my $record = $record_queue->get(); + my $no_of_values = $number_of_args * (1+$#{$record}); + $record_queue->unget($record); + $Global::sql->create_table($no_of_values); + if($opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + } + + if($opt::pipepart) { + pipepart_setup(); + } elsif($opt::pipe) { + if($opt::tee) { + pipe_tee_setup(); + } elsif($opt::shard or $opt::bin) { + pipe_shard_setup(); + } elsif($opt::groupby) { + pipe_group_by_setup(); + } + } + + if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) { + # Count the number of jobs or shuffle all jobs + # before starting any. + # Must be done after ungetting any --pipepart jobs. + $Global::JobQueue->total_jobs(); + } + # Compute $Global::max_jobs_running + # Must be done after ungetting any --pipepart jobs. + max_jobs_running(); + + init_run_jobs(); + my $sem; + if($Global::semaphore) { + $sem = acquire_semaphore(); + } + $SIG{TERM} = $Global::original_sig{TERM}; + $SIG{HUP} = \&start_no_new_jobs; + + if($opt::tee or $opt::shard or $opt::bin) { + # All jobs must be running in parallel for --tee/--shard/--bin + while(start_more_jobs()) {} + $Global::start_no_new_jobs = 1; + if(not $Global::JobQueue->empty()) { + if($opt::tee) { + ::error("--tee requires --jobs to be higher. Try --jobs 0."); + } elsif($opt::bin) { + ::error("--bin requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } elsif($opt::shard) { + ::error("--shard requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } else { + ::die_bug("--bin/--shard/--tee should not get here"); + } + ::wait_and_exit(255); + } + } elsif($opt::pipe and not $opt::pipepart and not $opt::semaphore) { + # Fill all jobslots + while(start_more_jobs()) {} + spreadstdin(); + } else { + # Reap the finished jobs and start more + while(reapers() + start_more_jobs()) {} + } + ::debug("init", "Start draining\n"); + drain_job_queue(@command); + ::debug("init", "Done draining\n"); + reapers(); + ::debug("init", "Done reaping\n"); + if($Global::semaphore) { $sem->release(); } + cleanup(); + ::debug("init", "Halt\n"); + halt(); +} + +main(); diff --git a/src/parallel.pod b/src/parallel.pod new file mode 100644 index 0000000..4101e6a --- /dev/null +++ b/src/parallel.pod @@ -0,0 +1,4520 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + +=head1 NAME + +parallel - build and execute shell command lines from standard input +in parallel + + +=head1 SYNOPSIS + +B<parallel> [options] [I<command> [arguments]] < list_of_arguments + +B<parallel> [options] [I<command> [arguments]] ( B<:::> arguments | +B<:::+> arguments | B<::::> argfile(s) | B<::::+> argfile(s) ) ... + +B<parallel> --semaphore [options] I<command> + +B<#!/usr/bin/parallel> --shebang [options] [I<command> [arguments]] + +B<#!/usr/bin/parallel> --shebang-wrap [options] [I<command> +[arguments]] + + +=head1 DESCRIPTION + +STOP! + +Read the B<Reader's guide> below if you are new to GNU B<parallel>. + +GNU B<parallel> is a shell tool for executing jobs in parallel using +one or more computers. A job can be a single command or a small script +that has to be run for each of the lines in the input. The typical +input is a list of files, a list of hosts, a list of users, a list of +URLs, or a list of tables. A job can also be a command that reads from +a pipe. GNU B<parallel> can then split the input into blocks and pipe +a block into each command in parallel. + +If you use xargs and tee today you will find GNU B<parallel> very easy +to use as GNU B<parallel> is written to have the same options as +xargs. If you write loops in shell, you will find GNU B<parallel> may +be able to replace most of the loops and make them run faster by +running several jobs in parallel. + +GNU B<parallel> makes sure output from the commands is the same output +as you would get had you run the commands sequentially. This makes it +possible to use output from GNU B<parallel> as input for other +programs. + +For each line of input GNU B<parallel> will execute I<command> with +the line as arguments. If no I<command> is given, the line of input is +executed. Several lines will be run in parallel. GNU B<parallel> can +often be used as a substitute for B<xargs> or B<cat | bash>. + + +=head2 Reader's guide + +GNU B<parallel> includes the 4 types of documentation: Tutorial, +how-to, reference and explanation. + + +=head3 Tutorial + +If you prefer reading a book buy B<GNU Parallel 2018> at +https://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html +or download it at: https://doi.org/10.5281/zenodo.1146014 Read at +least chapter 1+2. It should take you less than 20 minutes. + +Otherwise start by watching the intro videos for a quick introduction: +https://youtube.com/playlist?list=PL284C9FF2488BC6D1 + +If you want to dive deeper: spend a couple of hours walking through +the tutorial (B<man parallel_tutorial>). Your command line will love +you for it. + + +=head3 How-to + +You can find a lot of examples of use in B<man +parallel_examples>. They will give you an idea of what GNU B<parallel> +is capable of, and you may find a solution you can simply adapt to +your situation. + + +=head3 Reference + +If you need a one page printable cheat sheet you can find it on: +https://www.gnu.org/software/parallel/parallel_cheat.pdf + +The man page is the reference for all options. + + +=head3 Design discussion + +If you want to know the design decisions behind GNU B<parallel>, try: +B<man parallel_design>. This is also a good intro if you intend to +change GNU B<parallel>. + + + +=head1 OPTIONS + +=over 4 + +=item I<command> + +Command to execute. + +If I<command> or the following arguments contain +replacement strings (such as B<{}>) every instance will be substituted +with the input. + +If I<command> is given, GNU B<parallel> solve the same tasks as +B<xargs>. If I<command> is not given GNU B<parallel> will behave +similar to B<cat | sh>. + +The I<command> must be an executable, a script, a composed command, an +alias, or a function. + +B<Bash functions>: B<export -f> the function first or use B<env_parallel>. + +B<Bash, Csh, or Tcsh aliases>: Use B<env_parallel>. + +B<Zsh, Fish, Ksh, and Pdksh functions and aliases>: Use B<env_parallel>. + +=item B<{}> + +Input line. + +This replacement string will be replaced by a full line read from the +input source. The input source is normally stdin (standard input), but +can also be given with B<--arg-file>, B<:::>, or B<::::>. + +The replacement string B<{}> can be changed with B<-I>. + +If the command line contains no replacement strings then B<{}> will be +appended to the command line. + +Replacement strings are normally quoted, so special characters are not +parsed by the shell. The exception is if the command starts with a +replacement string; then the string is not quoted. + +See also: B<--plus> B<{.}> B<{/}> B<{//}> B<{/.}> B<{#}> B<{%}> +B<{>I<n>B<}> B<{=>I<perl expression>B<=}> + + +=item B<{.}> + +Input line without extension. + +This replacement string will be replaced by the input with the +extension removed. If the input line contains B<.> after the last +B</>, the last B<.> until the end of the string will be removed and +B<{.}> will be replaced with the remaining. E.g. I<foo.jpg> becomes +I<foo>, I<subdir/foo.jpg> becomes I<subdir/foo>, +I<sub.dir/foo.jpg> becomes I<sub.dir/foo>, I<sub.dir/bar> remains +I<sub.dir/bar>. If the input line does not contain B<.> it will remain +unchanged. + +The replacement string B<{.}> can be changed with B<--extensionreplace> + +See also: B<{}> B<--extensionreplace> + + +=item B<{/}> + +Basename of input line. + +This replacement string will be replaced by the input with the +directory part removed. + +See also: B<{}> B<--basenamereplace> + + +=item B<{//}> + +Dirname of input line. + +This replacement string will be replaced by the dir of the input +line. See B<dirname>(1). + +See also: B<{}> B<--dirnamereplace> + + +=item B<{/.}> + +Basename of input line without extension. + +This replacement string will be replaced by the input with the +directory and extension part removed. B<{/.}> is a combination of +B<{/}> and B<{.}>. + +See also: B<{}> B<--basenameextensionreplace> + + +=item B<{#}> + +Sequence number of the job to run. + +This replacement string will be replaced by the sequence number of the +job being run. It contains the same number as $PARALLEL_SEQ. + +See also: B<{}> B<--seqreplace> + + +=item B<{%}> + +Job slot number. + +This replacement string will be replaced by the job's slot number +between 1 and number of jobs to run in parallel. There will never be 2 +jobs running at the same time with the same job slot number. + +If the job needs to be retried (e.g using B<--retries> or +B<--retry-failed>) the job slot is not automatically updated. You +should then instead use B<$PARALLEL_JOBSLOT>: + + $ do_test() { + id="$3 {%}=$1 PARALLEL_JOBSLOT=$2" + echo run "$id"; + sleep 1 + # fail if {%} is odd + return `echo $1%2 | bc` + } + $ export -f do_test + $ parallel -j3 --jl mylog do_test {%} \$PARALLEL_JOBSLOT {} ::: A B C D + run A {%}=1 PARALLEL_JOBSLOT=1 + run B {%}=2 PARALLEL_JOBSLOT=2 + run C {%}=3 PARALLEL_JOBSLOT=3 + run D {%}=1 PARALLEL_JOBSLOT=1 + $ parallel --retry-failed -j3 --jl mylog do_test {%} \$PARALLEL_JOBSLOT {} ::: A B C D + run A {%}=1 PARALLEL_JOBSLOT=1 + run C {%}=3 PARALLEL_JOBSLOT=2 + run D {%}=1 PARALLEL_JOBSLOT=3 + +Notice how {%} and $PARALLEL_JOBSLOT differ in the retry run of C and D. + +See also: B<{}> B<--jobs> B<--slotreplace> + + +=item B<{>I<n>B<}> + +Argument from input source I<n> or the I<n>'th argument. + +This positional replacement string will be replaced by the input from +input source I<n> (when used with B<--arg-file> or B<::::>) or with the +I<n>'th argument (when used with B<-N>). If I<n> is negative it refers +to the I<n>'th last argument. + +See also: B<{}> B<{>I<n>.B<}> B<{>I<n>/B<}> B<{>I<n>//B<}> +B<{>I<n>/.B<}> + + +=item B<{>I<n>.B<}> + +Argument from input source I<n> or the I<n>'th argument without +extension. + +B<{>I<n>.B<}> is a combination of B<{>I<n>B<}> and B<{.}>. + +This positional replacement string will be replaced by the input from +input source I<n> (when used with B<--arg-file> or B<::::>) or with the +I<n>'th argument (when used with B<-N>). The input will have the +extension removed. + +See also: B<{>I<n>B<}> B<{.}> + + +=item B<{>I<n>/B<}> + +Basename of argument from input source I<n> or the I<n>'th argument. + +B<{>I<n>/B<}> is a combination of B<{>I<n>B<}> and B<{/}>. + +This positional replacement string will be replaced by the input from +input source I<n> (when used with B<--arg-file> or B<::::>) or with the +I<n>'th argument (when used with B<-N>). The input will have the +directory (if any) removed. + +See also: B<{>I<n>B<}> B<{/}> + + +=item B<{>I<n>//B<}> + +Dirname of argument from input source I<n> or the I<n>'th argument. + +B<{>I<n>//B<}> is a combination of B<{>I<n>B<}> and B<{//}>. + +This positional replacement string will be replaced by the dir of the +input from input source I<n> (when used with B<--arg-file> or B<::::>) or with +the I<n>'th argument (when used with B<-N>). See B<dirname>(1). + +See also: B<{>I<n>B<}> B<{//}> + + +=item B<{>I<n>/.B<}> + +Basename of argument from input source I<n> or the I<n>'th argument +without extension. + +B<{>I<n>/.B<}> is a combination of B<{>I<n>B<}>, B<{/}>, and +B<{.}>. + +This positional replacement string will be replaced by the input from +input source I<n> (when used with B<--arg-file> or B<::::>) or with the +I<n>'th argument (when used with B<-N>). The input will have the +directory (if any) and extension removed. + +See also: B<{>I<n>B<}> B<{/.}> + + +=item B<{=>I<perl expression>B<=}> + +Replace with calculated I<perl expression>. + +B<$_> will contain the same as B<{}>. After evaluating I<perl +expression> B<$_> will be used as the value. It is recommended to only +change $_ but you have full access to all of GNU B<parallel>'s +internal functions and data structures. + +The expression must give the same result if evaluated twice - +otherwise the behaviour is undefined. E.g. this will not work as expected: + + parallel echo '{= $_= ++$wrong_counter =}' ::: a b c + +A few convenience functions and data structures have been made: + +=over 15 + +=item Z<> B<Q(>I<string>B<)> + +shell quote a string + +=item Z<> B<pQ(>I<string>B<)> + +perl quote a string + +=item Z<> B<uq()> (or B<uq>) + +do not quote current replacement string + +=item Z<> B<hash(val)> + +compute B::hash(val) + +=item Z<> B<total_jobs()> + +number of jobs in total + +=item Z<> B<slot()> + +slot number of job + +=item Z<> B<seq()> + +sequence number of job + +=item Z<> B<@arg> + +the arguments + +=item Z<> B<skip()> + +skip this job (see also B<--filter>) + +=item Z<> B<yyyy_mm_dd_hh_mm_ss()> + +=item Z<> B<yyyy_mm_dd_hh_mm()> + +=item Z<> B<yyyy_mm_dd()> + +=item Z<> B<hh_mm_ss()> + +=item Z<> B<hh_mm()> + +=item Z<> B<yyyymmddhhmmss()> + +=item Z<> B<yyyymmddhhmm()> + +=item Z<> B<yyyymmdd()> + +=item Z<> B<hhmmss()> + +=item Z<> B<hhmm()> + +time functions + +=back + +Example: + + seq 10 | parallel echo {} + 1 is {= '$_++' =} + parallel csh -c {= '$_="mkdir ".Q($_)' =} ::: '12" dir' + seq 50 | parallel echo job {#} of {= '$_=total_jobs()' =} + +See also: B<--rpl> B<--parens> B<{}> B<{=>I<n> I<perl expression>B<=}> + + +=item B<{=>I<n> I<perl expression>B<=}> + +Positional equivalent to B<{=>I<perl expression>B<=}>. + +To understand positional replacement strings see B<{>I<n>B<}>. + +See also: B<{=>I<perl expression>B<=}> B<{>I<n>B<}> + + +=item B<:::> I<arguments> + +Use arguments on the command line as input source. + +Unlike other options for GNU B<parallel> B<:::> is placed after the +I<command> and before the arguments. + +The following are equivalent: + + (echo file1; echo file2) | parallel gzip + parallel gzip ::: file1 file2 + parallel gzip {} ::: file1 file2 + parallel --arg-sep ,, gzip {} ,, file1 file2 + parallel --arg-sep ,, gzip ,, file1 file2 + parallel ::: "gzip file1" "gzip file2" + +To avoid treating B<:::> as special use B<--arg-sep> to set the +argument separator to something else. + +If multiple B<:::> are given, each group will be treated as an input +source, and all combinations of input sources will be +generated. E.g. ::: 1 2 ::: a b c will result in the combinations +(1,a) (1,b) (1,c) (2,a) (2,b) (2,c). This is useful for replacing +nested for-loops. + +B<:::>, B<::::>, and B<--arg-file> can be mixed. So these are equivalent: + + parallel echo {1} {2} {3} ::: 6 7 ::: 4 5 ::: 1 2 3 + parallel echo {1} {2} {3} :::: <(seq 6 7) <(seq 4 5) \ + :::: <(seq 1 3) + parallel -a <(seq 6 7) echo {1} {2} {3} :::: <(seq 4 5) \ + :::: <(seq 1 3) + parallel -a <(seq 6 7) -a <(seq 4 5) echo {1} {2} {3} \ + ::: 1 2 3 + seq 6 7 | parallel -a - -a <(seq 4 5) echo {1} {2} {3} \ + ::: 1 2 3 + seq 4 5 | parallel echo {1} {2} {3} :::: <(seq 6 7) - \ + ::: 1 2 3 + +See also: B<--arg-sep> B<--arg-file> B<::::> B<:::+> B<::::+> B<--link> + + +=item B<:::+> I<arguments> + +Like B<:::> but linked like B<--link> to the previous input source. + +Contrary to B<--link>, values do not wrap: The shortest input source +determines the length. + +Example: + + parallel echo ::: a b c :::+ 1 2 3 ::: X Y :::+ 11 22 + +See also: B<::::+> B<--link> + + +=item B<::::> I<argfiles> + +Another way to write B<--arg-file> I<argfile1> B<--arg-file> I<argfile2> ... + +B<:::> and B<::::> can be mixed. + +See also: B<--arg-file> B<:::> B<::::+> B<--link> + + +=item B<::::+> I<argfiles> + +Like B<::::> but linked like B<--link> to the previous input source. + +Contrary to B<--link>, values do not wrap: The shortest input source +determines the length. + +See also: B<--arg-file> B<:::+> B<--link> + + +=item B<--null> + +=item B<-0> + +Use NUL as delimiter. + +Normally input lines will end in \n (newline). If they end in \0 +(NUL), then use this option. It is useful for processing arguments +that may contain \n (newline). + +Shorthand for B<--delimiter '\0'>. + +See also: B<--delimiter> + + +=item B<--arg-file> I<input-file> + +=item B<-a> I<input-file> + +Use I<input-file> as input source. + +If you use this option, stdin (standard input) is given to the first +process run. Otherwise, stdin (standard input) is redirected from +/dev/null. + +If multiple B<--arg-file> are given, each I<input-file> will be treated as an +input source, and all combinations of input sources will be +generated. E.g. The file B<foo> contains B<1 2>, the file +B<bar> contains B<a b c>. B<-a foo> B<-a bar> will result in the combinations +(1,a) (1,b) (1,c) (2,a) (2,b) (2,c). This is useful for replacing +nested for-loops. + +See also: B<--link> B<{>I<n>B<}> B<::::> B<::::+> B<:::> + + +=item B<--arg-file-sep> I<sep-str> + +Use I<sep-str> instead of B<::::> as separator string between command +and argument files. + +Useful if B<::::> is used for something else by the command. + +See also: B<::::> + + +=item B<--arg-sep> I<sep-str> + +Use I<sep-str> instead of B<:::> as separator string. + +Useful if B<:::> is used for something else by the command. + +Also useful if you command uses B<:::> but you still want to read +arguments from stdin (standard input): Simply change B<--arg-sep> to a +string that is not in the command line. + +See also: B<:::> + + +=item B<--bar> (alpha testing) + +Show progress as a progress bar. + +In the bar is shown: % of jobs completed, estimated seconds left, and +number of jobs started. + +It is compatible with B<zenity>: + + seq 1000 | parallel -j30 --bar '(echo {};sleep 0.1)' \ + 2> >(perl -pe 'BEGIN{$/="\r";$|=1};s/\r/\n/g' | + zenity --progress --auto-kill) | wc + +See also: B<--eta> B<--progress> B<--total-jobs> + + +=item B<--basefile> I<file> + +=item B<--bf> I<file> + +I<file> will be transferred to each sshlogin before first job is +started. + +It will be removed if B<--cleanup> is active. The file may be a script +to run or some common base data needed for the job. Multiple +B<--bf> can be specified to transfer more basefiles. The I<file> will be +transferred the same way as B<--transferfile>. + +See also: B<--sshlogin> B<--transfer> B<--return> B<--cleanup> +B<--workdir> + +=item B<--basenamereplace> I<replace-str> + +=item B<--bnr> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{/}> for +basename of input line. + +See also: B<{/}> + + +=item B<--basenameextensionreplace> I<replace-str> + +=item B<--bner> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{/.}> for basename of input line without extension. + +See also: B<{/.}> + + +=item B<--bin> I<binexpr> + +Use I<binexpr> as binning key and bin input to the jobs. + +I<binexpr> is [column number|column name] [perlexpression] e.g.: + + 3 + Address + 3 $_%=100 + Address s/\D//g + +Each input line is split using B<--colsep>. The value of the column is +put into $_, the perl expression is executed, the resulting value is +is the job slot that will be given the line. If the value is bigger +than the number of jobslots the value will be modulo number of jobslots. + +This is similar to B<--shard> but the hashing algorithm is a simple +modulo, which makes it predictible which jobslot will receive which +value. + +The performance is in the order of 100K rows per second. Faster if the +I<bincol> is small (<10), slower if it is big (>100). + +B<--bin> requires B<--pipe> and a fixed numeric value for B<--jobs>. + +See also: SPREADING BLOCKS OF DATA B<--group-by> B<--round-robin> +B<--shard> + + +=item B<--bg> + +Run command in background. + +GNU B<parallel> will normally wait for the completion of a job. With +B<--bg> GNU B<parallel> will not wait for completion of the command +before exiting. + +This is the default if B<--semaphore> is set. + +Implies B<--semaphore>. + +See also: B<--fg> B<man sem> + + +=cut + +# You accept to be added to a public hall of shame by +# removing this section. +=item B<--bibtex> + +=item B<--citation> + +Print the citation notice and BibTeX entry for GNU B<parallel>, +silence citation notice for all future runs, and exit. It will not run +any commands. + +If it is impossible for you to run B<--citation> you can instead use +B<--will-cite>, which will run commands, but which will only silence +the citation notice for this single run. + +If you use B<--will-cite> in scripts to be run by others you are +making it harder for others to see the citation notice. The +development of GNU B<parallel> is indirectly financed through +citations, so if your users do not know they should cite then you are +making it harder to finance development. However, if you pay 10000 +EUR, you have done your part to finance future development and should +feel free to use B<--will-cite> in scripts. + +If you do not want to help financing future development by letting +other users see the citation notice or by paying, then please consider +using another tool instead of GNU B<parallel>. You can find some of +the alternatives in B<man parallel_alternatives>. + + +=item B<--block> I<size> + +=item B<--block-size> I<size> + +Size of block in bytes to read at a time. + +The I<size> can be postfixed with K, M, G, T, P, k, m, g, t, or p. + +GNU B<parallel> tries to meet the block size but can be off by the +length of one record. For performance reasons I<size> should be bigger +than a two records. GNU B<parallel> will warn you and automatically +increase the size if you choose a I<size> that is too small. + +If you use B<-N>, B<--block> should be bigger than N+1 records. + +I<size> defaults to 1M. + +When using B<--pipe-part> a negative block size is not interpreted as a +blocksize but as the number of blocks each jobslot should have. So +this will run 10*5 = 50 jobs in total: + + parallel --pipe-part -a myfile --block -10 -j5 wc + +This is an efficient alternative to B<--round-robin> because data is +never read by GNU B<parallel>, but you can still have very few +jobslots process large amounts of data. + +See also: UNIT PREFIX B<-N> B<--pipe> B<--pipe-part> B<--round-robin> +B<--block-timeout> + +=item B<--block-timeout> I<duration> + +=item B<--bt> I<duration> + +Timeout for reading block when using B<--pipe>. + +If it takes longer than I<duration> to read a full block, use the +partial block read so far. + +I<duration> is in seconds, but can be postfixed with s, m, h, or d. + +See also: TIME POSTFIXES B<--pipe> B<--block> + + +=item B<--cat> + +Create a temporary file with content. + +Normally B<--pipe>/B<--pipe-part> will give data to the program on +stdin (standard input). With B<--cat> GNU B<parallel> will create a +temporary file with the name in B<{}>, so you can do: B<parallel +--pipe --cat wc {}>. + +Implies B<--pipe> unless B<--pipe-part> is used. + +See also: B<--pipe> B<--pipe-part> B<--fifo> + + +=item B<--cleanup> + +Remove transferred files. + +B<--cleanup> will remove the transferred files on the remote computer +after processing is done. + + find log -name '*gz' | parallel \ + --sshlogin server.example.com --transferfile {} \ + --return {.}.bz2 --cleanup "zcat {} | bzip -9 >{.}.bz2" + +With B<--transferfile {}> the file transferred to the remote computer +will be removed on the remote computer. Directories on the remote +computer containing the file will be removed if they are empty. + +With B<--return> the file transferred from the remote computer will be +removed on the remote computer. Directories on the remote +computer containing the file will be removed if they are empty. + +B<--cleanup> is ignored when not used with B<--basefile>, +B<--transfer>, B<--transferfile> or B<--return>. + +See also: B<--basefile> B<--transfer> B<--transferfile> B<--sshlogin> +B<--return> + + +=item B<--color> (beta testing) + +Colour output. + +Colour the output. Each job gets its own colour combination +(background+foreground). + +B<--color> is ignored when using B<-u>. + +See also: B<--color-failed> + + +=item B<--color-failed> (beta testing) + +=item B<--cf> (beta testing) + +Colour the output from failing jobs white on red. + +Useful if you have a lot of jobs and want to focus on the failing +jobs. + +B<--color-failed> is ignored when using B<-u>, B<--line-buffer> and +unreliable when using B<--latest-line>. + +See also: B<--color> + + +=item B<--colsep> I<regexp> + +=item B<-C> I<regexp> + +Column separator. + +The input will be treated as a table with I<regexp> separating the +columns. The n'th column can be accessed using B<{>I<n>B<}> or +B<{>I<n>.B<}>. E.g. B<{3}> is the 3rd column. + +If there are more input sources, each input source will be separated, +but the columns from each input source will be linked. + + parallel --colsep '-' echo {4} {3} {2} {1} \ + ::: A-B C-D ::: e-f g-h + +B<--colsep> implies B<--trim rl>, which can be overridden with +B<--trim n>. + +I<regexp> is a Perl Regular Expression: +https://perldoc.perl.org/perlre.html + +See also: B<--csv> B<{>I<n>B<}> B<--trim> B<--link> + + +=item B<--compress> + +Compress temporary files. + +If the output is big and very compressible this will take up less disk +space in $TMPDIR and possibly be faster due to less disk I/O. + +GNU B<parallel> will try B<pzstd>, B<lbzip2>, B<pbzip2>, B<zstd>, +B<pigz>, B<lz4>, B<lzop>, B<plzip>, B<lzip>, B<lrz>, B<gzip>, B<pxz>, +B<lzma>, B<bzip2>, B<xz>, B<clzip>, in that order, and use the first +available. + +GNU B<parallel> will use up to 8 processes per job waiting to be +printed. See B<man parallel_design> for details. + +See also: B<--compress-program> + + +=item B<--compress-program> I<prg> + +=item B<--decompress-program> I<prg> + +Use I<prg> for (de)compressing temporary files. + +It is assumed that I<prg -dc> will decompress stdin (standard input) +to stdout (standard output) unless B<--decompress-program> is given. + +See also: B<--compress> + + +=item B<--csv> (alpha testing) + +Treat input as CSV-format. + +B<--colsep> sets the field delimiter. It works very much like +B<--colsep> except it deals correctly with quoting. Compare: + + echo '"1 big, 2 small","2""x4"" plank",12.34' | + parallel --csv echo {1} of {2} at {3} + + echo '"1 big, 2 small","2""x4"" plank",12.34' | + parallel --colsep ',' echo {1} of {2} at {3} + +Even quoted newlines are parsed correctly: + + (echo '"Start of field 1 with newline' + echo 'Line 2 in field 1";value 2') | + parallel --csv --colsep ';' echo Field 1: {1} Field 2: {2} + +When used with B<--pipe> only pass full CSV-records. + +See also: B<--pipe> B<--link> B<{>I<n>B<}> B<--colsep> B<--header> + + +=item B<--ctag> (obsolete: use B<--color> B<--tag>) + +Color tag. + +If the values look very similar looking at the output it can be hard +to tell when a new value is used. B<--ctag> gives each value a random +color. + +See also: B<--color> B<--tag> + + +=item B<--ctagstring> I<str> (obsolete: use B<--color> B<--tagstring>) + +Color tagstring. + +See also: B<--color> B<--ctag> B<--tagstring> + + +=item B<--delay> I<duration> + +Delay starting next job by I<duration>. + +GNU B<parallel> will not start another job for the next I<duration>. + +I<duration> is in seconds, but can be postfixed with s, m, h, or d. + +If you append 'auto' to I<duration> (e.g. 13m3sauto) GNU B<parallel> +will automatically try to find the optimal value: If a job fails, +I<duration> is increased by 30%. If a job succeeds, I<duration> is +decreased by 10%. + +See also: TIME POSTFIXES B<--retries> B<--ssh-delay> + + +=item B<--delimiter> I<delim> + +=item B<-d> I<delim> + +Input items are terminated by I<delim>. + +The specified delimiter may be characters, C-style character escapes +such as \n, or octal or hexadecimal escape codes. Octal and +hexadecimal escape codes are understood as for the printf command. + +See also: B<--colsep> + + +=item B<--dirnamereplace> I<replace-str> + +=item B<--dnr> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{//}> for +dirname of input line. + +See also: B<{//}> + + +=item B<--dry-run> + +Print the job to run on stdout (standard output), but do not run the +job. + +Use B<-v -v> to include the wrapping that GNU B<parallel> generates +(for remote jobs, B<--tmux>, B<--nice>, B<--pipe>, B<--pipe-part>, +B<--fifo> and B<--cat>). Do not count on this literally, though, as +the job may be scheduled on another computer or the local computer if +: is in the list. + +See also: B<-v> + + +=item B<-E> I<eof-str> + +Set the end of file string to I<eof-str>. + +If the end of file string occurs as a line of input, the rest of the +input is not read. If neither B<-E> nor B<-e> is used, no end of file +string is used. + + +=item B<--eof>[=I<eof-str>] + +=item B<-e>[I<eof-str>] + +This option is a synonym for the B<-E> option. + +Use B<-E> instead, because it is POSIX compliant for B<xargs> while +this option is not. If I<eof-str> is omitted, there is no end of file +string. If neither B<-E> nor B<-e> is used, no end of file string is +used. + + +=item B<--embed> + +Embed GNU B<parallel> in a shell script. + +If you need to distribute your script to someone who does not want to +install GNU B<parallel> you can embed GNU B<parallel> in your own +shell script: + + parallel --embed > new_script + +After which you add your code at the end of B<new_script>. This is tested +on B<ash>, B<bash>, B<dash>, B<ksh>, B<sh>, and B<zsh>. + + +=item B<--env> I<var> + +Copy exported environment variable I<var>. + +This will copy I<var> to the environment that the command is run +in. This is especially useful for remote execution. + +In Bash I<var> can also be a Bash function - just remember to B<export +-f> the function. + +The variable '_' is special. It will copy all exported environment +variables except for the ones mentioned in ~/.parallel/ignored_vars. + +To copy the full environment (both exported and not exported +variables, arrays, and functions) use B<env_parallel>. + +See also: B<--record-env> B<--session> B<--sshlogin> I<command> +B<env_parallel> + + +=item B<--eta> + +Show the estimated number of seconds before finishing. + +This forces GNU B<parallel> to read all jobs before starting to find +the number of jobs (unless you use B<--total-jobs>). GNU B<parallel> +normally only reads the next job to run. + +The estimate is based on the runtime of finished jobs, so the first +estimate will only be shown when the first job has finished. + +Implies B<--progress>. + +See also: B<--bar> B<--progress> B<--total-jobs> + + +=item B<--fg> + +Run command in foreground. + +With B<--tmux> and B<--tmuxpane> GNU B<parallel> will start B<tmux> in +the foreground. + +With B<--semaphore> GNU B<parallel> will run the command in the +foreground (opposite B<--bg>), and wait for completion of the command +before exiting. Exit code will be that of the command. + +See also: B<--bg> B<man sem> + + +=item B<--fifo> + +Create a temporary fifo with content. + +Normally B<--pipe> and B<--pipe-part> will give data to the program on +stdin (standard input). With B<--fifo> GNU B<parallel> will create a +temporary fifo with the name in B<{}>, so you can do: + + parallel --pipe --fifo wc {} + +Beware: If the fifo is never opened for reading, the job will block forever: + + seq 1000000 | parallel --fifo echo This will block + seq 1000000 | parallel --fifo 'echo This will not block < {}' + +By using B<--fifo> instead of B<--cat> you may save I/O as B<--cat> +will write to a temporary file, whereas B<--fifo> will not. + +Implies B<--pipe> unless B<--pipe-part> is used. + +See also: B<--cat> B<--pipe> B<--pipe-part> + + +=item B<--filter> I<filter> + +Only run jobs where I<filter> is true. + +I<filter> can contain replacement strings and Perl code. Example: + + parallel --filter '{1} < {2}+1' echo ::: {1..3} ::: {1..3} + +Outputs: 1,1 1,2 1,3 2,2 2,3 3,3 + +See also: B<skip()> B<--no-run-if-empty> + + +=item B<--filter-hosts> (alpha testing) + +Remove down hosts. + +For each remote host: check that login through ssh works. If not: do +not use this host. + +For performance reasons, this check is performed only at the start and +every time B<--sshloginfile> is changed. If an host goes down after +the first check, it will go undetected until B<--sshloginfile> is +changed; B<--retries> can be used to mitigate this. + +Currently you can I<not> put B<--filter-hosts> in a profile, +$PARALLEL, /etc/parallel/config or similar. This is because GNU +B<parallel> uses GNU B<parallel> to compute this, so you will get an +infinite loop. This will likely be fixed in a later release. + +See also: B<--sshloginfile> B<--sshlogin> B<--retries> + + +=item B<--gnu> + +Behave like GNU B<parallel>. + +This option historically took precedence over B<--tollef>. The +B<--tollef> option is now retired, and therefore may not be +used. B<--gnu> is kept for compatibility. + + +=item B<--group> + +Group output. + +Output from each job is grouped together and is only printed when the +command is finished. Stdout (standard output) first followed by stderr +(standard error). + +This takes in the order of 0.5ms CPU time per job and depends on the +speed of your disk for larger output. It can be disabled with B<-u>, +but this means output from different commands can get mixed. + +B<--group> is the default. Can be reversed with B<-u>. + +See also: B<--line-buffer> B<--ungroup> B<--tag> + + +=item B<--group-by> I<val> + +Group input by value. + +Combined with B<--pipe>/B<--pipe-part> B<--group-by> groups lines with +the same value into a record. + +The value can be computed from the full line or from a single column. + +I<val> can be: + +=over 15 + +=item Z<> column number + +Use the value in the column numbered. + +=item Z<> column name + +Treat the first line as a header and use the value in the column +named. + +(Not supported with B<--pipe-part>). + +=item Z<> perl expression + +Run the perl expression and use $_ as the value. + +=item Z<> column number perl expression + +Put the value of the column put in $_, run the perl expression, and use $_ as the value. + +=item Z<> column name perl expression + +Put the value of the column put in $_, run the perl expression, and use $_ as the value. + +(Not supported with B<--pipe-part>). + +=back + +Example: + + UserID, Consumption + 123, 1 + 123, 2 + 12-3, 1 + 221, 3 + 221, 1 + 2/21, 5 + +If you want to group 123, 12-3, 221, and 2/21 into 4 records and pass +one record at a time to B<wc>: + + tail -n +2 table.csv | \ + parallel --pipe --colsep , --group-by 1 -kN1 wc + +Make GNU B<parallel> treat the first line as a header: + + cat table.csv | \ + parallel --pipe --colsep , --header : --group-by 1 -kN1 wc + +Address column by column name: + + cat table.csv | \ + parallel --pipe --colsep , --header : --group-by UserID -kN1 wc + +If 12-3 and 123 are really the same UserID, remove non-digits in +UserID when grouping: + + cat table.csv | parallel --pipe --colsep , --header : \ + --group-by 'UserID s/\D//g' -kN1 wc + +See also: SPREADING BLOCKS OF DATA B<--pipe> B<--pipe-part> B<--bin> +B<--shard> B<--round-robin> + + +=item B<--help> + +=item B<-h> + +Print a summary of the options to GNU B<parallel> and exit. + + +=item B<--halt-on-error> I<val> + +=item B<--halt> I<val> + +When should GNU B<parallel> terminate? + +In some situations it makes no sense to run all jobs. GNU +B<parallel> should simply stop as soon as a condition is met. + +I<val> defaults to B<never>, which runs all jobs no matter what. + +I<val> can also take on the form of I<when>,I<why>. + +I<when> can be 'now' which means kill all running jobs and halt +immediately, or it can be 'soon' which means wait for all running jobs +to complete, but start no new jobs. + +I<why> can be 'fail=X', 'fail=Y%', 'success=X', 'success=Y%', +'done=X', or 'done=Y%' where X is the number of jobs that has to fail, +succeed, or be done before halting, and Y is the percentage of jobs +that has to fail, succeed, or be done before halting. + +Example: + +=over 23 + +=item Z<> --halt now,fail=1 + +exit when a job has failed. Kill running jobs. + +=item Z<> --halt soon,fail=3 + +exit when 3 jobs have failed, but wait for running jobs to complete. + +=item Z<> --halt soon,fail=3% + +exit when 3% of the jobs have failed, but wait for running jobs to complete. + +=item Z<> --halt now,success=1 + +exit when a job has succeeded. Kill running jobs. + +=item Z<> --halt soon,success=3 + +exit when 3 jobs have succeeded, but wait for running jobs to complete. + +=item Z<> --halt now,success=3% + +exit when 3% of the jobs have succeeded. Kill running jobs. + +=item Z<> --halt now,done=1 + +exit when a job has finished. Kill running jobs. + +=item Z<> --halt soon,done=3 + +exit when 3 jobs have finished, but wait for running jobs to complete. + +=item Z<> --halt now,done=3% + +exit when 3% of the jobs have finished. Kill running jobs. + +=back + +For backwards compatibility these also work: + +=over 12 + +=item Z<>0 + +never + +=item Z<>1 + +soon,fail=1 + +=item Z<>2 + +now,fail=1 + +=item Z<>-1 + +soon,success=1 + +=item Z<>-2 + +now,success=1 + +=item Z<>1-99% + +soon,fail=1-99% + +=back + + +=item B<--header> I<regexp> + +Use regexp as header. + +For normal usage the matched header (typically the first line: +B<--header '.*\n'>) will be split using B<--colsep> (which will +default to '\t') and column names can be used as replacement +variables: B<{column name}>, B<{column name/}>, B<{column name//}>, +B<{column name/.}>, B<{column name.}>, B<{=column name perl expression +=}>, .. + +For B<--pipe> the matched header will be prepended to each output. + +B<--header :> is an alias for B<--header '.*\n'>. + +If I<regexp> is a number, it is a fixed number of lines. + +B<--header 0> is special: It will make replacement strings for files +given with B<--arg-file> or B<::::>. It will make B<{foo/bar}> for the +file B<foo/bar>. + +See also: B<--colsep> B<--pipe> B<--pipe-part> B<--arg-file> + + +=item B<--hostgroups> + +=item B<--hgrp> + +Enable hostgroups on arguments. + +If an argument contains '@' the string after '@' will be removed and +treated as a list of hostgroups on which this job is allowed to +run. If there is no B<--sshlogin> with a corresponding group, the job +will run on any hostgroup. + +Example: + + parallel --hostgroups \ + --sshlogin @grp1/myserver1 -S @grp1+grp2/myserver2 \ + --sshlogin @grp3/myserver3 \ + echo ::: my_grp1_arg@grp1 arg_for_grp2@grp2 third@grp1+grp3 + +B<my_grp1_arg> may be run on either B<myserver1> or B<myserver2>, +B<third> may be run on either B<myserver1> or B<myserver3>, +but B<arg_for_grp2> will only be run on B<myserver2>. + +See also: B<--sshlogin> B<$PARALLEL_HOSTGROUPS> B<$PARALLEL_ARGHOSTGROUPS> + + +=item B<-I> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{}>. + +See also: B<{}> + + +=item B<--replace> [I<replace-str>] + +=item B<-i> [I<replace-str>] + +This option is deprecated; use B<-I> instead. + +This option is a synonym for B<-I>I<replace-str> if I<replace-str> is +specified, and for B<-I {}> otherwise. + +See also: B<{}> + + +=item B<--joblog> I<logfile> + +=item B<--jl> I<logfile> + +Logfile for executed jobs. + +Save a list of the executed jobs to I<logfile> in the following TAB +separated format: sequence number, sshlogin, start time as seconds +since epoch, run time in seconds, bytes in files transferred, bytes in +files returned, exit status, signal, and command run. + +For B<--pipe> bytes transferred and bytes returned are number of input +and output of bytes. + +If B<logfile> is prepended with '+' log lines will be appended to the +logfile. + +To convert the times into ISO-8601 strict do: + + cat logfile | perl -a -F"\t" -ne \ + 'chomp($F[2]=`date -d \@$F[2] +%FT%T`); print join("\t",@F)' + +If the host is long, you can use B<column -t> to pretty print it: + + cat joblog | column -t + +See also: B<--resume> B<--resume-failed> + + +=item B<--jobs> I<N> + +=item B<-j> I<N> + +=item B<--max-procs> I<N> + +=item B<-P> I<N> + +Number of jobslots on each machine. + +Run up to N jobs in parallel. 0 means as many as possible (this can +take a while to determine). Default is 100% which will run one job per +CPU thread on each machine. + +Due to a bug B<-j 0> will also evaluate replacement strings twice up +to the number of joblots: + + # This will not count from 1 but from number-of-jobslots + seq 10000 | parallel -j0 echo '{= $_ = $foo++; =}' | head + # This will count from 1 + seq 10000 | parallel -j100 echo '{= $_ = $foo++; =}' | head + +If B<--semaphore> is set, the default is 1 thus making a mutex. + +See also: B<--use-cores-instead-of-threads> +B<--use-sockets-instead-of-threads> + + + +=item B<--jobs> I<+N> + +=item B<-j> I<+N> + +=item B<--max-procs> I<+N> + +=item B<-P> I<+N> + +Add N to the number of CPU threads. + +Run this many jobs in parallel. + +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> + + +=item B<--jobs> I<-N> + +=item B<-j> I<-N> + +=item B<--max-procs> I<-N> + +=item B<-P> I<-N> + +Subtract N from the number of CPU threads. + +Run this many jobs in parallel. If the evaluated number is less than +1 then 1 will be used. + +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> + + +=item B<--jobs> I<N>% + +=item B<-j> I<N>% + +=item B<--max-procs> I<N>% + +=item B<-P> I<N>% + +Multiply N% with the number of CPU threads. + +Run this many jobs in parallel. + +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> + + +=item B<--jobs> I<procfile> + +=item B<-j> I<procfile> + +=item B<--max-procs> I<procfile> + +=item B<-P> I<procfile> + +Read parameter from file. + +Use the content of I<procfile> as parameter for +I<-j>. E.g. I<procfile> could contain the string 100% or +2 or 10. If +I<procfile> is changed when a job completes, I<procfile> is read again +and the new number of jobs is computed. If the number is lower than +before, running jobs will be allowed to finish but new jobs will not +be started until the wanted number of jobs has been reached. This +makes it possible to change the number of simultaneous running jobs +while GNU B<parallel> is running. + + +=item B<--keep-order> + +=item B<-k> + +Keep sequence of output same as the order of input. + +Normally the output of a job will be printed as soon as the job +completes. Try this to see the difference: + + parallel -j4 sleep {}\; echo {} ::: 2 1 4 3 + parallel -j4 -k sleep {}\; echo {} ::: 2 1 4 3 + +If used with B<--onall> or B<--nonall> the output will grouped by +sshlogin in sorted order. + +B<--keep-order> cannot keep the output order when used with B<--pipe +--round-robin>. Here it instead means, that the jobslots will get the +same blocks as input in the same order in every run if the input is +kept the same. Run each of these twice and compare: + + seq 10000000 | parallel --pipe --round-robin 'sleep 0.$RANDOM; wc' + seq 10000000 | parallel --pipe -k --round-robin 'sleep 0.$RANDOM; wc' + +B<-k> only affects the order in which the output is printed - not the +order in which jobs are run. + +See also: B<--group> B<--line-buffer> + + +=item B<-L> I<recsize> + +When used with B<--pipe>: Read records of I<recsize>. + +When used otherwise: Use at most I<recsize> nonblank input lines per +command line. Trailing blanks cause an input line to be logically +continued on the next input line. + +B<-L 0> means read one line, but insert 0 arguments on the command +line. + +I<recsize> can be postfixed with K, M, G, T, P, k, m, g, t, or p. + +Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set. + +See also: UNIT PREFIX B<-N> B<--max-lines> B<--block> B<-X> B<-m> +B<--xargs> B<--pipe> + + +=item B<--max-lines> [I<recsize>] + +=item B<-l>[I<recsize>] + +When used with B<--pipe>: Read records of I<recsize> lines. + +When used otherwise: Synonym for the B<-L> option. Unlike B<-L>, the +I<recsize> argument is optional. If I<recsize> is not specified, +it defaults to one. The B<-l> option is deprecated since the POSIX +standard specifies B<-L> instead. + +B<-l 0> is an alias for B<-l 1>. + +Implies B<-X> unless B<-m>, B<--xargs>, or B<--pipe> is set. + +See also: UNIT PREFIX B<-N> B<--block> B<-X> B<-m> +B<--xargs> B<--pipe> + + +=item B<--limit> "I<command> I<args>" + +Dynamic job limit. + +Before starting a new job run I<command> with I<args>. The exit value +of I<command> determines what GNU B<parallel> will do: + +=over 4 + +=item Z<>0 + +Below limit. Start another job. + +=item Z<>1 + +Over limit. Start no jobs. + +=item Z<>2 + +Way over limit. Kill the youngest job. + +=back + +You can use any shell command. There are 3 predefined commands: + +=over 10 + +=item "io I<n>" + +Limit for I/O. The amount of disk I/O will be computed as a value +0-100, where 0 is no I/O and 100 is at least one disk is 100% +saturated. + +=item "load I<n>" + +Similar to B<--load>. + +=item "mem I<n>" + +Similar to B<--memfree>. + +=back + +See also: B<--memfree> B<--load> + + +=item B<--latest-line> (alpha testing) + +=item B<--ll> (alpha testing) + +Print the lastest line. Each job gets a single line that is updated +with the lastest output from the job. + +Example: + + slow_seq() { + seq "$@" | + perl -ne '$|=1; for(split//){ print; select($a,$a,$a,0.03);}' + } + export -f slow_seq + parallel --shuf -j99 --ll --tag --bar --color slow_seq {} ::: {1..300} + +See also: B<--line-buffer> + + +=item B<--line-buffer> (beta testing) + +=item B<--lb> (beta testing) + +Buffer output on line basis. + +B<--group> will keep the output together for a whole job. B<--ungroup> +allows output to mixup with half a line coming from one job and half a +line coming from another job. B<--line-buffer> fits between these two: +GNU B<parallel> will print a full line, but will allow for mixing +lines of different jobs. + +B<--line-buffer> takes more CPU power than both B<--group> and +B<--ungroup>, but can be much faster than B<--group> if the CPU is not +the limiting factor. + +Normally B<--line-buffer> does not buffer on disk, and can thus +process an infinite amount of data, but it will buffer on disk when +combined with: B<--keep-order>, B<--results>, B<--compress>, and +B<--files>. This will make it as slow as B<--group> and will limit +output to the available disk space. + +With B<--keep-order> B<--line-buffer> will output lines from the first +job continuously while it is running, then lines from the second job +while that is running. It will buffer full lines, but jobs will not +mix. Compare: + + parallel -j0 'echo {};sleep {};echo {}' ::: 1 3 2 4 + parallel -j0 --lb 'echo {};sleep {};echo {}' ::: 1 3 2 4 + parallel -j0 -k --lb 'echo {};sleep {};echo {}' ::: 1 3 2 4 + +See also: B<--group> B<--ungroup> B<--keep-order> B<--tag> + + +=item B<--link> + +=item B<--xapply> + +Link input sources. + +Read multiple input sources like the command B<xapply>. If multiple +input sources are given, one argument will be read from each of the +input sources. The arguments can be accessed in the command as B<{1}> +.. B<{>I<n>B<}>, so B<{1}> will be a line from the first input source, +and B<{6}> will refer to the line with the same line number from the +6th input source. + +Compare these two: + + parallel echo {1} {2} ::: 1 2 3 ::: a b c + parallel --link echo {1} {2} ::: 1 2 3 ::: a b c + +Arguments will be recycled if one input source has more arguments than the others: + + parallel --link echo {1} {2} {3} \ + ::: 1 2 ::: I II III ::: a b c d e f g + +See also: B<--header> B<:::+> B<::::+> + + +=item B<--load> I<max-load> + +Only start jobs if load is less than max-load. + +Do not start new jobs on a given computer unless the number of running +processes on the computer is less than I<max-load>. I<max-load> uses +the same syntax as B<--jobs>, so I<100%> for one per CPU is a valid +setting. Only difference is 0 which is interpreted as 0.01. + +See also: B<--limit> B<--jobs> + + +=item B<--controlmaster> + +=item B<-M> + +Use ssh's ControlMaster to make ssh connections faster. + +Useful if jobs run remote and are very fast to run. This is disabled +for sshlogins that specify their own ssh command. + +See also: B<--ssh> B<--sshlogin> + + +=item B<-m> + +Multiple arguments. + +Insert as many arguments as the command line length permits. If +multiple jobs are being run in parallel: distribute the arguments +evenly among the jobs. Use B<-j1> or B<--xargs> to avoid this. + +If B<{}> is not used the arguments will be appended to the +line. If B<{}> is used multiple times each B<{}> will be replaced +with all the arguments. + +Support for B<-m> with B<--sshlogin> is limited and may fail. + +If in doubt use B<-X> as that will most likely do what is needed. + +See also: B<-X> B<--xargs> + + +=item B<--memfree> I<size> + +Minimum memory free when starting another job. + +The I<size> can be postfixed with K, M, G, T, P, k, m, g, t, or p. + +If the jobs take up very different amount of RAM, GNU B<parallel> will +only start as many as there is memory for. If less than I<size> bytes +are free, no more jobs will be started. If less than 50% I<size> bytes +are free, the youngest job will be killed (as per B<--term-seq>), and +put back on the queue to be run later. + +B<--retries> must be set to determine how many times GNU B<parallel> +should retry a given job. + +See also: UNIT PREFIX B<--term-seq> B<--retries> B<--memsuspend> + + +=item B<--memsuspend> I<size> + +Suspend jobs when there is less memory available. + +If the available memory falls below 2 * I<size>, GNU B<parallel> will +suspend some of the running jobs. If the available memory falls below +I<size>, only one job will be running. + +If a single job takes up at most I<size> RAM, all jobs will complete +without running out of memory. If you have swap available, you can +usually lower I<size> to around half the size of a single job - with +the slight risk of swapping a little. + +Jobs will be resumed when more RAM is available - typically when the +oldest job completes. + +B<--memsuspend> only works on local jobs because there is no obvious +way to suspend remote jobs. + +I<size> can be postfixed with K, M, G, T, P, k, m, g, t, or p. + +See also: UNIT PREFIX B<--memfree> + + +=item B<--minversion> I<version> + +Print the version GNU B<parallel> and exit. + +If the current version of GNU B<parallel> is less than I<version> the +exit code is 255. Otherwise it is 0. + +This is useful for scripts that depend on features only available from +a certain version of GNU B<parallel>: + + parallel --minversion 20170422 && + echo halt done=50% supported from version 20170422 && + parallel --halt now,done=50% echo ::: {1..100} + +See also: B<--version> + + +=item B<--max-args> I<max-args> + +=item B<-n> I<max-args> + +Use at most I<max-args> arguments per command line. + +Fewer than I<max-args> arguments will be used if the size (see the +B<-s> option) is exceeded, unless the B<-x> option is given, in which +case GNU B<parallel> will exit. + +B<-n 0> means read one argument, but insert 0 arguments on the command +line. + +I<max-args> can be postfixed with K, M, G, T, P, k, m, g, t, or p (see +UNIT PREFIX). + +Implies B<-X> unless B<-m> is set. + +See also: B<-X> B<-m> B<--xargs> B<--max-replace-args> + + +=item B<--max-replace-args> I<max-args> + +=item B<-N> I<max-args> + +Use at most I<max-args> arguments per command line. + +Like B<-n> but also makes replacement strings B<{1}> +.. B<{>I<max-args>B<}> that represents argument 1 .. I<max-args>. If +too few args the B<{>I<n>B<}> will be empty. + +B<-N 0> means read one argument, but insert 0 arguments on the command +line. + +This will set the owner of the homedir to the user: + + tr ':' '\n' < /etc/passwd | parallel -N7 chown {1} {6} + +Implies B<-X> unless B<-m> or B<--pipe> is set. + +I<max-args> can be postfixed with K, M, G, T, P, k, m, g, t, or p. + +When used with B<--pipe> B<-N> is the number of records to read. This +is somewhat slower than B<--block>. + +See also: UNIT PREFIX B<--pipe> B<--block> B<-m> B<-X> B<--max-args> + + +=item B<--nonall> + +B<--onall> with no arguments. + +Run the command on all computers given with B<--sshlogin> but take no +arguments. GNU B<parallel> will log into B<--jobs> number of computers +in parallel and run the job on the computer. B<-j> adjusts how many +computers to log into in parallel. + +This is useful for running the same command (e.g. uptime) on a list of +servers. + +See also: B<--onall> B<--sshlogin> + + +=item B<--onall> + +Run all the jobs on all computers given with B<--sshlogin>. + +GNU B<parallel> will log into B<--jobs> number of computers in +parallel and run one job at a time on the computer. The order of the +jobs will not be changed, but some computers may finish before others. + +When using B<--group> the output will be grouped by each server, so +all the output from one server will be grouped together. + +B<--joblog> will contain an entry for each job on each server, so +there will be several job sequence 1. + +See also: B<--nonall> B<--sshlogin> + + +=item B<--open-tty> + +=item B<-o> + +Open terminal tty. + +Similar to B<--tty> but does not set B<--jobs> or B<--ungroup>. + +See also: B<--tty> + + +=item B<--output-as-files> + +=item B<--outputasfiles> + +=item B<--files> + +Save output to files. + +Instead of printing the output to stdout (standard output) the output +of each job is saved in a file and the filename is then printed. + +See also: B<--results> + + +=item B<--pipe> + +=item B<--spreadstdin> + +Spread input to jobs on stdin (standard input). + +Read a block of data from stdin (standard input) and give one block of +data as input to one job. + +The block size is determined by B<--block> (default: 1M). The strings +B<--recstart> and B<--recend> tell GNU B<parallel> how a record starts +and/or ends. The block read will have the final partial record removed +before the block is passed on to the job. The partial record will be +prepended to next block. + +You can limit the number of records to be passed with B<-N>, and set +the record size with B<-L>. + +B<--pipe> maxes out at around 1 GB/s input, and 100 MB/s output. If +performance is important use B<--pipe-part>. + +B<--fifo> and B<--cat> will give stdin (standard input) on a fifo or a +temporary file. + +If data is arriving slowly, you can use B<--block-timeout> to finish +reading a block early. + +The data can be spread between the jobs in specific ways using +B<--round-robin>, B<--bin>, B<--shard>, B<--group-by>. See the +section: SPREADING BLOCKS OF DATA + +See also: B<--block> B<--block-timeout> B<--recstart> B<--recend> +B<--fifo> B<--cat> B<--pipe-part> B<-N> B<-L> B<--round-robin> + + +=item B<--pipe-part> + +Pipe parts of a physical file. + +B<--pipe-part> works similar to B<--pipe>, but is much faster. + +B<--pipe-part> has a few limitations: + +=over 3 + +=item * + +The file must be a normal file or a block device (technically it must +be seekable) and must be given using B<--arg-file> or B<::::>. The file cannot +be a pipe, a fifo, or a stream as they are not seekable. + +If using a block device with lot of NUL bytes, remember to set +B<--recend ''>. + +=item * + +Record counting (B<-N>) and line counting (B<-L>/B<-l>) do not +work. Instead use B<--recstart> and B<--recend> to determine +where records end. + +=back + +See also: B<--pipe> B<--recstart> B<--recend> B<--arg-file> B<::::> + + +=item B<--plain> + +Ignore B<--profile>, $PARALLEL, and ~/.parallel/config. + +Ignore any B<--profile>, $PARALLEL, and ~/.parallel/config to get full +control on the command line (used by GNU B<parallel> internally when +called with B<--sshlogin>). + +See also: B<--profile> + + +=item B<--plus> + +Add more replacement strings. + +Activate additional replacement strings: {+/} {+.} {+..} {+...} {..} +{...} {/..} {/...} {##}. The idea being that '{+foo}' matches the opposite of +'{foo}' and {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} = +{+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...} + +B<{##}> is the total number of jobs to be run. It is incompatible with +B<-X>/B<-m>/B<--xargs>. + +B<{0%}> zero-padded jobslot. + +B<{0#}> zero-padded sequence number. + +B<{choose_k}> is inspired by n choose k: Given a list of n elements, +choose k. k is the number of input sources and n is the number of +arguments in an input source. The content of the input sources must +be the same and the arguments must be unique. + +B<{uniq}> skips jobs where values from two input sources are the same. + +Shorthands for variables: + + {slot} $PARALLEL_JOBSLOT (see {%}) + {sshlogin} $PARALLEL_SSHLOGIN + {host} $PARALLEL_SSHHOST + {agrp} $PARALLEL_ARGHOSTGROUPS + {hgrp} $PARALLEL_HOSTGROUPS + +The following dynamic replacement strings are also activated. They are +inspired by bash's parameter expansion: + + {:-str} str if the value is empty + {:num} remove the first num characters + {:pos:len} substring from position pos length len + {#regexp} remove prefix regexp (non-greedy) + {##regexp} remove prefix regexp (greedy) + {%regexp} remove postfix regexp (non-greedy) + {%%regexp} remove postfix regexp (greedy) + {/regexp/str} replace one regexp with str + {//regexp/str} replace every regexp with str + {^str} uppercase str if found at the start + {^^str} uppercase str + {,str} lowercase str if found at the start + {,,str} lowercase str + +See also: B<--rpl> B<{}> + + +=item B<--process-slot-var> I<varname> + +Set the environment variable I<varname> to the jobslot number-1. + + seq 10 | parallel --process-slot-var=name echo '$name' {} + + +=item B<--progress> + +Show progress of computations. + +List the computers involved in the task with number of CPUs detected +and the max number of jobs to run. After that show progress for each +computer: number of running jobs, number of completed jobs, and +percentage of all jobs done by this computer. The percentage will only +be available after all jobs have been scheduled as GNU B<parallel> +only read the next job when ready to schedule it - this is to avoid +wasting time and memory by reading everything at startup. + +By sending GNU B<parallel> SIGUSR2 you can toggle turning on/off +B<--progress> on a running GNU B<parallel> process. + +See also: B<--eta> B<--bar> + + +=item B<--max-line-length-allowed> (alpha testing) + +Print maximal command line length. + +Print the maximal number of characters allowed on the command line and +exit (used by GNU B<parallel> itself to determine the line length +on remote computers). + +See also: B<--show-limits> + + +=item B<--number-of-cpus> (obsolete) + +Print the number of physical CPU cores and exit. + + +=item B<--number-of-cores> + +Print the number of physical CPU cores and exit (used by GNU B<parallel> itself +to determine the number of physical CPU cores on remote computers). + +See also: B<--number-of-sockets> B<--number-of-threads> +B<--use-cores-instead-of-threads> B<--jobs> + + +=item B<--number-of-sockets> + +Print the number of filled CPU sockets and exit (used by GNU +B<parallel> itself to determine the number of filled CPU sockets on +remote computers). + +See also: B<--number-of-cores> B<--number-of-threads> +B<--use-sockets-instead-of-threads> B<--jobs> + + +=item B<--number-of-threads> + +Print the number of hyperthreaded CPU cores and exit (used by GNU +B<parallel> itself to determine the number of hyperthreaded CPU cores +on remote computers). + +See also: B<--number-of-cores> B<--number-of-sockets> B<--jobs> + + +=item B<--no-keep-order> + +Overrides an earlier B<--keep-order> (e.g. if set in +B<~/.parallel/config>). + + +=item B<--nice> I<niceness> + +Run the command at this niceness. + +By default GNU B<parallel> will run jobs at the same nice level as GNU +B<parallel> is started - both on the local machine and remote servers, +so you are unlikely to ever use this option. + +Setting B<--nice> will override this nice level. If the nice level is +smaller than the current nice level, it will only affect remote jobs +(e.g. if current level is 10 then B<--nice 5> will cause local jobs to +be run at level 10, but remote jobs run at nice level 5). + + +=item B<--interactive> + +=item B<-p> + +Ask user before running a job. + +Prompt the user about whether to run each command line and read a line +from the terminal. Only run the command line if the response starts +with 'y' or 'Y'. Implies B<-t>. + + +=item B<--_parset> I<type>,I<varname> + +Used internally by B<parset>. + +Generate shell code to be eval'ed which will set the variable(s) +I<varname>. I<type> can be 'assoc' for associative array or 'var' for +normal variables. + +The only supported use is as part of B<parset>. + + +=item B<--parens> I<parensstring> + +Use I<parensstring> instead of B<{==}>. + +Define start and end parenthesis for B<{=perl expression=}>. The +left and the right parenthesis can be multiple characters and are +assumed to be the same length. The default is B<{==}> giving B<{=> as +the start parenthesis and B<=}> as the end parenthesis. + +Another useful setting is B<,,,,> which would make both parenthesis +B<,,>: + + parallel --parens ,,,, echo foo is ,,s/I/O/g,, ::: FII + +See also: B<--rpl> B<{=>I<perl expression>B<=}> + + +=item B<--profile> I<profilename> + +=item B<-J> I<profilename> + +Use profile I<profilename> for options. + +This is useful if you want to have multiple profiles. You could have +one profile for running jobs in parallel on the local computer and a +different profile for running jobs on remote computers. + +I<profilename> corresponds to the file ~/.parallel/I<profilename>. + +You can give multiple profiles by repeating B<--profile>. If parts of +the profiles conflict, the later ones will be used. + +Default: ~/.parallel/config + +See also: PROFILE FILES + + +=item B<--quote> + +=item B<-q> + +Quote I<command>. + +If your command contains special characters that should not be +interpreted by the shell (e.g. ; \ | *), use B<--quote> to escape +these. The command must be a simple command (see B<man bash>) without +redirections and without variable assignments. + +Most people will not need this. Quoting is disabled by default. + +See also: QUOTING I<command> B<--shell-quote> B<uq()> B<Q()> + + +=item B<--no-run-if-empty> + +=item B<-r> + +Do not run empty input. + +If the stdin (standard input) only contains whitespace, do not run the +command. + +If used with B<--pipe> this is slow. + +See also: I<command> B<--pipe> B<--interactive> + + +=item B<--noswap> + +Do not start job is computer is swapping. + +Do not start new jobs on a given computer if there is both swap-in and +swap-out activity. + +The swap activity is only sampled every 10 seconds as the sampling +takes 1 second to do. + +Swap activity is computed as (swap-in)*(swap-out) which in practice is +a good value: swapping out is not a problem, swapping in is not a +problem, but both swapping in and out usually indicates a problem. + +B<--memfree> and B<--memsuspend> may give better results, so try using +those first. + +See also: B<--memfree> B<--memsuspend> + + +=item B<--record-env> + +Record exported environment. + +Record current exported environment variables in +B<~/.parallel/ignored_vars>. This will ignore variables currently set +when using B<--env _>. So you should set the variables/fuctions, you +want to use I<after> running B<--record-env>. + +See also: B<--env> B<--session> B<env_parallel> + + +=item B<--recstart> I<startstring> + +=item B<--recend> I<endstring> + +Split record between I<endstring> and I<startstring>. + +If B<--recstart> is given I<startstring> will be used to split at record start. + +If B<--recend> is given I<endstring> will be used to split at record end. + +If both B<--recstart> and B<--recend> are given the combined string +I<endstring>I<startstring> will have to match to find a split +position. This is useful if either I<startstring> or I<endstring> +match in the middle of a record. + +If neither B<--recstart> nor B<--recend> are given, then B<--recend> +defaults to '\n'. To have no record separator (e.g. for binary files) +use B<--recend "">. + +B<--recstart> and B<--recend> are used with B<--pipe>. + +Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular +expressions. This is slow, however. + +Use B<--remove-rec-sep> to remove B<--recstart> and B<--recend> before +passing the block to the job. + +See also: B<--pipe> B<--regexp> B<--remove-rec-sep> + + +=item B<--regexp> + +Use B<--regexp> to interpret B<--recstart> and B<--recend> as regular +expressions. This is slow, however. + +See also: B<--pipe> B<--regexp> B<--remove-rec-sep> B<--recstart> +B<--recend> + + +=item B<--remove-rec-sep> + +=item B<--removerecsep> + +=item B<--rrs> + +Remove record separator. + +Remove the text matched by B<--recstart> and B<--recend> before piping +it to the command. + +Only used with B<--pipe>/B<--pipe-part>. + +See also: B<--pipe> B<--regexp> B<--pipe-part> B<--recstart> +B<--recend> + + +=item B<--results> I<name> + +=item B<--res> I<name> + +Save the output into files. + +B<Simple string output dir> + +If I<name> does not contain replacement strings and does not end in +B<.csv/.tsv>, the output will be stored in a directory tree rooted at +I<name>. Within this directory tree, each command will result in +three files: I<name>/<ARGS>/stdout and I<name>/<ARGS>/stderr, +I<name>/<ARGS>/seq, where <ARGS> is a sequence of directories +representing the header of the input source (if using B<--header :>) +or the number of the input source and corresponding values. + +E.g: + + parallel --header : --results foo echo {a} {b} \ + ::: a I II ::: b III IIII + +will generate the files: + + foo/a/II/b/III/seq + foo/a/II/b/III/stderr + foo/a/II/b/III/stdout + foo/a/II/b/IIII/seq + foo/a/II/b/IIII/stderr + foo/a/II/b/IIII/stdout + foo/a/I/b/III/seq + foo/a/I/b/III/stderr + foo/a/I/b/III/stdout + foo/a/I/b/IIII/seq + foo/a/I/b/IIII/stderr + foo/a/I/b/IIII/stdout + +and + + parallel --results foo echo {1} {2} ::: I II ::: III IIII + +will generate the files: + + foo/1/II/2/III/seq + foo/1/II/2/III/stderr + foo/1/II/2/III/stdout + foo/1/II/2/IIII/seq + foo/1/II/2/IIII/stderr + foo/1/II/2/IIII/stdout + foo/1/I/2/III/seq + foo/1/I/2/III/stderr + foo/1/I/2/III/stdout + foo/1/I/2/IIII/seq + foo/1/I/2/IIII/stderr + foo/1/I/2/IIII/stdout + + +B<CSV file output> + +If I<name> ends in B<.csv>/B<.tsv> the output will be a CSV-file +named I<name>. + +B<.csv> gives a comma separated value file. B<.tsv> gives a TAB +separated value file. + +B<-.csv>/B<-.tsv> are special: It will give the file on stdout +(standard output). + + +B<JSON file output> + +If I<name> ends in B<.json> the output will be a JSON-file +named I<name>. + +B<-.json> is special: It will give the file on stdout (standard +output). + + +B<Replacement string output file> + +If I<name> contains a replacement string and the replaced result does +not end in /, then the standard output will be stored in a file named +by this result. Standard error will be stored in the same file name +with '.err' added, and the sequence number will be stored in the same +file name with '.seq' added. + +E.g. + + parallel --results my_{} echo ::: foo bar baz + +will generate the files: + + my_bar + my_bar.err + my_bar.seq + my_baz + my_baz.err + my_baz.seq + my_foo + my_foo.err + my_foo.seq + + +B<Replacement string output dir> + +If I<name> contains a replacement string and the replaced result ends +in /, then output files will be stored in the resulting dir. + +E.g. + + parallel --results my_{}/ echo ::: foo bar baz + +will generate the files: + + my_bar/seq + my_bar/stderr + my_bar/stdout + my_baz/seq + my_baz/stderr + my_baz/stdout + my_foo/seq + my_foo/stderr + my_foo/stdout + +See also: B<--output-as-files> B<--tag> B<--header> B<--joblog> + + +=item B<--resume> + +Resumes from the last unfinished job. + +By reading B<--joblog> or the +B<--results> dir GNU B<parallel> will figure out the last unfinished +job and continue from there. As GNU B<parallel> only looks at the +sequence numbers in B<--joblog> then the input, the command, and +B<--joblog> all have to remain unchanged; otherwise GNU B<parallel> +may run wrong commands. + +See also: B<--joblog> B<--results> B<--resume-failed> B<--retries> + + +=item B<--resume-failed> + +Retry all failed and resume from the last unfinished job. + +By reading +B<--joblog> GNU B<parallel> will figure out the failed jobs and run +those again. After that it will resume last unfinished job and +continue from there. As GNU B<parallel> only looks at the sequence +numbers in B<--joblog> then the input, the command, and B<--joblog> +all have to remain unchanged; otherwise GNU B<parallel> may run wrong +commands. + +See also: B<--joblog> B<--resume> B<--retry-failed> B<--retries> + + +=item B<--retry-failed> + +Retry all failed jobs in joblog. + +By reading B<--joblog> GNU +B<parallel> will figure out the failed jobs and run those again. + +B<--retry-failed> ignores the command and arguments on the command +line: It only looks at the joblog. + +B<Differences between --resume, --resume-failed, --retry-failed> + +In this example B<exit {= $_%=2 =}> will cause every other job to fail. + + timeout -k 1 4 parallel --joblog log -j10 \ + 'sleep {}; exit {= $_%=2 =}' ::: {10..1} + +4 jobs completed. 2 failed: + + Seq [...] Exitval Signal Command + 10 [...] 1 0 sleep 1; exit 1 + 9 [...] 0 0 sleep 2; exit 0 + 8 [...] 1 0 sleep 3; exit 1 + 7 [...] 0 0 sleep 4; exit 0 + +B<--resume> does not care about the Exitval, but only looks at Seq. If +the Seq is run, it will not be run again. So if needed, you can change +the command for the seqs not run yet: + + parallel --resume --joblog log -j10 \ + 'sleep .{}; exit {= $_%=2 =}' ::: {10..1} + + Seq [...] Exitval Signal Command + [... as above ...] + 1 [...] 0 0 sleep .10; exit 0 + 6 [...] 1 0 sleep .5; exit 1 + 5 [...] 0 0 sleep .6; exit 0 + 4 [...] 1 0 sleep .7; exit 1 + 3 [...] 0 0 sleep .8; exit 0 + 2 [...] 1 0 sleep .9; exit 1 + +B<--resume-failed> cares about the Exitval, but also only looks at Seq +to figure out which commands to run. Again this means you can change +the command, but not the arguments. It will run the failed seqs and +the seqs not yet run: + + parallel --resume-failed --joblog log -j10 \ + 'echo {};sleep .{}; exit {= $_%=3 =}' ::: {10..1} + + Seq [...] Exitval Signal Command + [... as above ...] + 10 [...] 1 0 echo 1;sleep .1; exit 1 + 8 [...] 0 0 echo 3;sleep .3; exit 0 + 6 [...] 2 0 echo 5;sleep .5; exit 2 + 4 [...] 1 0 echo 7;sleep .7; exit 1 + 2 [...] 0 0 echo 9;sleep .9; exit 0 + +B<--retry-failed> cares about the Exitval, but takes the command from +the joblog. It ignores any arguments or commands given on the command +line: + + parallel --retry-failed --joblog log -j10 this part is ignored + + Seq [...] Exitval Signal Command + [... as above ...] + 10 [...] 1 0 echo 1;sleep .1; exit 1 + 6 [...] 2 0 echo 5;sleep .5; exit 2 + 4 [...] 1 0 echo 7;sleep .7; exit 1 + +See also: B<--joblog> B<--resume> B<--resume-failed> B<--retries> + + +=item B<--retries> I<n> + +Try failing jobs I<n> times. + +If a job fails, retry it on another computer on which it has not +failed. Do this I<n> times. If there are fewer than I<n> computers in +B<--sshlogin> GNU B<parallel> will re-use all the computers. This is +useful if some jobs fail for no apparent reason (such as network +failure). + +I<n>=0 means infinite. + +See also: B<--term-seq> B<--sshlogin> + + +=item B<--return> I<filename> + +Transfer files from remote computers. + +B<--return> is used with +B<--sshlogin> when the arguments are files on the remote computers. When +processing is done the file I<filename> will be transferred +from the remote computer using B<rsync> and will be put relative to +the default login dir. E.g. + + echo foo/bar.txt | parallel --return {.}.out \ + --sshlogin server.example.com touch {.}.out + +This will transfer the file I<$HOME/foo/bar.out> from the computer +I<server.example.com> to the file I<foo/bar.out> after running +B<touch foo/bar.out> on I<server.example.com>. + + parallel -S server --trc out/./{}.out touch {}.out ::: in/file + +This will transfer the file I<in/file.out> from the computer +I<server.example.com> to the files I<out/in/file.out> after running +B<touch in/file.out> on I<server>. + + echo /tmp/foo/bar.txt | parallel --return {.}.out \ + --sshlogin server.example.com touch {.}.out + +This will transfer the file I</tmp/foo/bar.out> from the computer +I<server.example.com> to the file I</tmp/foo/bar.out> after running +B<touch /tmp/foo/bar.out> on I<server.example.com>. + +Multiple files can be transferred by repeating the option multiple +times: + + echo /tmp/foo/bar.txt | parallel \ + --sshlogin server.example.com \ + --return {.}.out --return {.}.out2 touch {.}.out {.}.out2 + +B<--return> is ignored when used with B<--sshlogin :> or when not used +with B<--sshlogin>. + +For details on transferring see B<--transferfile>. + +See also: B<--transfer> B<--transferfile> B<--sshlogin> B<--cleanup> +B<--workdir> + + +=item B<--round-robin> + +=item B<--round> + +Distribute chunks of standard input in a round robin fashion. + +Normally B<--pipe> will give a single block to each instance of the +command. With B<--round-robin> all blocks will at random be written to +commands already running. This is useful if the command takes a long +time to initialize. + +B<--keep-order> will not work with B<--round-robin> as it is +impossible to track which input block corresponds to which output. + +B<--round-robin> implies B<--pipe>, except if B<--pipe-part> is given. + +See the section: SPREADING BLOCKS OF DATA. + +See also: B<--bin> B<--group-by> B<--shard> + + +=item B<--rpl> 'I<tag> I<perl expression>' + +Define replacement string. + +Use I<tag> as a replacement string for I<perl expression>. This makes +it possible to define your own replacement strings. GNU B<parallel>'s +7 replacement strings are implemented as: + + --rpl '{} ' + --rpl '{#} 1 $_=$job->seq()' + --rpl '{%} 1 $_=$job->slot()' + --rpl '{/} s:.*/::' + --rpl '{//} $Global::use{"File::Basename"} ||= + eval "use File::Basename; 1;"; $_ = dirname($_);' + --rpl '{/.} s:.*/::; s:\.[^/.]+$::;' + --rpl '{.} s:\.[^/.]+$::' + +The B<--plus> replacement strings are implemented as: + + --rpl '{+/} s:/[^/]*$:: || s:.*$::' + --rpl '{+.} s:.*\.:: || s:.*$::' + --rpl '{+..} s:.*\.([^/.]+\.[^/.]+)$:$1: || s:.*$::' + --rpl '{+...} s:.*\.([^/.]+\.[^/.]+\.[^/.]+)$:$1: || s:.*$::' + --rpl '{..} s:\.[^/.]+\.[^/.]+$::' + --rpl '{...} s:\.[^/.]+\.[^/.]+\.[^/.]+$::' + --rpl '{/..} s:.*/::; s:\.[^/.]+\.[^/.]+$::' + --rpl '{/...} s:.*/::; s:\.[^/.]+\.[^/.]+\.[^/.]+$::' + --rpl '{choose_k} + for $t (2..$#arg){ if($arg[$t-1] ge $arg[$t]) { skip() } }' + --rpl '{##} 1 $_=total_jobs()' + --rpl '{0%} 1 $f=1+int((log($Global::max_jobs_running||1)/ + log(10))); $_=sprintf("%0${f}d",slot())' + --rpl '{0#} 1 $f=1+int((log(total_jobs())/log(10))); + $_=sprintf("%0${f}d",seq())' + + --rpl '{:-([^}]+?)} $_ ||= $$1' + --rpl '{:(\d+?)} substr($_,0,$$1) = ""' + --rpl '{:(\d+?):(\d+?)} $_ = substr($_,$$1,$$2);' + --rpl '{#([^#}][^}]*?)} $nongreedy=::make_regexp_ungreedy($$1); + s/^$nongreedy(.*)/$1/;' + --rpl '{##([^#}][^}]*?)} s/^$$1//;' + --rpl '{%([^}]+?)} $nongreedy=::make_regexp_ungreedy($$1); + s/(.*)$nongreedy$/$1/;' + --rpl '{%%([^}]+?)} s/$$1$//;' + --rpl '{/([^}]+?)/([^}]*?)} s/$$1/$$2/;' + --rpl '{^([^}]+?)} s/^($$1)/uc($1)/e;' + --rpl '{^^([^}]+?)} s/($$1)/uc($1)/eg;' + --rpl '{,([^}]+?)} s/^($$1)/lc($1)/e;' + --rpl '{,,([^}]+?)} s/($$1)/lc($1)/eg;' + + --rpl '{slot} 1 $_="\${PARALLEL_JOBSLOT}";uq()' + --rpl '{host} 1 $_="\${PARALLEL_SSHHOST}";uq()' + --rpl '{sshlogin} 1 $_="\${PARALLEL_SSHLOGIN}";uq()' + --rpl '{hgrp} 1 $_="\${PARALLEL_HOSTGROUPS}";uq()' + --rpl '{agrp} 1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()' + +If the user defined replacement string starts with '{' it can also be +used as a positional replacement string (like B<{2.}>). + +It is recommended to only change $_ but you have full access to all +of GNU B<parallel>'s internal functions and data structures. + +Here are a few examples: + + Is the job sequence even or odd? + --rpl '{odd} $_ = seq() % 2 ? "odd" : "even"' + Pad job sequence with leading zeros to get equal width + --rpl '{0#} $f=1+int("".(log(total_jobs())/log(10))); + $_=sprintf("%0${f}d",seq())' + Job sequence counting from 0 + --rpl '{#0} $_ = seq() - 1' + Job slot counting from 2 + --rpl '{%1} $_ = slot() + 1' + Remove all extensions + --rpl '{:} s:(\.[^/]+)*$::' + +You can have dynamic replacement strings by including parenthesis in +the replacement string and adding a regular expression between the +parenthesis. The matching string will be inserted as $$1: + + parallel --rpl '{%(.*?)} s/$$1//' echo {%.tar.gz} ::: my.tar.gz + parallel --rpl '{:%(.+?)} s:$$1(\.[^/]+)*$::' \ + echo {:%_file} ::: my_file.tar.gz + parallel -n3 --rpl '{/:%(.*?)} s:.*/(.*)$$1(\.[^/]+)*$:$1:' \ + echo job {#}: {2} {2.} {3/:%_1} ::: a/b.c c/d.e f/g_1.h.i + +You can even use multiple matches: + + parallel --rpl '{/(.+?)/(.*?)} s/$$1/$$2/;' + echo {/replacethis/withthis} {/b/C} ::: a_replacethis_b + + parallel --rpl '{(.*?)/(.*?)} $_="$$2$_$$1"' \ + echo {swap/these} ::: -middle- + +See also: B<{=>I<perl expression>B<=}> B<--parens> + + +=item B<--rsync-opts> I<options> + +Options to pass on to B<rsync>. + +Setting B<--rsync-opts> takes precedence over setting the environment +variable $PARALLEL_RSYNC_OPTS. + + +=item B<--max-chars> I<max-chars> + +=item B<-s> I<max-chars> + +Limit length of command. + +Use at most I<max-chars> characters per command line, including the +command and initial-arguments and the terminating nulls at the ends of +the argument strings. The largest allowed value is system-dependent, +and is calculated as the argument length limit for exec, less the size +of your environment. The default value is the maximum. + +I<max-chars> can be postfixed with K, M, G, T, P, k, m, g, t, or p +(see UNIT PREFIX). + +Implies B<-X> unless B<-m> or B<--xargs> is set. + +See also: B<-X> B<-m> B<--xargs> B<--max-line-length-allowed> +B<--show-limits> + + +=item B<--show-limits> + +Display limits given by the operating system. + +Display the limits on the command-line length which are imposed by the +operating system and the B<-s> option. Pipe the input from /dev/null +(and perhaps specify --no-run-if-empty) if you don't want GNU B<parallel> +to do anything. + +See also: B<--max-chars> B<--max-line-length-allowed> B<--version> + + +=item B<--semaphore> + +Work as a counting semaphore. + +B<--semaphore> will cause GNU B<parallel> to start I<command> in the +background. When the number of jobs given by B<--jobs> is reached, GNU +B<parallel> will wait for one of these to complete before starting +another command. + +B<--semaphore> implies B<--bg> unless B<--fg> is specified. + +The command B<sem> is an alias for B<parallel --semaphore>. + +See also: B<man sem> B<--bg> B<--fg> B<--semaphore-name> +B<--semaphore-timeout> B<--wait> + + +=item B<--semaphore-name> I<name> + +=item B<--id> I<name> + +Use B<name> as the name of the semaphore. + +The default is the name of the controlling tty (output from B<tty>). + +The default normally works as expected when used interactively, but +when used in a script I<name> should be set. I<$$> or I<my_task_name> +are often a good value. + +The semaphore is stored in ~/.parallel/semaphores/ + +Implies B<--semaphore>. + +See also: B<man sem> B<--semaphore> + + +=item B<--semaphore-timeout> I<secs> + +=item B<--st> I<secs> + +If I<secs> > 0: If the semaphore is not released within I<secs> +seconds, take it anyway. + +If I<secs> < 0: If the semaphore is not released within I<secs> +seconds, exit. + +I<secs> is in seconds, but can be postfixed with s, m, h, or d (see +the section TIME POSTFIXES). + +Implies B<--semaphore>. + +See also: B<man sem> + + +=item B<--seqreplace> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{#}> for +job sequence number. + +See also: B<{#}> + + +=item B<--session> + +Record names in current environment in B<$PARALLEL_IGNORED_NAMES> and +exit. + +Only used with B<env_parallel>. Aliases, functions, and variables with +names in B<$PARALLEL_IGNORED_NAMES> will not be copied. So you should +set variables/function you want copied I<after> running B<--session>. + +It is similar to B<--record-env>, but only for this session. + +Only supported in B<Ash, Bash, Dash, Ksh, Sh, and Zsh>. + +See also: B<--env> B<--record-env> B<env_parallel> + + +=item B<--shard> I<shardexpr> + +Use I<shardexpr> as shard key and shard input to the jobs. + +I<shardexpr> is [column number|column name] [perlexpression] e.g.: + + 3 + Address + 3 $_%=100 + Address s/\d//g + +Each input line is split using B<--colsep>. The value of the column is +put into $_, the perl expression is executed, the resulting value is +hashed so that all lines of a given value is given to the same job +slot. + +This is similar to sharding in databases. + +The performance is in the order of 100K rows per second. Faster if the +I<shardcol> is small (<10), slower if it is big (>100). + +B<--shard> requires B<--pipe> and a fixed numeric value for B<--jobs>. + +See the section: SPREADING BLOCKS OF DATA. + +See also: B<--bin> B<--group-by> B<--round-robin> + + +=item B<--shebang> + +=item B<--hashbang> + +GNU B<parallel> can be called as a shebang (#!) command as the first +line of a script. The content of the file will be treated as +inputsource. + +Like this: + + #!/usr/bin/parallel --shebang -r wget + + https://ftpmirror.gnu.org/parallel/parallel-20120822.tar.bz2 + https://ftpmirror.gnu.org/parallel/parallel-20130822.tar.bz2 + https://ftpmirror.gnu.org/parallel/parallel-20140822.tar.bz2 + +B<--shebang> must be set as the first option. + +On FreeBSD B<env> is needed: + + #!/usr/bin/env -S parallel --shebang -r wget + + https://ftpmirror.gnu.org/parallel/parallel-20120822.tar.bz2 + https://ftpmirror.gnu.org/parallel/parallel-20130822.tar.bz2 + https://ftpmirror.gnu.org/parallel/parallel-20140822.tar.bz2 + +There are many limitations of shebang (#!) depending on your operating +system. See details on https://www.in-ulm.de/~mascheck/various/shebang/ + +See also: B<--shebang-wrap> + + +=item B<--shebang-wrap> + +GNU B<parallel> can parallelize scripts by wrapping the shebang +line. If the program can be run like this: + + cat arguments | parallel the_program + +then the script can be changed to: + + #!/usr/bin/parallel --shebang-wrap /original/parser --options + +E.g. + + #!/usr/bin/parallel --shebang-wrap /usr/bin/python + +If the program can be run like this: + + cat data | parallel --pipe the_program + +then the script can be changed to: + + #!/usr/bin/parallel --shebang-wrap --pipe /orig/parser --opts + +E.g. + + #!/usr/bin/parallel --shebang-wrap --pipe /usr/bin/perl -w + +B<--shebang-wrap> must be set as the first option. + +See also: B<--shebang> + + +=item B<--shell-completion> I<shell> + +Generate shell completion code for interactive shells. + +Supported shells: bash zsh. + +Use I<auto> as I<shell> to automatically detect running shell. + +Activate the completion code with: + + zsh% eval "$(parallel --shell-completion auto)" + bash$ eval "$(parallel --shell-completion auto)" + +Or put this `/usr/share/zsh/site-functions/_parallel`, then `compinit` +to generate `~/.zcompdump`: + + #compdef parallel + + (( $+functions[_comp_parallel] )) || + eval "$(parallel --shell-completion auto)" && + _comp_parallel + + +=item B<--shell-quote> + +Does not run the command but quotes it. Useful for making quoted +composed commands for GNU B<parallel>. + +Multiple B<--shell-quote> with quote the string multiple times, so +B<parallel --shell-quote | parallel --shell-quote> can be written as +B<parallel --shell-quote --shell-quote>. + +See also: B<--quote> + + +=item B<--shuf> + +Shuffle jobs. + +When having multiple input sources it is hard to randomize +jobs. B<--shuf> will generate all jobs, and shuffle them before +running them. This is useful to get a quick preview of the results +before running the full batch. + +Combined with B<--halt soon,done=1%> you can run a random 1% sample of +all jobs: + + parallel --shuf --halt soon,done=1% echo ::: {1..100} ::: {1..100} + +See also: B<--halt> + + +=item B<--skip-first-line> + +Do not use the first line of input (used by GNU B<parallel> itself +when called with B<--shebang>). + + +=item B<--sql> I<DBURL> (obsolete) + +Use B<--sql-master> instead. + + +=item B<--sql-master> I<DBURL> + +Submit jobs via SQL server. I<DBURL> must point to a table, which will +contain the same information as B<--joblog>, the values from the input +sources (stored in columns V1 .. Vn), and the output (stored in +columns Stdout and Stderr). + +If I<DBURL> is prepended with '+' GNU B<parallel> assumes the table is +already made with the correct columns and appends the jobs to it. + +If I<DBURL> is not prepended with '+' the table will be dropped and +created with the correct amount of V-columns unless + +B<--sqlmaster> does not run any jobs, but it creates the values for +the jobs to be run. One or more B<--sqlworker> must be run to actually +execute the jobs. + +If B<--wait> is set, GNU B<parallel> will wait for the jobs to +complete. + +The format of a DBURL is: + + [sql:]vendor://[[user][:pwd]@][host][:port]/[db]/table + +E.g. + + sql:mysql://hr:hr@localhost:3306/hrdb/jobs + mysql://scott:tiger@my.example.com/pardb/paralleljobs + sql:oracle://scott:tiger@ora.example.com/xe/parjob + postgresql://scott:tiger@pg.example.com/pgdb/parjob + pg:///parjob + sqlite3:///%2Ftmp%2Fpardb.sqlite/parjob + csv:///%2Ftmp%2Fpardb/parjob + +Notice how / in the path of sqlite and CVS must be encoded as +%2F. Except the last / in CSV which must be a /. + +It can also be an alias from ~/.sql/aliases: + + :myalias mysql:///mydb/paralleljobs + +See also: B<--sql-and-worker> B<--sql-worker> B<--joblog> + + +=item B<--sql-and-worker> I<DBURL> + +Shorthand for: B<--sql-master> I<DBURL> B<--sql-worker> I<DBURL>. + +See also: B<--sql-master> B<--sql-worker> + + +=item B<--sql-worker> I<DBURL> + +Execute jobs via SQL server. Read the input sources variables from the +table pointed to by I<DBURL>. The I<command> on the command line +should be the same as given by B<--sqlmaster>. + +If you have more than one B<--sqlworker> jobs may be run more than +once. + +If B<--sqlworker> runs on the local machine, the hostname in the SQL +table will not be ':' but instead the hostname of the machine. + +See also: B<--sql-master> B<--sql-and-worker> + + +=item B<--ssh> I<sshcommand> + +GNU B<parallel> defaults to using B<ssh> for remote access. This can +be overridden with B<--ssh>. It can also be set on a per server +basis with B<--sshlogin>. + +See also: B<--sshlogin> + + +=item B<--ssh-delay> I<duration> + +Delay starting next ssh by I<duration>. + +GNU B<parallel> will not start another ssh for the next I<duration>. + +I<duration> is in seconds, but can be postfixed with s, m, h, or d. + +See also: TIME POSTFIXES B<--sshlogin> B<--delay> + + +=item B<--sshlogin> I<[@hostgroups/][ncpus/]sshlogin[,[@hostgroups/][ncpus/]sshlogin[,...]]> (alpha testing) + +=item B<--sshlogin> I<@hostgroup> (alpha testing) + +=item B<-S> I<[@hostgroups/][ncpus/]sshlogin[,[@hostgroups/][ncpus/]sshlogin[,...]]> (alpha testing) + +=item B<-S> I<@hostgroup> (alpha testing) + +Distribute jobs to remote computers. + +The jobs will be run on a list of remote computers. + +If I<hostgroups> is given, the I<sshlogin> will be added to that +hostgroup. Multiple hostgroups are separated by '+'. The I<sshlogin> +will always be added to a hostgroup named the same as I<sshlogin>. + +If only the I<@hostgroup> is given, only the sshlogins in that +hostgroup will be used. Multiple I<@hostgroup> can be given. + +GNU B<parallel> will determine the number of CPUs on the remote +computers and run the number of jobs as specified by B<-j>. If the +number I<ncpus> is given GNU B<parallel> will use this number for +number of CPUs on the host. Normally I<ncpus> will not be +needed. + +An I<sshlogin> is of the form: + + [sshcommand [options]] [username[:password]@]hostname + +If I<password> is given, B<sshpass> will be used. Otherwise the +sshlogin must not require a password (B<ssh-agent> and B<ssh-copy-id> +may help with that). + +If the hostname is an IPv6 address, the port can be given separated +with p or #. If the address is enclosed in [] you can also use :. +E.g. ::1p2222 ::1#2222 [::1]:2222 + +The sshlogin ':' is special, it means 'no ssh' and will therefore run +on the local computer. + +The sshlogin '..' is special, it read sshlogins from ~/.parallel/sshloginfile or +$XDG_CONFIG_HOME/parallel/sshloginfile + +The sshlogin '-' is special, too, it read sshlogins from stdin +(standard input). + +To specify more sshlogins separate the sshlogins by comma, newline (in +the same string), or repeat the options multiple times. + +GNU B<parallel> splits on , (comma) so if your sshlogin contains , +(comma) you need to replace it with \, or ,, + +For examples: see B<--sshloginfile>. + +The remote host must have GNU B<parallel> installed. + +B<--sshlogin> is known to cause problems with B<-m> and B<-X>. + +See also: B<--basefile> B<--transferfile> B<--return> B<--cleanup> +B<--trc> B<--sshloginfile> B<--workdir> B<--filter-hosts> +B<--ssh> + + +=item B<--sshloginfile> I<filename> + +=item B<--slf> I<filename> + +File with sshlogins. The file consists of sshlogins on separate +lines. Empty lines and lines starting with '#' are ignored. Example: + + server.example.com + username@server2.example.com + 8/my-8-cpu-server.example.com + 2/my_other_username@my-dualcore.example.net + # This server has SSH running on port 2222 + ssh -p 2222 server.example.net + 4/ssh -p 2222 quadserver.example.net + # Use a different ssh program + myssh -p 2222 -l myusername hexacpu.example.net + # Use a different ssh program with default number of CPUs + //usr/local/bin/myssh -p 2222 -l myusername hexacpu + # Use a different ssh program with 6 CPUs + 6//usr/local/bin/myssh -p 2222 -l myusername hexacpu + # Assume 16 CPUs on the local computer + 16/: + # Put server1 in hostgroup1 + @hostgroup1/server1 + # Put myusername@server2 in hostgroup1+hostgroup2 + @hostgroup1+hostgroup2/myusername@server2 + # Force 4 CPUs and put 'ssh -p 2222 server3' in hostgroup1 + @hostgroup1/4/ssh -p 2222 server3 + +When using a different ssh program the last argument must be the hostname. + +Multiple B<--sshloginfile> are allowed. + +GNU B<parallel> will first look for the file in current dir; if that +fails it look for the file in ~/.parallel. + +The sshloginfile '..' is special, it read sshlogins from +~/.parallel/sshloginfile + +The sshloginfile '.' is special, it read sshlogins from +/etc/parallel/sshloginfile + +The sshloginfile '-' is special, too, it read sshlogins from stdin +(standard input). + +If the sshloginfile is changed it will be re-read when a job finishes +though at most once per second. This makes it possible to add and +remove hosts while running. + +This can be used to have a daemon that updates the sshloginfile to +only contain servers that are up: + + cp original.slf tmp2.slf + while [ 1 ] ; do + nice parallel --nonall -j0 -k --slf original.slf \ + --tag echo | perl 's/\t$//' > tmp.slf + if diff tmp.slf tmp2.slf; then + mv tmp.slf tmp2.slf + fi + sleep 10 + done & + parallel --slf tmp2.slf ... + +See also: B<--filter-hosts> + + +=item B<--slotreplace> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{%}> for +job slot number. + +See also: B<{%}> + + +=item B<--silent> + +Silent. + +The job to be run will not be printed. This is the default. Can be +reversed with B<-v>. + +See also: B<-v> + + +=item B<--template> I<file>=I<repl> + +=item B<--tmpl> I<file>=I<repl> + +Replace replacement strings in I<file> and save it in I<repl>. + +All replacement strings in the contents of I<file> will be +replaced. All replacement strings in the name I<repl> will be +replaced. + +With B<--cleanup> the new file will be removed when the job is done. + +If I<my.tmpl> contains this: + + Xval: {x} + Yval: {y} + FixedValue: 9 + # x with 2 decimals + DecimalX: {=x $_=sprintf("%.2f",$_) =} + TenX: {=x $_=$_*10 =} + RandomVal: {=1 $_=rand() =} + +it can be used like this: + + myprog() { echo Using "$@"; cat "$@"; } + export -f myprog + parallel --cleanup --header : --tmpl my.tmpl={#}.t myprog {#}.t \ + ::: x 1.234 2.345 3.45678 ::: y 1 2 3 + +See also: B<{}> B<--cleanup> + + +=item B<--tty> + +Open terminal tty. + +If GNU B<parallel> is used for starting a program that accesses the +tty (such as an interactive program) then this option may be +needed. It will default to starting only one job at a time +(i.e. B<-j1>), not buffer the output (i.e. B<-u>), and it will open a +tty for the job. + +You can of course override B<-j1> and B<-u>. + +Using B<--tty> unfortunately means that GNU B<parallel> cannot kill +the jobs (with B<--timeout>, B<--memfree>, or B<--halt>). This is due +to GNU B<parallel> giving each child its own process group, which is +then killed. Process groups are dependant on the tty. + +See also: B<--ungroup> B<--open-tty> + + +=item B<--tag> (alpha testing) + +Tag lines with arguments. + +Each output line will be prepended with the arguments and TAB +(\t). When combined with B<--onall> or B<--nonall> the lines will be +prepended with the sshlogin instead. + +B<--tag> is ignored when using B<-u>. + +See also: B<--tagstring> B<--ctag> + + +=item B<--tagstring> I<str> (alpha testing) + +Tag lines with a string. + +Each output line will be prepended with I<str> and TAB (\t). I<str> +can contain replacement strings such as B<{}>. + +B<--tagstring> is ignored when using B<-u>, B<--onall>, and B<--nonall>. + +See also: B<--tag> B<--ctagstring> + + +=item B<--tee> + +Pipe all data to all jobs. + +Used with B<--pipe>/B<--pipe-part> and B<:::>. + + seq 1000 | parallel --pipe --tee -v wc {} ::: -w -l -c + +How many numbers in 1..1000 contain 0..9, and how many bytes do they +fill: + + seq 1000 | parallel --pipe --tee --tag \ + 'grep {1} | wc {2}' ::: {0..9} ::: -l -c + +How many words contain a..z and how many bytes do they fill? + + parallel -a /usr/share/dict/words --pipe-part --tee --tag \ + 'grep {1} | wc {2}' ::: {a..z} ::: -l -c + +See also: B<:::> B<--pipe> B<--pipe-part> + + +=item B<--term-seq> I<sequence> + +Termination sequence. + +When a job is killed due to B<--timeout>, B<--memfree>, B<--halt>, or +abnormal termination of GNU B<parallel>, I<sequence> determines how +the job is killed. The default is: + + TERM,200,TERM,100,TERM,50,KILL,25 + +which sends a TERM signal, waits 200 ms, sends another TERM signal, +waits 100 ms, sends another TERM signal, waits 50 ms, sends a KILL +signal, waits 25 ms, and exits. GNU B<parallel> detects if a process +dies before the waiting time is up. + +See also: B<--halt> B<--timeout> B<--memfree> + + +=item B<--total-jobs> I<jobs> (alpha testing) + +=item B<--total> I<jobs> (alpha testing) + +Provide the total number of jobs for computing ETA which is also used +for B<--bar>. + +Without B<--total-jobs> GNU Parallel will read all jobs before +starting a job. B<--total-jobs> is useful if the input is generated +slowly. + +See also: B<--bar> B<--eta> + + +=item B<--tmpdir> I<dirname> + +Directory for temporary files. + +GNU B<parallel> normally buffers output into temporary files in +/tmp. By setting B<--tmpdir> you can use a different dir for the +files. Setting B<--tmpdir> is equivalent to setting $TMPDIR. + +See also: B<--compress> B<$TMPDIR> B<$PARALLEL_REMOTE_TMPDIR> + + +=item B<--tmux> (Long beta testing) + +Use B<tmux> for output. Start a B<tmux> session and run each job in a +window in that session. No other output will be produced. + +See also: B<--tmuxpane> + + +=item B<--tmuxpane> (Long beta testing) + +Use B<tmux> for output but put output into panes in the first window. +Useful if you want to monitor the progress of less than 100 concurrent +jobs. + +See also: B<--tmux> + + +=item B<--timeout> I<duration> + +Time out for command. If the command runs for longer than I<duration> +seconds it will get killed as per B<--term-seq>. + +If I<duration> is followed by a % then the timeout will dynamically be +computed as a percentage of the median average runtime of successful +jobs. Only values > 100% will make sense. + +I<duration> is in seconds, but can be postfixed with s, m, h, or d. + +See also: TIME POSTFIXES B<--term-seq> B<--retries> + + +=item B<--verbose> + +=item B<-t> + +Print the job to be run on stderr (standard error). + +See also: B<-v> B<--interactive> + + +=item B<--transfer> + +Transfer files to remote computers. + +Shorthand for: B<--transferfile {}>. + +See also: B<--transferfile>. + + +=item B<--transferfile> I<filename> + +=item B<--tf> I<filename> + +Transfer I<filename> to remote computers. + +B<--transferfile> is used with B<--sshlogin> to transfer files to the +remote computers. The files will be transferred using B<rsync> and +will be put relative to the work dir. + +The I<filename> will normally contain a replacement string. + +If the path contains /./ the remaining path will be relative to the +work dir (for details: see B<rsync>). If the work dir is +B</home/user>, the transferring will be as follows: + + /tmp/foo/bar => /tmp/foo/bar + tmp/foo/bar => /home/user/tmp/foo/bar + /tmp/./foo/bar => /home/user/foo/bar + tmp/./foo/bar => /home/user/foo/bar + +I<Examples> + +This will transfer the file I<foo/bar.txt> to the computer +I<server.example.com> to the file I<$HOME/foo/bar.txt> before running +B<wc foo/bar.txt> on I<server.example.com>: + + echo foo/bar.txt | parallel --transferfile {} \ + --sshlogin server.example.com wc + +This will transfer the file I</tmp/foo/bar.txt> to the computer +I<server.example.com> to the file I</tmp/foo/bar.txt> before running +B<wc /tmp/foo/bar.txt> on I<server.example.com>: + + echo /tmp/foo/bar.txt | parallel --transferfile {} \ + --sshlogin server.example.com wc + +This will transfer the file I</tmp/foo/bar.txt> to the computer +I<server.example.com> to the file I<foo/bar.txt> before running +B<wc ./foo/bar.txt> on I<server.example.com>: + + echo /tmp/./foo/bar.txt | parallel --transferfile {} \ + --sshlogin server.example.com wc {= s:.*/\./:./: =} + +B<--transferfile> is often used with B<--return> and B<--cleanup>. A +shorthand for B<--transferfile {}> is B<--transfer>. + +B<--transferfile> is ignored when used with B<--sshlogin :> or when +not used with B<--sshlogin>. + +See also: B<--workdir> B<--sshlogin> B<--basefile> B<--return> +B<--cleanup> + + +=item B<--trc> I<filename> + +Transfer, Return, Cleanup. Shorthand for: B<--transfer> B<--return> +I<filename> B<--cleanup> + +See also: B<--transfer> B<--return> B<--cleanup> + + +=item B<--trim> <n|l|r|lr|rl> + +Trim white space in input. + +=over 4 + +=item n + +No trim. Input is not modified. This is the default. + +=item l + +Left trim. Remove white space from start of input. E.g. " a bc " -> "a bc ". + +=item r + +Right trim. Remove white space from end of input. E.g. " a bc " -> " a bc". + +=item lr + +=item rl + +Both trim. Remove white space from both start and end of input. E.g. " +a bc " -> "a bc". This is the default if B<--colsep> is used. + +=back + +See also: B<--no-run-if-empty> B<{}> B<--colsep> + + +=item B<--ungroup> + +=item B<-u> + +Ungroup output. + +Output is printed as soon as possible and bypasses GNU B<parallel> +internal processing. This may cause output from different commands to +be mixed thus should only be used if you do not care about the +output. Compare these: + + seq 4 | parallel -j0 \ + 'sleep {};echo -n start{};sleep {};echo {}end' + seq 4 | parallel -u -j0 \ + 'sleep {};echo -n start{};sleep {};echo {}end' + +It also disables B<--tag>. GNU B<parallel> outputs faster with +B<-u>. Compare the speeds of these: + + parallel seq ::: 300000000 >/dev/null + parallel -u seq ::: 300000000 >/dev/null + parallel --line-buffer seq ::: 300000000 >/dev/null + +Can be reversed with B<--group>. + +See also: B<--line-buffer> B<--group> + + +=item B<--extensionreplace> I<replace-str> + +=item B<--er> I<replace-str> + +Use the replacement string I<replace-str> instead of B<{.}> for input +line without extension. + +See also: B<{.}> + + +=item B<--use-sockets-instead-of-threads> + +See also: B<--use-cores-instead-of-threads> + + +=item B<--use-cores-instead-of-threads> + +=item B<--use-cpus-instead-of-cores> (obsolete) + +Determine how GNU B<parallel> counts the number of CPUs. + +GNU B<parallel> uses this number when the number of jobslots +(B<--jobs>) is computed relative to the number of CPUs (e.g. 100% or ++1). + +CPUs can be counted in three different ways: + +=over 8 + +=item sockets + +The number of filled CPU sockets (i.e. the number of physical chips). + +=item cores + +The number of physical cores (i.e. the number of physical compute +cores). + +=item threads + +The number of hyperthreaded cores (i.e. the number of virtual +cores - with some of them possibly being hyperthreaded) + +=back + +Normally the number of CPUs is computed as the number of CPU +threads. With B<--use-sockets-instead-of-threads> or +B<--use-cores-instead-of-threads> you can force it to be computed as +the number of filled sockets or number of cores instead. + +Most users will not need these options. + +B<--use-cpus-instead-of-cores> is a (misleading) alias for +B<--use-sockets-instead-of-threads> and is kept for backwards +compatibility. + +See also: B<--number-of-threads> B<--number-of-cores> +B<--number-of-sockets> + + +=item B<-v> + +Verbose. + +Print the job to be run on stdout (standard output). Can be reversed +with B<--silent>. + +Use B<-v> B<-v> to print the wrapping ssh command when running remotely. + +See also: B<-t> + + +=item B<--version> + +=item B<-V> + +Print the version GNU B<parallel> and exit. + + +=item B<--workdir> I<mydir> + +=item B<--wd> I<mydir> + +Jobs will be run in the dir I<mydir>. The default is the current dir +for the local machine, and the login dir for remote computers. + +Files transferred using B<--transferfile> and B<--return> will be +relative to I<mydir> on remote computers. + +The special I<mydir> value B<...> will create working dirs under +B<~/.parallel/tmp/>. If B<--cleanup> is given these dirs will be +removed. + +The special I<mydir> value B<.> uses the current working dir. If the +current working dir is beneath your home dir, the value B<.> is +treated as the relative path to your home dir. This means that if your +home dir is different on remote computers (e.g. if your login is +different) the relative path will still be relative to your home dir. + +To see the difference try: + + parallel -S server pwd ::: "" + parallel --wd . -S server pwd ::: "" + parallel --wd ... -S server pwd ::: "" + +I<mydir> can contain GNU B<parallel>'s replacement strings. + + +=item B<--wait> + +Wait for all commands to complete. + +Used with B<--semaphore> or B<--sqlmaster>. + +See also: B<man sem> + + +=item B<-X> + +Multiple arguments with context replace. Insert as many arguments as +the command line length permits. If multiple jobs are being run in +parallel: distribute the arguments evenly among the jobs. Use B<-j1> +to avoid this. + +If B<{}> is not used the arguments will be appended to the line. If +B<{}> is used as part of a word (like I<pic{}.jpg>) then the whole +word will be repeated. If B<{}> is used multiple times each B<{}> will +be replaced with the arguments. + +Normally B<-X> will do the right thing, whereas B<-m> can give +unexpected results if B<{}> is used as part of a word. + +Support for B<-X> with B<--sshlogin> is limited and may fail. + +See also: B<-m> + + +=item B<--exit> + +=item B<-x> + +Exit if the size (see the B<-s> option) is exceeded. + + +=item B<--xargs> + +Multiple arguments. Insert as many arguments as the command line +length permits. + +If B<{}> is not used the arguments will be appended to the +line. If B<{}> is used multiple times each B<{}> will be replaced +with all the arguments. + +Support for B<--xargs> with B<--sshlogin> is limited and may fail. + +See also: B<-X> + + +=back + + +=head1 EXAMPLES + +See: B<man parallel_examples> + + +=head1 SPREADING BLOCKS OF DATA + +B<--round-robin>, B<--pipe-part>, B<--shard>, B<--bin> and +B<--group-by> are all specialized versions of B<--pipe>. + +In the following I<n> is the number of jobslots given by B<--jobs>. A +record starts with B<--recstart> and ends with B<--recend>. It is +typically a full line. A chunk is a number of full records that is +approximately the size of a block. A block can contain half records, a +chunk cannot. + +B<--pipe> starts one job per chunk. It reads blocks from stdin +(standard input). It finds a record end near a block border and passes +a chunk to the program. + +B<--pipe-part> starts one job per chunk - just like normal +B<--pipe>. It first finds record endings near all block borders in the +file and then starts the jobs. By using B<--block -1> it will set the +block size to size-of-file/I<n>. Used this way it will start I<n> +jobs in total. + +B<--round-robin> starts I<n> jobs in total. It reads a block and +passes a chunk to whichever job is ready to read. It does not parse +the content except for identifying where a record ends to make sure it +only passes full records. + +B<--shard> starts I<n> jobs in total. It parses each line to read the +value in the given column. Based on this value the line is passed to +one of the I<n> jobs. All lines having this value will be given to the +same jobslot. + +B<--bin> works like B<--shard> but the value of the column is the +jobslot number it will be passed to. If the value is bigger than I<n>, +then I<n> will be subtracted from the value until the values is +smaller than or equal to I<n>. + +B<--group-by> starts one job per chunk. Record borders are not given +by B<--recend>/B<--recstart>. Instead a record is defined by a number +of lines having the same value in a given column. So the value of a +given column changes at a chunk border. With B<--pipe> every line is +parsed, with B<--pipe-part> only a few lines are parsed to find the +chunk border. + +B<--group-by> can be combined with B<--round-robin> or B<--pipe-part>. + + +=head1 TIME POSTFIXES + +Arguments that give a duration are given in seconds, but can be +expressed as floats postfixed with B<s>, B<m>, B<h>, or B<d> which +would multiply the float by 1, 60, 60*60, or 60*60*24. Thus these are +equivalent: 100000 and 1d3.5h16.6m4s. + + +=head1 UNIT PREFIX + +Many numerical arguments in GNU B<parallel> can be postfixed with K, +M, G, T, P, k, m, g, t, or p which would multiply the number with +1024, 1048576, 1073741824, 1099511627776, 1125899906842624, 1000, +1000000, 1000000000, 1000000000000, or 1000000000000000, respectively. + +You can even give it as a math expression. E.g. 1000000 can be written +as 1M-12*2.024*2k. + + +=head1 QUOTING + +GNU B<parallel> is very liberal in quoting. You only need to quote +characters that have special meaning in shell: + + ( ) $ ` ' " < > ; | \ + +and depending on context these needs to be quoted, too: + + ~ & # ! ? space * { + +Therefore most people will never need more quoting than putting '\' +in front of the special characters. + +Often you can simply put \' around every ': + + perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' file + +can be quoted: + + parallel perl -ne \''/^\S+\s+\S+$/ and print $ARGV,"\n"'\' ::: file + +However, when you want to use a shell variable you need to quote the +$-sign. Here is an example using $PARALLEL_SEQ. This variable is set +by GNU B<parallel> itself, so the evaluation of the $ must be done by +the sub shell started by GNU B<parallel>: + + seq 10 | parallel -N2 echo seq:\$PARALLEL_SEQ arg1:{1} arg2:{2} + +If the variable is set before GNU B<parallel> starts you can do this: + + VAR=this_is_set_before_starting + echo test | parallel echo {} $VAR + +Prints: B<test this_is_set_before_starting> + +It is a little more tricky if the variable contains more than one space in a row: + + VAR="two spaces between each word" + echo test | parallel echo {} \'"$VAR"\' + +Prints: B<test two spaces between each word> + +If the variable should not be evaluated by the shell starting GNU +B<parallel> but be evaluated by the sub shell started by GNU +B<parallel>, then you need to quote it: + + echo test | parallel VAR=this_is_set_after_starting \; echo {} \$VAR + +Prints: B<test this_is_set_after_starting> + +It is a little more tricky if the variable contains space: + + echo test |\ + parallel VAR='"two spaces between each word"' echo {} \'"$VAR"\' + +Prints: B<test two spaces between each word> + +$$ is the shell variable containing the process id of the shell. This +will print the process id of the shell running GNU B<parallel>: + + seq 10 | parallel echo $$ + +And this will print the process ids of the sub shells started by GNU +B<parallel>. + + seq 10 | parallel echo \$\$ + +If the special characters should not be evaluated by the sub shell +then you need to protect it against evaluation from both the shell +starting GNU B<parallel> and the sub shell: + + echo test | parallel echo {} \\\$VAR + +Prints: B<test $VAR> + +GNU B<parallel> can protect against evaluation by the sub shell by +using -q: + + echo test | parallel -q echo {} \$VAR + +Prints: B<test $VAR> + +This is particularly useful if you have lots of quoting. If you want +to run a perl script like this: + + perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' file + +It needs to be quoted like one of these: + + ls | parallel perl -ne '/^\\S+\\s+\\S+\$/\ and\ print\ \$ARGV,\"\\n\"' + ls | parallel perl -ne \''/^\S+\s+\S+$/ and print $ARGV,"\n"'\' + +Notice how spaces, \'s, "'s, and $'s need to be quoted. GNU +B<parallel> can do the quoting by using option -q: + + ls | parallel -q perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' + +However, this means you cannot make the sub shell interpret special +characters. For example because of B<-q> this WILL NOT WORK: + + ls *.gz | parallel -q "zcat {} >{.}" + ls *.gz | parallel -q "zcat {} | bzip2 >{.}.bz2" + +because > and | need to be interpreted by the sub shell. + +If you get errors like: + + sh: -c: line 0: syntax error near unexpected token + sh: Syntax error: Unterminated quoted string + sh: -c: line 0: unexpected EOF while looking for matching `'' + sh: -c: line 1: syntax error: unexpected end of file + zsh:1: no matches found: + +then you might try using B<-q>. + +If you are using B<bash> process substitution like B<<(cat foo)> then +you may try B<-q> and prepending I<command> with B<bash -c>: + + ls | parallel -q bash -c 'wc -c <(echo {})' + +Or for substituting output: + + ls | parallel -q bash -c \ + 'tar c {} | tee >(gzip >{}.tar.gz) | bzip2 >{}.tar.bz2' + +B<Conclusion>: If this is confusing consider avoiding having to deal +with quoting by writing a small script or a function (remember to +B<export -f> the function) and have GNU B<parallel> call that. + + +=head1 LIST RUNNING JOBS + +If you want a list of the jobs currently running you can run: + + killall -USR1 parallel + +GNU B<parallel> will then print the currently running jobs on stderr +(standard error). + + +=head1 COMPLETE RUNNING JOBS BUT DO NOT START NEW JOBS + +If you regret starting a lot of jobs you can simply break GNU B<parallel>, +but if you want to make sure you do not have half-completed jobs you +should send the signal B<SIGHUP> to GNU B<parallel>: + + killall -HUP parallel + +This will tell GNU B<parallel> to not start any new jobs, but wait until +the currently running jobs are finished before exiting. + + +=head1 ENVIRONMENT VARIABLES + +=over 9 + +=item $PARALLEL_HOME + +Dir where GNU B<parallel> stores config files, semaphores, and caches +information between invocations. If set to a non-existent dir, the dir +will be created. + +Default: $HOME/.parallel. + + +=item $PARALLEL_ARGHOSTGROUPS + +When using B<--hostgroups> GNU B<parallel> sets this to the hostgroups +of the job. + +Remember to quote the $, so it gets evaluated by the correct shell. Or +use B<--plus> and {agrp}. + + +=item $PARALLEL_HOSTGROUPS + +When using B<--hostgroups> GNU B<parallel> sets this to the hostgroups +of the sshlogin that the job is run on. + +Remember to quote the $, so it gets evaluated by the correct shell. Or +use B<--plus> and {hgrp}. + + +=item $PARALLEL_JOBSLOT + +Set by GNU B<parallel> and can be used in jobs run by GNU B<parallel>. +Remember to quote the $, so it gets evaluated by the correct shell. Or +use B<--plus> and {slot}. + +$PARALLEL_JOBSLOT is the jobslot of the job. It is equal to {%} unless +the job is being retried. See {%} for details. + + +=item $PARALLEL_PID + +Set by GNU B<parallel> and can be used in jobs run by GNU B<parallel>. +Remember to quote the $, so it gets evaluated by the correct shell. + +This makes it possible for the jobs to communicate directly to GNU +B<parallel>. + +B<Example:> If each of the jobs tests a solution and one of jobs finds +the solution the job can tell GNU B<parallel> not to start more jobs +by: B<kill -HUP $PARALLEL_PID>. This only works on the local +computer. + + +=item $PARALLEL_RSYNC_OPTS + +Options to pass on to B<rsync>. Defaults to: -rlDzR. + + +=item $PARALLEL_SHELL + +Use this shell for the commands run by GNU B<parallel>: + +=over 2 + +=item * + +$PARALLEL_SHELL. If undefined use: + +=item * + +The shell that started GNU B<parallel>. If that cannot be determined: + +=item * + +$SHELL. If undefined use: + +=item * + +/bin/sh + +=back + + +=item $PARALLEL_SSH + +GNU B<parallel> defaults to using the B<ssh> command for remote +access. This can be overridden with $PARALLEL_SSH, which again can be +overridden with B<--ssh>. It can also be set on a per server basis +(see B<--sshlogin>). + + +=item $PARALLEL_SSHHOST + +Set by GNU B<parallel> and can be used in jobs run by GNU B<parallel>. +Remember to quote the $, so it gets evaluated by the correct shell. Or +use B<--plus> and {host}. + + +$PARALLEL_SSHHOST is the host part of an sshlogin line. E.g. + + 4//usr/bin/specialssh user@host + +becomes: + + host + + +=item $PARALLEL_SSHLOGIN + +Set by GNU B<parallel> and can be used in jobs run by GNU B<parallel>. +Remember to quote the $, so it gets evaluated by the correct shell. Or +use B<--plus> and {sshlogin}. + + +The value is the sshlogin line with number of threads removed. E.g. + + 4//usr/bin/specialssh user@host + +becomes: + + /usr/bin/specialssh user@host + + +=item $PARALLEL_SEQ + +Set by GNU B<parallel> and can be used in jobs run by GNU B<parallel>. +Remember to quote the $, so it gets evaluated by the correct shell. + +$PARALLEL_SEQ is the sequence number of the job running. + +B<Example:> + + seq 10 | parallel -N2 \ + echo seq:'$'PARALLEL_SEQ arg1:{1} arg2:{2} + +{#} is a shorthand for $PARALLEL_SEQ. + + +=item $PARALLEL_TMUX + +Path to B<tmux>. If unset the B<tmux> in $PATH is used. + + +=item $TMPDIR + +Directory for temporary files. + +See also: B<--tmpdir> + + +=item $PARALLEL_REMOTE_TMPDIR + +Directory for temporary files on remote servers. + +See also: B<--tmpdir> + + +=item $PARALLEL + +The environment variable $PARALLEL will be used as default options for +GNU B<parallel>. If the variable contains special shell characters +(e.g. $, *, or space) then these need to be to be escaped with \. + +B<Example:> + + cat list | parallel -j1 -k -v ls + cat list | parallel -j1 -k -v -S"myssh user@server" ls + +can be written as: + + cat list | PARALLEL="-kvj1" parallel ls + cat list | PARALLEL='-kvj1 -S myssh\ user@server' \ + parallel echo + +Notice the \ after 'myssh' is needed because 'myssh' and 'user@server' +must be one argument. + +See also: B<--profile> + +=back + + +=head1 DEFAULT PROFILE (CONFIG FILE) + +The global configuration file /etc/parallel/config, followed by user +configuration file ~/.parallel/config (formerly known as .parallelrc) +will be read in turn if they exist. Lines starting with '#' will be +ignored. The format can follow that of the environment variable +$PARALLEL, but it is often easier to simply put each option on its own +line. + +Options on the command line take precedence, followed by the +environment variable $PARALLEL, user configuration file +~/.parallel/config, and finally the global configuration file +/etc/parallel/config. + +Note that no file that is read for options, nor the environment +variable $PARALLEL, may contain retired options such as B<--tollef>. + +=head1 PROFILE FILES + +If B<--profile> set, GNU B<parallel> will read the profile from that +file rather than the global or user configuration files. You can have +multiple B<--profiles>. + +Profiles are searched for in B<~/.parallel>. If the name starts with +B</> it is seen as an absolute path. If the name starts with B<./> it +is seen as a relative path from current dir. + +Example: Profile for running a command on every sshlogin in +~/.ssh/sshlogins and prepend the output with the sshlogin: + + echo --tag -S .. --nonall > ~/.parallel/nonall_profile + parallel -J nonall_profile uptime + +Example: Profile for running every command with B<-j-1> and B<nice> + + echo -j-1 nice > ~/.parallel/nice_profile + parallel -J nice_profile bzip2 -9 ::: * + +Example: Profile for running a perl script before every command: + + echo "perl -e '\$a=\$\$; print \$a,\" \",'\$PARALLEL_SEQ',\" \";';" \ + > ~/.parallel/pre_perl + parallel -J pre_perl echo ::: * + +Note how the $ and " need to be quoted using \. + +Example: Profile for running distributed jobs with B<nice> on the +remote computers: + + echo -S .. nice > ~/.parallel/dist + parallel -J dist --trc {.}.bz2 bzip2 -9 ::: * + + +=head1 EXIT STATUS + +Exit status depends on B<--halt-on-error> if one of these is used: +success=X, success=Y%, fail=Y%. + +=over 6 + +=item Z<>0 + +All jobs ran without error. If success=X is used: X jobs ran without +error. If success=Y% is used: Y% of the jobs ran without error. + +=item Z<>1-100 + +Some of the jobs failed. The exit status gives the number of failed +jobs. If Y% is used the exit status is the percentage of jobs that +failed. + +=item Z<>101 + +More than 100 jobs failed. + +=item Z<>255 + +Other error. + +=item Z<>-1 (In joblog and SQL table) + +Killed by Ctrl-C, timeout, not enough memory or similar. + +=item Z<>-2 (In joblog and SQL table) + +skip() was called in B<{= =}>. + +=item Z<>-1000 (In SQL table) + +Job is ready to run (set by --sqlmaster). + +=item Z<>-1220 (In SQL table) + +Job is taken by worker (set by --sqlworker). + +=back + +If fail=1 is used, the exit status will be the exit status of the +failing job. + + +=head1 DIFFERENCES BETWEEN GNU Parallel AND ALTERNATIVES + +See: B<man parallel_alternatives> + + +=head1 BUGS + +=head2 Quoting of newline + +Because of the way newline is quoted this will not work: + + echo 1,2,3 | parallel -vkd, "echo 'a{}b'" + +However, these will all work: + + echo 1,2,3 | parallel -vkd, echo a{}b + echo 1,2,3 | parallel -vkd, "echo 'a'{}'b'" + echo 1,2,3 | parallel -vkd, "echo 'a'"{}"'b'" + + +=head2 Speed + +=head3 Startup + +GNU B<parallel> is slow at starting up - around 250 ms the first time +and 150 ms after that. + +=head3 Job startup + +Starting a job on the local machine takes around 3-10 ms. This can be +a big overhead if the job takes very few ms to run. Often you can +group small jobs together using B<-X> which will make the overhead +less significant. Or you can run multiple GNU B<parallel>s as +described in B<EXAMPLE: Speeding up fast jobs>. + +=head3 SSH + +When using multiple computers GNU B<parallel> opens B<ssh> connections +to them to figure out how many connections can be used reliably +simultaneously (Namely SSHD's MaxStartups). This test is done for each +host in serial, so if your B<--sshloginfile> contains many hosts it may +be slow. + +If your jobs are short you may see that there are fewer jobs running +on the remote systems than expected. This is due to time spent logging +in and out. B<-M> may help here. + +=head3 Disk access + +A single disk can normally read data faster if it reads one file at a +time instead of reading a lot of files in parallel, as this will avoid +disk seeks. However, newer disk systems with multiple drives can read +faster if reading from multiple files in parallel. + +If the jobs are of the form read-all-compute-all-write-all, so +everything is read before anything is written, it may be faster to +force only one disk access at the time: + + sem --id diskio cat file | compute | sem --id diskio cat > file + +If the jobs are of the form read-compute-write, so writing starts +before all reading is done, it may be faster to force only one reader +and writer at the time: + + sem --id read cat file | compute | sem --id write cat > file + +If the jobs are of the form read-compute-read-compute, it may be +faster to run more jobs in parallel than the system has CPUs, as some +of the jobs will be stuck waiting for disk access. + +=head2 --nice limits command length + +The current implementation of B<--nice> is too pessimistic in the max +allowed command length. It only uses a little more than half of what +it could. This affects B<-X> and B<-m>. If this becomes a real problem for +you, file a bug-report. + +=head2 Aliases and functions do not work + +If you get: + + Can't exec "command": No such file or directory + +or: + + open3: exec of by command failed + +or: + + /bin/bash: command: command not found + +it may be because I<command> is not known, but it could also be +because I<command> is an alias or a function. If it is a function you +need to B<export -f> the function first or use B<env_parallel>. An +alias will only work if you use B<env_parallel>. + +=head2 Database with MySQL fails randomly + +The B<--sql*> options may fail randomly with MySQL. This problem does +not exist with PostgreSQL. + + +=head1 REPORTING BUGS + +Report bugs to <parallel@gnu.org> or +https://savannah.gnu.org/bugs/?func=additem&group=parallel + +When you write your report, please keep in mind, that you must give +the reader enough information to be able to run exactly what you +run. So you need to include all data and programs that you use to +show the problem. + +See a perfect bug report on +https://lists.gnu.org/archive/html/bug-parallel/2015-01/msg00000.html + +Your bug report should always include: + +=over 2 + +=item * + +The error message you get (if any). If the error message is not from +GNU B<parallel> you need to show why you think GNU B<parallel> caused +this. + +=item * + +The complete output of B<parallel --version>. If you are not running +the latest released version (see https://ftp.gnu.org/gnu/parallel/) you +should specify why you believe the problem is not fixed in that +version. + +=item * + +A minimal, complete, and verifiable example (See description on +https://stackoverflow.com/help/mcve). + +It should be a complete example that others can run which shows the +problem including all files needed to run the example. This should +preferably be small and simple, so try to remove as many options as +possible. + +A combination of B<yes>, B<seq>, B<cat>, B<echo>, B<wc>, and B<sleep> +can reproduce most errors. + +If your example requires large files, see if you can make them with +something like B<seq 100000000> > B<bigfile> or B<yes | head -n +1000000000> > B<file>. If you need multiple columns: B<paste <(seq +1000) <(seq 1000 1999)> + +If your example requires remote execution, see if you can use +B<localhost> - maybe using another login. + +If you have access to a different system (maybe a VirtualBox on your +own machine), test if your MCVE shows the problem on that system. If +it does not, read below. + +=item * + +The output of your example. If your problem is not easily reproduced +by others, the output might help them figure out the problem. + +=item * + +Whether you have watched the intro videos +(https://www.youtube.com/playlist?list=PL284C9FF2488BC6D1), walked +through the tutorial (man parallel_tutorial), and read the examples +(man parallel_examples). + +=back + +=head2 Bug dependent on environment + +If you suspect the error is dependent on your environment or +distribution, please see if you can reproduce the error on one of +these VirtualBox images: +https://sourceforge.net/projects/virtualboximage/files/ +https://www.osboxes.org/virtualbox-images/ + +Specifying the name of your distribution is not enough as you may have +installed software that is not in the VirtualBox images. + +If you cannot reproduce the error on any of the VirtualBox images +above, see if you can build a VirtualBox image on which you can +reproduce the error. If not you should assume the debugging will be +done through you. That will put a lot more burden on you and it is +extra important you give any information that help. In general the +problem will be fixed faster and with much less work for you if you +can reproduce the error on a VirtualBox - even if you have to build a +VirtualBox image. + +=head2 In summary + +Your report must include: + +=over 2 + +=item * + +B<parallel --version> + +=item * + +output + error message + +=item * + +full example including all files + +=item * + +VirtualBox image, if you cannot reproduce it on other systems + +=back + + + +=head1 AUTHOR + +When using GNU B<parallel> for a publication please cite: + +O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: +The USENIX Magazine, February 2011:42-47. + +This helps funding further development; and it won't cost you a cent. +If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +Parts of the manual concerning B<xargs> compatibility is inspired by +the manual of B<xargs> from GNU findutils 4.4.2. + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 DEPENDENCIES + +GNU B<parallel> uses Perl, and the Perl modules Getopt::Long, +IPC::Open3, Symbol, IO::File, POSIX, and File::Temp. + +For B<--csv> it uses the Perl module Text::CSV. + +For remote usage it uses B<rsync> with B<ssh>. + + +=head1 SEE ALSO + +B<parallel_tutorial>(1), B<env_parallel>(1), B<parset>(1), +B<parsort>(1), B<parallel_alternatives>(1), B<parallel_design>(7), +B<niceload>(1), B<sql>(1), B<ssh>(1), B<ssh-agent>(1), B<sshpass>(1), +B<ssh-copy-id>(1), B<rsync>(1) + +=cut diff --git a/src/parallel_alternatives.pod b/src/parallel_alternatives.pod new file mode 100644 index 0000000..6e7e780 --- /dev/null +++ b/src/parallel_alternatives.pod @@ -0,0 +1,3916 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + +=head1 NAME + +parallel_alternatives - Alternatives to GNU B<parallel> + + +=head1 DIFFERENCES BETWEEN GNU Parallel AND ALTERNATIVES + +There are a lot programs that share functionality with GNU +B<parallel>. Some of these are specialized tools, and while GNU +B<parallel> can emulate many of them, a specialized tool can be better +at a given task. GNU B<parallel> strives to include the best of the +general functionality without sacrificing ease of use. + +B<parallel> has existed since 2002-01-06 and as GNU B<parallel> since +2010. A lot of the alternatives have not had the vitality to survive +that long, but have come and gone during that time. + +GNU B<parallel> is actively maintained with a new release every month +since 2010. Most other alternatives are fleeting interests of the +developers with irregular releases and only maintained for a few +years. + + +=head2 SUMMARY LEGEND + +The following features are in some of the comparable tools: + +=head3 Inputs + +=over + +=item I1. Arguments can be read from stdin + +=item I2. Arguments can be read from a file + +=item I3. Arguments can be read from multiple files + +=item I4. Arguments can be read from command line + +=item I5. Arguments can be read from a table + +=item I6. Arguments can be read from the same file using #! (shebang) + +=item I7. Line oriented input as default (Quoting of special chars not needed) + +=back + + +=head3 Manipulation of input + +=over + +=item M1. Composed command + +=item M2. Multiple arguments can fill up an execution line + +=item M3. Arguments can be put anywhere in the execution line + +=item M4. Multiple arguments can be put anywhere in the execution line + +=item M5. Arguments can be replaced with context + +=item M6. Input can be treated as the complete command line + +=back + + +=head3 Outputs + +=over + +=item O1. Grouping output so output from different jobs do not mix + +=item O2. Send stderr (standard error) to stderr (standard error) + +=item O3. Send stdout (standard output) to stdout (standard output) + +=item O4. Order of output can be same as order of input + +=item O5. Stdout only contains stdout (standard output) from the command + +=item O6. Stderr only contains stderr (standard error) from the command + +=item O7. Buffering on disk + +=item O8. No temporary files left if killed + +=item O9. Test if disk runs full during run + +=item O10. Output of a line bigger than 4 GB + +=back + + +=head3 Execution + +=over + +=item E1. Running jobs in parallel + +=item E2. List running jobs + +=item E3. Finish running jobs, but do not start new jobs + +=item E4. Number of running jobs can depend on number of cpus + +=item E5. Finish running jobs, but do not start new jobs after first failure + +=item E6. Number of running jobs can be adjusted while running + +=item E7. Only spawn new jobs if load is less than a limit + +=back + + +=head3 Remote execution + +=over + +=item R1. Jobs can be run on remote computers + +=item R2. Basefiles can be transferred + +=item R3. Argument files can be transferred + +=item R4. Result files can be transferred + +=item R5. Cleanup of transferred files + +=item R6. No config files needed + +=item R7. Do not run more than SSHD's MaxStartups can handle + +=item R8. Configurable SSH command + +=item R9. Retry if connection breaks occasionally + +=back + + +=head3 Semaphore + +=over + +=item S1. Possibility to work as a mutex + +=item S2. Possibility to work as a counting semaphore + +=back + + +=head3 Legend + +=over + +=item - = no + +=item x = not applicable + +=item ID = yes + +=back + +As every new version of the programs are not tested the table may be +outdated. Please file a bug report if you find errors (See REPORTING +BUGS). + +parallel: + +=over + +=item I1 I2 I3 I4 I5 I6 I7 + +=item M1 M2 M3 M4 M5 M6 + +=item O1 O2 O3 O4 O5 O6 O7 O8 O9 O10 + +=item E1 E2 E3 E4 E5 E6 E7 + +=item R1 R2 R3 R4 R5 R6 R7 R8 R9 + +=item S1 S2 + +=back + + +=head2 DIFFERENCES BETWEEN xargs AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - - + +=item - M2 M3 - - - + +=item - O2 O3 - O5 O6 + +=item E1 - - - - - - + +=item - - - - - x - - - + +=item - - + +=back + +B<xargs> offers some of the same possibilities as GNU B<parallel>. + +B<xargs> deals badly with special characters (such as space, \, ' and +"). To see the problem try this: + + touch important_file + touch 'not important_file' + ls not* | xargs rm + mkdir -p "My brother's 12\" records" + ls | xargs rmdir + touch 'c:\windows\system32\clfs.sys' + echo 'c:\windows\system32\clfs.sys' | xargs ls -l + +You can specify B<-0>, but many input generators are not optimized for +using B<NUL> as separator but are optimized for B<newline> as +separator. E.g. B<awk>, B<ls>, B<echo>, B<tar -v>, B<head> (requires +using B<-z>), B<tail> (requires using B<-z>), B<sed> (requires using +B<-z>), B<perl> (B<-0> and \0 instead of \n), B<locate> (requires +using B<-0>), B<find> (requires using B<-print0>), B<grep> (requires +using B<-z> or B<-Z>), B<sort> (requires using B<-z>). + +GNU B<parallel>'s newline separation can be emulated with: + + cat | xargs -d "\n" -n1 command + +B<xargs> can run a given number of jobs in parallel, but has no +support for running number-of-cpu-cores jobs in parallel. + +B<xargs> has no support for grouping the output, therefore output may +run together, e.g. the first half of a line is from one process and +the last half of the line is from another process. The example +B<Parallel grep> cannot be done reliably with B<xargs> because of +this. To see this in action try: + + parallel perl -e '\$a=\"1\".\"{}\"x10000000\;print\ \$a,\"\\n\"' \ + '>' {} ::: a b c d e f g h + # Serial = no mixing = the wanted result + # 'tr -s a-z' squeezes repeating letters into a single letter + echo a b c d e f g h | xargs -P1 -n1 grep 1 | tr -s a-z + # Compare to 8 jobs in parallel + parallel -kP8 -n1 grep 1 ::: a b c d e f g h | tr -s a-z + echo a b c d e f g h | xargs -P8 -n1 grep 1 | tr -s a-z + echo a b c d e f g h | xargs -P8 -n1 grep --line-buffered 1 | \ + tr -s a-z + +Or try this: + + slow_seq() { + echo Count to "$@" + seq "$@" | + perl -ne '$|=1; for(split//){ print; select($a,$a,$a,0.100);}' + } + export -f slow_seq + # Serial = no mixing = the wanted result + seq 8 | xargs -n1 -P1 -I {} bash -c 'slow_seq {}' + # Compare to 8 jobs in parallel + seq 8 | parallel -P8 slow_seq {} + seq 8 | xargs -n1 -P8 -I {} bash -c 'slow_seq {}' + +B<xargs> has no support for keeping the order of the output, therefore +if running jobs in parallel using B<xargs> the output of the second +job cannot be postponed till the first job is done. + +B<xargs> has no support for running jobs on remote computers. + +B<xargs> has no support for context replace, so you will have to create the +arguments. + +If you use a replace string in B<xargs> (B<-I>) you can not force +B<xargs> to use more than one argument. + +Quoting in B<xargs> works like B<-q> in GNU B<parallel>. This means +composed commands and redirection require using B<bash -c>. + + ls | parallel "wc {} >{}.wc" + ls | parallel "echo {}; ls {}|wc" + +becomes (assuming you have 8 cores and that none of the filenames +contain space, " or '). + + ls | xargs -d "\n" -P8 -I {} bash -c "wc {} >{}.wc" + ls | xargs -d "\n" -P8 -I {} bash -c "echo {}; ls {}|wc" + +A more extreme example can be found on: +https://unix.stackexchange.com/q/405552/ + +https://www.gnu.org/software/findutils/ + + +=head2 DIFFERENCES BETWEEN find -exec AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - x - x - + +=item - M2 M3 - - - - + +=item - O2 O3 O4 O5 O6 + +=item - - - - - - - + +=item - - - - - - - - - + +=item x x + +=back + +B<find -exec> offers some of the same possibilities as GNU B<parallel>. + +B<find -exec> only works on files. Processing other input (such as +hosts or URLs) will require creating these inputs as files. B<find +-exec> has no support for running commands in parallel. + +https://www.gnu.org/software/findutils/ (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN make -j AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - - - - - + +=item - - - - - - + +=item O1 O2 O3 - x O6 + +=item E1 - - - E5 - + +=item - - - - - - - - - + +=item - - + +=back + +B<make -j> can run jobs in parallel, but requires a crafted Makefile +to do this. That results in extra quoting to get filenames containing +newlines to work correctly. + +B<make -j> computes a dependency graph before running jobs. Jobs run +by GNU B<parallel> does not depend on each other. + +(Very early versions of GNU B<parallel> were coincidentally implemented +using B<make -j>). + +https://www.gnu.org/software/make/ (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN ppss AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - I7 + +=item M1 - M3 - - M6 + +=item O1 - - x - - + +=item E1 E2 ?E3 E4 - - - + +=item R1 R2 R3 R4 - - ?R7 ? ? + +=item - - + +=back + +B<ppss> is also a tool for running jobs in parallel. + +The output of B<ppss> is status information and thus not useful for +using as input for another command. The output from the jobs are put +into files. + +The argument replace string ($ITEM) cannot be changed. Arguments must +be quoted - thus arguments containing special characters (space '"&!*) +may cause problems. More than one argument is not supported. Filenames +containing newlines are not processed correctly. When reading input +from a file null cannot be used as a terminator. B<ppss> needs to read +the whole input file before starting any jobs. + +Output and status information is stored in ppss_dir and thus requires +cleanup when completed. If the dir is not removed before running +B<ppss> again it may cause nothing to happen as B<ppss> thinks the +task is already done. GNU B<parallel> will normally not need cleaning +up if running locally and will only need cleaning up if stopped +abnormally and running remote (B<--cleanup> may not complete if +stopped abnormally). The example B<Parallel grep> would require extra +postprocessing if written using B<ppss>. + +For remote systems PPSS requires 3 steps: config, deploy, and +start. GNU B<parallel> only requires one step. + +=head3 EXAMPLES FROM ppss MANUAL + +Here are the examples from B<ppss>'s manual page with the equivalent +using GNU B<parallel>: + + 1$ ./ppss.sh standalone -d /path/to/files -c 'gzip ' + + 1$ find /path/to/files -type f | parallel gzip + + 2$ ./ppss.sh standalone -d /path/to/files -c 'cp "$ITEM" /destination/dir ' + + 2$ find /path/to/files -type f | parallel cp {} /destination/dir + + 3$ ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q ' + + 3$ parallel -a list-of-urls.txt wget -q + + 4$ ./ppss.sh standalone -f list-of-urls.txt -c 'wget -q "$ITEM"' + + 4$ parallel -a list-of-urls.txt wget -q {} + + 5$ ./ppss config -C config.cfg -c 'encode.sh ' -d /source/dir \ + -m 192.168.1.100 -u ppss -k ppss-key.key -S ./encode.sh \ + -n nodes.txt -o /some/output/dir --upload --download; + ./ppss deploy -C config.cfg + ./ppss start -C config + + 5$ # parallel does not use configs. If you want a different username put it in nodes.txt: user@hostname + find source/dir -type f | + parallel --sshloginfile nodes.txt --trc {.}.mp3 lame -a {} -o {.}.mp3 --preset standard --quiet + + 6$ ./ppss stop -C config.cfg + + 6$ killall -TERM parallel + + 7$ ./ppss pause -C config.cfg + + 7$ Press: CTRL-Z or killall -SIGTSTP parallel + + 8$ ./ppss continue -C config.cfg + + 8$ Enter: fg or killall -SIGCONT parallel + + 9$ ./ppss.sh status -C config.cfg + + 9$ killall -SIGUSR2 parallel + +https://github.com/louwrentius/PPSS + + +=head2 DIFFERENCES BETWEEN pexec AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - I4 I5 - - + +=item M1 - M3 - - M6 + +=item O1 O2 O3 - O5 O6 + +=item E1 - - E4 - E6 - + +=item R1 - - - - R6 - - - + +=item S1 - + +=back + +B<pexec> is also a tool for running jobs in parallel. + +=head3 EXAMPLES FROM pexec MANUAL + +Here are the examples from B<pexec>'s info page with the equivalent +using GNU B<parallel>: + + 1$ pexec -o sqrt-%s.dat -p "$(seq 10)" -e NUM -n 4 -c -- \ + 'echo "scale=10000;sqrt($NUM)" | bc' + + 1$ seq 10 | parallel -j4 'echo "scale=10000;sqrt({})" | \ + bc > sqrt-{}.dat' + + 2$ pexec -p "$(ls myfiles*.ext)" -i %s -o %s.sort -- sort + + 2$ ls myfiles*.ext | parallel sort {} ">{}.sort" + + 3$ pexec -f image.list -n auto -e B -u star.log -c -- \ + 'fistar $B.fits -f 100 -F id,x,y,flux -o $B.star' + + 3$ parallel -a image.list \ + 'fistar {}.fits -f 100 -F id,x,y,flux -o {}.star' 2>star.log + + 4$ pexec -r *.png -e IMG -c -o - -- \ + 'convert $IMG ${IMG%.png}.jpeg ; "echo $IMG: done"' + + 4$ ls *.png | parallel 'convert {} {.}.jpeg; echo {}: done' + + 5$ pexec -r *.png -i %s -o %s.jpg -c 'pngtopnm | pnmtojpeg' + + 5$ ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {}.jpg' + + 6$ for p in *.png ; do echo ${p%.png} ; done | \ + pexec -f - -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg' + + 6$ ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {.}.jpg' + + 7$ LIST=$(for p in *.png ; do echo ${p%.png} ; done) + pexec -r $LIST -i %s.png -o %s.jpg -c 'pngtopnm | pnmtojpeg' + + 7$ ls *.png | parallel 'pngtopnm < {} | pnmtojpeg > {.}.jpg' + + 8$ pexec -n 8 -r *.jpg -y unix -e IMG -c \ + 'pexec -j -m blockread -d $IMG | \ + jpegtopnm | pnmscale 0.5 | pnmtojpeg | \ + pexec -j -m blockwrite -s th_$IMG' + + 8$ # Combining GNU B<parallel> and GNU B<sem>. + ls *jpg | parallel -j8 'sem --id blockread cat {} | jpegtopnm |' \ + 'pnmscale 0.5 | pnmtojpeg | sem --id blockwrite cat > th_{}' + + # If reading and writing is done to the same disk, this may be + # faster as only one process will be either reading or writing: + ls *jpg | parallel -j8 'sem --id diskio cat {} | jpegtopnm |' \ + 'pnmscale 0.5 | pnmtojpeg | sem --id diskio cat > th_{}' + +https://www.gnu.org/software/pexec/ + + +=head2 DIFFERENCES BETWEEN xjobs AND GNU Parallel + +B<xjobs> is also a tool for running jobs in parallel. It only supports +running jobs on your local computer. + +B<xjobs> deals badly with special characters just like B<xargs>. See +the section B<DIFFERENCES BETWEEN xargs AND GNU Parallel>. + +=head3 EXAMPLES FROM xjobs MANUAL + +Here are the examples from B<xjobs>'s man page with the equivalent +using GNU B<parallel>: + + 1$ ls -1 *.zip | xjobs unzip + + 1$ ls *.zip | parallel unzip + + 2$ ls -1 *.zip | xjobs -n unzip + + 2$ ls *.zip | parallel unzip >/dev/null + + 3$ find . -name '*.bak' | xjobs gzip + + 3$ find . -name '*.bak' | parallel gzip + + 4$ ls -1 *.jar | sed 's/\(.*\)/\1 > \1.idx/' | xjobs jar tf + + 4$ ls *.jar | parallel jar tf {} '>' {}.idx + + 5$ xjobs -s script + + 5$ cat script | parallel + + 6$ mkfifo /var/run/my_named_pipe; + xjobs -s /var/run/my_named_pipe & + echo unzip 1.zip >> /var/run/my_named_pipe; + echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe + + 6$ mkfifo /var/run/my_named_pipe; + cat /var/run/my_named_pipe | parallel & + echo unzip 1.zip >> /var/run/my_named_pipe; + echo tar cf /backup/myhome.tar /home/me >> /var/run/my_named_pipe + +https://www.maier-komor.de/xjobs.html (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN prll AND GNU Parallel + +B<prll> is also a tool for running jobs in parallel. It does not +support running jobs on remote computers. + +B<prll> encourages using BASH aliases and BASH functions instead of +scripts. GNU B<parallel> supports scripts directly, functions if they +are exported using B<export -f>, and aliases if using B<env_parallel>. + +B<prll> generates a lot of status information on stderr (standard +error) which makes it harder to use the stderr (standard error) output +of the job directly as input for another program. + +=head3 EXAMPLES FROM prll's MANUAL + +Here is the example from B<prll>'s man page with the equivalent +using GNU B<parallel>: + + 1$ prll -s 'mogrify -flip $1' *.jpg + + 1$ parallel mogrify -flip ::: *.jpg + +https://github.com/exzombie/prll (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN dxargs AND GNU Parallel + +B<dxargs> is also a tool for running jobs in parallel. + +B<dxargs> does not deal well with more simultaneous jobs than SSHD's +MaxStartups. B<dxargs> is only built for remote run jobs, but does not +support transferring of files. + +https://web.archive.org/web/20120518070250/http://www. +semicomplete.com/blog/geekery/distributed-xargs.html (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN mdm/middleman AND GNU Parallel + +middleman(mdm) is also a tool for running jobs in parallel. + +=head3 EXAMPLES FROM middleman's WEBSITE + +Here are the shellscripts of +https://web.archive.org/web/20110728064735/http://mdm. +berlios.de/usage.html ported to GNU B<parallel>: + + 1$ seq 19 | parallel buffon -o - | sort -n > result + cat files | parallel cmd + find dir -execdir sem cmd {} \; + +https://github.com/cklin/mdm (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN xapply AND GNU Parallel + +B<xapply> can run jobs in parallel on the local computer. + +=head3 EXAMPLES FROM xapply's MANUAL + +Here are the examples from B<xapply>'s man page with the equivalent +using GNU B<parallel>: + + 1$ xapply '(cd %1 && make all)' */ + + 1$ parallel 'cd {} && make all' ::: */ + + 2$ xapply -f 'diff %1 ../version5/%1' manifest | more + + 2$ parallel diff {} ../version5/{} < manifest | more + + 3$ xapply -p/dev/null -f 'diff %1 %2' manifest1 checklist1 + + 3$ parallel --link diff {1} {2} :::: manifest1 checklist1 + + 4$ xapply 'indent' *.c + + 4$ parallel indent ::: *.c + + 5$ find ~ksb/bin -type f ! -perm -111 -print | \ + xapply -f -v 'chmod a+x' - + + 5$ find ~ksb/bin -type f ! -perm -111 -print | \ + parallel -v chmod a+x + + 6$ find */ -... | fmt 960 1024 | xapply -f -i /dev/tty 'vi' - + + 6$ sh <(find */ -... | parallel -s 1024 echo vi) + + 6$ find */ -... | parallel -s 1024 -Xuj1 vi + + 7$ find ... | xapply -f -5 -i /dev/tty 'vi' - - - - - + + 7$ sh <(find ... | parallel -n5 echo vi) + + 7$ find ... | parallel -n5 -uj1 vi + + 8$ xapply -fn "" /etc/passwd + + 8$ parallel -k echo < /etc/passwd + + 9$ tr ':' '\012' < /etc/passwd | \ + xapply -7 -nf 'chown %1 %6' - - - - - - - + + 9$ tr ':' '\012' < /etc/passwd | parallel -N7 chown {1} {6} + + 10$ xapply '[ -d %1/RCS ] || echo %1' */ + + 10$ parallel '[ -d {}/RCS ] || echo {}' ::: */ + + 11$ xapply -f '[ -f %1 ] && echo %1' List | ... + + 11$ parallel '[ -f {} ] && echo {}' < List | ... + +https://www.databits.net/~ksb/msrc/local/bin/xapply/xapply.html + + +=head2 DIFFERENCES BETWEEN AIX apply AND GNU Parallel + +B<apply> can build command lines based on a template and arguments - +very much like GNU B<parallel>. B<apply> does not run jobs in +parallel. B<apply> does not use an argument separator (like B<:::>); +instead the template must be the first argument. + +=head3 EXAMPLES FROM IBM's KNOWLEDGE CENTER + +Here are the examples from IBM's Knowledge Center and the +corresponding command using GNU B<parallel>: + +=head4 To obtain results similar to those of the B<ls> command, enter: + + 1$ apply echo * + 1$ parallel echo ::: * + +=head4 To compare the file named a1 to the file named b1, and +the file named a2 to the file named b2, enter: + + 2$ apply -2 cmp a1 b1 a2 b2 + 2$ parallel -N2 cmp ::: a1 b1 a2 b2 + +=head4 To run the B<who> command five times, enter: + + 3$ apply -0 who 1 2 3 4 5 + 3$ parallel -N0 who ::: 1 2 3 4 5 + +=head4 To link all files in the current directory to the directory +/usr/joe, enter: + + 4$ apply 'ln %1 /usr/joe' * + 4$ parallel ln {} /usr/joe ::: * + +https://www-01.ibm.com/support/knowledgecenter/ +ssw_aix_71/com.ibm.aix.cmds1/apply.htm (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN paexec AND GNU Parallel + +B<paexec> can run jobs in parallel on both the local and remote computers. + +B<paexec> requires commands to print a blank line as the last +output. This means you will have to write a wrapper for most programs. + +B<paexec> has a job dependency facility so a job can depend on another +job to be executed successfully. Sort of a poor-man's B<make>. + +=head3 EXAMPLES FROM paexec's EXAMPLE CATALOG + +Here are the examples from B<paexec>'s example catalog with the equivalent +using GNU B<parallel>: + +=head4 1_div_X_run + + 1$ ../../paexec -s -l -c "`pwd`/1_div_X_cmd" -n +1 <<EOF [...] + + 1$ parallel echo {} '|' `pwd`/1_div_X_cmd <<EOF [...] + +=head4 all_substr_run + + 2$ ../../paexec -lp -c "`pwd`/all_substr_cmd" -n +3 <<EOF [...] + + 2$ parallel echo {} '|' `pwd`/all_substr_cmd <<EOF [...] + +=head4 cc_wrapper_run + + 3$ ../../paexec -c "env CC=gcc CFLAGS=-O2 `pwd`/cc_wrapper_cmd" \ + -n 'host1 host2' \ + -t '/usr/bin/ssh -x' <<EOF [...] + + 3$ parallel echo {} '|' "env CC=gcc CFLAGS=-O2 `pwd`/cc_wrapper_cmd" \ + -S host1,host2 <<EOF [...] + + # This is not exactly the same, but avoids the wrapper + parallel gcc -O2 -c -o {.}.o {} \ + -S host1,host2 <<EOF [...] + +=head4 toupper_run + + 4$ ../../paexec -lp -c "`pwd`/toupper_cmd" -n +10 <<EOF [...] + + 4$ parallel echo {} '|' ./toupper_cmd <<EOF [...] + + # Without the wrapper: + parallel echo {} '| awk {print\ toupper\(\$0\)}' <<EOF [...] + +https://github.com/cheusov/paexec + + +=head2 DIFFERENCES BETWEEN map(sitaramc) AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - I4 - - (I7) + +=item M1 (M2) M3 (M4) M5 M6 + +=item - O2 O3 - O5 - - N/A N/A O10 + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +(I7): Only under special circumstances. See below. + +(M2+M4): Only if there is a single replacement string. + +B<map> rejects input with special characters: + + echo "The Cure" > My\ brother\'s\ 12\"\ records + + ls | map 'echo %; wc %' + +It works with GNU B<parallel>: + + ls | parallel 'echo {}; wc {}' + +Under some circumstances it also works with B<map>: + + ls | map 'echo % works %' + +But tiny changes make it reject the input with special characters: + + ls | map 'echo % does not work "%"' + +This means that many UTF-8 characters will be rejected. This is by +design. From the web page: "As such, programs that I<quietly handle +them, with no warnings at all,> are doing their users a disservice." + +B<map> delays each job by 0.01 s. This can be emulated by using +B<parallel --delay 0.01>. + +B<map> prints '+' on stderr when a job starts, and '-' when a job +finishes. This cannot be disabled. B<parallel> has B<--bar> if you +need to see progress. + +B<map>'s replacement strings (% %D %B %E) can be simulated in GNU +B<parallel> by putting this in B<~/.parallel/config>: + + --rpl '%' + --rpl '%D $_=Q(::dirname($_));' + --rpl '%B s:.*/::;s:\.[^/.]+$::;' + --rpl '%E s:.*\.::' + +B<map> does not have an argument separator on the command line, but +uses the first argument as command. This makes quoting harder which again +may affect readability. Compare: + + map -p 2 'perl -ne '"'"'/^\S+\s+\S+$/ and print $ARGV,"\n"'"'" * + + parallel -q perl -ne '/^\S+\s+\S+$/ and print $ARGV,"\n"' ::: * + +B<map> can do multiple arguments with context replace, but not without +context replace: + + parallel --xargs echo 'BEGIN{'{}'}END' ::: 1 2 3 + + map "echo 'BEGIN{'%'}END'" 1 2 3 + +B<map> has no support for grouping. So this gives the wrong results: + + parallel perl -e '\$a=\"1{}\"x10000000\;print\ \$a,\"\\n\"' '>' {} \ + ::: a b c d e f + ls -l a b c d e f + parallel -kP4 -n1 grep 1 ::: a b c d e f > out.par + map -n1 -p 4 'grep 1' a b c d e f > out.map-unbuf + map -n1 -p 4 'grep --line-buffered 1' a b c d e f > out.map-linebuf + map -n1 -p 1 'grep --line-buffered 1' a b c d e f > out.map-serial + ls -l out* + md5sum out* + +=head3 EXAMPLES FROM map's WEBSITE + +Here are the examples from B<map>'s web page with the equivalent using +GNU B<parallel>: + + 1$ ls *.gif | map convert % %B.png # default max-args: 1 + + 1$ ls *.gif | parallel convert {} {.}.png + + 2$ map "mkdir %B; tar -C %B -xf %" *.tgz # default max-args: 1 + + 2$ parallel 'mkdir {.}; tar -C {.} -xf {}' ::: *.tgz + + 3$ ls *.gif | map cp % /tmp # default max-args: 100 + + 3$ ls *.gif | parallel -X cp {} /tmp + + 4$ ls *.tar | map -n 1 tar -xf % + + 4$ ls *.tar | parallel tar -xf + + 5$ map "cp % /tmp" *.tgz + + 5$ parallel cp {} /tmp ::: *.tgz + + 6$ map "du -sm /home/%/mail" alice bob carol + + 6$ parallel "du -sm /home/{}/mail" ::: alice bob carol + or if you prefer running a single job with multiple args: + 6$ parallel -Xj1 "du -sm /home/{}/mail" ::: alice bob carol + + 7$ cat /etc/passwd | map -d: 'echo user %1 has shell %7' + + 7$ cat /etc/passwd | parallel --colsep : 'echo user {1} has shell {7}' + + 8$ export MAP_MAX_PROCS=$(( `nproc` / 2 )) + + 8$ export PARALLEL=-j50% + +https://github.com/sitaramc/map (Last checked: 2020-05) + + +=head2 DIFFERENCES BETWEEN ladon AND GNU Parallel + +B<ladon> can run multiple jobs on files in parallel. + +B<ladon> only works on files and the only way to specify files is +using a quoted glob string (such as \*.jpg). It is not possible to +list the files manually. + +As replacement strings it uses FULLPATH DIRNAME BASENAME EXT RELDIR +RELPATH + +These can be simulated using GNU B<parallel> by putting this in +B<~/.parallel/config>: + + --rpl 'FULLPATH $_=Q($_);chomp($_=qx{readlink -f $_});' + --rpl 'DIRNAME $_=Q(::dirname($_));chomp($_=qx{readlink -f $_});' + --rpl 'BASENAME s:.*/::;s:\.[^/.]+$::;' + --rpl 'EXT s:.*\.::' + --rpl 'RELDIR $_=Q($_);chomp(($_,$c)=qx{readlink -f $_;pwd}); + s:\Q$c/\E::;$_=::dirname($_);' + --rpl 'RELPATH $_=Q($_);chomp(($_,$c)=qx{readlink -f $_;pwd}); + s:\Q$c/\E::;' + +B<ladon> deals badly with filenames containing " and newline, and it +fails for output larger than 200k: + + ladon '*' -- seq 36000 | wc + +=head3 EXAMPLES FROM ladon MANUAL + +It is assumed that the '--rpl's above are put in B<~/.parallel/config> +and that it is run under a shell that supports '**' globbing (such as B<zsh>): + + 1$ ladon "**/*.txt" -- echo RELPATH + + 1$ parallel echo RELPATH ::: **/*.txt + + 2$ ladon "~/Documents/**/*.pdf" -- shasum FULLPATH >hashes.txt + + 2$ parallel shasum FULLPATH ::: ~/Documents/**/*.pdf >hashes.txt + + 3$ ladon -m thumbs/RELDIR "**/*.jpg" -- convert FULLPATH \ + -thumbnail 100x100^ -gravity center -extent 100x100 \ + thumbs/RELPATH + + 3$ parallel mkdir -p thumbs/RELDIR\; convert FULLPATH + -thumbnail 100x100^ -gravity center -extent 100x100 \ + thumbs/RELPATH ::: **/*.jpg + + 4$ ladon "~/Music/*.wav" -- lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 + + 4$ parallel lame -V 2 FULLPATH DIRNAME/BASENAME.mp3 ::: ~/Music/*.wav + +https://github.com/danielgtaylor/ladon (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN jobflow AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - - - - I7 + +=item - - M3 - - (M6) + +=item O1 O2 O3 - O5 O6 (O7) - - O10 + +=item E1 - - - - E6 - + +=item - - - - - - - - - + +=item - - + +=back + + +B<jobflow> can run multiple jobs in parallel. + +Just like B<xargs> output from B<jobflow> jobs running in parallel mix +together by default. B<jobflow> can buffer into files with +B<-buffered> (placed in /run/shm), but these are not cleaned up if +B<jobflow> dies unexpectedly (e.g. by Ctrl-C). If the total output is +big (in the order of RAM+swap) it can cause the system to slow to a +crawl and eventually run out of memory. + +Just like B<xargs> redirection and composed commands require wrapping +with B<bash -c>. + +Input lines can at most be 4096 bytes. + +B<jobflow> is faster than GNU B<parallel> but around 6 times slower +than B<parallel-bash>. + +B<jobflow> has no equivalent for B<--pipe>, or B<--sshlogin>. + +B<jobflow> makes it possible to set resource limits on the running +jobs. This can be emulated by GNU B<parallel> using B<bash>'s B<ulimit>: + + jobflow -limits=mem=100M,cpu=3,fsize=20M,nofiles=300 myjob + + parallel 'ulimit -v 102400 -t 3 -f 204800 -n 300 myjob' + + +=head3 EXAMPLES FROM jobflow README + + 1$ cat things.list | jobflow -threads=8 -exec ./mytask {} + + 1$ cat things.list | parallel -j8 ./mytask {} + + 2$ seq 100 | jobflow -threads=100 -exec echo {} + + 2$ seq 100 | parallel -j100 echo {} + + 3$ cat urls.txt | jobflow -threads=32 -exec wget {} + + 3$ cat urls.txt | parallel -j32 wget {} + + 4$ find . -name '*.bmp' | \ + jobflow -threads=8 -exec bmp2jpeg {.}.bmp {.}.jpg + + 4$ find . -name '*.bmp' | \ + parallel -j8 bmp2jpeg {.}.bmp {.}.jpg + + 5$ seq 100 | jobflow -skip 10 -count 10 + + 5$ seq 100 | parallel --filter '{1} > 10 and {1} <= 20' echo + + 5$ seq 100 | parallel echo '{= $_>10 and $_<=20 or skip() =}' + +https://github.com/rofl0r/jobflow (Last checked: 2022-05) + + +=head2 DIFFERENCES BETWEEN gargs AND GNU Parallel + +B<gargs> can run multiple jobs in parallel. + +Older versions cache output in memory. This causes it to be extremely +slow when the output is larger than the physical RAM, and can cause +the system to run out of memory. + +See more details on this in B<man parallel_design>. + +Newer versions cache output in files, but leave files in $TMPDIR if it +is killed. + +Output to stderr (standard error) is changed if the command fails. + +=head3 EXAMPLES FROM gargs WEBSITE + + 1$ seq 12 -1 1 | gargs -p 4 -n 3 "sleep {0}; echo {1} {2}" + + 1$ seq 12 -1 1 | parallel -P 4 -n 3 "sleep {1}; echo {2} {3}" + + 2$ cat t.txt | gargs --sep "\s+" \ + -p 2 "echo '{0}:{1}-{2}' full-line: \'{}\'" + + 2$ cat t.txt | parallel --colsep "\\s+" \ + -P 2 "echo '{1}:{2}-{3}' full-line: \'{}\'" + +https://github.com/brentp/gargs + + +=head2 DIFFERENCES BETWEEN orgalorg AND GNU Parallel + +B<orgalorg> can run the same job on multiple machines. This is related +to B<--onall> and B<--nonall>. + +B<orgalorg> supports entering the SSH password - provided it is the +same for all servers. GNU B<parallel> advocates using B<ssh-agent> +instead, but it is possible to emulate B<orgalorg>'s behavior by +setting SSHPASS and by using B<--ssh "sshpass ssh">. + +To make the emulation easier, make a simple alias: + + alias par_emul="parallel -j0 --ssh 'sshpass ssh' --nonall --tag --lb" + +If you want to supply a password run: + + SSHPASS=`ssh-askpass` + +or set the password directly: + + SSHPASS=P4$$w0rd! + +If the above is set up you can then do: + + orgalorg -o frontend1 -o frontend2 -p -C uptime + par_emul -S frontend1 -S frontend2 uptime + + orgalorg -o frontend1 -o frontend2 -p -C top -bid 1 + par_emul -S frontend1 -S frontend2 top -bid 1 + + orgalorg -o frontend1 -o frontend2 -p -er /tmp -n \ + 'md5sum /tmp/bigfile' -S bigfile + par_emul -S frontend1 -S frontend2 --basefile bigfile \ + --workdir /tmp md5sum /tmp/bigfile + +B<orgalorg> has a progress indicator for the transferring of a +file. GNU B<parallel> does not. + +https://github.com/reconquest/orgalorg + + +=head2 DIFFERENCES BETWEEN Rust parallel AND GNU Parallel + +Rust parallel focuses on speed. It is almost as fast as B<xargs>, but +not as fast as B<parallel-bash>. It implements a few features from GNU +B<parallel>, but lacks many functions. All these fail: + + # Read arguments from file + parallel -a file echo + # Changing the delimiter + parallel -d _ echo ::: a_b_c_ + +These do something different from GNU B<parallel> + + # -q to protect quoted $ and space + parallel -q perl -e '$a=shift; print "$a"x10000000' ::: a b c + # Generation of combination of inputs + parallel echo {1} {2} ::: red green blue ::: S M L XL XXL + # {= perl expression =} replacement string + parallel echo '{= s/new/old/ =}' ::: my.new your.new + # --pipe + seq 100000 | parallel --pipe wc + # linked arguments + parallel echo ::: S M L :::+ sml med lrg ::: R G B :::+ red grn blu + # Run different shell dialects + zsh -c 'parallel echo \={} ::: zsh && true' + csh -c 'parallel echo \$\{\} ::: shell && true' + bash -c 'parallel echo \$\({}\) ::: pwd && true' + # Rust parallel does not start before the last argument is read + (seq 10; sleep 5; echo 2) | time parallel -j2 'sleep 2; echo' + tail -f /var/log/syslog | parallel echo + +Most of the examples from the book GNU Parallel 2018 do not work, thus +Rust parallel is not close to being a compatible replacement. + +Rust parallel has no remote facilities. + +It uses /tmp/parallel for tmp files and does not clean up if +terminated abruptly. If another user on the system uses Rust parallel, +then /tmp/parallel will have the wrong permissions and Rust parallel +will fail. A malicious user can setup the right permissions and +symlink the output file to one of the user's files and next time the +user uses Rust parallel it will overwrite this file. + + attacker$ mkdir /tmp/parallel + attacker$ chmod a+rwX /tmp/parallel + # Symlink to the file the attacker wants to zero out + attacker$ ln -s ~victim/.important-file /tmp/parallel/stderr_1 + victim$ seq 1000 | parallel echo + # This file is now overwritten with stderr from 'echo' + victim$ cat ~victim/.important-file + +If /tmp/parallel runs full during the run, Rust parallel does not +report this, but finishes with success - thereby risking data loss. + +https://github.com/mmstick/parallel + + +=head2 DIFFERENCES BETWEEN Rush AND GNU Parallel + +B<rush> (https://github.com/shenwei356/rush) is written in Go and +based on B<gargs>. + +Just like GNU B<parallel> B<rush> buffers in temporary files. But +opposite GNU B<parallel> B<rush> does not clean up, if the process +dies abnormally. + +B<rush> has some string manipulations that can be emulated by putting +this into ~/.parallel/config (/ is used instead of %, and % is used +instead of ^ as that is closer to bash's ${var%postfix}): + + --rpl '{:} s:(\.[^/]+)*$::' + --rpl '{:%([^}]+?)} s:$$1(\.[^/]+)*$::' + --rpl '{/:%([^}]*?)} s:.*/(.*)$$1(\.[^/]+)*$:$1:' + --rpl '{/:} s:(.*/)?([^/.]+)(\.[^/]+)*$:$2:' + --rpl '{@(.*?)} /$$1/ and $_=$1;' + +=head3 EXAMPLES FROM rush's WEBSITE + +Here are the examples from B<rush>'s website with the equivalent +command in GNU B<parallel>. + +B<1. Simple run, quoting is not necessary> + + 1$ seq 1 3 | rush echo {} + + 1$ seq 1 3 | parallel echo {} + +B<2. Read data from file (`-i`)> + + 2$ rush echo {} -i data1.txt -i data2.txt + + 2$ cat data1.txt data2.txt | parallel echo {} + +B<3. Keep output order (`-k`)> + + 3$ seq 1 3 | rush 'echo {}' -k + + 3$ seq 1 3 | parallel -k echo {} + + +B<4. Timeout (`-t`)> + + 4$ time seq 1 | rush 'sleep 2; echo {}' -t 1 + + 4$ time seq 1 | parallel --timeout 1 'sleep 2; echo {}' + +B<5. Retry (`-r`)> + + 5$ seq 1 | rush 'python unexisted_script.py' -r 1 + + 5$ seq 1 | parallel --retries 2 'python unexisted_script.py' + +Use B<-u> to see it is really run twice: + + 5$ seq 1 | parallel -u --retries 2 'python unexisted_script.py' + +B<6. Dirname (`{/}`) and basename (`{%}`) and remove custom +suffix (`{^suffix}`)> + + 6$ echo dir/file_1.txt.gz | rush 'echo {/} {%} {^_1.txt.gz}' + + 6$ echo dir/file_1.txt.gz | + parallel --plus echo {//} {/} {%_1.txt.gz} + +B<7. Get basename, and remove last (`{.}`) or any (`{:}`) extension> + + 7$ echo dir.d/file.txt.gz | rush 'echo {.} {:} {%.} {%:}' + + 7$ echo dir.d/file.txt.gz | parallel 'echo {.} {:} {/.} {/:}' + +B<8. Job ID, combine fields index and other replacement strings> + + 8$ echo 12 file.txt dir/s_1.fq.gz | + rush 'echo job {#}: {2} {2.} {3%:^_1}' + + 8$ echo 12 file.txt dir/s_1.fq.gz | + parallel --colsep ' ' 'echo job {#}: {2} {2.} {3/:%_1}' + +B<9. Capture submatch using regular expression (`{@regexp}`)> + + 9$ echo read_1.fq.gz | rush 'echo {@(.+)_\d}' + + 9$ echo read_1.fq.gz | parallel 'echo {@(.+)_\d}' + +B<10. Custom field delimiter (`-d`)> + + 10$ echo a=b=c | rush 'echo {1} {2} {3}' -d = + + 10$ echo a=b=c | parallel -d = echo {1} {2} {3} + +B<11. Send multi-lines to every command (`-n`)> + + 11$ seq 5 | rush -n 2 -k 'echo "{}"; echo' + + 11$ seq 5 | + parallel -n 2 -k \ + 'echo {=-1 $_=join"\n",@arg[1..$#arg] =}; echo' + + 11$ seq 5 | rush -n 2 -k 'echo "{}"; echo' -J ' ' + + 11$ seq 5 | parallel -n 2 -k 'echo {}; echo' + + +B<12. Custom record delimiter (`-D`), note that empty records are not used.> + + 12$ echo a b c d | rush -D " " -k 'echo {}' + + 12$ echo a b c d | parallel -d " " -k 'echo {}' + + 12$ echo abcd | rush -D "" -k 'echo {}' + + Cannot be done by GNU Parallel + + 12$ cat fasta.fa + >seq1 + tag + >seq2 + cat + gat + >seq3 + attac + a + cat + + 12$ cat fasta.fa | rush -D ">" \ + 'echo FASTA record {#}: name: {1} sequence: {2}' -k -d "\n" + # rush fails to join the multiline sequences + + 12$ cat fasta.fa | (read -n1 ignore_first_char; + parallel -d '>' --colsep '\n' echo FASTA record {#}: \ + name: {1} sequence: '{=2 $_=join"",@arg[2..$#arg]=}' + ) + +B<13. Assign value to variable, like `awk -v` (`-v`)> + + 13$ seq 1 | + rush 'echo Hello, {fname} {lname}!' -v fname=Wei -v lname=Shen + + 13$ seq 1 | + parallel -N0 \ + 'fname=Wei; lname=Shen; echo Hello, ${fname} ${lname}!' + + 13$ for var in a b; do \ + 13$ seq 1 3 | rush -k -v var=$var 'echo var: {var}, data: {}'; \ + 13$ done + +In GNU B<parallel> you would typically do: + + 13$ seq 1 3 | parallel -k echo var: {1}, data: {2} ::: a b :::: - + +If you I<really> want the var: + + 13$ seq 1 3 | + parallel -k var={1} ';echo var: $var, data: {}' ::: a b :::: - + +If you I<really> want the B<for>-loop: + + 13$ for var in a b; do + export var; + seq 1 3 | parallel -k 'echo var: $var, data: {}'; + done + +Contrary to B<rush> this also works if the value is complex like: + + My brother's 12" records + + +B<14. Preset variable (`-v`), avoid repeatedly writing verbose replacement strings> + + 14$ # naive way + echo read_1.fq.gz | rush 'echo {:^_1} {:^_1}_2.fq.gz' + + 14$ echo read_1.fq.gz | parallel 'echo {:%_1} {:%_1}_2.fq.gz' + + 14$ # macro + removing suffix + echo read_1.fq.gz | + rush -v p='{:^_1}' 'echo {p} {p}_2.fq.gz' + + 14$ echo read_1.fq.gz | + parallel 'p={:%_1}; echo $p ${p}_2.fq.gz' + + 14$ # macro + regular expression + echo read_1.fq.gz | rush -v p='{@(.+?)_\d}' 'echo {p} {p}_2.fq.gz' + + 14$ echo read_1.fq.gz | parallel 'p={@(.+?)_\d}; echo $p ${p}_2.fq.gz' + +Contrary to B<rush> GNU B<parallel> works with complex values: + + 14$ echo "My brother's 12\"read_1.fq.gz" | + parallel 'p={@(.+?)_\d}; echo $p ${p}_2.fq.gz' + +B<15. Interrupt jobs by `Ctrl-C`, rush will stop unfinished commands and exit.> + + 15$ seq 1 20 | rush 'sleep 1; echo {}' + ^C + + 15$ seq 1 20 | parallel 'sleep 1; echo {}' + ^C + +B<16. Continue/resume jobs (`-c`). When some jobs failed (by +execution failure, timeout, or canceling by user with `Ctrl + C`), +please switch flag `-c/--continue` on and run again, so that `rush` +can save successful commands and ignore them in I<NEXT> run.> + + 16$ seq 1 3 | rush 'sleep {}; echo {}' -t 3 -c + cat successful_cmds.rush + seq 1 3 | rush 'sleep {}; echo {}' -t 3 -c + + 16$ seq 1 3 | parallel --joblog mylog --timeout 2 \ + 'sleep {}; echo {}' + cat mylog + seq 1 3 | parallel --joblog mylog --retry-failed \ + 'sleep {}; echo {}' + +Multi-line jobs: + + 16$ seq 1 3 | rush 'sleep {}; echo {}; \ + echo finish {}' -t 3 -c -C finished.rush + cat finished.rush + seq 1 3 | rush 'sleep {}; echo {}; \ + echo finish {}' -t 3 -c -C finished.rush + + 16$ seq 1 3 | + parallel --joblog mylog --timeout 2 'sleep {}; echo {}; \ + echo finish {}' + cat mylog + seq 1 3 | + parallel --joblog mylog --retry-failed 'sleep {}; echo {}; \ + echo finish {}' + +B<17. A comprehensive example: downloading 1K+ pages given by +three URL list files using `phantomjs save_page.js` (some page +contents are dynamically generated by Javascript, so `wget` does not +work). Here I set max jobs number (`-j`) as `20`, each job has a max +running time (`-t`) of `60` seconds and `3` retry changes +(`-r`). Continue flag `-c` is also switched on, so we can continue +unfinished jobs. Luckily, it's accomplished in one run :)> + + 17$ for f in $(seq 2014 2016); do \ + /bin/rm -rf $f; mkdir -p $f; \ + cat $f.html.txt | rush -v d=$f -d = \ + 'phantomjs save_page.js "{}" > {d}/{3}.html' \ + -j 20 -t 60 -r 3 -c; \ + done + +GNU B<parallel> can append to an existing joblog with '+': + + 17$ rm mylog + for f in $(seq 2014 2016); do + /bin/rm -rf $f; mkdir -p $f; + cat $f.html.txt | + parallel -j20 --timeout 60 --retries 4 --joblog +mylog \ + --colsep = \ + phantomjs save_page.js {1}={2}={3} '>' $f/{3}.html + done + +B<18. A bioinformatics example: mapping with `bwa`, and +processing result with `samtools`:> + + 18$ ref=ref/xxx.fa + threads=25 + ls -d raw.cluster.clean.mapping/* \ + | rush -v ref=$ref -v j=$threads -v p='{}/{%}' \ + 'bwa mem -t {j} -M -a {ref} {p}_1.fq.gz {p}_2.fq.gz >{p}.sam;\ + samtools view -bS {p}.sam > {p}.bam; \ + samtools sort -T {p}.tmp -@ {j} {p}.bam -o {p}.sorted.bam; \ + samtools index {p}.sorted.bam; \ + samtools flagstat {p}.sorted.bam > {p}.sorted.bam.flagstat; \ + /bin/rm {p}.bam {p}.sam;' \ + -j 2 --verbose -c -C mapping.rush + +GNU B<parallel> would use a function: + + 18$ ref=ref/xxx.fa + export ref + thr=25 + export thr + bwa_sam() { + p="$1" + bam="$p".bam + sam="$p".sam + sortbam="$p".sorted.bam + bwa mem -t $thr -M -a $ref ${p}_1.fq.gz ${p}_2.fq.gz > "$sam" + samtools view -bS "$sam" > "$bam" + samtools sort -T ${p}.tmp -@ $thr "$bam" -o "$sortbam" + samtools index "$sortbam" + samtools flagstat "$sortbam" > "$sortbam".flagstat + /bin/rm "$bam" "$sam" + } + export -f bwa_sam + ls -d raw.cluster.clean.mapping/* | + parallel -j 2 --verbose --joblog mylog bwa_sam + +=head3 Other B<rush> features + +B<rush> has: + +=over 4 + +=item * B<awk -v> like custom defined variables (B<-v>) + +With GNU B<parallel> you would simply set a shell variable: + + parallel 'v={}; echo "$v"' ::: foo + echo foo | rush -v v={} 'echo {v}' + +Also B<rush> does not like special chars. So these B<do not work>: + + echo does not work | rush -v v=\" 'echo {v}' + echo "My brother's 12\" records" | rush -v v={} 'echo {v}' + +Whereas the corresponding GNU B<parallel> version works: + + parallel 'v=\"; echo "$v"' ::: works + parallel 'v={}; echo "$v"' ::: "My brother's 12\" records" + +=item * Exit on first error(s) (-e) + +This is called B<--halt now,fail=1> (or shorter: B<--halt 2>) when +used with GNU B<parallel>. + +=item * Settable records sending to every command (B<-n>, default 1) + +This is also called B<-n> in GNU B<parallel>. + +=item * Practical replacement strings + +=over 4 + +=item {:} remove any extension + +With GNU B<parallel> this can be emulated by: + + parallel --plus echo '{/\..*/}' ::: foo.ext.bar.gz + +=item {^suffix}, remove suffix + +With GNU B<parallel> this can be emulated by: + + parallel --plus echo '{%.bar.gz}' ::: foo.ext.bar.gz + +=item {@regexp}, capture submatch using regular expression + +With GNU B<parallel> this can be emulated by: + + parallel --rpl '{@(.*?)} /$$1/ and $_=$1;' \ + echo '{@\d_(.*).gz}' ::: 1_foo.gz + +=item {%.}, {%:}, basename without extension + +With GNU B<parallel> this can be emulated by: + + parallel echo '{= s:.*/::;s/\..*// =}' ::: dir/foo.bar.gz + +And if you need it often, you define a B<--rpl> in +B<$HOME/.parallel/config>: + + --rpl '{%.} s:.*/::;s/\..*//' + --rpl '{%:} s:.*/::;s/\..*//' + +Then you can use them as: + + parallel echo {%.} {%:} ::: dir/foo.bar.gz + +=back + +=item * Preset variable (macro) + +E.g. + + echo foosuffix | rush -v p={^suffix} 'echo {p}_new_suffix' + +With GNU B<parallel> this can be emulated by: + + echo foosuffix | + parallel --plus 'p={%suffix}; echo ${p}_new_suffix' + +Opposite B<rush> GNU B<parallel> works fine if the input contains +double space, ' and ": + + echo "1'6\" foosuffix" | + parallel --plus 'p={%suffix}; echo "${p}"_new_suffix' + + +=item * Commands of multi-lines + +While you I<can> use multi-lined commands in GNU B<parallel>, to +improve readability GNU B<parallel> discourages the use of multi-line +commands. In most cases it can be written as a function: + + seq 1 3 | + parallel --timeout 2 --joblog my.log 'sleep {}; echo {}; \ + echo finish {}' + +Could be written as: + + doit() { + sleep "$1" + echo "$1" + echo finish "$1" + } + export -f doit + seq 1 3 | parallel --timeout 2 --joblog my.log doit + +The failed commands can be resumed with: + + seq 1 3 | + parallel --resume-failed --joblog my.log 'sleep {}; echo {};\ + echo finish {}' + +=back + +https://github.com/shenwei356/rush + + +=head2 DIFFERENCES BETWEEN ClusterSSH AND GNU Parallel + +ClusterSSH solves a different problem than GNU B<parallel>. + +ClusterSSH opens a terminal window for each computer and using a +master window you can run the same command on all the computers. This +is typically used for administrating several computers that are almost +identical. + +GNU B<parallel> runs the same (or different) commands with different +arguments in parallel possibly using remote computers to help +computing. If more than one computer is listed in B<-S> GNU B<parallel> may +only use one of these (e.g. if there are 8 jobs to be run and one +computer has 8 cores). + +GNU B<parallel> can be used as a poor-man's version of ClusterSSH: + +B<parallel --nonall -S server-a,server-b do_stuff foo bar> + +https://github.com/duncs/clusterssh + + +=head2 DIFFERENCES BETWEEN coshell AND GNU Parallel + +B<coshell> only accepts full commands on standard input. Any quoting +needs to be done by the user. + +Commands are run in B<sh> so any B<bash>/B<tcsh>/B<zsh> specific +syntax will not work. + +Output can be buffered by using B<-d>. Output is buffered in memory, +so big output can cause swapping and therefore be terrible slow or +even cause out of memory. + +https://github.com/gdm85/coshell (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN spread AND GNU Parallel + +B<spread> runs commands on all directories. + +It can be emulated with GNU B<parallel> using this Bash function: + + spread() { + _cmds() { + perl -e '$"=" && ";print "@ARGV"' "cd {}" "$@" + } + parallel $(_cmds "$@")'|| echo exit status $?' ::: */ + } + +This works except for the B<--exclude> option. + +(Last checked: 2017-11) + + +=head2 DIFFERENCES BETWEEN pyargs AND GNU Parallel + +B<pyargs> deals badly with input containing spaces. It buffers stdout, +but not stderr. It buffers in RAM. {} does not work as replacement +string. It does not support running functions. + +B<pyargs> does not support composed commands if run with B<--lines>, +and fails on B<pyargs traceroute gnu.org fsf.org>. + +=head3 Examples + + seq 5 | pyargs -P50 -L seq + seq 5 | parallel -P50 --lb seq + + seq 5 | pyargs -P50 --mark -L seq + seq 5 | parallel -P50 --lb \ + --tagstring OUTPUT'[{= $_=$job->replaced()=}]' seq + # Similar, but not precisely the same + seq 5 | parallel -P50 --lb --tag seq + + seq 5 | pyargs -P50 --mark command + # Somewhat longer with GNU Parallel due to the special + # --mark formatting + cmd="$(echo "command" | parallel --shellquote)" + wrap_cmd() { + echo "MARK $cmd $@================================" >&3 + echo "OUTPUT START[$cmd $@]:" + eval $cmd "$@" + echo "OUTPUT END[$cmd $@]" + } + (seq 5 | env_parallel -P2 wrap_cmd) 3>&1 + # Similar, but not exactly the same + seq 5 | parallel -t --tag command + + (echo '1 2 3';echo 4 5 6) | pyargs --stream seq + (echo '1 2 3';echo 4 5 6) | perl -pe 's/\n/ /' | + parallel -r -d' ' seq + # Similar, but not exactly the same + parallel seq ::: 1 2 3 4 5 6 + +https://github.com/robertblackwell/pyargs (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN concurrently AND GNU Parallel + +B<concurrently> runs jobs in parallel. + +The output is prepended with the job number, and may be incomplete: + + $ concurrently 'seq 100000' | (sleep 3;wc -l) + 7165 + +When pretty printing it caches output in memory. Output mixes by using +test MIX below whether or not output is cached. + +There seems to be no way of making a template command and have +B<concurrently> fill that with different args. The full commands must +be given on the command line. + +There is also no way of controlling how many jobs should be run in +parallel at a time - i.e. "number of jobslots". Instead all jobs are +simply started in parallel. + +https://github.com/kimmobrunfeldt/concurrently (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN map(soveran) AND GNU Parallel + +B<map> does not run jobs in parallel by default. The README suggests using: + + ... | map t 'sleep $t && say done &' + +But this fails if more jobs are run in parallel than the number of +available processes. Since there is no support for parallelization in +B<map> itself, the output also mixes: + + seq 10 | map i 'echo start-$i && sleep 0.$i && echo end-$i &' + +The major difference is that GNU B<parallel> is built for parallelization +and B<map> is not. So GNU B<parallel> has lots of ways of dealing with the +issues that parallelization raises: + +=over 4 + +=item * + +Keep the number of processes manageable + +=item * + +Make sure output does not mix + +=item * + +Make Ctrl-C kill all running processes + +=back + +=head3 EXAMPLES FROM maps WEBSITE + +Here are the 5 examples converted to GNU Parallel: + + 1$ ls *.c | map f 'foo $f' + 1$ ls *.c | parallel foo + + 2$ ls *.c | map f 'foo $f; bar $f' + 2$ ls *.c | parallel 'foo {}; bar {}' + + 3$ cat urls | map u 'curl -O $u' + 3$ cat urls | parallel curl -O + + 4$ printf "1\n1\n1\n" | map t 'sleep $t && say done' + 4$ printf "1\n1\n1\n" | parallel 'sleep {} && say done' + 4$ parallel 'sleep {} && say done' ::: 1 1 1 + + 5$ printf "1\n1\n1\n" | map t 'sleep $t && say done &' + 5$ printf "1\n1\n1\n" | parallel -j0 'sleep {} && say done' + 5$ parallel -j0 'sleep {} && say done' ::: 1 1 1 + +https://github.com/soveran/map (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN loop AND GNU Parallel + +B<loop> mixes stdout and stderr: + + loop 'ls /no-such-file' >/dev/null + +B<loop>'s replacement string B<$ITEM> does not quote strings: + + echo 'two spaces' | loop 'echo $ITEM' + +B<loop> cannot run functions: + + myfunc() { echo joe; } + export -f myfunc + loop 'myfunc this fails' + +=head3 EXAMPLES FROM loop's WEBSITE + +Some of the examples from https://github.com/Miserlou/Loop/ can be +emulated with GNU B<parallel>: + + # A couple of functions will make the code easier to read + $ loopy() { + yes | parallel -uN0 -j1 "$@" + } + $ export -f loopy + $ time_out() { + parallel -uN0 -q --timeout "$@" ::: 1 + } + $ match() { + perl -0777 -ne 'grep /'"$1"'/,$_ and print or exit 1' + } + $ export -f match + + $ loop 'ls' --every 10s + $ loopy --delay 10s ls + + $ loop 'touch $COUNT.txt' --count-by 5 + $ loopy touch '{= $_=seq()*5 =}'.txt + + $ loop --until-contains 200 -- \ + ./get_response_code.sh --site mysite.biz` + $ loopy --halt now,success=1 \ + './get_response_code.sh --site mysite.biz | match 200' + + $ loop './poke_server' --for-duration 8h + $ time_out 8h loopy ./poke_server + + $ loop './poke_server' --until-success + $ loopy --halt now,success=1 ./poke_server + + $ cat files_to_create.txt | loop 'touch $ITEM' + $ cat files_to_create.txt | parallel touch {} + + $ loop 'ls' --for-duration 10min --summary + # --joblog is somewhat more verbose than --summary + $ time_out 10m loopy --joblog my.log ./poke_server; cat my.log + + $ loop 'echo hello' + $ loopy echo hello + + $ loop 'echo $COUNT' + # GNU Parallel counts from 1 + $ loopy echo {#} + # Counting from 0 can be forced + $ loopy echo '{= $_=seq()-1 =}' + + $ loop 'echo $COUNT' --count-by 2 + $ loopy echo '{= $_=2*(seq()-1) =}' + + $ loop 'echo $COUNT' --count-by 2 --offset 10 + $ loopy echo '{= $_=10+2*(seq()-1) =}' + + $ loop 'echo $COUNT' --count-by 1.1 + # GNU Parallel rounds 3.3000000000000003 to 3.3 + $ loopy echo '{= $_=1.1*(seq()-1) =}' + + $ loop 'echo $COUNT $ACTUALCOUNT' --count-by 2 + $ loopy echo '{= $_=2*(seq()-1) =} {#}' + + $ loop 'echo $COUNT' --num 3 --summary + # --joblog is somewhat more verbose than --summary + $ seq 3 | parallel --joblog my.log echo; cat my.log + + $ loop 'ls -foobarbatz' --num 3 --summary + # --joblog is somewhat more verbose than --summary + $ seq 3 | parallel --joblog my.log -N0 ls -foobarbatz; cat my.log + + $ loop 'echo $COUNT' --count-by 2 --num 50 --only-last + # Can be emulated by running 2 jobs + $ seq 49 | parallel echo '{= $_=2*(seq()-1) =}' >/dev/null + $ echo 50| parallel echo '{= $_=2*(seq()-1) =}' + + $ loop 'date' --every 5s + $ loopy --delay 5s date + + $ loop 'date' --for-duration 8s --every 2s + $ time_out 8s loopy --delay 2s date + + $ loop 'date -u' --until-time '2018-05-25 20:50:00' --every 5s + $ seconds=$((`date -d 2019-05-25T20:50:00 +%s` - `date +%s`))s + $ time_out $seconds loopy --delay 5s date -u + + $ loop 'echo $RANDOM' --until-contains "666" + $ loopy --halt now,success=1 'echo $RANDOM | match 666' + + $ loop 'if (( RANDOM % 2 )); then + (echo "TRUE"; true); + else + (echo "FALSE"; false); + fi' --until-success + $ loopy --halt now,success=1 'if (( $RANDOM % 2 )); then + (echo "TRUE"; true); + else + (echo "FALSE"; false); + fi' + + $ loop 'if (( RANDOM % 2 )); then + (echo "TRUE"; true); + else + (echo "FALSE"; false); + fi' --until-error + $ loopy --halt now,fail=1 'if (( $RANDOM % 2 )); then + (echo "TRUE"; true); + else + (echo "FALSE"; false); + fi' + + $ loop 'date' --until-match "(\d{4})" + $ loopy --halt now,success=1 'date | match [0-9][0-9][0-9][0-9]' + + $ loop 'echo $ITEM' --for red,green,blue + $ parallel echo ::: red green blue + + $ cat /tmp/my-list-of-files-to-create.txt | loop 'touch $ITEM' + $ cat /tmp/my-list-of-files-to-create.txt | parallel touch + + $ ls | loop 'cp $ITEM $ITEM.bak'; ls + $ ls | parallel cp {} {}.bak; ls + + $ loop 'echo $ITEM | tr a-z A-Z' -i + $ parallel 'echo {} | tr a-z A-Z' + # Or more efficiently: + $ parallel --pipe tr a-z A-Z + + $ loop 'echo $ITEM' --for "`ls`" + $ parallel echo {} ::: "`ls`" + + $ ls | loop './my_program $ITEM' --until-success; + $ ls | parallel --halt now,success=1 ./my_program {} + + $ ls | loop './my_program $ITEM' --until-fail; + $ ls | parallel --halt now,fail=1 ./my_program {} + + $ ./deploy.sh; + loop 'curl -sw "%{http_code}" http://coolwebsite.biz' \ + --every 5s --until-contains 200; + ./announce_to_slack.sh + $ ./deploy.sh; + loopy --delay 5s --halt now,success=1 \ + 'curl -sw "%{http_code}" http://coolwebsite.biz | match 200'; + ./announce_to_slack.sh + + $ loop "ping -c 1 mysite.com" --until-success; ./do_next_thing + $ loopy --halt now,success=1 ping -c 1 mysite.com; ./do_next_thing + + $ ./create_big_file -o my_big_file.bin; + loop 'ls' --until-contains 'my_big_file.bin'; + ./upload_big_file my_big_file.bin + # inotifywait is a better tool to detect file system changes. + # It can even make sure the file is complete + # so you are not uploading an incomplete file + $ inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f . | + grep my_big_file.bin + + $ ls | loop 'cp $ITEM $ITEM.bak' + $ ls | parallel cp {} {}.bak + + $ loop './do_thing.sh' --every 15s --until-success --num 5 + $ parallel --retries 5 --delay 15s ::: ./do_thing.sh + +https://github.com/Miserlou/Loop/ (Last checked: 2018-10) + + +=head2 DIFFERENCES BETWEEN lorikeet AND GNU Parallel + +B<lorikeet> can run jobs in parallel. It does this based on a +dependency graph described in a file, so this is similar to B<make>. + +https://github.com/cetra3/lorikeet (Last checked: 2018-10) + + +=head2 DIFFERENCES BETWEEN spp AND GNU Parallel + +B<spp> can run jobs in parallel. B<spp> does not use a command +template to generate the jobs, but requires jobs to be in a +file. Output from the jobs mix. + +https://github.com/john01dav/spp (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN paral AND GNU Parallel + +B<paral> prints a lot of status information and stores the output from +the commands run into files. This means it cannot be used the middle +of a pipe like this + + paral "echo this" "echo does not" "echo work" | wc + +Instead it puts the output into files named like +B<out_#_I<command>.out.log>. To get a very similar behaviour with GNU +B<parallel> use B<--results +'out_{#}_{=s/[^\sa-z_0-9]//g;s/\s+/_/g=}.log' --eta> + +B<paral> only takes arguments on the command line and each argument +should be a full command. Thus it does not use command templates. + +This limits how many jobs it can run in total, because they all need +to fit on a single command line. + +B<paral> has no support for running jobs remotely. + +=head3 EXAMPLES FROM README.markdown + +The examples from B<README.markdown> and the corresponding command run +with GNU B<parallel> (B<--results +'out_{#}_{=s/[^\sa-z_0-9]//g;s/\s+/_/g=}.log' --eta> is omitted from +the GNU B<parallel> command): + + 1$ paral "command 1" "command 2 --flag" "command arg1 arg2" + 1$ parallel ::: "command 1" "command 2 --flag" "command arg1 arg2" + + 2$ paral "sleep 1 && echo c1" "sleep 2 && echo c2" \ + "sleep 3 && echo c3" "sleep 4 && echo c4" "sleep 5 && echo c5" + 2$ parallel ::: "sleep 1 && echo c1" "sleep 2 && echo c2" \ + "sleep 3 && echo c3" "sleep 4 && echo c4" "sleep 5 && echo c5" + # Or shorter: + parallel "sleep {} && echo c{}" ::: {1..5} + + 3$ paral -n=0 "sleep 5 && echo c5" "sleep 4 && echo c4" \ + "sleep 3 && echo c3" "sleep 2 && echo c2" "sleep 1 && echo c1" + 3$ parallel ::: "sleep 5 && echo c5" "sleep 4 && echo c4" \ + "sleep 3 && echo c3" "sleep 2 && echo c2" "sleep 1 && echo c1" + # Or shorter: + parallel -j0 "sleep {} && echo c{}" ::: 5 4 3 2 1 + + 4$ paral -n=1 "sleep 5 && echo c5" "sleep 4 && echo c4" \ + "sleep 3 && echo c3" "sleep 2 && echo c2" "sleep 1 && echo c1" + 4$ parallel -j1 "sleep {} && echo c{}" ::: 5 4 3 2 1 + + 5$ paral -n=2 "sleep 5 && echo c5" "sleep 4 && echo c4" \ + "sleep 3 && echo c3" "sleep 2 && echo c2" "sleep 1 && echo c1" + 5$ parallel -j2 "sleep {} && echo c{}" ::: 5 4 3 2 1 + + 6$ paral -n=5 "sleep 5 && echo c5" "sleep 4 && echo c4" \ + "sleep 3 && echo c3" "sleep 2 && echo c2" "sleep 1 && echo c1" + 6$ parallel -j5 "sleep {} && echo c{}" ::: 5 4 3 2 1 + + 7$ paral -n=1 "echo a && sleep 0.5 && echo b && sleep 0.5 && \ + echo c && sleep 0.5 && echo d && sleep 0.5 && \ + echo e && sleep 0.5 && echo f && sleep 0.5 && \ + echo g && sleep 0.5 && echo h" + 7$ parallel ::: "echo a && sleep 0.5 && echo b && sleep 0.5 && \ + echo c && sleep 0.5 && echo d && sleep 0.5 && \ + echo e && sleep 0.5 && echo f && sleep 0.5 && \ + echo g && sleep 0.5 && echo h" + +https://github.com/amattn/paral (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN concurr AND GNU Parallel + +B<concurr> is built to run jobs in parallel using a client/server +model. + +=head3 EXAMPLES FROM README.md + +The examples from B<README.md>: + + 1$ concurr 'echo job {#} on slot {%}: {}' : arg1 arg2 arg3 arg4 + 1$ parallel 'echo job {#} on slot {%}: {}' ::: arg1 arg2 arg3 arg4 + + 2$ concurr 'echo job {#} on slot {%}: {}' :: file1 file2 file3 + 2$ parallel 'echo job {#} on slot {%}: {}' :::: file1 file2 file3 + + 3$ concurr 'echo {}' < input_file + 3$ parallel 'echo {}' < input_file + + 4$ cat file | concurr 'echo {}' + 4$ cat file | parallel 'echo {}' + +B<concurr> deals badly empty input files and with output larger than +64 KB. + +https://github.com/mmstick/concurr (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN lesser-parallel AND GNU Parallel + +B<lesser-parallel> is the inspiration for B<parallel --embed>. Both +B<lesser-parallel> and B<parallel --embed> define bash functions that +can be included as part of a bash script to run jobs in parallel. + +B<lesser-parallel> implements a few of the replacement strings, but +hardly any options, whereas B<parallel --embed> gives you the full +GNU B<parallel> experience. + +https://github.com/kou1okada/lesser-parallel (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN npm-parallel AND GNU Parallel + +B<npm-parallel> can run npm tasks in parallel. + +There are no examples and very little documentation, so it is hard to +compare to GNU B<parallel>. + +https://github.com/spion/npm-parallel (Last checked: 2019-01) + + +=head2 DIFFERENCES BETWEEN machma AND GNU Parallel + +B<machma> runs tasks in parallel. It gives time stamped +output. It buffers in RAM. + +=head3 EXAMPLES FROM README.md + +The examples from README.md: + + 1$ # Put shorthand for timestamp in config for the examples + echo '--rpl '\ + \''{time} $_=::strftime("%Y-%m-%d %H:%M:%S",localtime())'\' \ + > ~/.parallel/machma + echo '--line-buffer --tagstring "{#} {time} {}"' \ + >> ~/.parallel/machma + + 2$ find . -iname '*.jpg' | + machma -- mogrify -resize 1200x1200 -filter Lanczos {} + find . -iname '*.jpg' | + parallel --bar -Jmachma mogrify -resize 1200x1200 \ + -filter Lanczos {} + + 3$ cat /tmp/ips | machma -p 2 -- ping -c 2 -q {} + 3$ cat /tmp/ips | parallel -j2 -Jmachma ping -c 2 -q {} + + 4$ cat /tmp/ips | + machma -- sh -c 'ping -c 2 -q $0 > /dev/null && echo alive' {} + 4$ cat /tmp/ips | + parallel -Jmachma 'ping -c 2 -q {} > /dev/null && echo alive' + + 5$ find . -iname '*.jpg' | + machma --timeout 5s -- mogrify -resize 1200x1200 \ + -filter Lanczos {} + 5$ find . -iname '*.jpg' | + parallel --timeout 5s --bar mogrify -resize 1200x1200 \ + -filter Lanczos {} + + 6$ find . -iname '*.jpg' -print0 | + machma --null -- mogrify -resize 1200x1200 -filter Lanczos {} + 6$ find . -iname '*.jpg' -print0 | + parallel --null --bar mogrify -resize 1200x1200 \ + -filter Lanczos {} + +https://github.com/fd0/machma (Last checked: 2019-06) + + +=head2 DIFFERENCES BETWEEN interlace AND GNU Parallel + +Summary (see legend above): + +=over + +=item - I2 I3 I4 - - - + +=item M1 - M3 - - M6 + +=item - O2 O3 - - - - x x + +=item E1 E2 - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<interlace> is built for network analysis to run network tools in parallel. + +B<interface> does not buffer output, so output from different jobs mixes. + +The overhead for each target is O(n*n), so with 1000 targets it +becomes very slow with an overhead in the order of 500ms/target. + +=head3 EXAMPLES FROM interlace's WEBSITE + +Using B<prips> most of the examples from +https://github.com/codingo/Interlace can be run with GNU B<parallel>: + +Blocker + + commands.txt: + mkdir -p _output_/_target_/scans/ + _blocker_ + nmap _target_ -oA _output_/_target_/scans/_target_-nmap + interlace -tL ./targets.txt -cL commands.txt -o $output + + parallel -a targets.txt \ + mkdir -p $output/{}/scans/\; nmap {} -oA $output/{}/scans/{}-nmap + +Blocks + + commands.txt: + _block:nmap_ + mkdir -p _target_/output/scans/ + nmap _target_ -oN _target_/output/scans/_target_-nmap + _block:nmap_ + nikto --host _target_ + interlace -tL ./targets.txt -cL commands.txt + + _nmap() { + mkdir -p $1/output/scans/ + nmap $1 -oN $1/output/scans/$1-nmap + } + export -f _nmap + parallel ::: _nmap "nikto --host" :::: targets.txt + +Run Nikto Over Multiple Sites + + interlace -tL ./targets.txt -threads 5 \ + -c "nikto --host _target_ > ./_target_-nikto.txt" -v + + parallel -a targets.txt -P5 nikto --host {} \> ./{}_-nikto.txt + +Run Nikto Over Multiple Sites and Ports + + interlace -tL ./targets.txt -threads 5 -c \ + "nikto --host _target_:_port_ > ./_target_-_port_-nikto.txt" \ + -p 80,443 -v + + parallel -P5 nikto --host {1}:{2} \> ./{1}-{2}-nikto.txt \ + :::: targets.txt ::: 80 443 + +Run a List of Commands against Target Hosts + + commands.txt: + nikto --host _target_:_port_ > _output_/_target_-nikto.txt + sslscan _target_:_port_ > _output_/_target_-sslscan.txt + testssl.sh _target_:_port_ > _output_/_target_-testssl.txt + interlace -t example.com -o ~/Engagements/example/ \ + -cL ./commands.txt -p 80,443 + + parallel --results ~/Engagements/example/{2}:{3}{1} {1} {2}:{3} \ + ::: "nikto --host" sslscan testssl.sh ::: example.com ::: 80 443 + +CIDR notation with an application that doesn't support it + + interlace -t 192.168.12.0/24 -c "vhostscan _target_ \ + -oN _output_/_target_-vhosts.txt" -o ~/scans/ -threads 50 + + prips 192.168.12.0/24 | + parallel -P50 vhostscan {} -oN ~/scans/{}-vhosts.txt + +Glob notation with an application that doesn't support it + + interlace -t 192.168.12.* -c "vhostscan _target_ \ + -oN _output_/_target_-vhosts.txt" -o ~/scans/ -threads 50 + + # Glob is not supported in prips + prips 192.168.12.0/24 | + parallel -P50 vhostscan {} -oN ~/scans/{}-vhosts.txt + +Dash (-) notation with an application that doesn't support it + + interlace -t 192.168.12.1-15 -c \ + "vhostscan _target_ -oN _output_/_target_-vhosts.txt" \ + -o ~/scans/ -threads 50 + + # Dash notation is not supported in prips + prips 192.168.12.1 192.168.12.15 | + parallel -P50 vhostscan {} -oN ~/scans/{}-vhosts.txt + +Threading Support for an application that doesn't support it + + interlace -tL ./target-list.txt -c \ + "vhostscan -t _target_ -oN _output_/_target_-vhosts.txt" \ + -o ~/scans/ -threads 50 + + cat ./target-list.txt | + parallel -P50 vhostscan -t {} -oN ~/scans/{}-vhosts.txt + +alternatively + + ./vhosts-commands.txt: + vhostscan -t $target -oN _output_/_target_-vhosts.txt + interlace -cL ./vhosts-commands.txt -tL ./target-list.txt \ + -threads 50 -o ~/scans + + ./vhosts-commands.txt: + vhostscan -t "$1" -oN "$2" + parallel -P50 ./vhosts-commands.txt {} ~/scans/{}-vhosts.txt \ + :::: ./target-list.txt + +Exclusions + + interlace -t 192.168.12.0/24 -e 192.168.12.0/26 -c \ + "vhostscan _target_ -oN _output_/_target_-vhosts.txt" \ + -o ~/scans/ -threads 50 + + prips 192.168.12.0/24 | grep -xv -Ff <(prips 192.168.12.0/26) | + parallel -P50 vhostscan {} -oN ~/scans/{}-vhosts.txt + +Run Nikto Using Multiple Proxies + + interlace -tL ./targets.txt -pL ./proxies.txt -threads 5 -c \ + "nikto --host _target_:_port_ -useproxy _proxy_ > \ + ./_target_-_port_-nikto.txt" -p 80,443 -v + + parallel -j5 \ + "nikto --host {1}:{2} -useproxy {3} > ./{1}-{2}-nikto.txt" \ + :::: ./targets.txt ::: 80 443 :::: ./proxies.txt + +https://github.com/codingo/Interlace (Last checked: 2019-09) + + +=head2 DIFFERENCES BETWEEN otonvm Parallel AND GNU Parallel + +I have been unable to get the code to run at all. It seems unfinished. + +https://github.com/otonvm/Parallel (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN k-bx par AND GNU Parallel + +B<par> requires Haskell to work. This limits the number of platforms +this can work on. + +B<par> does line buffering in memory. The memory usage is 3x the +longest line (compared to 1x for B<parallel --lb>). Commands must be +given as arguments. There is no template. + +These are the examples from https://github.com/k-bx/par with the +corresponding GNU B<parallel> command. + + par "echo foo; sleep 1; echo foo; sleep 1; echo foo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + parallel --lb ::: "echo foo; sleep 1; echo foo; sleep 1; echo foo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + + par "echo foo; sleep 1; foofoo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + parallel --lb --halt 1 ::: "echo foo; sleep 1; foofoo" \ + "echo bar; sleep 1; echo bar; sleep 1; echo bar" && echo "success" + + par "PARPREFIX=[fooechoer] echo foo" "PARPREFIX=[bar] echo bar" + parallel --lb --colsep , --tagstring {1} {2} \ + ::: "[fooechoer],echo foo" "[bar],echo bar" + + par --succeed "foo" "bar" && echo 'wow' + parallel "foo" "bar"; true && echo 'wow' + +https://github.com/k-bx/par (Last checked: 2019-02) + +=head2 DIFFERENCES BETWEEN parallelshell AND GNU Parallel + +B<parallelshell> does not allow for composed commands: + + # This does not work + parallelshell 'echo foo;echo bar' 'echo baz;echo quuz' + +Instead you have to wrap that in a shell: + + parallelshell 'sh -c "echo foo;echo bar"' 'sh -c "echo baz;echo quuz"' + +It buffers output in RAM. All commands must be given on the command +line and all commands are started in parallel at the same time. This +will cause the system to freeze if there are so many jobs that there +is not enough memory to run them all at the same time. + +https://github.com/keithamus/parallelshell (Last checked: 2019-02) + +https://github.com/darkguy2008/parallelshell (Last checked: 2019-03) + + +=head2 DIFFERENCES BETWEEN shell-executor AND GNU Parallel + +B<shell-executor> does not allow for composed commands: + + # This does not work + sx 'echo foo;echo bar' 'echo baz;echo quuz' + +Instead you have to wrap that in a shell: + + sx 'sh -c "echo foo;echo bar"' 'sh -c "echo baz;echo quuz"' + +It buffers output in RAM. All commands must be given on the command +line and all commands are started in parallel at the same time. This +will cause the system to freeze if there are so many jobs that there +is not enough memory to run them all at the same time. + +https://github.com/royriojas/shell-executor (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN non-GNU par AND GNU Parallel + +B<par> buffers in memory to avoid mixing of jobs. It takes 1s per 1 +million output lines. + +B<par> needs to have all commands before starting the first job. The +jobs are read from stdin (standard input) so any quoting will have to +be done by the user. + +Stdout (standard output) is prepended with o:. Stderr (standard error) +is sendt to stdout (standard output) and prepended with e:. + +For short jobs with little output B<par> is 20% faster than GNU +B<parallel> and 60% slower than B<xargs>. + +https://github.com/UnixJunkie/PAR + +https://savannah.nongnu.org/projects/par (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN fd AND GNU Parallel + +B<fd> does not support composed commands, so commands must be wrapped +in B<sh -c>. + +It buffers output in RAM. + +It only takes file names from the filesystem as input (similar to B<find>). + +https://github.com/sharkdp/fd (Last checked: 2019-02) + + +=head2 DIFFERENCES BETWEEN lateral AND GNU Parallel + +B<lateral> is very similar to B<sem>: It takes a single command and +runs it in the background. The design means that output from parallel +running jobs may mix. If it dies unexpectly it leaves a socket in +~/.lateral/socket.PID. + +B<lateral> deals badly with too long command lines. This makes the +B<lateral> server crash: + + lateral run echo `seq 100000| head -c 1000k` + +Any options will be read by B<lateral> so this does not work +(B<lateral> interprets the B<-l>): + + lateral run ls -l + +Composed commands do not work: + + lateral run pwd ';' ls + +Functions do not work: + + myfunc() { echo a; } + export -f myfunc + lateral run myfunc + +Running B<emacs> in the terminal causes the parent shell to die: + + echo '#!/bin/bash' > mycmd + echo emacs -nw >> mycmd + chmod +x mycmd + lateral start + lateral run ./mycmd + +Here are the examples from https://github.com/akramer/lateral with the +corresponding GNU B<sem> and GNU B<parallel> commands: + + 1$ lateral start + for i in $(cat /tmp/names); do + lateral run -- some_command $i + done + lateral wait + + 1$ for i in $(cat /tmp/names); do + sem some_command $i + done + sem --wait + + 1$ parallel some_command :::: /tmp/names + + 2$ lateral start + for i in $(seq 1 100); do + lateral run -- my_slow_command < workfile$i > /tmp/logfile$i + done + lateral wait + + 2$ for i in $(seq 1 100); do + sem my_slow_command < workfile$i > /tmp/logfile$i + done + sem --wait + + 2$ parallel 'my_slow_command < workfile{} > /tmp/logfile{}' \ + ::: {1..100} + + 3$ lateral start -p 0 # yup, it will just queue tasks + for i in $(seq 1 100); do + lateral run -- command_still_outputs_but_wont_spam inputfile$i + done + # command output spam can commence + lateral config -p 10; lateral wait + + 3$ for i in $(seq 1 100); do + echo "command inputfile$i" >> joblist + done + parallel -j 10 :::: joblist + + 3$ echo 1 > /tmp/njobs + parallel -j /tmp/njobs command inputfile{} \ + ::: {1..100} & + echo 10 >/tmp/njobs + wait + +https://github.com/akramer/lateral (Last checked: 2019-03) + + +=head2 DIFFERENCES BETWEEN with-this AND GNU Parallel + +The examples from https://github.com/amritb/with-this.git and the +corresponding GNU B<parallel> command: + + with -v "$(cat myurls.txt)" "curl -L this" + parallel curl -L ::: myurls.txt + + with -v "$(cat myregions.txt)" \ + "aws --region=this ec2 describe-instance-status" + parallel aws --region={} ec2 describe-instance-status \ + :::: myregions.txt + + with -v "$(ls)" "kubectl --kubeconfig=this get pods" + ls | parallel kubectl --kubeconfig={} get pods + + with -v "$(ls | grep config)" "kubectl --kubeconfig=this get pods" + ls | grep config | parallel kubectl --kubeconfig={} get pods + + with -v "$(echo {1..10})" "echo 123" + parallel -N0 echo 123 ::: {1..10} + +Stderr is merged with stdout. B<with-this> buffers in RAM. It uses 3x +the output size, so you cannot have output larger than 1/3rd the +amount of RAM. The input values cannot contain spaces. Composed +commands do not work. + +B<with-this> gives some additional information, so the output has to +be cleaned before piping it to the next command. + +https://github.com/amritb/with-this.git (Last checked: 2019-03) + + +=head2 DIFFERENCES BETWEEN Tollef's parallel (moreutils) AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - I4 - - I7 + +=item - - M3 - - M6 + +=item - O2 O3 - O5 O6 - x x + +=item E1 - - - - - E7 + +=item - x x x x x x x x + +=item - - + +=back + +=head3 EXAMPLES FROM Tollef's parallel MANUAL + +B<Tollef> parallel sh -c "echo hi; sleep 2; echo bye" -- 1 2 3 + +B<GNU> parallel "echo hi; sleep 2; echo bye" ::: 1 2 3 + +B<Tollef> parallel -j 3 ufraw -o processed -- *.NEF + +B<GNU> parallel -j 3 ufraw -o processed ::: *.NEF + +B<Tollef> parallel -j 3 -- ls df "echo hi" + +B<GNU> parallel -j 3 ::: ls df "echo hi" + +(Last checked: 2019-08) + +=head2 DIFFERENCES BETWEEN rargs AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - - - - I7 + +=item - - M3 M4 - - + +=item - O2 O3 - O5 O6 - O8 - + +=item E1 - - E4 - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<rargs> has elegant ways of doing named regexp capture and field ranges. + +With GNU B<parallel> you can use B<--rpl> to get a similar +functionality as regexp capture gives, and use B<join> and B<@arg> to +get the field ranges. But the syntax is longer. This: + + --rpl '{r(\d+)\.\.(\d+)} $_=join"$opt::colsep",@arg[$$1..$$2]' + +would make it possible to use: + + {1r3..6} + +for field 3..6. + +For full support of {n..m:s} including negative numbers use a dynamic +replacement string like this: + + + PARALLEL=--rpl\ \''{r((-?\d+)?)\.\.((-?\d+)?)((:([^}]*))?)} + $a = defined $$2 ? $$2 < 0 ? 1+$#arg+$$2 : $$2 : 1; + $b = defined $$4 ? $$4 < 0 ? 1+$#arg+$$4 : $$4 : $#arg+1; + $s = defined $$6 ? $$7 : " "; + $_ = join $s,@arg[$a..$b]'\' + export PARALLEL + +You can then do: + + head /etc/passwd | parallel --colsep : echo ..={1r..} ..3={1r..3} \ + 4..={1r4..} 2..4={1r2..4} 3..3={1r3..3} ..3:-={1r..3:-} \ + ..3:/={1r..3:/} -1={-1} -5={-5} -6={-6} -3..={1r-3..} + +=head3 EXAMPLES FROM rargs MANUAL + + ls *.bak | rargs -p '(.*)\.bak' mv {0} {1} + ls *.bak | parallel mv {} {.} + + cat download-list.csv | rargs -p '(?P<url>.*),(?P<filename>.*)' wget {url} -O {filename} + cat download-list.csv | parallel --csv wget {1} -O {2} + # or use regexps: + cat download-list.csv | + parallel --rpl '{url} s/,.*//' --rpl '{filename} s/.*?,//' wget {url} -O {filename} + + cat /etc/passwd | rargs -d: echo -e 'id: "{1}"\t name: "{5}"\t rest: "{6..::}"' + cat /etc/passwd | + parallel -q --colsep : echo -e 'id: "{1}"\t name: "{5}"\t rest: "{=6 $_=join":",@arg[6..$#arg]=}"' + +https://github.com/lotabout/rargs (Last checked: 2020-01) + + +=head2 DIFFERENCES BETWEEN threader AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - - - - - + +=item M1 - M3 - - M6 + +=item O1 - O3 - O5 - - N/A N/A + +=item E1 - - E4 - - - + +=item - - - - - - - - - + +=item - - + +=back + +Newline separates arguments, but newline at the end of file is treated +as an empty argument. So this runs 2 jobs: + + echo two_jobs | threader -run 'echo "$THREADID"' + +B<threader> ignores stderr, so any output to stderr is +lost. B<threader> buffers in RAM, so output bigger than the machine's +virtual memory will cause the machine to crash. + +https://github.com/voodooEntity/threader (Last checked: 2020-04) + + +=head2 DIFFERENCES BETWEEN runp AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - - + +=item M1 - (M3) - - M6 + +=item O1 O2 O3 - O5 O6 - N/A N/A - + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +(M3): You can add a prefix and a postfix to the input, so it means you can +only insert the argument on the command line once. + +B<runp> runs 10 jobs in parallel by default. B<runp> blocks if output +of a command is > 64 Kbytes. Quoting of input is needed. It adds +output to stderr (this can be prevented with -q) + +=head3 Examples as GNU Parallel + + base='https://images-api.nasa.gov/search' + query='jupiter' + desc='planet' + type='image' + url="$base?q=$query&description=$desc&media_type=$type" + + # Download the images in parallel using runp + curl -s $url | jq -r .collection.items[].href | \ + runp -p 'curl -s' | jq -r .[] | grep large | \ + runp -p 'curl -s -L -O' + + time curl -s $url | jq -r .collection.items[].href | \ + runp -g 1 -q -p 'curl -s' | jq -r .[] | grep large | \ + runp -g 1 -q -p 'curl -s -L -O' + + # Download the images in parallel + curl -s $url | jq -r .collection.items[].href | \ + parallel curl -s | jq -r .[] | grep large | \ + parallel curl -s -L -O + + time curl -s $url | jq -r .collection.items[].href | \ + parallel -j 1 curl -s | jq -r .[] | grep large | \ + parallel -j 1 curl -s -L -O + + +=head4 Run some test commands (read from file) + + # Create a file containing commands to run in parallel. + cat << EOF > /tmp/test-commands.txt + sleep 5 + sleep 3 + blah # this will fail + ls $PWD # PWD shell variable is used here + EOF + + # Run commands from the file. + runp /tmp/test-commands.txt > /dev/null + + parallel -a /tmp/test-commands.txt > /dev/null + +=head4 Ping several hosts and see packet loss (read from stdin) + + # First copy this line and press Enter + runp -p 'ping -c 5 -W 2' -s '| grep loss' + localhost + 1.1.1.1 + 8.8.8.8 + # Press Enter and Ctrl-D when done entering the hosts + + # First copy this line and press Enter + parallel ping -c 5 -W 2 {} '| grep loss' + localhost + 1.1.1.1 + 8.8.8.8 + # Press Enter and Ctrl-D when done entering the hosts + +=head4 Get directories' sizes (read from stdin) + + echo -e "$HOME\n/etc\n/tmp" | runp -q -p 'sudo du -sh' + + echo -e "$HOME\n/etc\n/tmp" | parallel sudo du -sh + # or: + parallel sudo du -sh ::: "$HOME" /etc /tmp + +=head4 Compress files + + find . -iname '*.txt' | runp -p 'gzip --best' + + find . -iname '*.txt' | parallel gzip --best + +=head4 Measure HTTP request + response time + + export CURL="curl -w 'time_total: %{time_total}\n'" + CURL="$CURL -o /dev/null -s https://golang.org/" + perl -wE 'for (1..10) { say $ENV{CURL} }' | + runp -q # Make 10 requests + + perl -wE 'for (1..10) { say $ENV{CURL} }' | parallel + # or: + parallel -N0 "$CURL" ::: {1..10} + +=head4 Find open TCP ports + + cat << EOF > /tmp/host-port.txt + localhost 22 + localhost 80 + localhost 81 + 127.0.0.1 443 + 127.0.0.1 444 + scanme.nmap.org 22 + scanme.nmap.org 23 + scanme.nmap.org 443 + EOF + + 1$ cat /tmp/host-port.txt | + runp -q -p 'netcat -v -w2 -z' 2>&1 | egrep '(succeeded!|open)$' + + # --colsep is needed to split the line + 1$ cat /tmp/host-port.txt | + parallel --colsep ' ' netcat -v -w2 -z 2>&1 | + egrep '(succeeded!|open)$' + # or use uq for unquoted: + 1$ cat /tmp/host-port.txt | + parallel netcat -v -w2 -z {=uq=} 2>&1 | + egrep '(succeeded!|open)$' + +https://github.com/jreisinger/runp (Last checked: 2020-04) + + +=head2 DIFFERENCES BETWEEN papply AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - I4 - - - + +=item M1 - M3 - - M6 + +=item - - O3 - O5 - - N/A N/A O10 + +=item E1 - - E4 - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<papply> does not print the output if the command fails: + + $ papply 'echo %F; false' foo + "echo foo; false" did not succeed + +B<papply>'s replacement strings (%F %d %f %n %e %z) can be simulated in GNU +B<parallel> by putting this in B<~/.parallel/config>: + + --rpl '%F' + --rpl '%d $_=Q(::dirname($_));' + --rpl '%f s:.*/::;' + --rpl '%n s:.*/::;s:\.[^/.]+$::;' + --rpl '%e s:.*\.:.:' + --rpl '%z $_=""' + +B<papply> buffers in RAM, and uses twice the amount of output. So +output of 5 GB takes 10 GB RAM. + +The buffering is very CPU intensive: Buffering a line of 5 GB takes 40 +seconds (compared to 10 seconds with GNU B<parallel>). + + +=head3 Examples as GNU Parallel + + 1$ papply gzip *.txt + + 1$ parallel gzip ::: *.txt + + 2$ papply "convert %F %n.jpg" *.png + + 2$ parallel convert {} {.}.jpg ::: *.png + + +https://pypi.org/project/papply/ (Last checked: 2020-04) + + +=head2 DIFFERENCES BETWEEN async AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - I4 - - I7 + +=item - - - - - M6 + +=item - O2 O3 - O5 O6 - N/A N/A O10 + +=item E1 - - E4 - E6 - + +=item - - - - - - - - - + +=item S1 S2 + +=back + +B<async> is very similary to GNU B<parallel>'s B<--semaphore> mode +(aka B<sem>). B<async> requires the user to start a server process. + +The input is quoted like B<-q> so you need B<bash -c "...;..."> to run +composed commands. + +=head3 Examples as GNU Parallel + + 1$ S="/tmp/example_socket" + + 1$ ID=myid + + 2$ async -s="$S" server --start + + 2$ # GNU Parallel does not need a server to run + + 3$ for i in {1..20}; do + # prints command output to stdout + async -s="$S" cmd -- bash -c "sleep 1 && echo test $i" + done + + 3$ for i in {1..20}; do + # prints command output to stdout + sem --id "$ID" -j100% "sleep 1 && echo test $i" + # GNU Parallel will only print job when it is done + # If you need output from different jobs to mix + # use -u or --line-buffer + sem --id "$ID" -j100% --line-buffer "sleep 1 && echo test $i" + done + + 4$ # wait until all commands are finished + async -s="$S" wait + + 4$ sem --id "$ID" --wait + + 5$ # configure the server to run four commands in parallel + async -s="$S" server -j4 + + 5$ export PARALLEL=-j4 + + 6$ mkdir "/tmp/ex_dir" + for i in {21..40}; do + # redirects command output to /tmp/ex_dir/file* + async -s="$S" cmd -o "/tmp/ex_dir/file$i" -- \ + bash -c "sleep 1 && echo test $i" + done + + 6$ mkdir "/tmp/ex_dir" + for i in {21..40}; do + # redirects command output to /tmp/ex_dir/file* + sem --id "$ID" --result '/tmp/my-ex/file-{=$_=""=}'"$i" \ + "sleep 1 && echo test $i" + done + + 7$ sem --id "$ID" --wait + + 7$ async -s="$S" wait + + 8$ # stops server + async -s="$S" server --stop + + 8$ # GNU Parallel does not need to stop a server + + +https://github.com/ctbur/async/ (Last checked: 2020-11) + + +=head2 DIFFERENCES BETWEEN pardi AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - I7 + +=item M1 - - - - M6 + +=item O1 O2 O3 O4 O5 - O7 - - O10 + +=item E1 - - E4 - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<pardi> is very similar to B<parallel --pipe --cat>: It reads blocks +of data and not arguments. So it cannot insert an argument in the +command line. It puts the block into a temporary file, and this file +name (%IN) can be put in the command line. You can only use %IN once. + +It can also run full command lines in parallel (like: B<cat file | +parallel>). + +=head3 EXAMPLES FROM pardi test.sh + + 1$ time pardi -v -c 100 -i data/decoys.smi -ie .smi -oe .smi \ + -o data/decoys_std_pardi.smi \ + -w '(standardiser -i %IN -o %OUT 2>&1) > /dev/null' + + 1$ cat data/decoys.smi | + time parallel -N 100 --pipe --cat \ + '(standardiser -i {} -o {#} 2>&1) > /dev/null; cat {#}; rm {#}' \ + > data/decoys_std_pardi.smi + + 2$ pardi -n 1 -i data/test_in.types -o data/test_out.types \ + -d 'r:^#atoms:' -w 'cat %IN > %OUT' + + 2$ cat data/test_in.types | parallel -n 1 -k --pipe --cat \ + --regexp --recstart '^#atoms' 'cat {}' > data/test_out.types + + 3$ pardi -c 6 -i data/test_in.types -o data/test_out.types \ + -d 'r:^#atoms:' -w 'cat %IN > %OUT' + + 3$ cat data/test_in.types | parallel -n 6 -k --pipe --cat \ + --regexp --recstart '^#atoms' 'cat {}' > data/test_out.types + + 4$ pardi -i data/decoys.mol2 -o data/still_decoys.mol2 \ + -d 's:@<TRIPOS>MOLECULE' -w 'cp %IN %OUT' + + 4$ cat data/decoys.mol2 | + parallel -n 1 --pipe --cat --recstart '@<TRIPOS>MOLECULE' \ + 'cp {} {#}; cat {#}; rm {#}' > data/still_decoys.mol2 + + 5$ pardi -i data/decoys.mol2 -o data/decoys2.mol2 \ + -d b:10000 -w 'cp %IN %OUT' --preserve + + 5$ cat data/decoys.mol2 | + parallel -k --pipe --block 10k --recend '' --cat \ + 'cat {} > {#}; cat {#}; rm {#}' > data/decoys2.mol2 + +https://github.com/UnixJunkie/pardi (Last checked: 2021-01) + + +=head2 DIFFERENCES BETWEEN bthread AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - I4 - - - + +=item - - - - - M6 + +=item O1 - O3 - - - O7 O8 - - + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<bthread> takes around 1 sec per MB of output. The maximal output +line length is 1073741759. + +You cannot quote space in the command, so you cannot run composed +commands like B<sh -c "echo a; echo b">. + +https://gitlab.com/netikras/bthread (Last checked: 2021-01) + + +=head2 DIFFERENCES BETWEEN simple_gpu_scheduler AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - - - - I7 + +=item M1 - - - - M6 + +=item - O2 O3 - - O6 - x x O10 + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +=head3 EXAMPLES FROM simple_gpu_scheduler MANUAL + + 1$ simple_gpu_scheduler --gpus 0 1 2 < gpu_commands.txt + + 1$ parallel -j3 --shuf \ + CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' < gpu_commands.txt + + 2$ simple_hypersearch "python3 train_dnn.py --lr {lr} --batch_size {bs}" \ + -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 | + simple_gpu_scheduler --gpus 0,1,2 + + 2$ parallel --header : --shuf -j3 -v \ + CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =}' \ + python3 train_dnn.py --lr {lr} --batch_size {bs} \ + ::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128 + + 3$ simple_hypersearch \ + "python3 train_dnn.py --lr {lr} --batch_size {bs}" \ + --n-samples 5 -p lr 0.001 0.0005 0.0001 -p bs 32 64 128 | + simple_gpu_scheduler --gpus 0,1,2 + + 3$ parallel --header : --shuf \ + CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1; seq() > 5 and skip() =}' \ + python3 train_dnn.py --lr {lr} --batch_size {bs} \ + ::: lr 0.001 0.0005 0.0001 ::: bs 32 64 128 + + 4$ touch gpu.queue + tail -f -n 0 gpu.queue | simple_gpu_scheduler --gpus 0,1,2 & + echo "my_command_with | and stuff > logfile" >> gpu.queue + + 4$ touch gpu.queue + tail -f -n 0 gpu.queue | + parallel -j3 CUDA_VISIBLE_DEVICES='{=1 $_=slot()-1 =} {=uq;=}' & + # Needed to fill job slots once + seq 3 | parallel echo true >> gpu.queue + # Add jobs + echo "my_command_with | and stuff > logfile" >> gpu.queue + # Needed to flush output from completed jobs + seq 3 | parallel echo true >> gpu.queue + +https://github.com/ExpectationMax/simple_gpu_scheduler (Last checked: +2021-01) + + +=head2 DIFFERENCES BETWEEN parasweep AND GNU Parallel + +B<parasweep> is a Python module for facilitating parallel parameter +sweeps. + +A B<parasweep> job will normally take a text file as input. The text +file contains arguments for the job. Some of these arguments will be +fixed and some of them will be changed by B<parasweep>. + +It does this by having a template file such as template.txt: + + Xval: {x} + Yval: {y} + FixedValue: 9 + # x with 2 decimals + DecimalX: {x:.2f} + TenX: ${x*10} + RandomVal: {r} + +and from this template it generates the file to be used by the job by +replacing the replacement strings. + +Being a Python module B<parasweep> integrates tighter with Python than +GNU B<parallel>. You get the parameters directly in a Python data +structure. With GNU B<parallel> you can use the JSON or CSV output +format to get something similar, but you would have to read the +output. + +B<parasweep> has a filtering method to ignore parameter combinations +you do not need. + +Instead of calling the jobs directly, B<parasweep> can use Python's +Distributed Resource Management Application API to make jobs run with +different cluster software. + + +GNU B<parallel> B<--tmpl> supports templates with replacement +strings. Such as: + + Xval: {x} + Yval: {y} + FixedValue: 9 + # x with 2 decimals + DecimalX: {=x $_=sprintf("%.2f",$_) =} + TenX: {=x $_=$_*10 =} + RandomVal: {=1 $_=rand() =} + +that can be used like: + + parallel --header : --tmpl my.tmpl={#}.t myprog {#}.t \ + ::: x 1 2 3 ::: y 1 2 3 + +Filtering is supported as: + + parallel --filter '{1} > {2}' echo ::: 1 2 3 ::: 1 2 3 + +https://github.com/eviatarbach/parasweep (Last checked: 2021-01) + + +=head2 DIFFERENCES BETWEEN parallel-bash AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - - + +=item - - M3 - - M6 + +=item - O2 O3 - O5 O6 - O8 x O10 + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<parallel-bash> is written in pure bash. It is really fast (overhead +of ~0.05 ms/job compared to GNU B<parallel>'s 3-10 ms/job). So if your +jobs are extremely short lived, and you can live with the quite +limited command, this may be useful. + +It works by making a queue for each process. Then the jobs are +distributed to the queues in a round robin fashion. Finally the queues +are started in parallel. This works fine, if you are lucky, but if +not, all the long jobs may end up in the same queue, so you may see: + + $ printf "%b\n" 1 1 1 4 1 1 1 4 1 1 1 4 | + time parallel -P4 sleep {} + (7 seconds) + $ printf "%b\n" 1 1 1 4 1 1 1 4 1 1 1 4 | + time ./parallel-bash.bash -p 4 -c sleep {} + (12 seconds) + +Because it uses bash lists, the total number of jobs is limited to +167000..265000 depending on your environment. You get a segmentation +fault, when you reach the limit. + +Ctrl-C does not stop spawning new jobs. Ctrl-Z does not suspend +running jobs. + + +=head3 EXAMPLES FROM parallel-bash + + 1$ some_input | parallel-bash -p 5 -c echo + + 1$ some_input | parallel -j 5 echo + + 2$ parallel-bash -p 5 -c echo < some_file + + 2$ parallel -j 5 echo < some_file + + 3$ parallel-bash -p 5 -c echo <<< 'some string' + + 3$ parallel -j 5 -c echo <<< 'some string' + + 4$ something | parallel-bash -p 5 -c echo {} {} + + 4$ something | parallel -j 5 echo {} {} + +https://reposhub.com/python/command-line-tools/Akianonymus-parallel-bash.html +(Last checked: 2021-06) + + +=head2 DIFFERENCES BETWEEN bash-concurrent AND GNU Parallel + +B<bash-concurrent> is more an alternative to B<make> than to GNU +B<parallel>. Its input is very similar to a Makefile, where jobs +depend on other jobs. + +It has a nice progress indicator where you can see which jobs +completed successfully, which jobs are currently running, which jobs +failed, and which jobs were skipped due to a depending job failed. +The indicator does not deal well with resizing the window. + +Output is cached in tempfiles on disk, but is only shown if there is +an error, so it is not meant to be part of a UNIX pipeline. If +B<bash-concurrent> crashes these tempfiles are not removed. + +It uses an O(n*n) algorithm, so if you have 1000 independent jobs it +takes 22 seconds to start it. + +https://github.com/themattrix/bash-concurrent +(Last checked: 2021-02) + + +=head2 DIFFERENCES BETWEEN spawntool AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - - - - - + +=item M1 - - - - M6 + +=item - O2 O3 - O5 O6 - x x O10 + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<spawn> reads a full command line from stdin which it executes in +parallel. + + +http://code.google.com/p/spawntool/ +(Last checked: 2021-07) + + +=head2 DIFFERENCES BETWEEN go-pssh AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - - - - - + +=item M1 - - - - - + +=item O1 - - - - - - x x O10 + +=item E1 - - - - - - + +=item R1 R2 - - - R6 - - - + +=item - - + +=back + +B<go-pssh> does B<ssh> in parallel to multiple machines. It runs the +same command on multiple machines similar to B<--nonall>. + +The hostnames must be given as IP-addresses (not as hostnames). + +Output is sent to stdout (standard output) if command is successful, +and to stderr (standard error) if the command fails. + +=head3 EXAMPLES FROM go-pssh + + 1$ go-pssh -l <ip>,<ip> -u <user> -p <port> -P <passwd> -c "<command>" + + 1$ parallel -S 'sshpass -p <passwd> ssh -p <port> <user>@<ip>' \ + --nonall "<command>" + + 2$ go-pssh scp -f host.txt -u <user> -p <port> -P <password> \ + -s /local/file_or_directory -d /remote/directory + + 2$ parallel --nonall --slf host.txt \ + --basefile /local/file_or_directory/./ --wd /remote/directory + --ssh 'sshpass -p <password> ssh -p <port> -l <user>' true + + 3$ go-pssh scp -l <ip>,<ip> -u <user> -p <port> -P <password> \ + -s /local/file_or_directory -d /remote/directory + + 3$ parallel --nonall -S <ip>,<ip> \ + --basefile /local/file_or_directory/./ --wd /remote/directory + --ssh 'sshpass -p <password> ssh -p <port> -l <user>' true + +https://github.com/xuchenCN/go-pssh +(Last checked: 2021-07) + + +=head2 DIFFERENCES BETWEEN go-parallel AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 I2 - - - - I7 + +=item - - M3 - - M6 + +=item - O2 O3 - O5 - - x x - O10 + +=item E1 - - E4 - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<go-parallel> uses Go templates for replacement strings. Quite +similar to the I<{= perl expr =}> replacement string. + +=head3 EXAMPLES FROM go-parallel + + 1$ go-parallel -a ./files.txt -t 'cp {{.Input}} {{.Input | dirname | dirname}}' + + 1$ parallel -a ./files.txt cp {} '{= $_=::dirname(::dirname($_)) =}' + + 2$ go-parallel -a ./files.txt -t 'mkdir -p {{.Input}} {{noExt .Input}}' + + 2$ parallel -a ./files.txt echo mkdir -p {} {.} + + 3$ go-parallel -a ./files.txt -t 'mkdir -p {{.Input}} {{.Input | basename | noExt}}' + + 3$ parallel -a ./files.txt echo mkdir -p {} {/.} + +https://github.com/mylanconnolly/parallel +(Last checked: 2021-07) + + +=head2 DIFFERENCES BETWEEN p AND GNU Parallel + +Summary (see legend above): + +=over + +=item - - - I4 - - N/A + +=item - - - - - M6 + +=item - O2 O3 - O5 O6 - x x - O10 + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<p> is a tiny shell script. It can color output with some predefined +colors, but is otherwise quite limited. + +It maxes out at around 116000 jobs (probably due to limitations in Bash). + +=head3 EXAMPLES FROM p + +Some of the examples from B<p> cannot be implemented 100% by GNU +B<parallel>: The coloring is a bit different, and GNU B<parallel> +cannot have B<--tag> for some inputs and not for others. + +The coloring done by GNU B<parallel> is not exactly the same as B<p>. + + 1$ p -bc blue "ping 127.0.0.1" -uc red "ping 192.168.0.1" \ + -rc yellow "ping 192.168.1.1" -t example "ping example.com" + + 1$ parallel --lb -j0 --color --tag ping \ + ::: 127.0.0.1 192.168.0.1 192.168.1.1 example.com + + 2$ p "tail -f /var/log/httpd/access_log" \ + -bc red "tail -f /var/log/httpd/error_log" + + 2$ cd /var/log/httpd; + parallel --lb --color --tag tail -f ::: access_log error_log + + 3$ p tail -f "some file" \& p tail -f "other file with space.txt" + + 3$ parallel --lb tail -f ::: 'some file' "other file with space.txt" + + 4$ p -t project1 "hg pull project1" -t project2 \ + "hg pull project2" -t project3 "hg pull project3" + + 4$ parallel --lb hg pull ::: project{1..3} + +https://github.com/rudymatela/evenmoreutils/blob/master/man/p.1.adoc +(Last checked: 2022-04) + + +=head2 DIFFERENCES BETWEEN senechal AND GNU Parallel + +Summary (see legend above): + +=over + +=item I1 - - - - - - + +=item M1 - M3 - - M6 + +=item O1 - O3 O4 - - - x x - + +=item E1 - - - - - - + +=item - - - - - - - - - + +=item - - + +=back + +B<seneschal> only starts the first job after reading the last job, and +output from the first job is only printed after the last job finishes. + +1 byte of output requites 3.5 bytes of RAM. + +This makes it impossible to have a total output bigger than the +virtual memory. + +Even though output is kept in RAM outputing is quite slow: 30 MB/s. + +Output larger than 4 GB causes random problems - it looks like a race +condition. + +This: + + echo 1 | seneschal --prefix='yes `seq 1000`|head -c 1G' >/dev/null + +takes 4100(!) CPU seconds to run on a 64C64T server, but only 140 CPU +seconds on a 4C8T laptop. So it looks like B<seneschal> wastes a lot +of CPU time coordinating the CPUs. + +Compare this to: + + echo 1 | time -v parallel -N0 'yes `seq 1000`|head -c 1G' >/dev/null + +which takes 3-8 CPU seconds. + +=head3 EXAMPLES FROM seneschal README.md + + 1$ echo $REPOS | seneschal --prefix="cd {} && git pull" + + # If $REPOS is newline separated + 1$ echo "$REPOS" | parallel -k "cd {} && git pull" + # If $REPOS is space separated + 1$ echo -n "$REPOS" | parallel -d' ' -k "cd {} && git pull" + + COMMANDS="pwd + sleep 5 && echo boom + echo Howdy + whoami" + + 2$ echo "$COMMANDS" | seneschal --debug + + 2$ echo "$COMMANDS" | parallel -k -v + + 3$ ls -1 | seneschal --prefix="pushd {}; git pull; popd;" + + 3$ ls -1 | parallel -k "pushd {}; git pull; popd;" + # Or if current dir also contains files: + 3$ parallel -k "pushd {}; git pull; popd;" ::: */ + +https://github.com/TheWizardTower/seneschal +(Last checked: 2022-06) + + +=head2 Todo + +http://code.google.com/p/push/ (cannot compile) + +https://github.com/krashanoff/parallel + +https://github.com/Nukesor/pueue + +https://arxiv.org/pdf/2012.15443.pdf KumQuat + +https://arxiv.org/pdf/2007.09436.pdf PaSH: Light-touch Data-Parallel Shell Processing + +https://github.com/JeiKeiLim/simple_distribute_job + +https://github.com/reggi/pkgrun - not obvious how to use + +https://github.com/benoror/better-npm-run - not obvious how to use + +https://github.com/bahmutov/with-package + +https://github.com/flesler/parallel + +https://github.com/Julian/Verge + +https://manpages.ubuntu.com/manpages/xenial/man1/tsp.1.html + +https://vicerveza.homeunix.net/~viric/soft/ts/ + +https://github.com/chapmanjacobd/que + + + +=head1 TESTING OTHER TOOLS + +There are certain issues that are very common on parallelizing +tools. Here are a few stress tests. Be warned: If the tool is badly +coded it may overload your machine. + + +=head2 MIX: Output mixes + +Output from 2 jobs should not mix. If the output is not used, this +does not matter; but if the output I<is> used then it is important +that you do not get half a line from one job followed by half a line +from another job. + +If the tool does not buffer, output will most likely mix now and then. + +This test stresses whether output mixes. + + #!/bin/bash + + paralleltool="parallel -j0" + + cat <<-EOF > mycommand + #!/bin/bash + + # If a, b, c, d, e, and f mix: Very bad + perl -e 'print STDOUT "a"x3000_000," "' + perl -e 'print STDERR "b"x3000_000," "' + perl -e 'print STDOUT "c"x3000_000," "' + perl -e 'print STDERR "d"x3000_000," "' + perl -e 'print STDOUT "e"x3000_000," "' + perl -e 'print STDERR "f"x3000_000," "' + echo + echo >&2 + EOF + chmod +x mycommand + + # Run 30 jobs in parallel + seq 30 | + $paralleltool ./mycommand > >(tr -s abcdef) 2> >(tr -s abcdef >&2) + + # 'a c e' and 'b d f' should always stay together + # and there should only be a single line per job + + +=head2 STDERRMERGE: Stderr is merged with stdout + +Output from stdout and stderr should not be merged, but kept separated. + +This test shows whether stdout is mixed with stderr. + + #!/bin/bash + + paralleltool="parallel -j0" + + cat <<-EOF > mycommand + #!/bin/bash + + echo stdout + echo stderr >&2 + echo stdout + echo stderr >&2 + EOF + chmod +x mycommand + + # Run one job + echo | + $paralleltool ./mycommand > stdout 2> stderr + cat stdout + cat stderr + + +=head2 RAM: Output limited by RAM + +Some tools cache output in RAM. This makes them extremely slow if the +output is bigger than physical memory and crash if the output is +bigger than the virtual memory. + + #!/bin/bash + + paralleltool="parallel -j0" + + cat <<'EOF' > mycommand + #!/bin/bash + + # Generate 1 GB output + yes "`perl -e 'print \"c\"x30_000'`" | head -c 1G + EOF + chmod +x mycommand + + # Run 20 jobs in parallel + # Adjust 20 to be > physical RAM and < free space on /tmp + seq 20 | time $paralleltool ./mycommand | wc -c + + +=head2 DISKFULL: Incomplete data if /tmp runs full + +If caching is done on disk, the disk can run full during the run. Not +all programs discover this. GNU Parallel discovers it, if it stays +full for at least 2 seconds. + + #!/bin/bash + + paralleltool="parallel -j0" + + # This should be a dir with less than 100 GB free space + smalldisk=/tmp/shm/parallel + + TMPDIR="$smalldisk" + export TMPDIR + + max_output() { + # Force worst case scenario: + # Make GNU Parallel only check once per second + sleep 10 + # Generate 100 GB to fill $TMPDIR + # Adjust if /tmp is bigger than 100 GB + yes | head -c 100G >$TMPDIR/$$ + # Generate 10 MB output that will not be buffered due to full disk + perl -e 'print "X"x10_000_000' | head -c 10M + echo This part is missing from incomplete output + sleep 2 + rm $TMPDIR/$$ + echo Final output + } + + export -f max_output + seq 10 | $paralleltool max_output | tr -s X + + +=head2 CLEANUP: Leaving tmp files at unexpected death + +Some tools do not clean up tmp files if they are killed. If the tool +buffers on disk, they may not clean up, if they are killed. + + #!/bin/bash + + paralleltool=parallel + + ls /tmp >/tmp/before + seq 10 | $paralleltool sleep & + pid=$! + # Give the tool time to start up + sleep 1 + # Kill it without giving it a chance to cleanup + kill -9 $! + # Should be empty: No files should be left behind + diff <(ls /tmp) /tmp/before + + +=head2 SPCCHAR: Dealing badly with special file names. + +It is not uncommon for users to create files like: + + My brother's 12" *** record (costs $$$).jpg + +Some tools break on this. + + #!/bin/bash + + paralleltool=parallel + + touch "My brother's 12\" *** record (costs \$\$\$).jpg" + ls My*jpg | $paralleltool ls -l + + +=head2 COMPOSED: Composed commands do not work + +Some tools require you to wrap composed commands into B<bash -c>. + + echo bar | $paralleltool echo foo';' echo {} + + +=head2 ONEREP: Only one replacement string allowed + +Some tools can only insert the argument once. + + echo bar | $paralleltool echo {} foo {} + + +=head2 INPUTSIZE: Length of input should not be limited + +Some tools limit the length of the input lines artificially with no good +reason. GNU B<parallel> does not: + + perl -e 'print "foo."."x"x100_000_000' | parallel echo {.} + +GNU B<parallel> limits the command to run to 128 KB due to execve(1): + + perl -e 'print "x"x131_000' | parallel echo {} | wc + + +=head2 NUMWORDS: Speed depends on number of words + +Some tools become very slow if output lines have many words. + + #!/bin/bash + + paralleltool=parallel + + cat <<-EOF > mycommand + #!/bin/bash + + # 10 MB of lines with 1000 words + yes "`seq 1000`" | head -c 10M + EOF + chmod +x mycommand + + # Run 30 jobs in parallel + seq 30 | time $paralleltool -j0 ./mycommand > /dev/null + +=head2 4GB: Output with a line > 4GB should be OK + + #!/bin/bash + + paralleltool="parallel -j0" + + cat <<-EOF > mycommand + #!/bin/bash + + perl -e '\$a="a"x1000_000; for(1..5000) { print \$a }' + EOF + chmod +x mycommand + + # Run 1 job + seq 1 | $paralleltool ./mycommand | LC_ALL=C wc + + +=head1 AUTHOR + +When using GNU B<parallel> for a publication please cite: + +O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: +The USENIX Magazine, February 2011:42-47. + +This helps funding further development; and it won't cost you a cent. +If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +Parts of the manual concerning B<xargs> compatibility is inspired by +the manual of B<xargs> from GNU findutils 4.4.2. + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 DEPENDENCIES + +GNU B<parallel> uses Perl, and the Perl modules Getopt::Long, +IPC::Open3, Symbol, IO::File, POSIX, and File::Temp. For remote usage +it also uses rsync with ssh. + + +=head1 SEE ALSO + +B<find>(1), B<xargs>(1), B<make>(1), B<pexec>(1), B<ppss>(1), +B<xjobs>(1), B<prll>(1), B<dxargs>(1), B<mdm>(1) + +=cut diff --git a/src/parallel_book.pod b/src/parallel_book.pod new file mode 100644 index 0000000..5bbb432 --- /dev/null +++ b/src/parallel_book.pod @@ -0,0 +1,403 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + +=head1 Why should you read this book? + +If you write shell scripts to do the same processing for different +input, then GNU B<parallel> will make your life easier and make your +scripts run faster. + +The book is written so you get the juicy parts first: The goal is that +you read just enough to get you going. GNU B<parallel> has an +overwhelming amount of special features to help in different +situations, and to avoid overloading you with information, the most +used features are presented first. + +All the examples are tested in Bash, and most will work in other +shells, too, but there are a few exceptions. So you are recommended to +use Bash while testing out the examples. + + +=head1 Learn GNU Parallel in 5 minutes + +You just need to run commands in parallel. You do not care about fine +tuning. + +To get going please run this to make some example files: + + # If your system does not have 'seq', replace 'seq' with 'jot' + seq 5 | parallel seq {} '>' example.{} + +=head2 Input sources + +GNU B<parallel> reads values from input sources. One input source is +the command line. The values are put after B<:::> : + + parallel echo ::: 1 2 3 4 5 + +This makes it easy to run the same program on some files: + + parallel wc ::: example.* + +If you give multiple B<:::>s, GNU B<parallel> will generate all +combinations: + + parallel wc ::: -l -c ::: example.* + +GNU B<parallel> can also read the values from stdin (standard input): + + seq 5 | parallel echo + + +=head2 Building the command line + +The command line is put before the B<:::>. It can contain contain a +command and options for the command: + + parallel wc -l ::: example.* + +The command can contain multiple programs. Just remember to quote +characters that are interpreted by the shell (such as B<;>): + + parallel echo counting lines';' wc -l ::: example.* + +The value will normally be appended to the command, but can be placed +anywhere by using the replacement string B<{}>: + + parallel echo counting {}';' wc -l {} ::: example.* + +When using multiple input sources you use the positional replacement +strings B<{1}> and B<{2}>: + + parallel echo count {1} in {2}';' wc {1} {2} ::: -l -c ::: example.* + +You can check what will be run with B<--dry-run>: + + parallel --dry-run echo count {1} in {2}';' wc {1} {2} ::: -l -c ::: example.* + +This is a good idea to do for every command until you are comfortable +with GNU B<parallel>. + +=head2 Controlling the output + +The output will be printed as soon as the command completes. This +means the output may come in a different order than the input: + + parallel sleep {}';' echo {} done ::: 5 4 3 2 1 + +You can force GNU B<parallel> to print in the order of the values with +B<--keep-order>/B<-k>. This will still run the commands in parallel. +The output of the later jobs will be delayed, until the earlier jobs +are printed: + + parallel -k sleep {}';' echo {} done ::: 5 4 3 2 1 + + +=head2 Controlling the execution + +If your jobs are compute intensive, you will most likely run one job +for each core in the system. This is the default for GNU B<parallel>. + +But sometimes you want more jobs running. You control the number of +job slots with B<-j>. Give B<-j> the number of jobs to run in +parallel: + + parallel -j50 \ + wget https://ftpmirror.gnu.org/parallel/parallel-{1}{2}22.tar.bz2 \ + ::: 2012 2013 2014 2015 2016 \ + ::: 01 02 03 04 05 06 07 08 09 10 11 12 + + +=head2 Pipe mode + +GNU B<parallel> can also pass blocks of data to commands on stdin +(standard input): + + seq 1000000 | parallel --pipe wc + +This can be used to process big text files. By default GNU B<parallel> +splits on \n (newline) and passes a block of around 1 MB to each job. + + +=head2 That's it + +You have now learned the basic use of GNU B<parallel>. This will +probably cover most cases of your use of GNU B<parallel>. + +The rest of this document will go into more details on each of the +sections and cover special use cases. + + +=head1 Learn GNU Parallel in an hour + +In this part we will dive deeper into what you learned in the first 5 minutes. + +To get going please run this to make some example files: + + seq 6 > seq6 + seq 6 -1 1 > seq-6 + +=head2 Input sources + +On top of the command line, input sources can also be stdin (standard +input or '-'), files and fifos and they can be mixed. Files are given +after B<-a> or B<::::>. So these all do the same: + + parallel echo Dice1={1} Dice2={2} ::: 1 2 3 4 5 6 ::: 6 5 4 3 2 1 + parallel echo Dice1={1} Dice2={2} :::: <(seq 6) :::: <(seq 6 -1 1) + parallel echo Dice1={1} Dice2={2} :::: seq6 seq-6 + parallel echo Dice1={1} Dice2={2} :::: seq6 :::: seq-6 + parallel -a seq6 -a seq-6 echo Dice1={1} Dice2={2} + parallel -a seq6 echo Dice1={1} Dice2={2} :::: seq-6 + parallel echo Dice1={1} Dice2={2} ::: 1 2 3 4 5 6 :::: seq-6 + cat seq-6 | parallel echo Dice1={1} Dice2={2} :::: seq6 - + +If stdin (standard input) is the only input source, you do not need the '-': + + cat seq6 | parallel echo Dice1={1} + +=head3 Linking input sources + +You can link multiple input sources with B<:::+> and B<::::+>: + + parallel echo {1}={2} ::: I II III IV V VI :::+ 1 2 3 4 5 6 + parallel echo {1}={2} ::: I II III IV V VI ::::+ seq6 + +The B<:::+> (and B<::::+>) will link each value to the corresponding +value in the previous input source, so value number 3 from the first +input source will be linked to value number 3 from the second input +source. + +You can combine B<:::+> and B<:::>, so you link 2 input sources, but +generate all combinations with other input sources: + + parallel echo Dice1={1}={2} Dice2={3}={4} ::: I II III IV V VI ::::+ seq6 \ + ::: VI V IV III II I ::::+ seq-6 + + +=head2 Building the command line + +=head3 The command + +The command can be a script, a binary or a Bash function if the +function is exported using B<export -f>: + + # Works only in Bash + my_func() { + echo in my_func "$1" + } + export -f my_func + parallel my_func ::: 1 2 3 + +If the command is complex, it often improves readability to make it +into a function. + + +=head3 The replacement strings + +GNU B<parallel> has some replacement strings to make it easier to +refer to the input read from the input sources. + +If the input is B<mydir/mysubdir/myfile.myext> then: + + {} = mydir/mysubdir/myfile.myext + {.} = mydir/mysubdir/myfile + {/} = myfile.myext + {//} = mydir/mysubdir + {/.} = myfile + {#} = the sequence number of the job + {%} = the job slot number + +When a job is started it gets a sequence number that starts at 1 and +increases by 1 for each new job. The job also gets assigned a slot +number. This number is from 1 to the number of jobs running in +parallel. It is unique between the running jobs, but is re-used as +soon as a job finishes. + +=head4 The positional replacement strings + +The replacement strings have corresponding positional replacement +strings. If the value from the 3rd input source is +B<mydir/mysubdir/myfile.myext>: + + {3} = mydir/mysubdir/myfile.myext + {3.} = mydir/mysubdir/myfile + {3/} = myfile.myext + {3//} = mydir/mysubdir + {3/.} = myfile + +So the number of the input source is simply prepended inside the {}'s. + + +=head1 Replacement strings + +--plus replacement strings + +change the replacement string (-I --extensionreplace --basenamereplace --basenamereplace --dirnamereplace --basenameextensionreplace --seqreplace --slotreplace + +--header with named replacement string + +{= =} + +Dynamic replacement strings + +=head2 Defining replacement strings + + + + +=head2 Copying environment + +env_parallel + +=head2 Controlling the output + +=head3 parset + +B<parset> is a shell function to get the output from GNU B<parallel> +into shell variables. + +B<parset> is fully supported for B<Bash/Zsh/Ksh> and partially supported +for B<ash/dash>. I will assume you run B<Bash>. + +To activate B<parset> you have to run: + + . `which env_parallel.bash` + +(replace B<bash> with your shell's name). + +Then you can run: + + parset a,b,c seq ::: 4 5 6 + echo "$c" + +or: + + parset 'a b c' seq ::: 4 5 6 + echo "$c" + +If you give a single variable, this will become an array: + + parset arr seq ::: 4 5 6 + echo "${arr[1]}" + +B<parset> has one limitation: If it reads from a pipe, the output will +be lost. + + echo This will not work | parset myarr echo + echo Nothing: "${myarr[*]}" + +Instead you can do this: + + echo This will work > tempfile + parset myarr echo < tempfile + echo ${myarr[*]} + +sql +cvs + + +=head2 Controlling the execution + +--dryrun -v + +=head2 Remote execution + +For this section you must have B<ssh> access with no password to 2 +servers: B<$server1> and B<$server2>. + + server1=server.example.com + server2=server2.example.net + +So you must be able to do this: + + ssh $server1 echo works + ssh $server2 echo works + +It can be setup by running 'ssh-keygen -t dsa; ssh-copy-id $server1' +and using an empty passphrase. Or you can use B<ssh-agent>. + +=head3 Workers + +=head3 --transferfile + +B<--transferfile> I<filename> will transfer I<filename> to the +worker. I<filename> can contain a replacement string: + + parallel -S $server1,$server2 --transferfile {} wc ::: example.* + parallel -S $server1,$server2 --transferfile {2} \ + echo count {1} in {2}';' wc {1} {2} ::: -l -c ::: example.* + +A shorthand for B<--transferfile {}> is B<--transfer>. + +=head3 --return + + + +=head3 --cleanup + +A shorthand for B<--transfer --return {} --cleanup> is B<--trc {}>. + + +=head2 Pipe mode + +--pipepart + + +=head2 That's it + +=head1 Advanced usage + +parset fifo, cmd substitution, arrayelements, array with var names and cmds, env_parset + + +env_parallel + +Interfacing with R. + +Interfacing with JSON/jq + +4dl() { + board="$(printf -- '%s' "${1}" | cut -d '/' -f4)" + thread="$(printf -- '%s' "${1}" | cut -d '/' -f6)" + wget -qO- "https://a.4cdn.org/${board}/thread/${thread}.json" | + jq -r ' + .posts + | map(select(.tim != null)) + | map((.tim | tostring) + .ext) + | map("https://i.4cdn.org/'"${board}"'/"+.)[] + ' | + parallel --gnu -j 0 wget -nv +} + +Interfacing with XML/? + +Interfacing with HTML/? + +=head2 Controlling the execution + +--termseq + + +=head2 Remote execution + +seq 10 | parallel --sshlogin 'ssh -i "key.pem" a@b.com' echo + +seq 10 | PARALLEL_SSH='ssh -i "key.pem"' parallel --sshlogin a@b.com echo + +seq 10 | parallel --ssh 'ssh -i "key.pem"' --sshlogin a@b.com echo + +ssh-agent + +The sshlogin file format + +Check if servers are up + + + +=cut diff --git a/src/parallel_cheat_bw.fodt b/src/parallel_cheat_bw.fodt new file mode 100644 index 0000000..fb3e80f --- /dev/null +++ b/src/parallel_cheat_bw.fodt @@ -0,0 +1,1001 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:rpt="http://openoffice.org/2005/report" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" office:version="1.2" office:mimetype="application/vnd.oasis.opendocument.text"> + <office:meta><meta:creation-date>2019-02-27T22:13:36.781924718</meta:creation-date><dc:date>2019-03-12T01:40:23.357999303</dc:date><meta:editing-duration>P11DT5H49M15S</meta:editing-duration><meta:editing-cycles>40</meta:editing-cycles><meta:generator>LibreOffice/6.0.6.2$Linux_X86_64 LibreOffice_project/00m0$Build-2</meta:generator><meta:print-date>2019-03-08T23:06:56.779293422</meta:print-date><meta:document-statistic meta:table-count="9" meta:image-count="0" meta:object-count="0" meta:page-count="1" meta:paragraph-count="47" meta:word-count="337" meta:character-count="1950" meta:non-whitespace-character-count="1661"/></office:meta> + <office:settings> + <config:config-item-set config:name="ooo:view-settings"> + <config:config-item config:name="ViewAreaTop" config:type="long">15875</config:config-item> + <config:config-item config:name="ViewAreaLeft" config:type="long">0</config:config-item> + <config:config-item config:name="ViewAreaWidth" config:type="long">22287</config:config-item> + <config:config-item config:name="ViewAreaHeight" config:type="long">11042</config:config-item> + <config:config-item config:name="ShowRedlineChanges" config:type="boolean">true</config:config-item> + <config:config-item config:name="InBrowseMode" config:type="boolean">false</config:config-item> + <config:config-item-map-indexed config:name="Views"> + <config:config-item-map-entry> + <config:config-item config:name="ViewId" config:type="string">view2</config:config-item> + <config:config-item config:name="ViewLeft" config:type="long">18724</config:config-item> + <config:config-item config:name="ViewTop" config:type="long">25474</config:config-item> + <config:config-item config:name="VisibleLeft" config:type="long">0</config:config-item> + <config:config-item config:name="VisibleTop" config:type="long">15875</config:config-item> + <config:config-item config:name="VisibleRight" config:type="long">22285</config:config-item> + <config:config-item config:name="VisibleBottom" config:type="long">26915</config:config-item> + <config:config-item config:name="ZoomType" config:type="short">0</config:config-item> + <config:config-item config:name="ViewLayoutColumns" config:type="short">1</config:config-item> + <config:config-item config:name="ViewLayoutBookMode" config:type="boolean">false</config:config-item> + <config:config-item config:name="ZoomFactor" config:type="short">250</config:config-item> + <config:config-item config:name="IsSelectedFrame" config:type="boolean">false</config:config-item> + <config:config-item config:name="AnchoredTextOverflowLegacy" config:type="boolean">false</config:config-item> + </config:config-item-map-entry> + </config:config-item-map-indexed> + </config:config-item-set> + <config:config-item-set config:name="ooo:configuration-settings"> + <config:config-item config:name="PrintPaperFromSetup" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintFaxName" config:type="string"/> + <config:config-item config:name="PrintSingleJobs" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintProspectRTL" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintProspect" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintReversed" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintTextPlaceholder" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintTables" config:type="boolean">true</config:config-item> + <config:config-item config:name="DoNotJustifyLinesWithManualBreak" config:type="boolean">false</config:config-item> + <config:config-item config:name="AlignTabStopPosition" config:type="boolean">true</config:config-item> + <config:config-item config:name="PrintLeftPages" config:type="boolean">true</config:config-item> + <config:config-item config:name="IgnoreFirstLineIndentInNumbering" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrinterSetup" config:type="base64Binary">uAH+/1hlcm94X1BoYXNlcl82MTQwRE4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQ1VQUzpYZXJveF9QaGFzZXJfNjE0MEROAAAAAAAAAAAWAAMA2QAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9WGVyb3hfUGhhc2VyXzYxNDBETgpvcmllbnRhdGlvbj1Qb3J0cmFpdApjb3BpZXM9MQpjb2xsYXRlPWZhbHNlCm1hcmdpbmRhanVzdG1lbnQ9MCwwLDAsMApjb2xvcmRlcHRoPTI0CnBzbGV2ZWw9MApwZGZkZXZpY2U9MQpjb2xvcmRldmljZT0wClBQRENvbnRleERhdGEKRHVwbGV4Ok5vbmUAUGFnZVNpemU6QTQASW5wdXRTbG90OkF1dG9TZWxlY3QAABIAQ09NUEFUX0RVUExFWF9NT0RFDwBEdXBsZXhNb2RlOjpPZmY=</config:config-item> + <config:config-item config:name="CollapseEmptyCellPara" config:type="boolean">true</config:config-item> + <config:config-item config:name="RedlineProtectionKey" config:type="base64Binary"/> + <config:config-item config:name="UseOldPrinterMetrics" config:type="boolean">false</config:config-item> + <config:config-item config:name="UseOldNumbering" config:type="boolean">false</config:config-item> + <config:config-item config:name="AddExternalLeading" config:type="boolean">true</config:config-item> + <config:config-item config:name="TreatSingleColumnBreakAsPageBreak" config:type="boolean">false</config:config-item> + <config:config-item config:name="UpdateFromTemplate" config:type="boolean">true</config:config-item> + <config:config-item config:name="IsLabelDocument" config:type="boolean">false</config:config-item> + <config:config-item config:name="RsidRoot" config:type="int">816919</config:config-item> + <config:config-item config:name="ConsiderTextWrapOnObjPos" config:type="boolean">false</config:config-item> + <config:config-item config:name="TableRowKeep" config:type="boolean">false</config:config-item> + <config:config-item config:name="TabsRelativeToIndent" config:type="boolean">true</config:config-item> + <config:config-item config:name="SaveVersionOnClose" config:type="boolean">false</config:config-item> + <config:config-item config:name="UseFormerTextWrapping" config:type="boolean">false</config:config-item> + <config:config-item config:name="ChartAutoUpdate" config:type="boolean">true</config:config-item> + <config:config-item config:name="AddParaTableSpacingAtStart" config:type="boolean">true</config:config-item> + <config:config-item config:name="AllowPrintJobCancel" config:type="boolean">true</config:config-item> + <config:config-item config:name="AddParaTableSpacing" config:type="boolean">true</config:config-item> + <config:config-item config:name="PrintDrawings" config:type="boolean">true</config:config-item> + <config:config-item config:name="AddParaSpacingToTableCells" config:type="boolean">true</config:config-item> + <config:config-item config:name="UseFormerLineSpacing" config:type="boolean">false</config:config-item> + <config:config-item config:name="OutlineLevelYieldsNumbering" config:type="boolean">false</config:config-item> + <config:config-item config:name="LinkUpdateMode" config:type="short">1</config:config-item> + <config:config-item config:name="DoNotResetParaAttrsForNumFont" config:type="boolean">false</config:config-item> + <config:config-item config:name="ApplyUserData" config:type="boolean">true</config:config-item> + <config:config-item config:name="StylesNoDefault" config:type="boolean">false</config:config-item> + <config:config-item config:name="EmbeddedDatabaseName" config:type="string"/> + <config:config-item config:name="FloattableNomargins" config:type="boolean">false</config:config-item> + <config:config-item config:name="BackgroundParaOverDrawings" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrinterName" config:type="string">Xerox_Phaser_6140DN</config:config-item> + <config:config-item config:name="UseFormerObjectPositioning" config:type="boolean">false</config:config-item> + <config:config-item config:name="TabOverMargin" config:type="boolean">false</config:config-item> + <config:config-item config:name="SaveGlobalDocumentLinks" config:type="boolean">false</config:config-item> + <config:config-item config:name="CurrentDatabaseDataSource" config:type="string"/> + <config:config-item config:name="IsKernAsianPunctuation" config:type="boolean">false</config:config-item> + <config:config-item config:name="CharacterCompressionType" config:type="short">0</config:config-item> + <config:config-item config:name="SmallCapsPercentage66" config:type="boolean">false</config:config-item> + <config:config-item config:name="CurrentDatabaseCommand" config:type="string"/> + <config:config-item config:name="CurrentDatabaseCommandType" config:type="int">0</config:config-item> + <config:config-item config:name="FieldAutoUpdate" config:type="boolean">true</config:config-item> + <config:config-item config:name="IgnoreTabsAndBlanksForLineCalculation" config:type="boolean">false</config:config-item> + <config:config-item config:name="LoadReadonly" config:type="boolean">false</config:config-item> + <config:config-item config:name="DoNotCaptureDrawObjsOnPage" config:type="boolean">false</config:config-item> + <config:config-item config:name="ClipAsCharacterAnchoredWriterFlyFrames" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintBlackFonts" config:type="boolean">false</config:config-item> + <config:config-item config:name="DisableOffPagePositioning" config:type="boolean">true</config:config-item> + <config:config-item config:name="SurroundTextWrapSmall" config:type="boolean">false</config:config-item> + <config:config-item config:name="UnxForceZeroExtLeading" config:type="boolean">false</config:config-item> + <config:config-item config:name="TabAtLeftIndentForParagraphsInList" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintRightPages" config:type="boolean">true</config:config-item> + <config:config-item config:name="Rsid" config:type="int">1662577</config:config-item> + <config:config-item config:name="MathBaselineAlignment" config:type="boolean">true</config:config-item> + <config:config-item config:name="MsWordCompTrailingBlanks" config:type="boolean">false</config:config-item> + <config:config-item config:name="InvertBorderSpacing" config:type="boolean">false</config:config-item> + <config:config-item config:name="EmbedFonts" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrinterIndependentLayout" config:type="string">high-resolution</config:config-item> + <config:config-item config:name="TabOverflow" config:type="boolean">true</config:config-item> + <config:config-item config:name="PrintGraphics" config:type="boolean">true</config:config-item> + <config:config-item config:name="PropLineSpacingShrinksFirstLine" config:type="boolean">true</config:config-item> + <config:config-item config:name="UnbreakableNumberings" config:type="boolean">false</config:config-item> + <config:config-item config:name="AddFrameOffsets" config:type="boolean">false</config:config-item> + <config:config-item config:name="ClippedPictures" config:type="boolean">false</config:config-item> + <config:config-item config:name="EmbedSystemFonts" config:type="boolean">false</config:config-item> + <config:config-item config:name="ApplyParagraphMarkFormatToNumbering" config:type="boolean">false</config:config-item> + <config:config-item config:name="SubtractFlysAnchoredAtFlys" config:type="boolean">false</config:config-item> + <config:config-item config:name="AddVerticalFrameOffsets" config:type="boolean">false</config:config-item> + <config:config-item config:name="ProtectForm" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintEmptyPages" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintControls" config:type="boolean">true</config:config-item> + <config:config-item config:name="PrintHiddenText" config:type="boolean">false</config:config-item> + <config:config-item config:name="PrintAnnotationMode" config:type="short">0</config:config-item> + <config:config-item config:name="PrintPageBackground" config:type="boolean">true</config:config-item> + </config:config-item-set> + </office:settings> + <office:scripts> + <office:script script:language="ooo:Basic"> + <ooo:libraries xmlns:ooo="http://openoffice.org/2004/office" xmlns:xlink="http://www.w3.org/1999/xlink"> + <ooo:library-embedded ooo:name="Standard"/> + </ooo:libraries> + </office:script> + </office:scripts> + <office:font-face-decls> + <style:font-face style:name="Lohit Devanagari1" svg:font-family="'Lohit Devanagari'"/> + <style:font-face style:name="monospace" svg:font-family="monospace"/> + <style:font-face style:name="DejaVu Sans Mono" svg:font-family="'DejaVu Sans Mono'" style:font-family-generic="modern" style:font-pitch="fixed"/> + <style:font-face style:name="Liberation Mono" svg:font-family="'Liberation Mono'" style:font-family-generic="modern" style:font-pitch="fixed"/> + <style:font-face style:name="Bitstream Vera Serif1" svg:font-family="'Bitstream Vera Serif'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Bitstream Vera Serif" svg:font-family="'Bitstream Vera Serif'" style:font-adornments="Roman" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Liberation Serif" svg:font-family="'Liberation Serif'" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="Thorndale" svg:font-family="Thorndale" style:font-family-generic="roman" style:font-pitch="variable"/> + <style:font-face style:name="DejaVu Sans" svg:font-family="'DejaVu Sans'" style:font-family-generic="swiss" style:font-pitch="variable"/> + <style:font-face style:name="FreeSans" svg:font-family="FreeSans" style:font-adornments="Bold" style:font-family-generic="swiss" style:font-pitch="variable"/> + <style:font-face style:name="Liberation Sans" svg:font-family="'Liberation Sans'" style:font-family-generic="swiss" style:font-pitch="variable"/> + <style:font-face style:name="Lohit Devanagari" svg:font-family="'Lohit Devanagari'" style:font-family-generic="system" style:font-pitch="variable"/> + <style:font-face style:name="Noto Sans CJK SC Regular" svg:font-family="'Noto Sans CJK SC Regular'" style:font-family-generic="system" style:font-pitch="variable"/> + </office:font-face-decls> + <office:styles> + <style:default-style style:family="graphic"> + <style:graphic-properties svg:stroke-color="#3465a4" draw:fill-color="#729fcf" fo:wrap-option="no-wrap" draw:shadow-offset-x="0.1181in" draw:shadow-offset-y="0.1181in" draw:start-line-spacing-horizontal="0.1114in" draw:start-line-spacing-vertical="0.1114in" draw:end-line-spacing-horizontal="0.1114in" draw:end-line-spacing-vertical="0.1114in" style:flow-with-text="false"/> + <style:paragraph-properties style:text-autospace="ideograph-alpha" style:line-break="strict" style:font-independent-line-spacing="false"> + <style:tab-stops/> + </style:paragraph-properties> + <style:text-properties style:use-window-font-color="true" style:font-name="Liberation Serif" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="Noto Sans CJK SC Regular" style:font-size-asian="10.5pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Lohit Devanagari" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN"/> + </style:default-style> + <style:default-style style:family="paragraph"> + <style:paragraph-properties fo:orphans="2" fo:widows="2" fo:hyphenation-ladder-count="no-limit" style:text-autospace="ideograph-alpha" style:punctuation-wrap="hanging" style:line-break="strict" style:tab-stop-distance="0.4925in" style:writing-mode="page"/> + <style:text-properties style:use-window-font-color="true" style:font-name="Liberation Serif" fo:font-size="12pt" fo:language="en" fo:country="US" style:letter-kerning="true" style:font-name-asian="Noto Sans CJK SC Regular" style:font-size-asian="10.5pt" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Lohit Devanagari" style:font-size-complex="12pt" style:language-complex="hi" style:country-complex="IN" fo:hyphenate="false" fo:hyphenation-remain-char-count="2" fo:hyphenation-push-char-count="2"/> + </style:default-style> + <style:default-style style:family="table"> + <style:table-properties table:border-model="collapsing"/> + </style:default-style> + <style:default-style style:family="table-row"> + <style:table-row-properties fo:keep-together="auto"/> + </style:default-style> + <style:style style:name="Standard" style:family="paragraph" style:class="text"/> + <style:style style:name="Heading" style:family="paragraph" style:parent-style-name="Standard" style:next-style-name="Text_20_body" style:class="text"> + <style:paragraph-properties fo:margin-top="0.1665in" fo:margin-bottom="0.0835in" loext:contextual-spacing="false" fo:keep-with-next="always"/> + <style:text-properties style:font-name="Liberation Sans" fo:font-family="'Liberation Sans'" style:font-family-generic="swiss" style:font-pitch="variable" fo:font-size="14pt" style:font-name-asian="Noto Sans CJK SC Regular" style:font-family-asian="'Noto Sans CJK SC Regular'" style:font-family-generic-asian="system" style:font-pitch-asian="variable" style:font-size-asian="14pt" style:font-name-complex="Lohit Devanagari" style:font-family-complex="'Lohit Devanagari'" style:font-family-generic-complex="system" style:font-pitch-complex="variable" style:font-size-complex="14pt"/> + </style:style> + <style:style style:name="Text_20_body" style:display-name="Text body" style:family="paragraph" style:parent-style-name="Standard" style:class="text"> + <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0.0972in" loext:contextual-spacing="false" fo:line-height="115%"/> + </style:style> + <style:style style:name="List" style:family="paragraph" style:parent-style-name="Text_20_body" style:class="list"> + <style:text-properties style:font-size-asian="12pt" style:font-name-complex="Lohit Devanagari1" style:font-family-complex="'Lohit Devanagari'"/> + </style:style> + <style:style style:name="Caption" style:family="paragraph" style:parent-style-name="Standard" style:class="extra"> + <style:paragraph-properties fo:margin-top="0.0835in" fo:margin-bottom="0.0835in" loext:contextual-spacing="false" text:number-lines="false" text:line-number="0"/> + <style:text-properties fo:font-size="12pt" fo:font-style="italic" style:font-size-asian="12pt" style:font-style-asian="italic" style:font-name-complex="Lohit Devanagari1" style:font-family-complex="'Lohit Devanagari'" style:font-size-complex="12pt" style:font-style-complex="italic"/> + </style:style> + <style:style style:name="Index" style:family="paragraph" style:parent-style-name="Standard" style:class="index"> + <style:paragraph-properties text:number-lines="false" text:line-number="0"/> + <style:text-properties style:font-size-asian="12pt" style:font-name-complex="Lohit Devanagari1" style:font-family-complex="'Lohit Devanagari'"/> + </style:style> + <style:style style:name="Table_20_Contents" style:display-name="Table Contents" style:family="paragraph" style:parent-style-name="Standard" style:class="extra"> + <loext:graphic-properties draw:fill="none" draw:fill-color="#729fcf"/> + <style:paragraph-properties fo:line-height="115%" fo:background-color="transparent" style:shadow="none" text:number-lines="false" text:line-number="0"/> + <style:text-properties style:font-name="Bitstream Vera Serif" fo:font-family="'Bitstream Vera Serif'" style:font-style-name="Roman" style:font-family-generic="roman" style:font-pitch="variable" fo:font-size="10pt" style:font-size-asian="10.5pt"/> + </style:style> + <style:style style:name="Header" style:family="paragraph" style:parent-style-name="Standard" style:class="extra"> + <style:paragraph-properties text:number-lines="false" text:line-number="0"> + <style:tab-stops> + <style:tab-stop style:position="3.3472in" style:type="center"/> + <style:tab-stop style:position="6.6953in" style:type="right"/> + </style:tab-stops> + </style:paragraph-properties> + </style:style> + <style:style style:name="Table_20_Heading" style:display-name="Table Heading" style:family="paragraph" style:parent-style-name="Table_20_Contents" style:class="extra"> + <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" text:number-lines="false" text:line-number="0"/> + <style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="Preformatted_20_Text" style:display-name="Preformatted Text" style:family="paragraph" style:parent-style-name="Standard" style:class="html" style:master-page-name=""> + <loext:graphic-properties draw:fill="none" draw:fill-color="#729fcf"/> + <style:paragraph-properties fo:margin-left="0.2in" fo:margin-right="0.2in" fo:margin-top="0in" fo:margin-bottom="0in" loext:contextual-spacing="true" fo:orphans="3" fo:widows="3" fo:text-indent="0in" style:auto-text-indent="false" style:page-number="auto" fo:background-color="transparent" style:shadow="none" fo:keep-with-next="auto" style:writing-mode="page"/> + <style:text-properties style:font-name="Liberation Mono" fo:font-family="'Liberation Mono'" style:font-family-generic="modern" style:font-pitch="fixed" fo:font-size="10pt" style:font-name-asian="DejaVu Sans Mono" style:font-family-asian="'DejaVu Sans Mono'" style:font-family-generic-asian="modern" style:font-pitch-asian="fixed" style:font-size-asian="10pt" style:font-name-complex="Liberation Mono" style:font-family-complex="'Liberation Mono'" style:font-family-generic-complex="modern" style:font-pitch-complex="fixed" style:font-size-complex="10pt"/> + </style:style> + <style:style style:name="Level1_20_body" style:display-name="Level1 body" style:family="paragraph" style:parent-style-name="Text_20_body"> + <style:paragraph-properties fo:margin-top="0.1in" fo:margin-bottom="0.1in" loext:contextual-spacing="false" fo:padding="0.1in" fo:border-left="0.51pt solid #000000" fo:border-right="none" fo:border-top="none" fo:border-bottom="none" style:shadow="none" style:writing-mode="page"/> + </style:style> + <style:style style:name="Numbering_20_Symbols" style:display-name="Numbering Symbols" style:family="text"/> + <style:style style:name="Source_20_Text" style:display-name="Source Text" style:family="text"> + <style:text-properties style:font-name="Liberation Mono" fo:font-family="'Liberation Mono'" style:font-family-generic="modern" style:font-pitch="fixed" fo:font-size="10.5pt" fo:font-weight="normal" fo:background-color="#dddddd" style:font-name-asian="DejaVu Sans Mono" style:font-family-asian="'DejaVu Sans Mono'" style:font-family-generic-asian="modern" style:font-pitch-asian="fixed" style:font-size-asian="9.60000038146973pt" style:font-weight-asian="normal" style:font-name-complex="Liberation Mono" style:font-family-complex="'Liberation Mono'" style:font-family-generic-complex="modern" style:font-pitch-complex="fixed" style:font-size-complex="11pt" style:font-weight-complex="normal"/> + </style:style> + <text:outline-style style:name="Outline"> + <text:outline-level-style text:level="1" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="2" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="3" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="4" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="5" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="6" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="7" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="8" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="9" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + <text:outline-level-style text:level="10" style:num-format=""> + <style:list-level-properties text:list-level-position-and-space-mode="label-alignment"> + <style:list-level-label-alignment text:label-followed-by="listtab"/> + </style:list-level-properties> + </text:outline-level-style> + </text:outline-style> + <text:notes-configuration text:note-class="footnote" style:num-format="1" text:start-value="0" text:footnotes-position="page" text:start-numbering-at="document"/> + <text:notes-configuration text:note-class="endnote" style:num-format="i" text:start-value="0"/> + <text:linenumbering-configuration text:number-lines="false" text:offset="0.1965in" style:num-format="1" text:number-position="left" text:increment="5"/> + </office:styles> + <office:automatic-styles> + <style:style style:name="Table7" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table7.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table7.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table7.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table7.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table7.A2" style:family="table-cell" style:data-style-name="N10100"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table8" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table8.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table8.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table8.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table8.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table8.A2" style:family="table-cell" style:data-style-name="N10100"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table8.3" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table8.4" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table9" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table9.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table9.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table9.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table9.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table9.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table9.3" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table9.4" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table9.5" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table9.6" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table9.7" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table10.A" style:family="table-column"> + <style:table-column-properties style:column-width="2.3736in" style:rel-column-width="3418*"/> + </style:style> + <style:style style:name="Table10.B" style:family="table-column"> + <style:table-column-properties style:column-width="4.3215in" style:rel-column-width="6223*"/> + </style:style> + <style:style style:name="Table10.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table10.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table10.B1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table10.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table10.B2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="1pt solid #ffffff" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table10.3" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.4" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.5" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.6" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.7" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.8" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table10.9" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table11" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table11.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table11.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table11.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table11.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table11.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table12" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table12.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table12.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table12.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table12.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table12.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table12.3" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table13" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table13.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table13.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table13.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table13.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table13.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table1" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table1.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table1.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table1.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table1.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table1.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table1.3" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table2" style:family="table"> + <style:table-properties style:width="6.6951in" fo:margin-top="0in" fo:margin-bottom="0in" fo:break-before="auto" fo:break-after="auto" table:align="margins" fo:background-color="#dddddd" fo:keep-with-next="auto" style:may-break-between-rows="false" style:writing-mode="page" table:border-model="collapsing"> + <style:background-image/> + </style:table-properties> + </style:style> + <style:style style:name="Table2.A" style:family="table-column"> + <style:table-column-properties style:column-width="6.6951in" style:rel-column-width="65535*"/> + </style:style> + <style:style style:name="Table2.1" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:background-color="#000000" fo:keep-together="auto"> + <style:background-image/> + </style:table-row-properties> + </style:style> + <style:style style:name="Table2.A1" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="1pt solid #ffffff" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table2.2" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="Table2.A2" style:family="table-cell"> + <style:table-cell-properties style:vertical-align="middle" fo:background-color="transparent" fo:padding-left="0.1in" fo:padding-right="0in" fo:padding-top="0in" fo:padding-bottom="0in" fo:border-left="1pt solid #ffffff" fo:border-right="none" fo:border-top="none" fo:border-bottom="1pt solid #ffffff" style:writing-mode="lr-tb"> + <style:background-image/> + </style:table-cell-properties> + </style:style> + <style:style style:name="Table2.3" style:family="table-row"> + <style:table-row-properties style:min-row-height="0.1778in" fo:keep-together="auto"/> + </style:style> + <style:style style:name="P1" style:family="paragraph" style:parent-style-name="Standard"> + <style:text-properties officeooo:paragraph-rsid="0015544e"/> + </style:style> + <style:style style:name="P2" style:family="paragraph" style:parent-style-name="Standard"> + <style:text-properties fo:font-size="8pt" officeooo:paragraph-rsid="001483d1" style:font-size-asian="7pt" style:font-size-complex="8pt"/> + </style:style> + <style:style style:name="P3" style:family="paragraph" style:parent-style-name="Text_20_body"> + <style:paragraph-properties fo:text-align="center" style:justify-single-word="false" fo:break-before="auto" fo:break-after="auto"/> + <style:text-properties fo:font-size="18pt" fo:font-weight="bold" officeooo:paragraph-rsid="001483d1" style:font-size-asian="18pt" style:font-weight-asian="bold" style:font-size-complex="18pt" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="P4" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties fo:color="#000000" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="Thorndale" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="bold" officeooo:rsid="0010bcb2" officeooo:paragraph-rsid="001483d1" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="bold" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="bold" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="P5" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties fo:font-weight="normal" officeooo:rsid="0012b9fe" officeooo:paragraph-rsid="001483d1" style:font-weight-asian="normal" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="P6" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="P7" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties fo:font-style="italic" officeooo:rsid="0012b9fe" officeooo:paragraph-rsid="001483d1" style:font-style-asian="italic" style:font-style-complex="italic"/> + </style:style> + <style:style style:name="P8" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties fo:font-style="italic" officeooo:paragraph-rsid="001483d1" style:font-style-asian="italic" style:font-style-complex="italic"/> + </style:style> + <style:style style:name="P9" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:rsid="0012b9fe" officeooo:paragraph-rsid="001483d1"/> + </style:style> + <style:style style:name="P10" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:rsid="00137669" officeooo:paragraph-rsid="001483d1"/> + </style:style> + <style:style style:name="P11" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:paragraph-rsid="001483d1"/> + </style:style> + <style:style style:name="P12" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:paragraph-rsid="0015544e"/> + </style:style> + <style:style style:name="P13" style:family="paragraph" style:parent-style-name="Table_20_Heading"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" officeooo:paragraph-rsid="001483d1" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="P14" style:family="paragraph" style:parent-style-name="Table_20_Heading"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" officeooo:paragraph-rsid="0015544e" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="P15" style:family="paragraph" style:parent-style-name="Table_20_Heading"> + <style:text-properties style:font-name="DejaVu Sans" fo:font-size="18pt" style:font-size-asian="18pt" style:font-size-complex="18pt"/> + </style:style> + <style:style style:name="P16" style:family="paragraph" style:parent-style-name="Standard"> + <style:text-properties officeooo:paragraph-rsid="0015544e"/> + </style:style> + <style:style style:name="P17" style:family="paragraph" style:parent-style-name="Standard"> + <style:paragraph-properties fo:text-align="end" style:justify-single-word="false"/> + <style:text-properties fo:font-size="6pt" officeooo:rsid="00180262" officeooo:paragraph-rsid="00180262" style:font-size-asian="6pt" style:font-size-complex="6pt"/> + </style:style> + <style:style style:name="P18" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:paragraph-rsid="0016fdaa"/> + </style:style> + <style:style style:name="P19" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties fo:color="#000000" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="Thorndale" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="bold" officeooo:rsid="0010bcb2" officeooo:paragraph-rsid="001483d1" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="bold" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="bold" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="P20" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties fo:font-weight="bold" officeooo:rsid="0016fdaa" officeooo:paragraph-rsid="0016fdaa" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="P21" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties fo:font-weight="bold" officeooo:paragraph-rsid="0016fdaa" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="P22" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:rsid="0016fdaa" officeooo:paragraph-rsid="0016fdaa"/> + </style:style> + <style:style style:name="P23" style:family="paragraph" style:parent-style-name="Table_20_Contents"> + <style:text-properties officeooo:paragraph-rsid="00180262"/> + </style:style> + <style:style style:name="P24" style:family="paragraph" style:parent-style-name="Table_20_Heading"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" officeooo:rsid="001483d1" officeooo:paragraph-rsid="001483d1" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="P25" style:family="paragraph" style:parent-style-name="Table_20_Heading"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" officeooo:rsid="0015544e" officeooo:paragraph-rsid="0015544e" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="P26" style:family="paragraph" style:parent-style-name="Table_20_Heading"> + <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/> + <style:text-properties officeooo:paragraph-rsid="001483d1"/> + </style:style> + <style:style style:name="T1" style:family="text"> + <style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="T2" style:family="text"> + <style:text-properties fo:font-weight="bold" officeooo:rsid="0015544e" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="T3" style:family="text"> + <style:text-properties fo:font-weight="bold" officeooo:rsid="00180262" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="T4" style:family="text"> + <style:text-properties officeooo:rsid="0012b9fe"/> + </style:style> + <style:style style:name="T5" style:family="text"> + <style:text-properties fo:font-weight="normal" style:font-weight-asian="normal" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="T6" style:family="text"> + <style:text-properties fo:font-weight="normal" officeooo:rsid="001483d1" style:font-weight-asian="normal" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="T7" style:family="text"> + <style:text-properties fo:font-weight="normal" officeooo:rsid="0015544e" style:font-weight-asian="normal" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="T8" style:family="text"> + <style:text-properties fo:font-weight="normal" officeooo:rsid="0016fdaa" style:font-weight-asian="normal" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="T9" style:family="text"> + <style:text-properties officeooo:rsid="00137669"/> + </style:style> + <style:style style:name="T10" style:family="text"> + <style:text-properties officeooo:rsid="001483d1"/> + </style:style> + <style:style style:name="T11" style:family="text"> + <style:text-properties style:font-name="DejaVu Sans" officeooo:rsid="000ccaed"/> + </style:style> + <style:style style:name="T12" style:family="text"> + <style:text-properties fo:color="#000000" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="normal" officeooo:rsid="0010bcb2" style:font-style-asian="normal" style:font-weight-asian="normal" style:font-style-complex="normal" style:font-weight-complex="normal" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="T13" style:family="text"> + <style:text-properties fo:color="#000000" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="normal" officeooo:rsid="0015544e" style:font-style-asian="normal" style:font-weight-asian="normal" style:font-style-complex="normal" style:font-weight-complex="normal" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="T14" style:family="text"> + <style:text-properties fo:color="#000000" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="normal" officeooo:rsid="001483d1" style:font-style-asian="normal" style:font-weight-asian="normal" style:font-style-complex="normal" style:font-weight-complex="normal" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="T15" style:family="text"> + <style:text-properties officeooo:rsid="0015544e"/> + </style:style> + <style:style style:name="T16" style:family="text"> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="T17" style:family="text"> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" officeooo:rsid="001483d1" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="T18" style:family="text"> + <style:text-properties style:use-window-font-color="true" style:text-outline="false" style:text-line-through-style="none" style:text-line-through-type="none" style:font-name="FreeSans" fo:font-size="12pt" fo:font-style="normal" fo:text-shadow="none" style:text-underline-style="none" fo:font-weight="600" officeooo:rsid="00195e71" style:font-size-asian="12pt" style:font-style-asian="normal" style:font-weight-asian="600" style:font-size-complex="12pt" style:font-style-complex="normal" style:font-weight-complex="600" style:text-overline-style="none" style:text-overline-color="font-color"/> + </style:style> + <style:style style:name="T19" style:family="text"> + <style:text-properties style:font-name="Bitstream Vera Serif1" fo:font-weight="normal" officeooo:rsid="0015544e" style:font-weight-asian="normal" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="T20" style:family="text"> + <style:text-properties style:font-name="Bitstream Vera Serif1" fo:font-weight="bold" officeooo:rsid="0015544e" style:font-weight-asian="bold" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="T21" style:family="text"> + <style:text-properties officeooo:rsid="0016fdaa"/> + </style:style> + <style:style style:name="T22" style:family="text"> + <style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"/> + </style:style> + <style:style style:name="T23" style:family="text"> + <style:text-properties fo:font-style="italic" fo:font-weight="normal" officeooo:rsid="0015544e" style:font-style-asian="italic" style:font-weight-asian="normal" style:font-style-complex="italic" style:font-weight-complex="normal"/> + </style:style> + <style:style style:name="T24" style:family="text"> + <style:text-properties officeooo:rsid="00180262"/> + </style:style> + <style:style style:name="T25" style:family="text"> + <style:text-properties fo:font-style="normal" style:font-style-asian="normal" style:font-style-complex="normal"/> + </style:style> + <style:style style:name="T26" style:family="text"> + <style:text-properties fo:font-style="normal" fo:font-weight="bold" style:font-style-asian="normal" style:font-weight-asian="bold" style:font-style-complex="normal" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="T27" style:family="text"> + <style:text-properties fo:font-style="normal" fo:font-weight="bold" officeooo:rsid="00180262" style:font-style-asian="normal" style:font-weight-asian="bold" style:font-style-complex="normal" style:font-weight-complex="bold"/> + </style:style> + <style:style style:name="T28" style:family="text"> + <style:text-properties officeooo:rsid="00195e71"/> + </style:style> + <style:page-layout style:name="pm1"> + <style:page-layout-properties fo:page-width="8.2701in" fo:page-height="11.6902in" style:num-format="1" style:print-orientation="portrait" fo:margin-top="0.7874in" fo:margin-bottom="0.7874in" fo:margin-left="0.7874in" fo:margin-right="0.7874in" style:writing-mode="lr-tb" style:layout-grid-color="#c0c0c0" style:layout-grid-lines="20" style:layout-grid-base-height="0.278in" style:layout-grid-ruby-height="0.139in" style:layout-grid-mode="none" style:layout-grid-ruby-below="false" style:layout-grid-print="false" style:layout-grid-display="false" style:footnote-max-height="0in"> + <style:columns fo:column-count="1" fo:column-gap="0in"/> + <style:footnote-sep style:width="0.0071in" style:distance-before-sep="0.0398in" style:distance-after-sep="0.0398in" style:line-style="solid" style:adjustment="left" style:rel-width="25%" style:color="#000000"/> + </style:page-layout-properties> + <style:header-style/> + <style:footer-style/> + </style:page-layout> + <number:text-style style:name="N10100" number:language="en" number:country="US"> + <number:text-content/> + </number:text-style> + </office:automatic-styles> + <office:master-styles> + <style:master-page style:name="Standard" style:page-layout-name="pm1"/> + </office:master-styles> + <office:body> + <office:text> + <office:forms form:automatic-focus="false" form:apply-design-mode="false"/> + <text:sequence-decls> + <text:sequence-decl text:display-outline-level="0" text:name="Illustration"/> + <text:sequence-decl text:display-outline-level="0" text:name="Table"/> + <text:sequence-decl text:display-outline-level="0" text:name="Text"/> + <text:sequence-decl text:display-outline-level="0" text:name="Drawing"/> + </text:sequence-decls> + <table:table table:name="Table7" table:style-name="Table7"> + <table:table-column table:style-name="Table7.A"/> + <table:table-row table:style-name="Table7.1"> + <table:table-cell table:style-name="Table7.A1" office:value-type="string"> + <text:p text:style-name="P15">GNU Para<text:span text:style-name="T22">ll</text:span>el Cheat Sheet</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table7.2"> + <table:table-cell table:style-name="Table7.A2" office:value-type="float" office:value="0"> + <text:p text:style-name="P6">GNU Parallel is a replacement for <text:span text:style-name="Source_20_Text"><text:span text:style-name="T22">xargs</text:span></text:span> and <text:span text:style-name="Source_20_Text"><text:span text:style-name="T22">for</text:span></text:span> loops. It can also split a file or a stream into blocks and pass those to commands <text:span text:style-name="T15">running in parallel.</text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table8" table:style-name="Table8"> + <table:table-column table:style-name="Table8.A"/> + <table:table-row table:style-name="Table8.1"> + <table:table-cell table:style-name="Table8.A1" office:value-type="string"> + <text:p text:style-name="P13">Example<text:span text:style-name="T10">s</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table8.2"> + <table:table-cell table:style-name="Table8.A2" office:value-type="float" office:value="0"> + <text:p text:style-name="P12"><text:span text:style-name="T1">Compress all *.html files in parallel – 2 </text:span><text:span text:style-name="T3">jobs </text:span><text:span text:style-name="T1">per CPU thread </text:span><text:span text:style-name="T3">in parallel</text:span><text:line-break/><text:span text:style-name="Source_20_Text"><text:span text:style-name="T12">parallel --</text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T13">jobs 200% </text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T12">gzip ::: *.html</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table8.3"> + <table:table-cell table:style-name="Table8.A2" office:value-type="float" office:value="0"> + <text:p text:style-name="P23"><text:span text:style-name="T1">Convert all *.wav to *.mp3 using </text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T22">lame</text:span></text:span><text:span text:style-name="T1"> – </text:span><text:span text:style-name="T3">1</text:span><text:span text:style-name="T1"> job per CPU thread in parallel </text:span><text:span text:style-name="T3">(default)</text:span><text:line-break/><text:span text:style-name="Source_20_Text"><text:span text:style-name="T12">parallel lame {} -o {.}.mp3 ::: *.wav</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table8.4"> + <table:table-cell table:style-name="Table8.A2" office:value-type="float" office:value="0"> + <text:p text:style-name="P12"><text:span text:style-name="T1">Chop bigfile into 1MB blocks and grep for the string foobar</text:span><text:line-break/><text:span text:style-name="Source_20_Text"><text:span text:style-name="T14">cat bigfile | </text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T12">parallel --pipe grep foobar</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table9" table:style-name="Table9"> + <table:table-column table:style-name="Table9.A"/> + <table:table-row table:style-name="Table9.1"> + <table:table-cell table:style-name="Table9.A1" office:value-type="string"> + <text:p text:style-name="P13">Input sources</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table9.2"> + <table:table-cell table:style-name="Table9.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">parallel echo ::: cmd line input source</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table9.3"> + <table:table-cell table:style-name="Table9.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">cat input_from_stdin | parallel echo</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table9.4"> + <table:table-cell table:style-name="Table9.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">parallel echo ::: multiple input source</text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T8">s ::: with </text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">values</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table9.5"> + <table:table-cell table:style-name="Table9.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">parallel -a input_from_file echo</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table9.6"> + <table:table-cell table:style-name="Table9.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">parallel echo :::: input_from_file</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table9.7"> + <table:table-cell table:style-name="Table9.A2" office:value-type="string"> + <text:p text:style-name="P10"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5">parallel echo :::: input_from_file ::: and command line</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table10" table:style-name="Table10"> + <table:table-column table:style-name="Table10.A"/> + <table:table-column table:style-name="Table10.B"/> + <table:table-row table:style-name="Table10.1"> + <table:table-cell table:style-name="Table10.A1" office:value-type="string"> + <text:p text:style-name="P13">Replacement string</text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B1" office:value-type="string"> + <text:p text:style-name="P26"><text:span text:style-name="T16">Value </text:span><text:span text:style-name="T18">if input is mydir/mysubdir/myfile.myext</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.2"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text">{}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text">mydir/mysubdir/myfile.myext</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.3"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text">{.}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text">mydir/mysubdir/myfile</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.4"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P18"><text:span text:style-name="Source_20_Text">{/}, {//}, </text:span><text:span text:style-name="Source_20_Text">{/.}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P18"><text:span text:style-name="Source_20_Text">myfile.myext, mydir/mysubdir, </text:span><text:span text:style-name="Source_20_Text">myfile</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.5"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text">{#}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P8"><text:span text:style-name="T4">T</text:span>he sequence number of the job</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.6"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text">{%}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P8"><text:span text:style-name="T4">T</text:span>he job slot number</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.7"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P5"><text:span text:style-name="Source_20_Text">{2}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P7"><text:span text:style-name="T21">Value from the s</text:span>econd input source</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.8"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P5"><text:span text:style-name="Source_20_Text">{2.} </text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T21">{2/} {2//} {2/.}</text:span></text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P7">Comb<text:span text:style-name="T21">ination </text:span>of {2} and {.} <text:span text:style-name="T21">{/} {//} {/.}</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table10.9"> + <table:table-cell table:style-name="Table10.A2" office:value-type="string"> + <text:p text:style-name="P9"><text:span text:style-name="Source_20_Text">{= perl </text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T15">expression</text:span></text:span><text:span text:style-name="Source_20_Text"> =}</text:span></text:p> + </table:table-cell> + <table:table-cell table:style-name="Table10.B2" office:value-type="string"> + <text:p text:style-name="P7">Change $_ with perl expression</text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table11" table:style-name="Table11"> + <table:table-column table:style-name="Table11.A"/> + <table:table-row table:style-name="Table11.1"> + <table:table-cell table:style-name="Table11.A1" office:value-type="string"> + <text:p text:style-name="P13">Control the output – <text:span text:style-name="T10">keep the same order as the input, prepend with input value</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table11.2"> + <table:table-cell table:style-name="Table11.A2" office:value-type="string"> + <text:p text:style-name="P18"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T10">parallel --keep-order --tag "sleep {}; echo {}" ::: 5 4 3 2 1</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table12" table:style-name="Table12"> + <table:table-column table:style-name="Table12.A"/> + <table:table-row table:style-name="Table12.1"> + <table:table-cell table:style-name="Table12.A1" office:value-type="string"> + <text:p text:style-name="P13">Control the execution</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table12.3"> + <table:table-cell table:style-name="Table12.A2" office:value-type="string"> + <text:p text:style-name="P20">Run 2 jobs in parallel – <text:span text:style-name="T24">command is a composed command</text:span></text:p> + <text:p text:style-name="P18"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T10">parallel --jobs 2 "sleep {}; echo {}" ::: 5 4 3 2 1</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table12.3"> + <table:table-cell table:style-name="Table12.A2" office:value-type="string"> + <text:p text:style-name="P20">See what will be run</text:p> + <text:p text:style-name="P22"><text:span text:style-name="Source_20_Text">parallel --dryrun echo </text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T28">{2} {1} </text:span></text:span><text:span text:style-name="Source_20_Text">::: </text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T28">bird flower fish ::: </text:span></text:span><text:span text:style-name="Source_20_Text">Red Green Blue</text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table13" table:style-name="Table13"> + <table:table-column table:style-name="Table13.A"/> + <table:table-row table:style-name="Table13.1"> + <table:table-cell table:style-name="Table13.A1" office:value-type="string"> + <text:p text:style-name="P24">Remote execution</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table13.2"> + <table:table-cell table:style-name="Table13.A2" office:value-type="string"> + <text:p text:style-name="P18"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T10">parallel -S server1 -S server2 "hostname; echo {}" ::: foo bar </text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table1" table:style-name="Table1"> + <table:table-column table:style-name="Table1.A"/> + <table:table-row table:style-name="Table1.1"> + <table:table-cell table:style-name="Table1.A1" office:value-type="string"> + <text:p text:style-name="P24">Pipe mode</text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table1.2"> + <table:table-cell table:style-name="Table1.A2" office:value-type="string"> + <text:p text:style-name="P11"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T10">cat bigfile | parallel --pipe wc -l</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table1.3"> + <table:table-cell table:style-name="Table1.A2" office:value-type="string"> + <text:p text:style-name="P4"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T20">Chop bigfile into one block per CPU thread and </text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T23">grep</text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T20"> for foobar</text:span></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T5"><text:line-break/>parallel -a bigfile --pipepart --block -1 grep foobar</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P2"/> + <table:table table:name="Table2" table:style-name="Table2"> + <table:table-column table:style-name="Table2.A"/> + <table:table-row table:style-name="Table2.1"> + <table:table-cell table:style-name="Table2.A1" office:value-type="string"> + <text:p text:style-name="P25">Read more – <text:span text:style-name="T21">Your command line will love you for it</text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table2.3"> + <table:table-cell table:style-name="Table2.A2" office:value-type="string"> + <text:p text:style-name="P21"><text:span text:style-name="Source_20_Text"><text:span text:style-name="T21">parallel --help; <text:s/></text:span></text:span><text:span text:style-name="Source_20_Text">man parallel; <text:s/></text:span><text:span text:style-name="Source_20_Text"><text:span text:style-name="T21">man parallel_tutorial; <text:s/>www.pi.dk/1</text:span></text:span></text:p> + </table:table-cell> + </table:table-row> + <table:table-row table:style-name="Table2.3"> + <table:table-cell table:style-name="Table2.A2" office:value-type="string"> + <text:p text:style-name="P6"><text:span text:style-name="T15">GNU Parallel 2018 </text:span>https://doi.org/10.5281/zenodo.1146014</text:p> + </table:table-cell> + </table:table-row> + </table:table> + <text:p text:style-name="P17">(CC-By-SA) 2019-03-<text:span text:style-name="T28">11</text:span> Ole Tange</text:p> + <text:p text:style-name="P1"/> + </office:text> + </office:body> +</office:document>
\ No newline at end of file diff --git a/src/parallel_design.pod b/src/parallel_design.pod new file mode 100644 index 0000000..85aee12 --- /dev/null +++ b/src/parallel_design.pod @@ -0,0 +1,1477 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + + +=head1 Design of GNU Parallel + +This document describes design decisions made in the development of +GNU B<parallel> and the reasoning behind them. It will give an +overview of why some of the code looks the way it does, and will help +new maintainers understand the code better. + + +=head2 One file program + +GNU B<parallel> is a Perl script in a single file. It is object +oriented, but contrary to normal Perl scripts each class is not in its +own file. This is due to user experience: The goal is that in a pinch +the user will be able to get GNU B<parallel> working simply by copying +a single file: No need to mess around with environment variables like +PERL5LIB. + + +=head2 Choice of programming language + +GNU B<parallel> is designed to be able to run on old systems. That +means that it cannot depend on a compiler being installed - and +especially not a compiler for a language that is younger than 20 years +old. + +The goal is that you can use GNU B<parallel> on any system, even if +you are not allowed to install additional software. + +Of all the systems I have experienced, I have yet to see a system that +had GCC installed that did not have Perl. The same goes for Rust, Go, +Haskell, and other younger languages. I have, however, seen systems +with Perl without any of the mentioned compilers. + +Most modern systems also have either Python2 or Python3 installed, but +you still cannot be certain which version, and since Python2 cannot +run under Python3, Python is not an option. + +Perl has the added benefit that implementing the {= perlexpr =} +replacement string was fairly easy. + +The primary drawback is that Perl is slow. So there is an overhead of +3-10 ms/job and 1 ms/MB output (and even more if you use B<--tag>). + + +=head2 Old Perl style + +GNU B<parallel> uses some old, deprecated constructs. This is due to a +goal of being able to run on old installations. Currently the target +is CentOS 3.9 and Perl 5.8.0. + + +=head2 Scalability up and down + +The smallest system GNU B<parallel> is tested on is a 32 MB ASUS +WL500gP. The largest is a 2 TB 128-core machine. It scales up to +around 100 machines - depending on the duration of each job. + + +=head2 Exponentially back off + +GNU B<parallel> busy waits. This is because the reason why a job is +not started may be due to load average (when using B<--load>), and +thus it will not make sense to just wait for a job to finish. Instead +the load average must be rechecked regularly. Load average is not the +only reason: B<--timeout> has a similar problem. + +To not burn up too much CPU GNU B<parallel> sleeps exponentially +longer and longer if nothing happens, maxing out at 1 second. + + +=head2 Shell compatibility + +It is a goal to have GNU B<parallel> work equally well in any +shell. However, in practice GNU B<parallel> is being developed in +B<bash> and thus testing in other shells is limited to reported bugs. + +When an incompatibility is found there is often not an easy fix: +Fixing the problem in B<csh> often breaks it in B<bash>. In these +cases the fix is often to use a small Perl script and call that. + + +=head2 env_parallel + +B<env_parallel> is a dummy shell script that will run if +B<env_parallel> is not an alias or a function and tell the user how to +activate the alias/function for the supported shells. + +The alias or function will copy the current environment and run the +command with GNU B<parallel> in the copy of the environment. + +The problem is that you cannot access all of the current environment +inside Perl. E.g. aliases, functions and unexported shell variables. + +The idea is therefore to take the environment and put it in +B<$PARALLEL_ENV> which GNU B<parallel> prepends to every command. + +The only way to have access to the environment is directly from the +shell, so the program must be written in a shell script that will be +sourced and there has to deal with the dialect of the relevant shell. + + +=head3 env_parallel.* + +These are the files that implements the alias or function +B<env_parallel> for a given shell. It could be argued that these +should be put in some obscure place under /usr/lib, but by putting +them in your path it becomes trivial to find the path to them and +B<source> them: + + source `which env_parallel.foo` + +The beauty is that they can be put anywhere in the path without the +user having to know the location. So if the user's path includes +/afs/bin/i386_fc5 or /usr/pkg/parallel/bin or +/usr/local/parallel/20161222/sunos5.6/bin the files can be put in the +dir that makes most sense for the sysadmin. + + +=head3 env_parallel.bash / env_parallel.sh / env_parallel.ash / +env_parallel.dash / env_parallel.zsh / env_parallel.ksh / +env_parallel.mksh + +B<env_parallel.(bash|sh|ash|dash|ksh|mksh|zsh)> defines the function +B<env_parallel>. It uses B<alias> and B<typeset> to dump the +configuration (with a few exceptions) into B<$PARALLEL_ENV> before +running GNU B<parallel>. + +After GNU B<parallel> is finished, B<$PARALLEL_ENV> is deleted. + + +=head3 env_parallel.csh + +B<env_parallel.csh> has two purposes: If B<env_parallel> is not an +alias: make it into an alias that sets B<$PARALLEL> with arguments +and calls B<env_parallel.csh>. + +If B<env_parallel> is an alias, then B<env_parallel.csh> uses +B<$PARALLEL> as the arguments for GNU B<parallel>. + +It exports the environment by writing a variable definition to a file +for each variable. The definitions of aliases are appended to this +file. Finally the file is put into B<$PARALLEL_ENV>. + +GNU B<parallel> is then run and B<$PARALLEL_ENV> is deleted. + + +=head3 env_parallel.fish + +First all functions definitions are generated using a loop and +B<functions>. + +Dumping the scalar variable definitions is harder. + +B<fish> can represent non-printable characters in (at least) 2 +ways. To avoid problems all scalars are converted to \XX quoting. + +Then commands to generate the definitions are made and separated by +NUL. + +This is then piped into a Perl script that quotes all values. List +elements will be appended using two spaces. + +Finally \n is converted into \1 because B<fish> variables cannot +contain \n. GNU B<parallel> will later convert all \1 from +B<$PARALLEL_ENV> into \n. + +This is then all saved in B<$PARALLEL_ENV>. + +GNU B<parallel> is called, and B<$PARALLEL_ENV> is deleted. + + +=head2 parset (supported in sh, ash, dash, bash, zsh, ksh, mksh) + +B<parset> is a shell function. This is the reason why B<parset> can +set variables: It runs in the shell which is calling it. + +It is also the reason why B<parset> does not work, when data is piped +into it: B<... | parset ...> makes B<parset> start in a subshell, and +any changes in environment can therefore not make it back to the +calling shell. + + +=head2 Job slots + +The easiest way to explain what GNU B<parallel> does is to assume that +there are a number of job slots, and when a slot becomes available a +job from the queue will be run in that slot. But originally GNU +B<parallel> did not model job slots in the code. Job slots have been +added to make it possible to use B<{%}> as a replacement string. + +While the job sequence number can be computed in advance, the job slot +can only be computed the moment a slot becomes available. So it has +been implemented as a stack with lazy evaluation: Draw one from an +empty stack and the stack is extended by one. When a job is done, push +the available job slot back on the stack. + +This implementation also means that if you re-run the same jobs, you +cannot assume jobs will get the same slots. And if you use remote +executions, you cannot assume that a given job slot will remain on the +same remote server. This goes double since number of job slots can be +adjusted on the fly (by giving B<--jobs> a file name). + + +=head2 Rsync protocol version + +B<rsync> 3.1.x uses protocol 31 which is unsupported by version +2.5.7. That means that you cannot push a file to a remote system using +B<rsync> protocol 31, if the remote system uses 2.5.7. B<rsync> does +not automatically downgrade to protocol 30. + +GNU B<parallel> does not require protocol 31, so if the B<rsync> +version is >= 3.1.0 then B<--protocol 30> is added to force newer +B<rsync>s to talk to version 2.5.7. + + +=head2 Compression + +GNU B<parallel> buffers output in temporary files. B<--compress> +compresses the buffered data. This is a bit tricky because there +should be no files to clean up if GNU B<parallel> is killed by a power +outage. + +GNU B<parallel> first selects a compression program. If the user has +not selected one, the first of these that is in $PATH is used: B<pzstd +lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 +lzma xz clzip>. They are sorted by speed on a 128 core machine. + +Schematically the setup is as follows: + + command started by parallel | compress > tmpfile + cattail tmpfile | uncompress | parallel which reads the output + +The setup is duplicated for both standard output (stdout) and standard +error (stderr). + +GNU B<parallel> pipes output from the command run into the compression +program which saves to a tmpfile. GNU B<parallel> records the pid of +the compress program. At the same time a small Perl script (called +B<cattail> above) is started: It basically does B<cat> followed by +B<tail -f>, but it also removes the tmpfile as soon as the first byte +is read, and it continuously checks if the pid of the compression +program is dead. If the compress program is dead, B<cattail> reads the +rest of tmpfile and exits. + +As most compression programs write out a header when they start, the +tmpfile in practice is removed by B<cattail> after around 40 ms. + +More detailed it works like this: + + bash ( command ) | + sh ( emptywrapper ( bash ( compound compress ) ) >tmpfile ) + cattail ( rm tmpfile; compound decompress ) < tmpfile + +This complex setup is to make sure compress program is only started if +there is input. This means each job will cause 8 processes to run. If +combined with B<--keep-order> these processes will run until the job +has been printed. + + +=head2 Wrapping + +The command given by the user can be wrapped in multiple +templates. Templates can be wrapped in other templates. + + + +=over 15 + +=item B<$COMMAND> + +the command to run. + + +=item B<$INPUT> + +the input to run. + + +=item B<$SHELL> + +the shell that started GNU Parallel. + + +=item B<$SSHLOGIN> + +the sshlogin. + + +=item B<$WORKDIR> + +the working dir. + + +=item B<$FILE> + +the file to read parts from. + + +=item B<$STARTPOS> + +the first byte position to read from B<$FILE>. + + +=item B<$LENGTH> + +the number of bytes to read from B<$FILE>. + + +=item --shellquote + +echo I<Double quoted $INPUT> + + +=item --nice I<pri> + +Remote: See B<The remote system wrapper>. + +Local: B<setpriority(0,0,$nice)> + +=item --cat + + cat > {}; $COMMAND {}; + perl -e '$bash = shift; + $csh = shift; + for(@ARGV) { unlink;rmdir; } + if($bash =~ s/h//) { exit $bash; } + exit $csh;' "$?h" "$status" {}; + +{} is set to B<$PARALLEL_TMP> which is a tmpfile. The Perl script +saves the exit value, unlinks the tmpfile, and returns the exit value +- no matter if the shell is B<bash>/B<ksh>/B<zsh> (using $?) or +B<*csh>/B<fish> (using $status). + +=item --fifo + + perl -e '($s,$c,$f) = @ARGV; + # mkfifo $PARALLEL_TMP + system "mkfifo", $f; + # spawn $shell -c $command & + $pid = fork || exec $s, "-c", $c; + open($o,">",$f) || die $!; + # cat > $PARALLEL_TMP + while(sysread(STDIN,$buf,131072)){ + syswrite $o, $buf; + } + close $o; + # waitpid to get the exit code from $command + waitpid $pid,0; + # Cleanup + unlink $f; + exit $?/256;' $SHELL -c $COMMAND $PARALLEL_TMP + +This is an elaborate way of: mkfifo {}; run B<$COMMAND> in the +background using B<$SHELL>; copying STDIN to {}; waiting for background +to complete; remove {} and exit with the exit code from B<$COMMAND>. + +It is made this way to be compatible with B<*csh>/B<fish>. + +=item --pipepart + + + < $FILE perl -e 'while(@ARGV) { + sysseek(STDIN,shift,0) || die; + $left = shift; + while($read = + sysread(STDIN,$buf, + ($left > 131072 ? 131072 : $left))){ + $left -= $read; + syswrite(STDOUT,$buf); + } + }' $STARTPOS $LENGTH + +This will read B<$LENGTH> bytes from B<$FILE> starting at B<$STARTPOS> +and send it to STDOUT. + +=item --sshlogin $SSHLOGIN + + ssh $SSHLOGIN "$COMMAND" + +=item --transfer + + ssh $SSHLOGIN mkdir -p ./$WORKDIR; + rsync --protocol 30 -rlDzR \ + -essh ./{} $SSHLOGIN:./$WORKDIR; + ssh $SSHLOGIN "$COMMAND" + +Read about B<--protocol 30> in the section B<Rsync protocol version>. + +=item --transferfile I<file> + +<<todo>> + +=item --basefile + +<<todo>> + +=item --return I<file> + + $COMMAND; _EXIT_status=$?; mkdir -p $WORKDIR; + rsync --protocol 30 \ + --rsync-path=cd\ ./$WORKDIR\;\ rsync \ + -rlDzR -essh $SSHLOGIN:./$FILE ./$WORKDIR; + exit $_EXIT_status; + +The B<--rsync-path=cd ...> is needed because old versions of B<rsync> +do not support B<--no-implied-dirs>. + +The B<$_EXIT_status> trick is to postpone the exit value. This makes it +incompatible with B<*csh> and should be fixed in the future. Maybe a +wrapping 'sh -c' is enough? + +=item --cleanup + +$RETURN is the wrapper from B<--return> + + $COMMAND; _EXIT_status=$?; $RETURN; + ssh $SSHLOGIN \(rm\ -f\ ./$WORKDIR/{}\;\ + rmdir\ ./$WORKDIR\ \>\&/dev/null\;\); + exit $_EXIT_status; + +B<$_EXIT_status>: see B<--return> above. + + +=item --pipe + + perl -e 'if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", "@ARGV") || die; + syswrite($fh, $buf); + # Align up to 128k block + if($read = sysread(STDIN, $buf, 131071)) { + syswrite($fh, $buf); + } + while($read = sysread(STDIN, $buf, 131072)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }' $SHELL -c $COMMAND + +This small wrapper makes sure that B<$COMMAND> will never be run if +there is no data. + +=item --tmux + +<<TODO Fixup with '-quoting>> +mkfifo /tmp/tmx3cMEV && + sh -c 'tmux -S /tmp/tmsaKpv1 new-session -s p334310 -d "sleep .2" >/dev/null 2>&1'; +tmux -S /tmp/tmsaKpv1 new-window -t p334310 -n wc\ 10 \(wc\ 10\)\;\ perl\ -e\ \'while\(\$t++\<3\)\{\ print\ \$ARGV\[0\],\"\\n\"\ \}\'\ \$\?h/\$status\ \>\>\ /tmp/tmx3cMEV\&echo\ wc\\\ 10\;\ echo\ \Job\ finished\ at:\ \`date\`\;sleep\ 10; +exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' /tmp/tmx3cMEV + + +mkfifo I<tmpfile.tmx>; +tmux -S <tmpfile.tms> new-session -s pI<PID> -d 'sleep .2' >&/dev/null; +tmux -S <tmpfile.tms> new-window -t pI<PID> -n <<shell quoted input>> \(<<shell quoted input>>\)\;\ perl\ -e\ \'while\(\$t++\<3\)\{\ print\ \$ARGV\[0\],\"\\n\"\ \}\'\ \$\?h/\$status\ \>\>\ I<tmpfile.tmx>\&echo\ <<shell double quoted input>>\;echo\ \Job\ finished\ at:\ \`date\`\;sleep\ 10; +exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' I<tmpfile.tmx> + +First a FIFO is made (.tmx). It is used for communicating exit +value. Next a new tmux session is made. This may fail if there is +already a session, so the output is ignored. If all job slots finish +at the same time, then B<tmux> will close the session. A temporary +socket is made (.tms) to avoid a race condition in B<tmux>. It is +cleaned up when GNU B<parallel> finishes. + +The input is used as the name of the windows in B<tmux>. When the job +inside B<tmux> finishes, the exit value is printed to the FIFO (.tmx). +This FIFO is opened by B<perl> outside B<tmux>, and B<perl> then +removes the FIFO. B<Perl> blocks until the first value is read from +the FIFO, and this value is used as exit value. + +To make it compatible with B<csh> and B<bash> the exit value is +printed as: $?h/$status and this is parsed by B<perl>. + +There is a bug that makes it necessary to print the exit value 3 +times. + +Another bug in B<tmux> requires the length of the tmux title and +command to not have certain limits. When inside these limits, 75 '\ ' +are added to the title to force it to be outside the limits. + +You can map the bad limits using: + + perl -e 'sub r { int(rand(shift)).($_[0] && "\t".r(@_)) } print map { r(@ARGV)."\n" } 1..10000' 1600 1500 90 | + perl -ane '$F[0]+$F[1]+$F[2] < 2037 and print ' | + parallel --colsep '\t' --tagstring '{1}\t{2}\t{3}' tmux -S /tmp/p{%}-'{=3 $_="O"x$_ =}' \ + new-session -d -n '{=1 $_="O"x$_ =}' true'\ {=2 $_="O"x$_ =};echo $?;rm -f /tmp/p{%}-O*' + + perl -e 'sub r { int(rand(shift)).($_[0] && "\t".r(@_)) } print map { r(@ARGV)."\n" } 1..10000' 17000 17000 90 | + parallel --colsep '\t' --tagstring '{1}\t{2}\t{3}' \ + tmux -S /tmp/p{%}-'{=3 $_="O"x$_ =}' new-session -d -n '{=1 $_="O"x$_ =}' true'\ {=2 $_="O"x$_ =};echo $?;rm /tmp/p{%}-O*' + > value.csv 2>/dev/null + + R -e 'a<-read.table("value.csv");X11();plot(a[,1],a[,2],col=a[,4]+5,cex=0.1);Sys.sleep(1000)' + +For B<tmux 1.8> 17000 can be lowered to 2100. + +The interesting areas are title 0..1000 with (title + whole command) +in 996..1127 and 9331..9636. + +=back + +The ordering of the wrapping is important: + +=over 5 + +=item * + +$PARALLEL_ENV which is set in env_parallel.* must be prepended to the +command first, as the command may contain exported variables or +functions. + +=item * + +B<--nice>/B<--cat>/B<--fifo> should be done on the remote machine + +=item * + +B<--pipepart>/B<--pipe> should be done on the local machine inside B<--tmux> + +=back + + +=head2 Convenience options --nice --basefile --transfer --return +--cleanup --tmux --group --compress --cat --fifo --workdir --tag +--tagstring + +These are all convenience options that make it easier to do a +task. But more importantly: They are tested to work on corner cases, +too. Take B<--nice> as an example: + + nice parallel command ... + +will work just fine. But when run remotely, you need to move the nice +command so it is being run on the server: + + parallel -S server nice command ... + +And this will again work just fine, as long as you are running a +single command. When you are running a composed command you need nice +to apply to the whole command, and it gets harder still: + + parallel -S server -q nice bash -c 'command1 ...; cmd2 | cmd3' + +It is not impossible, but by using B<--nice> GNU B<parallel> will do +the right thing for you. Similarly when transferring files: It starts +to get hard when the file names contain space, :, `, *, or other +special characters. + +To run the commands in a B<tmux> session you basically just need to +quote the command. For simple commands that is easy, but when commands +contain special characters, it gets much harder to get right. + +B<--compress> not only compresses standard output (stdout) but also +standard error (stderr); and it does so into files, that are open but +deleted, so a crash will not leave these files around. + +B<--cat> and B<--fifo> are easy to do by hand, until you want to clean +up the tmpfile and keep the exit code of the command. + +The real killer comes when you try to combine several of these: Doing +that correctly for all corner cases is next to impossible to do by +hand. + +=head2 --shard + +The simple way to implement sharding would be to: + +=over 5 + +=item 1 + +start n jobs, + +=item 2 + +split each line into columns, + +=item 3 + +select the data from the relevant column + +=item 4 + +compute a hash value from the data + +=item 5 + +take the modulo n of the hash value + +=item 6 + +pass the full line to the jobslot that has the computed value + +=back + +Unfortunately Perl is rather slow at computing the hash value (and +somewhat slow at splitting into columns). + +One solution is to use a compiled language for the splitting and +hashing, but that would go against the design criteria of not +depending on a compiler. + +Luckily those tasks can be parallelized. So GNU B<parallel> starts n +sharders that do step 2-6, and passes blocks of 100k to each of those +in a round robin manner. To make sure these sharders compute the hash +the same way, $PERL_HASH_SEED is set to the same value for all sharders. + +Running n sharders poses a new problem: Instead of having n outputs +(one for each computed value) you now have n outputs for each of the n +values, so in total n*n outputs; and you need to merge these n*n +outputs together into n outputs. + +This can be done by simply running 'parallel -j0 --lb cat ::: +outputs_for_one_value', but that is rather inefficient, as it spawns a +process for each file. Instead the core code from 'parcat' is run, +which is also a bit faster. + +All the sharders and parcats communicate through named pipes that are +unlinked as soon as they are opened. + + +=head2 Shell shock + +The shell shock bug in B<bash> did not affect GNU B<parallel>, but the +solutions did. B<bash> first introduced functions in variables named: +I<BASH_FUNC_myfunc()> and later changed that to +I<BASH_FUNC_myfunc%%>. When transferring functions GNU B<parallel> +reads off the function and changes that into a function definition, +which is copied to the remote system and executed before the actual +command is executed. Therefore GNU B<parallel> needs to know how to +read the function. + +From version 20150122 GNU B<parallel> tries both the ()-version and +the %%-version, and the function definition works on both pre- and +post-shell shock versions of B<bash>. + + +=head2 The remote system wrapper + +The remote system wrapper does some initialization before starting the +command on the remote system. + +=head3 Make quoting unnecessary by hex encoding everything + +When you run B<ssh server foo> then B<foo> has to be quoted once: + + ssh server "echo foo; echo bar" + +If you run B<ssh server1 ssh server2 foo> then B<foo> has to be quoted +twice: + + ssh server1 ssh server2 \'"echo foo; echo bar"\' + +GNU B<parallel> avoids this by packing everyting into hex values and +running a command that does not need quoting: + + perl -X -e GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV + +This command reads hex from the command line and converts that to +bytes that are then eval'ed as a Perl expression. + +The string B<GNU_Parallel_worker> is not needed. It is simply there to +let the user know, that this process is GNU B<parallel> working. + +=head3 Ctrl-C and standard error (stderr) + +If the user presses Ctrl-C the user expects jobs to stop. This works +out of the box if the jobs are run locally. Unfortunately it is not so +simple if the jobs are run remotely. + +If remote jobs are run in a tty using B<ssh -tt>, then Ctrl-C works, +but all output to standard error (stderr) is sent to standard output +(stdout). This is not what the user expects. + +If remote jobs are run without a tty using B<ssh> (without B<-tt>), +then output to standard error (stderr) is kept on stderr, but Ctrl-C +does not kill remote jobs. This is not what the user expects. + +So what is needed is a way to have both. It seems the reason why +Ctrl-C does not kill the remote jobs is because the shell does not +propagate the hang-up signal from B<sshd>. But when B<sshd> dies, the +parent of the login shell becomes B<init> (process id 1). So by +exec'ing a Perl wrapper to monitor the parent pid and kill the child +if the parent pid becomes 1, then Ctrl-C works and stderr is kept on +stderr. + +Ctrl-C does, however, kill the ssh connection, so any output from +a remote dying process is lost. + +To be able to kill all (grand)*children a new process group is +started. + + +=head3 --nice + +B<nice>ing the remote process is done by B<setpriority(0,0,$nice)>. A +few old systems do not implement this and B<--nice> is unsupported on +those. + + +=head3 Setting $PARALLEL_TMP + +B<$PARALLEL_TMP> is used by B<--fifo> and B<--cat> and must point to a +non-exitent file in B<$TMPDIR>. This file name is computed on the +remote system. + + +=head3 The wrapper + +The wrapper looks like this: + + $shell = $PARALLEL_SHELL || $SHELL; + $tmpdir = $TMPDIR || $PARALLEL_REMOTE_TMPDIR; + $nice = $opt::nice; + $termseq = $opt::termseq; + + # Check that $tmpdir is writable + -w $tmpdir || + die("$tmpdir is not writable.". + " Set PARALLEL_REMOTE_TMPDIR"); + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + # Set $script to a non-existent file name in $TMPDIR + do { + $script = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $script); + # Create a script from the hex code + # that removes itself and runs the commands + open($fh,">",$script) || die; + # ' needed due to rc-shell + print($fh("rm \'$script\'\n",$bashfunc.$cmd)); + close $fh; + my $parent = getppid; + my $done = 0; + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + eval { setpgrp }; + # Set nice value + eval { setpriority(0,0,$nice) }; + # Run the script + exec($shell,$script); + die("exec failed: $!"); + } + while((not $done) and (getppid == $parent)) { + # Parent pid is not changed, so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } + if(not $done) { + # sshd is dead: User pressed Ctrl-C + # Kill as per --termseq + my @term_seq = split/,/,$termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + while(@term_seq && kill(0,-$pid)) { + kill(shift @term_seq, -$pid); + select(undef, undef, undef, (shift @term_seq)/1000); + } + } + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + + +=head2 Transferring of variables and functions + +Transferring of variables and functions given by B<--env> is done by +running a Perl script remotely that calls the actual command. The Perl +script sets B<$ENV{>I<variable>B<}> to the correct value before +exec'ing a shell that runs the function definition followed by the +actual command. + +The function B<env_parallel> copies the full current environment into +the environment variable B<PARALLEL_ENV>. This variable is picked up +by GNU B<parallel> and used to create the Perl script mentioned above. + + +=head2 Base64 encoded bzip2 + +B<csh> limits words of commands to 1024 chars. This is often too little +when GNU B<parallel> encodes environment variables and wraps the +command with different templates. All of these are combined and quoted +into one single word, which often is longer than 1024 chars. + +When the line to run is > 1000 chars, GNU B<parallel> therefore +encodes the line to run. The encoding B<bzip2>s the line to run, +converts this to base64, splits the base64 into 1000 char blocks (so +B<csh> does not fail), and prepends it with this Perl script that +decodes, decompresses and B<eval>s the line. + + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + + $SIG{CHLD}="IGNORE"; + # Search for bzip2. Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + eval $eval; + +Perl and B<bzip2> must be installed on the remote system, but a small +test showed that B<bzip2> is installed by default on all platforms +that runs GNU B<parallel>, so this is not a big problem. + +The added bonus of this is that much bigger environments can now be +transferred as they will be below B<bash>'s limit of 131072 chars. + + +=head2 Which shell to use + +Different shells behave differently. A command that works in B<tcsh> +may not work in B<bash>. It is therefore important that the correct +shell is used when GNU B<parallel> executes commands. + +GNU B<parallel> tries hard to use the right shell. If GNU B<parallel> +is called from B<tcsh> it will use B<tcsh>. If it is called from +B<bash> it will use B<bash>. It does this by looking at the +(grand)*parent process: If the (grand)*parent process is a shell, use +this shell; otherwise look at the parent of this (grand)*parent. If +none of the (grand)*parents are shells, then $SHELL is used. + +This will do the right thing if called from: + +=over 2 + +=item * + +an interactive shell + +=item * + +a shell script + +=item * + +a Perl script in `` or using B<system> if called as a single string. + +=back + +While these cover most cases, there are situations where it will fail: + +=over 2 + +=item * + +When run using B<exec>. + +=item * + +When run as the last command using B<-c> from another shell (because +some shells use B<exec>): + + zsh% bash -c "parallel 'echo {} is not run in bash; \ + set | grep BASH_VERSION' ::: This" + +You can work around that by appending '&& true': + + zsh% bash -c "parallel 'echo {} is run in bash; \ + set | grep BASH_VERSION' ::: This && true" + +=item * + +When run in a Perl script using B<system> with parallel as the first +string: + + #!/usr/bin/perl + + system("parallel",'setenv a {}; echo $a',":::",2); + +Here it depends on which shell is used to call the Perl script. If the +Perl script is called from B<tcsh> it will work just fine, but if it +is called from B<bash> it will fail, because the command B<setenv> is +not known to B<bash>. + +=back + +If GNU B<parallel> guesses wrong in these situation, set the shell using +B<$PARALLEL_SHELL>. + + +=head2 Always running commands in a shell + +If the command is a simple command with no redirection and setting of +variables, the command I<could> be run without spawning a +shell. E.g. this simple B<grep> matching either 'ls ' or ' wc E<gt>E<gt> c': + + parallel "grep -E 'ls | wc >> c' {}" ::: foo + +could be run as: + + system("grep","-E","ls | wc >> c","foo"); + +However, as soon as the command is a bit more complex a shell I<must> +be spawned: + + parallel "grep -E 'ls | wc >> c' {} | wc >> c" ::: foo + parallel "LANG=C grep -E 'ls | wc >> c' {}" ::: foo + +It is impossible to tell how B<| wc E<gt>E<gt> c> should be +interpreted without parsing the string (is the B<|> a pipe in shell or +an alternation in a B<grep> regexp? Is B<LANG=C> a command in B<csh> +or setting a variable in B<bash>? Is B<E<gt>E<gt>> redirection or part +of a regexp?). + +On top of this, wrapper scripts will often require a shell to be +spawned. + +The downside is that you need to quote special shell chars twice: + + parallel echo '*' ::: This will expand the asterisk + parallel echo "'*'" ::: This will not + parallel "echo '*'" ::: This will not + parallel echo '\*' ::: This will not + parallel echo \''*'\' ::: This will not + parallel -q echo '*' ::: This will not + +B<-q> will quote all special chars, thus redirection will not work: +this prints '* > out.1' and I<does not> save '*' into the file out.1: + + parallel -q echo "*" ">" out.{} ::: 1 + +GNU B<parallel> tries to live up to Principle Of Least Astonishment +(POLA), and the requirement of using B<-q> is hard to understand, when +you do not see the whole picture. + + +=head2 Quoting + +Quoting depends on the shell. For most shells '-quoting is used for +strings containing special characters. + +For B<tcsh>/B<csh> newline is quoted as \ followed by newline. Other +special characters are also \-quoted. + +For B<rc> everything is quoted using '. + + +=head2 --pipepart vs. --pipe + +While B<--pipe> and B<--pipepart> look much the same to the user, they are +implemented very differently. + +With B<--pipe> GNU B<parallel> reads the blocks from standard input +(stdin), which is then given to the command on standard input (stdin); +so every block is being processed by GNU B<parallel> itself. This is +the reason why B<--pipe> maxes out at around 500 MB/sec. + +B<--pipepart>, on the other hand, first identifies at which byte +positions blocks start and how long they are. It does that by seeking +into the file by the size of a block and then reading until it meets +end of a block. The seeking explains why GNU B<parallel> does not know +the line number and why B<-L/-l> and B<-N> do not work. + +With a reasonable block and file size this seeking is more than 1000 +time faster than reading the full file. The byte positions are then +given to a small script that reads from position X to Y and sends +output to standard output (stdout). This small script is prepended to +the command and the full command is executed just as if GNU +B<parallel> had been in its normal mode. The script looks like this: + + < file perl -e 'while(@ARGV) { + sysseek(STDIN,shift,0) || die; + $left = shift; + while($read = sysread(STDIN,$buf, + ($left > 131072 ? 131072 : $left))){ + $left -= $read; syswrite(STDOUT,$buf); + } + }' startbyte length_in_bytes + +It delivers 1 GB/s per core. + +Instead of the script B<dd> was tried, but many versions of B<dd> do +not support reading from one byte to another and might cause partial +data. See this for a surprising example: + + yes | dd bs=1024k count=10 | wc + + +=head2 --block-size adjustment + +Every time GNU B<parallel> detects a record bigger than +B<--block-size> it increases the block size by 30%. A small +B<--block-size> gives very poor performance; by exponentially +increasing the block size performance will not suffer. + +GNU B<parallel> will waste CPU power if B<--block-size> does not +contain a full record, because it tries to find a full record and will +fail to do so. The recommendation is therefore to use a +B<--block-size> > 2 records, so you always get at least one full +record when you read one block. + +If you use B<-N> then B<--block-size> should be big enough to contain +N+1 records. + + +=head2 Automatic --block-size computation + +With B<--pipepart> GNU B<parallel> can compute the B<--block-size> +automatically. A B<--block-size> of B<-1> will use a block size so +that each jobslot will receive approximately 1 block. B<--block -2> +will pass 2 blocks to each jobslot and B<-I<n>> will pass I<n> blocks +to each jobslot. + +This can be done because B<--pipepart> reads from files, and we can +compute the total size of the input. + + +=head2 --jobs and --onall + +When running the same commands on many servers what should B<--jobs> +signify? Is it the number of servers to run on in parallel? Is it the +number of jobs run in parallel on each server? + +GNU B<parallel> lets B<--jobs> represent the number of servers to run +on in parallel. This is to make it possible to run a sequence of +commands (that cannot be parallelized) on each server, but run the +same sequence on multiple servers. + + +=head2 --shuf + +When using B<--shuf> to shuffle the jobs, all jobs are read, then they +are shuffled, and finally executed. When using SQL this makes the +B<--sqlmaster> be the part that shuffles the jobs. The B<--sqlworker>s +simply executes according to Seq number. + + +=head2 --csv + +B<--pipepart> is incompatible with B<--csv> because you can have +records like: + + a,b,c + a," + a,b,c + a,b,c + a,b,c + ",c + a,b,c + +Here the second record contains a multi-line field that looks like +records. Since B<--pipepart> does not read then whole file when +searching for record endings, it may start reading in this multi-line +field, which would be wrong. + + +=head2 Buffering on disk + +GNU B<parallel> buffers output, because if output is not buffered you +have to be ridiculously careful on sizes to avoid mixing of outputs +(see excellent example on https://catern.com/posts/pipes.html). + +GNU B<parallel> buffers on disk in $TMPDIR using files, that are +removed as soon as they are created, but which are kept open. So even +if GNU B<parallel> is killed by a power outage, there will be no files +to clean up afterwards. Another advantage is that the file system is +aware that these files will be lost in case of a crash, so it does +not need to sync them to disk. + +It gives the odd situation that a disk can be fully used, but there +are no visible files on it. + + +=head3 Partly buffering in memory + +When using output formats SQL and CSV then GNU Parallel has to read +the whole output into memory. When run normally it will only read the +output from a single job. But when using B<--linebuffer> every line +printed will also be buffered in memory - for all jobs currently +running. + +If memory is tight, then do not use the output format SQL/CSV with +B<--linebuffer>. + + +=head3 Comparing to buffering in memory + +B<gargs> is a parallelizing tool that buffers in memory. It is +therefore a useful way of comparing the advantages and disadvantages +of buffering in memory to buffering on disk. + +On an system with 6 GB RAM free and 6 GB free swap these were tested +with different sizes: + + echo /dev/zero | gargs "head -c $size {}" >/dev/null + echo /dev/zero | parallel "head -c $size {}" >/dev/null + +The results are here: + + JobRuntime Command + 0.344 parallel_test 1M + 0.362 parallel_test 10M + 0.640 parallel_test 100M + 9.818 parallel_test 1000M + 23.888 parallel_test 2000M + 30.217 parallel_test 2500M + 30.963 parallel_test 2750M + 34.648 parallel_test 3000M + 43.302 parallel_test 4000M + 55.167 parallel_test 5000M + 67.493 parallel_test 6000M + 178.654 parallel_test 7000M + 204.138 parallel_test 8000M + 230.052 parallel_test 9000M + 255.639 parallel_test 10000M + 757.981 parallel_test 30000M + 0.537 gargs_test 1M + 0.292 gargs_test 10M + 0.398 gargs_test 100M + 3.456 gargs_test 1000M + 8.577 gargs_test 2000M + 22.705 gargs_test 2500M + 123.076 gargs_test 2750M + 89.866 gargs_test 3000M + 291.798 gargs_test 4000M + +GNU B<parallel> is pretty much limited by the speed of the disk: Up to +6 GB data is written to disk but cached, so reading is fast. Above 6 +GB data are both written and read from disk. When the 30000MB job is +running, the disk system is slow, but usable: If you are not using the +disk, you almost do not feel it. + +B<gargs> has a speed advantage up until 2500M where it hits a +wall. Then the system starts swapping like crazy and is completely +unusable. At 5000M it goes out of memory. + +You can make GNU B<parallel> behave similar to B<gargs> if you point +$TMPDIR to a tmpfs-filesystem: It will be faster for small outputs, +but may kill your system for larger outputs and cause you to lose +output. + + +=head2 Disk full + +GNU B<parallel> buffers on disk. If the disk is full, data may be +lost. To check if the disk is full GNU B<parallel> writes a 8193 byte +file every second. If this file is written successfully, it is removed +immediately. If it is not written successfully, the disk is full. The +size 8193 was chosen because 8192 gave wrong result on some file +systems, whereas 8193 did the correct thing on all tested filesystems. + + +=head2 Memory usage + +Normally GNU B<parallel> will use around 17 MB RAM constantly - no +matter how many jobs or how much output there is. There are a few +things that cause the memory usage to rise: + +=over 3 + +=item * + +Multiple input sources. GNU B<parallel> reads an input source only +once. This is by design, as an input source can be a stream +(e.g. FIFO, pipe, standard input (stdin)) which cannot be rewound and +read again. When reading a single input source, the memory is freed as +soon as the job is done - thus keeping the memory usage constant. + +But when reading multiple input sources GNU B<parallel> keeps the +already read values for generating all combinations with other input +sources. + +=item * + +Computing the number of jobs. B<--bar>, B<--eta>, and B<--halt xx%> +use B<total_jobs()> to compute the total number of jobs. It does this +by generating the data structures for all jobs. All these job data +structures will be stored in memory and take up around 400 bytes/job. + +=item * + +Buffering a full line. B<--linebuffer> will read a full line per +running job. A very long output line (say 1 GB without \n) will +increase RAM usage temporarily: From when the beginning of the line is +read till the line is printed. + +=item * + +Buffering the full output of a single job. This happens when using +B<--results *.csv/*.tsv> or B<--sql*>. Here GNU B<parallel> will read +the whole output of a single job and save it as csv/tsv or SQL. + +=back + + +=head2 Argument separators ::: :::: :::+ ::::+ + +The argument separator B<:::> was chosen because I have never seen +B<:::> used in any command. The natural choice B<--> would be a bad +idea since it is not unlikely that the template command will contain +B<-->. I have seen B<::> used in programming languanges to separate +classes, and I did not want the user to be confused that the separator +had anything to do with classes. + +B<:::> also makes a visual separation, which is good if there are +multiple B<:::>. + +When B<:::> was chosen, B<::::> came as a fairly natural extension. + +Linking input sources meant having to decide for some way to indicate +linking of B<:::> and B<::::>. B<:::+> and B<::::+> were chosen, so +that they were similar to B<:::> and B<::::>. + +In 2022 I realized that B<///> would have been an even better choice, +because you cannot have an file named B<///> whereas you I<can> have a +file named B<:::>. + + +=head2 Perl replacement strings, {= =}, and --rpl + +The shorthands for replacement strings make a command look more +cryptic. Different users will need different replacement +strings. Instead of inventing more shorthands you get more +flexible replacement strings if they can be programmed by the user. + +The language Perl was chosen because GNU B<parallel> is written in +Perl and it was easy and reasonably fast to run the code given by the +user. + +If a user needs the same programmed replacement string again and +again, the user may want to make his own shorthand for it. This is +what B<--rpl> is for. It works so well, that even GNU B<parallel>'s +own shorthands are implemented using B<--rpl>. + +In Perl code the bigrams B<{=> and B<=}> rarely exist. They look like a +matching pair and can be entered on all keyboards. This made them good +candidates for enclosing the Perl expression in the replacement +strings. Another candidate ,, and ,, was rejected because they do not +look like a matching pair. B<--parens> was made, so that the users can +still use ,, and ,, if they like: B<--parens ,,,,> + +Internally, however, the B<{=> and B<=}> are replaced by \257< and +\257>. This is to make it simpler to make regular expressions. You +only need to look one character ahead, and never have to look behind. + + +=head2 Test suite + +GNU B<parallel> uses its own testing framework. This is mostly due to +historical reasons. It deals reasonably well with tests that are +dependent on how long a given test runs (e.g. more than 10 secs is a +pass, but less is a fail). It parallelizes most tests, but it is easy +to force a test to run as the single test (which may be important for +timing issues). It deals reasonably well with tests that fail +intermittently. It detects which tests failed and pushes these to the +top, so when running the test suite again, the tests that failed most +recently are run first. + +If GNU B<parallel> should adopt a real testing framework then those +elements would be important. + +Since many tests are dependent on which hardware it is running on, +these tests break when run on a different hardware than what the test +was written for. + +When most bugs are fixed a test is added, so this bug will not +reappear. It is, however, sometimes hard to create the environment in +which the bug shows up - especially if the bug only shows up +sometimes. One of the harder problems was to make a machine start +swapping without forcing it to its knees. + + +=head2 Median run time + +Using a percentage for B<--timeout> causes GNU B<parallel> to compute +the median run time of a job. The median is a better indicator of the +expected run time than average, because there will often be outliers +taking way longer than the normal run time. + +To avoid keeping all run times in memory, an implementation of +remedian was made (Rousseeuw et al). + + +=head2 Error messages and warnings + +Error messages like: ERROR, Not found, and 42 are not very +helpful. GNU B<parallel> strives to inform the user: + +=over 2 + +=item * + +What went wrong? + +=item * + +Why did it go wrong? + +=item * + +What can be done about it? + +=back + +Unfortunately it is not always possible to predict the root cause of +the error. + + +=head2 Determine number of CPUs + +CPUs is an ambiguous term. It can mean the number of socket filled +(i.e. the number of physical chips). It can mean the number of cores +(i.e. the number of physical compute cores). It can mean the number of +hyperthreaded cores (i.e. the number of virtual cores - with some of +them possibly being hyperthreaded). + +On ark.intel.com Intel uses the terms I<cores> and I<threads> for +number of physical cores and the number of hyperthreaded cores +respectively. + +GNU B<parallel> uses uses I<CPUs> as the number of compute units and +the terms I<sockets>, I<cores>, and I<threads> to specify how the +number of compute units is calculated. + + +=head2 Computation of load + +Contrary to the obvious B<--load> does not use load average. This is +due to load average rising too slowly. Instead it uses B<ps> to list +the number of threads in running or blocked state (state D, O or +R). This gives an instant load. + +As remote calculation of load can be slow, a process is spawned to run +B<ps> and put the result in a file, which is then used next time. + + +=head2 Killing jobs + +GNU B<parallel> kills jobs. It can be due to B<--memfree>, B<--halt>, +or when GNU B<parallel> meets a condition from which it cannot +recover. Every job is started as its own process group. This way any +(grand)*children will get killed, too. The process group is killed +with the specification mentioned in B<--termseq>. + + +=head2 SQL interface + +GNU B<parallel> uses the DBURL from GNU B<sql> to give database +software, username, password, host, port, database, and table in a +single string. + +The DBURL must point to a table name. The table will be dropped and +created. The reason for not reusing an existing table is that the user +may have added more input sources which would require more columns in +the table. By prepending '+' to the DBURL the table will not be +dropped. + +The table columns are similar to joblog with the addition of B<V1> +.. B<Vn> which are values from the input sources, and Stdout and +Stderr which are the output from standard output and standard error, +respectively. + +The Signal column has been renamed to _Signal due to Signal being a +reserved word in MySQL. + + +=head2 Logo + +The logo is inspired by the Cafe Wall illusion. The font is DejaVu +Sans. + +=head2 Citation notice + +Funding a free software project is hard. GNU B<parallel> is no +exception. On top of that it seems the less visible a project is, the +harder it is to get funding. And the nature of GNU B<parallel> is that +it will never be seen by "the guy with the checkbook", but only by the +people doing the actual work. + +This problem has been covered by others - though no solution has been +found: https://www.slideshare.net/NadiaEghbal/consider-the-maintainer +https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/ + +Before implementing the citation notice it was discussed with the +users: +https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html + +Having to spend 10 seconds on running B<parallel --citation> once is +no doubt not an ideal solution, but no one has so far come up with an +ideal solution - neither for funding GNU B<parallel> nor other free +software. + +If you believe you have the perfect solution, you should try it out, +and if it works, you should post it on the email list. Ideas that will +cost work and which have not been tested are, however, unlikely to be +prioritized. + +Running B<parallel --citation> one single time takes less than 10 +seconds, and will silence the citation notice for future runs. This is +comparable to graphical tools where you have to click a checkbox +saying "Do not show this again". But if that is too much trouble for +you, why not use one of the alternatives instead? See a list in: +B<man parallel_alternatives>. + +As the request for citation is not a legal requirement this is +acceptable under GPLv3 and cleared with Richard M. Stallman +himself. Thus it does not fall under this: +https://www.gnu.org/licenses/gpl-faq.en.html#RequireCitation + + +=head1 Ideas for new design + +=head2 Multiple processes working together + +Open3 is slow. Printing is slow. It would be good if they did not tie +up resources, but were run in separate threads. + + +=head2 --rrs on remote using a perl wrapper + +... | perl -pe '$/=$recend$recstart;BEGIN{ if(substr($_) eq $recstart) substr($_)="" } eof and substr($_) eq $recend) substr($_)="" + +It ought to be possible to write a filter that removed rec sep on the +fly instead of inside GNU B<parallel>. This could then use more cpus. + +Will that require 2x record size memory? + +Will that require 2x block size memory? + + +=head1 Historical decisions + +These decisions were relevant for earlier versions of GNU B<parallel>, +but not the current version. They are kept here as historical record. + + +=head2 --tollef + +You can read about the history of GNU B<parallel> on +https://www.gnu.org/software/parallel/history.html + +B<--tollef> was included to make GNU B<parallel> switch compatible +with the parallel from moreutils (which is made by Tollef Fog +Heen). This was done so that users of that parallel easily could port +their use to GNU B<parallel>: Simply set B<PARALLEL="--tollef"> and +that would be it. + +But several distributions chose to make B<--tollef> global (by putting +it into /etc/parallel/config) without making the users aware of this, +and that caused much confusion when people tried out the examples from +GNU B<parallel>'s man page and these did not work. The users became +frustrated because the distribution did not make it clear to them that +it has made B<--tollef> global. + +So to lessen the frustration and the resulting support, B<--tollef> +was obsoleted 20130222 and removed one year later. + + +=cut diff --git a/src/parallel_examples.pod b/src/parallel_examples.pod new file mode 100644 index 0000000..e518276 --- /dev/null +++ b/src/parallel_examples.pod @@ -0,0 +1,1994 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + +=head1 GNU PARALLEL EXAMPLES + +=head2 EXAMPLE: Working as xargs -n1. Argument appending + +GNU B<parallel> can work similar to B<xargs -n1>. + +To compress all html files using B<gzip> run: + + find . -name '*.html' | parallel gzip --best + +If the file names may contain a newline use B<-0>. Substitute FOO BAR with +FUBAR in all files in this dir and subdirs: + + find . -type f -print0 | \ + parallel -q0 perl -i -pe 's/FOO BAR/FUBAR/g' + +Note B<-q> is needed because of the space in 'FOO BAR'. + + +=head2 EXAMPLE: Simple network scanner + +B<prips> can generate IP-addresses from CIDR notation. With GNU +B<parallel> you can build a simple network scanner to see which +addresses respond to B<ping>: + + prips 130.229.16.0/20 | \ + parallel --timeout 2 -j0 \ + 'ping -c 1 {} >/dev/null && echo {}' 2>/dev/null + + +=head2 EXAMPLE: Reading arguments from command line + +GNU B<parallel> can take the arguments from command line instead of +stdin (standard input). To compress all html files in the current dir +using B<gzip> run: + + parallel gzip --best ::: *.html + +To convert *.wav to *.mp3 using LAME running one process per CPU run: + + parallel lame {} -o {.}.mp3 ::: *.wav + + +=head2 EXAMPLE: Inserting multiple arguments + +When moving a lot of files like this: B<mv *.log destdir> you will +sometimes get the error: + + bash: /bin/mv: Argument list too long + +because there are too many files. You can instead do: + + ls | grep -E '\.log$' | parallel mv {} destdir + +This will run B<mv> for each file. It can be done faster if B<mv> gets +as many arguments that will fit on the line: + + ls | grep -E '\.log$' | parallel -m mv {} destdir + +In many shells you can also use B<printf>: + + printf '%s\0' *.log | parallel -0 -m mv {} destdir + + +=head2 EXAMPLE: Context replace + +To remove the files I<pict0000.jpg> .. I<pict9999.jpg> you could do: + + seq -w 0 9999 | parallel rm pict{}.jpg + +You could also do: + + seq -w 0 9999 | perl -pe 's/(.*)/pict$1.jpg/' | parallel -m rm + +The first will run B<rm> 10000 times, while the last will only run +B<rm> as many times needed to keep the command line length short +enough to avoid B<Argument list too long> (it typically runs 1-2 times). + +You could also run: + + seq -w 0 9999 | parallel -X rm pict{}.jpg + +This will also only run B<rm> as many times needed to keep the command +line length short enough. + + +=head2 EXAMPLE: Compute intensive jobs and substitution + +If ImageMagick is installed this will generate a thumbnail of a jpg +file: + + convert -geometry 120 foo.jpg thumb_foo.jpg + +This will run with number-of-cpus jobs in parallel for all jpg files +in a directory: + + ls *.jpg | parallel convert -geometry 120 {} thumb_{} + +To do it recursively use B<find>: + + find . -name '*.jpg' | \ + parallel convert -geometry 120 {} {}_thumb.jpg + +Notice how the argument has to start with B<{}> as B<{}> will include path +(e.g. running B<convert -geometry 120 ./foo/bar.jpg +thumb_./foo/bar.jpg> would clearly be wrong). The command will +generate files like ./foo/bar.jpg_thumb.jpg. + +Use B<{.}> to avoid the extra .jpg in the file name. This command will +make files like ./foo/bar_thumb.jpg: + + find . -name '*.jpg' | \ + parallel convert -geometry 120 {} {.}_thumb.jpg + + +=head2 EXAMPLE: Substitution and redirection + +This will generate an uncompressed version of .gz-files next to the .gz-file: + + parallel zcat {} ">"{.} ::: *.gz + +Quoting of > is necessary to postpone the redirection. Another +solution is to quote the whole command: + + parallel "zcat {} >{.}" ::: *.gz + +Other special shell characters (such as * ; $ > < | >> <<) also need +to be put in quotes, as they may otherwise be interpreted by the shell +and not given to GNU B<parallel>. + + +=head2 EXAMPLE: Composed commands + +A job can consist of several commands. This will print the number of +files in each directory: + + ls | parallel 'echo -n {}" "; ls {}|wc -l' + +To put the output in a file called <name>.dir: + + ls | parallel '(echo -n {}" "; ls {}|wc -l) >{}.dir' + +Even small shell scripts can be run by GNU B<parallel>: + + find . | parallel 'a={}; name=${a##*/};' \ + 'upper=$(echo "$name" | tr "[:lower:]" "[:upper:]");'\ + 'echo "$name - $upper"' + + ls | parallel 'mv {} "$(echo {} | tr "[:upper:]" "[:lower:]")"' + +Given a list of URLs, list all URLs that fail to download. Print the +line number and the URL. + + cat urlfile | parallel "wget {} 2>/dev/null || grep -n {} urlfile" + +Create a mirror directory with the same filenames except all files and +symlinks are empty files. + + cp -rs /the/source/dir mirror_dir + find mirror_dir -type l | parallel -m rm {} '&&' touch {} + +Find the files in a list that do not exist + + cat file_list | parallel 'if [ ! -e {} ] ; then echo {}; fi' + + +=head2 EXAMPLE: Composed command with perl replacement string + +You have a bunch of file. You want them sorted into dirs. The dir of +each file should be named the first letter of the file name. + + parallel 'mkdir -p {=s/(.).*/$1/=}; mv {} {=s/(.).*/$1/=}' ::: * + + +=head2 EXAMPLE: Composed command with multiple input sources + +You have a dir with files named as 24 hours in 5 minute intervals: +00:00, 00:05, 00:10 .. 23:55. You want to find the files missing: + + parallel [ -f {1}:{2} ] "||" echo {1}:{2} does not exist \ + ::: {00..23} ::: {00..55..5} + + +=head2 EXAMPLE: Calling Bash functions + +If the composed command is longer than a line, it becomes hard to +read. In Bash you can use functions. Just remember to B<export -f> the +function. + + doit() { + echo Doing it for $1 + sleep 2 + echo Done with $1 + } + export -f doit + parallel doit ::: 1 2 3 + + doubleit() { + echo Doing it for $1 $2 + sleep 2 + echo Done with $1 $2 + } + export -f doubleit + parallel doubleit ::: 1 2 3 ::: a b + +To do this on remote servers you need to transfer the function using +B<--env>: + + parallel --env doit -S server doit ::: 1 2 3 + parallel --env doubleit -S server doubleit ::: 1 2 3 ::: a b + +If your environment (aliases, variables, and functions) is small you +can copy the full environment without having to +B<export -f> anything. See B<env_parallel>. + + +=head2 EXAMPLE: Function tester + +To test a program with different parameters: + + tester() { + if (eval "$@") >&/dev/null; then + perl -e 'printf "\033[30;102m[ OK ]\033[0m @ARGV\n"' "$@" + else + perl -e 'printf "\033[30;101m[FAIL]\033[0m @ARGV\n"' "$@" + fi + } + export -f tester + parallel tester my_program ::: arg1 arg2 + parallel tester exit ::: 1 0 2 0 + +If B<my_program> fails a red FAIL will be printed followed by the failing +command; otherwise a green OK will be printed followed by the command. + + +=head2 EXAMPLE: Continously show the latest line of output + +It can be useful to monitor the output of running jobs. + +This shows the most recent output line until a job finishes. After +which the output of the job is printed in full: + + parallel '{} | tee >(cat >&3)' ::: 'command 1' 'command 2' \ + 3> >(perl -ne '$|=1;chomp;printf"%.'$COLUMNS's\r",$_." "x100') + + +=head2 EXAMPLE: Log rotate + +Log rotation renames a logfile to an extension with a higher number: +log.1 becomes log.2, log.2 becomes log.3, and so on. The oldest log is +removed. To avoid overwriting files the process starts backwards from +the high number to the low number. This will keep 10 old versions of +the log: + + seq 9 -1 1 | parallel -j1 mv log.{} log.'{= $_++ =}' + mv log log.1 + + +=head2 EXAMPLE: Removing file extension when processing files + +When processing files removing the file extension using B<{.}> is +often useful. + +Create a directory for each zip-file and unzip it in that dir: + + parallel 'mkdir {.}; cd {.}; unzip ../{}' ::: *.zip + +Recompress all .gz files in current directory using B<bzip2> running 1 +job per CPU in parallel: + + parallel "zcat {} | bzip2 >{.}.bz2 && rm {}" ::: *.gz + +Convert all WAV files to MP3 using LAME: + + find sounddir -type f -name '*.wav' | parallel lame {} -o {.}.mp3 + +Put all converted in the same directory: + + find sounddir -type f -name '*.wav' | \ + parallel lame {} -o mydir/{/.}.mp3 + + +=head2 EXAMPLE: Removing strings from the argument + +If you have directory with tar.gz files and want these extracted in +the corresponding dir (e.g foo.tar.gz will be extracted in the dir +foo) you can do: + + parallel --plus 'mkdir {..}; tar -C {..} -xf {}' ::: *.tar.gz + +If you want to remove a different ending, you can use {%string}: + + parallel --plus echo {%_demo} ::: mycode_demo keep_demo_here + +You can also remove a starting string with {#string} + + parallel --plus echo {#demo_} ::: demo_mycode keep_demo_here + +To remove a string anywhere you can use regular expressions with +{/regexp/replacement} and leave the replacement empty: + + parallel --plus echo {/demo_/} ::: demo_mycode remove_demo_here + + +=head2 EXAMPLE: Download 24 images for each of the past 30 days + +Let us assume a website stores images like: + + https://www.example.com/path/to/YYYYMMDD_##.jpg + +where YYYYMMDD is the date and ## is the number 01-24. This will +download images for the past 30 days: + + getit() { + date=$(date -d "today -$1 days" +%Y%m%d) + num=$2 + echo wget https://www.example.com/path/to/${date}_${num}.jpg + } + export -f getit + + parallel getit ::: $(seq 30) ::: $(seq -w 24) + +B<$(date -d "today -$1 days" +%Y%m%d)> will give the dates in +YYYYMMDD with B<$1> days subtracted. + + +=head2 EXAMPLE: Download world map from NASA + +NASA provides tiles to download on earthdata.nasa.gov. Download tiles +for Blue Marble world map and create a 10240x20480 map. + + base=https://map1a.vis.earthdata.nasa.gov/wmts-geo/wmts.cgi + service="SERVICE=WMTS&REQUEST=GetTile&VERSION=1.0.0" + layer="LAYER=BlueMarble_ShadedRelief_Bathymetry" + set="STYLE=&TILEMATRIXSET=EPSG4326_500m&TILEMATRIX=5" + tile="TILEROW={1}&TILECOL={2}" + format="FORMAT=image%2Fjpeg" + url="$base?$service&$layer&$set&$tile&$format" + + parallel -j0 -q wget "$url" -O {1}_{2}.jpg ::: {0..19} ::: {0..39} + parallel eval convert +append {}_{0..39}.jpg line{}.jpg ::: {0..19} + convert -append line{0..19}.jpg world.jpg + + +=head2 EXAMPLE: Download Apollo-11 images from NASA using jq + +Search NASA using their API to get JSON for images related to 'apollo +11' and has 'moon landing' in the description. + +The search query returns JSON containing URLs to JSON containing +collections of pictures. One of the pictures in each of these +collection is I<large>. + +B<wget> is used to get the JSON for the search query. B<jq> is then +used to extract the URLs of the collections. B<parallel> then calls +B<wget> to get each collection, which is passed to B<jq> to extract +the URLs of all images. B<grep> filters out the I<large> images, and +B<parallel> finally uses B<wget> to fetch the images. + + base="https://images-api.nasa.gov/search" + q="q=apollo 11" + description="description=moon landing" + media_type="media_type=image" + wget -O - "$base?$q&$description&$media_type" | + jq -r .collection.items[].href | + parallel wget -O - | + jq -r .[] | + grep large | + parallel wget + + +=head2 EXAMPLE: Download video playlist in parallel + +B<youtube-dl> is an excellent tool to download videos. It can, +however, not download videos in parallel. This takes a playlist and +downloads 10 videos in parallel. + + url='youtu.be/watch?v=0wOf2Fgi3DE&list=UU_cznB5YZZmvAmeq7Y3EriQ' + export url + youtube-dl --flat-playlist "https://$url" | + parallel --tagstring {#} --lb -j10 \ + youtube-dl --playlist-start {#} --playlist-end {#} '"https://$url"' + + +=head2 EXAMPLE: Prepend last modified date (ISO8601) to file name + + parallel mv {} '{= $a=pQ($_); $b=$_;' \ + '$_=qx{date -r "$a" +%FT%T}; chomp; $_="$_ $b" =}' ::: * + +B<{=> and B<=}> mark a perl expression. B<pQ> perl-quotes the +string. B<date +%FT%T> is the date in ISO8601 with time. + +=head2 EXAMPLE: Save output in ISO8601 dirs + +Save output from B<ps aux> every second into dirs named +yyyy-mm-ddThh:mm:ss+zz:zz. + + seq 1000 | parallel -N0 -j1 --delay 1 \ + --results '{= $_=`date -Isec`; chomp=}/' ps aux + + +=head2 EXAMPLE: Digital clock with "blinking" : + +The : in a digital clock blinks. To make every other line have a ':' +and the rest a ' ' a perl expression is used to look at the 3rd input +source. If the value modulo 2 is 1: Use ":" otherwise use " ": + + parallel -k echo {1}'{=3 $_=$_%2?":":" "=}'{2}{3} \ + ::: {0..12} ::: {0..5} ::: {0..9} + + +=head2 EXAMPLE: Aggregating content of files + +This: + + parallel --header : echo x{X}y{Y}z{Z} \> x{X}y{Y}z{Z} \ + ::: X {1..5} ::: Y {01..10} ::: Z {1..5} + +will generate the files x1y01z1 .. x5y10z5. If you want to aggregate +the output grouping on x and z you can do this: + + parallel eval 'cat {=s/y01/y*/=} > {=s/y01//=}' ::: *y01* + +For all values of x and z it runs commands like: + + cat x1y*z1 > x1z1 + +So you end up with x1z1 .. x5z5 each containing the content of all +values of y. + + +=head2 EXAMPLE: Breadth first parallel web crawler/mirrorer + +This script below will crawl and mirror a URL in parallel. It +downloads first pages that are 1 click down, then 2 clicks down, then +3; instead of the normal depth first, where the first link link on +each page is fetched first. + +Run like this: + + PARALLEL=-j100 ./parallel-crawl http://gatt.org.yeslab.org/ + +Remove the B<wget> part if you only want a web crawler. + +It works by fetching a page from a list of URLs and looking for links +in that page that are within the same starting URL and that have not +already been seen. These links are added to a new queue. When all the +pages from the list is done, the new queue is moved to the list of +URLs and the process is started over until no unseen links are found. + + #!/bin/bash + + # E.g. http://gatt.org.yeslab.org/ + URL=$1 + # Stay inside the start dir + BASEURL=$(echo $URL | perl -pe 's:#.*::; s:(//.*/)[^/]*:$1:') + URLLIST=$(mktemp urllist.XXXX) + URLLIST2=$(mktemp urllist.XXXX) + SEEN=$(mktemp seen.XXXX) + + # Spider to get the URLs + echo $URL >$URLLIST + cp $URLLIST $SEEN + + while [ -s $URLLIST ] ; do + cat $URLLIST | + parallel lynx -listonly -image_links -dump {} \; \ + wget -qm -l1 -Q1 {} \; echo Spidered: {} \>\&2 | + perl -ne 's/#.*//; s/\s+\d+.\s(\S+)$/$1/ and + do { $seen{$1}++ or print }' | + grep -F $BASEURL | + grep -v -x -F -f $SEEN | tee -a $SEEN > $URLLIST2 + mv $URLLIST2 $URLLIST + done + + rm -f $URLLIST $URLLIST2 $SEEN + + +=head2 EXAMPLE: Process files from a tar file while unpacking + +If the files to be processed are in a tar file then unpacking one file +and processing it immediately may be faster than first unpacking all +files. + + tar xvf foo.tgz | perl -ne 'print $l;$l=$_;END{print $l}' | \ + parallel echo + +The Perl one-liner is needed to make sure the file is complete before +handing it to GNU B<parallel>. + + +=head2 EXAMPLE: Rewriting a for-loop and a while-read-loop + +for-loops like this: + + (for x in `cat list` ; do + do_something $x + done) | process_output + +and while-read-loops like this: + + cat list | (while read x ; do + do_something $x + done) | process_output + +can be written like this: + + cat list | parallel do_something | process_output + +For example: Find which host name in a list has IP address 1.2.3 4: + + cat hosts.txt | parallel -P 100 host | grep 1.2.3.4 + +If the processing requires more steps the for-loop like this: + + (for x in `cat list` ; do + no_extension=${x%.*}; + do_step1 $x scale $no_extension.jpg + do_step2 <$x $no_extension + done) | process_output + +and while-loops like this: + + cat list | (while read x ; do + no_extension=${x%.*}; + do_step1 $x scale $no_extension.jpg + do_step2 <$x $no_extension + done) | process_output + +can be written like this: + + cat list | parallel "do_step1 {} scale {.}.jpg ; do_step2 <{} {.}" |\ + process_output + +If the body of the loop is bigger, it improves readability to use a function: + + (for x in `cat list` ; do + do_something $x + [... 100 lines that do something with $x ...] + done) | process_output + + cat list | (while read x ; do + do_something $x + [... 100 lines that do something with $x ...] + done) | process_output + +can both be rewritten as: + + doit() { + x=$1 + do_something $x + [... 100 lines that do something with $x ...] + } + export -f doit + cat list | parallel doit + +=head2 EXAMPLE: Rewriting nested for-loops + +Nested for-loops like this: + + (for x in `cat xlist` ; do + for y in `cat ylist` ; do + do_something $x $y + done + done) | process_output + +can be written like this: + + parallel do_something {1} {2} :::: xlist ylist | process_output + +Nested for-loops like this: + + (for colour in red green blue ; do + for size in S M L XL XXL ; do + echo $colour $size + done + done) | sort + +can be written like this: + + parallel echo {1} {2} ::: red green blue ::: S M L XL XXL | sort + + +=head2 EXAMPLE: Finding the lowest difference between files + +B<diff> is good for finding differences in text files. B<diff | wc -l> +gives an indication of the size of the difference. To find the +differences between all files in the current dir do: + + parallel --tag 'diff {1} {2} | wc -l' ::: * ::: * | sort -nk3 + +This way it is possible to see if some files are closer to other +files. + + +=head2 EXAMPLE: for-loops with column names + +When doing multiple nested for-loops it can be easier to keep track of +the loop variable if is is named instead of just having a number. Use +B<--header :> to let the first argument be an named alias for the +positional replacement string: + + parallel --header : echo {colour} {size} \ + ::: colour red green blue ::: size S M L XL XXL + +This also works if the input file is a file with columns: + + cat addressbook.tsv | \ + parallel --colsep '\t' --header : echo {Name} {E-mail address} + + +=head2 EXAMPLE: All combinations in a list + +GNU B<parallel> makes all combinations when given two lists. + +To make all combinations in a single list with unique values, you +repeat the list and use replacement string B<{choose_k}>: + + parallel --plus echo {choose_k} ::: A B C D ::: A B C D + + parallel --plus echo 2{2choose_k} 1{1choose_k} ::: A B C D ::: A B C D + +B<{choose_k}> works for any number of input sources: + + parallel --plus echo {choose_k} ::: A B C D ::: A B C D ::: A B C D + +Where B<{choose_k}> does not care about order, B<{uniq}> cares about +order. It simply skips jobs where values from different input sources +are the same: + + parallel --plus echo {uniq} ::: A B C ::: A B C ::: A B C + parallel --plus echo {1uniq}+{2uniq}+{3uniq} ::: A B C ::: A B C ::: A B C + + +=head2 EXAMPLE: From a to b and b to c + +Assume you have input like: + + aardvark + babble + cab + dab + each + +and want to run combinations like: + + aardvark babble + babble cab + cab dab + dab each + +If the input is in the file in.txt: + + parallel echo {1} - {2} ::::+ <(head -n -1 in.txt) <(tail -n +2 in.txt) + +If the input is in the array $a here are two solutions: + + seq $((${#a[@]}-1)) | \ + env_parallel --env a echo '${a[{=$_--=}]} - ${a[{}]}' + parallel echo {1} - {2} ::: "${a[@]::${#a[@]}-1}" :::+ "${a[@]:1}" + + +=head2 EXAMPLE: Count the differences between all files in a dir + +Using B<--results> the results are saved in /tmp/diffcount*. + + parallel --results /tmp/diffcount "diff -U 0 {1} {2} | \ + tail -n +3 |grep -v '^@'|wc -l" ::: * ::: * + +To see the difference between file A and file B look at the file +'/tmp/diffcount/1/A/2/B'. + + +=head2 EXAMPLE: Speeding up fast jobs + +Starting a job on the local machine takes around 3-10 ms. This can be +a big overhead if the job takes very few ms to run. Often you can +group small jobs together using B<-X> which will make the overhead +less significant. Compare the speed of these: + + seq -w 0 9999 | parallel touch pict{}.jpg + seq -w 0 9999 | parallel -X touch pict{}.jpg + +If your program cannot take multiple arguments, then you can use GNU +B<parallel> to spawn multiple GNU B<parallel>s: + + seq -w 0 9999999 | \ + parallel -j10 -q -I,, --pipe parallel -j0 touch pict{}.jpg + +If B<-j0> normally spawns 252 jobs, then the above will try to spawn +2520 jobs. On a normal GNU/Linux system you can spawn 32000 jobs using +this technique with no problems. To raise the 32000 jobs limit raise +/proc/sys/kernel/pid_max to 4194303. + +If you do not need GNU B<parallel> to have control over each job (so +no need for B<--retries> or B<--joblog> or similar), then it can be +even faster if you can generate the command lines and pipe those to a +shell. So if you can do this: + + mygenerator | sh + +Then that can be parallelized like this: + + mygenerator | parallel --pipe --block 10M sh + +E.g. + + mygenerator() { + seq 10000000 | perl -pe 'print "echo This is fast job number "'; + } + mygenerator | parallel --pipe --block 10M sh + +The overhead is 100000 times smaller namely around 100 nanoseconds per +job. + + +=head2 EXAMPLE: Using shell variables + +When using shell variables you need to quote them correctly as they +may otherwise be interpreted by the shell. + +Notice the difference between: + + ARR=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar) + parallel echo ::: ${ARR[@]} # This is probably not what you want + +and: + + ARR=("My brother's 12\" records are worth <\$\$\$>"'!' Foo Bar) + parallel echo ::: "${ARR[@]}" + +When using variables in the actual command that contains special +characters (e.g. space) you can quote them using B<'"$VAR"'> or using +"'s and B<-q>: + + VAR="My brother's 12\" records are worth <\$\$\$>" + parallel -q echo "$VAR" ::: '!' + export VAR + parallel echo '"$VAR"' ::: '!' + +If B<$VAR> does not contain ' then B<"'$VAR'"> will also work +(and does not need B<export>): + + VAR="My 12\" records are worth <\$\$\$>" + parallel echo "'$VAR'" ::: '!' + +If you use them in a function you just quote as you normally would do: + + VAR="My brother's 12\" records are worth <\$\$\$>" + export VAR + myfunc() { echo "$VAR" "$1"; } + export -f myfunc + parallel myfunc ::: '!' + + +=head2 EXAMPLE: Group output lines + +When running jobs that output data, you often do not want the output +of multiple jobs to run together. GNU B<parallel> defaults to grouping +the output of each job, so the output is printed when the job +finishes. If you want full lines to be printed while the job is +running you can use B<--line-buffer>. If you want output to be +printed as soon as possible you can use B<-u>. + +Compare the output of: + + parallel wget --progress=dot --limit-rate=100k \ + https://ftpmirror.gnu.org/parallel/parallel-20{}0822.tar.bz2 \ + ::: {12..16} + parallel --line-buffer wget --progress=dot --limit-rate=100k \ + https://ftpmirror.gnu.org/parallel/parallel-20{}0822.tar.bz2 \ + ::: {12..16} + parallel --latest-line wget --progress=dot --limit-rate=100k \ + https://ftpmirror.gnu.org/parallel/parallel-20{}0822.tar.bz2 \ + ::: {12..16} + parallel -u wget --progress=dot --limit-rate=100k \ + https://ftpmirror.gnu.org/parallel/parallel-20{}0822.tar.bz2 \ + ::: {12..16} + +=head2 EXAMPLE: Tag output lines + +GNU B<parallel> groups the output lines, but it can be hard to see +where the different jobs begin. B<--tag> prepends the argument to make +that more visible: + + parallel --tag wget --limit-rate=100k \ + https://ftpmirror.gnu.org/parallel/parallel-20{}0822.tar.bz2 \ + ::: {12..16} + +B<--tag> works with B<--line-buffer> but not with B<-u>: + + parallel --tag --line-buffer wget --limit-rate=100k \ + https://ftpmirror.gnu.org/parallel/parallel-20{}0822.tar.bz2 \ + ::: {12..16} + +Check the uptime of the servers in I<~/.parallel/sshloginfile>: + + parallel --tag -S .. --nonall uptime + + +=head2 EXAMPLE: Colorize output + +Give each job a new color. Most terminals support ANSI colors with the +escape code "\033[30;3Xm" where 0 <= X <= 7: + + seq 10 | \ + parallel --tagstring '\033[30;3{=$_=++$::color%8=}m' seq {} + parallel --rpl '{color} $_="\033[30;3".(++$::color%8)."m"' \ + --tagstring {color} seq {} ::: {1..10} + +To get rid of the initial \t (which comes from B<--tagstring>): + + ... | perl -pe 's/\t//' + + +=head2 EXAMPLE: Keep order of output same as order of input + +Normally the output of a job will be printed as soon as it +completes. Sometimes you want the order of the output to remain the +same as the order of the input. This is often important, if the output +is used as input for another system. B<-k> will make sure the order of +output will be in the same order as input even if later jobs end +before earlier jobs. + +Append a string to every line in a text file: + + cat textfile | parallel -k echo {} append_string + +If you remove B<-k> some of the lines may come out in the wrong order. + +Another example is B<traceroute>: + + parallel traceroute ::: qubes-os.org debian.org freenetproject.org + +will give traceroute of qubes-os.org, debian.org and +freenetproject.org, but it will be sorted according to which job +completed first. + +To keep the order the same as input run: + + parallel -k traceroute ::: qubes-os.org debian.org freenetproject.org + +This will make sure the traceroute to qubes-os.org will be printed +first. + +A bit more complex example is downloading a huge file in chunks in +parallel: Some internet connections will deliver more data if you +download files in parallel. For downloading files in parallel see: +"EXAMPLE: Download 10 images for each of the past 30 days". But if you +are downloading a big file you can download the file in chunks in +parallel. + +To download byte 10000000-19999999 you can use B<curl>: + + curl -r 10000000-19999999 https://example.com/the/big/file >file.part + +To download a 1 GB file we need 100 10MB chunks downloaded and +combined in the correct order. + + seq 0 99 | parallel -k curl -r \ + {}0000000-{}9999999 https://example.com/the/big/file > file + + +=head2 EXAMPLE: Parallel grep + +B<grep -r> greps recursively through directories. GNU B<parallel> can +often speed this up. + + find . -type f | parallel -k -j150% -n 1000 -m grep -H -n STRING {} + +This will run 1.5 job per CPU, and give 1000 arguments to B<grep>. + +There are situations where the above will be slower than B<grep -r>: + +=over 2 + +=item * + +If data is already in RAM. The overhead of starting jobs and buffering +output may outweigh the benefit of running in parallel. + +=item * + +If the files are big. If a file cannot be read in a single seek, the +disk may start thrashing. + +=back + +The speedup is caused by two factors: + +=over 2 + +=item * + +On rotating harddisks small files often require a seek for each +file. By searching for more files in parallel, the arm may pass +another wanted file on its way. + +=item * + +NVMe drives often perform better by having multiple command running in +parallel. + +=back + + +=head2 EXAMPLE: Grepping n lines for m regular expressions. + +The simplest solution to grep a big file for a lot of regexps is: + + grep -f regexps.txt bigfile + +Or if the regexps are fixed strings: + + grep -F -f regexps.txt bigfile + +There are 3 limiting factors: CPU, RAM, and disk I/O. + +RAM is easy to measure: If the B<grep> process takes up most of your +free memory (e.g. when running B<top>), then RAM is a limiting factor. + +CPU is also easy to measure: If the B<grep> takes >90% CPU in B<top>, +then the CPU is a limiting factor, and parallelization will speed this +up. + +It is harder to see if disk I/O is the limiting factor, and depending +on the disk system it may be faster or slower to parallelize. The only +way to know for certain is to test and measure. + + +=head3 Limiting factor: RAM + +The normal B<grep -f regexps.txt bigfile> works no matter the size of +bigfile, but if regexps.txt is so big it cannot fit into memory, then +you need to split this. + +B<grep -F> takes around 100 bytes of RAM and B<grep> takes about 500 +bytes of RAM per 1 byte of regexp. So if regexps.txt is 1% of your +RAM, then it may be too big. + +If you can convert your regexps into fixed strings do that. E.g. if +the lines you are looking for in bigfile all looks like: + + ID1 foo bar baz Identifier1 quux + fubar ID2 foo bar baz Identifier2 + +then your regexps.txt can be converted from: + + ID1.*Identifier1 + ID2.*Identifier2 + +into: + + ID1 foo bar baz Identifier1 + ID2 foo bar baz Identifier2 + +This way you can use B<grep -F> which takes around 80% less memory and +is much faster. + +If it still does not fit in memory you can do this: + + parallel --pipe-part -a regexps.txt --block 1M grep -F -f - -n bigfile | \ + sort -un | perl -pe 's/^\d+://' + +The 1M should be your free memory divided by the number of CPU threads and +divided by 200 for B<grep -F> and by 1000 for normal B<grep>. On +GNU/Linux you can do: + + free=$(awk '/^((Swap)?Cached|MemFree|Buffers):/ { sum += $2 } + END { print sum }' /proc/meminfo) + percpu=$((free / 200 / $(parallel --number-of-threads)))k + + parallel --pipe-part -a regexps.txt --block $percpu --compress \ + grep -F -f - -n bigfile | \ + sort -un | perl -pe 's/^\d+://' + +If you can live with duplicated lines and wrong order, it is faster to do: + + parallel --pipe-part -a regexps.txt --block $percpu --compress \ + grep -F -f - bigfile + +=head3 Limiting factor: CPU + +If the CPU is the limiting factor parallelization should be done on +the regexps: + + cat regexps.txt | parallel --pipe -L1000 --round-robin --compress \ + grep -f - -n bigfile | \ + sort -un | perl -pe 's/^\d+://' + +The command will start one B<grep> per CPU and read I<bigfile> one +time per CPU, but as that is done in parallel, all reads except the +first will be cached in RAM. Depending on the size of I<regexps.txt> it +may be faster to use B<--block 10m> instead of B<-L1000>. + +Some storage systems perform better when reading multiple chunks in +parallel. This is true for some RAID systems and for some network file +systems. To parallelize the reading of I<bigfile>: + + parallel --pipe-part --block 100M -a bigfile -k --compress \ + grep -f regexps.txt + +This will split I<bigfile> into 100MB chunks and run B<grep> on each of +these chunks. To parallelize both reading of I<bigfile> and I<regexps.txt> +combine the two using B<--cat>: + + parallel --pipe-part --block 100M -a bigfile --cat cat regexps.txt \ + \| parallel --pipe -L1000 --round-robin grep -f - {} + +If a line matches multiple regexps, the line may be duplicated. + +=head3 Bigger problem + +If the problem is too big to be solved by this, you are probably ready +for Lucene. + + +=head2 EXAMPLE: Using remote computers + +To run commands on a remote computer SSH needs to be set up and you +must be able to login without entering a password (The commands +B<ssh-copy-id>, B<ssh-agent>, and B<sshpass> may help you do that). + +If you need to login to a whole cluster, you typically do not want to +accept the host key for every host. You want to accept them the first +time and be warned if they are ever changed. To do that: + + # Add the servers to the sshloginfile + (echo servera; echo serverb) > .parallel/my_cluster + # Make sure .ssh/config exist + touch .ssh/config + cp .ssh/config .ssh/config.backup + # Disable StrictHostKeyChecking temporarily + (echo 'Host *'; echo StrictHostKeyChecking no) >> .ssh/config + parallel --slf my_cluster --nonall true + # Remove the disabling of StrictHostKeyChecking + mv .ssh/config.backup .ssh/config + +The servers in B<.parallel/my_cluster> are now added in B<.ssh/known_hosts>. + +To run B<echo> on B<server.example.com>: + + seq 10 | parallel --sshlogin server.example.com echo + +To run commands on more than one remote computer run: + + seq 10 | parallel --sshlogin s1.example.com,s2.example.net echo + +Or: + + seq 10 | parallel --sshlogin server.example.com \ + --sshlogin server2.example.net echo + +If the login username is I<foo> on I<server2.example.net> use: + + seq 10 | parallel --sshlogin server.example.com \ + --sshlogin foo@server2.example.net echo + +If your list of hosts is I<server1-88.example.net> with login I<foo>: + + seq 10 | parallel -Sfoo@server{1..88}.example.net echo + +To distribute the commands to a list of computers, make a file +I<mycomputers> with all the computers: + + server.example.com + foo@server2.example.com + server3.example.com + +Then run: + + seq 10 | parallel --sshloginfile mycomputers echo + +To include the local computer add the special sshlogin ':' to the list: + + server.example.com + foo@server2.example.com + server3.example.com + : + +GNU B<parallel> will try to determine the number of CPUs on each of +the remote computers, and run one job per CPU - even if the remote +computers do not have the same number of CPUs. + +If the number of CPUs on the remote computers is not identified +correctly the number of CPUs can be added in front. Here the computer +has 8 CPUs. + + seq 10 | parallel --sshlogin 8/server.example.com echo + + +=head2 EXAMPLE: Transferring of files + +To recompress gzipped files with B<bzip2> using a remote computer run: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com \ + --transfer "zcat {} | bzip2 -9 >{.}.bz2" + +This will list the .gz-files in the I<logs> directory and all +directories below. Then it will transfer the files to +I<server.example.com> to the corresponding directory in +I<$HOME/logs>. On I<server.example.com> the file will be recompressed +using B<zcat> and B<bzip2> resulting in the corresponding file with +I<.gz> replaced with I<.bz2>. + +If you want the resulting bz2-file to be transferred back to the local +computer add I<--return {.}.bz2>: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com \ + --transfer --return {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +After the recompressing is done the I<.bz2>-file is transferred back to +the local computer and put next to the original I<.gz>-file. + +If you want to delete the transferred files on the remote computer add +I<--cleanup>. This will remove both the file transferred to the remote +computer and the files transferred from the remote computer: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com \ + --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +If you want run on several computers add the computers to I<--sshlogin> +either using ',' or multiple I<--sshlogin>: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com,server2.example.com \ + --sshlogin server3.example.com \ + --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +You can add the local computer using I<--sshlogin :>. This will disable the +removing and transferring for the local computer only: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com,server2.example.com \ + --sshlogin server3.example.com \ + --sshlogin : \ + --transfer --return {.}.bz2 --cleanup "zcat {} | bzip2 -9 >{.}.bz2" + +Often I<--transfer>, I<--return> and I<--cleanup> are used together. They can be +shortened to I<--trc>: + + find logs/ -name '*.gz' | \ + parallel --sshlogin server.example.com,server2.example.com \ + --sshlogin server3.example.com \ + --sshlogin : \ + --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +With the file I<mycomputers> containing the list of computers it becomes: + + find logs/ -name '*.gz' | parallel --sshloginfile mycomputers \ + --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + +If the file I<~/.parallel/sshloginfile> contains the list of computers +the special short hand I<-S ..> can be used: + + find logs/ -name '*.gz' | parallel -S .. \ + --trc {.}.bz2 "zcat {} | bzip2 -9 >{.}.bz2" + + +=head2 EXAMPLE: Advanced file transfer + +Assume you have files in in/*, want them processed on server, +and transferred back into /other/dir: + + parallel -S server --trc /other/dir/./{/}.out \ + cp {/} {/}.out ::: in/./* + + +=head2 EXAMPLE: Distributing work to local and remote computers + +Convert *.mp3 to *.ogg running one process per CPU on local computer +and server2: + + parallel --trc {.}.ogg -S server2,: \ + 'mpg321 -w - {} | oggenc -q0 - -o {.}.ogg' ::: *.mp3 + + +=head2 EXAMPLE: Running the same command on remote computers + +To run the command B<uptime> on remote computers you can do: + + parallel --tag --nonall -S server1,server2 uptime + +B<--nonall> reads no arguments. If you have a list of jobs you want +to run on each computer you can do: + + parallel --tag --onall -S server1,server2 echo ::: 1 2 3 + +Remove B<--tag> if you do not want the sshlogin added before the +output. + +If you have a lot of hosts use '-j0' to access more hosts in parallel. + + +=head2 EXAMPLE: Running 'sudo' on remote computers + +Put the password into passwordfile then run: + + parallel --ssh 'cat passwordfile | ssh' --nonall \ + -S user@server1,user@server2 sudo -S ls -l /root + + +=head2 EXAMPLE: Using remote computers behind NAT wall + +If the workers are behind a NAT wall, you need some trickery to get to +them. + +If you can B<ssh> to a jumphost, and reach the workers from there, +then the obvious solution would be this, but it B<does not work>: + + parallel --ssh 'ssh jumphost ssh' -S host1 echo ::: DOES NOT WORK + +It does not work because the command is dequoted by B<ssh> twice where +as GNU B<parallel> only expects it to be dequoted once. + +You can use a bash function and have GNU B<parallel> quote the command: + + jumpssh() { ssh -A jumphost ssh $(parallel --shellquote ::: "$@"); } + export -f jumpssh + parallel --ssh jumpssh -S host1 echo ::: this works + +Or you can instead put this in B<~/.ssh/config>: + + Host host1 host2 host3 + ProxyCommand ssh jumphost.domain nc -w 1 %h 22 + +It requires B<nc(netcat)> to be installed on jumphost. With this you +can simply: + + parallel -S host1,host2,host3 echo ::: This does work + +=head3 No jumphost, but port forwards + +If there is no jumphost but each server has port 22 forwarded from the +firewall (e.g. the firewall's port 22001 = port 22 on host1, 22002 = host2, +22003 = host3) then you can use B<~/.ssh/config>: + + Host host1.v + Port 22001 + Host host2.v + Port 22002 + Host host3.v + Port 22003 + Host *.v + Hostname firewall + +And then use host{1..3}.v as normal hosts: + + parallel -S host1.v,host2.v,host3.v echo ::: a b c + +=head3 No jumphost, no port forwards + +If ports cannot be forwarded, you need some sort of VPN to traverse +the NAT-wall. TOR is one options for that, as it is very easy to get +working. + +You need to install TOR and setup a hidden service. In B<torrc> put: + + HiddenServiceDir /var/lib/tor/hidden_service/ + HiddenServicePort 22 127.0.0.1:22 + +Then start TOR: B</etc/init.d/tor restart> + +The TOR hostname is now in B</var/lib/tor/hidden_service/hostname> and +is something similar to B<izjafdceobowklhz.onion>. Now you simply +prepend B<torsocks> to B<ssh>: + + parallel --ssh 'torsocks ssh' -S izjafdceobowklhz.onion \ + -S zfcdaeiojoklbwhz.onion,auclucjzobowklhi.onion echo ::: a b c + +If not all hosts are accessible through TOR: + + parallel -S 'torsocks ssh izjafdceobowklhz.onion,host2,host3' \ + echo ::: a b c + +See more B<ssh> tricks on https://en.wikibooks.org/wiki/OpenSSH/Cookbook/Proxies_and_Jump_Hosts + + +=head2 EXAMPLE: Use sshpass with ssh + +If you cannot use passwordless login, you may be able to use B<sshpass>: + + seq 10 | parallel -S user-with-password:MyPassword@server echo + +or: + + export SSHPASS='MyPa$$w0rd' + seq 10 | parallel -S user-with-password:@server echo + + +=head2 EXAMPLE: Use outrun instead of ssh + +B<outrun> lets you run a command on a remote server. B<outrun> sets up +a connection to access files at the source server, and automatically +transfers files. B<outrun> must be installed on the remote system. + +You can use B<outrun> in an sshlogin this way: + + parallel -S 'outrun user@server' command + +or: + + parallel --ssh outrun -S server command + + +=head2 EXAMPLE: Slurm cluster + +The Slurm Workload Manager is used in many clusters. + +Here is a simple example of using GNU B<parallel> to call B<srun>: + + #!/bin/bash + + #SBATCH --time 00:02:00 + #SBATCH --ntasks=4 + #SBATCH --job-name GnuParallelDemo + #SBATCH --output gnuparallel.out + + module purge + module load gnu_parallel + + my_parallel="parallel --delay .2 -j $SLURM_NTASKS" + my_srun="srun --export=all --exclusive -n1 --cpus-per-task=1 --cpu-bind=cores" + $my_parallel "$my_srun" echo This is job {} ::: {1..20} + + +=head2 EXAMPLE: Parallelizing rsync + +B<rsync> is a great tool, but sometimes it will not fill up the +available bandwidth. Running multiple B<rsync> in parallel can fix +this. + + cd src-dir + find . -type f | + parallel -j10 -X rsync -zR -Ha ./{} fooserver:/dest-dir/ + +Adjust B<-j10> until you find the optimal number. + +B<rsync -R> will create the needed subdirectories, so all files are +not put into a single dir. The B<./> is needed so the resulting command +looks similar to: + + rsync -zR ././sub/dir/file fooserver:/dest-dir/ + +The B</./> is what B<rsync -R> works on. + +If you are unable to push data, but need to pull them and the files +are called digits.png (e.g. 000000.png) you might be able to do: + + seq -w 0 99 | parallel rsync -Havessh fooserver:src/*{}.png destdir/ + + +=head2 EXAMPLE: Use multiple inputs in one command + +Copy files like foo.es.ext to foo.ext: + + ls *.es.* | perl -pe 'print; s/\.es//' | parallel -N2 cp {1} {2} + +The perl command spits out 2 lines for each input. GNU B<parallel> +takes 2 inputs (using B<-N2>) and replaces {1} and {2} with the inputs. + +Count in binary: + + parallel -k echo ::: 0 1 ::: 0 1 ::: 0 1 ::: 0 1 ::: 0 1 ::: 0 1 + +Print the number on the opposing sides of a six sided die: + + parallel --link -a <(seq 6) -a <(seq 6 -1 1) echo + parallel --link echo :::: <(seq 6) <(seq 6 -1 1) + +Convert files from all subdirs to PNG-files with consecutive numbers +(useful for making input PNG's for B<ffmpeg>): + + parallel --link -a <(find . -type f | sort) \ + -a <(seq $(find . -type f|wc -l)) convert {1} {2}.png + +Alternative version: + + find . -type f | sort | parallel convert {} {#}.png + + +=head2 EXAMPLE: Use a table as input + +Content of table_file.tsv: + + foo<TAB>bar + baz <TAB> quux + +To run: + + cmd -o bar -i foo + cmd -o quux -i baz + +you can run: + + parallel -a table_file.tsv --colsep '\t' cmd -o {2} -i {1} + +Note: The default for GNU B<parallel> is to remove the spaces around +the columns. To keep the spaces: + + parallel -a table_file.tsv --trim n --colsep '\t' cmd -o {2} -i {1} + + +=head2 EXAMPLE: Output to database + +GNU B<parallel> can output to a database table and a CSV-file: + + dburl=csv:///%2Ftmp%2Fmydir + dbtableurl=$dburl/mytable.csv + parallel --sqlandworker $dbtableurl seq ::: {1..10} + +It is rather slow and takes up a lot of CPU time because GNU +B<parallel> parses the whole CSV file for each update. + +A better approach is to use an SQLite-base and then convert that to CSV: + + dburl=sqlite3:///%2Ftmp%2Fmy.sqlite + dbtableurl=$dburl/mytable + parallel --sqlandworker $dbtableurl seq ::: {1..10} + sql $dburl '.headers on' '.mode csv' 'SELECT * FROM mytable;' + +This takes around a second per job. + +If you have access to a real database system, such as PostgreSQL, it +is even faster: + + dburl=pg://user:pass@host/mydb + dbtableurl=$dburl/mytable + parallel --sqlandworker $dbtableurl seq ::: {1..10} + sql $dburl \ + "COPY (SELECT * FROM mytable) TO stdout DELIMITER ',' CSV HEADER;" + +Or MySQL: + + dburl=mysql://user:pass@host/mydb + dbtableurl=$dburl/mytable + parallel --sqlandworker $dbtableurl seq ::: {1..10} + sql -p -B $dburl "SELECT * FROM mytable;" > mytable.tsv + perl -pe 's/"/""/g; s/\t/","/g; s/^/"/; s/$/"/; + %s=("\\" => "\\", "t" => "\t", "n" => "\n"); + s/\\([\\tn])/$s{$1}/g;' mytable.tsv + + +=head2 EXAMPLE: Output to CSV-file for R + +If you have no need for the advanced job distribution control that a +database provides, but you simply want output into a CSV file that you +can read into R or LibreCalc, then you can use B<--results>: + + parallel --results my.csv seq ::: 10 20 30 + R + > mydf <- read.csv("my.csv"); + > print(mydf[2,]) + > write(as.character(mydf[2,c("Stdout")]),'') + + +=head2 EXAMPLE: Use XML as input + +The show Aflyttet on Radio 24syv publishes an RSS feed with their audio +podcasts on: http://arkiv.radio24syv.dk/audiopodcast/channel/4466232 + +Using B<xpath> you can extract the URLs for 2019 and download them +using GNU B<parallel>: + + wget -O - http://arkiv.radio24syv.dk/audiopodcast/channel/4466232 | \ + xpath -e "//pubDate[contains(text(),'2019')]/../enclosure/@url" | \ + parallel -u wget '{= s/ url="//; s/"//; =}' + + +=head2 EXAMPLE: Run the same command 10 times + +If you want to run the same command with the same arguments 10 times +in parallel you can do: + + seq 10 | parallel -n0 my_command my_args + + +=head2 EXAMPLE: Working as cat | sh. Resource inexpensive jobs and evaluation + +GNU B<parallel> can work similar to B<cat | sh>. + +A resource inexpensive job is a job that takes very little CPU, disk +I/O and network I/O. Ping is an example of a resource inexpensive +job. wget is too - if the webpages are small. + +The content of the file jobs_to_run: + + ping -c 1 10.0.0.1 + wget http://example.com/status.cgi?ip=10.0.0.1 + ping -c 1 10.0.0.2 + wget http://example.com/status.cgi?ip=10.0.0.2 + ... + ping -c 1 10.0.0.255 + wget http://example.com/status.cgi?ip=10.0.0.255 + +To run 100 processes simultaneously do: + + parallel -j 100 < jobs_to_run + +As there is not a I<command> the jobs will be evaluated by the shell. + + +=head2 EXAMPLE: Call program with FASTA sequence + +FASTA files have the format: + + >Sequence name1 + sequence + sequence continued + >Sequence name2 + sequence + sequence continued + more sequence + +To call B<myprog> with the sequence as argument run: + + cat file.fasta | + parallel --pipe -N1 --recstart '>' --rrs \ + 'read a; echo Name: "$a"; myprog $(tr -d "\n")' + + +=head2 EXAMPLE: Call program with interleaved FASTQ records + +FASTQ files have the format: + + @M10991:61:000000000-A7EML:1:1101:14011:1001 1:N:0:28 + CTCCTAGGTCGGCATGATGGGGGAAGGAGAGCATGGGAAGAAATGAGAGAGTAGCAAGG + + + #8BCCGGGGGFEFECFGGGGGGGGG@;FFGGGEG@FF<EE<@FFC,CEGCCGGFF<FGF + +Interleaved FASTQ starts with a line like these: + + @HWUSI-EAS100R:6:73:941:1973#0/1 + @EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG + @EAS139:136:FC706VJ:2:2104:15343:197393 1:N:18:1 + +where '/1' and ' 1:' determines this is read 1. + +This will cut big.fq into one chunk per CPU thread and pass it on +stdin (standard input) to the program fastq-reader: + + parallel --pipe-part -a big.fq --block -1 --regexp \ + --recend '\n' --recstart '@.*(/1| 1:.*)\n[A-Za-z\n\.~]' \ + fastq-reader + + +=head2 EXAMPLE: Processing a big file using more CPUs + +To process a big file or some output you can use B<--pipe> to split up +the data into blocks and pipe the blocks into the processing program. + +If the program is B<gzip -9> you can do: + + cat bigfile | parallel --pipe --recend '' -k gzip -9 > bigfile.gz + +This will split B<bigfile> into blocks of 1 MB and pass that to B<gzip +-9> in parallel. One B<gzip> will be run per CPU. The output of B<gzip +-9> will be kept in order and saved to B<bigfile.gz> + +B<gzip> works fine if the output is appended, but some processing does +not work like that - for example sorting. For this GNU B<parallel> can +put the output of each command into a file. This will sort a big file +in parallel: + + cat bigfile | parallel --pipe --files sort |\ + parallel -Xj1 sort -m {} ';' rm {} >bigfile.sort + +Here B<bigfile> is split into blocks of around 1MB, each block ending +in '\n' (which is the default for B<--recend>). Each block is passed +to B<sort> and the output from B<sort> is saved into files. These +files are passed to the second B<parallel> that runs B<sort -m> on the +files before it removes the files. The output is saved to +B<bigfile.sort>. + +GNU B<parallel>'s B<--pipe> maxes out at around 100 MB/s because every +byte has to be copied through GNU B<parallel>. But if B<bigfile> is a +real (seekable) file GNU B<parallel> can by-pass the copying and send +the parts directly to the program: + + parallel --pipe-part --block 100m -a bigfile --files sort |\ + parallel -Xj1 sort -m {} ';' rm {} >bigfile.sort + + +=head2 EXAMPLE: Grouping input lines + +When processing with B<--pipe> you may have lines grouped by a +value. Here is I<my.csv>: + + Transaction Customer Item + 1 a 53 + 2 b 65 + 3 b 82 + 4 c 96 + 5 c 67 + 6 c 13 + 7 d 90 + 8 d 43 + 9 d 91 + 10 d 84 + 11 e 72 + 12 e 102 + 13 e 63 + 14 e 56 + 15 e 74 + +Let us assume you want GNU B<parallel> to process each customer. In +other words: You want all the transactions for a single customer to be +treated as a single record. + +To do this we preprocess the data with a program that inserts a record +separator before each customer (column 2 = $F[1]). Here we first make +a 50 character random string, which we then use as the separator: + + sep=`perl -e 'print map { ("a".."z","A".."Z")[rand(52)] } (1..50);'` + cat my.csv | \ + perl -ape '$F[1] ne $l and print "'$sep'"; $l = $F[1]' | \ + parallel --recend $sep --rrs --pipe -N1 wc + +If your program can process multiple customers replace B<-N1> with a +reasonable B<--blocksize>. + + +=head2 EXAMPLE: Running more than 250 jobs workaround + +If you need to run a massive amount of jobs in parallel, then you will +likely hit the filehandle limit which is often around 250 jobs. If you +are super user you can raise the limit in /etc/security/limits.conf +but you can also use this workaround. The filehandle limit is per +process. That means that if you just spawn more GNU B<parallel>s then +each of them can run 250 jobs. This will spawn up to 2500 jobs: + + cat myinput |\ + parallel --pipe -N 50 --round-robin -j50 parallel -j50 your_prg + +This will spawn up to 62500 jobs (use with caution - you need 64 GB +RAM to do this, and you may need to increase /proc/sys/kernel/pid_max): + + cat myinput |\ + parallel --pipe -N 250 --round-robin -j250 parallel -j250 your_prg + + +=head2 EXAMPLE: Working as mutex and counting semaphore + +The command B<sem> is an alias for B<parallel --semaphore>. + +A counting semaphore will allow a given number of jobs to be started +in the background. When the number of jobs are running in the +background, GNU B<sem> will wait for one of these to complete before +starting another command. B<sem --wait> will wait for all jobs to +complete. + +Run 10 jobs concurrently in the background: + + for i in *.log ; do + echo $i + sem -j10 gzip $i ";" echo done + done + sem --wait + +A mutex is a counting semaphore allowing only one job to run. This +will edit the file I<myfile> and prepends the file with lines with the +numbers 1 to 3. + + seq 3 | parallel sem sed -i -e '1i{}' myfile + +As I<myfile> can be very big it is important only one process edits +the file at the same time. + +Name the semaphore to have multiple different semaphores active at the +same time: + + seq 3 | parallel sem --id mymutex sed -i -e '1i{}' myfile + + +=head2 EXAMPLE: Mutex for a script + +Assume a script is called from cron or from a web service, but only +one instance can be run at a time. With B<sem> and B<--shebang-wrap> +the script can be made to wait for other instances to finish. Here in +B<bash>: + + #!/usr/bin/sem --shebang-wrap -u --id $0 --fg /bin/bash + + echo This will run + sleep 5 + echo exclusively + +Here B<perl>: + + #!/usr/bin/sem --shebang-wrap -u --id $0 --fg /usr/bin/perl + + print "This will run "; + sleep 5; + print "exclusively\n"; + +Here B<python>: + + #!/usr/local/bin/sem --shebang-wrap -u --id $0 --fg /usr/bin/python + + import time + print "This will run "; + time.sleep(5) + print "exclusively"; + + +=head2 EXAMPLE: Start editor with filenames from stdin (standard input) + +You can use GNU B<parallel> to start interactive programs like emacs or vi: + + cat filelist | parallel --tty -X emacs + cat filelist | parallel --tty -X vi + +If there are more files than will fit on a single command line, the +editor will be started again with the remaining files. + + +=head2 EXAMPLE: Running sudo + +B<sudo> requires a password to run a command as root. It caches the +access, so you only need to enter the password again if you have not +used B<sudo> for a while. + +The command: + + parallel sudo echo ::: This is a bad idea + +is no good, as you would be prompted for the sudo password for each of +the jobs. Instead do: + + sudo parallel echo ::: This is a good idea + +This way you only have to enter the sudo password once. + +=head2 EXAMPLE: Run ping in parallel + +B<ping> prints out statistics when killed with CTRL-C. + +Unfortunately, CTRL-C will also normally kill GNU B<parallel>. + +But by using B<--open-tty> and ignoring SIGINT you can get the wanted effect: + + parallel -j0 --open-tty --lb --tag ping '{= $SIG{INT}=sub {} =}' \ + ::: 1.1.1.1 8.8.8.8 9.9.9.9 21.21.21.21 80.80.80.80 88.88.88.88 + +B<--open-tty> will make the B<ping>s receive SIGINT (from CTRL-C). +CTRL-C will not kill GNU B<parallel>, so that will only exit after +B<ping> is done. + + +=head2 EXAMPLE: GNU Parallel as queue system/batch manager + +GNU B<parallel> can work as a simple job queue system or batch manager. +The idea is to put the jobs into a file and have GNU B<parallel> read +from that continuously. As GNU B<parallel> will stop at end of file we +use B<tail> to continue reading: + + true >jobqueue; tail -n+0 -f jobqueue | parallel + +To submit your jobs to the queue: + + echo my_command my_arg >> jobqueue + +You can of course use B<-S> to distribute the jobs to remote +computers: + + true >jobqueue; tail -n+0 -f jobqueue | parallel -S .. + +Output only will be printed when reading the next input after a job +has finished: So you need to submit a job after the first has finished +to see the output from the first job. + +If you keep this running for a long time, jobqueue will grow. A way of +removing the jobs already run is by making GNU B<parallel> stop when +it hits a special value and then restart. To use B<--eof> to make GNU +B<parallel> exit, B<tail> also needs to be forced to exit: + + true >jobqueue; + while true; do + tail -n+0 -f jobqueue | + (parallel -E StOpHeRe -S ..; echo GNU Parallel is now done; + perl -e 'while(<>){/StOpHeRe/ and last};print <>' jobqueue > j2; + (seq 1000 >> jobqueue &); + echo Done appending dummy data forcing tail to exit) + echo tail exited; + mv j2 jobqueue + done + +In some cases you can run on more CPUs and computers during the night: + + # Day time + echo 50% > jobfile + cp day_server_list ~/.parallel/sshloginfile + # Night time + echo 100% > jobfile + cp night_server_list ~/.parallel/sshloginfile + tail -n+0 -f jobqueue | parallel --jobs jobfile -S .. + +GNU B<parallel> discovers if B<jobfile> or B<~/.parallel/sshloginfile> +changes. + + +=head2 EXAMPLE: GNU Parallel as dir processor + +If you have a dir in which users drop files that needs to be processed +you can do this on GNU/Linux (If you know what B<inotifywait> is +called on other platforms file a bug report): + + inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f my_dir |\ + parallel -u echo + +This will run the command B<echo> on each file put into B<my_dir> or +subdirs of B<my_dir>. + +You can of course use B<-S> to distribute the jobs to remote +computers: + + inotifywait -qmre MOVED_TO -e CLOSE_WRITE --format %w%f my_dir |\ + parallel -S .. -u echo + +If the files to be processed are in a tar file then unpacking one file +and processing it immediately may be faster than first unpacking all +files. Set up the dir processor as above and unpack into the dir. + +Using GNU B<parallel> as dir processor has the same limitations as +using GNU B<parallel> as queue system/batch manager. + + +=head2 EXAMPLE: Locate the missing package + +If you have downloaded source and tried compiling it, you may have seen: + + $ ./configure + [...] + checking for something.h... no + configure: error: "libsomething not found" + +Often it is not obvious which package you should install to get that +file. Debian has `apt-file` to search for a file. `tracefile` from +https://gitlab.com/ole.tange/tangetools can tell which files a program +tried to access. In this case we are interested in one of the last +files: + + $ tracefile -un ./configure | tail | parallel -j0 apt-file search + + +=head1 AUTHOR + +When using GNU B<parallel> for a publication please cite: + +O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: +The USENIX Magazine, February 2011:42-47. + +This helps funding further development; and it won't cost you a cent. +If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +Parts of the manual concerning B<xargs> compatibility is inspired by +the manual of B<xargs> from GNU findutils 4.4.2. + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 SEE ALSO + +B<parallel>(1), B<parallel_tutorial>(7), B<env_parallel>(1), +B<parset>(1), B<parsort>(1), B<parallel_alternatives>(7), +B<parallel_design>(7), B<niceload>(1), B<sql>(1), B<ssh>(1), +B<ssh-agent>(1), B<sshpass>(1), B<ssh-copy-id>(1), B<rsync>(1) + +=cut diff --git a/src/parallel_tutorial.pod b/src/parallel_tutorial.pod new file mode 100644 index 0000000..51b5e1c --- /dev/null +++ b/src/parallel_tutorial.pod @@ -0,0 +1,3172 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=head1 GNU Parallel Tutorial + +This tutorial shows off much of GNU B<parallel>'s functionality. The +tutorial is meant to learn the options in and syntax of GNU +B<parallel>. The tutorial is B<not> to show realistic examples from the +real world. + +=head2 Reader's guide + +If you prefer reading a book buy B<GNU Parallel 2018> at +https://www.lulu.com/shop/ole-tange/gnu-parallel-2018/paperback/product-23558902.html +or download it at: https://doi.org/10.5281/zenodo.1146014 + +Otherwise start by watching the intro videos for a quick introduction: +https://www.youtube.com/playlist?list=PL284C9FF2488BC6D1 + +Then browse through the B<EXAMPLE>s after the list of B<OPTIONS> in +B<man parallel> (Use B<LESS=+/EXAMPLE: man parallel>). That will give +you an idea of what GNU B<parallel> is capable of. + +If you want to dive even deeper: spend a couple of hours walking +through the tutorial (B<man parallel_tutorial>). Your command line +will love you for it. + +Finally you may want to look at the rest of the manual (B<man +parallel>) if you have special needs not already covered. + +If you want to know the design decisions behind GNU B<parallel>, try: +B<man parallel_design>. This is also a good intro if you intend to +change GNU B<parallel>. + + + +=head1 Prerequisites + +To run this tutorial you must have the following: + +=over 9 + +=item parallel >= version 20160822 + +Install the newest version using your package manager (recommended for +security reasons), the way described in README, or with this command: + + $ (wget -O - pi.dk/3 || lynx -source pi.dk/3 || curl pi.dk/3/ || \ + fetch -o - http://pi.dk/3 ) > install.sh + $ sha1sum install.sh + 12345678 3374ec53 bacb199b 245af2dd a86df6c9 + $ md5sum install.sh + 029a9ac0 6e8b5bc6 052eac57 b2c3c9ca + $ sha512sum install.sh + 40f53af6 9e20dae5 713ba06c f517006d 9897747b ed8a4694 b1acba1b 1464beb4 + 60055629 3f2356f3 3e9c4e3c 76e3f3af a9db4b32 bd33322b 975696fc e6b23cfb + $ bash install.sh + +This will also install the newest version of the tutorial which you +can see by running this: + + man parallel_tutorial + +Most of the tutorial will work on older versions, too. + + +=item abc-file: + +The file can be generated by this command: + + parallel -k echo ::: A B C > abc-file + +=item def-file: + +The file can be generated by this command: + + parallel -k echo ::: D E F > def-file + +=item abc0-file: + +The file can be generated by this command: + + perl -e 'printf "A\0B\0C\0"' > abc0-file + +=item abc_-file: + +The file can be generated by this command: + + perl -e 'printf "A_B_C_"' > abc_-file + +=item tsv-file.tsv + +The file can be generated by this command: + + perl -e 'printf "f1\tf2\nA\tB\nC\tD\n"' > tsv-file.tsv + +=item num8 + +The file can be generated by this command: + + perl -e 'for(1..8){print "$_\n"}' > num8 + +=item num128 + +The file can be generated by this command: + + perl -e 'for(1..128){print "$_\n"}' > num128 + +=item num30000 + +The file can be generated by this command: + + perl -e 'for(1..30000){print "$_\n"}' > num30000 + +=item num1000000 + +The file can be generated by this command: + + perl -e 'for(1..1000000){print "$_\n"}' > num1000000 + +=item num_%header + +The file can be generated by this command: + + (echo %head1; echo %head2; \ + perl -e 'for(1..10){print "$_\n"}') > num_%header + +=item fixedlen + +The file can be generated by this command: + + perl -e 'print "HHHHAAABBBCCC"' > fixedlen + +=item For remote running: ssh login on 2 servers with no password in +$SERVER1 and $SERVER2 must work. + + SERVER1=server.example.com + SERVER2=server2.example.net + +So you must be able to do this without entering a password: + + ssh $SERVER1 echo works + ssh $SERVER2 echo works + +It can be setup by running 'ssh-keygen -t dsa; ssh-copy-id $SERVER1' +and using an empty passphrase, or you can use B<ssh-agent>. + +=back + + +=head1 Input sources + +GNU B<parallel> reads input from input sources. These can be files, the +command line, and stdin (standard input or a pipe). + +=head2 A single input source + +Input can be read from the command line: + + parallel echo ::: A B C + +Output (the order may be different because the jobs are run in +parallel): + + A + B + C + +The input source can be a file: + + parallel -a abc-file echo + +Output: Same as above. + +STDIN (standard input) can be the input source: + + cat abc-file | parallel echo + +Output: Same as above. + + +=head2 Multiple input sources + +GNU B<parallel> can take multiple input sources given on the command +line. GNU B<parallel> then generates all combinations of the input +sources: + + parallel echo ::: A B C ::: D E F + +Output (the order may be different): + + A D + A E + A F + B D + B E + B F + C D + C E + C F + +The input sources can be files: + + parallel -a abc-file -a def-file echo + +Output: Same as above. + +STDIN (standard input) can be one of the input sources using B<->: + + cat abc-file | parallel -a - -a def-file echo + +Output: Same as above. + +Instead of B<-a> files can be given after B<::::>: + + cat abc-file | parallel echo :::: - def-file + +Output: Same as above. + +::: and :::: can be mixed: + + parallel echo ::: A B C :::: def-file + +Output: Same as above. + +=head3 Linking arguments from input sources + +With B<--link> you can link the input sources and get one argument +from each input source: + + parallel --link echo ::: A B C ::: D E F + +Output (the order may be different): + + A D + B E + C F + +If one of the input sources is too short, its values will wrap: + + parallel --link echo ::: A B C D E ::: F G + +Output (the order may be different): + + A F + B G + C F + D G + E F + +For more flexible linking you can use B<:::+> and B<::::+>. They work +like B<:::> and B<::::> except they link the previous input source to +this input source. + +This will link ABC to GHI: + + parallel echo :::: abc-file :::+ G H I :::: def-file + +Output (the order may be different): + + A G D + A G E + A G F + B H D + B H E + B H F + C I D + C I E + C I F + +This will link GHI to DEF: + + parallel echo :::: abc-file ::: G H I ::::+ def-file + +Output (the order may be different): + + A G D + A H E + A I F + B G D + B H E + B I F + C G D + C H E + C I F + +If one of the input sources is too short when using B<:::+> or +B<::::+>, the rest will be ignored: + + parallel echo ::: A B C D E :::+ F G + +Output (the order may be different): + + A F + B G + + +=head2 Changing the argument separator. + +GNU B<parallel> can use other separators than B<:::> or B<::::>. This is +typically useful if B<:::> or B<::::> is used in the command to run: + + parallel --arg-sep ,, echo ,, A B C :::: def-file + +Output (the order may be different): + + A D + A E + A F + B D + B E + B F + C D + C E + C F + +Changing the argument file separator: + + parallel --arg-file-sep // echo ::: A B C // def-file + +Output: Same as above. + + +=head2 Changing the argument delimiter + +GNU B<parallel> will normally treat a full line as a single argument: It +uses B<\n> as argument delimiter. This can be changed with B<-d>: + + parallel -d _ echo :::: abc_-file + +Output (the order may be different): + + A + B + C + +NUL can be given as B<\0>: + + parallel -d '\0' echo :::: abc0-file + +Output: Same as above. + +A shorthand for B<-d '\0'> is B<-0> (this will often be used to read files +from B<find ... -print0>): + + parallel -0 echo :::: abc0-file + +Output: Same as above. + +=head2 End-of-file value for input source + +GNU B<parallel> can stop reading when it encounters a certain value: + + parallel -E stop echo ::: A B stop C D + +Output: + + A + B + +=head2 Skipping empty lines + +Using B<--no-run-if-empty> GNU B<parallel> will skip empty lines. + + (echo 1; echo; echo 2) | parallel --no-run-if-empty echo + +Output: + + 1 + 2 + + +=head1 Building the command line + +=head2 No command means arguments are commands + +If no command is given after parallel the arguments themselves are +treated as commands: + + parallel ::: ls 'echo foo' pwd + +Output (the order may be different): + + [list of files in current dir] + foo + [/path/to/current/working/dir] + +The command can be a script, a binary or a Bash function if the function is +exported using B<export -f>: + + # Only works in Bash + my_func() { + echo in my_func $1 + } + export -f my_func + parallel my_func ::: 1 2 3 + +Output (the order may be different): + + in my_func 1 + in my_func 2 + in my_func 3 + +=head2 Replacement strings + +=head3 The 7 predefined replacement strings + +GNU B<parallel> has several replacement strings. If no replacement +strings are used the default is to append B<{}>: + + parallel echo ::: A/B.C + +Output: + + A/B.C + +The default replacement string is B<{}>: + + parallel echo {} ::: A/B.C + +Output: + + A/B.C + +The replacement string B<{.}> removes the extension: + + parallel echo {.} ::: A/B.C + +Output: + + A/B + +The replacement string B<{/}> removes the path: + + parallel echo {/} ::: A/B.C + +Output: + + B.C + +The replacement string B<{//}> keeps only the path: + + parallel echo {//} ::: A/B.C + +Output: + + A + +The replacement string B<{/.}> removes the path and the extension: + + parallel echo {/.} ::: A/B.C + +Output: + + B + +The replacement string B<{#}> gives the job number: + + parallel echo {#} ::: A B C + +Output (the order may be different): + + 1 + 2 + 3 + +The replacement string B<{%}> gives the job slot number (between 1 and +number of jobs to run in parallel): + + parallel -j 2 echo {%} ::: A B C + +Output (the order may be different and 1 and 2 may be swapped): + + 1 + 2 + 1 + +=head3 Changing the replacement strings + +The replacement string B<{}> can be changed with B<-I>: + + parallel -I ,, echo ,, ::: A/B.C + +Output: + + A/B.C + +The replacement string B<{.}> can be changed with B<--extensionreplace>: + + parallel --extensionreplace ,, echo ,, ::: A/B.C + +Output: + + A/B + +The replacement string B<{/}> can be replaced with B<--basenamereplace>: + + parallel --basenamereplace ,, echo ,, ::: A/B.C + +Output: + + B.C + +The replacement string B<{//}> can be changed with B<--dirnamereplace>: + + parallel --dirnamereplace ,, echo ,, ::: A/B.C + +Output: + + A + +The replacement string B<{/.}> can be changed with B<--basenameextensionreplace>: + + parallel --basenameextensionreplace ,, echo ,, ::: A/B.C + +Output: + + B + +The replacement string B<{#}> can be changed with B<--seqreplace>: + + parallel --seqreplace ,, echo ,, ::: A B C + +Output (the order may be different): + + 1 + 2 + 3 + +The replacement string B<{%}> can be changed with B<--slotreplace>: + + parallel -j2 --slotreplace ,, echo ,, ::: A B C + +Output (the order may be different and 1 and 2 may be swapped): + + 1 + 2 + 1 + +=head3 Perl expression replacement string + +When predefined replacement strings are not flexible enough a perl +expression can be used instead. One example is to remove two +extensions: foo.tar.gz becomes foo + + parallel echo '{= s:\.[^.]+$::;s:\.[^.]+$::; =}' ::: foo.tar.gz + +Output: + + foo + +In B<{= =}> you can access all of GNU B<parallel>'s internal functions +and variables. A few are worth mentioning. + +B<total_jobs()> returns the total number of jobs: + + parallel echo Job {#} of {= '$_=total_jobs()' =} ::: {1..5} + +Output: + + Job 1 of 5 + Job 2 of 5 + Job 3 of 5 + Job 4 of 5 + Job 5 of 5 + +B<Q(...)> shell quotes the string: + + parallel echo {} shell quoted is {= '$_=Q($_)' =} ::: '*/!#$' + +Output: + + */!#$ shell quoted is \*/\!\#\$ + +B<skip()> skips the job: + + parallel echo {= 'if($_==3) { skip() }' =} ::: {1..5} + +Output: + + 1 + 2 + 4 + 5 + +B<@arg> contains the input source variables: + + parallel echo {= 'if($arg[1]==$arg[2]) { skip() }' =} \ + ::: {1..3} ::: {1..3} + +Output: + + 1 2 + 1 3 + 2 1 + 2 3 + 3 1 + 3 2 + +If the strings B<{=> and B<=}> cause problems they can be replaced with B<--parens>: + + parallel --parens ,,,, echo ',, s:\.[^.]+$::;s:\.[^.]+$::; ,,' \ + ::: foo.tar.gz + +Output: + + foo + +To define a shorthand replacement string use B<--rpl>: + + parallel --rpl '.. s:\.[^.]+$::;s:\.[^.]+$::;' echo '..' \ + ::: foo.tar.gz + +Output: Same as above. + +If the shorthand starts with B<{> it can be used as a positional +replacement string, too: + + parallel --rpl '{..} s:\.[^.]+$::;s:\.[^.]+$::;' echo '{..}' + ::: foo.tar.gz + +Output: Same as above. + +If the shorthand contains matching parenthesis the replacement string +becomes a dynamic replacement string and the string in the parenthesis +can be accessed as $$1. If there are multiple matching parenthesis, +the matched strings can be accessed using $$2, $$3 and so on. + +You can think of this as giving arguments to the replacement +string. Here we give the argument B<.tar.gz> to the replacement string +B<{%I<string>}> which removes I<string>: + + parallel --rpl '{%(.+?)} s/$$1$//;' echo {%.tar.gz}.zip ::: foo.tar.gz + +Output: + + foo.zip + +Here we give the two arguments B<tar.gz> and B<zip> to the replacement +string B<{/I<string1>/I<string2>}> which replaces I<string1> with +I<string2>: + + parallel --rpl '{/(.+?)/(.*?)} s/$$1/$$2/;' echo {/tar.gz/zip} \ + ::: foo.tar.gz + +Output: + + foo.zip + + +GNU B<parallel>'s 7 replacement strings are implemented as this: + + --rpl '{} ' + --rpl '{#} $_=$job->seq()' + --rpl '{%} $_=$job->slot()' + --rpl '{/} s:.*/::' + --rpl '{//} $Global::use{"File::Basename"} ||= + eval "use File::Basename; 1;"; $_ = dirname($_);' + --rpl '{/.} s:.*/::; s:\.[^/.]+$::;' + --rpl '{.} s:\.[^/.]+$::' + +=head3 Positional replacement strings + +With multiple input sources the argument from the individual input +sources can be accessed with S<< B<{>numberB<}> >>: + + parallel echo {1} and {2} ::: A B ::: C D + +Output (the order may be different): + + A and C + A and D + B and C + B and D + +The positional replacement strings can also be modified using B</>, B<//>, B</.>, and B<.>: + + parallel echo /={1/} //={1//} /.={1/.} .={1.} ::: A/B.C D/E.F + +Output (the order may be different): + + /=B.C //=A /.=B .=A/B + /=E.F //=D /.=E .=D/E + +If a position is negative, it will refer to the input source counted +from behind: + + parallel echo 1={1} 2={2} 3={3} -1={-1} -2={-2} -3={-3} \ + ::: A B ::: C D ::: E F + +Output (the order may be different): + + 1=A 2=C 3=E -1=E -2=C -3=A + 1=A 2=C 3=F -1=F -2=C -3=A + 1=A 2=D 3=E -1=E -2=D -3=A + 1=A 2=D 3=F -1=F -2=D -3=A + 1=B 2=C 3=E -1=E -2=C -3=B + 1=B 2=C 3=F -1=F -2=C -3=B + 1=B 2=D 3=E -1=E -2=D -3=B + 1=B 2=D 3=F -1=F -2=D -3=B + + +=head3 Positional perl expression replacement string + +To use a perl expression as a positional replacement string simply +prepend the perl expression with number and space: + + parallel echo '{=2 s:\.[^.]+$::;s:\.[^.]+$::; =} {1}' \ + ::: bar ::: foo.tar.gz + +Output: + + foo bar + +If a shorthand defined using B<--rpl> starts with B<{> it can be used as +a positional replacement string, too: + + parallel --rpl '{..} s:\.[^.]+$::;s:\.[^.]+$::;' echo '{2..} {1}' \ + ::: bar ::: foo.tar.gz + +Output: Same as above. + + +=head3 Input from columns + +The columns in a file can be bound to positional replacement strings +using B<--colsep>. Here the columns are separated by TAB (\t): + + parallel --colsep '\t' echo 1={1} 2={2} :::: tsv-file.tsv + +Output (the order may be different): + + 1=f1 2=f2 + 1=A 2=B + 1=C 2=D + +=head3 Header defined replacement strings + +With B<--header> GNU B<parallel> will use the first value of the input +source as the name of the replacement string. Only the non-modified +version B<{}> is supported: + + parallel --header : echo f1={f1} f2={f2} ::: f1 A B ::: f2 C D + +Output (the order may be different): + + f1=A f2=C + f1=A f2=D + f1=B f2=C + f1=B f2=D + +It is useful with B<--colsep> for processing files with TAB separated values: + + parallel --header : --colsep '\t' echo f1={f1} f2={f2} \ + :::: tsv-file.tsv + +Output (the order may be different): + + f1=A f2=B + f1=C f2=D + +=head3 More pre-defined replacement strings with --plus + +B<--plus> adds the replacement strings B<{+/} {+.} {+..} {+...} {..} {...} +{/..} {/...} {##}>. The idea being that B<{+foo}> matches the opposite of B<{foo}> +and B<{}> = B<{+/}>/B<{/}> = B<{.}>.B<{+.}> = B<{+/}>/B<{/.}>.B<{+.}> = B<{..}>.B<{+..}> = +B<{+/}>/B<{/..}>.B<{+..}> = B<{...}>.B<{+...}> = B<{+/}>/B<{/...}>.B<{+...}>. + + parallel --plus echo {} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {+/}/{/} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {.}.{+.} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {+/}/{/.}.{+.} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {..}.{+..} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {+/}/{/..}.{+..} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {...}.{+...} ::: dir/sub/file.ex1.ex2.ex3 + parallel --plus echo {+/}/{/...}.{+...} ::: dir/sub/file.ex1.ex2.ex3 + +Output: + + dir/sub/file.ex1.ex2.ex3 + +B<{##}> is simply the number of jobs: + + parallel --plus echo Job {#} of {##} ::: {1..5} + +Output: + + Job 1 of 5 + Job 2 of 5 + Job 3 of 5 + Job 4 of 5 + Job 5 of 5 + +=head3 Dynamic replacement strings with --plus + +B<--plus> also defines these dynamic replacement strings: + +=over 19 + +=item B<{:-I<string>}> + +Default value is I<string> if the argument is empty. + +=item B<{:I<number>}> + +Substring from I<number> till end of string. + +=item B<{:I<number1>:I<number2>}> + +Substring from I<number1> to I<number2>. + +=item B<{#I<string>}> + +If the argument starts with I<string>, remove it. + +=item B<{%I<string>}> + +If the argument ends with I<string>, remove it. + +=item B<{/I<string1>/I<string2>}> + +Replace I<string1> with I<string2>. + +=item B<{^I<string>}> + +If the argument starts with I<string>, upper case it. I<string> must +be a single letter. + +=item B<{^^I<string>}> + +If the argument contains I<string>, upper case it. I<string> must be a +single letter. + +=item B<{,I<string>}> + +If the argument starts with I<string>, lower case it. I<string> must +be a single letter. + +=item B<{,,I<string>}> + +If the argument contains I<string>, lower case it. I<string> must be a +single letter. + +=back + +They are inspired from B<Bash>: + + unset myvar + echo ${myvar:-myval} + parallel --plus echo {:-myval} ::: "$myvar" + + myvar=abcAaAdef + echo ${myvar:2} + parallel --plus echo {:2} ::: "$myvar" + + echo ${myvar:2:3} + parallel --plus echo {:2:3} ::: "$myvar" + + echo ${myvar#bc} + parallel --plus echo {#bc} ::: "$myvar" + echo ${myvar#abc} + parallel --plus echo {#abc} ::: "$myvar" + + echo ${myvar%de} + parallel --plus echo {%de} ::: "$myvar" + echo ${myvar%def} + parallel --plus echo {%def} ::: "$myvar" + + echo ${myvar/def/ghi} + parallel --plus echo {/def/ghi} ::: "$myvar" + + echo ${myvar^a} + parallel --plus echo {^a} ::: "$myvar" + echo ${myvar^^a} + parallel --plus echo {^^a} ::: "$myvar" + + myvar=AbcAaAdef + echo ${myvar,A} + parallel --plus echo '{,A}' ::: "$myvar" + echo ${myvar,,A} + parallel --plus echo '{,,A}' ::: "$myvar" + +Output: + + myval + myval + cAaAdef + cAaAdef + cAa + cAa + abcAaAdef + abcAaAdef + AaAdef + AaAdef + abcAaAdef + abcAaAdef + abcAaA + abcAaA + abcAaAghi + abcAaAghi + AbcAaAdef + AbcAaAdef + AbcAAAdef + AbcAAAdef + abcAaAdef + abcAaAdef + abcaaadef + abcaaadef + + +=head2 More than one argument + +With B<--xargs> GNU B<parallel> will fit as many arguments as possible on a +single line: + + cat num30000 | parallel --xargs echo | wc -l + +Output (if you run this under Bash on GNU/Linux): + + 2 + +The 30000 arguments fitted on 2 lines. + +The maximal length of a single line can be set with B<-s>. With a maximal +line length of 10000 chars 17 commands will be run: + + cat num30000 | parallel --xargs -s 10000 echo | wc -l + +Output: + + 17 + +For better parallelism GNU B<parallel> can distribute the arguments +between all the parallel jobs when end of file is met. + +Below GNU B<parallel> reads the last argument when generating the second +job. When GNU B<parallel> reads the last argument, it spreads all the +arguments for the second job over 4 jobs instead, as 4 parallel jobs +are requested. + +The first job will be the same as the B<--xargs> example above, but the +second job will be split into 4 evenly sized jobs, resulting in a +total of 5 jobs: + + cat num30000 | parallel --jobs 4 -m echo | wc -l + +Output (if you run this under Bash on GNU/Linux): + + 5 + +This is even more visible when running 4 jobs with 10 arguments. The +10 arguments are being spread over 4 jobs: + + parallel --jobs 4 -m echo ::: 1 2 3 4 5 6 7 8 9 10 + +Output: + + 1 2 3 + 4 5 6 + 7 8 9 + 10 + +A replacement string can be part of a word. B<-m> will not repeat the context: + + parallel --jobs 4 -m echo pre-{}-post ::: A B C D E F G + +Output (the order may be different): + + pre-A B-post + pre-C D-post + pre-E F-post + pre-G-post + +To repeat the context use B<-X> which otherwise works like B<-m>: + + parallel --jobs 4 -X echo pre-{}-post ::: A B C D E F G + +Output (the order may be different): + + pre-A-post pre-B-post + pre-C-post pre-D-post + pre-E-post pre-F-post + pre-G-post + +To limit the number of arguments use B<-N>: + + parallel -N3 echo ::: A B C D E F G H + +Output (the order may be different): + + A B C + D E F + G H + +B<-N> also sets the positional replacement strings: + + parallel -N3 echo 1={1} 2={2} 3={3} ::: A B C D E F G H + +Output (the order may be different): + + 1=A 2=B 3=C + 1=D 2=E 3=F + 1=G 2=H 3= + +B<-N0> reads 1 argument but inserts none: + + parallel -N0 echo foo ::: 1 2 3 + +Output: + + foo + foo + foo + +=head2 Quoting + +Command lines that contain special characters may need to be protected from the shell. + +The B<perl> program B<print "@ARGV\n"> basically works like B<echo>. + + perl -e 'print "@ARGV\n"' A + +Output: + + A + +To run that in parallel the command needs to be quoted: + + parallel perl -e 'print "@ARGV\n"' ::: This wont work + +Output: + + [Nothing] + +To quote the command use B<-q>: + + parallel -q perl -e 'print "@ARGV\n"' ::: This works + +Output (the order may be different): + + This + works + +Or you can quote the critical part using B<\'>: + + parallel perl -e \''print "@ARGV\n"'\' ::: This works, too + +Output (the order may be different): + + This + works, + too + +GNU B<parallel> can also \-quote full lines. Simply run this: + + parallel --shellquote + Warning: Input is read from the terminal. You either know what you + Warning: are doing (in which case: YOU ARE AWESOME!) or you forgot + Warning: ::: or :::: or to pipe data into parallel. If so + Warning: consider going through the tutorial: man parallel_tutorial + Warning: Press CTRL-D to exit. + perl -e 'print "@ARGV\n"' + [CTRL-D] + +Output: + + perl\ -e\ \'print\ \"@ARGV\\n\"\' + +This can then be used as the command: + + parallel perl\ -e\ \'print\ \"@ARGV\\n\"\' ::: This also works + +Output (the order may be different): + + This + also + works + + +=head2 Trimming space + +Space can be trimmed on the arguments using B<--trim>: + + parallel --trim r echo pre-{}-post ::: ' A ' + +Output: + + pre- A-post + +To trim on the left side: + + parallel --trim l echo pre-{}-post ::: ' A ' + +Output: + + pre-A -post + +To trim on the both sides: + + parallel --trim lr echo pre-{}-post ::: ' A ' + +Output: + + pre-A-post + + +=head2 Respecting the shell + +This tutorial uses Bash as the shell. GNU B<parallel> respects which +shell you are using, so in B<zsh> you can do: + + parallel echo \={} ::: zsh bash ls + +Output: + + /usr/bin/zsh + /bin/bash + /bin/ls + +In B<csh> you can do: + + parallel 'set a="{}"; if( { test -d "$a" } ) echo "$a is a dir"' ::: * + +Output: + + [somedir] is a dir + +This also becomes useful if you use GNU B<parallel> in a shell script: +GNU B<parallel> will use the same shell as the shell script. + + +=head1 Controlling the output + +The output can prefixed with the argument: + + parallel --tag echo foo-{} ::: A B C + +Output (the order may be different): + + A foo-A + B foo-B + C foo-C + +To prefix it with another string use B<--tagstring>: + + parallel --tagstring {}-bar echo foo-{} ::: A B C + +Output (the order may be different): + + A-bar foo-A + B-bar foo-B + C-bar foo-C + +To see what commands will be run without running them use B<--dryrun>: + + parallel --dryrun echo {} ::: A B C + +Output (the order may be different): + + echo A + echo B + echo C + +To print the command before running them use B<--verbose>: + + parallel --verbose echo {} ::: A B C + +Output (the order may be different): + + echo A + echo B + A + echo C + B + C + +GNU B<parallel> will postpone the output until the command completes: + + parallel -j2 'printf "%s-start\n%s" {} {}; + sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1 + +Output: + + 2-start + 2-middle + 2-end + 1-start + 1-middle + 1-end + 4-start + 4-middle + 4-end + +To get the output immediately use B<--ungroup>: + + parallel -j2 --ungroup 'printf "%s-start\n%s" {} {}; + sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1 + +Output: + + 4-start + 42-start + 2-middle + 2-end + 1-start + 1-middle + 1-end + -middle + 4-end + +B<--ungroup> is fast, but can cause half a line from one job to be mixed +with half a line of another job. That has happened in the second line, +where the line '4-middle' is mixed with '2-start'. + +To avoid this use B<--linebuffer>: + + parallel -j2 --linebuffer 'printf "%s-start\n%s" {} {}; + sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1 + +Output: + + 4-start + 2-start + 2-middle + 2-end + 1-start + 1-middle + 1-end + 4-middle + 4-end + +To force the output in the same order as the arguments use B<--keep-order>/B<-k>: + + parallel -j2 -k 'printf "%s-start\n%s" {} {}; + sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1 + +Output: + + 4-start + 4-middle + 4-end + 2-start + 2-middle + 2-end + 1-start + 1-middle + 1-end + + +=head2 Saving output into files + +GNU B<parallel> can save the output of each job into files: + + parallel --files echo ::: A B C + +Output will be similar to this: + + /tmp/pAh6uWuQCg.par + /tmp/opjhZCzAX4.par + /tmp/W0AT_Rph2o.par + +By default GNU B<parallel> will cache the output in files in B</tmp>. This +can be changed by setting B<$TMPDIR> or B<--tmpdir>: + + parallel --tmpdir /var/tmp --files echo ::: A B C + +Output will be similar to this: + + /var/tmp/N_vk7phQRc.par + /var/tmp/7zA4Ccf3wZ.par + /var/tmp/LIuKgF_2LP.par + +Or: + + TMPDIR=/var/tmp parallel --files echo ::: A B C + +Output: Same as above. + +The output files can be saved in a structured way using B<--results>: + + parallel --results outdir echo ::: A B C + +Output: + + A + B + C + +These files were also generated containing the standard output +(stdout), standard error (stderr), and the sequence number (seq): + + outdir/1/A/seq + outdir/1/A/stderr + outdir/1/A/stdout + outdir/1/B/seq + outdir/1/B/stderr + outdir/1/B/stdout + outdir/1/C/seq + outdir/1/C/stderr + outdir/1/C/stdout + +B<--header :> will take the first value as name and use that in the +directory structure. This is useful if you are using multiple input +sources: + + parallel --header : --results outdir echo ::: f1 A B ::: f2 C D + +Generated files: + + outdir/f1/A/f2/C/seq + outdir/f1/A/f2/C/stderr + outdir/f1/A/f2/C/stdout + outdir/f1/A/f2/D/seq + outdir/f1/A/f2/D/stderr + outdir/f1/A/f2/D/stdout + outdir/f1/B/f2/C/seq + outdir/f1/B/f2/C/stderr + outdir/f1/B/f2/C/stdout + outdir/f1/B/f2/D/seq + outdir/f1/B/f2/D/stderr + outdir/f1/B/f2/D/stdout + +The directories are named after the variables and their values. + +=head1 Controlling the execution + +=head2 Number of simultaneous jobs + +The number of concurrent jobs is given with B<--jobs>/B<-j>: + + /usr/bin/time parallel -N0 -j64 sleep 1 :::: num128 + +With 64 jobs in parallel the 128 B<sleep>s will take 2-8 seconds to run - +depending on how fast your machine is. + +By default B<--jobs> is the same as the number of CPU cores. So this: + + /usr/bin/time parallel -N0 sleep 1 :::: num128 + +should take twice the time of running 2 jobs per CPU core: + + /usr/bin/time parallel -N0 --jobs 200% sleep 1 :::: num128 + +B<--jobs 0> will run as many jobs in parallel as possible: + + /usr/bin/time parallel -N0 --jobs 0 sleep 1 :::: num128 + +which should take 1-7 seconds depending on how fast your machine is. + +B<--jobs> can read from a file which is re-read when a job finishes: + + echo 50% > my_jobs + /usr/bin/time parallel -N0 --jobs my_jobs sleep 1 :::: num128 & + sleep 1 + echo 0 > my_jobs + wait + +The first second only 50% of the CPU cores will run a job. Then B<0> is +put into B<my_jobs> and then the rest of the jobs will be started in +parallel. + +Instead of basing the percentage on the number of CPU cores +GNU B<parallel> can base it on the number of CPUs: + + parallel --use-cpus-instead-of-cores -N0 sleep 1 :::: num8 + +=head2 Shuffle job order + +If you have many jobs (e.g. by multiple combinations of input +sources), it can be handy to shuffle the jobs, so you get different +values run. Use B<--shuf> for that: + + parallel --shuf echo ::: 1 2 3 ::: a b c ::: A B C + +Output: + + All combinations but different order for each run. + +=head2 Interactivity + +GNU B<parallel> can ask the user if a command should be run using B<--interactive>: + + parallel --interactive echo ::: 1 2 3 + +Output: + + echo 1 ?...y + echo 2 ?...n + 1 + echo 3 ?...y + 3 + +GNU B<parallel> can be used to put arguments on the command line for an +interactive command such as B<emacs> to edit one file at a time: + + parallel --tty emacs ::: 1 2 3 + +Or give multiple argument in one go to open multiple files: + + parallel -X --tty vi ::: 1 2 3 + +=head2 A terminal for every job + +Using B<--tmux> GNU B<parallel> can start a terminal for every job run: + + seq 10 20 | parallel --tmux 'echo start {}; sleep {}; echo done {}' + +This will tell you to run something similar to: + + tmux -S /tmp/tmsrPrO0 attach + +Using normal B<tmux> keystrokes (CTRL-b n or CTRL-b p) you can cycle +between windows of the running jobs. When a job is finished it will +pause for 10 seconds before closing the window. + +=head2 Timing + +Some jobs do heavy I/O when they start. To avoid a thundering herd GNU +B<parallel> can delay starting new jobs. B<--delay> I<X> will make +sure there is at least I<X> seconds between each start: + + parallel --delay 2.5 echo Starting {}\;date ::: 1 2 3 + +Output: + + Starting 1 + Thu Aug 15 16:24:33 CEST 2013 + Starting 2 + Thu Aug 15 16:24:35 CEST 2013 + Starting 3 + Thu Aug 15 16:24:38 CEST 2013 + + +If jobs taking more than a certain amount of time are known to fail, +they can be stopped with B<--timeout>. The accuracy of B<--timeout> is +2 seconds: + + parallel --timeout 4.1 sleep {}\; echo {} ::: 2 4 6 8 + +Output: + + 2 + 4 + +GNU B<parallel> can compute the median runtime for jobs and kill those +that take more than 200% of the median runtime: + + parallel --timeout 200% sleep {}\; echo {} ::: 2.1 2.2 3 7 2.3 + +Output: + + 2.1 + 2.2 + 3 + 2.3 + +=head2 Progress information + +Based on the runtime of completed jobs GNU B<parallel> can estimate the +total runtime: + + parallel --eta sleep ::: 1 3 2 2 1 3 3 2 1 + +Output: + + Computers / CPU cores / Max jobs to run + 1:local / 2 / 2 + + Computer:jobs running/jobs completed/%of started jobs/ + Average seconds to complete + ETA: 2s 0left 1.11avg local:0/9/100%/1.1s + +GNU B<parallel> can give progress information with B<--progress>: + + parallel --progress sleep ::: 1 3 2 2 1 3 3 2 1 + +Output: + + Computers / CPU cores / Max jobs to run + 1:local / 2 / 2 + + Computer:jobs running/jobs completed/%of started jobs/ + Average seconds to complete + local:0/9/100%/1.1s + +A progress bar can be shown with B<--bar>: + + parallel --bar sleep ::: 1 3 2 2 1 3 3 2 1 + +And a graphic bar can be shown with B<--bar> and B<zenity>: + + seq 1000 | parallel -j10 --bar '(echo -n {};sleep 0.1)' \ + 2> >(perl -pe 'BEGIN{$/="\r";$|=1};s/\r/\n/g' | + zenity --progress --auto-kill --auto-close) + +A logfile of the jobs completed so far can be generated with B<--joblog>: + + parallel --joblog /tmp/log exit ::: 1 2 3 0 + cat /tmp/log + +Output: + + Seq Host Starttime Runtime Send Receive Exitval Signal Command + 1 : 1376577364.974 0.008 0 0 1 0 exit 1 + 2 : 1376577364.982 0.013 0 0 2 0 exit 2 + 3 : 1376577364.990 0.013 0 0 3 0 exit 3 + 4 : 1376577365.003 0.003 0 0 0 0 exit 0 + +The log contains the job sequence, which host the job was run on, the +start time and run time, how much data was transferred, the exit +value, the signal that killed the job, and finally the command being +run. + +With a joblog GNU B<parallel> can be stopped and later pickup where it +left off. It it important that the input of the completed jobs is +unchanged. + + parallel --joblog /tmp/log exit ::: 1 2 3 0 + cat /tmp/log + parallel --resume --joblog /tmp/log exit ::: 1 2 3 0 0 0 + cat /tmp/log + +Output: + + Seq Host Starttime Runtime Send Receive Exitval Signal Command + 1 : 1376580069.544 0.008 0 0 1 0 exit 1 + 2 : 1376580069.552 0.009 0 0 2 0 exit 2 + 3 : 1376580069.560 0.012 0 0 3 0 exit 3 + 4 : 1376580069.571 0.005 0 0 0 0 exit 0 + + Seq Host Starttime Runtime Send Receive Exitval Signal Command + 1 : 1376580069.544 0.008 0 0 1 0 exit 1 + 2 : 1376580069.552 0.009 0 0 2 0 exit 2 + 3 : 1376580069.560 0.012 0 0 3 0 exit 3 + 4 : 1376580069.571 0.005 0 0 0 0 exit 0 + 5 : 1376580070.028 0.009 0 0 0 0 exit 0 + 6 : 1376580070.038 0.007 0 0 0 0 exit 0 + +Note how the start time of the last 2 jobs is clearly different from the second run. + +With B<--resume-failed> GNU B<parallel> will re-run the jobs that failed: + + parallel --resume-failed --joblog /tmp/log exit ::: 1 2 3 0 0 0 + cat /tmp/log + +Output: + + Seq Host Starttime Runtime Send Receive Exitval Signal Command + 1 : 1376580069.544 0.008 0 0 1 0 exit 1 + 2 : 1376580069.552 0.009 0 0 2 0 exit 2 + 3 : 1376580069.560 0.012 0 0 3 0 exit 3 + 4 : 1376580069.571 0.005 0 0 0 0 exit 0 + 5 : 1376580070.028 0.009 0 0 0 0 exit 0 + 6 : 1376580070.038 0.007 0 0 0 0 exit 0 + 1 : 1376580154.433 0.010 0 0 1 0 exit 1 + 2 : 1376580154.444 0.022 0 0 2 0 exit 2 + 3 : 1376580154.466 0.005 0 0 3 0 exit 3 + +Note how seq 1 2 3 have been repeated because they had exit value +different from 0. + +B<--retry-failed> does almost the same as B<--resume-failed>. Where +B<--resume-failed> reads the commands from the command line (and +ignores the commands in the joblog), B<--retry-failed> ignores the +command line and reruns the commands mentioned in the joblog. + + parallel --retry-failed --joblog /tmp/log + cat /tmp/log + +Output: + + Seq Host Starttime Runtime Send Receive Exitval Signal Command + 1 : 1376580069.544 0.008 0 0 1 0 exit 1 + 2 : 1376580069.552 0.009 0 0 2 0 exit 2 + 3 : 1376580069.560 0.012 0 0 3 0 exit 3 + 4 : 1376580069.571 0.005 0 0 0 0 exit 0 + 5 : 1376580070.028 0.009 0 0 0 0 exit 0 + 6 : 1376580070.038 0.007 0 0 0 0 exit 0 + 1 : 1376580154.433 0.010 0 0 1 0 exit 1 + 2 : 1376580154.444 0.022 0 0 2 0 exit 2 + 3 : 1376580154.466 0.005 0 0 3 0 exit 3 + 1 : 1376580164.633 0.010 0 0 1 0 exit 1 + 2 : 1376580164.644 0.022 0 0 2 0 exit 2 + 3 : 1376580164.666 0.005 0 0 3 0 exit 3 + + +=head2 Termination + +=head3 Unconditional termination + +By default GNU B<parallel> will wait for all jobs to finish before exiting. + +If you send GNU B<parallel> the B<TERM> signal, GNU B<parallel> will +stop spawning new jobs and wait for the remaining jobs to finish. If +you send GNU B<parallel> the B<TERM> signal again, GNU B<parallel> +will kill all running jobs and exit. + +=head3 Termination dependent on job status + +For certain jobs there is no need to continue if one of the jobs fails +and has an exit code different from 0. GNU B<parallel> will stop spawning new jobs +with B<--halt soon,fail=1>: + + parallel -j2 --halt soon,fail=1 echo {}\; exit {} ::: 0 0 1 2 3 + +Output: + + 0 + 0 + 1 + parallel: This job failed: + echo 1; exit 1 + parallel: Starting no more jobs. Waiting for 1 jobs to finish. + 2 + +With B<--halt now,fail=1> the running jobs will be killed immediately: + + parallel -j2 --halt now,fail=1 echo {}\; exit {} ::: 0 0 1 2 3 + +Output: + + 0 + 0 + 1 + parallel: This job failed: + echo 1; exit 1 + +If B<--halt> is given a percentage this percentage of the jobs must fail +before GNU B<parallel> stops spawning more jobs: + + parallel -j2 --halt soon,fail=20% echo {}\; exit {} \ + ::: 0 1 2 3 4 5 6 7 8 9 + +Output: + + 0 + 1 + parallel: This job failed: + echo 1; exit 1 + 2 + parallel: This job failed: + echo 2; exit 2 + parallel: Starting no more jobs. Waiting for 1 jobs to finish. + 3 + parallel: This job failed: + echo 3; exit 3 + +If you are looking for success instead of failures, you can use +B<success>. This will finish as soon as the first job succeeds: + + parallel -j2 --halt now,success=1 echo {}\; exit {} ::: 1 2 3 0 4 5 6 + +Output: + + 1 + 2 + 3 + 0 + parallel: This job succeeded: + echo 0; exit 0 + +GNU B<parallel> can retry the command with B<--retries>. This is useful if a +command fails for unknown reasons now and then. + + parallel -k --retries 3 \ + 'echo tried {} >>/tmp/runs; echo completed {}; exit {}' ::: 1 2 0 + cat /tmp/runs + +Output: + + completed 1 + completed 2 + completed 0 + + tried 1 + tried 2 + tried 1 + tried 2 + tried 1 + tried 2 + tried 0 + +Note how job 1 and 2 were tried 3 times, but 0 was not retried because it had exit code 0. + +=head3 Termination signals (advanced) + +Using B<--termseq> you can control which signals are sent when killing +children. Normally children will be killed by sending them B<SIGTERM>, +waiting 200 ms, then another B<SIGTERM>, waiting 100 ms, then another +B<SIGTERM>, waiting 50 ms, then a B<SIGKILL>, finally waiting 25 ms +before giving up. It looks like this: + + show_signals() { + perl -e 'for(keys %SIG) { + $SIG{$_} = eval "sub { print \"Got $_\\n\"; }"; + } + while(1){sleep 1}' + } + export -f show_signals + echo | parallel --termseq TERM,200,TERM,100,TERM,50,KILL,25 \ + -u --timeout 1 show_signals + +Output: + + Got TERM + Got TERM + Got TERM + +Or just: + + echo | parallel -u --timeout 1 show_signals + +Output: Same as above. + +You can change this to B<SIGINT>, B<SIGTERM>, B<SIGKILL>: + + echo | parallel --termseq INT,200,TERM,100,KILL,25 \ + -u --timeout 1 show_signals + +Output: + + Got INT + Got TERM + +The B<SIGKILL> does not show because it cannot be caught, and thus the +child dies. + + +=head2 Limiting the resources + +To avoid overloading systems GNU B<parallel> can look at the system load +before starting another job: + + parallel --load 100% echo load is less than {} job per cpu ::: 1 + +Output: + + [when then load is less than the number of cpu cores] + load is less than 1 job per cpu + +GNU B<parallel> can also check if the system is swapping. + + parallel --noswap echo the system is not swapping ::: now + +Output: + + [when then system is not swapping] + the system is not swapping now + +Some jobs need a lot of memory, and should only be started when there +is enough memory free. Using B<--memfree> GNU B<parallel> can check if +there is enough memory free. Additionally, GNU B<parallel> will kill +off the youngest job if the memory free falls below 50% of the +size. The killed job will put back on the queue and retried later. + + parallel --memfree 1G echo will run if more than 1 GB is ::: free + +GNU B<parallel> can run the jobs with a nice value. This will work both +locally and remotely. + + parallel --nice 17 echo this is being run with nice -n ::: 17 + +Output: + + this is being run with nice -n 17 + +=head1 Remote execution + +GNU B<parallel> can run jobs on remote servers. It uses B<ssh> to +communicate with the remote machines. + +=head2 Sshlogin + +The most basic sshlogin is B<-S> I<host>: + + parallel -S $SERVER1 echo running on ::: $SERVER1 + +Output: + + running on [$SERVER1] + +To use a different username prepend the server with I<username@>: + + parallel -S username@$SERVER1 echo running on ::: username@$SERVER1 + +Output: + + running on [username@$SERVER1] + +The special sshlogin B<:> is the local machine: + + parallel -S : echo running on ::: the_local_machine + +Output: + + running on the_local_machine + +If B<ssh> is not in $PATH it can be prepended to $SERVER1: + + parallel -S '/usr/bin/ssh '$SERVER1 echo custom ::: ssh + +Output: + + custom ssh + +The B<ssh> command can also be given using B<--ssh>: + + parallel --ssh /usr/bin/ssh -S $SERVER1 echo custom ::: ssh + +or by setting B<$PARALLEL_SSH>: + + export PARALLEL_SSH=/usr/bin/ssh + parallel -S $SERVER1 echo custom ::: ssh + +Several servers can be given using multiple B<-S>: + + parallel -S $SERVER1 -S $SERVER2 echo ::: running on more hosts + +Output (the order may be different): + + running + on + more + hosts + +Or they can be separated by B<,>: + + parallel -S $SERVER1,$SERVER2 echo ::: running on more hosts + +Output: Same as above. + +Or newline: + + # This gives a \n between $SERVER1 and $SERVER2 + SERVERS="`echo $SERVER1; echo $SERVER2`" + parallel -S "$SERVERS" echo ::: running on more hosts + +They can also be read from a file (replace I<user@> with the user on B<$SERVER2>): + + echo $SERVER1 > nodefile + # Force 4 cores, special ssh-command, username + echo 4//usr/bin/ssh user@$SERVER2 >> nodefile + parallel --sshloginfile nodefile echo ::: running on more hosts + +Output: Same as above. + +Every time a job finished, the B<--sshloginfile> will be re-read, so +it is possible to both add and remove hosts while running. + +The special B<--sshloginfile ..> reads from B<~/.parallel/sshloginfile>. + +To force GNU B<parallel> to treat a server having a given number of CPU +cores prepend the number of core followed by B</> to the sshlogin: + + parallel -S 4/$SERVER1 echo force {} cpus on server ::: 4 + +Output: + + force 4 cpus on server + +Servers can be put into groups by prepending I<@groupname> to the +server and the group can then be selected by appending I<@groupname> to +the argument if using B<--hostgroup>: + + parallel --hostgroup -S @grp1/$SERVER1 -S @grp2/$SERVER2 echo {} \ + ::: run_on_grp1@grp1 run_on_grp2@grp2 + +Output: + + run_on_grp1 + run_on_grp2 + +A host can be in multiple groups by separating the groups with B<+>, and +you can force GNU B<parallel> to limit the groups on which the command +can be run with B<-S> I<@groupname>: + + parallel -S @grp1 -S @grp1+grp2/$SERVER1 -S @grp2/SERVER2 echo {} \ + ::: run_on_grp1 also_grp1 + +Output: + + run_on_grp1 + also_grp1 + +=head2 Transferring files + +GNU B<parallel> can transfer the files to be processed to the remote +host. It does that using rsync. + + echo This is input_file > input_file + parallel -S $SERVER1 --transferfile {} cat ::: input_file + +Output: + + This is input_file + +If the files are processed into another file, the resulting file can be +transferred back: + + echo This is input_file > input_file + parallel -S $SERVER1 --transferfile {} --return {}.out \ + cat {} ">"{}.out ::: input_file + cat input_file.out + +Output: Same as above. + +To remove the input and output file on the remote server use B<--cleanup>: + + echo This is input_file > input_file + parallel -S $SERVER1 --transferfile {} --return {}.out --cleanup \ + cat {} ">"{}.out ::: input_file + cat input_file.out + +Output: Same as above. + +There is a shorthand for B<--transferfile {} --return --cleanup> called B<--trc>: + + echo This is input_file > input_file + parallel -S $SERVER1 --trc {}.out cat {} ">"{}.out ::: input_file + cat input_file.out + +Output: Same as above. + +Some jobs need a common database for all jobs. GNU B<parallel> can +transfer that using B<--basefile> which will transfer the file before the +first job: + + echo common data > common_file + parallel --basefile common_file -S $SERVER1 \ + cat common_file\; echo {} ::: foo + +Output: + + common data + foo + +To remove it from the remote host after the last job use B<--cleanup>. + + +=head2 Working dir + +The default working dir on the remote machines is the login dir. This +can be changed with B<--workdir> I<mydir>. + +Files transferred using B<--transferfile> and B<--return> will be relative +to I<mydir> on remote computers, and the command will be executed in +the dir I<mydir>. + +The special I<mydir> value B<...> will create working dirs under +B<~/.parallel/tmp> on the remote computers. If B<--cleanup> is given +these dirs will be removed. + +The special I<mydir> value B<.> uses the current working dir. If the +current working dir is beneath your home dir, the value B<.> is +treated as the relative path to your home dir. This means that if your +home dir is different on remote computers (e.g. if your login is +different) the relative path will still be relative to your home dir. + + parallel -S $SERVER1 pwd ::: "" + parallel --workdir . -S $SERVER1 pwd ::: "" + parallel --workdir ... -S $SERVER1 pwd ::: "" + +Output: + + [the login dir on $SERVER1] + [current dir relative on $SERVER1] + [a dir in ~/.parallel/tmp/...] + + +=head2 Avoid overloading sshd + +If many jobs are started on the same server, B<sshd> can be +overloaded. GNU B<parallel> can insert a delay between each job run on +the same server: + + parallel -S $SERVER1 --sshdelay 0.2 echo ::: 1 2 3 + +Output (the order may be different): + + 1 + 2 + 3 + +B<sshd> will be less overloaded if using B<--controlmaster>, which will +multiplex ssh connections: + + parallel --controlmaster -S $SERVER1 echo ::: 1 2 3 + +Output: Same as above. + +=head2 Ignore hosts that are down + +In clusters with many hosts a few of them are often down. GNU B<parallel> +can ignore those hosts. In this case the host 173.194.32.46 is down: + + parallel --filter-hosts -S 173.194.32.46,$SERVER1 echo ::: bar + +Output: + + bar + +=head2 Running the same commands on all hosts + +GNU B<parallel> can run the same command on all the hosts: + + parallel --onall -S $SERVER1,$SERVER2 echo ::: foo bar + +Output (the order may be different): + + foo + bar + foo + bar + +Often you will just want to run a single command on all hosts with out +arguments. B<--nonall> is a no argument B<--onall>: + + parallel --nonall -S $SERVER1,$SERVER2 echo foo bar + +Output: + + foo bar + foo bar + +When B<--tag> is used with B<--nonall> and B<--onall> the B<--tagstring> is the host: + + parallel --nonall --tag -S $SERVER1,$SERVER2 echo foo bar + +Output (the order may be different): + + $SERVER1 foo bar + $SERVER2 foo bar + +B<--jobs> sets the number of servers to log in to in parallel. + +=head2 Transferring environment variables and functions + +B<env_parallel> is a shell function that transfers all aliases, +functions, variables, and arrays. You active it by running: + + source `which env_parallel.bash` + +Replace B<bash> with the shell you use. + +Now you can use B<env_parallel> instead of B<parallel> and still have +your environment: + + alias myecho=echo + myvar="Joe's var is" + env_parallel -S $SERVER1 'myecho $myvar' ::: green + +Output: + + Joe's var is green + +The disadvantage is that if your environment is huge B<env_parallel> +will fail. + +When B<env_parallel> fails, you can still use B<--env> to tell GNU +B<parallel> to transfer an environment variable to the remote system. + + MYVAR='foo bar' + export MYVAR + parallel --env MYVAR -S $SERVER1 echo '$MYVAR' ::: baz + +Output: + + foo bar baz + +This works for functions, too, if your shell is Bash: + + # This only works in Bash + my_func() { + echo in my_func $1 + } + export -f my_func + parallel --env my_func -S $SERVER1 my_func ::: baz + +Output: + + in my_func baz + +GNU B<parallel> can copy all user defined variables and functions to +the remote system. It just needs to record which ones to ignore in +B<~/.parallel/ignored_vars>. Do that by running this once: + + parallel --record-env + cat ~/.parallel/ignored_vars + +Output: + + [list of variables to ignore - including $PATH and $HOME] + +Now all other variables and functions defined will be copied when +using B<--env _>. + + # The function is only copied if using Bash + my_func2() { + echo in my_func2 $VAR $1 + } + export -f my_func2 + VAR=foo + export VAR + + parallel --env _ -S $SERVER1 'echo $VAR; my_func2' ::: bar + +Output: + + foo + in my_func2 foo bar + +If you use B<env_parallel> the variables, functions, and aliases do +not even need to be exported to be copied: + + NOT='not exported var' + alias myecho=echo + not_ex() { + myecho in not_exported_func $NOT $1 + } + env_parallel --env _ -S $SERVER1 'echo $NOT; not_ex' ::: bar + +Output: + + not exported var + in not_exported_func not exported var bar + + +=head2 Showing what is actually run + +B<--verbose> will show the command that would be run on the local +machine. + +When using B<--cat>, B<--pipepart>, or when a job is run on a remote +machine, the command is wrapped with helper scripts. B<-vv> shows all +of this. + + parallel -vv --pipepart --block 1M wc :::: num30000 + +Output: + + <num30000 perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; + $left = shift; while($read = sysread(STDIN,$buf, ($left > 131072 + ? 131072 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' + 0 0 0 168894 | (wc) + 30000 30000 168894 + +When the command gets more complex, the output is so hard to read, +that it is only useful for debugging: + + my_func3() { + echo in my_func $1 > $1.out + } + export -f my_func3 + parallel -vv --workdir ... --nice 17 --env _ --trc {}.out \ + -S $SERVER1 my_func3 {} ::: abc-file + +Output will be similar to: + + + ( ssh server -- mkdir -p ./.parallel/tmp/aspire-1928520-1;rsync + --protocol 30 -rlDzR -essh ./abc-file + server:./.parallel/tmp/aspire-1928520-1 );ssh server -- exec perl -e + \''@GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval"@GNU_Parallel";my$eval=decode_base64(join"",@ARGV);eval$eval;'\' + c3lzdGVtKCJta2RpciIsIi1wIiwiLS0iLCIucGFyYWxsZWwvdG1wL2FzcGlyZS0xOTI4N + TsgY2hkaXIgIi5wYXJhbGxlbC90bXAvYXNwaXJlLTE5Mjg1MjAtMSIgfHxwcmludChTVE + BhcmFsbGVsOiBDYW5ub3QgY2hkaXIgdG8gLnBhcmFsbGVsL3RtcC9hc3BpcmUtMTkyODU + iKSAmJiBleGl0IDI1NTskRU5WeyJPTERQV0QifT0iL2hvbWUvdGFuZ2UvcHJpdmF0L3Bh + IjskRU5WeyJQQVJBTExFTF9QSUQifT0iMTkyODUyMCI7JEVOVnsiUEFSQUxMRUxfU0VRI + 0BiYXNoX2Z1bmN0aW9ucz1xdyhteV9mdW5jMyk7IGlmKCRFTlZ7IlNIRUxMIn09fi9jc2 + ByaW50IFNUREVSUiAiQ1NIL1RDU0ggRE8gTk9UIFNVUFBPUlQgbmV3bGluZXMgSU4gVkF + TL0ZVTkNUSU9OUy4gVW5zZXQgQGJhc2hfZnVuY3Rpb25zXG4iOyBleGVjICJmYWxzZSI7 + YXNoZnVuYyA9ICJteV9mdW5jMygpIHsgIGVjaG8gaW4gbXlfZnVuYyBcJDEgPiBcJDEub + Xhwb3J0IC1mIG15X2Z1bmMzID4vZGV2L251bGw7IjtAQVJHVj0ibXlfZnVuYzMgYWJjLW + RzaGVsbD0iJEVOVntTSEVMTH0iOyR0bXBkaXI9Ii90bXAiOyRuaWNlPTE3O2RveyRFTlZ + MRUxfVE1QfT0kdG1wZGlyLiIvcGFyIi5qb2luIiIsbWFweygwLi45LCJhIi4uInoiLCJB + KVtyYW5kKDYyKV19KDEuLjUpO313aGlsZSgtZSRFTlZ7UEFSQUxMRUxfVE1QfSk7JFNJ + fT1zdWJ7JGRvbmU9MTt9OyRwaWQ9Zm9yazt1bmxlc3MoJHBpZCl7c2V0cGdycDtldmFse + W9yaXR5KDAsMCwkbmljZSl9O2V4ZWMkc2hlbGwsIi1jIiwoJGJhc2hmdW5jLiJAQVJHVi + JleGVjOiQhXG4iO31kb3skcz0kczwxPzAuMDAxKyRzKjEuMDM6JHM7c2VsZWN0KHVuZGV + mLHVuZGVmLCRzKTt9dW50aWwoJGRvbmV8fGdldHBwaWQ9PTEpO2tpbGwoU0lHSFVQLC0k + dW5sZXNzJGRvbmU7d2FpdDtleGl0KCQ/JjEyNz8xMjgrKCQ/JjEyNyk6MSskPz4+OCk=; + _EXIT_status=$?; mkdir -p ./.; rsync --protocol 30 --rsync-path=cd\ + ./.parallel/tmp/aspire-1928520-1/./.\;\ rsync -rlDzR -essh + server:./abc-file.out ./.;ssh server -- \(rm\ -f\ + ./.parallel/tmp/aspire-1928520-1/abc-file\;\ sh\ -c\ \'rmdir\ + ./.parallel/tmp/aspire-1928520-1/\ ./.parallel/tmp/\ ./.parallel/\ + 2\>/dev/null\'\;rm\ -rf\ ./.parallel/tmp/aspire-1928520-1\;\);ssh + server -- \(rm\ -f\ ./.parallel/tmp/aspire-1928520-1/abc-file.out\;\ + sh\ -c\ \'rmdir\ ./.parallel/tmp/aspire-1928520-1/\ ./.parallel/tmp/\ + ./.parallel/\ 2\>/dev/null\'\;rm\ -rf\ + ./.parallel/tmp/aspire-1928520-1\;\);ssh server -- rm -rf + .parallel/tmp/aspire-1928520-1; exit $_EXIT_status; + +=head1 Saving output to shell variables (advanced) + +GNU B<parset> will set shell variables to the output of GNU +B<parallel>. GNU B<parset> has one important limitation: It cannot be +part of a pipe. In particular this means it cannot read anything from +standard input (stdin) or pipe output to another program. + +To use GNU B<parset> prepend command with destination variables: + + parset myvar1,myvar2 echo ::: a b + echo $myvar1 + echo $myvar2 + +Output: + + a + b + +If you only give a single variable, it will be treated as an array: + + parset myarray seq {} 5 ::: 1 2 3 + echo "${myarray[1]}" + +Output: + + 2 + 3 + 4 + 5 + +The commands to run can be an array: + + cmd=("echo '<<joe \"double space\" cartoon>>'" "pwd") + parset data ::: "${cmd[@]}" + echo "${data[0]}" + echo "${data[1]}" + +Output: + + <<joe "double space" cartoon>> + [current dir] + + +=head1 Saving to an SQL base (advanced) + +GNU B<parallel> can save into an SQL base. Point GNU B<parallel> to a +table and it will put the joblog there together with the variables and +the output each in their own column. + +=head2 CSV as SQL base + +The simplest is to use a CSV file as the storage table: + + parallel --sqlandworker csv:///%2Ftmp/log.csv \ + seq ::: 10 ::: 12 13 14 + cat /tmp/log.csv + +Note how '/' in the path must be written as %2F. + +Output will be similar to: + + Seq,Host,Starttime,JobRuntime,Send,Receive,Exitval,_Signal, + Command,V1,V2,Stdout,Stderr + 1,:,1458254498.254,0.069,0,9,0,0,"seq 10 12",10,12,"10 + 11 + 12 + ", + 2,:,1458254498.278,0.080,0,12,0,0,"seq 10 13",10,13,"10 + 11 + 12 + 13 + ", + 3,:,1458254498.301,0.083,0,15,0,0,"seq 10 14",10,14,"10 + 11 + 12 + 13 + 14 + ", + +A proper CSV reader (like LibreOffice or R's read.csv) will read this +format correctly - even with fields containing newlines as above. + +If the output is big you may want to put it into files using B<--results>: + + parallel --results outdir --sqlandworker csv:///%2Ftmp/log2.csv \ + seq ::: 10 ::: 12 13 14 + cat /tmp/log2.csv + +Output will be similar to: + + Seq,Host,Starttime,JobRuntime,Send,Receive,Exitval,_Signal, + Command,V1,V2,Stdout,Stderr + 1,:,1458824738.287,0.029,0,9,0,0, + "seq 10 12",10,12,outdir/1/10/2/12/stdout,outdir/1/10/2/12/stderr + 2,:,1458824738.298,0.025,0,12,0,0, + "seq 10 13",10,13,outdir/1/10/2/13/stdout,outdir/1/10/2/13/stderr + 3,:,1458824738.309,0.026,0,15,0,0, + "seq 10 14",10,14,outdir/1/10/2/14/stdout,outdir/1/10/2/14/stderr + + +=head2 DBURL as table + +The CSV file is an example of a DBURL. + +GNU B<parallel> uses a DBURL to address the table. A DBURL has this format: + + vendor://[[user][:password]@][host][:port]/[database[/table] + +Example: + + mysql://scott:tiger@my.example.com/mydatabase/mytable + postgresql://scott:tiger@pg.example.com/mydatabase/mytable + sqlite3:///%2Ftmp%2Fmydatabase/mytable + csv:///%2Ftmp/log.csv + +To refer to B</tmp/mydatabase> with B<sqlite> or B<csv> you need to +encode the B</> as B<%2F>. + +Run a job using B<sqlite> on B<mytable> in B</tmp/mydatabase>: + + DBURL=sqlite3:///%2Ftmp%2Fmydatabase + DBURLTABLE=$DBURL/mytable + parallel --sqlandworker $DBURLTABLE echo ::: foo bar ::: baz quuz + +To see the result: + + sql $DBURL 'SELECT * FROM mytable ORDER BY Seq;' + +Output will be similar to: + + Seq|Host|Starttime|JobRuntime|Send|Receive|Exitval|_Signal| + Command|V1|V2|Stdout|Stderr + 1|:|1451619638.903|0.806||8|0|0|echo foo baz|foo|baz|foo baz + | + 2|:|1451619639.265|1.54||9|0|0|echo foo quuz|foo|quuz|foo quuz + | + 3|:|1451619640.378|1.43||8|0|0|echo bar baz|bar|baz|bar baz + | + 4|:|1451619641.473|0.958||9|0|0|echo bar quuz|bar|quuz|bar quuz + | + +The first columns are well known from B<--joblog>. B<V1> and B<V2> are +data from the input sources. B<Stdout> and B<Stderr> are standard +output and standard error, respectively. + +=head2 Using multiple workers + +Using an SQL base as storage costs overhead in the order of 1 second +per job. + +One of the situations where it makes sense is if you have multiple +workers. + +You can then have a single master machine that submits jobs to the SQL +base (but does not do any of the work): + + parallel --sqlmaster $DBURLTABLE echo ::: foo bar ::: baz quuz + +On the worker machines you run exactly the same command except you +replace B<--sqlmaster> with B<--sqlworker>. + + parallel --sqlworker $DBURLTABLE echo ::: foo bar ::: baz quuz + +To run a master and a worker on the same machine use B<--sqlandworker> +as shown earlier. + + +=head1 --pipe + +The B<--pipe> functionality puts GNU B<parallel> in a different mode: +Instead of treating the data on stdin (standard input) as arguments +for a command to run, the data will be sent to stdin (standard input) +of the command. + +The typical situation is: + + command_A | command_B | command_C + +where command_B is slow, and you want to speed up command_B. + +=head2 Chunk size + +By default GNU B<parallel> will start an instance of command_B, read a +chunk of 1 MB, and pass that to the instance. Then start another +instance, read another chunk, and pass that to the second instance. + + cat num1000000 | parallel --pipe wc + +Output (the order may be different): + + 165668 165668 1048571 + 149797 149797 1048579 + 149796 149796 1048572 + 149797 149797 1048579 + 149797 149797 1048579 + 149796 149796 1048572 + 85349 85349 597444 + +The size of the chunk is not exactly 1 MB because GNU B<parallel> only +passes full lines - never half a line, thus the blocksize is only +1 MB on average. You can change the block size to 2 MB with B<--block>: + + cat num1000000 | parallel --pipe --block 2M wc + +Output (the order may be different): + + 315465 315465 2097150 + 299593 299593 2097151 + 299593 299593 2097151 + 85349 85349 597444 + +GNU B<parallel> treats each line as a record. If the order of records +is unimportant (e.g. you need all lines processed, but you do not care +which is processed first), then you can use B<--roundrobin>. Without +B<--roundrobin> GNU B<parallel> will start a command per block; with +B<--roundrobin> only the requested number of jobs will be started +(B<--jobs>). The records will then be distributed between the running +jobs: + + cat num1000000 | parallel --pipe -j4 --roundrobin wc + +Output will be similar to: + + 149797 149797 1048579 + 299593 299593 2097151 + 315465 315465 2097150 + 235145 235145 1646016 + +One of the 4 instances got a single record, 2 instances got 2 full +records each, and one instance got 1 full and 1 partial record. + +=head2 Records + +GNU B<parallel> sees the input as records. The default record is a single +line. + +Using B<-N140000> GNU B<parallel> will read 140000 records at a time: + + cat num1000000 | parallel --pipe -N140000 wc + +Output (the order may be different): + + 140000 140000 868895 + 140000 140000 980000 + 140000 140000 980000 + 140000 140000 980000 + 140000 140000 980000 + 140000 140000 980000 + 140000 140000 980000 + 20000 20000 140001 + +Note how that the last job could not get the full 140000 lines, but +only 20000 lines. + +If a record is 75 lines B<-L> can be used: + + cat num1000000 | parallel --pipe -L75 wc + +Output (the order may be different): + + 165600 165600 1048095 + 149850 149850 1048950 + 149775 149775 1048425 + 149775 149775 1048425 + 149850 149850 1048950 + 149775 149775 1048425 + 85350 85350 597450 + 25 25 176 + +Note how GNU B<parallel> still reads a block of around 1 MB; but +instead of passing full lines to B<wc> it passes full 75 lines at a +time. This of course does not hold for the last job (which in this +case got 25 lines). + +=head2 Fixed length records + +Fixed length records can be processed by setting B<--recend ''> and +B<--block I<recordsize>>. A header of size I<n> can be processed with +B<--header .{I<n>}>. + +Here is how to process a file with a 4-byte header and a 3-byte record +size: + + cat fixedlen | parallel --pipe --header .{4} --block 3 --recend '' \ + 'echo start; cat; echo' + +Output: + + start + HHHHAAA + start + HHHHCCC + start + HHHHBBB + +It may be more efficient to increase B<--block> to a multiplum of the +record size. + +=head2 Record separators + +GNU B<parallel> uses separators to determine where two records split. + +B<--recstart> gives the string that starts a record; B<--recend> gives the +string that ends a record. The default is B<--recend '\n'> (newline). + +If both B<--recend> and B<--recstart> are given, then the record will only +split if the recend string is immediately followed by the recstart +string. + +Here the B<--recend> is set to B<', '>: + + echo /foo, bar/, /baz, qux/, | \ + parallel -kN1 --recend ', ' --pipe echo JOB{#}\;cat\;echo END + +Output: + + JOB1 + /foo, END + JOB2 + bar/, END + JOB3 + /baz, END + JOB4 + qux/, + END + +Here the B<--recstart> is set to B</>: + + echo /foo, bar/, /baz, qux/, | \ + parallel -kN1 --recstart / --pipe echo JOB{#}\;cat\;echo END + +Output: + + JOB1 + /foo, barEND + JOB2 + /, END + JOB3 + /baz, quxEND + JOB4 + /, + END + +Here both B<--recend> and B<--recstart> are set: + + echo /foo, bar/, /baz, qux/, | \ + parallel -kN1 --recend ', ' --recstart / --pipe \ + echo JOB{#}\;cat\;echo END + +Output: + + JOB1 + /foo, bar/, END + JOB2 + /baz, qux/, + END + +Note the difference between setting one string and setting both strings. + +With B<--regexp> the B<--recend> and B<--recstart> will be treated as +a regular expression: + + echo foo,bar,_baz,__qux, | \ + parallel -kN1 --regexp --recend ,_+ --pipe \ + echo JOB{#}\;cat\;echo END + +Output: + + JOB1 + foo,bar,_END + JOB2 + baz,__END + JOB3 + qux, + END + +GNU B<parallel> can remove the record separators with +B<--remove-rec-sep>/B<--rrs>: + + echo foo,bar,_baz,__qux, | \ + parallel -kN1 --rrs --regexp --recend ,_+ --pipe \ + echo JOB{#}\;cat\;echo END + +Output: + + JOB1 + foo,barEND + JOB2 + bazEND + JOB3 + qux, + END + +=head2 Header + +If the input data has a header, the header can be repeated for each +job by matching the header with B<--header>. If headers start with +B<%> you can do this: + + cat num_%header | \ + parallel --header '(%.*\n)*' --pipe -N3 echo JOB{#}\;cat + +Output (the order may be different): + + JOB1 + %head1 + %head2 + 1 + 2 + 3 + JOB2 + %head1 + %head2 + 4 + 5 + 6 + JOB3 + %head1 + %head2 + 7 + 8 + 9 + JOB4 + %head1 + %head2 + 10 + +If the header is 2 lines, B<--header> 2 will work: + + cat num_%header | parallel --header 2 --pipe -N3 echo JOB{#}\;cat + +Output: Same as above. + +=head2 --pipepart + +B<--pipe> is not very efficient. It maxes out at around 500 +MB/s. B<--pipepart> can easily deliver 5 GB/s. But there are a few +limitations. The input has to be a normal file (not a pipe) given by +B<-a> or B<::::> and B<-L>/B<-l>/B<-N> do not work. B<--recend> and +B<--recstart>, however, I<do> work, and records can often be split on +that alone. + + parallel --pipepart -a num1000000 --block 3m wc + +Output (the order may be different): + + 444443 444444 3000002 + 428572 428572 3000004 + 126985 126984 888890 + + +=head1 Shebang + +=head2 Input data and parallel command in the same file + +GNU B<parallel> is often called as this: + + cat input_file | parallel command + +With B<--shebang> the I<input_file> and B<parallel> can be combined into the same script. + +UNIX shell scripts start with a shebang line like this: + + #!/bin/bash + +GNU B<parallel> can do that, too. With B<--shebang> the arguments can be +listed in the file. The B<parallel> command is the first line of the +script: + + #!/usr/bin/parallel --shebang -r echo + + foo + bar + baz + +Output (the order may be different): + + foo + bar + baz + +=head2 Parallelizing existing scripts + +GNU B<parallel> is often called as this: + + cat input_file | parallel command + parallel command ::: foo bar + +If B<command> is a script, B<parallel> can be combined into a single +file so this will run the script in parallel: + + cat input_file | command + command foo bar + +This B<perl> script B<perl_echo> works like B<echo>: + + #!/usr/bin/perl + + print "@ARGV\n" + +It can be called as this: + + parallel perl_echo ::: foo bar + +By changing the B<#!>-line it can be run in parallel: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/perl + + print "@ARGV\n" + +Thus this will work: + + perl_echo foo bar + +Output (the order may be different): + + foo + bar + +This technique can be used for: + +=over 9 + +=item Perl: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/perl + + print "Arguments @ARGV\n"; + + +=item Python: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/python + + import sys + print 'Arguments', str(sys.argv) + + +=item Bash/sh/zsh/Korn shell: + + #!/usr/bin/parallel --shebang-wrap /bin/bash + + echo Arguments "$@" + + +=item csh: + + #!/usr/bin/parallel --shebang-wrap /bin/csh + + echo Arguments "$argv" + + +=item Tcl: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/tclsh + + puts "Arguments $argv" + + +=item R: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/Rscript --vanilla --slave + + args <- commandArgs(trailingOnly = TRUE) + print(paste("Arguments ",args)) + + +=item GNUplot: + + #!/usr/bin/parallel --shebang-wrap ARG={} /usr/bin/gnuplot + + print "Arguments ", system('echo $ARG') + + +=item Ruby: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/ruby + + print "Arguments " + puts ARGV + + +=item Octave: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/octave + + printf ("Arguments"); + arg_list = argv (); + for i = 1:nargin + printf (" %s", arg_list{i}); + endfor + printf ("\n"); + +=item Common LISP: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/clisp + + (format t "~&~S~&" 'Arguments) + (format t "~&~S~&" *args*) + +=item PHP: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/php + <?php + echo "Arguments"; + foreach(array_slice($argv,1) as $v) + { + echo " $v"; + } + echo "\n"; + ?> + +=item Node.js: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/node + + var myArgs = process.argv.slice(2); + console.log('Arguments ', myArgs); + +=item LUA: + + #!/usr/bin/parallel --shebang-wrap /usr/bin/lua + + io.write "Arguments" + for a = 1, #arg do + io.write(" ") + io.write(arg[a]) + end + print("") + +=item C#: + + #!/usr/bin/parallel --shebang-wrap ARGV={} /usr/bin/csharp + + var argv = Environment.GetEnvironmentVariable("ARGV"); + print("Arguments "+argv); + +=back + +=head1 Semaphore + +GNU B<parallel> can work as a counting semaphore. This is slower and less +efficient than its normal mode. + +A counting semaphore is like a row of toilets. People needing a toilet +can use any toilet, but if there are more people than toilets, they +will have to wait for one of the toilets to become available. + +An alias for B<parallel --semaphore> is B<sem>. + +B<sem> will follow a person to the toilets, wait until a toilet is +available, leave the person in the toilet and exit. + +B<sem --fg> will follow a person to the toilets, wait until a toilet is +available, stay with the person in the toilet and exit when the person +exits. + +B<sem --wait> will wait for all persons to leave the toilets. + +B<sem> does not have a queue discipline, so the next person is chosen +randomly. + +B<-j> sets the number of toilets. + +=head2 Mutex + +The default is to have only one toilet (this is called a mutex). The +program is started in the background and B<sem> exits immediately. Use +B<--wait> to wait for all B<sem>s to finish: + + sem 'sleep 1; echo The first finished' && + echo The first is now running in the background && + sem 'sleep 1; echo The second finished' && + echo The second is now running in the background + sem --wait + +Output: + + The first is now running in the background + The first finished + The second is now running in the background + The second finished + +The command can be run in the foreground with B<--fg>, which will only +exit when the command completes: + + sem --fg 'sleep 1; echo The first finished' && + echo The first finished running in the foreground && + sem --fg 'sleep 1; echo The second finished' && + echo The second finished running in the foreground + sem --wait + +The difference between this and just running the command, is that a +mutex is set, so if other B<sem>s were running in the background only one +would run at a time. + +To control which semaphore is used, use +B<--semaphorename>/B<--id>. Run this in one terminal: + + sem --id my_id -u 'echo First started; sleep 10; echo First done' + +and simultaneously this in another terminal: + + sem --id my_id -u 'echo Second started; sleep 10; echo Second done' + +Note how the second will only be started when the first has finished. + +=head2 Counting semaphore + +A mutex is like having a single toilet: When it is in use everyone +else will have to wait. A counting semaphore is like having multiple +toilets: Several people can use the toilets, but when they all are in +use, everyone else will have to wait. + +B<sem> can emulate a counting semaphore. Use B<--jobs> to set the +number of toilets like this: + + sem --jobs 3 --id my_id -u 'echo Start 1; sleep 5; echo 1 done' && + sem --jobs 3 --id my_id -u 'echo Start 2; sleep 6; echo 2 done' && + sem --jobs 3 --id my_id -u 'echo Start 3; sleep 7; echo 3 done' && + sem --jobs 3 --id my_id -u 'echo Start 4; sleep 8; echo 4 done' && + sem --wait --id my_id + +Output: + + Start 1 + Start 2 + Start 3 + 1 done + Start 4 + 2 done + 3 done + 4 done + +=head2 Timeout + +With B<--semaphoretimeout> you can force running the command anyway after +a period (positive number) or give up (negative number): + + sem --id foo -u 'echo Slow started; sleep 5; echo Slow ended' && + sem --id foo --semaphoretimeout 1 'echo Forced running after 1 sec' && + sem --id foo --semaphoretimeout -2 'echo Give up after 2 secs' + sem --id foo --wait + +Output: + + Slow started + parallel: Warning: Semaphore timed out. Stealing the semaphore. + Forced running after 1 sec + parallel: Warning: Semaphore timed out. Exiting. + Slow ended + +Note how the 'Give up' was not run. + +=head1 Informational + +GNU B<parallel> has some options to give short information about the +configuration. + +B<--help> will print a summary of the most important options: + + parallel --help + +Output: + + Usage: + + parallel [options] [command [arguments]] < list_of_arguments + parallel [options] [command [arguments]] (::: arguments|:::: argfile(s))... + cat ... | parallel --pipe [options] [command [arguments]] + + -j n Run n jobs in parallel + -k Keep same order + -X Multiple arguments with context replace + --colsep regexp Split input on regexp for positional replacements + {} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings + {3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings + With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} = + {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...} + + -S sshlogin Example: foo@server.example.com + --slf .. Use ~/.parallel/sshloginfile as the list of sshlogins + --trc {}.bar Shorthand for --transfer --return {}.bar --cleanup + --onall Run the given command with argument on all sshlogins + --nonall Run the given command with no arguments on all sshlogins + + --pipe Split stdin (standard input) to multiple jobs. + --recend str Record end separator for --pipe. + --recstart str Record start separator for --pipe. + + See 'man parallel' for details + + Academic tradition requires you to cite works you base your article on. + When using programs that use GNU Parallel to process data for publication + please cite: + + O. Tange (2011): GNU Parallel - The Command-Line Power Tool, + ;login: The USENIX Magazine, February 2011:42-47. + + This helps funding further development; AND IT WON'T COST YOU A CENT. + If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +When asking for help, always report the full output of this: + + parallel --version + +Output: + + GNU parallel 20210122 + Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software + Foundation, Inc. + License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html> + This is free software: you are free to change and redistribute it. + GNU parallel comes with no warranty. + + Web site: https://www.gnu.org/software/parallel + + When using programs that use GNU Parallel to process data for publication + please cite as described in 'parallel --citation'. + +In scripts B<--minversion> can be used to ensure the user has at least +this version: + + parallel --minversion 20130722 && \ + echo Your version is at least 20130722. + +Output: + + 20160322 + Your version is at least 20130722. + +If you are using GNU B<parallel> for research the BibTeX citation can be +generated using B<--citation>: + + parallel --citation + +Output: + + Academic tradition requires you to cite works you base your article on. + When using programs that use GNU Parallel to process data for publication + please cite: + + @article{Tange2011a, + title = {GNU Parallel - The Command-Line Power Tool}, + author = {O. Tange}, + address = {Frederiksberg, Denmark}, + journal = {;login: The USENIX Magazine}, + month = {Feb}, + number = {1}, + volume = {36}, + url = {https://www.gnu.org/s/parallel}, + year = {2011}, + pages = {42-47}, + doi = {10.5281/zenodo.16303} + } + + (Feel free to use \nocite{Tange2011a}) + + This helps funding further development; AND IT WON'T COST YOU A CENT. + If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + + If you send a copy of your published article to tange@gnu.org, it will be + mentioned in the release notes of next version of GNU Parallel. + +With B<--max-line-length-allowed> GNU B<parallel> will report the maximal +size of the command line: + + parallel --max-line-length-allowed + +Output (may vary on different systems): + + 131071 + +B<--number-of-cpus> and B<--number-of-cores> run system specific code to +determine the number of CPUs and CPU cores on the system. On +unsupported platforms they will return 1: + + parallel --number-of-cpus + parallel --number-of-cores + +Output (may vary on different systems): + + 4 + 64 + +=head1 Profiles + +The defaults for GNU B<parallel> can be changed systemwide by putting the +command line options in B</etc/parallel/config>. They can be changed for +a user by putting them in B<~/.parallel/config>. + +Profiles work the same way, but have to be referred to with B<--profile>: + + echo '--nice 17' > ~/.parallel/nicetimeout + echo '--timeout 300%' >> ~/.parallel/nicetimeout + parallel --profile nicetimeout echo ::: A B C + +Output: + + A + B + C + +Profiles can be combined: + + echo '-vv --dry-run' > ~/.parallel/dryverbose + parallel --profile dryverbose --profile nicetimeout echo ::: A B C + +Output: + + echo A + echo B + echo C + + +=head1 Spread the word + +I hope you have learned something from this tutorial. + +If you like GNU B<parallel>: + +=over 2 + +=item * + +(Re-)walk through the tutorial if you have not done so in the past year +(https://www.gnu.org/software/parallel/parallel_tutorial.html) + +=item * + +Give a demo at your local user group/your team/your colleagues + +=item * + +Post the intro videos and the tutorial on Reddit, Mastodon, Diaspora*, +forums, blogs, Identi.ca, Google+, Twitter, Facebook, Linkedin, and +mailing lists + +=item * + +Request or write a review for your favourite blog or magazine +(especially if you do something cool with GNU B<parallel>) + +=item * + +Invite me for your next conference + +=back + +If you use GNU B<parallel> for research: + +=over 2 + +=item * + +Please cite GNU B<parallel> in you publications (use B<--citation>) + +=back + +If GNU B<parallel> saves you money: + +=over 2 + +=item * + +(Have your company) donate to FSF or become a member +https://my.fsf.org/donate/ + +=back + +(C) 2013-2022 Ole Tange, GFDLv1.3+ (See +LICENSES/GFDL-1.3-or-later.txt) + + +=cut diff --git a/src/parcat b/src/parcat new file mode 100755 index 0000000..285efce --- /dev/null +++ b/src/parcat @@ -0,0 +1,194 @@ +#!/usr/bin/perl + +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +use Symbol qw(gensym); +use IPC::Open3; +use POSIX qw(:errno_h); +use IO::Select; +use strict; +use threads; +use threads::shared; +use Thread::Queue; + + +my $opened :shared; +my $q = Thread::Queue->new(); +my $okq = Thread::Queue->new(); +my @producers; + +if(not @ARGV) { + if(-t *STDIN) { + print "Usage:\n"; + print " parcat file(s)\n"; + print " cat argfile | parcat\n"; + } else { + # Read arguments from stdin + chomp(@ARGV = <STDIN>); + } +} + +my $files_to_open = 0; +# Default: fd = stdout +my $fd = 1; +for (@ARGV) { + # --rm = remove file when opened + /^--rm$/ and do { $opt::rm = 1; next; }; + # -1 = output to fd 1, -2 = output to fd 2 + /^-(\d+)$/ and do { $fd = $1; next; }; + push @producers, threads->create('producer', $_, $fd); + $files_to_open++; +} + +sub producer { + # Open a file/fifo, set non blocking, enqueue fileno of the file handle + my $file = shift; + my $output_fd = shift; + open(my $fh, "<", $file) || do { + print STDERR "parcat: Cannot open $file\n"; + exit(1); + }; + # Remove file when it has been opened + if($opt::rm) { + unlink $file; + } + set_fh_non_blocking($fh); + $opened++; + # Pass the fileno to parent + $q->enqueue(fileno($fh),$output_fd); + # Get an OK that the $fh is opened and we can release the $fh + while(1) { + my $ok = $okq->dequeue(); + if($ok == fileno($fh)) { last; } + # Not ours - very unlikely to happen + $okq->enqueue($ok); + } + return; +} + +my $s = IO::Select->new(); +my %buffer; + +sub add_file { + my $infd = shift; + my $outfd = shift; + open(my $infh, "<&=", $infd) || die; + open(my $outfh, ">&=", $outfd) || die; + $s->add($infh); + # Tell the producer now opened here and can be released + $okq->enqueue($infd); + # Initialize the buffer + @{$buffer{$infh}{$outfd}} = (); + $Global::fh{$outfd} = $outfh; +} + +sub add_files { + # Non-blocking dequeue + my ($infd,$outfd); + do { + ($infd,$outfd) = $q->dequeue_nb(2); + if(defined($outfd)) { + add_file($infd,$outfd); + } + } while(defined($outfd)); +} + +sub add_files_block { + # Blocking dequeue + my ($infd,$outfd) = $q->dequeue(2); + add_file($infd,$outfd); +} + + +my $fd; +my (@ready,$infh,$rv,$buf); +do { + # Wait until at least one file is opened + add_files_block(); + while($q->pending or keys %buffer) { + add_files(); + while(keys %buffer) { + @ready = $s->can_read(0.01); + if(not @ready) { + add_files(); + } + for $infh (@ready) { + # There is only one key, namely the output file descriptor + for my $outfd (keys %{$buffer{$infh}}) { + $rv = sysread($infh, $buf, 65536); + if (!$rv) { + if($! == EAGAIN) { + # Would block: Nothing read + next; + } else { + # Nothing read, but would not block: + # This file is done + $s->remove($infh); + for(@{$buffer{$infh}{$outfd}}) { + syswrite($Global::fh{$outfd},$_); + } + delete $buffer{$infh}; + # Closing the $infh causes it to block + # close $infh; + add_files(); + next; + } + } + # Something read. + # Find \n or \r for full line + my $i = (rindex($buf,"\n")+1); + if($i) { + # Print full line + for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) { + syswrite($Global::fh{$outfd},$_); + } + # @buffer = remaining half line + $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)]; + } else { + # Something read, but not a full line + push @{$buffer{$infh}{$outfd}}, $buf; + } + redo; + } + } + } + } +} while($opened < $files_to_open); + + +for (@producers) { + $_->join(); +} + +sub set_fh_non_blocking { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &O_NONBLOCK; # Add non-blocking to the flags + fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle +} diff --git a/src/parcat.pod b/src/parcat.pod new file mode 100644 index 0000000..2516e0b --- /dev/null +++ b/src/parcat.pod @@ -0,0 +1,191 @@ +#!/usr/bin/perl + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=head1 NAME + +parcat - cat files or fifos in parallel + +=head1 SYNOPSIS + +B<parcat> [--rm] [-#] file(s) [-#] file(s) + +=head1 DESCRIPTION + +GNU B<parcat> reads files or fifos in parallel. It writes full lines +so there will be no problem with mixed-half-lines which you risk if +you use: + + (cat file1 & cat file2 &) | ... + +It is faster than doing: + + parallel -j0 --lb cat ::: file* + +Arguments can be given on the command line or passed in on stdin +(standard input). + +=head1 OPTIONS + +=over 9 + +=item -B<#> + +Arguments following this will be sent to the file descriptor B<#>. E.g. + + parcat -1 stdout1 stdout2 -2 stderr1 stderr2 + +will send I<stdout1> and I<stdout2> to stdout (standard output = file +descriptor 1), and send I<stderr1> and I<stderr2> to stderr (standard +error = file descriptor 2). + +=item --rm + +Remove files after opening. As soon as the files are opened, unlink +the files. + +=back + +=head1 EXAMPLES + +=head2 Simple line buffered output + +B<traceroute> will often print half a line. If run in parallel, two +instances may half-lines of their output. This can be avoided by +saving the output to a fifo and then using B<parcat> to read the two +fifos in parallel: + + mkfifo freenetproject.org.fifo tange.dk.fifo + traceroute freenetproject.org > freenetproject.org.fifo & + traceroute tange.dk > tange.dk.fifo & + parcat --rm *fifo + + +=head1 REPORTING BUGS + +GNU B<parcat> is part of GNU B<parallel>. Report bugs to +<bug-parallel@gnu.org>. + + +=head1 AUTHOR + +Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 9 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 DEPENDENCIES + +GNU B<parcat> uses Perl. + + +=head1 SEE ALSO + +B<cat>(1), B<parallel>(1) + +=cut diff --git a/src/parset b/src/parset new file mode 100755 index 0000000..faf6a81 --- /dev/null +++ b/src/parset @@ -0,0 +1,138 @@ +#!/usr/bin/env bash + +# Copyright (C) 2016-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +grepq() { + # grep -q for systems without -q + grep >/dev/null 2>/dev/null "$@" +} + +installer() { + source="$1" + script="$2" + into="$3" + if grepq $script $into; then + true already installed + else + echo $source \`which $script\` >> $into + fi +} + +while test $# -gt 0; do + key="$1" + + case $key in + -i|--install) + installer . env_parallel.bash $HOME/.bashrc + installer . env_parallel.sh $HOME/.shrc + installer . env_parallel.zsh $HOME/.zshenv + installer source env_parallel.ksh $HOME/.kshrc + installer source env_parallel.mksh $HOME/.kshrc + echo $SHELL | grepq /pdksh && + installer . env_parallel.pdksh $HOME/.profile + echo $SHELL | grepq /ash && + installer . env_parallel.ash $HOME/.profile + echo $SHELL | grepq /dash && + installer . env_parallel.dash $HOME/.profile + installer source env_parallel.csh $HOME/.cshrc + installer source env_parallel.tcsh $HOME/.tcshrc + mkdir -p $HOME/.config/fish + grepq env_parallel.fish $HOME/.config/fish/config.fish || + echo '. (which env_parallel.fish)' >> $HOME/.config/fish/config.fish + echo 'Installed env_parallel in:' + echo " " $HOME/.bashrc + echo " " $HOME/.shrc + echo " " $HOME/.zshenv + echo " " $HOME/.config/fish/config.fish + echo " " $HOME/.kshrc + echo " " $HOME/.mkshrc + echo " " $HOME/.profile + echo " " $HOME/.cshrc + echo " " $HOME/.tcshrc + exit + ;; + *) + echo "Unknown option: $key" + ;; + esac + shift # past argument or value +done + + +cat <<'_EOS' + +parset only works if it is a function. The function is defined as part +of env_parallel. + +Do the below and restart your shell. + +bash: Put this in $HOME/.bashrc: . `which env_parallel.bash` + E.g. by doing: echo '. `which env_parallel.bash`' >> $HOME/.bashrc + Supports: variables, aliases, functions, arrays + +ksh: Put this in $HOME/.kshrc: source `which env_parallel.ksh` + E.g. by doing: echo 'source `which env_parallel.ksh`' >> $HOME/.kshrc + Supports: variables, aliases, functions, arrays + +mksh: Put this in $HOME/.mkshrc: source `which env_parallel.mksh` + E.g. by doing: echo 'source `which env_parallel.mksh`' >> $HOME/.mkshrc + Supports: variables, aliases, functions, arrays + +pdksh: Put this in $HOME/.profile: source `which env_parallel.pdksh` + E.g. by doing: echo '. `which env_parallel.pdksh`' >> $HOME/.profile + Supports: variables, aliases, functions, arrays + +zsh: Put this in $HOME/.zshrc: . `which env_parallel.zsh` + E.g. by doing: echo '. `which env_parallel.zsh`' >> $HOME/.zshenv + Supports: variables, functions, arrays + +ash: Put this in $HOME/.profile: . `which env_parallel.ash` + E.g. by doing: echo '. `which env_parallel.ash`' >> $HOME/.profile + Supports: variables, aliases + +dash: Put this in $HOME/.profile: . `which env_parallel.dash` + E.g. by doing: echo '. `which env_parallel.dash`' >> $HOME/.profile + Supports: variables, aliases + +fish: Unsupported + +csh: Unsupported + +tcsh: Unsupported + +To install in all shells run: + + parset --install + +In a script you need to run this before using parset: + +bash: . `which env_parallel.bash` +ksh: source `which env_parallel.ksh` +mksh: source `which env_parallel.mksh` +pdksh: source `which env_parallel.pdksh` +zsh: . `which env_parallel.zsh` +ash: . `which env_parallel.ash` +dash: . `which env_parallel.dash` + +For details: see man parset + +_EOS diff --git a/src/parset.pod b/src/parset.pod new file mode 100644 index 0000000..cb95ec6 --- /dev/null +++ b/src/parset.pod @@ -0,0 +1,327 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=encoding utf8 + +=head1 NAME + +parset - set shell variables in parallel + + +=head1 SYNOPSIS + +B<parset> I<variablename> [options for GNU Parallel] + +B<env_parset> I<variablename> [options for GNU Parallel] + +=head1 DESCRIPTION + +B<parset> is a shell function that puts the output from GNU +B<parallel> into shell variables. + +B<env_parset> is a shell function that puts the output from +B<env_parallel> into shell variables. + +The B<parset> and B<env_parset> functions are defined as part of +B<env_parallel>. + +If I<variablename> is a single variable name, this will be treated as +the destination variable. If the variable is defined as an associative +array (using B<typeset -A myassoc>), this will be used. Otherwise the +variable will be made into a normal array. + +If I<variablename> contains multiple names separated by ',' or space, +the names will be the destination variables. The number of names must +be at least the number of jobs. + + +=head1 OPTIONS + +Same as GNU B<parallel>, but they are put I<after> the destination +variable. + + +=head1 SUPPORTED SHELLS + +=head2 Bash/Zsh/Ksh/Mksh + +=head3 Examples + +Put output into B<myarray>: + + parset myarray seq 3 ::: 4 5 6 + echo "${myarray[1]}" + +Put output into vars B<$seq, $pwd, $ls>: + + parset "seq pwd ls" ::: "seq 10" pwd ls + echo "$ls" + +Put output into vars B<$seq, $pwd, $ls>: + + into_vars=(seq pwd ls) + parset "${into_vars[*]}" ::: "seq 10" pwd ls + echo "$ls" + +Put output into associative array B<myassoc> (not supported for mksh): + + typeset -A myassoc + parset myassoc seq ::: 4 5 ::: 6 7 + echo "${myassoc[4 7]}" + +The commands to run can be an array: + + cmd=("echo first" "echo '<<joe \"double space\" cartoon>>'" "pwd") + parset data ::: "${cmd[@]}" + echo "${data[1]}" + echo "${data[2]}" + +B<parset> can read from stdin (standard input) if it is a file: + + parset res echo < parallel_input_file + +but B<parset> can I<not> be part of a pipe. In particular this means +it cannot read from a pipe or write to a pipe: + + seq 10 | parset res echo Does not work + +but must instead use a tempfile: + + seq 10 > parallel_input + parset res echo :::: parallel_input + echo "${res[1]}" + echo "${res[9]}" + +or a FIFO: + + mkfifo input_fifo + seq 30 > input_fifo & + parset res echo :::: input_fifo + echo "${res[1]}" + echo "${res[29]}" + +or Bash/Zsh/Ksh process substitution: + + parset res echo :::: <(seq 100) + echo "${res[1]}" + echo "${res[99]}" + + +=head3 Installation + +Put this in the relevant B<$HOME/.bashrc> or B<$HOME/.zshenv> or B<$HOME/.kshrc>: + + . `which env_parallel.bash` + . `which env_parallel.zsh` + source `which env_parallel.ksh` + +E.g. by doing: + + echo '. `which env_parallel.bash`' >> $HOME/.bashrc + echo '. `which env_parallel.zsh`' >> $HOME/.zshenv + echo 'source `which env_parallel.ksh`' >> $HOME/.kshrc + +or by doing: + + env_parallel --install + + +=head2 ash/dash (FreeBSD's /bin/sh) + +=head3 Examples + +ash does not support arrays. + +Put output into vars B<$seq, $pwd, $ls>: + + parset "seq pwd ls" ::: "seq 10" pwd ls + echo "$ls" + +B<parset> can read from stdin (standard input) if it is a file: + + parset res1,res2,res3 echo < parallel_input_file + +but B<parset> can not be part of a pipe. In particular this means it +cannot read from a pipe or write to a pipe: + + seq 3 | parset res1,res2,res3 echo Does not work + +but must instead use a tempfile: + + seq 3 > parallel_input + parset res1,res2,res3 echo :::: parallel_input + echo "$res1" + echo "$res2" + echo "$res3" + +or a FIFO: + + mkfifo input_fifo + seq 3 > input_fifo & + parset res1,res2,res3 echo :::: input_fifo + echo "$res1" + echo "$res2" + echo "$res3" + +=head3 Installation + +Put the relevant one of these into B<$HOME/.profile>: + + . `which env_parallel.sh` + . `which env_parallel.ash` + . `which env_parallel.dash` + +E.g. by doing: + + echo '. `which env_parallel.ash`' >> $HOME/.bashrc + +or by doing: + + env_parallel --install + + +=head1 EXIT STATUS + +Same as GNU B<parallel>. + + +=head1 AUTHOR + +When using GNU B<parallel> for a publication please cite: + +O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ;login: +The USENIX Magazine, February 2011:42-47. + +This helps funding further development; and it won't cost you a cent. +If you pay 10000 EUR you should feel free to use GNU Parallel without citing. + +Copyright (C) 2007-10-18 Ole Tange, http://ole.tange.dk + +Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + +=head1 DEPENDENCIES + +B<parset> uses GNU B<parallel>. + + +=head1 SEE ALSO + +B<parallel>(1), B<env_parallel>(1), B<bash>(1). + + +=cut diff --git a/src/parsort b/src/parsort new file mode 100755 index 0000000..64c64c0 --- /dev/null +++ b/src/parsort @@ -0,0 +1,423 @@ +#!/usr/bin/perl + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +=pod + +=head1 NAME + +parsort - Sort (big files) in parallel + + +=head1 SYNOPSIS + +B<parsort> I<options for sort> + + +=head1 DESCRIPTION + +B<parsort> uses GNU B<sort> to sort in parallel. It works just like +B<sort> but faster on inputs with more than 1 M lines, if you have a +multicore machine. + +Hopefully these ideas will make it into GNU B<sort> in the future. + + +=head1 EXAMPLE + +Sort files: + + parsort *.txt > sorted.txt + +Sort stdin (standard input) numerically: + + cat numbers | parsort -n > sorted.txt + + +=head1 PERFORMANCE + +B<parsort> is faster on a file than on stdin (standard input), because +different parts of a file can be read in parallel. + +On a 48 core machine you should see a speedup of 3x over B<sort>. + + +=head1 AUTHOR + +Copyright (C) 2020-2022 Ole Tange, +http://ole.tange.dk and Free Software Foundation, Inc. + + +=head1 LICENSE + +Copyright (C) 2012 Free Software Foundation, Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + + +=head1 DEPENDENCIES + +B<parsort> uses B<sort>, B<bash>, and B<parallel>. + + +=head1 SEE ALSO + +B<sort> + + +=cut + +use strict; +use Getopt::Long; +use POSIX qw(mkfifo); + +Getopt::Long::Configure("bundling","require_order"); + +my @ARGV_before = @ARGV; + +GetOptions( + "debug|D" => \$opt::D, + "version" => \$opt::version, + "verbose|v" => \$opt::verbose, + "b|ignore-leading-blanks" => \$opt::ignore_leading_blanks, + "d|dictionary-order" => \$opt::dictionary_order, + "f|ignore-case" => \$opt::ignore_case, + "g|general-numeric-sort" => \$opt::general_numeric_sort, + "i|ignore-nonprinting" => \$opt::ignore_nonprinting, + "M|month-sort" => \$opt::month_sort, + "h|human-numeric-sort" => \$opt::human_numeric_sort, + "n|numeric-sort" => \$opt::numeric_sort, + "N|numascii" => \$opt::numascii, + "r|reverse" => \$opt::reverse, + "R|random-sort" => \$opt::random_sort, + "sort=s" => \$opt::sort, + "V|version-sort" => \$opt::version_sort, + "k|key=s" => \@opt::key, + "t|field-separator=s" => \$opt::field_separator, + "z|zero-terminated" => \$opt::zero_terminated, + "files0-from=s" => \$opt::files0_from, + "random-source=s" => \$opt::dummy, + "batch-size=s" => \$opt::dummy, + "check=s" => \$opt::dummy, + "c" => \$opt::dummy, + "C" => \$opt::dummy, + "compress-program=s" => \$opt::dummy, + "T|temporary-directory=s" => \$opt::dummy, + "parallel=s" => \$opt::dummy, + "u|unique" => \$opt::dummy, + "S|buffer-size=s" => \$opt::dummy, + "s|stable" => \$opt::dummy, + "help" => \$opt::dummy, + ) || exit(255); +$Global::progname = ($0 =~ m:(^|/)([^/]+)$:)[1]; +$Global::version = 20221122; +if($opt::version) { version(); exit 0; } +@Global::sortoptions = grep { ! /^-D$/ } + shell_quote(@ARGV_before[0..($#ARGV_before-$#ARGV-1)]); + +$ENV{'TMPDIR'} ||= "/tmp"; + +sub merge { + # Input: + # @cmd = commands to 'cat' (part of) a file + # 'cat a' 'cat b' 'cat c' => + # sort -m <(sort -m <(cat a) <(cat b)) <(sort -m <(cat c)) + my @cmd = @_; + chomp(@cmd); + while($#cmd > 0) { + my @tmp; + while($#cmd >= 0) { + my $a = shift @cmd; + my $b = shift @cmd; + $a &&= "<($a)"; + $b &&= "<($b)"; + # This looks like useless use of 'cat', but contrary to + # naive belief it increases performance dramatically. + push @tmp, "sort -m @Global::sortoptions $a $b | cat" + } + @cmd = @tmp; + } + return @cmd; +} + +sub sort_files { + # Input is files + my @files = @_; + # Let GNU Parallel generate the commands to read parts of files + # The commands split at \n (or \0) + # and there will be at least one for each CPU thread + my @subopt = $opt::zero_terminated ? qw(--recend "\0") : (); + open(my $par,"-|",qw(parallel), @subopt, + qw(--pipepart --block -1 --dryrun -vv sort), + @Global::sortoptions, '::::', @files) || die; + my @cmd = merge(<$par>); + close $par; + debug(@cmd); + # The command uses <(...) so it is incompatible with /bin/sh + open(my $bash,"|-","bash") || die; + print $bash @cmd; + close $bash; +} + +sub sort_stdin { + # Input is stdin + # Spread the input between n processes that each sort + # n = number of CPU threads + my $numthreads = `parallel --number-of-threads`; + my @fifos = map { tmpfifo() } 1..$numthreads; + map { mkfifo($_,0600) } @fifos; + # This trick removes the fifo as soon as it is connected in the other end + # (rm fifo; ...) < fifo + my @cmd = (map { "(rm $_; sort @Global::sortoptions) < $_" } + map { Q($_) } @fifos); + @cmd = merge(@cmd); + if(fork) { + } else { + my @subopt = $opt::zero_terminated ? qw(--recend "\0") : (); + exec(qw(parallel -j), $numthreads, @subopt, + # 286k is the best mean value after testing 250..350 + qw(--block 286k --pipe --roundrobin cat > {} :::),@fifos); + } + # The command uses <(...) so it is incompatible with /bin/sh + open(my $bash,"|-","bash") || die; + print $bash @cmd; + close $bash; +} + +sub tmpname { + # Select a name that does not exist + # Do not create the file as it may be used for creating a socket (by tmux) + # Remember the name in $Global::unlink to avoid hitting the same name twice + my $name = shift; + my($tmpname); + if(not -w $ENV{'TMPDIR'}) { + if(not -e $ENV{'TMPDIR'}) { + ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir ". + Q($ENV{'TMPDIR'})."'"); + } else { + ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w ". + Q($ENV{'TMPDIR'})."'"); + } + exit(255); + } + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $tmpname); + return $tmpname; +} + +sub tmpfifo { + # Find an unused name and mkfifo on it + my $tmpfifo = tmpname("psort"); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub debug { + # Returns: N/A + $opt::D or return; + @_ = grep { defined $_ ? $_ : "" } @_; + print STDERR @_[1..$#_]; +} + +sub version() { + # Returns: N/A + print join + ("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2020-2022 Ole Tange, http://ole.tange.dk and Free Software", + "Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: https://www.gnu.org/software/parallel\n", + ); +} + +sub shell_quote(@) { + # Input: + # @strings = strings to be quoted + # Returns: + # @shell_quoted_strings = string quoted as needed by the shell + return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_); +} + +sub shell_quote_scalar_rc($) { + # Quote for the rc-shell + my $a = $_[0]; + if(defined $a) { + if(($a =~ s/'/''/g) + + + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_csh($) { + # Quote for (t)csh + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline in csh as \\\n + ($a =~ s/[\n]/"\\\n"/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_default($) { + # Quote for other shells (Bourne compatibles) + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + my $s = $_[0]; + if($s =~ /[^-_.+a-z0-9\/]/i) { + $s =~ s/'/'"'"'/g; # "-quote single quotes + $s = "'$s'"; # '-quote entire string + $s =~ s/^''//; # Remove unneeded '' at ends + $s =~ s/''$//; # (faster than s/^''|''$//g) + return $s; + } elsif ($s eq "") { + return "''"; + } else { + # No quoting needed + return $s; + } +} + +sub shell_quote_scalar($) { + # Quote the string so the shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + + # Speed optimization: Choose the correct shell_quote_scalar_* + # and call that directly from now on + no warnings 'redefine'; + if($Global::cshell) { + # (t)csh + *shell_quote_scalar = \&shell_quote_scalar_csh; + } elsif($Global::shell =~ m:(^|/)rc$:) { + # rc-shell + *shell_quote_scalar = \&shell_quote_scalar_rc; + } else { + # other shells + *shell_quote_scalar = \&shell_quote_scalar_default; + } + # The sub is now redefined. Call it + return shell_quote_scalar($_[0]); +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = shell_quote_scalar($_[0]); + no warnings 'redefine'; + *Q = \&::shell_quote_scalar; + return $ret; +} + + +sub status(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh map { ($_, "\n") } @w; + flush $fh; +} + +sub status_no_nl(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning(@) { + my @w = @_; + my $prog = $Global::progname || "parsort"; + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); +} + +{ + my %warnings; + sub warning_once(@) { + my @w = @_; + my $prog = $Global::progname || "parsort"; + $warnings{@w}++ or + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); + } +} + +sub error(@) { + my @w = @_; + my $prog = $Global::progname || "parsort"; + status(map { ($prog.": Error: ". $_); } @w); +} + +sub die_bug($) { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug. ", + "Please follow\n", + "https://www.gnu.org/software/parallel/man.html#REPORTING-BUGS\n", + "\n", + "Include this in the report:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + exit(255); +} + +if(@ARGV) { + sort_files(@ARGV); +} elsif(length $opt::files0_from) { + $/="\0"; + open(my $fh,"<",$opt::files0_from) || die; + my @files = <$fh>; + chomp(@files); + sort_files(@files); +} else { + sort_stdin(); +} + +# Test +# -z +# OK: cat bigfile | parsort +# OK: parsort -k4n files*.txt +# OK: parsort files*.txt +# OK: parsort "file with space" + diff --git a/src/pod2graph b/src/pod2graph new file mode 100755 index 0000000..a7be100 --- /dev/null +++ b/src/pod2graph @@ -0,0 +1,128 @@ +#!/usr/bin/perl + +# Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# Convert .pod file containing: +# +# =item --option +# +# See also: --other-option +# +# to a graph.pdf with link between --option and --other-option + +$pod=join("",<>); +# Remove stuff before OPTIONS +$pod=~s/^.*=head1 OPTIONS//s; +# Remove from EXAMPLES (which is next section) and till end +$pod=~s/=head1 EXAMPLES.*//s; +# Remove =over / =back pairs +$pod=~s/^.*?=over//s; +$pod=~s/=back\s*$//s; +$pod=~s/=over.*?=back//sg; + +$in_text = 0; +$in_item = 0; +$in_see_also = 0; + +for(split(/\n\n+/,$pod)) { + if(/^See also:\s+(\S.*)/s) { + # "See also" paragraph + $lex = "seealso"; + $in_text = 0; + $in_item = 0; + $in_see_only = 1; + } elsif(/^=item\s+(B<[{]=.*?perl expression.*?=[}]>|[IB]<.*?>)(\s|$)/s) { + # "=item" paragraph + $lex = "item"; + $in_text = 0; + $in_item = 1; + $in_see_only = 0; + } elsif(/\S/) { + # else it is just text + $lex = "text"; + $in_text = 1; + $in_item = 0; + $in_see_only = 0; + } + + if($lex eq "seealso") { + # We found "See also": output edge + if($lastlex eq "item") { + @saveditems = @items; + @items = (); + } + my $to = $1; + # Edge from = item/item/item + my $from = (join "/", + map { + s/I<(.*?)>/$1/g; + s/B<(.*?)>/$1/g; + $_ } + @saveditems[0]); + my @to; + while($to =~ s/(B<[{]=.*?perl expression.*?=[}]>|[BI]<.*?>)(\s|$)//) { + my $v = $1; + push @to, map { + s/I<(.*?)>/$1/g; + s/B<(.*?)>/$1/g; + $_; + } $v; + } + map { + if(not $seen{$from,$_}++ + and + not $seen{$_,$from}++) { + push @nodelines, "\"$from\" -- \"$_\"\n" + } + } @to; + + } elsif($lex eq "text") { + if($lastlex eq "item") { + @saveditems = @items; + @items = (); + } + } elsif($lex eq "item") { + push(@items,$1); + } + $lastlex=$lex; +} + + +sub header() { + return q[ + graph test123 { + graph [splines=true; overlap=false;]; + labelloc="t"; + label="Related map for options for GNU Parallel\nFind the options you use and learn about the options related to it";fontsize=33; + + "{}"[margin=0.3;] + "--sshlogin"[margin=0.3;] + "--pipe"[margin=0.3;] + ":::"[margin=0.3;] + "-X"[margin=0.3;] + ]; +} + +open(GRAPHVIZ,"|-","tee foo.dot |neato -Gepsilon=.000000001 -Tpdf") || die; +print GRAPHVIZ header(), (sort { rand()*3 -1 } @nodelines), "}"; +close GRAPHVIZ; + @@ -0,0 +1,14979 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free +# Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later + +# open3 used in Job::start +use IPC::Open3; +use POSIX; +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +sub set_input_source_header($$) { + my ($command_ref,$input_source_fh_ref) = @_; + if(defined $opt::header and not $opt::pipe) { + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = defined $opt::colsep ? $opt::colsep : "\t"; + # regexp for {= + my $left = "\Q$Global::parensleft\E"; + my $l = $Global::parensleft; + # regexp for =} + my $right = "\Q$Global::parensright\E"; + my $r = $Global::parensright; + if($opt::header ne "0") { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + my $line = <$fh>; + chomp($line); + $line =~ s/\r$//; + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } + } + # Make it possible to do: + # parallel --header 0 echo {file2} {file1} :::: file1 file2 + my $id = 1; + for my $s (@opt::a) { + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + fileno $s and next; + for(@$command_ref, @Global::ret_files, + @Global::transfer_files, $opt::tagstring, + $opt::workdir, $opt::results, $opt::retries, + @Global::template_contents, @Global::template_names, + @opt::filter) { + # Skip if undefined + $_ or next; + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + # {=header1 ... =} => {=1 ... =} + s:$left $s (.*?) $right:$l$id$1$r:gx; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } else { + my $id = 1; + for my $fh (@$input_source_fh_ref) { + $Global::input_source_header{$id} = $id; + $id++; + } + } +} + +sub max_jobs_running() { + # Compute $Global::max_jobs_running as the max number of jobs + # running on each sshlogin. + # Returns: + # $Global::max_jobs_running + if(not $Global::max_jobs_running) { + for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); + } + } + if(not $Global::max_jobs_running) { + ::error("Cannot run any jobs."); + wait_and_exit(255); + } + return $Global::max_jobs_running; +} + +sub halt() { + # Compute exit value, + # wait for children to complete + # and exit + if($opt::halt and $Global::halt_when ne "never") { + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + $Global::halt_exitstatus = + ::ceil($Global::total_failed / + ($Global::total_started || 1) * 100); + } elsif($Global::halt_count) { + $Global::halt_exitstatus = + ::min(undef_as_zero($Global::total_failed),101); + } + } + wait_and_exit($Global::halt_exitstatus); + } else { + if($Global::semaphore) { + # --semaphore runs a single job: + # Use exit value of that + wait_and_exit($Global::halt_exitstatus); + } else { + # 0 = all jobs succeeded + # 1-100 = n jobs failed + # 101 = >100 jobs failed + wait_and_exit(min(undef_as_zero($Global::exitstatus),101)); + } + } +} + + +sub __PIPE_MODE__() {} + + +sub pipepart_setup() { + # Compute the blocksize + # Generate the commands to extract the blocks + # Push the commands on queue + # Changes: + # @Global::cat_prepends + # $Global::JobQueue + if($opt::tee) { + # Prepend each command with + # < file + my $cat_string = "< ".Q($opt::a[0]); + for(1..$Global::JobQueue->total_jobs()) { + push @Global::cat_appends, $cat_string; + push @Global::cat_prepends, ""; + } + } else { + if(not $opt::blocksize) { + # --blocksize with 10 jobs per jobslot + $opt::blocksize = -10; + } + if($opt::roundrobin) { + # --blocksize with 1 job per jobslot + $opt::blocksize = -1; + } + if($opt::blocksize < 0) { + my $size = 0; + # Compute size of -a + for(@opt::a) { + if(-f $_) { + $size += -s $_; + } elsif(-b $_) { + $size += size_of_block_dev($_); + } elsif(-e $_) { + ::error("$_ is neither a file nor a block device"); + wait_and_exit(255); + } else { + ::error("File not found: $_"); + wait_and_exit(255); + } + } + # Run in total $job_slots*(- $blocksize) jobs + # Set --blocksize = size / no of proc / (- $blocksize) + $Global::dummy_jobs = 1; + $Global::blocksize = 1 + + int($size / max_jobs_running() / + -multiply_binary_prefix($opt::blocksize)); + } + @Global::cat_prepends = (map { pipe_part_files($_) } + # ::: are put into files and given a filehandle + # ignore these and only keep the filenames. + grep { ! fileno $_ } @opt::a); + # Unget the empty arg as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget( + map { [Arg->new("\0noarg")] } @Global::cat_prepends + ); + } +} + +sub pipe_tee_setup() { + # Create temporary fifos + # Run 'tee fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @fifos; + for(1..$Global::JobQueue->total_jobs()) { + push @fifos, tmpfifo(); + } + # cat foo | tee fifo1 fifo2 fifo3 fifo4 fifo5 > /dev/null + if(not fork()){ + # Test if tee supports --output-error=warn-nopipe + `echo | tee --output-error=warn-nopipe /dev/null >/dev/null 2>/dev/null`; + my $opt = $? ? "" : "--output-error=warn-nopipe"; + ::debug("init","tee $opt"); + if($opt::dryrun) { + # This is not exactly what is run, but it gives the basic idea + print "mkfifo @fifos\n"; + print "tee $opt @fifos >/dev/null &\n"; + } else { + # Let tee inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + if($opt) { + exec "tee", $opt, @fifos; + } else { + exec "tee", @fifos; + } + } + exit(0); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + # Remove the tmpfifo as soon as it is open + @Global::cat_prepends = map { "(rm $_;" } @fifos; + @Global::cat_appends = map { ") < $_" } @fifos; +} + + +sub parcat_script() { + # TODO if script fails: Use parallel -j0 --plain --lb cat ::: fifos + my $script = q'{ + use POSIX qw(:errno_h); + use IO::Select; + use strict; + use threads; + use Thread::Queue; + use Fcntl qw(:DEFAULT :flock); + + my $opened :shared; + my $q = Thread::Queue->new(); + my $okq = Thread::Queue->new(); + my @producers; + + if(not @ARGV) { + if(-t *STDIN) { + print "Usage:\n"; + print " parcat file(s)\n"; + print " cat argfile | parcat\n"; + } else { + # Read arguments from stdin + chomp(@ARGV = <STDIN>); + } + } + my $files_to_open = 0; + # Default: fd = stdout + my $fd = 1; + for (@ARGV) { + # --rm = remove file when opened + /^--rm$/ and do { $opt::rm = 1; next; }; + # -1 = output to fd 1, -2 = output to fd 2 + /^-(\d+)$/ and do { $fd = $1; next; }; + push @producers, threads->create("producer", $_, $fd); + $files_to_open++; + } + + sub producer { + # Open a file/fifo, set non blocking, enqueue fileno of the file handle + my $file = shift; + my $output_fd = shift; + open(my $fh, "<", $file) || do { + print STDERR "parcat: Cannot open $file\n"; + exit(1); + }; + # Remove file when it has been opened + if($opt::rm) { + unlink $file; + } + set_fh_non_blocking($fh); + $opened++; + # Pass the fileno to parent + $q->enqueue(fileno($fh),$output_fd); + # Get an OK that the $fh is opened and we can release the $fh + while(1) { + my $ok = $okq->dequeue(); + if($ok == fileno($fh)) { last; } + # Not ours - very unlikely to happen + $okq->enqueue($ok); + } + return; + } + + my $s = IO::Select->new(); + my %buffer; + + sub add_file { + my $infd = shift; + my $outfd = shift; + open(my $infh, "<&=", $infd) || die; + open(my $outfh, ">&=", $outfd) || die; + $s->add($infh); + # Tell the producer now opened here and can be released + $okq->enqueue($infd); + # Initialize the buffer + @{$buffer{$infh}{$outfd}} = (); + $Global::fh{$outfd} = $outfh; + } + + sub add_files { + # Non-blocking dequeue + my ($infd,$outfd); + do { + ($infd,$outfd) = $q->dequeue_nb(2); + if(defined($outfd)) { + add_file($infd,$outfd); + } + } while(defined($outfd)); + } + + sub add_files_block { + # Blocking dequeue + my ($infd,$outfd) = $q->dequeue(2); + add_file($infd,$outfd); + } + + + my $fd; + my (@ready,$infh,$rv,$buf); + do { + # Wait until at least one file is opened + add_files_block(); + while($q->pending or keys %buffer) { + add_files(); + while(keys %buffer) { + @ready = $s->can_read(0.01); + if(not @ready) { + add_files(); + } + for $infh (@ready) { + # There is only one key, namely the output file descriptor + for my $outfd (keys %{$buffer{$infh}}) { + # TODO test if 60800 is optimal (2^17 is used elsewhere) + $rv = sysread($infh, $buf, 60800); + if (!$rv) { + if($! == EAGAIN) { + # Would block: Nothing read + next; + } else { + # Nothing read, but would not block: + # This file is done + $s->remove($infh); + for(@{$buffer{$infh}{$outfd}}) { + syswrite($Global::fh{$outfd},$_); + } + delete $buffer{$infh}; + # Closing the $infh causes it to block + # close $infh; + add_files(); + next; + } + } + # Something read. + # Find \n or \r for full line + my $i = (rindex($buf,"\n")+1); + if($i) { + # Print full line + for(@{$buffer{$infh}{$outfd}}, substr($buf,0,$i)) { + syswrite($Global::fh{$outfd},$_); + } + # @buffer = remaining half line + $buffer{$infh}{$outfd} = [substr($buf,$i,$rv-$i)]; + } else { + # Something read, but not a full line + push @{$buffer{$infh}{$outfd}}, $buf; + } + redo; + } + } + } + } + } while($opened < $files_to_open); + + for (@producers) { + $_->join(); + } + + sub set_fh_non_blocking { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + my $flags; + fcntl($fh, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &O_NONBLOCK; # Add non-blocking to the flags + fcntl($fh, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + }'; + return ::spacefree(3, $script); +} + +sub sharder_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ hex(B::hash($_))%$bins }; + } + print $fh $_; + } + } else { + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ hex(B::hash($F[$col0]))%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub binner_script() { + my $script = q{ + use B; + # Column separator + my $sep = shift; + # Which columns to shard on (count from 1) + my $col = shift; + # Which columns to shard on (count from 0) + my $col0 = $col - 1; + # Perl expression + my $perlexpr = shift; + my $bins = @ARGV; + # Open fifos for writing, fh{0..$bins} + my $t = 0; + my %fh; + # Let the last output fifo be the 0'th + open $fh{$t++}, ">", pop @ARGV; + for(@ARGV) { + open $fh{$t++}, ">", $_; + # open blocks until it is opened by reader + # so unlink only happens when it is ready + unlink $_; + } + if($perlexpr) { + my $subref = eval("sub { no strict; no warnings; $perlexpr }"); + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + { + local $_ = $F[$col0]; + &$subref(); + $fh = $fh{ $_%$bins }; + } + print $fh $_; + } + } else { + while(<STDIN>) { + # Split into $col columns (no need to split into more) + @F = split $sep, $_, $col+1; + $fh = $fh{ $F[$col0]%$bins }; + print $fh $_; + } + } + # Close all open fifos + close values %fh; + }; + return ::spacefree(1, $script); +} + +sub pipe_shard_setup() { + # Create temporary fifos + # Run 'shard.pl sep col fifo1 fifo2 fifo3 ... fifoN' in the background + # This will spread the input to fifos + # Generate commands that reads from fifo1..N: + # cat fifo | user_command + # Changes: + # @Global::cat_prepends + my @shardfifos; + my @parcatfifos; + # TODO $opt::jobs should be evaluated (100%) + # TODO $opt::jobs should be number of total_jobs if there are arguments + max_jobs_running(); + my $njobs = $Global::max_jobs_running; + for my $m (0..$njobs-1) { + for my $n (0..$njobs-1) { + # sharding to A B C D + # parcatting all As together + $parcatfifos[$n][$m] = $shardfifos[$m][$n] = tmpfifo(); + } + } + my $shardbin = ($opt::shard || $opt::bin); + my $script; + if($opt::bin) { + $script = binner_script(); + } else { + $script = sharder_script(); + } + + # cat foo | sharder sep col fifo1 fifo2 fifo3 ... fifoN + + if($shardbin =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # A full line, but nothing more (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread(STDIN,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + my ($col, $perlexpr, $subref) = + column_perlexpr($shardbin, $Global::header, $opt::colsep); + if(not fork()) { + # Let the sharder inherit our stdin + # and redirect stdout to null + open STDOUT, ">","/dev/null"; + # The PERL_HASH_SEED must be the same for all sharders + # so B::hash will return the same value for any given input + $ENV{'PERL_HASH_SEED'} = $$; + exec qw(parallel --block 100k -q --pipe -j), $njobs, + qw(--roundrobin -u perl -e), $script, ($opt::colsep || ","), + $col, $perlexpr, '{}', (map { (':::+', @{$_}) } @parcatfifos); + } + # For each fifo + # (rm fifo1; grep 1) < fifo1 + # (rm fifo2; grep 2) < fifo2 + # (rm fifo3; grep 3) < fifo3 + my $parcat = Q(parcat_script()); + if(not $parcat) { + ::error("'parcat' must be in path."); + ::wait_and_exit(255); + } + @Global::cat_prepends = map { "perl -e $parcat @$_ | " } @parcatfifos; +} + +sub pipe_part_files(@) { + # Given the bigfile + # find header and split positions + # make commands that 'cat's the partial file + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + if(not -f $file and not -b $file) { + ::error("--pipepart only works on seekable files, not streams/pipes.", + "$file is not a seekable file."); + ::wait_and_exit(255); + } + + my $fh = open_or_exit($file); + my $firstlinelen = 0; + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while($firstlinelen += sysread($fh,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$fh); + # find positions + my @pos = find_split_positions($file,int($Global::blocksize), + $header,$firstlinelen); + # Make @cat_prepends + my @cat_prepends = (); + for(my $i=0; $i<$#pos; $i++) { + push(@cat_prepends, + cat_partial($file, $firstlinelen, $firstlinelen+length($header), + $pos[$i], $pos[$i+1])); + } + return @cat_prepends; +} + +sub find_header($$) { + # Compute the header based on $opt::header + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $Global::blocksize + # $Global::header + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + # $Global::header may be set in group_by_loop() + if($Global::header) { return $Global::header } + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(sysread($fh,$$buf_ref,int($Global::blocksize),length $$buf_ref)) { + if($$buf_ref =~ s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions($$$) { + # Find positions in bigfile where recend is followed by recstart + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $header = header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $header, $firstlinelen) = @_; + my $skiplen = $firstlinelen + length $header; + my $size = -s $file; + if(-b $file) { + # $file is a blockdevice + $size = size_of_block_dev($file); + } + $block = int $block; + if($opt::groupby) { + return split_positions_for_group_by($file,$size,$block, + $header,$firstlinelen); + } + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + # The optimal dd blocksize for ubuntu (AMD6376) = 2^16 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$skiplen); + for(my $pos = $block+$skiplen; $pos < $size; $pos += $block) { + my $buf; + if($recendrecstart eq "") { + # records ends anywhere + push(@pos,$pos); + } else { + # Seek the the block start + if(not sysseek($fh, $pos, 0)) { + ::error("Cannot seek to $pos in $file"); + edit(255); + } + while(sysread($fh,$buf,$dd_block_size,length $buf)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ m:^(.*$recend)$recstart:os) { + # Start looking for next record _after_ this match + $pos += length($1); + push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + # TODO optimize to only look at the appended + # $dd_block_size + len $recendrecstart + # TODO increase $dd_block_size to optimize for longer records + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recend); + push(@pos,$pos); + last; + } + } + } + } + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add $size as the last + push @pos, $size; + } + close $fh; + return @pos; +} + +sub split_positions_for_group_by($$$$) { + my($fh); + sub value_at($) { + my $pos = shift; + if($pos != 0) { + seek($fh, $pos-1, 0) || die; + # Read half line + <$fh>; + } + # Read full line + my $linepos = tell($fh); + $_ = <$fh>; + if(defined $_) { + # Not end of file + my @F; + if(defined $group_by::col) { + $opt::colsep ||= "\t"; + @F = split /$opt::colsep/, $_; + $_ = $F[$group_by::col]; + } + eval $group_by::perlexpr; + } + return ($_,$linepos); + } + + sub binary_search_end($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) == $s + # value_at($epos) != $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $spos = $vpos; + $posdif = $epos - $spos; + } else { + $epos = $vpos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + sub binary_search_start($$$) { + my ($s,$spos,$epos) = @_; + # value_at($spos) != $s + # value_at($epos) == $s + my $posdif = $epos - $spos; + my ($v,$vpos); + while($posdif) { + ($v,$vpos) = value_at($spos+$posdif); + if($v eq $s) { + $epos = $vpos; + } else { + $spos = $vpos; + $posdif = $epos - $spos; + } + $posdif = int($posdif/2); + } + return($v,$vpos); + } + + my ($file,$size,$block,$header,$firstlinelen) = @_; + my ($a,$b,$c,$apos,$bpos,$cpos); + my @pos; + $fh = open_or_exit($file); + # Set $Global::group_by_column $Global::group_by_perlexpr + group_by_loop($fh,$opt::recsep); + # $xpos = linestart, $x = value at $xpos, $apos < $bpos < $cpos + $apos = $firstlinelen + length $header; + for(($a,$apos) = value_at($apos); $apos < $size;) { + push @pos, $apos; + $bpos = $apos + $block; + ($b,$bpos) = value_at($bpos); + if(eof($fh)) { + push @pos, $size; last; + } + $cpos = $bpos + $block; + ($c,$cpos) = value_at($cpos); + if($a eq $b) { + while($b eq $c) { + # Move bpos, cpos a block forward until $a == $b != $c + $bpos = $cpos; + $cpos += $block; + ($c,$cpos) = value_at($cpos); + if($cpos >= $size) { + $cpos = $size; + last; + } + } + # $a == $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } else { + if($b eq $c) { + # $a != $b == $c + # Binary search for $b starting between ($apos,$bpos) + ($b,$bpos) = binary_search_start($b,$apos,$bpos); + } else { + # $a != $b != $c + # Binary search for $b ending between ($bpos,$cpos) + ($b,$bpos) = binary_search_end($b,$bpos,$cpos); + } + } + ($a,$apos) = ($b,$bpos); + } + if($pos[$#pos] != $size) { + # Last splitpoint was not at end of the file: add it + push @pos, $size; + } + return @pos; +} + +sub cat_partial($@) { + # Efficient command to copy from byte X to byte Y + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert (start,end) to (start,len) + my @start_len = map { + if(++$i % 2) { $start = $_; } else { $_-$start } + } @start_end; + # The optimal block size differs + # It has been measured on: + # AMD 6376: n*4k-1; small n + # AMD Neo N36L: 44k-200k + # Intel i7-3632QM: 55k- + # ARM Cortex A53: 4k-28k + # Intel i5-2410M: 36k-46k + # + # I choose 2^15-1 = 32767 + # q{ + # expseq() { + # perl -E ' + # $last = pop @ARGV; + # $first = shift || 1; + # $inc = shift || 1.03; + # for($i=$first; $i<=$last;$i*=$inc) { say int $i } + # ' "$@" + # } + # + # seq 111111111 > big; + # f() { ppar --test $1 -a big --pipepart --block -1 'md5sum > /dev/null'; } + # export -f f; + # expseq 1000 1.001 300000 | shuf | parallel -j1 --jl jl-md5sum f; + # }; + my $script = spacefree + (0, + q{ + while(@ARGV) { + sysseek(STDIN,shift,0) || die; + $left = shift; + while($read = + sysread(STDIN,$buf, $left > 32767 ? 32767 : $left)){ + $left -= $read; + syswrite(STDOUT,$buf); + } + } + }); + return "<". Q($file) . + " perl -e '$script' @start_len |"; +} + +sub column_perlexpr($$$) { + # Compute the column number (if any), perlexpression from combined + # string (such as --shard key, --groupby key, {=n perlexpr=} + # Input: + # $column_perlexpr = string with column and perl expression + # $header = header from input file (if column is column name) + # $colsep = column separator regexp + # Returns: + # $col = column number + # $perlexpr = perl expression + # $subref = compiled perl expression as sub reference + my ($column_perlexpr, $header, $colsep) = @_; + my ($col, $perlexpr, $subref); + if($column_perlexpr =~ /^[-a-z0-9_]+(\s|$)/i) { + # Column name/number (possibly prefix) + if($column_perlexpr =~ s/^(-?\d+)(\s|$)//) { + # Column number (possibly prefix) + $col = $1; + } elsif($column_perlexpr =~ s/^([a-z0-9_]+)(\s+|$)//i) { + # Column name (possibly prefix) + my $colname = $1; + # Split on --copsep pattern + my @headers = split /$colsep/, $header; + my %headers; + @headers{@headers} = (1..($#headers+1)); + $col = $headers{$colname}; + if(not defined $col) { + ::error("Column '$colname' $colsep not found in header",keys %headers); + ::wait_and_exit(255); + } + } + } + # What is left of $column_perlexpr is $perlexpr (possibly empty) + $perlexpr = $column_perlexpr; + $subref = eval("sub { no strict; no warnings; $perlexpr }"); + return($col, $perlexpr, $subref); +} + +sub group_by_loop($$) { + # Generate perl code for group-by loop + # Insert a $recsep when the column value changes + # The column value can be computed with $perlexpr + my($fh,$recsep) = @_; + my $groupby = $opt::groupby; + if($groupby =~ /^[a-z_][a-z_0-9]*(\s|$)/i) { + # Group by column name + # (Yes, this will also wrongly match a perlexpr like: chop) + my($read,$char,@line); + # Read a full line, but nothing more + # (the rest must be read by the child) + # $Global::header used to prepend block to each job + do { + $read = sysread($fh,$char,1); + push @line, $char; + } while($read and $char ne "\n"); + $Global::header = join "", @line; + } + $opt::colsep ||= "\t"; + ($group_by::col, $group_by::perlexpr, $group_by::subref) = + column_perlexpr($groupby, $Global::header, $opt::colsep); + # Numbered 0..n-1 due to being used by $F[n] + if($group_by::col) { $group_by::col--; } + + my $loop = ::spacefree(0,q{ + BEGIN{ $last = "RECSEP"; } + { + local $_=COLVALUE; + PERLEXPR; + if(($last) ne $_) { + print "RECSEP"; + $last = $_; + } + } + }); + if(defined $group_by::col) { + $loop =~ s/COLVALUE/\$F[$group_by::col]/g; + } else { + $loop =~ s/COLVALUE/\$_/g; + } + $loop =~ s/PERLEXPR/$group_by::perlexpr/g; + $loop =~ s/RECSEP/$recsep/g; + return $loop; +} + +sub pipe_group_by_setup() { + # Record separator with 119 bit random value + $opt::recend = ''; + $opt::recstart = + join "", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + $opt::remove_rec_sep = 1; + my @filter; + push @filter, "perl"; + if($opt::groupby =~ /^[a-z0-9_]+(\s|$)/i) { + # This is column number/name + # Use -a (auto-split) + push @filter, "-a"; + $opt::colsep ||= "\t"; + my $sep = $opt::colsep; + $sep =~ s/\t/\\t/g; + $sep =~ s/\"/\\"/g; + # man perlrun: -Fpattern [...] You can't use literal whitespace + $sep =~ s/ /\\040{1}/g; + push @filter, "-F$sep"; + } + push @filter, "-pe"; + push @filter, group_by_loop(*STDIN,$opt::recstart); + ::debug("init", "@filter\n"); + open(STDIN, '-|', @filter) || die ("Cannot start @filter"); + if(which("mbuffer")) { + # You get a speed up of 30% by going through mbuffer + open(STDIN, '-|', "mbuffer", "-q","-m6M","-b5") || + die ("Cannot start mbuffer"); + } +} + +sub spreadstdin() { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $Global::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = int($Global::blocksize); + my $in = *STDIN; + my $timeout = $Global::blocktimeout; + + if($opt::skip_first_line) { + my $newline; + # Read a full line one byte at a time + while(sysread($in,$newline,1,0)) { + $newline eq "\n" and last; + } + } + my $header = find_header(\$buf,$in); + my $anything_written; + my $eof; + my $garbage_read; + + sub read_block() { + # Read a --blocksize from STDIN + # possibly interrupted by --blocktimeout + # Add up to the next full block + my $readsize = $blocksize - (length $buf) % $blocksize; + my ($nread,$alarm); + eval { + local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required + # --blocktimeout (or 0 if not set) + alarm $timeout; + if($] >= 5.026) { + do { + $nread = sysread $in, $buf, $readsize, length $buf; + $readsize -= $nread; + } while($readsize and $nread); + } else { + # Less efficient reading, but 32-bit sysread compatible + do { + $nread = sysread($in,substr($buf,length $buf,0),$readsize,0); + $readsize -= $nread; + } while($readsize and $nread); + } + alarm 0; + }; + if ($@) { + die unless $@ eq "alarm\n"; # propagate unexpected errors + $alarm = 1; + } else { + $alarm = 0; + } + $eof = not ($nread or $alarm); + } + + sub pass_n_line_records() { + # Pass records of N lines + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + # Go backwards until there are full n-line records + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n_regexps() { + # Pass records of N regexps + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + if(not $garbage_read) { + $garbage_read = 1; + if($buf !~ /^$recstart/o) { + # Buf does not start with $recstart => There is garbage. + # Make a single record of the garbage + if($buf =~ + /(?s:^)( + (?:(?:(?!$recend$recstart)(?s:.))*?$recend) + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + } + + my $n_records = + $Global::max_number_of_args * ($Global::max_lines || 1); + # (?!negative lookahead) is needed to avoid backtracking + # See: https://unix.stackexchange.com/questions/439356/ + # (?s:.) = (.|[\n]) but faster + while($buf =~ + /(?s:^)( + # n more times recstart.*recend + (?:$recstart(?:(?!$recend$recstart)(?s:.))*?$recend){$n_records} + ) + # Followed by recstart + (?=$recstart)/mox and length $1 > 0) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_regexp() { + # Find the last recend-recstart in $buf + $eof and return; + # (?s:.) = (.|[\n]) but faster + if($buf =~ /^((?s:.)*$recend)$recstart(?s:.)*?$/mox) { + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,length $1); + shorten(\$buf,length $1); + } + } + + sub pass_csv_record() { + # Pass CVS record + # We define a CSV record as an even number of " + end of line + # This works if you use " as quoting character + my $last_newline_pos = length $buf; + # Go backwards from the last \n and search for a position + # where there is an even number of " + do { + # find last EOL + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + # While uneven " + } while((substr($buf,0,$last_newline_pos) =~ y/"/"/)%2 + and $last_newline_pos >= 0); + # Chop at $last_newline_pos as that is where CSV record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } + + sub pass_n() { + # Pass n records of --recend/--recstart + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = + $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1 + and + length $buf) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub pass() { + # Pass records of --recend/--recstart + # Split record at fixed string + # Find the last recend+recstart in $buf + $eof and return; + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + + sub increase_blocksize_maybe() { + if(not $anything_written + and not $opt::blocktimeout + and not $Global::no_autoexpand_block) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . + "Increasing to --blocksize $blocksize."); + } + } + } + + while(1) { + $anything_written = 0; + read_block(); + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + if($eof) { + last; + } else { + next; + } + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Pass n-line records + pass_n_line_records(); + } elsif($opt::csv) { + # Pass a full CSV record + pass_csv_record(); + } elsif($opt::regexp) { + # Split record at regexp + if($Global::max_number_of_args) { + pass_n_regexps(); + } else { + pass_regexp(); + } + } else { + # Pass normal --recend/--recstart record + if($Global::max_number_of_args) { + pass_n(); + } else { + pass(); + } + } + $eof and last; + increase_blocksize_maybe(); + ::debug("init", "Round\n"); + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++, \$header, \$buf, $recstart, + $recend, length $buf); + + if($opt::retries) { + $Global::no_more_input = 1; + # We need to start no more jobs: At most we need to retry some + # of the already running. + my @running = values %Global::running; + # Stop any virgins. + for my $job (@running) { + if(defined $job and $job->virgin()) { + close $job->fh(0,"w"); + } + } + # Wait for running jobs to be done + my $sleep = 1; + while($Global::total_running > 0) { + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + } + } + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + # Flush blocks to roundrobin procs + my $sleep = 1; + while(%Global::running) { + my $something_written = 0; + for my $job (values %Global::running) { + if($job->block_length()) { + $something_written += $job->non_blocking_write(); + } else { + close $job->fh(0,"w"); + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend() { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + if($opt::regexp and $recend eq '') { + # --regexp --recend '' + $recend = '(?s:.)'; + } + } + + if($opt::regexp) { + # Do not allow /x comments - to avoid having to quote space + $recstart = "(?-x:".$recstart.")"; + $recend = "(?-x:".$recend.")"; + # If $recstart/$recend contains '|' + # the | should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex($$) { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = index64($buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + my $sleep = 1; + + sub round_robin_write($$$$$) { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$buffer_ref,$recstart,$recend,$endpos) = @_; + my $written = 0; + my $block_passed = 0; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + if(@robin_queue) { + # Rotate queue once so new blocks get a fair chance + # to be given to another slot + push @robin_queue, shift @robin_queue; + } else { + # Make a queue to spread the blocks evenly + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); + } + do { + $written = 0; + for my $job (@robin_queue) { + if($job->block_length() > 0) { + $written += $job->non_blocking_write(); + } else { + $job->set_block($header_ref, $buffer_ref, + $endpos, $recstart, $recend); + $block_passed = 1; + $written += $job->non_blocking_write(); + last; + } + } + if($written) { + $sleep = $sleep/1.5+0.001; + } + # Don't sleep if something is written + } while($written and not $block_passed); + $sleep = ::reap_usleep($sleep); + } + return $written; + } +} + +sub index64($$$) { + # Do index on strings > 2GB. + # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return index($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $block_size), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($block_size - $matchlen - 1); + } + return -1; +} + +sub rindex64($@) { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten($$) { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe($$$$$$) { + # Fork then + # Write record from pos 0 .. $endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $buffer_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $buffer_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number, $header_ref, $buffer_ref, + $recstart, $recend, $endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + # Write the block to one of the already running jobs + return round_robin_write($header_ref, $buffer_ref, + $recstart, $recend, $endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login + # or retrying failed jobs. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + $job->set_block($header_ref, $buffer_ref, $endpos, $recstart, $recend); + $job->write_block(); + return 1; +} + + +sub __SEM_MODE__() {} + + +sub acquire_semaphore() { + # Acquires semaphore. If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name, + $Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + if(fork()) { + exit(0); + } else { + # If run in the background, the PID will change + $sem->pid_change(); + } + } + return $sem; +} + + +sub __PARSE_OPTIONS__() {} + +sub shell_completion() { + if($opt::shellcompletion eq "zsh") { + # if shell == zsh + zsh_competion(); + } elsif($opt::shellcompletion eq "bash") { + # if shell == bash + bash_competion(); + } elsif($opt::shellcompletion eq "auto") { + if($Global::shell =~ m:/zsh$|^zsh$:) { + # if shell == zsh + zsh_competion(); + } elsif($Global::shell =~ m:/bash$|^bash$:) { + # if shell == bash + bash_competion(); + } else { + ::error("--shellcompletion is not implemented for ". + "'$Global::shell'."); + wait_and_exit(255); + } + } else { + ::error("--shellcompletion is not implemented for ". + "'$opt::shellcompletion'."); + wait_and_exit(255); + } +} + +sub bash_competion() { + # Print: + # complete -F _comp_parallel parallel; + # _comp_parallel() { + # COMPREPLY=($(compgen -W "--options" -- + # "${COMP_WORDS[$COMP_CWORD]}")); + # }; + my @bash_completion = + ("complete -F _comp_parallel parallel;", + '_comp_parallel() { COMPREPLY=($(compgen -W "'); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + push @bash_completion, + (map { (length $_ == 1) ? "-$_ " : "--$_ " } + split /\|/, $opt); + } + } + shift @och; + } + push @bash_completion,'" -- "${COMP_WORDS[$COMP_CWORD]}")); };'."\n"; + print @bash_completion; +} + +sub zsh_competion() { + my @zsh_completion = + ("compdef _comp_parallel parallel; ", + "setopt localoptions extended_glob; ", + "local -a _comp_priv_prefix; ", + "_comp_parallel() { ", + "_arguments "); + my @och = options_completion_hash(); + while(@och) { + $_ = shift @och; + # Split input like: + # "joblog|jl=s[Logfile for executed jobs]:logfile:_files" + if(/^(.*?)(\[.*?])?(:[^:]*)?(:.*)?$/) { + my $opt = $1; + my $desc = $2; + my $argdesc = $3; + my $func = $4; + # opt=s => opt + $opt =~ s/[:=].$//; + if($opt =~ /^_/) { + # internal options start with --_ + # skip + } else { + # {-o,--option} + my $zsh_opt = join(",", + (map { (length $_ == 1) ? "-$_" : "--$_" } + split /\|/, $opt)); + if($zsh_opt =~ /,/) { $zsh_opt = "{$zsh_opt}"; } + $desc =~ s/'/'"'"'/g; + $argdesc =~ s/'/'"'"'/g; + $func =~ s/'/'"'"'/g; + push @zsh_completion, $zsh_opt."'".$desc.$argdesc.$func."' "; + } + } + shift @och; + } + push @zsh_completion, + q{'(-)1:command: _command_names -e' }, + q{'*::arguments:{ _comp_priv_prefix=( '$words[1]' -n ${(kv)opt_args[(I)(-[ugHEP]|--(user|group|set-home|preserve-env|preserve-groups))]} ) ; _normal }'}, + "};\n"; + print @zsh_completion; +} + +sub options_hash() { + # Returns: + # %hash = for GetOptions + my %och = options_completion_hash(); + my %oh; + my ($k,$v); + while(($k,$v) = each %och) { + $k =~ s/\[.*//; + $oh{$k} = $v; + } + return %oh; +} + +sub options_completion_hash() { + # Returns: + # %hash = for GetOptions and shell completion + return + ("debug|D=s" => \$opt::D, + "xargs[Insert as many arguments as the command line length permits]" + => \$opt::xargs, + "m[Multiple arguments]" => \$opt::m, + ("X[Insert as many arguments with context as the command line ". + "length permits]" + => \$opt::X), + "v[Verbose]" => \@opt::v, + "sql=s[Use --sql-master instead (obsolete)]:DBURL" => \$opt::retired, + ("sql-master|sqlmaster=s". + "[Submit jobs via SQL server. DBURL must point to a table, which ". + "will contain --joblog, the values, and output]:DBURL" + => \$opt::sqlmaster), + ("sql-worker|sqlworker=s". + "[Execute jobs via SQL server. Read the input sources variables ". + "from the table pointed to by DBURL.]:DBURL" + => \$opt::sqlworker), + ("sql-and-worker|sqlandworker=s". + "[--sql-master DBURL --sql-worker DBURL]:DBURL" + => \$opt::sqlandworker), + ("joblog|jl=s[Logfile for executed jobs]:logfile:_files" + => \$opt::joblog), + ("results|result|res=s[Save the output into files]:name:_files" + => \$opt::results), + "resume[Resumes from the last unfinished job]" => \$opt::resume, + ("resume-failed|resumefailed". + "[Retry all failed and resume from the last unfinished job]" + => \$opt::resume_failed), + ("retry-failed|retryfailed[Retry all failed jobs in joblog]" + => \$opt::retry_failed), + "silent[Silent]" => \$opt::silent, + ("keep-order|keeporder|k". + "[Keep sequence of output same as the order of input]" + => \$opt::keeporder), + ("no-keep-order|nokeeporder|nok|no-k". + "[Overrides an earlier --keep-order (e.g. if set in ". + "~/.parallel/config)]" + => \$opt::nokeeporder), + "group[Group output]" => \$opt::group, + "g" => \$opt::retired, + ("ungroup|u". + "[Output is printed as soon as possible and bypasses GNU parallel ". + "internal processing]" + => \$opt::ungroup), + ("latest-line|latestline|ll". + "[Print latest line of each job]" + => \$opt::latestline), + ("line-buffer|line-buffered|linebuffer|linebuffered|lb". + "[Buffer output on line basis]" + => \$opt::linebuffer), + ("tmux". + "[Use tmux for output. Start a tmux session and run each job in a ". + "window in that session. No other output will be produced]" + => \$opt::tmux), + ("tmux-pane|tmuxpane". + "[Use tmux for output but put output into panes in the first ". + "window. Useful if you want to monitor the progress of less than ". + "100 concurrent jobs]" + => \$opt::tmuxpane), + "null|0[Use NUL as delimiter]" => \$opt::null, + "quote|q[Quote command]" => \$opt::quote, + # Replacement strings + ("parens=s[Use parensstring instead of {==}]:parensstring" + => \$opt::parens), + ('rpl=s[Define replacement string]:"tag perl expression"' + => \@opt::rpl), + "plus[Add more replacement strings]" => \$opt::plus, + ("I=s". + "[Use the replacement string replace-str instead of {}]:replace-str" + => \$opt::I), + ("extensionreplace|er=s". + "[Use the replacement string replace-str instead of {.} for input ". + "line without extension]:replace-str" + => \$opt::U), + "U=s" => \$opt::retired, + ("basenamereplace|bnr=s". + "[Use the replacement string replace-str instead of {/} for ". + "basename of input line]:replace-str" + => \$opt::basenamereplace), + ("dirnamereplace|dnr=s". + "[Use the replacement string replace-str instead of {//} for ". + "dirname of input line]:replace-str" + => \$opt::dirnamereplace), + ("basenameextensionreplace|bner=s". + "[Use the replacement string replace-str instead of {/.} for ". + "basename of input line without extension]:replace-str" + => \$opt::basenameextensionreplace), + ("seqreplace=s". + "[Use the replacement string replace-str instead of {#} for job ". + "sequence number]:replace-str" + => \$opt::seqreplace), + ("slotreplace=s". + "[Use the replacement string replace-str instead of {%} for job ". + "slot number]:replace-str" + => \$opt::slotreplace), + ("jobs|j=s". + "[(Add +N to/Subtract -N from/Multiply N%) the number of CPU ". + "threads or read parameter from file]:_files" + => \$opt::jobs), + ("delay=s". + "[Delay starting next job by duration]:duration" => \$opt::delay), + ("ssh-delay|sshdelay=f". + "[Delay starting next ssh by duration]:duration" + => \$opt::sshdelay), + ("load=s". + "[Only start jobs if load is less than max-load]:max-load" + => \$opt::load), + "noswap[Do not start job is computer is swapping]" => \$opt::noswap, + ("max-line-length-allowed|maxlinelengthallowed". + "[Print maximal command line length]" + => \$opt::max_line_length_allowed), + ("number-of-cpus|numberofcpus". + "[Print the number of physical CPU cores and exit (obsolete)]" + => \$opt::number_of_cpus), + ("number-of-sockets|numberofsockets". + "[Print the number of CPU sockets and exit]" + => \$opt::number_of_sockets), + ("number-of-cores|numberofcores". + "[Print the number of physical CPU cores and exit]" + => \$opt::number_of_cores), + ("number-of-threads|numberofthreads". + "[Print the number of hyperthreaded CPU cores and exit]" + => \$opt::number_of_threads), + ("use-sockets-instead-of-threads|usesocketsinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_sockets_instead_of_threads), + ("use-cores-instead-of-threads|usecoresinsteadofthreads". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cores_instead_of_threads), + ("use-cpus-instead-of-cores|usecpusinsteadofcores". + "[Determine how GNU Parallel counts the number of CPUs]" + => \$opt::use_cpus_instead_of_cores), + ("shell-quote|shellquote|shell_quote". + "[Does not run the command but quotes it. Useful for making ". + "quoted composed commands for GNU parallel]" + => \@opt::shellquote), + ('nice=i[Run the command at this niceness]:niceness:($(seq -20 19))' + => \$opt::nice), + "tag[Tag lines with arguments]" => \$opt::tag, + ("tag-string|tagstring=s". + "[Tag lines with a string]:str" => \$opt::tagstring), + "ctag[Color tag]:str" => \$opt::ctag, + "ctag-string|ctagstring=s[Colour tagstring]:str" => \$opt::ctagstring, + "color|colour[Colourize output]" => \$opt::color, + ("color-failed|colour-failed|colorfailed|colourfailed|". + "color-fail|colour-fail|colorfail|colourfail|cf". + "[Colour failed jobs red]" + => \$opt::colorfailed), + ("onall[Run all the jobs on all computers given with --sshlogin]" + => \$opt::onall), + "nonall[--onall with no arguments]" => \$opt::nonall, + ("filter-hosts|filterhosts|filter-host[Remove down hosts]" + => \$opt::filter_hosts), + ('sshlogin|S=s'. + '[Distribute jobs to remote computers]'. + ':[@hostgroups/][ncpus/]sshlogin'. + '[,[@hostgroups/][ncpus/]sshlogin[,...]] or @hostgroup'. + ':_users') => \@opt::sshlogin, + ("sshloginfile|slf=s". + "[File with sshlogins on separate lines. Lines starting with '#' ". + "are ignored.]:filename:_files" + => \@opt::sshloginfile), + ("controlmaster|M". + "[Use ssh's ControlMaster to make ssh connections faster]" + => \$opt::controlmaster), + ("ssh=s". + "[Use this command instead of ssh for remote access]:sshcommand" + => \$opt::ssh), + ("transfer-file|transferfile|transfer-files|transferfiles|tf=s". + "[Transfer filename to remote computers]:filename:_files" + => \@opt::transfer_files), + ("return=s[Transfer files from remote computers]:filename:_files" + => \@opt::return), + ("trc=s[--transfer --return filename --cleanup]:filename:_files" + => \@opt::trc), + "transfer[Transfer files to remote computers]" => \$opt::transfer, + "cleanup[Remove transferred files]" => \$opt::cleanup, + ("basefile|bf=s". + "[Transfer file to each sshlogin before first job is started]". + ":file:_files" + => \@opt::basefile), + ("template|tmpl=s". + "[Replace replacement strings in file and save it in repl]". + ":file=repl:_files" + => \%opt::template), + "B=s" => \$opt::retired, + "ctrl-c|ctrlc" => \$opt::retired, + "no-ctrl-c|no-ctrlc|noctrlc" => \$opt::retired, + ("work-dir|workdir|wd=s". + "[Jobs will be run in the dir mydir. (default: the current dir ". + "for the local machine, the login dir for remote computers)]". + ":mydir:_cd" + => \$opt::workdir), + "W=s" => \$opt::retired, + ("rsync-opts|rsyncopts=s[Options to pass on to rsync]:options" + => \$opt::rsync_opts), + ("tmpdir|tempdir=s[Directory for temporary files]:dirname:_cd" + => \$opt::tmpdir), + ("use-compress-program|compress-program|". + "usecompressprogram|compressprogram=s". + "[Use prg for compressing temporary files]:prg:_commands" + => \$opt::compress_program), + ("use-decompress-program|decompress-program|". + "usedecompressprogram|decompressprogram=s". + "[Use prg for decompressing temporary files]:prg:_commands" + => \$opt::decompress_program), + "compress[Compress temporary files]" => \$opt::compress, + "open-tty|o[Open terminal tty]" => \$opt::open_tty, + "tty[Open terminal tty]" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + ("dry-run|dryrun|dr". + "[Print the job to run on stdout (standard output), but do not ". + "run the job]" + => \$opt::dryrun), + "progress[Show progress of computations]" => \$opt::progress, + ("eta[Show the estimated number of seconds before finishing]" + => \$opt::eta), + "bar[Show progress as a progress bar]" => \$opt::bar, + ("total-jobs|totaljobs|total=s". + "[Set total number of jobs]" => \$opt::totaljobs), + "shuf[Shuffle jobs]" => \$opt::shuf, + ("arg-sep|argsep=s". + "[Use sep-str instead of ::: as separator string]:sep-str" + => \$opt::arg_sep), + ("arg-file-sep|argfilesep=s". + "[Use sep-str instead of :::: as separator string ". + "between command and argument files]:sep-str" + => \$opt::arg_file_sep), + ('trim=s[Trim white space in input]:trim_method:'. + '((n\:"No trim" l\:"Left\ trim" r\:"Right trim" '. + 'lr\:"Both trim" rl\:"Both trim"))' + => \$opt::trim), + "env=s[Copy environment variable var]:var:_vars" => \@opt::env, + "recordenv|record-env[Record environment]" => \$opt::record_env, + ('session'. + '[Record names in current environment in $PARALLEL_IGNORED_NAMES '. + 'and exit. Only used with env_parallel. '. + 'Aliases, functions, and variables with names i]' + => \$opt::session), + ('plain[Ignore --profile, $PARALLEL, and ~/.parallel/config]' + => \$opt::plain), + ("profile|J=s". + "[Use profile profilename for options]:profilename:_files" + => \@opt::profile), + "tollef" => \$opt::tollef, + "gnu[Behave like GNU parallel]" => \$opt::gnu, + "link|xapply[Link input sources]" => \$opt::link, + "linkinputsource|xapplyinputsource=i" => \@opt::linkinputsource, + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines. + ("bibtex|citation". + "[Print the citation notice and BibTeX entry for GNU parallel, ". + "silence citation notice for all future runs, and exit. ". + "It will not run any commands]" + => \$opt::citation), + "will-cite|willcite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + ('halt-on-error|haltonerror|halt=s'. + '[When should GNU parallel terminate]'. + ':when:((now\:"kill all running jobs and halt immediately" '. + 'soon\:"wait for all running jobs to complete, start no new jobs"))' + => \$opt::halt), + 'limit=s[Dynamic job limit]:"command args"' => \$opt::limit, + ("memfree=s". + "[Minimum memory free when starting another job]:size" + => \$opt::memfree), + ("memsuspend=s". + "[Suspend jobs when there is less memory available]:size" + => \$opt::memsuspend), + "retries=s[Try failing jobs n times]:n" => \$opt::retries, + ("timeout=s". + "[Time out for command. If the command runs for longer than ". + "duration seconds it will get killed as per --term-seq]:duration" + => \$opt::timeout), + ("term-seq|termseq=s". + "[Termination sequence]:sequence" => \$opt::termseq), + # xargs-compatibility - implemented, man, testsuite + ("max-procs|maxprocs|P=s". + "[Add N to/Subtract N from/Multiply N% with/ the number of CPU ". + "threads or read parameter from file]:+N/-N/N%/N/procfile:_files" + => \$opt::jobs), + ("delimiter|d=s[Input items are terminated by delim]:delim" + => \$opt::d), + ("max-chars|maxchars|s=s[Limit length of command]:max-chars" + => \$opt::max_chars), + ("arg-file|argfile|a=s". + "[Use input-file as input source]:input-file:_files" => \@opt::a), + "no-run-if-empty|norunifempty|r[Do not run empty input]" => \$opt::r, + ("replace|i:s". + "[This option is deprecated; use -I instead]:replace-str" + => \$opt::i), + "E=s" => \$opt::eof, + ("eof|e:s[Set the end of file string to eof-str]:eof-str" + => \$opt::eof), + ("process-slot-var|processslotvar=s". + "[Set this variable to job slot number]:varname" + => \$opt::process_slot_var), + ("max-args|maxargs|n=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_args), + ("max-replace-args|maxreplaceargs|N=s". + "[Use at most max-args arguments per command line]:max-args" + => \$opt::max_replace_args), + "col-sep|colsep|C=s[Column separator]:regexp" => \$opt::colsep, + "csv[Treat input as CSV-format]"=> \$opt::csv, + ("help|h[Print a summary of the options to GNU parallel and exit]" + => \$opt::help), + ("L=s[When used with --pipe: Read records of recsize]:recsize" + => \$opt::L), + ("max-lines|maxlines|l:f". + "[When used with --pipe: Read records of recsize lines]:recsize" + => \$opt::max_lines), + "interactive|p[Ask user before running a job]" => \$opt::interactive, + ("verbose|t[Print the job to be run on stderr (standard error)]" + => \$opt::verbose), + ("version|V[Print the version GNU parallel and exit]" + => \$opt::version), + ('min-version|minversion=i'. + '[Print the version GNU parallel and exit]'. + ':version:($(parallel --minversion 0))' + => \$opt::minversion), + ("show-limits|showlimits". + "[Display limits given by the operating system]" + => \$opt::show_limits), + ("exit|x[Exit if the size (see the -s option) is exceeded]" + => \$opt::x), + # Semaphore + "semaphore[Work as a counting semaphore]" => \$opt::semaphore, + ("semaphore-timeout|semaphoretimeout|st=s". + "[If secs > 0: If the semaphore is not released within secs ". + "seconds, take it anyway]:secs" + => \$opt::semaphoretimeout), + ("semaphore-name|semaphorename|id=s". + "[Use name as the name of the semaphore]:name" + => \$opt::semaphorename), + "fg[Run command in foreground]" => \$opt::fg, + "bg[Run command in background]" => \$opt::bg, + "wait[Wait for all commands to complete]" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + ("shebang|hashbang". + "[GNU parallel can be called as a shebang (#!) command as the ". + "first line of a script. The content of the file will be treated ". + "as inputsource]" + => \$opt::shebang), + ("_pipe-means-argfiles[internal]" + => \$opt::_pipe_means_argfiles), + "Y" => \$opt::retired, + ("skip-first-line|skipfirstline". + "[Do not use the first line of input]" + => \$opt::skip_first_line), + "bug" => \$opt::bug, + # --pipe + ("pipe|spreadstdin". + "[Spread input to jobs on stdin (standard input)]" => \$opt::pipe), + ("round-robin|roundrobin|round". + "[Distribute chunks of standard input in a round robin fashion]" + => \$opt::roundrobin), + "recstart=s" => \$opt::recstart, + ("recend=s". + "[Split record between endstring and startstring]:endstring" + => \$opt::recend), + ("regexp|regex". + "[Interpret --recstart and --recend as regular expressions]" + => \$opt::regexp), + ("remove-rec-sep|removerecsep|rrs". + "[Remove record separator]" => \$opt::remove_rec_sep), + ("output-as-files|outputasfiles|files[Save output to files]" + => \$opt::files), + ("block-size|blocksize|block=s". + "[Size of block in bytes to read at a time]:size" + => \$opt::blocksize), + ("block-timeout|blocktimeout|bt=s". + "[Timeout for reading block when using --pipe]:duration" + => \$opt::blocktimeout), + "header=s[Use regexp as header]:regexp" => \$opt::header, + "cat[Create a temporary file with content]" => \$opt::cat, + "fifo[Create a temporary fifo with content]" => \$opt::fifo, + ("pipe-part|pipepart[Pipe parts of a physical file]" + => \$opt::pipepart), + "tee[Pipe all data to all jobs]" => \$opt::tee, + ("shard=s". + "[Use shardexpr as shard key and shard input to the jobs]:shardexpr" + => \$opt::shard), + ("bin=s". + "[Use binexpr as binning key and bin input to the jobs]:binexpr" + => \$opt::bin), + "group-by|groupby=s[Group input by value]:val" => \$opt::groupby, + # + ("hgrp|hostgrp|hostgroup|hostgroups[Enable hostgroups on arguments]" + => \$opt::hostgroups), + "embed[Embed GNU parallel in a shell script]" => \$opt::embed, + ("filter=s[Only run jobs where filter is true]:filter" + => \@opt::filter), + "_parset=s[Generate shell code for parset]" => \$opt::_parset, + ("shell-completion|shellcompletion=s". + "[Generate shell code for shell completion]" + => \$opt::shellcompletion), + # Parameter for testing optimal values + "_test=s" => \$opt::_test, + ); +} + +sub get_options_from_array($@) { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_parset() { + $Global::progname = "parset"; + @Global::parset_vars = split /[ ,]/, $opt::_parset; + my $var_or_assoc = shift @Global::parset_vars; + # Legal names: var _v2ar arrayentry[2] + my @illegal = (grep { not /^[a-zA-Z_][a-zA-Z_0-9]*(\[\d+\])?$/ } + @Global::parset_vars); + if(@illegal) { + ::error + ("@illegal is an invalid variable name.", + "Variable names must be letter followed by letters or digits.", + "Usage:", + " parset varname GNU Parallel options and command"); + wait_and_exit(255); + } + if($var_or_assoc eq "assoc") { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "assoc"; + $Global::parset_endstring=")\n"; + } elsif($var_or_assoc eq "var") { + if($#Global::parset_vars > 0) { + $Global::parset = "var"; + } else { + my $var = shift @Global::parset_vars; + print "$var=("; + $Global::parset = "array"; + $Global::parset_endstring=")\n"; + } + } else { + ::die_bug("parset: unknown '$opt::_parset'"); + } +} + +sub parse_options(@) { + # Returns: N/A + init_globals(); + my @argv_before = @ARGV; + @ARGV = read_options(); + + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be added to a public hall of shame by + # removing the lines. + if(defined $opt::citation) { + citation(\@argv_before,\@ARGV); + wait_and_exit(0); + } + # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + if($opt::bug) { ::die_bug("test-bug"); } + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) + || $ENV{'SHELL'} || "/bin/sh"; + if(not -x $Global::shell and not which($Global::shell)) { + ::error("Shell '$Global::shell' not found."); + wait_and_exit(255); + } + ::debug("init","Global::shell $Global::shell\n"); + $Global::cshell = $Global::shell =~ m:(/[-a-z]*)?csh:; + if(defined $opt::_parset) { parse_parset(); } + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::null) { $/ = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + parse_replacement_string_options(); + $opt::tag ||= $opt::ctag; + $opt::tagstring ||= $opt::ctagstring; + if(defined $opt::ctag or defined $opt::ctagstring + or defined $opt::color) { + $Global::color = 1; + } + if($opt::linebuffer or $opt::latestline) { + $Global::linebuffer = 1; + Job::latestline_init(); + } + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; + } + if(defined $opt::tagstring) { + $opt::tagstring = unquote_printf($opt::tagstring); + if($opt::tagstring =~ + /\Q$Global::parensleft\E.*\S+.*\Q$Global::parensright\E/ + and + $Global::linebuffer) { + # --tagstring contains {= ... =} and --linebuffer => + # recompute replacement string for each use (do not cache) + $Global::cache_replacement_eval = 0; + } + } + if(defined $opt::interactive) { $Global::interactive = $opt::interactive; } + if(defined $opt::quote) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { + $opt::max_args = multiply_binary_prefix($opt::max_args); + $Global::max_number_of_args = $opt::max_args; + } + if(defined $opt::blocktimeout) { + $Global::blocktimeout = int(multiply_time_units($opt::blocktimeout)); + if($Global::blocktimeout < 1) { + ::error("--block-timeout must be at least 1"); + wait_and_exit(255); + } + } + if(defined $opt::timeout) { + $Global::timeoutq = TimeoutQueue->new($opt::timeout); + } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + $ENV{'PARALLEL_RSYNC_OPTS'} = $opt::rsync_opts || + $ENV{'PARALLEL_RSYNC_OPTS'} || '-rlDzR'; + # Default: Same nice level as GNU Parallel is started at + $opt::nice ||= eval { getpriority(0,0) } || 0; + if(defined $opt::help) { usage(); exit(0); } + if(defined $opt::shellcompletion) { shell_completion(); exit(0); } + if(defined $opt::embed) { embed(); exit(0); } + if(defined $opt::sqlandworker) { + $opt::sqlmaster = $opt::sqlworker = $opt::sqlandworker; + } + if(defined $opt::tmuxpane) { $opt::tmux = $opt::tmuxpane; } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::csv) { + if(not $Global::use{"Text::CSV"} ||= eval "use Text::CSV; 1;") { + ::error("The perl module Text::CSV is not installed."); + ::error("Try installing libtext-csv-perl or perl-Text-CSV."); + wait_and_exit(255); + } + $opt::colsep = defined $opt::colsep ? $opt::colsep : ","; + my $csv_setting = { binary => 1, sep_char => $opt::colsep }; + my $sep = $csv_setting->{sep_char}; + $Global::csv = Text::CSV->new($csv_setting) + or die "Cannot use CSV: ".Text::CSV->error_diag (); + } + if(defined $opt::header) { + $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; + } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { + $Global::arg_file_sep = $opt::arg_file_sep; + } + if(not defined $opt::process_slot_var) { + $opt::process_slot_var = 'PARALLEL_JOBSLOT0'; + } + if(defined $opt::number_of_sockets) { + print SSHLogin::no_of_sockets(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cpus) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::number_of_threads) { + print SSHLogin::no_of_threads(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_chars) { + $opt::max_chars = multiply_binary_prefix($opt::max_chars); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if($opt::transfer) { + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + push @Global::transfer_files, @opt::transfer_files; + if(%opt::template) { + while (my ($source, $template_name) = each %opt::template) { + if(open(my $tmpl, "<", $source)) { + local $/; # $/ = undef => slurp whole file + my $content = <$tmpl>; + push @Global::template_names, $template_name; + push @Global::template_contents, $content; + ::debug("tmpl","Name: $template_name\n$content\n"); + } else { + ::error("Cannot open '$source'."); + wait_and_exit(255); + } + } + } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + $Global::blocksize = multiply_binary_prefix($opt::blocksize || "1M"); + if($Global::blocksize > 2**31-1 and not $opt::pipepart) { + warning("--blocksize >= 2G causes problems. Using 2G-1."); + $Global::blocksize = 2**31-1; + } + if($^O eq "cygwin" and + ($opt::pipe or $opt::pipepart or $opt::roundrobin) + and $Global::blocksize > 65535) { + warning("--blocksize >= 64K causes problems on Cygwin."); + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + $opt::memsuspend = multiply_binary_prefix($opt::memsuspend); + $Global::memlimit = $opt::memsuspend + $opt::memfree; + check_invalid_option_combinations(); + if((defined $opt::fifo or defined $opt::cat) and not $opt::pipepart) { + $opt::pipe = 1; + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + $Global::sshdelayauto = $opt::sshdelay =~ s/auto$//; + $opt::sshdelay = multiply_time_units($opt::sshdelay); + $Global::delayauto = $opt::delay =~ s/auto$//; + $opt::delay = multiply_time_units($opt::delay); + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + + if(defined $opt::results) { + # Is the output a dir or CSV-file? + if($opt::results =~ /\.csv$/i) { + # CSV with , as separator + $Global::csvsep = ","; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.tsv$/i) { + # CSV with TAB as separator + $Global::csvsep = "\t"; + $Global::membuffer ||= 1; + } elsif($opt::results =~ /\.json$/i) { + # JSON output + $Global::jsonout ||= 1; + $Global::membuffer ||= 1; + } + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + if(($opt::results and not $Global::csvsep) or $opt::files) { + # No need for decompressing + $opt::decompress_program = "cat >/dev/null"; + } + } + if(defined $opt::dryrun) { + # Force grouping due to bug #51039: --dry-run --timeout 3600 -u breaks + $opt::ungroup = 0; + $opt::group = 1; + } + if(defined $opt::nonall) { + # Append a dummy empty argument if there are no arguments + # on the command line to avoid reading from STDIN. + # arg_sep = random 50 char + # \0noarg => nothing (not the empty string) + $Global::arg_sep = join "", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..50); + push @ARGV, $Global::arg_sep, "\0noarg"; + } + if(defined $opt::tee) { + if(not defined $opt::jobs) { + $opt::jobs = 0; + } + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 1; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + if(not @Global::transfer_files) { + # Defaults to --transferfile {} + push @Global::transfer_files, $opt::i || $opt::I || "{}"; + } + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::null = 1; + $/ = "\0"; + } else { + $opt::max_lines = multiply_binary_prefix($opt::max_lines); + if ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + } + + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $opt::L = multiply_binary_prefix($opt::L); + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $opt::max_replace_args = + multiply_binary_prefix($opt::max_replace_args); + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + if(grep /^$Global::arg_sep\+?$|^$Global::arg_file_sep\+?$/o, @ARGV) { + # Deal with ::: :::+ :::: and ::::+ + @ARGV = read_args_from_command_line(); + } + parse_semaphore(); + + if(defined $opt::eta) { $opt::progress = $opt::eta; } + if(defined $opt::bar) { $opt::progress = $opt::bar; } + if(defined $opt::bar or defined $opt::latestline) { + my $fh = $Global::status_fd || *STDERR; + eval q{ + # Enable utf8 if possible + use utf8; + binmode $fh, "encoding(utf8)"; + *decode_utf8 = \&Encode::decode_utf8; + }; + if(eval { decode_utf8("x") }) { + # Great: decode works + } else { + # UTF8-decode not supported: Dummy decode + eval q{sub decode_utf8($;$) { $_[0]; }}; + } + } + + # Funding a free software project is hard. GNU Parallel is no + # exception. On top of that it seems the less visible a project + # is, the harder it is to get funding. And the nature of GNU + # Parallel is that it will never be seen by "the guy with the + # checkbook", but only by the people doing the actual work. + # + # This problem has been covered by others - though no solution has + # been found: + # https://www.slideshare.net/NadiaEghbal/consider-the-maintainer + # https://www.numfocus.org/blog/why-is-numpy-only-now-getting-funded/ + # + # The FAQ tells you why the citation notice exists: + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # + # If you want GNU Parallel to be maintained in the future, and not + # just wither away like so many other free software tools, you + # need to help finance the development. + # + # The citation notice is a simple way of doing so, as citations + # makes it possible to me to get a job where I can maintain GNU + # Parallel as part of the job. + # + # This means you can help financing development + # + # WITHOUT PAYING A SINGLE CENT! + # + # Before implementing the citation notice it was discussed with + # the users: + # https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html + # + # Having to spend 10 seconds on running 'parallel --citation' once + # is no doubt not an ideal solution, but no one has so far come up + # with an ideal solution - neither for funding GNU Parallel nor + # other free software. + # + # If you believe you have the perfect solution, you should try it + # out, and if it works, you should post it on the email + # list. Ideas that will cost work and which have not been tested + # are, however, unlikely to be prioritized. + # + # Please note that GPL version 3 gives you the right to fork GNU + # Parallel under a new name, but it does not give you the right to + # distribute modified copies with the citation notice disabled in + # a way where the software can be confused with GNU Parallel. To + # do that you need to be the owner of the GNU Parallel + # trademark. The xt:Commerce case shows this. + # + # Description of the xt:Commerce case in OLG Duesseldorf + # https://web.archive.org/web/20180715073746/http://www.inta.org/INTABulletin/Pages/GERMANYGeneralPublicLicenseDoesNotPermitUseofThird-PartyTrademarksforAdvertisingModifiedVersionsofOpen-SourceSoftware.aspx + # + # The verdict in German + # https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # https://web.archive.org/web/20180715073717/https://www.admody.com/urteilsdatenbank/cafe6fdaeed3/OLG-Duesseldorf_Urteil_vom_28-September-2010_Az_I-20-U-41-09 + # + # Other free software limiting derivates by the same name: + # https://en.wikipedia.org/wiki/Red_Hat_Enterprise_Linux_derivatives#Legal_aspects + # https://tm.joomla.org/trademark-faq.html + # https://www.mozilla.org/en-US/foundation/trademarks/faq/ + # + # Running 'parallel --citation' one single time takes less than 10 + # seconds, and will silence the citation notice for future + # runs. If that is too much trouble for you, why not use one of + # the alternatives instead? + # See a list in: 'man parallel_alternatives' + # + # If you want GNU Parallel to be maintained in the future you + # should keep this line. + citation_notice(); + # *YOU* will be harming free software by removing the notice. You + # accept to be added to a public hall of shame by removing the + # line. This is because _YOU_ actively make it harder to justify + # spending time developing GNU Parallel. + + parse_halt(); + + if($ENV{'PARALLEL_ENV'}) { + # Read environment and set $Global::parallel_env + # Must be done before is_acceptable_command_line_length() + my $penv = $ENV{'PARALLEL_ENV'}; + # unset $PARALLEL_ENV: It should not be given to children + # because it takes up a lot of env space + delete $ENV{'PARALLEL_ENV'}; + if(-e $penv) { + # This is a file/fifo: Replace envvar with content of file + open(my $parallel_env, "<", $penv) || + ::die_bug("Cannot read parallel_env from $penv"); + local $/; # Put <> in slurp mode + $penv = <$parallel_env>; + close $parallel_env; + } + # Map \001 to \n to make it easer to quote \n in $PARALLEL_ENV + $penv =~ s/\001/\n/g; + if($penv =~ /\0/) { + ::warning('\0 (NUL) in environment is not supported'); + } + $Global::parallel_env = $penv; + } + + parse_sshlogin(); + if(defined $opt::show_limits) { show_limits(); } + + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail."); + } + + if(not defined $opt::jobs) { $opt::jobs = "100%"; } + open_joblog(); + open_json_csv(); + if($opt::sqlmaster or $opt::sqlworker) { + $Global::sql = SQL->new($opt::sqlmaster || $opt::sqlworker); + } + if($opt::sqlworker) { $Global::membuffer ||= 1; } + # The sqlmaster groups the arguments, so the should just read one + if($opt::sqlworker and not $opt::sqlmaster) { + $Global::max_number_of_args = 1; + } + if($Global::color or $opt::colorfailed) { Job::init_color(); } +} + +sub check_invalid_option_combinations() { + if(defined $opt::timeout and + $opt::timeout !~ /^\d+(\.\d+)?%?$|^(\d+(\.\d+)?[dhms])+$/i) { + ::error("--timeout must be seconds or percentage."); + wait_and_exit(255); + } + if(defined $opt::fifo and defined $opt::cat) { + ::error("--fifo cannot be combined with --cat."); + ::wait_and_exit(255); + } + if(defined $opt::retries and defined $opt::roundrobin) { + ::error("--retries cannot be combined with --roundrobin."); + ::wait_and_exit(255); + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ". + "--max-lines, and -L."); + wait_and_exit(255); + } + if(defined $opt::group and $opt::ungroup) { + ::error("--group cannot be combined with --ungroup."); + ::wait_and_exit(255); + } + if(defined $opt::group and $opt::linebuffer) { + ::error("--group cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::ungroup and $opt::linebuffer) { + ::error("--ungroup cannot be combined with --line-buffer."); + ::wait_and_exit(255); + } + if(defined $opt::tollef and not $opt::gnu) { + ::error("--tollef has been retired.", + "Remove --tollef or use --gnu to override --tollef."); + ::wait_and_exit(255); + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.", + "-B has been retired. Use --bf.", + "-T has been retired. Use --tty.", + "-U has been retired. Use --er.", + "-W has been retired. Use --wd.", + "-Y has been retired. Use --shebang.", + "-H has been retired. Use --halt.", + "--sql has been retired. Use --sqlmaster.", + "--ctrlc has been retired.", + "--noctrlc has been retired."); + ::wait_and_exit(255); + } + if($opt::groupby) { + if(not $opt::pipe and not $opt::pipepart) { + $opt::pipe = 1; + } + if($opt::remove_rec_sep) { + ::error("--remove-rec-sep is not compatible with --groupby"); + ::wait_and_exit(255); + } + if($opt::recstart) { + ::error("--recstart is not compatible with --groupby"); + ::wait_and_exit(255); + } + if($opt::recend ne "\n") { + ::error("--recend is not compatible with --groupby"); + ::wait_and_exit(255); + } + } +} + +sub init_globals() { + # Defaults: + $Global::version = 20221122; + $Global::progname = 'parallel'; + $::name = "GNU Parallel"; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + # Don't quote every part of the command line + $Global::quoting = 0; + # Quote replacement strings + $Global::quote_replace = 1; + $Global::total_completed = 0; + $Global::cache_replacement_eval = 1; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => + ('$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; '. + '$_ = dirname($_);'), + '{/.}' => 's:.*/::; s:\.[^/.]*$::;', + '{.}' => 's:\.[^/.]*$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$:: || s:.*$::', + # a.b => b; a => '' + '{+.}' => 's:.*\.:: || s:.*$::', + # a.b.c => b.c; a.b => ''; a => '' + '{+..}' => 's:.*\.([^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{+...}' => 's:.*\.([^/.]*\.[^/.]*\.[^/.]*)$:$1: || s:.*$::', + '{..}' => 's:\.[^/.]*\.[^/.]*$::', + '{...}' => 's:\.[^/.]*\.[^/.]*\.[^/.]*$::', + '{/..}' => 's:.*/::; s:\.[^/.]*\.[^/.]*$::', + '{/...}' => 's:.*/::; s:\.[^/.]*\.[^/.]*\.[^/.]*$::', + # n choose k = Binomial coefficient + '{choose_k}' => ('for $t (2..$#arg)'. + '{ if($arg[$t-1] ge $arg[$t]) { skip() } }'), + # unique values: Skip job if any args are the same + '{uniq}' => 'if(::uniq(@arg) != @arg) { skip(); }', + # {##} = number of jobs + '{##}' => '1 $_=total_jobs()', + # {0%} = 0-padded jobslot + '{0%}' => ('1 $f=1+int((log($Global::max_jobs_running||1)/log(10)));'. + '$_=sprintf("%0${f}d",slot())'), + # {0%} = 0-padded seq + '{0#}' => ('1 $f=1+int((log(total_jobs())/log(10)));'. + '$_=sprintf("%0${f}d",seq())'), + + ## Bash inspired replacement strings + # Bash ${a:-myval} + '{:-([^}]+?)}' => '$_ ||= $$1', + # Bash ${a:2} + '{:(\d+?)}' => 'substr($_,0,$$1) = ""', + # Bash ${a:2:3} + '{:(\d+?):(\d+?)}' => '$_ = substr($_,$$1,$$2);', + # echo {#z.*z.} ::: z.z.z.foo => z.foo + # echo {##z.*z.} ::: z.z.z.foo => foo + # Bash ${a#bc} + '{#([^#}][^}]*?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/^$nongreedy(.*)/$1/;', + # Bash ${a##bc} + '{##([^#}][^}]*?)}' => 's/^$$1//;', + # echo {%.z.*z} ::: foo.z.z.z => foo.z + # echo {%%.z.*z} ::: foo.z.z.z => foo + # Bash ${a%def} + '{%([^}]+?)}' => + '$nongreedy=::make_regexp_ungreedy($$1);s/(.*)$nongreedy$/$1/;', + # Bash ${a%%def} + '{%%([^}]+?)}' => 's/$$1$//;', + # Bash ${a/def/ghi} ${a/def/} + '{/([^#%}/]+?)/([^}]*?)}' => 's/$$1/$$2/;', + # Bash ${a/#def/ghi} ${a/#def/} + '{/#([^}]+?)/([^}]*?)}' => 's/^$$1/$$2/g;', + # Bash ${a/%def/ghi} ${a/%def/} + '{/%([^}]+?)/([^}]*?)}' => 's/$$1$/$$2/g;', + # Bash ${a//def/ghi} ${a//def/} + '{//([^}]+?)/([^}]*?)}' => 's/$$1/$$2/g;', + # Bash ${a^a} + '{^([^}]+?)}' => 's/^($$1)/uc($1)/e;', + # Bash ${a^^a} + '{^^([^}]+?)}' => 's/($$1)/uc($1)/eg;', + # Bash ${a,A} + '{,([^}]+?)}' => 's/^($$1)/lc($1)/e;', + # Bash ${a,,A} + '{,,([^}]+?)}' => 's/($$1)/lc($1)/eg;', + + # {slot} = $PARALLEL_JOBSLOT + '{slot}' => '1 $_="\${PARALLEL_JOBSLOT}";uq()', + # {host} = ssh host + '{host}' => '1 $_="\${PARALLEL_SSHHOST}";uq()', + # {sshlogin} = sshlogin + '{sshlogin}' => '1 $_="\${PARALLEL_SSHLOGIN}";uq()', + # {hgrp} = hostgroups of the host + '{hgrp}' => '1 $_="\${PARALLEL_HOSTGROUPS}";uq()', + # {agrp} = hostgroups of the argument + '{agrp}' => '1 $_="\${PARALLEL_ARGHOSTGROUPS}";uq()', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + $ENV{'PARALLEL_REMOTE_TMPDIR'} ||= "/tmp"; + $ENV{'OLDPWD'} = $ENV{'PWD'}; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp."); + $ENV{HOME} = "/tmp"; + } + # no warnings to allow for undefined $XDG_* + no warnings 'uninitialized'; + # If $PARALLEL_HOME is set, but does not exist, try making it. + if(defined $ENV{'PARALLEL_HOME'}) { + eval { File::Path::mkpath($ENV{'PARALLEL_HOME'}); }; + } + # $xdg_config_home is needed to make env_parallel.fish stop complaining + my $xdg_config_home = $ENV{'XDG_CONFIG_HOME'}; + # config_dirs = $PARALLEL_HOME, $XDG_CONFIG_HOME/parallel, + # $(each XDG_CONFIG_DIRS)/parallel, $HOME/.parallel + # Keep only dirs that exist + @Global::config_dirs = + (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, + (map { "$_/parallel" } + $xdg_config_home, + split /:/, $ENV{'XDG_CONFIG_DIRS'}), + $ENV{'HOME'} . "/.parallel"); + # Use first dir as config dir + $Global::config_dir = $Global::config_dirs[0] || + $ENV{'HOME'} . "/.parallel"; + if($ENV{'PARALLEL_HOME'} =~ /./ and not -d $ENV{'PARALLEL_HOME'}) { + ::warning("\$PARALLEL_HOME ($ENV{'PARALLEL_HOME'}) does not exist."); + ::warning("Using $Global::config_dir"); + } + # cache_dirs = $PARALLEL_HOME, $XDG_CACHE_HOME/parallel, + # Keep only dirs that exist + @Global::cache_dirs = + (grep { -d $_ } + $ENV{'PARALLEL_HOME'}, $ENV{'XDG_CACHE_HOME'}."/parallel"); + $Global::cache_dir = $Global::cache_dirs[0] || + $ENV{'HOME'} . "/.parallel"; + Job::init_color(); +} + +sub parse_halt() { + # $opt::halt flavours + # Uses: + # $opt::halt + # $Global::halt_when + # $Global::halt_fail + # $Global::halt_success + # $Global::halt_pct + # $Global::halt_count + if(defined $opt::halt) { + my %halt_expansion = ( + "0" => "never", + "1" => "soon,fail=1", + "2" => "now,fail=1", + "-1" => "soon,success=1", + "-2" => "now,success=1", + ); + # Expand -2,-1,0,1,2 into long form + $opt::halt = $halt_expansion{$opt::halt} || $opt::halt; + # --halt 5% == --halt soon,fail=5% + $opt::halt =~ s/^(\d+)%$/soon,fail=$1%/; + # Split: soon,fail=5% + my ($when,$fail_success,$pct_count) = split /[,=]/, $opt::halt; + if(not grep { $when eq $_ } qw(never soon now)) { + ::error("--halt must have 'never', 'soon', or 'now'."); + ::wait_and_exit(255); + } + $Global::halt_when = $when; + if($when ne "never") { + if($fail_success eq "fail") { + $Global::halt_fail = 1; + } elsif($fail_success eq "success") { + $Global::halt_success = 1; + } elsif($fail_success eq "done") { + $Global::halt_done = 1; + } else { + ::error("--halt $when must be followed by ,success or ,fail."); + ::wait_and_exit(255); + } + if($pct_count =~ /^(\d+)%$/) { + $Global::halt_pct = $1/100; + } elsif($pct_count =~ /^(\d+)$/) { + $Global::halt_count = $1; + } else { + ::error("--halt $when,$fail_success ". + "must be followed by ,number or ,percent%."); + ::wait_and_exit(255); + } + } + } +} + +sub parse_replacement_string_options() { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl($$) { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/\s/,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore() { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + # $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg and not $opt::tmux and not $opt::tmuxpane) { + $Global::semaphore = 1; + } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait and not $opt::sqlmaster) { + $Global::semaphore = 1; @ARGV = "true"; + } + if($Global::semaphore) { + if(@opt::a) { + # Assign the first -a to STDIN + open(STDIN,"<",shift @opt::a); + if(@opt::a) { + # We currently have no way of dealing with more -a + ::error("A semaphore cannot take input from more files\n"); + ::wait_and_exit(255); + } + } + @opt::a = ("/dev/null"); + # Append a dummy empty argument + # \0 => nothing (not the empty string) + push(@Global::unget_argv, [Arg->new("\0noarg")]); + $Semaphore::timeout = int(multiply_time_units($opt::semaphoretimeout)) + || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + local $/ = "\n"; + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive."); + ::wait_and_exit(255); + } + } +} + +sub record_env() { + # Record current %ENV-keys in $PARALLEL_HOME/ignored_vars + # Returns: N/A + my $ignore_filename = $Global::config_dir . "/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename."); + ::wait_and_exit(255); + } +} + +sub open_joblog() { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fh + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results."); + ::wait_and_exit(255); + } + if(defined $opt::joblog and $opt::joblog =~ s/^\+//) { + # --joblog +filename = append to filename + $append = 1; + } + if($opt::joblog + and + ($opt::sqlmaster + or + not $opt::sqlworker)) { + # Do not log if --sqlworker + if($opt::resume || $opt::resume_failed || $opt::retry_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + # Override $/ with \n because -d might be set + local $/ = "\n"; + # If there is a header: Open as append later + $append = <$joblog_fh>; + my $joblog_regexp; + if($opt::retry_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + my @group; + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + $group[$1-1] = "true"; + } elsif(/(\d+)\s+\S+(\s+[-0-9.]+){6}\s+(.*)$/) { + # Grab out the command + $group[$1-1] = $3; + } else { + chomp; + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + if(@group) { + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + if(grep /\0/, @group) { + # force --null to deal with \n in commandlines + ::warning("Command lines contain newline. ". + "Forcing --null."); + $opt::null = 1; + $/ = "\0"; + } + # Replace \0 with '\n' as used in print_joblog() + print $outfh (map { s/\0/\n/g; $_,$/ } + map { $_ } @group); + seek $outfh, 0, 0; + exit_if_disk_full(); + # Set filehandle to -a + @opt::a = ($outfh); + } + # Remove $command (so -a is run) + @ARGV = (); + } + if($opt::resume || $opt::resume_failed) { + if($opt::resume_failed) { + # Make a regexp that matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + $Global::total_completed++; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + } + close $joblog_fh; + } + # $opt::null may be set if the commands contain \n + if($opt::null) { $/ = "\0"; } + } + if($opt::dryrun) { + # Do not write to joblog in a dry-run + if(not open($Global::joblog, ">", "/dev/null")) { + ::error("Cannot write to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + } elsif($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fh{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog."); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). "\n"; + } + } +} + +sub open_json_csv() { + if($opt::results) { + # Output as JSON/CSV/TSV + if($opt::results eq "-.csv" + or + $opt::results eq "-.tsv" + or + $opt::results eq "-.json") { + # Output as JSON/CSV/TSV on stdout + open $Global::csv_fh, ">&", "STDOUT" or + ::die_bug("Can't dup STDOUT in csv: $!"); + # Do not print any other output to STDOUT + # by forcing all other output to /dev/null + open my $fd, ">", "/dev/null" or + ::die_bug("Can't >/dev/null in csv: $!"); + $Global::fh{1} = $fd; + $Global::fh{2} = $fd; + } elsif($Global::csvsep or $Global::jsonout) { + if(not open($Global::csv_fh,">",$opt::results)) { + ::error("Cannot open results file `$opt::results': ". + "$!."); + wait_and_exit(255); + } + } + } +} + +sub find_compression_program() { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed on 128 core + + # seq 120000000|shuf > 1gb & + # apt-get update + # apt install make g++ htop + # wget -O - pi.dk/3 | bash + # apt install zstd clzip liblz4-tool lzop pigz pxz gzip plzip pbzip2 lzma xz-utils lzip bzip2 lbzip2 lrzip pixz + # git clone https://github.com/facebook/zstd.git + # (cd zstd/contrib/pzstd; make -j; cp pzstd /usr/local/bin) + # echo 'lrzip -L $((-$1))' >/usr/local/bin/lrz + # chmod +x /usr/local/bin/lrz + # wait + # onethread="zstd clzip lz4 lzop gzip lzma xz bzip2" + # multithread="pzstd pigz pxz plzip pbzip2 lzip lbzip2 lrz pixz" + # parallel --shuf -j1 --joblog jl-m --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $multithread + # parallel --shuf -j50% --delay 1 --joblog jl-s --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: 1gb '>'/dev/null , 1 2 3 , {1..3} , $onethread + # sort -nk4 jl-? + + # 1-core: + # 2-cores: pzstd zstd lz4 lzop pigz gzip lbzip2 pbzip2 lrz bzip2 lzma pxz plzip xz lzip clzip + # 4-cores: + # 8-cores: pzstd lz4 zstd pigz lzop lbzip2 pbzip2 gzip lzip lrz plzip pxz bzip2 lzma xz clzip + # 16-cores: pzstd lz4 pigz lzop lbzip2 pbzip2 plzip lzip lrz pxz gzip lzma xz bzip2 + # 32-cores: pzstd lbzip2 pbzip2 zstd pigz lz4 lzop plzip lzip lrz gzip pxz lzma bzip2 xz clzip + # 64-cores: pzstd lbzip2 pbzip2 pigz zstd pixz lz4 plzip lzop lzip lrz gzip pxz lzma bzip2 xz clzip + # 128-core: pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip lrz pxz bzip2 lzma xz clzip + + my @prg = qw(pzstd lbzip2 pbzip2 zstd pixz lz4 pigz lzop plzip lzip gzip + lrz pxz bzip2 lzma xz clzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + +sub read_options() { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = Q(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does + my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = Q(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --_pipe-means-argfiles @options @parser $script ". + "::: @ARGV"; + } + } + if($ARGV[0] =~ / --shebang(-?wrap)? /) { + ::warning("--shebang and --shebang-wrap must be the first ". + "argument.\n"); + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + my @ARGV_orig = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from $PARALLEL_HOME/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + (map { "$_/config" } @Global::config_dirs), + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if($profile =~ m:^\./|^/:) { + # Look for ./profile in . + # Look for /profile in / + push @profiles, grep { -r $_ } $profile; + } else { + # Look for the $profile in @Global::config_dirs + push @profiles, grep { -r $_ } + map { "$_/$profile" } @Global::config_dirs; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + ::debug("init","Read $profile\n"); + local $/ = "\n"; + open (my $in_fh, "<", $profile) || + ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shell_words($_); + } + close $in_fh; + } else { + if(grep /^\Q$profile\E$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable."); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL'}); + } + # Add options from env_parallel.csh via $PARALLEL_CSH + if($ENV{'PARALLEL_CSH'}) { + push @ARGV_env, shell_words($ENV{'PARALLEL_CSH'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + # What were the options given on the command line? + # Used to start --sqlworker + my $ai = arrayindex(\@ARGV_orig, \@ARGV); + @Global::options_in_argv = @ARGV_orig[0..$ai-1]; + # Prepend non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub arrayindex() { + # Similar to Perl's index function, but for arrays + # Input: + # $arr_ref1 = ref to @array1 to search in + # $arr_ref2 = ref to @array2 to search for + # Returns: + # $pos = position of @array1 in @array2, -1 if not found + my ($arr_ref1,$arr_ref2) = @_; + my $array1_as_string = join "", map { "\0".$_ } @$arr_ref1; + my $array2_as_string = join "", map { "\0".$_ } @$arr_ref2; + my $i = index($array1_as_string,$array2_as_string,0); + if($i == -1) { return -1 } + my @before = split /\0/, substr($array1_as_string,0,$i); + return $#before; +} + +sub read_args_from_command_line() { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # :::+ ($Global::arg_sep with --link) + # ::::+ ($Global::arg_file_sep with --link) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # - adds --linkinputsource with 0/1 for each -a depending on :::+/::::+ + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my @new_argv = (); + for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_sep."+" + or + $arg eq $Global::arg_file_sep + or + $arg eq $Global::arg_file_sep."+") { + my $group_sep = $arg; # This group of args is args or argfiles + my @group; + while(defined ($arg = shift @ARGV)) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_sep."+" + or + $arg eq $Global::arg_file_sep + or + $arg eq $Global::arg_file_sep."+") { + # exit while loop if finding new separator + last; + } else { + # If not hitting ::: :::+ :::: or ::::+ + # Append it to the group + push @group, $arg; + } + } + my $is_linked = ($group_sep =~ /\+$/) ? 1 : 0; + my $is_file = ($group_sep eq $Global::arg_file_sep + or + $group_sep eq $Global::arg_file_sep."+"); + if($is_file) { + # :::: / ::::+ + push @opt::linkinputsource, map { $is_linked } @group; + } else { + # ::: / :::+ + push @opt::linkinputsource, $is_linked; + } + if($is_file + or ($opt::_pipe_means_argfiles and $opt::pipe) + ) { + # Group of file names on the command line. + # Append args into -a + push @opt::a, @group; + } else { + # Group of arguments on the command line. + # Put them into a file. + # Create argfile + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + exit_if_disk_full(); + # Append filehandle to -a + push @opt::a, $outfh; + } + if(defined($arg)) { + # $arg is ::: :::+ :::: or ::::+ + # so there is another group + redo; + } else { + # $arg is undef -> @ARGV empty + last; + } + } + push @new_argv, $arg; + } + # Output: @ARGV = command to run with options + return @new_argv; +} + +sub cleanup() { + # Returns: N/A + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; + } +} + + +sub __QUOTING_ARGUMENTS_FOR_SHELL__() {} + +sub shell_quote(@) { + # Input: + # @strings = strings to be quoted + # Returns: + # @shell_quoted_strings = string quoted as needed by the shell + return wantarray ? (map { Q($_) } @_) : (join" ",map { Q($_) } @_); +} + +sub shell_quote_scalar_rc($) { + # Quote for the rc-shell + my $a = $_[0]; + if(defined $a) { + if(($a =~ s/'/''/g) + + + ($a =~ s/[\n\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]+/'$&'/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_csh($) { + # Quote for (t)csh + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + if(($a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\<\=\>\~\|\; \"\!\$\&\'\202-\377]/\\$&/go) + + + # quote newline in csh as \\\n + ($a =~ s/[\n]/"\\\n"/go)) { + # A string was replaced + # No need to test for "" or \0 + } elsif($a eq "") { + $a = "''"; + } elsif($a eq "\0") { + $a = ""; + } + } + return $a; +} + +sub shell_quote_scalar_default($) { + # Quote for other shells (Bourne compatibles) + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + local $_ = $_[0]; + if(/[^-_.+a-z0-9\/]/i) { + s/'/'"'"'/g; # "-quote '-quotes + $_ = "'$_'"; # '-quote entire string + s/^''//; # Remove unneeded '' at ends + s/''$//; # (faster than s/^''|''$//g) + return $_; + } elsif ($_ eq "") { + return "''"; + } else { + # No quoting needed + return $_; + } +} + +sub shell_quote_scalar($) { + # Quote the string so the shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + + # Speed optimization: Choose the correct shell_quote_scalar_* + # and call that directly from now on + no warnings 'redefine'; + if($Global::cshell) { + # (t)csh + *shell_quote_scalar = \&shell_quote_scalar_csh; + } elsif($Global::shell =~ m:(^|/)rc$:) { + # rc-shell + *shell_quote_scalar = \&shell_quote_scalar_rc; + } else { + # other shells + *shell_quote_scalar = \&shell_quote_scalar_default; + } + # The sub is now redefined. Call it + return shell_quote_scalar($_[0]); +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = shell_quote_scalar($_[0]); + no warnings 'redefine'; + *Q = \&::shell_quote_scalar; + return $ret; +} + +sub shell_quote_file($) { + # Quote the string so shell will not expand any special chars + # and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ and ./ if needed + my $a = shift; + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return Q($a); +} + +sub shell_words(@) { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + +sub perl_quote_scalar($) { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $perl_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; + } + return $a; +} + +# -w complains about prototype +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = perl_quote_scalar($_[0]); + *pQ = \&::perl_quote_scalar; + return $ret; +} + +sub unquote_printf() { + # Convert \t \n \r \000 \0 + # Inputs: + # $string = string with \t \n \r \num \0 + # Returns: + # $replaced = string with TAB NEWLINE CR <ascii-num> NUL + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; +} + + +sub __FILEHANDLES__() {} + + +sub save_stdin_stdout_stderr() { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fh + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # TODO Disabled until we have an open3 that will take n filehandles + # for my $fdno (1..61) { + # # /dev/fd/62 and above are used by bash for <(cmd) + # # Find file descriptors that are already opened (by the shell) + # Only focus on stdout+stderr for now + for my $fdno (1..2) { + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fh{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles() { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fh + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? + # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fh)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit($) { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Uses: + # $Global::original_stdin + # Returns: + # $fh = file handle to read-opened file + my $file = shift; + if($file eq "-") { + return ($Global::original_stdin || *STDIN); + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, "<", $file)) { + ::error("Cannot open input file `$file': No such file or directory."); + wait_and_exit(255); + } + return $fh; +} + +sub set_fh_blocking($) { + # Set filehandle as blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Remove non-blocking from the flags + $flags &= ~&O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + +sub set_fh_non_blocking($) { + # Set filehandle as non-blocking + # Inputs: + # $fh = filehandle to be blocking + # Returns: + # N/A + my $fh = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $flags; + # Get the current flags on the filehandle + fcntl($fh, &F_GETFL, $flags) || die $!; + # Add non-blocking to the flags + $flags |= &O_NONBLOCK; + # Set the flags on the filehandle + fcntl($fh, &F_SETFL, $flags) || die $!; +} + + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__() {} + + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started +# $Global::max_procs_file = filename if --jobs is given a filename +# $Global::JobQueue = JobQueue object for the queue of jobs +# $Global::timeoutq = queue of times where jobs timeout +# $Global::newest_job = Job object of the most recent job started +# $Global::newest_starttime = timestamp of $Global::newest_job +# @Global::sshlogin +# $Global::minimal_command_line_length = min len supported by all sshlogins +# $Global::start_no_new_jobs = should more jobs be started? +# $Global::original_stderr = file handle for STDERR when the program started +# $Global::total_started = total number of jobs started +# $Global::joblog = filehandle of joblog +# $Global::debug = Is debugging on? +# $Global::exitstatus = status code of GNU Parallel +# $Global::quoting = quote the command to run + +sub init_run_jobs() { + # Set Global variables and progress signal handlers + # Do the copying of basefiles + # Returns: N/A + $Global::total_running = 0; + $Global::total_started = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + my $max_procs_file_last_mod; + + sub changed_procs_file { + # If --jobs is a file and it is modfied: + # Force recomputing of max_jobs_running for each $sshlogin + # Uses: + # $Global::max_procs_file + # %Global::host + # Returns: N/A + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + $max_procs_file_last_mod ||= 0; + if($mtime > $max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + } + + sub changed_sshloginfile { + # If --slf is changed: + # reload --slf + # filter_hosts + # setup_basefile + # Uses: + # @opt::sshloginfile + # @Global::sshlogin + # %Global::host + # $opt::filter_hosts + # Returns: N/A + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run", + "--sshloginfile $actual_file changed. reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + + sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # %Global::host + # $Global::start_no_new_jobs + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + changed_procs_file(); + changed_sshloginfile(); + } + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::delay + and + $opt::delay-0.008 > ::now()-$Global::newest_starttime) { + # It has been too short since last start + next; + } + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($opt::limit and $sshlogin->limit()) { + # Over limit + next; + } + if(($opt::memfree or $opt::memsuspend) + and + $sshlogin->memfree() < $Global::memlimit) { + # The server has not enough mem free + ::debug("mem", "Not starting job: not enough mem\n"); + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since last login + next; + } + debug("run", $sshlogin->string(), + " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", + $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + + return $jobs_started; + } +} + +{ + my $no_more_file_handles_warned; + + sub start_another_job() { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? + if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + if($job->is_already_in_joblog()) { + $job->free_slot(); + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume + and $job->is_already_in_results())); + debug("start", "Command to run on '", + $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + if($job->virgin()) { + push(@Global::virgin_jobs,$job); + } else { + # Block already set: This is a retry + $job->write_block(); + } + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + my @arg; + for my $record (@{$job->{'commandline'}{'arg_list'}}) { + push @arg, map { $_->orig() } @$record; + } + ::error("No more processes: cannot run a single job. ". + "Something is wrong at @arg."); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ". + "Decreasing number of running jobs to $max.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. ", + "Try running 'parallel -j0 -N 100 --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + debug("start", "No more file handles. "); + return 0; + } + } +} + +sub init_progress() { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my %progress = progress(); + return ("\nComputers / CPU cores / Max jobs to run\n", + $progress{'workerlist'}); +} + +sub drain_job_queue(@) { + # Uses: + # $opt::progress + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + my @command = @_; + if($opt::progress) { + ::status_no_nl(init_progress()); + } + my $last_header = ""; + my $sleep = 0.2; + my $sleepsum = 0; + do { + while($Global::total_running > 0) { + debug("init",$Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not + # closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + ::status("", $progress{'header'}); + $last_header = $progress{'header'}; + } + ::status_no_nl("\r",$progress{'status'}); + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. + if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Exponential back-off sleeping + $sleep = ::reap_usleep($sleep); + $sleepsum += $sleep; + if($sleepsum >= 1000) { + # At most do this every second + $sleepsum = 0; + changed_procs_file(); + changed_sshloginfile(); + start_more_jobs(); + } + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on."); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs."); + } + } + while($opt::sqlmaster and not $Global::sql->finished()) { + # SQL master + $sleep = ::reap_usleep($sleep); + start_more_jobs(); + if($Global::start_sqlworker) { + # Start an SQL worker as we are now sure there is work to do + $Global::start_sqlworker = 0; + if(my $pid = fork()) { + $Global::unkilled_sqlworker = $pid; + } else { + # Replace --sql/--sqlandworker with --sqlworker + my @ARGV = (map { s/^--sql(andworker)?$/--sqlworker/; $_ } + @Global::options_in_argv); + # exec the --sqlworker + exec($0,@ARGV,@command); + } + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty() + or + $opt::sqlmaster and not $Global::sql->finished()); + if($opt::progress) { + my %progress = progress(); + ::status("\r".$progress{'status'}); + } +} + +sub toggle_progress() { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status_no_nl(init_progress()); + } +} + +sub progress() { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? ($_ => "local") : ($_ => $_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $workerno{$w}.":".$sshlogin{$w} ." / ". + ($Global::host{$w}->ncpus() || "-")." / ". + $Global::host{$w}->max_jobs_running()."\n"; + } + $status = "c"x($termcols+1); + # Select an output format that will fit on a single line + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs s2:XX/XX/XX%/XX.Xs s3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/". + "%of started jobs/Average seconds to complete"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = + ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $sshlogin{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/X.Xs 2:XX/XX/XX%/X.Xs 3:XX/XX/XX%/X.Xs 4:XX/XX/XX%/X.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = + ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) + } + } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) + } + } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d/%d", $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d", $sshlogin{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ", + map { sprintf("%s:%d", + $workerno{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, + "status" => $status); +} + +{ + + my ($first_completed, $smoothed_avg_time, $last_eta); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + my $completed = $Global::total_completed; + # In rare cases with -X will $completed > total_jobs() + my $total = ::max($Global::JobQueue->total_jobs(),$completed); + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = ::min($completed / $total,100); + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + if($eta*0.90 < $last_eta and $last_eta < $eta) { + # Eta jumped less that 10% up: Keep the last eta instead + $eta = $last_eta; + } else { + $last_eta = $eta; + } + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar() { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}-> + replace_placeholders(["\257<\257>"],0,0) : ""; + $arg = decode_utf8($arg); + my $eta_dhms = ::seconds_to_time_units($eta); + my $bar_text = + sprintf("%d%% %d:%d=%s %s", + $pctcomplete*100, $completed, $left, $eta_dhms, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + # Prefix with zenity header + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($rows,$columns,$last_update_time); + + sub compute_terminal_size() { + # && true is to force spawning a shell and not just exec'ing + my @tput = qx{ tput lines cols </dev/tty 2>/dev/null && true }; + $rows = 0 + $tput[0]; + $columns = 0 + $tput[1]; + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $stty = qx{ stty -a </dev/tty 2>/dev/null && true }; + # FreeBSD/OpenBSD/NetBSD/Dragonfly/MirOS + # MacOSX/IRIX/AIX/Tru64 + $stty =~ /(\d+) columns/ and do { $columns = $1; }; + $stty =~ /(\d+) rows/ and do { $rows = $1; }; + # GNU/Linux/Solaris + $stty =~ /columns (\d+)/ and do { $columns = $1; }; + $stty =~ /rows (\d+)/ and do { $rows = $1; }; + # Solaris-x86/HPUX/SCOsysV/UnixWare/OpenIndiana + $stty =~ /columns = (\d+)/ and do { $columns = $1; }; + $stty =~ /rows = (\d+)/ and do { $rows = $1; }; + # QNX + $stty =~ /rows=(\d+),(\d+)/ and do { ($rows,$columns) = ($1,$2); }; + } + if(not ($rows && $columns)) { + # && true is to force spawning a shell and not just exec'ing + my $resize = qx{ resize 2>/dev/null && true }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns ||= $1; }; + $resize =~ /LINES=(\d+);/ and do { $rows ||= $1; }; + } + $rows ||= 24; + $columns ||= 80; + } + + sub update_terminal_size() { + # Only update once per second. + if($last_update_time < time) { + $last_update_time = time; + compute_terminal_size(); + # Set signal WINdow CHange to force recompute + $SIG{WINCH} = \&compute_terminal_size; + } + } + + sub terminal_rows() { + # Get the number of rows of the terminal. + # Returns: + # number of rows of the screen + update_terminal_size(); + return $rows; + } + + sub terminal_columns() { + # Get the number of columns of the terminal. + # Returns: + # number of columns of the screen + update_terminal_size(); + return $columns; + } +} + +# Prototype forwarding +sub get_job_with_sshlogin($); +sub get_job_with_sshlogin($) { + # Input: + # $sshlogin = which host should the job be run on? + # Uses: + # $opt::hostgroups + # $Global::JobQueue + # Returns: + # $job = next job object for $sshlogin if any available + my $sshlogin = shift; + my $job; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job to be run on a hostgroup of this + # $sshlogin + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + if(not $job->suspended()) { + $job->set_sshlogin($sshlogin); + } + if(defined $opt::retries and $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == + grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + + +sub __REMOTE_SSH__() {} + + +sub read_sshloginfiles(@) { + # Read a list of --slf's + # Input: + # @files = files or symbolic file names to read + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand($) { + # Expand --slf shorthand into a read file name + # Input: + # $file = file or symbolic file name to read + # Returns: + # $file = actual file name to read + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $Global::config_dir."/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + for(@Global::config_dirs) { + if(not -r $_."/".$file) { + # Try prepending $PARALLEL_HOME + ::error("Cannot open $file."); + ::wait_and_exit(255); + } else { + $file = $_."/".$file; + last; + } + } + } + return $file; +} + +sub read_sshloginfile($) { + # Read sshloginfile into @Global::sshlogin + # Input: + # $file = file to read + # Uses: + # @Global::sshlogin + # Returns: N/A + local $/ = "\n"; + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + if(not open($in_fh, "<", $file)) { + # Try the filename + ::error("Cannot open $file."); + ::wait_and_exit(255); + } + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + /^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub parse_sshlogin() { + # Parse @Global::sshlogin into %Global::host. + # Keep only hosts that are in one of the given ssh hostgroups. + # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc + # Returns: N/A + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + # Parse ,, and \, as , but do not split on that + # -S "ssh -J jump1,,jump2 host1,host2" => + # ssh -J jump1,jump2 host1 + # host2 + # Protect \, and ,, as \0 + $sshlogin =~ s/\\,|,,/\0/g; + for my $s (split /,|\n/, $sshlogin) { + # Replace \0 => , + $s =~ s/\0/,/g; + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + $s =~ s/\s*$//; + push (@login, $s); + } + } + } + $Global::minimal_command_line_length = 100_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + $sshlogin->set_maxlength(Limits::Command::max_length()); + + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + $Global::usable_command_line_length = + # Usable len = maxlen - 3000 for wrapping, div 2 for hexing + int(($Global::minimal_command_line_length - 3000)/2); + if($opt::max_chars) { + if($opt::max_chars <= $Global::usable_command_line_length) { + $Global::usable_command_line_length = $opt::max_chars; + } else { + ::warning("Value for option -s should be < ". + $Global::usable_command_line_length."."); + } + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if(@Global::transfer_files or @opt::return + or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin."); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are ". + "no remote --sshlogin."); + } elsif (@opt::transfer_files) { + ::warning("--transferfile ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin."); + } elsif (defined $opt::cleanup and not %opt::template) { + ::warning("--cleanup ignored as there ". + "are no remote --sshlogin."); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there ". + "are no remote --sshlogin."); + } + } + } +} + +sub remote_hosts() { + # Return sshlogins that are not ':' + # Uses: + # %Global::host + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile() { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles."); + ::wait_and_exit(255); + } + if(not $workdir) { + my $dummycmdline = + CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + push @cmd, $sshlogin->rsync_transfer_cmd($file,$workdir); + } + } + debug("init", "basesetup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel((join "\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Copying of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub cleanup_basefile() { + # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my @cmd; + my $workdir; + if(not $workdir) { + my $dummycmdline = CommandLine->new(1,["true"],{},0,0,[],[],[],[],{},{}); + my $dummyjob = Job->new($dummycmdline); + $workdir = $dummyjob->workdir(); + } + for my $sshlogin (values %Global::host) { + if($sshlogin->local()) { next } + for my $file (@opt::basefile) { + push @cmd, $sshlogin->cleanup_cmd($file,$workdir); + } + } + debug("init", "basecleanup: @cmd\n"); + my ($exitstatus,$stdout_ref,$stderr_ref) = + run_gnu_parallel(join("\n",@cmd),"-j0","--retries",5); + if($exitstatus) { + my @stdout = @$stdout_ref; + my @stderr = @$stderr_ref; + ::error("Cleanup of --basefile failed: @stdout@stderr"); + ::wait_and_exit(255); + } +} + +sub run_gnu_parallel() { + my ($stdin,@args) = @_; + my $cmd = join "",map { " $_ & " } split /\n/, $stdin; + print $Global::original_stderr ` $cmd wait` ; + return 0 +} + +sub _run_gnu_parallel() { + # Run GNU Parallel + # This should ideally just fork an internal copy + # and not start it through a shell + # Input: + # $stdin = data to provide on stdin for GNU Parallel + # @args = command line arguments + # Returns: + # $exitstatus = exitcode of GNU Parallel run + # \@stdout = standard output + # \@stderr = standard error + my ($stdin,@args) = @_; + my ($exitstatus,@stdout,@stderr); + my ($stdin_fh,$stdout_fh)=(gensym(),gensym()); + my ($stderr_fh, $stderrname) = ::tmpfile(SUFFIX => ".par"); + unlink $stderrname; + + my $pid = ::open3($stdin_fh,$stdout_fh,$stderr_fh, + $0,qw(--plain --shell /bin/sh --will-cite), @args); + if(my $writerpid = fork()) { + close $stdin_fh; + @stdout = <$stdout_fh>; + # Now stdout is closed: + # These pids should be dead or die very soon + while(kill 0, $writerpid) { ::usleep(1); } + die; +# reap $writerpid; +# while(kill 0, $pid) { ::usleep(1); } +# reap $writerpid; + $exitstatus = $?; + seek $stderr_fh, 0, 0; + @stderr = <$stderr_fh>; + close $stdout_fh; + close $stderr_fh; + } else { + close $stdout_fh; + close $stderr_fh; + print $stdin_fh $stdin; + close $stdin_fh; + exit(0); + } + return ($exitstatus,\@stdout,\@stderr); +} + +sub filter_hosts() { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. + # Uses: + # %Global::host + # $Global::minimal_command_line_length + # $opt::use_sockets_instead_of_threads + # $opt::use_cores_instead_of_threads + # $opt::use_cpus_instead_of_cores + # Returns: N/A + + my ($nsockets_ref,$ncores_ref, $nthreads_ref, $time_to_login_ref, + $maxlen_ref, $echo_ref, $down_hosts_ref) = + parse_host_filtering(parallelized_host_filtering()); + + delete @Global::host{@$down_hosts_ref}; + @$down_hosts_ref and ::warning("Removed @$down_hosts_ref."); + + $Global::minimal_command_line_length = 100_000_000; + while (my ($string, $sshlogin) = each %Global::host) { + if($sshlogin->local()) { next } + my ($nsockets,$ncores,$nthreads,$time_to_login,$maxlen) = + ($nsockets_ref->{$string},$ncores_ref->{$string}, + $nthreads_ref->{$string},$time_to_login_ref->{$string}, + $maxlen_ref->{$string}); + defined $nsockets or ::die_bug("nsockets missing: $string"); + defined $ncores or ::die_bug("ncores missing: $string"); + defined $nthreads or ::die_bug("nthreads missing: $string"); + defined $time_to_login or ::die_bug("time_to_login missing: $string"); + defined $maxlen or ::die_bug("maxlen missing: $string"); + # ncpus may be set by 4/hostname or may be undefined yet + my $ncpus = $sshlogin->{'ncpus'}; + # $nthreads may be 0 if GNU Parallel is not installed remotely + $ncpus = $nthreads || $ncpus || $sshlogin->ncpus(); + if($opt::use_cpus_instead_of_cores) { + $ncpus = $ncores || $ncpus; + } elsif($opt::use_sockets_instead_of_threads) { + $ncpus = $nsockets || $ncpus; + } elsif($opt::use_cores_instead_of_threads) { + $ncpus = $ncores || $ncpus; + } + $sshlogin->set_ncpus($ncpus); + $sshlogin->set_time_to_login($time_to_login); + $maxlen = $maxlen || Limits::Command::max_length(); + $sshlogin->set_maxlength($maxlen); + ::debug("init", "Timing from -S:$string ", + " ncpus:", $ncpus, + " nsockets:",$nsockets, + " ncores:", $ncores, + " nthreads:",$nthreads, + " time_to_login:", $time_to_login, + " maxlen:", $maxlen, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub parse_host_filtering() { + # Input: + # @lines = output from parallelized_host_filtering() + # Returns: + # \%nsockets = number of sockets of {host} + # \%ncores = number of cores of {host} + # \%nthreads = number of hyperthreaded cores of {host} + # \%time_to_login = time_to_login on {host} + # \%maxlen = max command len on {host} + # \%echo = echo received from {host} + # \@down_hosts = list of hosts with no answer + local $/ = "\n"; + my (%nsockets, %ncores, %nthreads, %time_to_login, %maxlen, %echo, + @down_hosts); + for (@_) { + ::debug("init","Read: ",$_); + chomp; + my @col = split /\t/, $_; + if($col[0] =~ /^parallel: Warning:/) { + # Timed out job: Ignore it + next; + } elsif(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval .?true.?\s([^\;]+);/ or + ::die_bug("col8 does not contain host: $col[8] in $_"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[6] eq "-1" or $col[6] eq "1") { + # exit == 255 or exit == timeout (-1): ssh failed/timedout + # exit == 1: lsh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set nsockets, ncores, nthreads = 1 + ::warning("Could not figure out ". + "number of cpus on $host. Using 1."); + $nsockets{$host} = 1; + $ncores{$host} = 1; + $nthreads{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(/parallel: Warning: Cannot figure out number of/) { + next; + } + if(/\t(perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed|Disconnected from|Received disconnect from)/ + or + /\tWarning: / + ) { + # Skip these (from perl): + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + # Disconnected from 127.0.0.1 port 22 + # + # Skip these (from ssh): + # Warning: Permanently added * to the list of known hosts. + # Warning: Identity file * not accessible: * + } elsif(not defined $nsockets{$col[0]}) { + $nsockets{$col[0]} = $col[1]; + } elsif(not defined $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not defined $nthreads{$col[0]}) { + $nthreads{$col[0]} = $col[1]; + } elsif(not defined $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not defined $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + @down_hosts = uniq(@down_hosts); + return(\%nsockets, \%ncores, \%nthreads, \%time_to_login, + \%maxlen, \%echo, \@down_hosts); +} + +sub parallelized_host_filtering() { + # Uses: + # %Global::host + # Returns: + # text entries with: + # * joblog line + # * hostname \t number of cores + # * hostname \t number of cpus + # * hostname \t max-line-length-allowed + # * hostname \t empty + + sub sshwrapped { + # Wrap with ssh and --env + # Return $default_value if command fails + my $sshlogin = shift; + my $command = shift; + # wrapper that returns output "0\n" if the command fails + # E.g. parallel not installed => "0\n" + my $wcmd = q(perl -e '$a=`).$command.q(`; print $? ? "0".v010 : $a'); + my $commandline = CommandLine->new(1,[$wcmd],{},0,0,[],[],[],[],{},{}); + my $job = Job->new($commandline); + $job->set_sshlogin($sshlogin); + $job->wrapped(); + return($job->{'wrapped'}); + } + + my(@sockets, @cores, @threads, @maxline, @echo); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + push(@sockets, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-sockets")."\n\0"); + push(@cores, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-cores")."\n\0"); + push(@threads, $host."\t"."true $host; ". + sshwrapped($sshlogin,"parallel --number-of-threads")."\n\0"); + push(@maxline, $host."\t"."true $host; ". + sshwrapped($sshlogin, + "parallel --max-line-length-allowed")."\n\0"); + # 'echo' is used to get the fastest possible ssh login time + push(@echo, $host."\t"."true $host; ". + $sshlogin->wrap("echo $host")."\n\0"); + } + # --timeout 10: Setting up an SSH connection and running a simple + # command should never take > 10 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon is overloaded, try 3 times + my $cmd = + "$0 -j0 --timeout 10 --joblog - --plain --delay 0.1 --retries 3 ". + "--tag --tagstring '{1}' -0 --colsep '\t' -k eval '{2}' && true "; + $cmd = $Global::shell." -c ".Q($cmd); + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + + my ($host_fh,$in,$err); + open3($in, $host_fh, $err, $cmd) || ::die_bug("parallel host check: $cmd"); + ::debug("init", map { $_,"\n" } @sockets, @cores, @threads, @maxline, @echo); + + if(not fork()) { + # Give the commands to run to the $cmd + close $host_fh; + print $in @sockets, @cores, @threads, @maxline, @echo; + close $in; + exit(); + } + close $in; + # If -0: $/ must be \n + local $/ = "\n"; + for(<$host_fh>) { + # TODO incompatible with '-quoting. Needs to be fixed differently + #if(/\'$/) { + # # if last char = ' then append next line + # # This may be due to quoting of \n in environment var + # $prepend .= $_; + # next; + #} + $_ = $prepend . $_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + return @out; +} + +sub onall($@) { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. + # Uses: + # $Global::debug + # $Global::exitstatus + # $Global::joblog + # $Global::quoting + # $opt::D + # $opt::arg_file_sep + # $opt::arg_sep + # $opt::colsep + # $opt::files + # $opt::group + # $opt::joblog + # $opt::jobs + # $opt::keeporder + # $opt::linebuffer + # $opt::max_chars + # $opt::plain + # $opt::retries + # $opt::tag + # $opt::tee + # $opt::timeout + # $opt::ungroup + # %Global::host + # @opt::basefile + # @opt::env + # @opt::v + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => not $opt::D); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::memfree) ? "--memfree ".$opt::memfree : ""), + ((defined $opt::memsuspend) ? "--memfree ".$opt::memsuspend : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::sshdelay) ? "--delay ".$opt::sshdelay : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::group) ? "--group" : ""), + ((defined $opt::cleanup) ? "--cleanup" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::plus) ? "--plus" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + (($opt::ungroup == 1) ? "-u" : ""), + ((defined $opt::ssh) ? "--ssh '".$opt::ssh."'" : ""), + ((defined $opt::tee) ? "--tee" : ""), + ((defined $opt::workdir) ? "--wd ".Q($opt::workdir) : ""), + (@Global::transfer_files ? map { "--tf ".Q($_) } + @Global::transfer_files : ""), + (@Global::ret_files ? map { "--return ".Q($_) } + @Global::ret_files : ""), + (@opt::env ? map { "--env ".Q($_) } @opt::env : ""), + (map { "-v" } @opt::v), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 -0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + # If PARALLEL_ENV is set: Pass it on + my $penv=$Global::parallel_env ? + "PARALLEL_ENV=".Q($Global::parallel_env) : + ''; + ::debug("init", "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".Q($sshlogin->string()) : ""), + " -S ", Q($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$penv $0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".Q($sshlogin->string()) : ""), + " -S ", Q($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\0"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile and $opt::cleanup) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || + ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + + +sub __SIGNAL_HANDLING__() {} + + +sub sigtstp() { + # Send TSTP signal (Ctrl-Z) to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("TSTP"); +} + +sub sigpipe() { + # Send SIGPIPE signal to all children process groups + # Uses: + # %SIG + # Returns: N/A + signal_children("PIPE"); +} + +sub signal_children() { + # Send signal to all children process groups + # and GNU Parallel itself + # Uses: + # %SIG + # Returns: N/A + my $signal = shift; + debug("run", "Sending $signal "); + kill $signal, map { -$_ } keys %Global::running; + # Use default signal handler for GNU Parallel itself + $SIG{$signal} = undef; + kill $signal, $$; +} + +sub save_original_signal_handler() { + # Remember the original signal handler + # Uses: + # %Global::original_sig + # Returns: N/A + $SIG{INT} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { ::qqx("tmux kill-session -t p$$"); } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; + # Allow Ctrl-Z to suspend and `fg` to continue + $SIG{TSTP} = \&sigtstp; + $SIG{PIPE} = \&sigpipe; + $SIG{CONT} = sub { + # Set $SIG{TSTP} again (it is undef'ed in sigtstp() ) + $SIG{TSTP} = \&sigtstp; + for my $job (values %Global::running) { + if($job->suspended()) { + # Force jobs to suspend, if they are marked as suspended. + # --memsupspend can suspend a job that will be resumed + # if the user presses CTRL-Z followed by `fg`. + $job->suspend(); + } else { + # Resume the rest of the jobs + $job->resume(); + } + } + }; +} + +sub list_running_jobs() { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ".$job->replaced()); + } +} + +sub start_no_new_jobs() { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs + # Returns: N/A + unlink keys %Global::unlink; + ::status + ("$Global::progname: SIGHUP received. No new jobs will be started.", + "$Global::progname: Waiting for these ".(keys %Global::running). + " jobs to finish. Send SIGTERM to stop now."); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reapers() { + # Run reaper until there are no more left + # Returns: + # @pids_reaped = pids of reaped processes + my @pids_reaped; + my $pid; + while($pid = reaper()) { + push @pids_reaped, $pid; + } + return @pids_reaped; +} + +sub reaper() { + # A job finished: + # * Set exitstatus, exitsignal, endtime. + # * Free ressources for new job + # * Update median runtime + # * Print output + # * If --halt = now: Kill children + # * Print progress + # Uses: + # %Global::running + # $opt::timeout + # $Global::timeoutq + # $opt::keeporder + # $Global::total_running + # Returns: + # $stiff = PID of child finished + my $stiff; + debug("run", "Reaper "); + if(($stiff = waitpid(-1, &WNOHANG)) <= 0) { + # No jobs waiting to be reaped + return 0; + } + + # $stiff = pid of dead process + my $job = $Global::running{$stiff}; + + # '-a <(seq 10)' will give us a pid not in %Global::running + # The same will one of the ssh -M: ignore + $job or return 0; + delete $Global::running{$stiff}; + $Global::total_running--; + if($job->{'commandline'}{'skip'}) { + # $job->skip() was called + $job->set_exitstatus(-2); + $job->set_exitsignal(0); + } else { + $job->set_exitstatus($? >> 8); + $job->set_exitsignal($? & 127); + } + + debug("run", "\nseq ",$job->seq()," died (", $job->exitstatus(), ")"); + if($Global::delayauto or $Global::sshdelayauto) { + if($job->exitstatus()) { + # Job failed: Increase delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 1.3; + $opt::sshdelay &&= $opt::sshdelay * 1.3; + } else { + # Job succeeded: Decrease delay (if $opt::(ssh)delay set) + $opt::delay &&= $opt::delay * 0.9; + $opt::sshdelay &&= $opt::sshdelay * 0.9; + } + debug("run", "delay:$opt::delay ssh:$opt::sshdelay "); + } + $job->set_endtime(::now()); + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + if($job->should_be_retried()) { + # Free up file handles + $job->free_ressources(); + } else { + # The job is done + $sshlogin->inc_jobs_completed(); + # Free the jobslot + $job->free_slot(); + if($opt::timeout and not $job->exitstatus()) { + # Update average runtime for timeout only for successful jobs + $Global::timeoutq->update_median_runtime($job->runtime()); + } + if($opt::keeporder and not $opt::latestline) { + # --latestline fixes --keeporder in Job::row() + $job->print_earlier_jobs(); + } else { + $job->print(); + } + if($job->should_we_halt() eq "now") { + # Kill children + ::kill_sleep_seq($job->pid()); + ::killall(); + ::wait_and_exit($Global::halt_exitstatus); + } + } + $job->cleanup(); + + if($opt::progress) { + my %progress = progress(); + ::status_no_nl("\r",$progress{'status'}); + } + + debug("run", "jobdone \n"); + return $stiff; +} + + +sub __USAGE__() {} + + +sub killall() { + # Kill all jobs by killing their process groups + # Uses: + # $Global::start_no_new_jobs = we are stopping + # $Global::killall = Flag to not run reaper + $Global::start_no_new_jobs ||= 1; + # Do not reap killed children: Ignore them instead + $Global::killall ||= 1; + kill_sleep_seq(keys %Global::running); +} + +sub kill_sleep_seq(@) { + # Send jobs TERM,TERM,KILL to processgroups + # Input: + # @pids = list of pids that are also processgroups + # Convert pids to process groups ($processgroup = -$pid) + my @pgrps = map { -$_ } @_; + my @term_seq = split/,/,$opt::termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + # for each signal+waittime: kill process groups still not dead + while(@term_seq) { + @pgrps = kill_sleep(shift @term_seq, shift @term_seq, @pgrps); + } +} + +sub kill_sleep() { + # Kill pids with a signal and wait a while for them to die + # Input: + # $signal = signal to send to @pids + # $sleep_max = number of ms to sleep at most before returning + # @pids = pids to kill (actually process groups) + # Uses: + # $Global::killall = set by killall() to avoid calling reaper + # Returns: + # @pids = pids still alive + my ($signal, $sleep_max, @pids) = @_; + ::debug("kill","kill_sleep $signal ",(join " ",sort @pids),"\n"); + kill $signal, @pids; + my $sleepsum = 0; + my $sleep = 0.001; + + while(@pids and $sleepsum < $sleep_max) { + if($Global::killall) { + # Killall => don't run reaper + while(waitpid(-1, &WNOHANG) > 0) { + $sleep = $sleep/2+0.001; + } + } elsif(reapers()) { + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + $sleepsum += $sleep; + # Keep only living children + @pids = grep { kill(0, $_) } @pids; + } + return @pids; +} + +sub wait_and_exit($) { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all jobs without printing + killall(); + } + for (keys %Global::unkilled_children) { + # Kill any (non-jobs) children (e.g. reserved processes) + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + if($Global::unkilled_sqlworker) { + waitpid($Global::unkilled_sqlworker,0); + } + # Avoid: Warning: unable to close filehandle properly: No space + # left on device during global destruction. + $SIG{__WARN__} = sub {}; + if($opt::_parset) { + # Make the shell script return $error + print "$Global::parset_endstring\nreturn $error"; + } + exit($error); +} + +sub die_usage() { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage() { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... | $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "GNU Parallel can do much more. See 'man $Global::progname' for details", + "", + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2022, November 22). GNU Parallel 20221122 ('Херсо́н').", + " Zenodo. https://doi.org/10.5281/zenodo.7347980", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines. + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "",); +} + +sub citation_notice() { + # if --will-cite or --plain: do nothing + # if stderr redirected: do nothing + # if $PARALLEL_HOME/will-cite: do nothing + # else: print citation notice to stderr + if($opt::willcite + or + $opt::plain + or + not -t $Global::original_stderr + or + grep { -e "$_/will-cite" } @Global::config_dirs) { + # skip + } else { + ::status + ("Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + " Tange, O. (2022, November 22). GNU Parallel 20221122 ('Херсо́н').", + " Zenodo. https://doi.org/10.5281/zenodo.7347980", + "", + # Before changing these line, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by + # removing these lines. + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "", + "To silence this citation notice: run 'parallel --citation' once.", + "" + ); + mkdir $Global::config_dir; + # Number of times the user has run GNU Parallel without showing + # willingness to cite + my $runs = 0; + if(open (my $fh, "<", $Global::config_dir. + "/runs-without-willing-to-cite")) { + $runs = <$fh>; + close $fh; + } + $runs++; + if(open (my $fh, ">", $Global::config_dir. + "/runs-without-willing-to-cite")) { + print $fh $runs; + close $fh; + if($runs >= 10) { + ::status("Come on: You have run parallel $runs times. ". + "Isn't it about time ", + "you run 'parallel --citation' once to silence ". + "the citation notice?", + ""); + } + } + } +} + +sub status(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh map { ($_, "\n") } @w; + flush $fh; +} + +sub status_no_nl(@) { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); +} + +{ + my %warnings; + sub warning_once(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + $warnings{@w}++ or + status_no_nl(map { ($prog, ": Warning: ", $_, "\n"); } @w); + } +} + +sub error(@) { + my @w = @_; + my $prog = $Global::progname || "parallel"; + status(map { ($prog.": Error: ". $_); } @w); +} + +sub die_bug($) { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug. ", + "Please follow\n", + "https://www.gnu.org/software/parallel/man.html#reporting-bugs\n", + "\n", + "Include this in the report:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + ::wait_and_exit(255); +} + +sub version() { + # Returns: N/A + print join + ("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2007-2022 Ole Tange, http://ole.tange.dk and Free Software", + "Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: https://www.gnu.org/software/${Global::progname}\n", + "When using programs that use GNU Parallel to process data for publication", + "please cite as described in 'parallel --citation'.\n", + ); +} + +sub citation() { + # Returns: N/A + my ($all_argv_ref,$argv_options_removed_ref) = @_; + my $all_argv = "@$all_argv_ref"; + my $no_opts = "@$argv_options_removed_ref"; + $all_argv=~s/--citation//; + if($all_argv ne $no_opts) { + ::warning("--citation ignores all other options and arguments."); + ::status(""); + } + + ::status( + "Academic tradition requires you to cite works you base your article on.", + "If you use programs that use GNU Parallel to process data for an article in a", + "scientific publication, please cite:", + "", + "\@software{tange_2022_7347980,", + " author = {Tange, Ole},", + " title = {GNU Parallel 20221122 ('Херсо́н')},", + " month = Nov,", + " year = 2022,", + " note = {{GNU Parallel is a general parallelizer to run", + " multiple serial command line programs in parallel", + " without changing them.}},", + " publisher = {Zenodo},", + " doi = {10.5281/zenodo.7347980},", + " url = {https://doi.org/10.5281/zenodo.7347980}", + "}", + "", + "(Feel free to use \\nocite{tange_2022_7347980})", + "", + # Before changing these lines, please read + # https://www.gnu.org/software/parallel/parallel_design.html#citation-notice and + # https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt + # You accept to be put in a public hall of shame by removing + # these lines. + "This helps funding further development; AND IT WON'T COST YOU A CENT.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "More about funding GNU Parallel and the citation notice:", + "https://lists.gnu.org/archive/html/parallel/2013-11/msg00006.html", + "https://www.gnu.org/software/parallel/parallel_design.html#citation-notice", + "https://git.savannah.gnu.org/cgit/parallel.git/tree/doc/citation-notice-faq.txt", + "", + "If you send a copy of your published article to tange\@gnu.org, it will be", + "mentioned in the release notes of next version of GNU Parallel.", + "" + ); + while(not grep { -e "$_/will-cite" } @Global::config_dirs) { + print "\nType: 'will cite' and press enter.\n> "; + my $input = <STDIN>; + if(not defined $input) { + exit(255); + } + if($input =~ /will cite/i) { + mkdir $Global::config_dir; + if(open (my $fh, ">", $Global::config_dir."/will-cite")) { + close $fh; + ::status( + "", + "Thank you for your support: You are the reason why there is funding to", + "continue maintaining GNU Parallel. On behalf of future versions of", + "GNU Parallel, which would not exist without your support:", + "", + " THANK YOU SO MUCH", + "", + "It is really appreciated. The citation notice is now silenced.", + ""); + } else { + ::status( + "", + "Thank you for your support. It is much appreciated. The citation", + "cannot permanently be silenced. Use '--will-cite' instead.", + "", + "If you use '--will-cite' in scripts to be run by others you are making", + "it harder for others to see the citation notice. The development of", + "GNU Parallel is indirectly financed through citations, so if users", + "do not know they should cite then you are making it harder to finance", + "development. However, if you pay 10000 EUR, you should feel free to", + "use '--will-cite' in scripts.", + ""); + last; + } + } + } +} + +sub show_limits() { + # Returns: N/A + print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", + "Maximal usable size of command: ", + $Global::usable_command_line_length,"\n", + "\n", + "Execution will continue now, ", + "and it will try to read its input\n", + "and run commands; if this is not ", + "what you wanted to happen, please\n", + "press CTRL-D or CTRL-C\n"); +} + +sub embed() { + # Give an embeddable version of GNU Parallel + # Tested with: bash, zsh, ksh, ash, dash, sh + my $randomstring = "cut-here-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..20); + if(not -f $0 or not -r $0) { + ::error("--embed only works if parallel is a readable file"); + exit(255); + } + if(open(my $fh, "<", $0)) { + # Read the source from $0 + my @source = <$fh>; + my $user = $ENV{LOGNAME} || $ENV{USERNAME} || $ENV{USER}; + my @env_parallel_source = (); + my $shell = $Global::shell; + $shell =~ s:.*/::; + for(which("env_parallel.$shell")) { + -r $_ or next; + # Read the source of env_parallel.shellname + open(my $env_parallel_source_fh, $_) || die; + @env_parallel_source = <$env_parallel_source_fh>; + close $env_parallel_source_fh; + last; + } + print "#!$Global::shell + +# Copyright (C) 2007-2022 $user, Ole Tange, http://ole.tange.dk +# and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +"; + + print q! +# Embedded GNU Parallel created with --embed +parallel() { + # Start GNU Parallel without leaving temporary files + # + # Not all shells support 'perl <(cat ...)' + # This is a complex way of doing: + # perl <(cat <<'cut-here' + # [...] + # ) "$@" + # and also avoiding: + # [1]+ Done cat + + # Make a temporary fifo that perl can read from + _fifo_with_GNU_Parallel_source=`perl -e 'use POSIX qw(mkfifo); + do { + $f = "/tmp/parallel-".join"", + map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $f); + mkfifo($f,0600); + print $f;'` + # Put source code into temporary file + # so it is easy to copy to the fifo + _file_with_GNU_Parallel_source=`mktemp`; +!, + "cat <<'$randomstring' > \$_file_with_GNU_Parallel_source\n", + @source, + $randomstring,"\n", + q! + # Copy the source code from the file to the fifo + # and remove the file and fifo ASAP + # 'sh -c' is needed to avoid + # [1]+ Done cat + sh -c "(rm $_file_with_GNU_Parallel_source; cat >$_fifo_with_GNU_Parallel_source; rm $_fifo_with_GNU_Parallel_source) < $_file_with_GNU_Parallel_source &" + + # Read the source from the fifo + perl $_fifo_with_GNU_Parallel_source "$@" +} +!, + @env_parallel_source, + q! + +# This will call the functions above +parallel -k echo ::: Put your code here +env_parallel --session +env_parallel -k echo ::: Put your code here +parset p,y,c,h -k echo ::: Put your code here +echo $p $y $c $h +echo You can also activate GNU Parallel for interactive use by: +echo . "$0" +!; + } else { + ::error("Cannot open $0"); + exit(255); + } + ::status("Redirect the output to a file and add your changes at the end:", + " $0 --embed > new_script"); +} + + +sub __GENERIC_COMMON_FUNCTION__() {} + + +sub mkdir_or_die($) { + # If dir is not executable: die + my $dir = shift; + # The eval is needed to catch exception from mkdir + eval { File::Path::mkpath($dir); }; + if(not -x $dir) { + ::error("Cannot change into non-executable dir $dir: $!"); + ::wait_and_exit(255); + } +} + +sub tmpfile(@) { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filehandle = opened file handle + # $filename = file name created + my($filehandle,$filename) = + ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); + if(wantarray) { + return($filehandle,$filename); + } else { + # Separate unlink due to NFS dealing badly with File::Temp + unlink $filename; + return $filehandle; + } +} + +sub tmpname($) { + # Select a name that does not exist + # Do not create the file as it may be used for creating a socket (by tmux) + # Remember the name in $Global::unlink to avoid hitting the same name twice + my $name = shift; + my($tmpname); + if(not -w $ENV{'TMPDIR'}) { + if(not -e $ENV{'TMPDIR'}) { + ::error("Tmpdir '$ENV{'TMPDIR'}' does not exist.","Try 'mkdir $ENV{'TMPDIR'}'"); + } else { + ::error("Tmpdir '$ENV{'TMPDIR'}' is not writable.","Try 'chmod +w $ENV{'TMPDIR'}'"); + } + ::wait_and_exit(255); + } + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $tmpname or $Global::unlink{$tmpname}++); + return $tmpname; +} + +sub tmpfifo() { + # Find an unused name and mkfifo on it + my $tmpfifo = tmpname("fif"); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub rm(@) { + # Remove file and remove it from %Global::unlink + # Uses: + # %Global::unlink + delete @Global::unlink{@_}; + unlink @_; +} + +sub size_of_block_dev() { + # Like -s but for block devices + # Input: + # $blockdev = file name of block device + # Returns: + # $size = in bytes, undef if error + my $blockdev = shift; + if(open(my $fh, "<", $blockdev)) { + seek($fh,0,2) || ::die_bug("cannot seek $blockdev"); + my $size = tell($fh); + close $fh; + return $size; + } else { + ::error("cannot open $blockdev"); + wait_and_exit(255); + } +} + +sub qqx(@) { + # Like qx but with clean environment (except for @keep) + # and STDERR ignored + # This is needed if the environment contains functions + # that /bin/sh does not understand + my %env; + # ssh with ssh-agent needs PATH SSH_AUTH_SOCK SSH_AGENT_PID + # ssh with Kerberos needs KRB5CCNAME + # sshpass needs SSHPASS + # tmux needs LC_CTYPE + # lsh needs HOME LOGNAME + my @keep = qw(PATH SSH_AUTH_SOCK SSH_AGENT_PID KRB5CCNAME LC_CTYPE + HOME LOGNAME SSHPASS); + @env{@keep} = @ENV{@keep}; + local %ENV; + %ENV = %env; + if($Global::debug) { + # && true is to force spawning a shell and not just exec'ing + return qx{ @_ && true }; + } else { + # CygWin does not respect 2>/dev/null + # so we do that by hand + # This trick does not work: + # https://stackoverflow.com/q/13833088/363028 + # local *STDERR; + # open(STDERR, ">", "/dev/null"); + open(local *CHILD_STDIN, '<', '/dev/null') or die $!; + open(local *CHILD_STDERR, '>', '/dev/null') or die $!; + my $out; + # eval is needed if open3 fails (e.g. command line too long) + eval { + my $pid = open3( + '<&CHILD_STDIN', + $out, + '>&CHILD_STDERR', + # && true is to force spawning a shell and not just exec'ing + "@_ && true"); + my @arr = <$out>; + close $out; + # Make sure $? is set + waitpid($pid, 0); + return wantarray ? @arr : join "",@arr; + } or do { + # If eval fails, force $?=false + `false`; + }; + } +} + +sub uniq(@) { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min(@) { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max(@) { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum(@) { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero($) { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty($) { + my $a = shift; + return $a ? $a : ""; +} + +sub undef_if_empty($) { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; +} + +sub multiply_binary_prefix(@) { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my @v = @_; + for(@v) { + defined $_ or next; + s/ki/*1024/gi; + s/mi/*1024*1024/gi; + s/gi/*1024*1024*1024/gi; + s/ti/*1024*1024*1024*1024/gi; + s/pi/*1024*1024*1024*1024*1024/gi; + s/ei/*1024*1024*1024*1024*1024*1024/gi; + s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + s/K/*1024/g; + s/M/*1024*1024/g; + s/G/*1024*1024*1024/g; + s/T/*1024*1024*1024*1024/g; + s/P/*1024*1024*1024*1024*1024/g; + s/E/*1024*1024*1024*1024*1024*1024/g; + s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + s/k/*1000/g; + s/m/*1000*1000/g; + s/g/*1000*1000*1000/g; + s/t/*1000*1000*1000*1000/g; + s/p/*1000*1000*1000*1000*1000/g; + s/e/*1000*1000*1000*1000*1000*1000/g; + s/z/*1000*1000*1000*1000*1000*1000*1000/g; + s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $_ = eval $_; + } + return wantarray ? @v : $v[0]; +} + +sub multiply_time_units($) { + # Evalualte numbers with time units + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = string time units + # Returns: + # $value = int in seconds + my @v = @_; + for(@v) { + defined $_ or next; + if(/[dhms]/i) { + s/s/*1+/gi; + s/m/*60+/gi; + s/h/*3600+/gi; + s/d/*86400+/gi; + # 1m/3 => 1*60+/3 => 1*60/3 + s/\+(\D)/$1/gi; + } + $_ = eval $_."-0"; + } + return wantarray ? @v : $v[0]; +} + +sub seconds_to_time_units() { + # Convert seconds into ??d??h??m??s + # s=1, m=60, h=3600, d=86400 + # Input: + # $s = int in seconds + # Returns: + # $str = string time units + my $s = shift; + my $str; + my $d = int($s/86400); + $s -= $d * 86400; + my $h = int($s/3600); + $s -= $h * 3600; + my $m = int($s/60); + $s -= $m * 60; + if($d) { + $str = sprintf("%dd%02dh%02dm%02ds",$d,$h,$m,$s); + } elsif($h) { + $str = sprintf("%dh%02dm%02ds",$h,$m,$s); + } elsif($m) { + $str = sprintf("%dm%02ds",$m,$s); + } else { + $str = sprintf("%ds",$s); + } + return $str; +} + +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full() { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. + # Returns: + # N/A + if(not $disk_full_fh) { + $disk_full_fh = ::tmpfile(SUFFIX => ".df"); + $b8193 = "b"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete.", + "Cannot append to buffer file in $ENV{'TMPDIR'}.", + "Is the disk full?", + "Change \$TMPDIR with --tmpdir or use --compress."); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + +sub spacefree($$) { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? + # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if(1 == $spaces) { + $s =~ s/\s+/ /mg; + } elsif(2 == $spaces) { + # Keep newlines + $s =~ s/\n\n+/\n/sg; + $s =~ s/[ \t]+/ /mg; + } elsif(3 == $spaces) { + # Keep perl code required space + $s =~ s{([^a-zA-Z0-9/])\s+}{$1}sg; + $s =~ s{([a-zA-Z0-9/])\s+([^:a-zA-Z0-9/])}{$1$2}sg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname() { + local $/ = "\n"; + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which(@) { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + if($prg =~ m:/:) { + # Test if program with full path exists + push(@which, grep { not -d $_ and -x $_ } $prg); + } + } + ::debug("which", "$which[0] in $ENV{'PATH'}\n"); + return wantarray ? @which : $which[0]; +} + +{ + my ($regexp,$shell,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + ::debug("init","Parent of $pid\n"); + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # posh rbash rc rush rzsh sash sh static-sh tcsh yash zsh + + my @shells = (qw(ash bash bsd-csh csh dash fdsh fish fizsh ksh + ksh93 lksh mksh pdksh posh rbash rc rush rzsh sash sh + static-sh tcsh yash zsh -sh -csh -bash), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # But not: foo.sh sshd crash flush pdflush scosh fsflush ssh + $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)(-?)('. $shell. ')(\])|(|\S+/|busybox )'. + '(-?)('. $shell. '))( *$| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + # E.g.: ssh -tt csh@lo 'ps aux;true' |egrep ^csh + # but sh also disguises itself as -sh + # (TODO When does that happen?) + "-sh" => ["sh"], + "-csh" => ["tcsh", "csh"], + # ash disguises itself as -ash + "-ash" => ["ash", "dash", "sh"], + # dash disguises itself as -dash + "-dash" => ["dash", "ash", "sh"], + # bash disguises itself as -bash + "-bash" => ["bash", "sh"], + # ksh disguises itself as -ksh + "-ksh" => ["ksh", "sh"], + # zsh disguises itself as -zsh + "-zsh" => ["zsh", "sh"], + ); + } + if($^O eq "linux") { + # Optimized for GNU/Linux + my $testpid = $pid; + my $shellpath; + my $shellline; + while($testpid) { + if(open(my $fd, "<", "/proc/$testpid/cmdline")) { + local $/="\0"; + chomp($shellline = <$fd>); + if($shellline =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", + "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname, + @{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + return $shellpath; + } + } + # Get parent pid + if(open(my $fd, "<", "/proc/$testpid/stat")) { + my $line = <$fd>; + close $fd; + # Parent pid is field 4 + $testpid = (split /\s+/, $line)[3]; + } else { + # Something is wrong: fall back to old method + last; + } + } + } + # if -sh or -csh try readlink /proc/$$/exe + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + if($name_of_ref->{$testpid} =~ /$regexp/o) { + my $shellname = $4 || $8; + my $dash = $3 || $7; + if($shellname eq "sh" and $dash) { + # -sh => csh or sh + if($shellpath = readlink "/proc/$testpid/exe") { + ::debug("init","procpath $shellpath\n"); + if($shellpath =~ m:/$shell$:o) { + ::debug("init", "proc which ".$shellpath." => "); + return $shellpath; + } + } + } + ::debug("init", "which ".$shellname." => "); + $shellpath = (which($shellname,@{$fakename{$shellname}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table() { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Minix uses cols 2,3 and can have newlines in the command + # so lines not having numbers in cols 2,3 must be ignored + my $minix = q( ps -ef |). + q(perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}// and $F[2]>0 and $F[3]>0 and print "@F[2,3] $_"' ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'android' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'minix' => $minix, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or + ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <<empty on MacOSX running cubase>> + # must match: 24436 21224 <<empty on system running Viber>> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now() { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep($) { + # Sleep this many milliseconds. + # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug("timing",int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub make_regexp_ungreedy { + my $regexp = shift; + my $class_state = 0; + my $escape_state = 0; + my $found = 0; + my $ungreedy = ""; + my $c; + + for $c (split (//, $regexp)) { + if ($found) { + if($c ne "?") { $ungreedy .= "?"; } + $found = 0; + } + $ungreedy .= $c; + + if ($escape_state) { $escape_state = 0; next; } + if ($c eq "\\") { $escape_state = 1; next; } + if ($c eq '[') { $class_state = 1; next; } + if ($class_state) { + if($c eq ']') { $class_state = 0; } + next; + } + # Quantifiers: + * {...} + if ($c =~ /[*}+]/) { $found = 1; } + } + if($found) { $ungreedy .= '?'; } + return $ungreedy; +} + + +sub __KILLER_REAPER__() {} + +sub reap_usleep() { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reapers()) { + if(not $Global::total_completed % 100) { + if($opt::timeout) { + # Force cleaning the timeout queue for every 100 jobs + # Fixes potential memleak + $Global::timeoutq->process_timeouts(); + } + } + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem($opt::memfree*0.5); + } + if($opt::memsuspend) { + suspend_young_if_not_enough_mem($opt::memsuspend); + } + if($opt::limit) { + kill_youngest_if_over_limit(); + } + exit_if_disk_full(); + if($Global::linebuffer) { + my $something_printed = 0; + if($opt::keeporder and not $opt::latestline) { + for my $job (values %Global::running) { + $something_printed += $job->print_earlier_jobs(); + } + } else { + for my $job (values %Global::running) { + $something_printed += $job->print(); + } + } + if($something_printed) { $ms = $ms/2+0.001; } + } + if($ms > 0.002) { + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + if($opt::delay and not $Global::linebuffer) { + # The 0.004s is approximately the time it takes for one round + my $next_earliest_start = + $Global::newest_starttime + $opt::delay - 0.004; + my $remaining_ms = 1000 * ($next_earliest_start - ::now()); + # The next job can only start at $next_earliest_start + # so sleep until then (but sleep at least $ms) + usleep(::max($ms,$remaining_ms)); + } else { + usleep($ms); + } + # --compress needs $SIG{CHLD} unset + $SIG{CHLD} = 'DEFAULT'; + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub kill_youngest_if_over_limit() { + # Check each $sshlogin we are over limit + # If over limit: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->limit() == 2) { + $job->kill(); + last; + } + } + } +} + +sub suspend_young_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than $limit free mem: suspend some of the young children + # Else: Resume all jobs + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + my $free = $sshlogin->memfree(); + if($free < 2*$limit) { + # Suspend all jobs (resume some of them later) + map { $_->suspended() or $_->suspend(); } @{$jobs_of{$sshlogin}}; + my @jobs = (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}); + # how many should be running? + # limit*1 => 1; + # limit*1.5 => 2; + # limit*1.75 => 4; + # free < limit*(2-1/2^n); + # => + # 1/(2-free/limit) < 2^n; + my $run = int(1/(2-$free/$limit)); + $run = ::min($run,$#jobs); + # Resume the oldest running + for my $job ((sort { $a->seq() <=> $b->seq() } @jobs)[0..$run]) { + ::debug("mem","\nResume ",$run+1, " jobs. Seq ", + $job->seq(), " resumed ", + $sshlogin->memfree()," < ",2*$limit); + $job->resume(); + } + } else { + for my $job (@{$jobs_of{$sshlogin}}) { + if($job->suspended()) { + $job->resume(); + ::debug("mem","\nResume ",$#{$jobs_of{$sshlogin}}+1, + " jobs. Seq ", $job->seq(), " resumed ", + $sshlogin->memfree()," > ",2*$limit); + last; + } + } + } + } +} + +sub kill_youngster_if_not_enough_mem() { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my $limit = shift; + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}}) { + if($sshlogin->memfree() < $limit) { + ::debug("mem","\n",map { $_->seq()." " } + (sort { $b->seq() <=> $a->seq() } + @{$jobs_of{$sshlogin}})); + ::debug("mem","\n", $job->seq(), "killed ", + $sshlogin->memfree()," < ",$limit); + $job->kill(); + $sshlogin->memfree_recompute(); + } else { + last; + } + } + ::debug("mem","Free mem OK? ", + $sshlogin->memfree()," > ",$limit); + } +} + + +sub __DEBUGGING__() {} + + +sub debug(@) { + # Uses: + # $Global::debug + # %Global::fh + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fh{2}) { + # Original stderr was saved + my $stderr = $Global::fh{2}; + print $stderr @_[1..$#_]; + } else { + print STDERR @_[1..$#_]; + } + } +} + +sub my_memory_usage() { + # Returns: + # memory usage if found + # 0 otherwise + use strict; + use FileHandle; + + local $/ = "\n"; + my $pid = $$; + if(-e "/proc/$pid/stat") { + my $fh = FileHandle->new("</proc/$pid/stat"); + + my $data = <$fh>; + chomp $data; + $fh->close; + + my @procinfo = split(/\s+/,$data); + + return undef_as_zero($procinfo[22]); + } else { + return 0; + } +} + +sub my_size() { + # Returns: + # $size = size of object if Devel::Size is installed + # -1 otherwise + my @size_this = (@_); + eval "use Devel::Size qw(size total_size)"; + if ($@) { + return -1; + } else { + return total_size(@_); + } +} + +sub my_dump(@) { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". + "Not dumping output\n"; + ::status($err); + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak(@) { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp() { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + + +sub __OBJECT_ORIENTED_PARTS__() {} + + +package SSHLogin; + +sub new($$) { + my $class = shift; + my $s = shift; + my $origs = $s; + my %hostgroups; + my $ncpus; + my $sshcommand; + my $user; + my $password; + my $host; + my $port; + my $local; + my $string; + # SSHLogins can have these formats: + # @grp+grp/ncpu//usr/bin/ssh user@server + # ncpu//usr/bin/ssh user@server + # /usr/bin/ssh user@server + # user@server + # ncpu/user@server + # @grp+grp/user@server + # above with: user:password@server + # above with: user@server:port + # So: + # [@grp+grp][ncpu/][ssh command ][[user][:password]@][server[:port]] + + # [@grp+grp]/ncpu//usr/bin/ssh user:pass@server:port + if($s =~ s:^\@([^/]+)/?::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+/, $1); + } + # An SSHLogin is always in the hostgroup of its "numcpu/host" + $hostgroups{$s} = 1; + + # [ncpu/]/usr/bin/ssh user:pass@server:port + if ($s =~ s:^(\d+)/::) { $ncpus = $1; } + + # [/usr/bin/ssh ]user:pass@server:port + if($s =~ s/^(.*) //) { $sshcommand = $1; } + + # [user:pass@]server:port + if($s =~ s/^([^@]+)@//) { + my $userpw = $1; + # user[:pass] + if($userpw =~ s/:(.*)//) { + $password = $1; + if($password eq "") { $password = $ENV{'SSHPASS'} } + if(not ::which("sshpass")) { + ::error("--sshlogin with password requires sshpass installed"); + ::wait_and_exit(255); + } + } + $user = $userpw; + } + # [server]:port + if(not $s =~ /:.*:/ + and + $s =~ s/^([-a-z0-9._]+)//i) { + # Not IPv6 (IPv6 has 2 or more ':') + $host = $1; + } elsif($s =~ s/^(\\[\[\]box0-9a-f.]+)//i) { + # RFC2673 allows for: + # \[b11010000011101] \[o64072/14] \[xd074/14] \[208.116.0.0/14] + $host = $1; + } elsif($s =~ s/^\[([0-9a-f:]+)\]//i + or + $s =~ s/^([0-9a-f:]+)//i) { + # RFC5952 + # [2001:db8::1]:80 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + # 2001:db8::1:80 - not supported + # 2001:db8::1 port 80 - not supported + $host = $1; + } + + # [:port] + if($s =~ s/^:(\w+)//i) { + $port = $1; + } elsif($s =~ s/^[p\.\#](\w+)//i) { + # RFC5952 + # 2001:db8::1.80 + # 2001:db8::1p80 + # 2001:db8::1#80 + $port = $1; + } + + if($s and $s ne ':') { + ::die_bug("SSHLogin parser failed on '$origs' => '$s'"); + } + + $string = + # Only include the sshcommand in $string if it is set by user + ($sshcommand && $sshcommand." "). + ($user && $user."@"). + ($host && $host). + ($port && ":$port"); + if($host eq ':') { + $local = 1; + $string = ":"; + } else { + $sshcommand ||= $opt::ssh || $ENV{'PARALLEL_SSH'} || "ssh"; + } + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + # Used for file names for loadavg + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'sshcommand' => $sshcommand, + 'user' => $user, + 'password' => $password, + 'host' => $host, + 'port' => $port, + 'hostgroups' => \%hostgroups, + 'local' => $local, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/loadavg", + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $Global::cache_dir . "/tmp/sshlogin/" . + $no_slash_string . "/swap_activity", + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY($) { + my $self = shift; + # Remove temporary files if they are created. + ::rm($self->{'loadavg_file'}); + ::rm($self->{'swap_activity_file'}); +} + +sub string($) { + my $self = shift; + return $self->{'string'}; +} + +sub host($) { + my $self = shift; + return $self->{'host'}; +} + +sub sshcmd($) { + # Give the ssh command without hostname + # Returns: + # "sshpass -e ssh -p port -l user" + my $self = shift; + my @local; + # [sshpass -e] ssh -p port -l user + if($self->{'password'}) { push @local, "sshpass -e"; } + # [ssh] -p port -l user + push @local, $self->{'sshcommand'}; + # [-p port] -l user + if($self->{'port'}) { push @local, '-p',$self->{'port'}; } + # [-l user] + if($self->{'user'}) { push @local, '-l',$self->{'user'}; } + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + push @local, "-S", $control_path; + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + $SIG{'TERM'} = undef; + # Run a sleep that outputs data, so it will discover + # if the ssh connection closes. + my $sleep = ::Q('$|=1;while(1){sleep 1;print "foo\n"}'); + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # STDERR >/dev/null to ignore + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + exec(@local, "-MT", $self->{'host'}, "--", + "perl", "-e", $sleep); + } + } + } + + return "@local"; +} + +sub wrap($@) { + # Input: + # @cmd = shell command to run on remote + # Returns: + # $sshwrapped = ssh remote @cmd + my $self = shift; + my @remote = @_; + return(join " ", + $self->sshcmd(), $self->{'host'}, "--", "exec", @remote); +} + +sub hexwrap($@) { + # Input: + # @cmd = perl expresion to eval + # Returns: + # $hexencoded = perl command that decodes hex and evals @cmd + my $self = shift; + my $cmd = join("",@_); + + # "#" is needed because Perl on MacOS X adds NULs + # when running pack q/H10000000/ + my $hex = unpack "H*", $cmd."#"; + # csh does not deal well with > 1000 chars in one word + # Insert space every 1000 char + $hex =~ s/\G.{1000}\K/ /sg; + # Explanation: + # Write this without special chars: eval pack 'H*', join '',@ARGV + # GNU_Parallel_worker = String so people can see this is from GNU Parallel + # eval+ = way to write 'eval ' without space (gives warning) + # pack+ = way to write 'pack ' without space + # q/H10000000/, = almost the same as "H*" but does not use * + # join+q//, = join '', + return('perl -X -e '. + 'GNU_Parallel_worker,eval+pack+q/H10000000/,join+q//,@ARGV '. + $hex); +} + +sub jobs_running($) { + my $self = shift; + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running($) { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength($$) { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength($) { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed() { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups() { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups() { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed($) { + my $self = shift; + $self->{'jobs_completed'}++; + $Global::total_completed++; +} + +sub set_max_jobs_running($$) { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree() { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + # Return 1 if not defined. + return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute() { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + # Run the script twice if it gives 0 (typically intermittent error) + $self->{'memfree'} = ::qqx($script) || ::qqx($script); + if(not $self->{'memfree'}) { + ::die_bug("Less than 1 byte memory free"); + } + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript() { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # /proc/meminfo + # MemFree: 7012 kB + # Buffers: 19876 kB + # Cached: 431192 kB + # SwapCached: 0 kB + "linux" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # Android uses same code as GNU/Linux + "android" => ( + q{ + print 1024 * qx{ + awk '/^((Swap)?Cached|MemFree|Buffers):/ + { sum += \$2} END { print sum }' + /proc/meminfo } + }), + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => ( + q{ + print (((reverse `vmstat 1 1`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The second free value is correct + "solaris" => ( + q{ + print (((reverse `vmstat 1 2`)[0] + =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) + }), + # hw.pagesize: 4096 + # vm.stats.vm.v_cache_count: 0 + # vm.stats.vm.v_inactive_count: 79574 + # vm.stats.vm.v_free_count: 4507 + "freebsd" => ( + q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + ($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }), + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # Pages free: 198061. + # Pages active: 159701. + # Pages inactive: 47378. + # Pages speculative: 29707. + # Pages wired down: 89231. + # "Translation faults": 928901425. + # Pages copy-on-write: 156988239. + # Pages zero filled: 271267894. + # Pages reactivated: 48895. + # Pageins: 1798068. + # Pageouts: 257. + # Object cache: 6603 hits of 1713223 lookups (0% hit rate) + 'darwin' => ( + q{ + $vm = `vm_stat`; + print (($vm =~ /page size of (\d+)/)[0] * + (($vm =~ /Pages free:\s+(\d+)/)[0] + + ($vm =~ /Pages inactive:\s+(\d+)/)[0])); + }), + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $script = "perl -e " . ::Q(::spacefree(1,$perlscript)); + } + return $script; + } +} + +sub limit($) { + # Returns: + # 0 = Below limit. Start another job. + # 1 = Over limit. Start no jobs. + # 2 = Kill youngest job + my $self = shift; + + if(not defined $self->{'limitscript'}) { + my %limitscripts = + ("io" => q! + io() { + limit=$1; + io_file=$2; + # Do the measurement in the background + ((tmp=$(tempfile); + LANG=C iostat -x 1 2 > $tmp; + mv $tmp $io_file) </dev/null >/dev/null & ); + perl -e '-e $ARGV[0] or exit(1); + for(reverse <>) { + /Device/ and last; + /(\S+)$/ and $max = $max > $1 ? $max : $1; } + exit ('$limit' < $max)' $io_file; + }; + io %s %s + !, + "mem" => q! + mem() { + limit=$1; + awk '/^((Swap)?Cached|MemFree|Buffers):/{ sum += $2} + END { + if (sum*1024 < '$limit'/2) { exit 2; } + else { exit (sum*1024 < '$limit') } + }' /proc/meminfo; + }; + mem %s; + !, + "load" => q! + load() { + limit=$1; + ps ax -o state,command | + grep -E '^[DOR].[^[]' | + wc -l | + perl -ne 'exit ('$limit' < $_)'; + }; + load %s + !, + ); + my ($cmd,@args) = split /\s+/,$opt::limit; + if($limitscripts{$cmd}) { + my $tmpfile = ::tmpname("parlmt"); + ++$Global::unlink{$tmpfile}; + $self->{'limitscript'} = + ::spacefree(1, sprintf($limitscripts{$cmd}, + ::multiply_binary_prefix(@args),$tmpfile)); + } else { + $self->{'limitscript'} = $opt::limit; + } + } + + my %env = %ENV; + local %ENV = %env; + $ENV{'SSHLOGIN'} = $self->string(); + system($Global::shell,"-c",$self->{'limitscript'}); + #::qqx($self->{'limitscript'}); + ::debug("limit","limit `".$self->{'limitscript'}."` result ".($?>>8)."\n"); + return $?>>8; +} + + +sub swapping($) { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity($) { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap activity computed + my $self = shift; + # Should we update the swap_activity file? + my $update_swap_activity_file = 0; + # Test with (on 64 core machine): + # seq 100 | parallel --lb -j100 'seq 1000 | parallel --noswap -j 1 true' + if(open(my $swap_fh, "<", $self->{'swap_activity_file'})) { + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + my $swap_activity; + $swap_activity = swapactivityscript(); + if(not $self->local()) { + $swap_activity = $self->wrap($swap_activity); + } + # Run swap_activity measuring. + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + ::qqx("($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile &)"); + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript() { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $script = "perl -e " . ::Q($perlscript); + } + return $script; + } +} + +sub too_fast_remote_login($) { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? $too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at($) { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at($$) { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high($) { + my $self = shift; + my $loadavg = $self->loadavg(); + if(defined $loadavg) { + ::debug("load", "Load $loadavg > ",$self->max_loadavg()); + return $loadavg >= $self->max_loadavg(); + } else { + # Unknown load: Assume load is too high + return 1; + } +} + + + +sub loadavg($) { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting + # for disk or CPU right now. So it is the server load this instant + # and not averaged over several minutes. This is needed so GNU + # Parallel will at most start one job that will push the load over + # the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + + my $self = shift; + sub loadavg_cmd() { + if(not $Global::loadavg_cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = ::spacefree(1,q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + # Treat threads as processes + $bsd2="ps axH -o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + # TODO Find better code for AIX/Android + 'aix' => "uptime", + 'android' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd2, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd2, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $cygwin, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + 'MSWin32' => $sysv, + ); + print `$ps{$^O}`; + }); + # The command is too long for csh, so base64_wrap the command + $Global::loadavg_cmd = $self->hexwrap($ps); + } + return $Global::loadavg_cmd; + } + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/; # $/ = undef => slurp whole file + my $load_out = <$load_fh>; + close $load_fh; + if($load_out =~ /\S/) { + # Content can be empty if ~/ is on NFS + # due to reading being non-atomic. + # + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . "\n$load_out"); + } + } + $update_loadavg_file = 1; + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + my $dir = ::dirname($self->{'swap_activity_file'}); + -d $dir or eval { File::Path::mkpath($dir); }; + -w $dir or ::die_bug("Cannot write to $dir"); + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->wrap(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + ::debug("load", "Update load\n"); + my $file = $self->{'loadavg_file'}; + # tmpfile on same filesystem as $file + my $tmpfile = $file.$$; + $ENV{'SSHPASS'} = $self->{'password'}; + ::qqx("($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile & )"); + } + return $self->{'loadavg'}; +} + +sub max_loadavg($) { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg($$) { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg($) { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. --load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. --load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + if(open(my $in_fh, "<", $Global::max_load_file)) { + my $opt_load_file = join("",<$in_fh>); + close $in_fh; + $load = $self->compute_max_loadavg($opt_load_file); + } else { + ::error("Cannot open $loadspec."); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --load failed."); + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login($) { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login($$) { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running($) { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running($) { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes($) { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +{ + my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; + + sub reserve_filehandles($) { + # Reserves filehandle + my $n = shift; + for (1..$n) { + $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); + } + } + + sub reserve_process() { + # Spawn a dummy process + my $child; + if($child = fork()) { + push @children, $child; + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # This is the child + # The child takes one process slot + # It will be killed later + $SIG{'TERM'} = $Global::original_sig{'TERM'}; + if($^O eq "cygwin" or $^O eq "msys" or $^O eq "nto") { + # The exec does not work on Cygwin and QNX + sleep 10101010; + } else { + # 'exec sleep' takes less RAM than sleeping in perl + exec 'sleep', 10101; + } + exit(0); + } else { + # Failed to spawn + $max_system_proc_reached = 1; + } + } + + sub get_args_or_jobs() { + # Get an arg or a job (depending on mode) + if($Global::semaphore or ($opt::pipe and not $opt::tee)) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + # Replacement must happen here due to seq() + $job and $job->replaced(); + push(@jobs, $job); + return 1; + } + } + } + } + + sub cleanup() { + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}->unget(@args); + @args = (); + $Global::JobQueue->unget(@jobs); + @jobs = (); + } + + sub processes_available_by_system_limit($) { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawning_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? + reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + if($wanted_processes < $Global::infinity) { + $Global::dummy_jobs = 1; + } + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + if(!$Global::dummy_jobs) { + get_args_or_jobs() or last; + } + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: ". + $wait_time_for_getting_args, " ", $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning_once( + "Starting $system_limit processes took > $forktime sec.", + "Consider adjusting -j. Press CTRL-C to stop."); + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ". + $system_limit. " jobs in parallel.", + "Try running 'parallel -j0 -N $system_limit --pipe parallel -j0'", + "or increasing 'ulimit -n' (try: ulimit -n `ulimit -Hn`)", + "or increasing 'nofile' in /etc/security/limits.conf", + "or increasing /proc/sys/fs/file-max"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ". + $system_limit. " jobs in parallel.", + "Try increasing 'ulimit -u' (try: ulimit -u `ulimit -Hu`)", + "or increasing 'nproc' in /etc/security/limits.conf", + "or increasing /proc/sys/kernel/pid_max"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; + } +} + +sub simultaneous_sshlogin_limit($) { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->string(); + ::warning("ssh to $serverlogin only allows ". + "for $ssh_limit simultaneous logins.", + "You may raise this by changing", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.", + "You can also try --sshdelay 0.1", + "Using only ".($ssh_limit-1)." connections ". + "to avoid race conditions."); + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + } + return $ssh_limit; +} + +sub simultaneous_sshlogin($) { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simul-login & ssh host echo simul-login & ...) | + # grep simul|wc -l + # Input: + # $wanted_processes = Try for this many logins in parallel + # Returns: + # $ssh_limit = Number of succesful parallel logins + local $/ = "\n"; + my $self = shift; + my $wanted_processes = shift; + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = ($sshdelay.$self->wrap("echo simultaneouslogin"). + "</dev/null 2>&1 &")x$wanted_processes; + ::debug("init","Trying $wanted_processes logins at ".$self->string()."\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus($$) { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes($) { + # Parse the number of processes that the user asked for using -j + # Input: + # $opt_P = string formatted as for -P + # Returns: + # $processes = the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if($opt_P =~ /^\+(\d+)$/) { + # E.g. -P +2 + my $j = $1; + $processes = + $self->ncpus() + $j; + } elsif ($opt_P =~ /^-(\d+)$/) { + # E.g. -P -2 + my $j = $1; + $processes = + $self->ncpus() - $j; + } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { + # E.g. -P 10.5% + my $j = $1; + $processes = + $self->ncpus() * $j / 100; + } elsif ($opt_P =~ /^(\d+)$/) { + $processes = $1; + if($processes == 0) { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } + } elsif (-f $opt_P) { + $Global::max_procs_file = $opt_P; + if(open(my $in_fh, "<", $Global::max_procs_file)) { + my $opt_P_file = join("",<$in_fh>); + close $in_fh; + $processes = $self->user_requested_processes($opt_P_file); + } else { + ::error("Cannot open $opt_P."); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --jobs/-j/--max-procs/-P failed."); + ::die_usage(); + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus($) { + # Number of CPU threads + # --use_sockets_instead_of_threads = count socket instead + # --use_cores_instead_of_threads = count physical cores instead + # Returns: + # $ncpus = number of cpu (threads) on this sshlogin + local $/ = "\n"; + my $self = shift; + if(not defined $self->{'ncpus'}) { + if($self->local()) { + if($opt::use_sockets_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'sockets'}; + } elsif($opt::use_cores_instead_of_threads) { + $self->{'ncpus'} = socket_core_thread()->{'cores'}; + } else { + $self->{'ncpus'} = socket_core_thread()->{'threads'}; + } + } else { + my $ncpu; + $ENV{'SSHPASS'} = $self->{'password'}; + ::debug("init",("echo | ".$self->wrap("parallel --number-of-sockets"))); + if($opt::use_sockets_instead_of_threads + or + $opt::use_cpus_instead_of_cores) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-sockets")); + } elsif($opt::use_cores_instead_of_threads) { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-cores")); + } else { + $ncpu = ::qqx("echo | ".$self->wrap("parallel --number-of-threads")); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ". + "number of cpus on ".$self->string." ($ncpu). Using 1."); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + + +sub nproc() { + # Returns: + # Number of threads using `nproc` + my $no_of_threads = ::qqx("nproc"); + chomp $no_of_threads; + return $no_of_threads; +} + +sub no_of_sockets() { + return socket_core_thread()->{'sockets'}; +} + +sub no_of_cores() { + return socket_core_thread()->{'cores'}; +} + +sub no_of_threads() { + return socket_core_thread()->{'threads'}; +} + +sub socket_core_thread() { + # Returns: + # { + # 'sockets' => #sockets = number of socket with CPU present + # 'cores' => #cores = number of physical cores + # 'threads' => #threads = number of compute cores (hyperthreading) + # 'active' => #taskset_threads = number of taskset limited cores + # } + my $cpu; + if ($^O eq 'linux') { + $cpu = sct_gnu_linux($cpu); + } elsif ($^O eq 'android') { + $cpu = sct_android($cpu); + } elsif ($^O eq 'freebsd') { + $cpu = sct_freebsd($cpu); + } elsif ($^O eq 'netbsd') { + $cpu = sct_netbsd($cpu); + } elsif ($^O eq 'openbsd') { + $cpu = sct_openbsd($cpu); + } elsif ($^O eq 'gnu') { + $cpu = sct_hurd($cpu); + } elsif ($^O eq 'darwin') { + $cpu = sct_darwin($cpu); + } elsif ($^O eq 'solaris') { + $cpu = sct_solaris($cpu); + } elsif ($^O eq 'aix') { + $cpu = sct_aix($cpu); + } elsif ($^O eq 'hpux') { + $cpu = sct_hpux($cpu); + } elsif ($^O eq 'nto') { + $cpu = sct_qnx($cpu); + } elsif ($^O eq 'svr5') { + $cpu = sct_openserver($cpu); + } elsif ($^O eq 'irix') { + $cpu = sct_irix($cpu); + } elsif ($^O eq 'dec_osf') { + $cpu = sct_tru64($cpu); + } else { + # Try all methods until we find something that works + $cpu = (sct_gnu_linux($cpu) + || sct_android($cpu) + || sct_freebsd($cpu) + || sct_netbsd($cpu) + || sct_openbsd($cpu) + || sct_hurd($cpu) + || sct_darwin($cpu) + || sct_solaris($cpu) + || sct_aix($cpu) + || sct_hpux($cpu) + || sct_qnx($cpu) + || sct_openserver($cpu) + || sct_irix($cpu) + || sct_tru64($cpu) + ); + } + if(not $cpu) { + # Fall back: Set all to nproc + my $nproc = nproc(); + if($nproc) { + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + $nproc; + } + } + if(not $cpu) { + ::warning("Cannot figure out number of cpus. Using 1."); + $cpu->{'sockets'} = + $cpu->{'cores'} = + $cpu->{'threads'} = + $cpu->{'active'} = + 1 + } + $cpu->{'sockets'} ||= 1; + $cpu->{'threads'} ||= $cpu->{'cores'}; + $cpu->{'active'} ||= $cpu->{'threads'}; + chomp($cpu->{'sockets'}, + $cpu->{'cores'}, + $cpu->{'threads'}, + $cpu->{'active'}); + # Choose minimum of active and actual + my $mincpu; + $mincpu->{'sockets'} = ::min($cpu->{'sockets'},$cpu->{'active'}); + $mincpu->{'cores'} = ::min($cpu->{'cores'},$cpu->{'active'}); + $mincpu->{'threads'} = ::min($cpu->{'threads'},$cpu->{'active'}); + return $mincpu; +} + +sub sct_gnu_linux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + my $cpu = shift; + + sub read_topology($) { + my $prefix = shift; + my %sibiling; + my %socket; + my $thread; + for($thread = 0; + -r "$prefix/cpu$thread/topology/physical_package_id"; + $thread++) { + open(my $fh,"<", + "$prefix/cpu$thread/topology/physical_package_id") + || die; + $socket{<$fh>}++; + close $fh; + } + for($thread = 0; + -r "$prefix/cpu$thread/topology/thread_siblings"; + $thread++) { + open(my $fh,"<", + "$prefix/cpu$thread/topology/thread_siblings") + || die; + $sibiling{<$fh>}++; + close $fh; + } + $cpu->{'sockets'} = keys %socket; + $cpu->{'cores'} = keys %sibiling; + $cpu->{'threads'} = $thread; + } + + sub read_cpuinfo(@) { + my @cpuinfo = @_; + $cpu->{'sockets'} = 0; + $cpu->{'cores'} = 0; + $cpu->{'threads'} = 0; + my %seen; + my %phy_seen; + my $physicalid; + for(@cpuinfo) { + # physical id : 0 + if(/^physical id.*[:](.*)/) { + $physicalid = $1; + if(not $phy_seen{$1}++) { + $cpu->{'sockets'}++; + } + } + # core id : 3 + if(/^core id.*[:](.*)/ and not $seen{$physicalid,$1}++) { + $cpu->{'cores'}++; + } + # processor : 2 + /^processor.*[:]\s*\d/i and $cpu->{'threads'}++; + } + $cpu->{'cores'} ||= $cpu->{'threads'}; + $cpu->{'cpus'} ||= $cpu->{'threads'}; + $cpu->{'sockets'} ||= 1; + } + + sub read_lscpu(@) { + my @lscpu = @_; + my $threads_per_core; + my $cores_per_socket; + for(@lscpu) { + /^CPU.s.:\s*(\d+)/ and $cpu->{'threads'} = $1; + /^Thread.s. per core:\s*(\d+)/ and $threads_per_core = $1; + /^Core.s. per socket:\s*(\d+)/ and $cores_per_socket = $1; + /^(CPU )?Socket.s.:\s*(\d+)/i and $cpu->{'sockets'} = $2; + } + if($threads_per_core and $cpu->{'threads'}) { + $cpu->{'cores'} = $cpu->{'threads'} / $threads_per_core; + } + $cpu->{'cpus'} ||= $cpu->{'threads'}; + } + + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my @cpuinfo; + my @lscpu; + if($ENV{'PARALLEL_CPUINFO'}) { + # Use CPUINFO from environment - used for testing only + read_cpuinfo(split/(?<=\n)/,$ENV{'PARALLEL_CPUINFO'}); + } elsif($ENV{'PARALLEL_LSCPU'}) { + # Use LSCPU from environment - used for testing only + read_lscpu(split/(?<=\n)/,$ENV{'PARALLEL_LSCPU'}); + } elsif(-r "$ENV{'PARALLEL_CPUPREFIX'}/cpu0/topology/thread_siblings") { + # Use CPUPREFIX from environment - used for testing only + read_topology($ENV{'PARALLEL_CPUPREFIX'}); + } elsif($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) { + # Skip /proc/cpuinfo - already set + } else { + # Not debugging: Look at this computer + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "-|", "lscpu")) { + # Parse output from lscpu + read_lscpu(<$in_fh>); + close $in_fh; + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + -r "/sys/devices/system/cpu/cpu0/topology/thread_siblings") { + read_topology("/sys/devices/system/cpu"); + } + if(!($cpu->{'sockets'} and $cpu->{'cores'} and $cpu->{'threads'}) + and + open(my $in_fh, "<", "/proc/cpuinfo")) { + # Read /proc/cpuinfo + read_cpuinfo(<$in_fh>); + close $in_fh; + } + } + if(-e "/proc/self/status" and not $ENV{'PARALLEL_CPUINFO'}) { + # if 'taskset' is used to limit number of threads + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $cpu->{'active'} = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return $cpu; +} + +sub sct_android($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + # Use GNU/Linux + return sct_gnu_linux($_[0]); +} + +sub sct_freebsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }) + or + ::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx(qq{ sysctl hw.ncpu | awk '{ print \$2 }' }) + or + ::qqx(qq{ sysctl -a dev.cpu | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' })); + return $cpu; +} + +sub sct_netbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sysctl -n hw.ncpu"); + return $cpu; +} + +sub sct_openbsd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx('sysctl -n hw.ncpu'); + return $cpu; +} + +sub sct_hurd($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("nproc"); + return $cpu; +} + +sub sct_darwin($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + (::qqx('sysctl -n hw.physicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' })); + $cpu->{'threads'} ||= + (::qqx('sysctl -n hw.logicalcpu') + or + ::qqx(qq{ sysctl -a hw | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' })); + return $cpu; +} + +sub sct_solaris($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/bin/kstat") { + my @chip_id = ::qqx("/usr/bin/kstat cpu_info|grep chip_id"); + if($#chip_id >= 0) { + $cpu->{'sockets'} ||= $#chip_id +1; + } + my @core_id = ::qqx("/usr/bin/kstat -m cpu_info|grep -w core_id|uniq"); + if($#core_id >= 0) { + $cpu->{'cores'} ||= $#core_id +1; + } + } + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo -p"); + if($#psrinfo >= 0) { + $cpu->{'sockets'} ||= $psrinfo[0]; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = ::qqx("/usr/sbin/prtconf | grep cpu..instance"); + if($#prtconf >= 0) { + $cpu->{'cores'} ||= $#prtconf +1; + } + } + } + return $cpu; +} + +sub sct_aix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/lscfg") { + if(open(my $in_fh, "-|", + "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")) { + $cpu->{'cores'} = <$in_fh>; + close $in_fh; + } + } + } + if(not $cpu->{'threads'}) { + if(-x "/usr/bin/vmstat") { + if(open(my $in_fh, "-|", "/usr/bin/vmstat 1 1")) { + while(<$in_fh>) { + /lcpu=([0-9]*) / and $cpu->{'threads'} = $1; + } + close $in_fh; + } + } + } + return $cpu; +} + +sub sct_hpux($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | grep 'Locality Domain Count' | awk '{ print \$4 }'}); + $cpu->{'threads'} ||= + ::qqx(qq{ /usr/bin/mpsched -s 2>&1 | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1"'}); + return $cpu; +} + +sub sct_qnx($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + # BUG: It is not known how to calculate this. + + return $cpu; +} + +sub sct_openserver($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + if(not $cpu->{'cores'}) { + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = ::qqx("/usr/sbin/psrinfo"); + if($#psrinfo >= 0) { + $cpu->{'cores'} = $#psrinfo +1; + } + } + } + $cpu->{'sockets'} ||= $cpu->{'cores'}; + return $cpu; +} + +sub sct_irix($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= + ::qqx(qq{ hinv | grep HZ | grep Processor | awk '{print \$1}' }); + return $cpu; +} + +sub sct_tru64($) { + # Returns: + # { 'sockets' => #sockets + # 'cores' => #cores + # 'threads' => #threads + # 'active' => #taskset_threads } + local $/ = "\n"; + my $cpu = shift; + $cpu->{'cores'} ||= ::qqx("sizer -pr"); + $cpu->{'sockets'} ||= $cpu->{'cores'}; + $cpu->{'threads'} ||= $cpu->{'cores'}; + + return $cpu; +} + +sub sshcommand($) { + # Returns: + # $sshcommand = the command (incl options) to run when using ssh + my $self = shift; + if (not defined $self->{'sshcommand'}) { + ::die_bug("sshcommand not set"); + } + return $self->{'sshcommand'}; +} + +sub local($) { + my $self = shift; + return $self->{'local'}; +} + +sub control_path_dir($) { + # Returns: + # $control_path_dir = dir of control path (for -M) + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + $self->{'control_path_dir'} = + # Use $ENV{'TMPDIR'} as that is typically not + # NFS mounted + File::Temp::tempdir($ENV{'TMPDIR'} + . "/control_path_dir-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd($) { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file. " is not readable and will not be transferred."); + return "true"; + } + my $rsync_destdir; + my $relpath = ($file !~ m:^/:) || ($file =~ m:/\./:); # Is the path relative or /./? + if($relpath) { + $rsync_destdir = ::shell_quote_file($workdir); + } else { + # rsync /foo/bar / + $rsync_destdir = "/"; + } + $file = ::shell_quote_file($file); + # Make dir if it does not exist + return($self->wrap("mkdir -p $rsync_destdir") . " && " . + $self->rsync()." $file ".$self->{'host'}.":$rsync_destdir"); +} + +{ + my $rsync_protocol; + + sub rsync($) { + sub rsync_protocol { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. + # If the version >= 3.1.0: downgrade to protocol 30 + # Returns: + # $rsync = "rsync" or "rsync --protocol 30" + if(not $rsync_protocol) { + my @out = `rsync --version`; + if(not @out) { + if(::which("rsync")) { + ::die_bug("'rsync --version' gave no output."); + } else { + ::error("'rsync' is not in \$PATH."); + ::wait_and_exit(255); + } + } + for (@out) { + # rsync version 3.1.3 protocol version 31 + # rsync version v3.2.3 protocol version 31 + if(/version v?(\d+.\d+)(.\d+)?/) { + if($1 >= 3.1) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync_protocol = "rsync --protocol 30"; + } else { + $rsync_protocol = "rsync"; + } + } + } + $rsync_protocol or + ::die_bug("Cannot figure out version of rsync: @out"); + } + return $rsync_protocol; + } + my $self = shift; + + return rsync_protocol()." ".$ENV{'PARALLEL_RSYNC_OPTS'}. + " -e".::Q($self->sshcmd()); + } +} + +sub cleanup_cmd($$$) { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "rmdir @rmdir 2>/dev/null;" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; + } + my $rmf = "sh -c ". + ::Q("rm -f ".::shell_quote_file($f)." 2>/dev/null;".$rmdir); + return $self->wrap(::Q($rmf)); +} + +package JobQueue; + +sub new($) { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $transfer_files, $return_files, $template_names, $template_contents); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'this_job_no' => 0, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + + $self->{'this_job_no'}++; + if(@{$self->{'unget'}}) { + my $job = shift @{$self->{'unget'}}; + # {%} may have changed, so flush computed values + $job && $job->flush_cache(); + return $job; + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined $commandline) { + return Job->new($commandline); + } else { + $self->{'this_job_no'}--; + return undef; + } + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; + $self->{'this_job_no'} -= @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs($) { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + if($opt::pipe and not $opt::tee) { + ::error("--pipe is incompatible with --eta/--bar/--shuf"); + ::wait_and_exit(255); + } + if($opt::totaljobs) { + $self->{'total_jobs'} = $opt::totaljobs; + } elsif($opt::sqlworker) { + $self->{'total_jobs'} = $Global::sql->total_jobs(); + } else { + my $record; + my @arg_records; + my $record_queue = $self->{'commandlinequeue'}{'arg_queue'}; + my $start = time; + while($record = $record_queue->get()) { + push @arg_records, $record; + if(time - $start > 10) { + ::warning("Reading ".scalar(@arg_records). + " arguments took longer than 10 seconds."); + $opt::eta && ::warning("Consider removing --eta."); + $opt::bar && ::warning("Consider removing --bar."); + $opt::shuf && ::warning("Consider removing --shuf."); + last; + } + } + while($record = $record_queue->get()) { + push @arg_records, $record; + } + if($opt::shuf and @arg_records) { + my $i = @arg_records; + while (--$i) { + my $j = int rand($i+1); + @arg_records[$i,$j] = @arg_records[$j,$i]; + } + } + $record_queue->unget(@arg_records); + # $#arg_records = number of args - 1 + # We have read one @arg_record for this job (so add 1 more) + my $num_args = $#arg_records + 2; + # This jobs is not started so -1 + my $started_jobs = $self->{'this_job_no'} - 1; + my $max_args = ::max($Global::max_number_of_args,1); + $self->{'total_jobs'} = ::ceil($num_args / $max_args) + + $started_jobs; + ::debug("init","Total jobs: ".$self->{'total_jobs'}. + " ($num_args/$max_args + $started_jobs)\n"); + } + } + return $self->{'total_jobs'}; +} + +sub flush_total_jobs($) { + # Unset total_jobs to force recomputing + my $self = shift; + ::debug("init","flush Total jobs: "); + $self->{'total_jobs'} = undef; +} + +sub next_seq($) { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args($) { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new($) { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + # remaining data not sent to stdin (used for --pipe) + # tmpfiles to cleanup when job is done + 'unlink' => [], + # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + # Output used for SQL and CSV-output + 'output' => { 1 => [], 2 => [] }, + 'halfline' => { 1 => [], 2 => [] }, + }, ref($class) || $class; +} + +sub flush_cache($) { + my $self = shift; + $self->{'commandline'}->flush_cache(); +} + +sub replaced($) { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +{ + my $next_available_row; + + sub row($) { + my $self = shift; + if(not defined $self->{'row'}) { + if($opt::keeporder) { + $self->{'row'} = $self->seq(); + } else { + $self->{'row'} = ++$next_available_row; + } + } + return $self->{'row'}; + } +} + +sub seq($) { + my $self = shift; + return $self->{'commandline'}->seq(); +} + +sub set_seq($$) { + my $self = shift; + return $self->{'commandline'}->set_seq(shift); +} + +sub slot($) { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +sub free_slot($) { + my $self = shift; + push @Global::slots, $self->slot(); +} + +{ + my($cattail); + + sub cattail() { + # Returns: + # $cattail = perl program for: + # cattail "decomp-prg" wpid [file_stdin] [file_to_unlink] + # decomp-prg = decompress program + # wpid = pid of writer program + # file_stdin = file_to_decompress + # file_to_unlink = unlink this file + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,131072); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be sent more to the decompressor + close OUT; + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles($) { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + + if($opt::latestline) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$outfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$outfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($Global::linebuffer and not + ($opt::keeporder or $opt::files or $opt::results or + $opt::compress or $opt::compress_program or + $opt::decompress_program)) { + # Do not save to files: Use non-blocking pipe + my ($outfhr, $errfhr); + pipe($outfhr, $outfhw) || die; + pipe($errfhr, $errfhw) || die; + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'r',$outfhr); + $self->set_fh(2,'r',$errfhr); + # Make it possible to read non-blocking from the pipe + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + # Return immediately because we do not need setting filenames + return; + } elsif($opt::results and not $Global::csvsep and not $Global::jsonout) { + # If --results, but not --results *.csv/*.tsv + my $out = $self->{'commandline'}->results_out(); + my $seqname; + if($out eq $opt::results or $out =~ m:/$:) { + # $opt::results = simple string or ending in / + # => $out is a dir/ + # prefix/name1/val1/name2/val2/seq + $seqname = $out."seq"; + # prefix/name1/val1/name2/val2/stdout + $outname = $out."stdout"; + # prefix/name1/val1/name2/val2/stderr + $errname = $out."stderr"; + } else { + # $opt::results = replacement string not ending in / + # => $out is a file + $outname = $out; + $errname = "$out.err"; + $seqname = "$out.seq"; + } + my $seqfhw; + if(not open($seqfhw, "+>", $seqname)) { + ::error("Cannot write to `$seqname'."); + ::wait_and_exit(255); + } + print $seqfhw $self->seq(); + close $seqfhw; + if(not open($outfhw, "+>", $outname)) { + ::error("Cannot write to `$outname'."); + ::wait_and_exit(255); + } + if(not open($errfhw, "+>", $errname)) { + ::error("Cannot write to `$errname'."); + ::wait_and_exit(255); + } + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + if($opt::sqlworker) { + # Save the filenames in SQL table + $Global::sql->update("SET Stdout = ?, Stderr = ? ". + "WHERE Seq = ". $self->seq(), + $outname, $errname); + } + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if($opt::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fh{1}) || die; + open($errfhw,">&",$Global::fh{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($Global::linebuffer) { + # Make it possible to read non-blocking from + # the buffer files + # Used for --linebuffer with -k, --files, --res, --compress* + for my $fdno (1,2) { + ::set_fh_non_blocking($self->fh($fdno,'r')); + } + } +} + +sub print_verbose_dryrun($) { + # If -v set: print command to stdout (possibly buffered) + # This must be done before starting the command + my $self = shift; + if($Global::verbose or $opt::dryrun) { + my $fh = $self->fh(1,"w"); + if($Global::verbose <= 1) { + print $fh $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $fh $self->wrapped(),"\n"; + } + } + if($opt::sqlworker) { + $Global::sql->update("SET Command = ? WHERE Seq = ".$self->seq(), + $self->replaced()); + } +} + +sub add_rm($) { + # Files to remove when job is done + my $self = shift; + push @{$self->{'unlink'}}, @_; +} + +sub get_rm($) { + # Files to remove when job is done + my $self = shift; + return @{$self->{'unlink'}}; +} + +sub cleanup($) { + # Remove files when job is done + my $self = shift; + unlink $self->get_rm(); + delete @Global::unlink{$self->get_rm()}; +} + +sub grouped($) { + my $self = shift; + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed seperately + # and fdr can be seeked seperately (for --line-buffer) + open(my $fdr,"<", $self->fh($fdno,'name')) || + ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if not debugging + $Global::debug or ::rm($self->fh($fdno,"unlink")); + } +} + +sub empty_input_wrapper($) { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + # The optimal block size differs + # It has been measured on: + # AMD 6376: 59000 + # <big ppar --pipe --block 100M --test $1 -j1 'cat >/dev/null'; + my $script = + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", @ARGV) || die; + syswrite($fh, $buf); + while($read = sysread(STDIN, $buf, 59000)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::Q($script)."\n"); + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + return 'perl -e '.::Q($script)." ". + base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } else { + return 'perl -e '.::Q($script)." ". + $Global::shell." -c ".::Q($command); + } +} + +sub filter_through_compress($) { + my $self = shift; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file. + my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, + # it is known that output file is opened, + # thus output file can then be removed by the decompressor. + # empty_input_wrapper is needed for plzip + my $wpid = open(my $fdw,"|-", "(echo > $comfile; ". + empty_input_wrapper($opt::compress_program).") >". + ::Q($self->fh($fdno,'name'))) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; + # decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) + || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } +} + +sub set_fh($$$$) { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh($) { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write_block($) { + my $self = shift; + my $stdin_fh = $self->fh(0,"w"); + if(fork()) { + # Close in parent + close $stdin_fh; + } else { + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + + for my $part ( + grep { defined $_ } + $self->{'header'},$self->{'block'}) { + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + while($written = syswrite($stdin_fh,$$part)) { + substr($$part,0,$written) = ""; + } + } + close $stdin_fh; + exit(0); + } +} + +sub write($) { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. + my $written; + + # If writing is to a closed pipe: + # Do not call signal handler, but let nothing be written + local $SIG{PIPE} = undef; + while($written = syswrite($stdin_fh,$$remaining_ref)){ + substr($$remaining_ref,0,$written) = ""; + } +} + +sub set_block($$$$$$) { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref if virgin (i.e. not --roundrobin) + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $buffer_ref = ref to $buffer containing the block + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$buffer_ref,$endpos,$recstart,$recend) = @_; + $self->{'header'} = $header_ref; + if($opt::roundrobin or $opt::remove_rec_sep or defined $opt::retries) { + my $a = ""; + if(($opt::roundrobin or defined $opt::retries) and $self->virgin()) { + $a .= $$header_ref; + } + # Job is no longer virgin + $self->set_virgin(0); + # Make a full copy because $buffer will change + $a .= substr($$buffer_ref,0,$endpos); + $self->{'block'} = \$a; + if($opt::remove_rec_sep) { + remove_rec_sep($self->{'block'},$recstart,$recend); + } + $self->{'block_length'} = length ${$self->{'block'}}; + } else { + $self->set_virgin(0); + for(substr($$buffer_ref,0,$endpos)) { + $self->{'block'} = \$_; + } + $self->{'block_length'} = $endpos + length ${$self->{'header'}}; + } + $self->{'block_pos'} = 0; + $self->add_transfersize($self->{'block_length'}); +} + +sub block_ref($) { + my $self = shift; + return $self->{'block'}; +} + +sub block_length($) { + my $self = shift; + return $self->{'block_length'}; +} + +sub remove_rec_sep($) { + # Remove --recstart and --recend from $block + # Input: + # $block_ref = reference to $block to be modified + # $recstart = --recstart + # $recend = --recend + # Uses: + # $opt::regexp = Are --recstart/--recend regexp? + # Returns: + # N/A + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + if($opt::regexp) { + $$block_ref =~ s/$recend$recstart//gom; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; + } else { + $$block_ref =~ s/\Q$recend$recstart\E//gom; + $$block_ref =~ s/^\Q$recstart\E//os; + $$block_ref =~ s/\Q$recend\E$//os; + } +} + +sub non_blocking_write($) { + my $self = shift; + my $something_written = 0; + + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, + substr(${$self->{'block'}},$self->{'block_pos'})); + if (!defined($rv) && $! == ::EAGAIN()) { + # would block - but would have written + $something_written = 0; + # avoid triggering auto expanding block size + $Global::no_autoexpand_block ||= 1; + } elsif ($self->{'block_pos'}+$rv != $self->{'block_length'}) { + # incomplete write + # Remove the written part + $self->{'block_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + # Empty block to free memory + my $a = ""; + $self->set_block(\$a,\$a,0,"",""); + $something_written = $rv; + } + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin($) { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin($$) { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid($) { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid($$) { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime($) { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime($@) { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; + $opt::sqlworker and + $Global::sql->update("SET Starttime = ? WHERE Seq = ".$self->seq(), + $starttime); +} + +sub runtime($) { + # Returns: + # Run time in seconds with 3 decimals + my $self = shift; + return sprintf("%.3f", + int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime($) { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime($$) { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; + $opt::sqlworker and + $Global::sql->update("SET JobRuntime = ? WHERE Seq = ".$self->seq(), + $self->runtime()); +} + +sub is_timedout($) { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill($) { + my $self = shift; + $self->set_exitstatus(-1); + ::kill_sleep_seq($self->pid()); +} + +sub suspend($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "STOP", @pgrps; + $self->set_suspended(1); +} + +sub set_suspended($$) { + my $self = shift; + $self->{'suspended'} = shift; +} + +sub suspended($) { + my $self = shift; + return $self->{'suspended'}; +} + +sub resume($) { + my $self = shift; + my @pgrps = map { -$_ } $self->pid(); + kill "CONT", @pgrps; + $self->set_suspended(0); +} + +sub failed($) { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here($) { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + $self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here($) { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here($) { + # increase the number of times failed for this $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed($) { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed($) { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/(\d+)h/$1/) { + exit $bash; + } + exit $csh; + }). + # `echo \$?h` is needed to make fish not complain + "' ".'"`echo \\\\\\\\\$?h`" "$status" '; + } + return $script + } +} + +{ + my $script; + + sub fifo_wrap() { + # Script to create a fifo, run a command on the fifo + # while copying STDIN to the fifo, and finally + # remove the fifo and return the exit code of the command. + if(not $script) { + # {} == $PARALLEL_TMP for --fifo + # To make it csh compatible a wrapper needs to: + # * mkfifo + # * spawn $command & + # * cat > fifo + # * waitpid to get the exit code from $command + # * be less than 1000 chars long + + # The optimal block size differs + # It has been measured on: + # AMD 6376: 4095 + # ppar -a big --pipepart --block -1 --test $1 --fifo 'cat {} >/dev/null'; + $script = "perl -e '". + (::spacefree + (0, q{ + ($s,$c,$f) = @ARGV; + # mkfifo $PARALLEL_TMP + system "mkfifo", $f; + # spawn $shell -c $command & + $pid = fork || exec $s, "-c", $c; + open($o,">",$f) || die $!; + # cat > $PARALLEL_TMP + while(sysread(STDIN,$buf,4095)){ + syswrite $o, $buf; + } + close $o; + # waitpid to get the exit code from $command + waitpid $pid,0; + # Cleanup + unlink $f; + exit $?/256; + }))."'"; + } + return $script; + } +} + +sub wrapped($) { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_prepends) + # * --tee (@Global::cat_prepends) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # @opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_prepends + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + # Bug in Bash and Ksh when running multiline aliases + # This will force them to run correctly, but will fail in + # tcsh so we do not do it. + # $command .= "\n\n"; + if(@opt::shellquote) { + # Quote one time for each --shellquote + my $c = $command; + for(@opt::shellquote) { + $c = ::Q($c); + } + # Prepend "echo" (it is written in perl because + # quoting '-e' causes problem in some versions and + # csh's version does something wrong) + $command = q(perl -e '$,=" "; print "@ARGV\n";' -- ) . ::Q($c); + } + if($Global::parallel_env) { + # If $PARALLEL_ENV set, put that in front of the command + # Used for env_parallel.* + if($Global::shell =~ /zsh/) { + # The extra 'eval' will make aliases work, too + $command = $Global::parallel_env."\n". + "eval ".::Q($command); + } else { + $command = $Global::parallel_env."\n".$command; + } + } + if($opt::cat) { + # In '--cat' and '--fifo' {} == $PARALLEL_TMP. + # This is to make it possible to compute $PARALLEL_TMP on + # the fly when running remotely. + # $ENV{PARALLEL_TMP} is set in the remote wrapper before + # the command is run. + # + # Prepend 'cat > $PARALLEL_TMP;' + # Append 'unlink $PARALLEL_TMP without affecting $?' + $command = + 'cat > $PARALLEL_TMP;'. + $command.";". postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } elsif($opt::fifo) { + # Prepend fifo-wrapper. In essence: + # mkfifo {} + # ( $command ) & + # # $command must read {}, otherwise this 'cat' will block + # cat > {}; + # wait; rm {} + # without affecting $? + $command = fifo_wrap(). " ". + $Global::shell. " ". ::Q($command). ' $PARALLEL_TMP'. ';'; + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_prepends) { + # --pipepart: prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 60800 ? 60800 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + # + # --pipepart --tee: prepend: + # < dash-a-file + # + # --pipe --tee: wrap: + # (rm fifo; ... ) < fifo + # + # --pipe --shard X: + # (rm fifo; ... ) < fifo + $command = (shift @Global::cat_prepends). "($command)". + (shift @Global::cat_appends); + } elsif($opt::pipe and not $opt::roundrobin) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + if($Global::cshell + and + length $command > 499) { + # csh does not like words longer than 1000 (499 quoted) + # $command = "perl -e '".base64_zip_eval()."' ". + # join" ",string_zip_base64( + # 'exec "'.::perl_quote_scalar($command).'"'); + $command = base64_wrap("exec \"$Global::shell\",'-c',\"". + ::perl_quote_scalar($command).'"'); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin($$) { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; + + if($opt::sqlworker) { + # Identify worker as --sqlworker often runs on different machines + # If local: Use hostname + my $host = $sshlogin->local() ? ::hostname() : $sshlogin->host(); + $Global::sql->update("SET Host = ? WHERE Seq = ".$self->seq(), $host); + } +} + +sub sshlogin($) { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub string_base64($) { + # Base64 encode strings into 1000 byte blocks. + # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + my @base64 = unpack("(A1000)*",encode_base64((join"",@_),"")); + return @base64; +} + +sub string_zip_base64($) { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. + # 1000 bytes is the largest word size csh supports + # Zipping will make exporting big environments work, too + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs bzip2 base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval() { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel = split /_/, "use_IPC::Open3;_use_MIME::Base64"; + eval"@GNU_Parallel"; + $chld = $SIG{CHLD}; + $SIG{CHLD} = "IGNORE"; + # Search for bzip2. Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + # Forget my() to save chars for csh + # my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + $SIG{CHLD} = $chld; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub base64_wrap($) { + # base64 encode Perl code + # Split it into chunks of < 1000 bytes + # Prepend it with a decoder that eval's it + # Input: + # $eval_string = Perl code to run + # Returns: + # $shell_command = shell command that runs $eval_string + my $eval_string = shift; + return + "perl -e ". + ::Q(base64_zip_eval())." ". + join" ",::shell_quote(string_zip_base64($eval_string)); +} + +sub base64_eval($) { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * evals the result + # Reverse of string_base64 + eval + # Will be wrapped in ' so single quote is forbidden. + # Spaces are stripped so spaces cannot be significant. + # The funny 'use IPC::Open3'-syntax is to avoid spaces and + # to make it clear that this is a GNU Parallel command + # when looking at the process table. + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + my $eval = decode_base64(join"",@ARGV); + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub sshlogin_wrap($) { + # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree + (0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = $ENV{"TMPDIR"} || "'. + ::perl_quote_scalar($ENV{'PARALLEL_REMOTE_TMPDIR'}).'";'. + '$nice = '.$opt::nice.';'. + '$termseq = "'.$opt::termseq.'";'. + # } + q{ + # Check that $tmpdir is writable + -w $tmpdir || + die("$tmpdir\040is\040not\040writable.". + "\040Set\040PARALLEL_REMOTE_TMPDIR"); + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + # Set $script to a non-existent file name in $TMPDIR + do { + $script = $tmpdir."/par-job-$ENV{PARALLEL_SEQ}_". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $script); + # Create a script from the hex code + # that removes itself and runs the commands + open($fh,">",$script) || die; + # \040 = space - but we remove spaces in the script + # ' needed due to rc-shell + print($fh("rm\040\'$script\'\n",$bashfunc.$cmd)); + close $fh; + my $parent = getppid; + my $done = 0; + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + eval { setpgrp }; + # Set nice value + eval { setpriority(0,0,$nice) }; + # Run the script + exec($shell,$script); + die("exec\040failed: $!"); + } + while((not $done) and (getppid == $parent)) { + # Parent pid is not changed, so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } + if(not $done) { + # sshd is dead: User pressed Ctrl-C + # Kill as per --termseq + my @term_seq = split/,/,$termseq; + if(not @term_seq) { + @term_seq = ("TERM",200,"TERM",100,"TERM",50,"KILL",25); + } + while(@term_seq && kill(0,-$pid)) { + kill(shift @term_seq, -$pid); + select(undef, undef, undef, (shift @term_seq)/1000); + } + } + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + my $fh = ::open_or_exit($_); + $Global::envdef = join("",<$fh>); + close $fh; + } + } + if(grep { /^_$/ } @vars) { + local $/ = "\n"; + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $Global::config_dir . "/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' ". + "in a clean environment first."); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should look for BASH_FUNC_myfunc() and BASH_FUNC_myfunc%% + + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + "PARALLEL_SSHLOGIN", "PARALLEL_SSHHOST", + "PARALLEL_HOSTGROUPS", "PARALLEL_ARGHOSTGROUPS", + "PARALLEL_JOBSLOT", $opt::process_slot_var, + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' + my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = (grep { !/PARALLEL_ENV/ } + grep { substr($ENV{$_},0,4) ne "() {" } @vars); + + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. + ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; + s/BASH_FUNC_(.*)(\(\)|%%)/$1/; + "$_$ENV{$v};\nexport -f $_ 2> /dev/null;\n" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:(^|/)(ash|bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell."); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". + ::spacefree(1,'$shell="'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.q{ + if($shell=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{'parallel_bash_environment'}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh + if(not defined $self->{'sshlogin_wrap'}{$command}) { + my $sshlogin = $self->sshlogin(); + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $self->slot()); + $ENV{'PARALLEL_SSHLOGIN'} = $sshlogin->string(); + $ENV{'PARALLEL_SSHHOST'} = $sshlogin->host(); + if ($opt::hostgroups) { + $ENV{'PARALLEL_HOSTGROUPS'} = join '+', $sshlogin->hostgroups(); + $ENV{'PARALLEL_ARGHOSTGROUPS'} = join '+', $self->hostgroups(); + } + $ENV{'PARALLEL_PID'} = $$; + if($sshlogin->local()) { + if($opt::workdir) { + # Create workdir if needed. Then cd to it. + my $wd = $self->workdir(); + if($opt::workdir eq "." or $opt::workdir eq "...") { + # If $wd does not start with '/': Prepend $HOME + $wd =~ s:^([^/]):$ENV{'HOME'}/$1:; + } + ::mkdir_or_die($wd); + my $post = ""; + if($opt::workdir eq "...") { + $post = ";".exitstatuswrapper("rm -rf ".::Q($wd).";"); + + } + $command = "cd ".::Q($wd)." || exit 255; " . + $command . $post;; + } + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $perl_code = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. + "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $perl_code > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $self->{'sshlogin_wrap'}{$command} = base64_wrap($perl_code); + } else { + $self->{'sshlogin_wrap'}{$command} = "perl -e ".::Q($perl_code); + } + } else { + $self->{'sshlogin_wrap'}{$command} = $command; + } + } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = ::pQ($self->workdir()); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") &&}. + qq{exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $cmd = $command; + # q// does not quote \, so we must do that + $cmd =~ s/\\/\\\\/g; + + my $remote_command = $sshlogin->hexwrap + ($pwd.$envset.$bashfuncset.'$cmd='."q\0".$cmd."\0;". + monitor_parent_sshd_script()); + my ($pre,$post,$cleanup)=("","",""); + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = exitstatuswrapper($post); + } + $self->{'sshlogin_wrap'}{$command} = + ($pre + . $sshlogin->wrap($remote_command) + . ";" + . $post); + } + } + return $self->{'sshlogin_wrap'}{$command}; +} + +sub fill_templates($) { + # Replace replacement strings in template(s) + # Returns: + # @templates - File names of replaced templates + my $self = shift; + + if(%opt::template) { + my @template_name = + map { $self->{'commandline'}->replace_placeholders([$_],0,0) } + @{$self->{'commandline'}{'template_names'}}; + ::debug("tmpl","Names: @template_name\n"); + for(my $i = 0; $i <= $#template_name; $i++) { + open(my $fh, ">", $template_name[$i]) || die; + print $fh $self->{'commandline'}-> + replace_placeholders([$self->{'commandline'} + {'template_contents'}[$i]],0,0); + close $fh; + } + if($opt::cleanup) { + $self->add_rm(@template_name); + } + } +} + +sub filter($) { + # Replace replacement strings in filter(s) and evaluate them + # Returns: + # $run - 1=yes, undef=no + my $self = shift; + my $run = 1; + if(@opt::filter) { + for my $eval ($self->{'commandline'}-> + replace_placeholders(\@opt::filter,0,0)) { + $run &&= eval $eval; + } + $self->{'commandline'}{'skip'} ||= not $run; + } + return $run; +} + +sub transfer($) { + # Files to transfer + # Non-quoted and with {...} substituted + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + + my $transfersize = 0; + my @transfer = $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'transfer_files'},0,0); + for(@transfer) { + # filesize + if(-e $_) { + $transfersize += (stat($_))[7]; + } + } + $self->add_transfersize($transfersize); + return @transfer; +} + +sub transfersize($) { + my $self = shift; + return $self->{'transfersize'}; +} + +sub add_transfersize($) { + my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; + $opt::sqlworker and + $Global::sql->update("SET Send = ? WHERE Seq = ".$self->seq(), + $self->{'transfersize'}); +} + +sub sshtransfer($) { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return($) { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize($) { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub add_returnsize($) { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; + $opt::sqlworker and + $Global::sql->update("SET Receive = ? WHERE Seq = ".$self->seq(), + $self->{'returnsize'}); +} + +sub sshreturn($) { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:) || + ($file =~ m:/\./:); # Is the path relative or /./? + my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::Q("cd $wd$cd; rsync"); + my $basename = ::Q(::shell_quote_file(::basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR + # --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd" . " && " . + $sshlogin->rsync(). " $rsync_cd -- ".$sshlogin->host().':'. + $basename . " ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup($) { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->remote_cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= $sshlogin->wrap("rm -rf " . ::Q($workdir).';'); + } + return $cleancmd; +} + +sub remote_cleanup($) { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub exitstatuswrapper(@) { + # Input: + # @shellcode = shell code to execute + # Returns: + # shell script that returns current status after executing @shellcode + if($Global::cshell) { + return ('set _EXIT_status=$status; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } else { + return ('_EXIT_status=$?; ' . + join(" ",@_). + 'exit $_EXIT_status;'); + } +} + +sub workdir($) { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $self->{'commandline'}-> + replace_placeholders([$opt::workdir],0,0); + #$workdir = $opt::workdir; + # Rsync treats /./ special. We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:(.)/+$:$1:; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = $workdir; + } + return $self->{'workdir'}; +} + +sub parentdirs_of($) { + # Return: + # all parentdirs except . of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start($) { + # Setup STDOUT and STDERR for a job and start it. + # Returns: + # job-object or undef if job not to run + + sub open3_setpgrp_internal { + # Run open3+setpgrp followed by the command + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + my $pid; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + # The eval is needed to catch exception from open3 + eval { + if(not $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", "-")) { + # Each child gets its own process group to make it safe to killall + eval{ setpgrp(0,0) }; + eval{ setpriority(0,0,$opt::nice) }; + exec($Global::shell,"-c",$command) + || ::die_bug("open3-$stdin_fh ".substr($command,0,200)); + } + }; + return $pid; + } + + sub open3_setpgrp_external { + # Run open3 on $command wrapped with a perl script doing setpgrp + # Works on systems that do not support open3(,,,"-") + # Input: + # $stdin_fh = Filehandle to use as STDIN + # $stdout_fh = Filehandle to use as STDOUT + # $stderr_fh = Filehandle to use as STDERR + # $command = Command to run + # Returns: + # $pid = Process group of job started + my ($stdin_fh,$stdout_fh,$stderr_fh,$command) = @_; + local (*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + + my $pid; + my @setpgrp_wrap = + ('perl','-e', + "eval\{setpgrp\}\;eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", @setpgrp_wrap, $command) + || ::die_bug("open3-$stdin_fh"); + 1; + }; + return $pid; + } + + sub redefine_open3_setpgrp { + my $setgprp_cache = shift; + # Select and run open3_setpgrp_internal/open3_setpgrp_external + no warnings 'redefine'; + my ($outfh,$name) = ::tmpfile(SUFFIX => ".tst"); + # Test to see if open3(x,x,x,"-") is fully supported + # Can an exported bash function be called via open3? + my $script = 'if($pid=::open3($i,$o,$e,"-")) { wait; } '. + 'else { exec("bash","-c","testfun && true"); }'; + my $bash = + ::shell_quote_scalar_default( + "testfun() { rm $name; }; export -f testfun; ". + "perl -MIPC::Open3 -e ". + ::shell_quote_scalar_default($script) + ); + my $redefine_eval; + # Redirect STDERR temporarily, + # so errors on MacOS X are ignored. + open my $saveerr, ">&STDERR"; + open STDERR, '>', "/dev/null"; + # Run the test + ::debug("init",qq{bash -c $bash 2>/dev/null}); + qx{ bash -c $bash 2>/dev/null }; + open STDERR, ">&", $saveerr; + + if(-e $name) { + # Does not support open3(x,x,x,"-") + # or does not have bash: + # Use (slow) external version + unlink($name); + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_external'; + ::debug("init","open3_setpgrp_external chosen\n"); + } else { + # Supports open3(x,x,x,"-") + # This is 0.5 ms faster to run + $redefine_eval = '*open3_setpgrp = \&open3_setpgrp_internal'; + ::debug("init","open3_setpgrp_internal chosen\n"); + } + if(open(my $fh, ">", $setgprp_cache)) { + print $fh $redefine_eval; + close $fh; + } else { + ::debug("init","Cannot write to $setgprp_cache"); + } + eval $redefine_eval; + } + + sub open3_setpgrp { + my $setgprp_cache = $Global::cache_dir . "/tmp/sshlogin/" . + ::hostname() . "/setpgrp_func"; + sub read_cache() { + -e $setgprp_cache || return 0; + local $/ = undef; + open(my $fh, "<", $setgprp_cache) || return 0; + eval <$fh> || return 0; + close $fh; + return 1; + } + if(not read_cache()) { + redefine_open3_setpgrp($setgprp_cache); + } + # The sub is now redefined. Call it + return open3_setpgrp(@_); + } + + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + my $pid; + + if($Global::interactive or $Global::stderr_verbose) { + $job->interactive_start(); + } + # Must be run after $job->interactive_start(): + # $job->interactive_start() may call $job->skip() + if($job->{'commandline'}{'skip'} + or + not $job->filter()) { + # $job->skip() was called or job filtered + $command = "true"; + } + $job->openoutputfiles(); + $job->print_verbose_dryrun(); + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + if($opt::dryrun or $opt::sqlmaster) { $command = "true"; } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + $ENV{$opt::process_slot_var} = -1 + + ($ENV{'PARALLEL_JOBSLOT'} = $job->slot()); + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + $job->add_rm($ENV{'PARALLEL_TMP'}); + $job->fill_templates(); + $ENV{'SSHPASS'} = $job->{'sshlogin'}->{'password'}; + ::debug("run", $Global::total_running, " processes . Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh) = ::gensym(); + $pid = open3_setpgrp($stdin_fh,$stdout_fh,$stderr_fh,$command); + if($opt::roundrobin and not $opt::keeporder) { + # --keep-order will make sure the order will be reproducible + ::set_fh_non_blocking($stdin_fh); + } + $job->set_fh(0,"w",$stdin_fh); + if($opt::tee or $opt::shard or $opt::bin) { $job->set_virgin(0); } + } elsif(($opt::tty or $opt::open_tty) and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + # The eval is needed to catch exception from open3 + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't dup STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDERR: $!"); + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + my @wrap = ('perl','-e', + "eval\{setpriority\(0,0,$opt::nice\)\}\;". + "exec '$Global::shell', '-c', \@ARGV"); + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", @wrap, $command) + || ::die_bug("open3-/dev/tty"); + 1; + }; + close $devtty_fh; + $job->set_virgin(0); + } elsif($Global::semaphore) { + # Allow sem to read from stdin + $pid = open3_setpgrp("<&STDIN",$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } else { + $pid = open3_setpgrp(::gensym(),$stdout_fh,$stderr_fh,$command); + $job->set_virgin(0); + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub interactive_start($) { + my $self = shift; + my $command = $self->wrapped(); + if($Global::interactive) { + my $answer; + ::status_no_nl("$command ?..."); + do{ + open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); + $answer = <$tty_fh>; + close $tty_fh; + # Sometime we get an empty string (not even \n) + # Do not know why, so let us just ignore it and try again + } while(length $answer < 1); + if (not ($answer =~ /^\s*y/i)) { + $self->{'commandline'}->skip(); + } + } else { + print $Global::original_stderr "$command\n"; + } +} + +{ + my $tmuxsocket; + + sub tmux_wrap($) { + # Wrap command with tmux for session pPID + # Input: + # $actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo = ::tmpname("tmx"); + $self->add_rm($tmpfifo); + + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100."); + ::wait_and_exit(255); + } + if($opt::tmuxpane) { + # Move the command into a pane in window 0 + $actual_command = $ENV{'PARALLEL_TMUX'}.' joinp -t :0 ; '. + $ENV{'PARALLEL_TMUX'}.' select-layout -t :0 tiled ; '. + $actual_command; + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. tmux is confused by NUL."); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]/ /s; + $title = ::Q($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + my $quoted_space75 = ::Q(" ")x75; + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title .= $quoted_space75; + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + ::debug("tmux", "Start: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach"); + if($opt::fg) { + if(not fork) { + # Run tmux in the foreground + # Wait for the socket to appear + while (not -e $tmuxsocket) { } + `$ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach`; + exit; + } + } + ::status("See output with: $ENV{'PARALLEL_TMUX'} -S $tmuxsocket attach"); + } + $tmux = "sh -c '". + $ENV{'PARALLEL_TMUX'}. + " -S $tmuxsocket new-session -s p$$ -d \"sleep .2\" >/dev/null 2>&1';" . + $ENV{'PARALLEL_TMUX'}. + " -S $tmuxsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + + return "mkfifo $tmpfifo && $tmux ". + # Run in tmux + ::Q + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ]. + $tmpfifo."&". + "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; }. + q{/(\d+)h/ and exit($1);exit$c' }.$tmpfifo; + } +} + +sub is_already_in_results($) { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + if($Global::csvsep) { + if($opt::joblog) { + # OK: You can look for job run in joblog + return 0 + } else { + ::warning_once( + "--resume --results .csv/.tsv/.json is not supported yet\n"); + # TODO read and parse the file + return 0 + } + } + my $out = $job->{'commandline'}->results_out(); + ::debug("run", "Test ${out}stdout", -e "${out}stdout", "\n"); + return(-e $out."stdout" or -f $out); +} + +sub is_already_in_joblog($) { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog($) { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub should_be_retried($) { + # Should this job be retried? + # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if (not defined $opt::retries) { return 0; } + if(not $self->exitstatus() and not $self->exitsignal()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0; + } else { + # The job failed. Should it be retried? + $self->add_failed_here(); + my $retries = $self->{'commandline'}-> + replace_placeholders([$opt::retries],0,0); + # 0 = Inf + if($retries == 0) { $retries = 2**31; } + # Ignore files already unlinked to avoid memory leak + $self->{'unlink'} = [ grep { -e $_ } @{$self->{'unlink'}} ]; + map { -e $_ or delete $Global::unlink{$_} } keys %Global::unlink; + if($self->total_failed() == $retries) { + # This has been retried enough + return 0; + } else { + # This command should be retried + $self->set_endtime(undef); + $self->reset_exitstatus(); + $Global::JobQueue->unget($self); + ::debug("run", "Retry ", $self->seq(), "\n"); + return 1; + } + } +} + +{ + my (%print_later,$job_seq_to_print); + + sub print_earlier_jobs($) { + # Print jobs whose output is postponed due to --keep-order + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_seq_to_print ||= 1; + my $returnsize = 0; + ::debug("run", "Looking for: $job_seq_to_print ", + "This: ", $job->seq(), "\n"); + for(;vec($Global::job_already_run,$job_seq_to_print,1); + $job_seq_to_print++) {} + while(my $j = $print_later{$job_seq_to_print}) { + $returnsize += $j->print(); + if($j->endtime()) { + # Job finished - look at the next + delete $print_later{$job_seq_to_print}; + $job_seq_to_print++; + next; + } else { + # Job not finished yet - look at it again next round + last; + } + } + return $returnsize; + } +} + +sub print($) { + # Print the output of the jobs + # Returns: N/A + my $self = shift; + + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + ::rm($self->fh(1,"name")); + } + if($opt::pipe and $self->virgin() and not $opt::tee) { + # Skip --joblog, --dryrun, --verbose + } else { + if($opt::ungroup) { + # NULL returnsize = 0 returnsize + $self->returnsize() or $self->add_returnsize(0); + if($Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + # Printing is only relevant for grouped/--line-buffer output. + $opt::ungroup and return; + } + } + # Check for disk full + ::exit_if_disk_full(); + } + + my $returnsize = $self->returnsize(); + my @fdno; + if($opt::latestline) { + @fdno = (1); + } else { + @fdno = (sort { $a <=> $b } keys %Global::fh); + } + for my $fdno (@fdno) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fh = $Global::fh{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; + } + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); + if($Global::linebuffer) { + # Line buffered print out + $self->print_linebuffer($fdno,$in_fh,$out_fh); + } elsif($opt::files) { + $self->print_files($fdno,$in_fh,$out_fh); + } elsif($opt::results) { + $self->print_results($fdno,$in_fh,$out_fh); + } else { + $self->print_normal($fdno,$in_fh,$out_fh); + } + flush $out_fh; + } + ::debug("print", "<<joboutput\n"); + if(defined $self->{'exitstatus'} + and not ($self->virgin() and $opt::pipe)) { + if($Global::joblog and not $opt::sqlworker) { + # Add to joblog when finished + $self->print_joblog(); + } + if($opt::sqlworker and not $opt::results) { + $Global::sql->output($self); + } + if($Global::csvsep) { + # Add output to CSV when finished + $self->print_csv(); + } + if($Global::jsonout) { + $self->print_json(); + } + } + return $returnsize - $self->returnsize(); +} + +{ + my %jsonmap; + + sub print_json($) { + my $self = shift; + sub jsonquote($) { + my $a = shift; + if(not $jsonmap{"\001"}) { + map { $jsonmap{sprintf("%c",$_)} = + sprintf '\u%04x', $_ } 0..31; + } + $a =~ s/\\/\\\\/g; + $a =~ s/\"/\\"/g; + $a =~ s/([\000-\037])/$jsonmap{$1}/g; + return $a; + } + + my $cmd; + if($Global::verbose <= 1) { + $cmd = jsonquote($self->replaced()); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = jsonquote(join " ", @{$self->{'commandline'}}); + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + # { + # "Seq": 12, + # "Host": "/usr/bin/ssh foo@lo", + # "Starttime": 1608344711.743, + # "JobRuntime": 0.01, + # "Send": 0, + # "Receive": 10, + # "Exitval": 0, + # "Signal": 0, + # "Command": "echo 1", + # "V": [ + # "1" + # ], + # "Stdout": "1\n", + # "Stderr": "" + # } + # + printf($Global::csv_fh + q({ "Seq": %s, "Host": "%s", "Starttime": %s, "JobRuntime": %s, ). + q("Send": %s, "Receive": %s, "Exitval": %s, "Signal": %s, ). + q("Command": "%s", "V": [ %s ], "Stdout": "%s", "Stderr": "%s" }). + "\n", + $self->seq(), + jsonquote($self->sshlogin()->string()), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd, + (join ",", + map { '"'.jsonquote($_).'"' } @$record_ref[1..$#$record_ref], + ), + jsonquote($self->{'output'}{1}), + jsonquote($self->{'output'}{2}) + ); + } +} + +{ + my $header_printed; + + sub print_csv($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = join " ", @{$self->{'commandline'}}; + } + my $record_ref = $self->{'commandline'}{'arg_list_flat_orig'}; + + if(not $header_printed) { + # Variable headers + # Normal => V1..Vn + # --header : => first value from column + my @V; + if($opt::header) { + my $i = 1; + @V = (map { $Global::input_source_header{$i++} } + @$record_ref[1..$#$record_ref]); + } else { + my $V = "V1"; + @V = (map { $V++ } @$record_ref[1..$#$record_ref]); + } + print $Global::csv_fh + (map { $$_ } + combine_ref("Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command", + @V, + "Stdout","Stderr" + )),"\n"; + $header_printed++; + } + # Memory optimization: Overwrite with the joined output + $self->{'output'}{1} = join("", @{$self->{'output'}{1}}); + $self->{'output'}{2} = join("", @{$self->{'output'}{2}}); + print $Global::csv_fh + (map { $$_ } + combine_ref + ($self->seq(), + $self->sshlogin()->string(), + $self->starttime(), sprintf("%0.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), \$cmd, + \@$record_ref[1..$#$record_ref], + \$self->{'output'}{1}, + \$self->{'output'}{2})),"\n"; + } +} + +sub combine_ref($) { + # Inspired by Text::CSV_PP::_combine (by Makamaka Hannyaharamitu) + my @part = @_; + my $sep = $Global::csvsep; + my $quot = '"'; + my @out = (); + + my $must_be_quoted; + for my $column (@part) { + # Memory optimization: Content transferred as reference + if(ref $column ne "SCALAR") { + # Convert all columns to scalar references + my $v = $column; + $column = \$v; + } + if(not defined $$column) { + $$column = ''; + next; + } + + $must_be_quoted = 0; + + if($$column =~ s/$quot/$quot$quot/go){ + # Replace " => "" + $must_be_quoted ||=1; + } + if($$column =~ /[\s\Q$sep\E]/o){ + # Put quotes around if the column contains , + $must_be_quoted ||=1; + } + + $Global::use{"bytes"} ||= eval "use bytes; 1;"; + if ($$column =~ /\0/) { + # Contains \0 => put quotes around + $must_be_quoted ||=1; + } + if($must_be_quoted){ + push @out, \$sep, \$quot, $column, \$quot; + } else { + push @out, \$sep, $column; + } + } + # Remove the first $sep: ,val,"val" => val,"val" + shift @out; + return @out; +} + +sub print_files($) { + # Print the name of the file containing stdout on stdout + # Uses: + # $opt::pipe + # $opt::group = Print when job is done + # $opt::linebuffer = Print ASAP + # Returns: N/A + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files because --files was set + for my $fdno (1,2) { + ::rm($self->fh($fdno,"name")); + ::rm($self->fh($fdno,"unlink")); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + $self->tag(), $self->fh($fdno,"name"); + } + $self->add_returnsize(-s $self->fh($fdno,"name")); + # Mark as printed - do not print again + $self->set_fh($fdno,"name",undef); + } +} + + +# Different print types +# (--ll | --ll --bar | --lb | --group | --parset | --sql-worker) +# (--files | --results (.json|.csv|.tsv) ) +# --color-failed +# --color +# --keep-order +# --tag +# --bar +{ + my ($up,$currow,$maxrow); + my ($minvisible,%print_later,%notvisible); + my (%binmodeset,%tab); + + sub latestline_init() { + # cursor_up cuu1 = up one line + $up = `sh -c "tput cuu1 </dev/tty" 2>/dev/null`; + chomp($up); + $currow = 1; + $maxrow = 1; + $minvisible = 1; + for(0..8) { + $tab{$_} = " "x(8-($_%8)); + } + } + + sub print_latest_line($) { + my $self = shift; + my $out_fh = shift; + my $row = $self->row(); + # Is row visible? + if(not ($minvisible <= $row + and + $row < $minvisible + ::terminal_rows() - 1)) { + return; + } + if(not $binmodeset{$out_fh}++) { + # Enable utf8 if possible + eval q{ binmode $out_fh, "encoding(utf8)"; }; + } + my ($color,$reset_color) = $self->color(); + # Strings with TABs give the wrong length. Untabify strings + my $termcol = ::terminal_columns(); + my $untabify_tag = ::decode_utf8($self->untabtag()); + my $taglen = length $untabify_tag; + my $truncated_tag = ""; + my $strlen = $termcol - $taglen; + my $untabify_str = ::decode_utf8($self->{$out_fh,'latestline'}); + $untabify_str =~ s/\t/$tab{$-[0]%8}/g; + my $strspc = $strlen - length $untabify_str; + $strlen--; + if($strlen < 0) { $strlen = 0;} + # Line is shorter than terminal width: add " " + # Line is longer than terminal width: add ">" + my $truncated = ($strspc > 0) ? " " : ">"; + if($taglen > $termcol) { + # Tag is longer than terminal width: add ">" to tag + # Remove $truncated (it will not be shown at all) + $taglen = $termcol - 1; + $truncated_tag = ">"; + $truncated = ""; + } + + $maxrow = $row > $maxrow ? $row : $maxrow; + printf($out_fh + ("%s%s%s". # up down \r + "%.${taglen}s%s". # tag + "%s%.${strlen}s%s%s". # color + line + "%s" # down + ), + "$up"x($currow - $row), + "\n"x($row - $currow), + "\r", $untabify_tag,$truncated_tag, + $color, $untabify_str, $truncated, $reset_color, + "\n"x($maxrow - $row + 1)); + $currow = $maxrow + 1; + } + + sub print_linebuffer($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($opt::compress) { + if($?) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + # Blocked reading in final round + for my $fdno (1,2) { ::set_fh_blocking($self->fh($fdno,'r')); } + } + if($opt::latestline) { $print_later{$self->row()} = $self; } + } + if(not $self->virgin()) { + if($opt::files or ($opt::results and not $Global::csvsep)) { + # Print filename + if($fdno == 1 and not $self->fh($fdno,"printed")) { + print $out_fh $self->tag(),$self->fh($fdno,"name"),"\n"; + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, $self->tag(), + $self->fh($fdno,"name")); + } + $self->set_fh($fdno,"printed",1); + } + # No need for reading $in_fh, as it is from "cat >/dev/null" + } else { + # Read halflines and print full lines + my $outputlength = 0; + my $halfline_ref = $self->{'halfline'}{$fdno}; + my ($buf,$i,$rv); + # 1310720 gives 1.2 GB/s + # 131072 gives 0.9 GB/s + # The optimal block size differs + # It has been measured on: + # AMD 6376: 60800 (>70k is also reasonable) + # Intel i7-3632QM: 52-59k, 170-175k + # seq 64 | ppar --_test $1 --lb \ + # 'yes {} `seq 1000`|head -c 10000000' >/dev/null + while($rv = sysread($in_fh, $buf, 60800)) { + $outputlength += $rv; + # TODO --recend + # Treat both \n and \r as line end + # Only test for \r if there is no \n + # Test: + # perl -e '$a="x"x1000000; + # $b="$a\r$a\n$a\r$a\n"; + # map { print $b,$_ } 1..10' + $i = ((rindex($buf,"\n")+1) || (rindex($buf,"\r")+1)); + if($i) { + if($opt::latestline) { + # Keep the latest full line + my $l = join('', @$halfline_ref, + substr($buf,0,$i-1)); + my $j = ((rindex($l,"\n")+1) || + (rindex($l,"\r")+1)); + $self->{$out_fh,'latestline'} = substr($l,$j); + # Remove the processed part + # by keeping the unprocessed part + @$halfline_ref = (substr($buf,$i)); + } else { + # One or more complete lines were found + if($Global::color) { + my $print = join("",@$halfline_ref, + substr($buf,0,$i)); + chomp($print); + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # \n => reset \n color tag + $print =~ s{([\n\r])(?=.|$)} + {$reset_color$1$colortag}gs; + print($out_fh $colortag, $print, + $reset_color, "\n"); + } elsif($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + if($Global::cache_replacement_eval) { + # Replace with the same value for tag + my $tag = $self->tag(); + unshift @$halfline_ref, $tag; + # TODO --recend that can be partially in + # @$halfline_ref + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1$tag/gs; + } else { + # Replace with freshly computed tag-value + unshift @$halfline_ref, $self->tag(); + substr($buf,0,$i-1) =~ + s/([\n\r])(?=.|$)/$1.$self->tag()/gse; + } + # The length changed, + # so find the new ending pos + $i = ::max((rindex($buf,"\n")+1), + (rindex($buf,"\r")+1)); + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } else { + # Print the partial line (halfline) + # and the last half + print $out_fh @$halfline_ref, substr($buf,0,$i); + } + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, + @$halfline_ref, substr($buf,0,$i)); + } + # Remove the printed part by keeping the unprinted + @$halfline_ref = (substr($buf,$i)); + } + } else { + # No newline, so append to the halfline + push @$halfline_ref, $buf; + } + } + $self->add_returnsize($outputlength); + if($opt::latestline) { $self->print_latest_line($out_fh); } + } + if(defined $self->{'exitstatus'}) { + if($opt::latestline) { + # Force re-computing color if --colorfailed + if($opt::colorfailed) { delete $self->{'color'}; } + $self->print_latest_line($out_fh); + # Print latest line from jobs that are already done + while($print_later{$minvisible}) { + $print_later{$minvisible}->print_latest_line($out_fh); + delete $print_later{$minvisible}; + $minvisible++; + } + # Print latest line from jobs that are on screen now + for(my $row = $minvisible; + $row < $minvisible -1 + ::terminal_rows(); + $row++) { + $print_later{$row} and + $print_later{$row}->print_latest_line($out_fh); + } + } + if($opt::files or ($opt::results and not $Global::csvsep)) { + $self->add_returnsize(-s $self->fh($fdno,"name")); + } else { + # If the job is dead: print the remaining partial line + # read remaining + my $halfline_ref = $self->{'halfline'}{$fdno}; + if(grep /./, @$halfline_ref) { + my $returnsize = 0; + for(@{$self->{'halfline'}{$fdno}}) { + $returnsize += length $_; + } + $self->add_returnsize($returnsize); + if($opt::tag or defined $opt::tagstring) { + # Prepend $tag the the remaining half line + unshift @$halfline_ref, $self->tag(); + } + # Print the partial line (halfline) + print $out_fh @{$self->{'halfline'}{$fdno}}; + # Buffer in memory for SQL and CSV-output + if($Global::membuffer) { + push(@{$self->{'output'}{$fdno}}, @$halfline_ref); + } + @$halfline_ref = (); + } + } + if($self->fh($fdno,"rpid") and + CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } + } + } + } +} + +sub free_ressources() { + my $self = shift; + if(not $opt::ungroup) { + my $fh; + for my $fdno (sort { $a <=> $b } keys %Global::fh) { + $fh = $self->fh($fdno,"w"); + $fh and close $fh; + $fh = $self->fh($fdno,"r"); + $fh and close $fh; + } + } +} + +sub print_parset($) { + # Wrap output with shell script code to set as variables + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $outputlength = 0; + + ::debug("parset","print $Global::parset"); + if($Global::parset eq "assoc") { + # Start: (done in parse_parset()) + # eval "`echo 'declare -A myassoc; myassoc=( + # Each: (done here) + # [$'a\tb']=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + print '[',::Q($self->{'commandline'}-> + replace_placeholders(["\257<\257>"],0,0)),']='; + } elsif($Global::parset eq "array") { + # Start: (done in parse_parset()) + # eval "`echo 'myassoc=( + # Each: (done here) + # $'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # )'`" + } elsif($Global::parset eq "var") { + # Start: (done in parse_parset()) + # <empty> + # Each: (done here) + # var=$'a\tb\tc ddd' + # End: (done in wait_and_exit()) + # <empty> + if(not @Global::parset_vars) { + ::error("Too few named destination variables"); + ::wait_and_exit(255); + } + print shift @Global::parset_vars,"="; + } + local $/ = "\n"; + my $tag = $self->tag(); + my @out; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @out, $tag,$_; + } + # Remove last newline + # This often makes it easier to use the output in shell + @out and ${out[$#out]} =~ s/\n$//s; + print ::Q(join("",@out)),"\n"; + return $outputlength; +} + +sub print_normal($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::parset and $fdno == 1) { + $outputlength += $self->print_parset($fdno,$in_fh,$out_fh); + } elsif(defined $opt::tag or defined $opt::tagstring + or $Global::color or $opt::colorfailed) { + if($Global::color or $opt::colorfailed) { + my ($color,$reset_color) = $self->color(); + my $colortag = $color.$self->tag(); + # Read line by line + local $/ = "\n"; + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + chomp; + s{([\n\r])(?=.|$)}{$reset_color$1$colortag}gs; + print $out_fh $colortag,$_,$reset_color,"\n"; + } + } else { + my $tag = $self->tag(); + my $pretag = 1; + my $s; + while(sysread($in_fh,$buf,32767)) { + $outputlength += length $buf; + $buf =~ s/(?<=[\r\n])(?=.)/$tag/gs; + print $out_fh ($pretag ? $tag : ""),$buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, + ($pretag ? $tag : ""),$buf; + } + # Should next print start with a tag? + $s = substr($buf, -1); + # This is faster than ($s eq "\n") || ($s eq "\r") + $pretag = ($s eq "\n") ? 1 : ($s eq "\r"); + } + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + # Intel i7-3632QM: 25k- + while(sysread($in_fh,$buf,32767)) { + print $out_fh $buf; + $outputlength += length $buf; + if($Global::membuffer) { + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_results($) { + my $self = shift; + my ($fdno,$in_fh,$out_fh) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed."); + $self->set_exitstatus(255); + } + if(not $self->virgin()) { + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $outputlength = 0; + my @output; + + if($Global::membuffer) { + # Read data into membuffer + if($opt::tag or $opt::tagstring) { + # Read line by line + local $/ = "\n"; + my $tag = $self->tag(); + while(<$in_fh>) { + $outputlength += length $_; + # Tag lines with \r, too + $_ =~ s/(?<=[\r])(?=.|$)/$tag/gs; + push @{$self->{'output'}{$fdno}}, $tag, $_; + } + } else { + # Most efficient way of copying data from $in_fh to $out_fh + while(sysread($in_fh,$buf,60000)) { + $outputlength += length $buf; + push @{$self->{'output'}{$fdno}}, $buf; + } + } + } else { + # Not membuffer: No need to read the file + if($opt::compress) { + $outputlength = -1; + } else { + # Determine $outputlength = file length + seek($in_fh, 0, 2) || ::die_bug("cannot seek result"); + $outputlength = tell($in_fh); + } + } + if($fdno == 1) { $self->add_returnsize($outputlength); } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed."); + $self->set_exitstatus(255); + } + } +} + +sub print_joblog($) { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = $self->wrapped(); + } + # Newlines make it hard to parse the joblog + $cmd =~ s/\n/\0/g; + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag($) { + my $self = shift; + if(not defined $self->{'tag'} or not $Global::cache_replacement_eval) { + if(defined $opt::tag or defined $opt::tagstring) { + $self->{'tag'} = + ($self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)). + "\t"; + } else { + # No tag + $self->{'tag'} = ""; + } + } + return $self->{'tag'}; +} + +sub untabtag($) { + # tag with \t replaced with spaces + my $self = shift; + my $tag = $self->tag(); + if(not defined $self->{'untab'}{$tag}) { + my $t = $tag; + $t =~ s/\t/" "x(8-($-[0]%8))/eg; + $self->{'untab'}{$tag} = $t; + } + return $self->{'untab'}{$tag}; +} + +{ + my (@color,$eol,$reset_color,$init); + + sub init_color() { + if(not $init) { + $init = 1; + # color combinations that are readable: black/white text + # on colored background, but not white on yellow + my @color_combinations = + # Force each color code to have the same length in chars + # This will make \t work as expected + ((map { [sprintf("%03d",$_),"000"] } + 6..7,9..11,13..15,40..51,75..87,113..123,147..159, + 171..182,185..231,249..254), + (map { [sprintf("%03d",$_),231] } + 1..9,12..13,16..45,52..81,88..114,124..149, + 160..178,180,182..184,196..214,232..250)); + # reorder list so adjacent colors are dissimilar + # %23 and %7 were found experimentally + @color_combinations = @color_combinations[ + sort { ($a%23 <=> $b%23) or ($b%7 <=> $a%7) } + 0..$#color_combinations + ]; + @color = map { + # TODO Can this be done with `tput` codes? + "\033[48;5;".$_->[0].";38;5;".$_->[1]."m" + } @color_combinations; + + # clr_eol el = clear to end of line + $eol = `sh -c "tput el </dev/tty" 2>/dev/null`; + chomp($eol); + if($eol eq "") { $eol = "\033[K"; } + # exit_attribute_mode sgr0 = turn off all attributes + $reset_color = `sh -c "tput sgr0 </dev/tty" 2>/dev/null`; + chomp($reset_color); + if($reset_color eq "") { $reset_color = "\033[m"; } + } + } + + sub color($) { + my $self = shift; + if(not defined $self->{'color'}) { + if($Global::color) { + # Choose a value based on the seq + $self->{'color'} = $color[$self->seq() % ($#color+1)].$eol; + $self->{'reset_color'} = $reset_color; + } else { + $self->{'color'} = ""; + $self->{'reset_color'} = ""; + } + if($opt::colorfailed) { + if($self->exitstatus()) { + # White on Red + # Can this be done more generally? + $self->{'color'} = + "\033[48;5;"."196".";38;5;"."231"."m".$eol; + $self->{'reset_color'} = $reset_color; + } + } + } + return ($self->{'color'},$self->{'reset_color'}); + } +} + +sub hostgroups($) { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = + $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus($) { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus($$) { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } + $opt::sqlworker and + $Global::sql->update("SET Exitval = ? WHERE Seq = ".$self->seq(), + $exitstatus); +} + +sub reset_exitstatus($) { + my $self = shift; + undef $self->{'exitstatus'}; +} + +sub exitsignal($) { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal($$) { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; + $opt::sqlworker and + $Global::sql->update("SET _Signal = ? WHERE Seq = ".$self->seq(), + $exitsignal); +} + +{ + my $total_jobs; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? + # Returns: N/A + my $job = shift; + my $limit; + if($Global::semaphore) { + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + if($job->exitstatus() or $job->exitsignal()) { + # Job failed + $Global::exitstatus++; + $Global::total_failed++; + if($Global::halt_fail) { + ::status("$Global::progname: This job failed:", + $job->replaced()); + $limit = $Global::total_failed; + } + } elsif($Global::halt_success) { + ::status("$Global::progname: This job succeeded:", + $job->replaced()); + $limit = $Global::total_completed - $Global::total_failed; + } + if($Global::halt_done) { + ::status("$Global::progname: This job finished:", + $job->replaced()); + $limit = $Global::total_completed; + } + if(not defined $limit) { + return "" + } + # --halt # => 1..100 (number of jobs failed, 101 means > 100) + # --halt % => 1..100 (pct of jobs failed) + if($Global::halt_pct and not $Global::halt_count) { + $total_jobs ||= $Global::JobQueue->total_jobs(); + # From the pct compute the number of jobs that must fail/succeed + $Global::halt_count = $total_jobs * $Global::halt_pct; + } + if($limit >= $Global::halt_count) { + # At least N jobs have failed/succeded/completed + # or at least N% have failed/succeded/completed + # So we should prepare for exit + if($Global::halt_fail or $Global::halt_done) { + # Set exit status + if(not defined $Global::halt_exitstatus) { + if($Global::halt_pct) { + # --halt now,fail=X% or soon,fail=X% + # --halt now,done=X% or soon,done=X% + $Global::halt_exitstatus = + ::ceil($Global::total_failed / $total_jobs * 100); + } elsif($Global::halt_count) { + # --halt now,fail=X or soon,fail=X + # --halt now,done=X or soon,done=X + $Global::halt_exitstatus = + ::min($Global::total_failed,101); + } + if($Global::halt_count and $Global::halt_count == 1) { + # --halt now,fail=1 or soon,fail=1 + # --halt now,done=1 or soon,done=1 + # Emulate Bash's +128 if there is a signal + $Global::halt_exitstatus = + ($job->exitstatus() + or + $job->exitsignal() ? $job->exitsignal() + 128 : 0); + } + } + ::debug("halt","Pct: ",$Global::halt_pct, + " count: ",$Global::halt_count, + " status: ",$Global::halt_exitstatus,"\n"); + } elsif($Global::halt_success) { + $Global::halt_exitstatus = 0; + } + if($Global::halt_when eq "soon") { + $Global::start_no_new_jobs ||= 1; + if(scalar(keys %Global::running) > 0) { + # Only warn if there are more jobs running + ::status + ("$Global::progname: Starting no more jobs. ". + "Waiting for ". (keys %Global::running). + " jobs to finish."); + } + } + return($Global::halt_when); + } + return ""; + } +} + + +package CommandLine; + +sub new($) { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_list_flat' => [], + 'arg_list_flat_orig' => [undef], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub flush_cache() { + my $self = shift; + for my $arglist (@{$self->{'arg_list'}}) { + for my $arg (@$arglist) { + $arg->flush_cache(); + } + } + $self->{'arg_queue'}->flush_cache(); + $self->{'replaced'} = undef; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub slot($) { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$Global::max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; +} + +{ + my $already_spread; + my $darwin_max_len; + + sub populate($) { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::usable_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::usable_command_line_length || die; + if($^O eq "darwin") { + # Darwin's limit is affected by: + # * number of environment names (variables+functions) + # * size of environment + # * the length of arguments: + # a one-char argument lowers the limit by 5 + # To be safe assume all arguments are one-char + # The max_len is cached between runs, but if the size of + # the environment is different we need to recompute the + # usable max length for this run of GNU Parallel + # See https://unix.stackexchange.com/a/604943/2972 + if(not $darwin_max_len) { + my $envc = (keys %ENV); + my $envn = length join"",(keys %ENV); + my $envv = length join"",(values %ENV); + $darwin_max_len = -146+($max_len - $envn - $envv) - $envc*10; + ::debug("init", + "length: $darwin_max_len ". + "3+($max_len - $envn - $envv)/5 - $envc*2"); + } + $max_len = $darwin_max_len; + } + if($opt::cat or $opt::fifo) { + # Get the empty arg added by --pipepart (if any) + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); + last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (". + $self->len(). " >= ". + $max_len. + ") at input ". + $self->{'arg_queue'}->arg_number(). + ": ". + ((length $args > 50) ? + (substr($args,0,50))."..." : + $args)); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + $Global::JobQueue->flush_total_jobs(); + } + + if($opt::sqlmaster) { + # Insert the V1..Vn for this $seq in SQL table + # instead of generating one + $Global::sql->insert_records($self->seq(), $self->{'command'}, + $self->{'arg_list_flat_orig'}); + } + } +} + +sub push($) { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list_flat_orig'}}, map { $_->orig() } @$record; + push @{$self->{'arg_list_flat'}}, @$record; + push @{$self->{'arg_list'}}, $record; + # Make @arg available for {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + my $col; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(-?\d+)(?:\D.*|)$/) { + # Positional replacement string + # Deal with negative positional replacement string + $col = ($1 < 0) ? $1 : $1-1; + if(defined($record->[$col])) { + $self->{'len'}{$perlexpr} += + length $record->[$col]->replace($perlexpr,$quote_arg,$self); + } + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} += + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } +} + +sub pop($) { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + # pop off arguments from @$record + splice @{$self->{'arg_list_flat_orig'}}, -($#$record+1), $#$record+1; + splice @{$self->{'arg_list_flat'}}, -($#$record+1), $#$record+1; + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + for my $perlexpr (keys %{$self->{'replacecount'}}) { + if($perlexpr =~ /^(\d+) /) { + # Positional + defined($record->[$1-1]) or next; + $self->{'len'}{$perlexpr} -= + length $record->[$1-1]->replace($perlexpr,$quote_arg,$self); + } else { + for my $arg (@$record) { + if(defined $arg) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + } + return $record; +} + +sub pop_all($) { + # Remove all arguments and zeros the length of replacement perlexpr + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} = 0; + } + $self->{'arg_list'} = []; + $self->{'arg_list_flat_orig'} = [undef]; + $self->{'arg_list_flat'} = []; + return @popped; +} + +sub number_of_args($) { + # The number of records + # Returns: + # number of records + my $self = shift; + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs($) { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string($) { + # Returns: + # all unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub results_out($) { + sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + # Uses: + # $Global::max_file_length is set + # Returns: + # $Global::max_file_length + my $testdir = shift; + + my $upper = 100_000_000; + # Dir length of 8 chars is supported everywhere + my $len = 8; + my $dir = "d"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "d"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "d"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; + } + + my $self = shift; + my $out = $self->replace_placeholders([$opt::results],0,0); + if($out eq $opt::results) { + # $opt::results simple string: Append args_as_dirname + my $args_as_dirname = $self->args_as_dirname(); + # Output in: prefix/name1/val1/name2/val2/stdout + $out = $opt::results."/".$args_as_dirname; + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + # mkpath failed: Argument probably too long. + # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + $args_as_dirname = $self->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + $out = $opt::results."/".$args_as_dirname; + File::Path::mkpath($out); + } + $out .="/"; + } else { + if($out =~ m:/$:) { + # / = dir + if(-d $out or eval{ File::Path::mkpath($out); }) { + # OK + } else { + ::error("Cannot make dir '$out'."); + ::wait_and_exit(255); + } + } else { + $out =~ m:(.*)/:; + File::Path::mkpath($1); + } + } + return $out; +} + +sub args_as_dirname($) { + # Returns: + # all unmodified arguments joined with '/' (similar to {}) + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my @res = (); + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + map { my $s = $_; + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + $s =~ s/\\/\\\\/g; + $s =~ s/\t/\\t/g; + $s =~ s/\0/\\0/g; + $s =~ s:/:\\_:g; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig()); + } + } + return join "/", @res; +} + +sub header_indexes_sorted($) { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. $max_col; + return @header_indexes_sorted; +} + +sub len($) { + # Uses: + # @opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + # MacOS has an overhead of 8 bytes per argument + my $darwin = ($^O eq "darwin") ? 8 : 0; + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + if($darwin) { + $len += $recargs * $self->{'len'}{'contextgroups'} * $darwin; + } + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between recargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + if($darwin) { + $len += ($recargs * $self->{'replacecount'}{$replstring} + * $darwin); + } + } + } + if(defined $Global::parallel_env) { + # If we are using --env, add the prefix for that, too. + $len += length $Global::parallel_env; + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: ' => "'" + " => '"' + # TODO can we count the number of expanding chars? + # and count them in arguments, too? + $len *= 3; + } + if(@opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: ' => "'" + for(@opt::shellquote) { + $len *= 3; + } + $len *= 5; + } + if(@opt::sshlogin) { + # Pessimistic length if remote + # Worst case is BASE64 encoding 3 bytes -> 4 bytes + $len = int($len*4/3); + } + return $len; +} + +sub replaced($) { + # Uses: + # $Global::quote_replace + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = ($Global::quote_replace and not $Global::quoting); + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +sub replace_placeholders($$$$) { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? + # Uses: + # @Arg::arg = arguments as strings to be use in {= =} + # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + + # Token description: + # \0spc = unquoted space + # \0end = last token element + # \0ign = dummy token to be ignored + # \257<...\257> = replacement expression + # " " = quoted space, that splits -X group + # text = normal text - possibly part of -X group + my $spacer = 0; + my @tokens = grep { length $_ > 0 } map { + if(/^\257<|^ $/) { + # \257<...\257> or space + $_ + } else { + # Split each space/tab into a token + split /(?=\s)|(?<=\s)/ + } + } + # Split \257< ... \257> into own token + map { split /(?=\257<)|(?<=\257>)/ } + # Insert "\0spc" between every element + # This space should never be quoted + map { $spacer++ ? ("\0spc",$_) : $_ } + map { $_ eq "" ? "\0empty" : $_ } + @$targetref; + + if(not @tokens) { + # @tokens is empty: Return empty array + return @tokens; + } + ::debug("replace", "Tokens ".join":",@tokens,"\n"); + # Make it possible to use $arg[2] in {= =} + *Arg::arg = $self->{'arg_list_flat_orig'}; + # Flat list: + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # $self->{'arg_list_flat'} = [ Arg11, Arg12, Arg21, Arg22, Arg31, Arg32 ] + if(not @{$self->{'arg_list_flat'}}) { + @{$self->{'arg_list_flat'}} = Arg->new(""); + } + my $argref = $self->{'arg_list_flat'}; + # Number of arguments - used for positional arguments + my $n = $#$argref+1; + + # $self is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + # @replaced = tokens with \257< \257> replaced + my @replaced; + if($self->{'context_replace'}) { + my @ctxgroup; + for my $t (@tokens,"\0end") { + # \0end = last token was end of tokens. + if($t eq "\t" or $t eq " " or $t eq "\0end" or $t eq "\0spc") { + # Context group complete: Replace in it + if(grep { /^\257</ } @ctxgroup) { + # Context group contains a replacement string: + # Copy once per arg + my $space = "\0ign"; + for my $arg (@$argref) { + my $normal_replace; + # Push output + # Put unquoted space before each context group + # except the first + CORE::push @replaced, $space, map { + $a = $_; + if($a =~ + s{\257<(-?\d+)?(.*)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe) { + # Token is \257<..\257> + } else { + if($Global::escape_string_present) { + # Command line contains \257: + # Unescape it \257\256 => \257 + $a =~ s/\257\256/\257/g; + } + } + $a + } @ctxgroup; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # Context group has no a replacement string: Copy it once + CORE::push @replaced, map { + $Global::escape_string_present and s/\257\256/\257/g; $_; + } @ctxgroup; + } + # New context group + @ctxgroup=(); + } + if($t eq "\0spc" or $t eq " ") { + CORE::push @replaced,$t; + } else { + CORE::push @ctxgroup,$t; + } + } + } else { + # @group = @token + # Replace in group + # Push output + # repquote = no if {} first on line, no if $quote, yes otherwise + for my $t (@tokens) { + if($t =~ /^\257</) { + my $space = "\0ign"; + for my $arg (@$argref) { + my $normal_replace; + $a = $t; + $a =~ + s{\257<(-?\d+)?(.*)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($argref->[$1 > 0 ? $1-1 : $n+$1] ? + # If defined: replace + $argref->[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }sgxe; + CORE::push @replaced, $space, $a; + $normal_replace or last; + $space = "\0spc"; + } + } else { + # No replacement + CORE::push @replaced, map { + $Global::escape_string_present and s/\257\256/\257/g; $_; + } $t; + } + } + } + *Arg::arg = []; + ::debug("replace","Replaced: ".join":",@replaced,"\n"); + + # Put tokens into groups that may be quoted. + my @quotegroup; + my @quoted; + for (map { $_ eq "\0empty" ? "" : $_ } + grep { $_ ne "\0ign" and $_ ne "\0noarg" and $_ ne "'\0noarg'" } + @replaced, "\0end") { + if($_ eq "\0spc" or $_ eq "\0end") { + # \0spc splits quotable groups + if($quote) { + if(@quotegroup) { + CORE::push @quoted, ::Q(join"",@quotegroup);; + } + } else { + CORE::push @quoted, join"",@quotegroup; + } + @quotegroup = (); + } else { + CORE::push @quotegroup, $_; + } + } + ::debug("replace","Quoted: ".join":",@quoted,"\n"); + return wantarray ? @quoted : "@quoted"; +} + +sub skip($) { + # Skip this job + my $self = shift; + $self->{'skip'} = 1; +} + + +package CommandLineQueue; + +sub new($) { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift || 0; + my $max_number_of_args = shift; + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @unget = (); + my $posrpl; + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $seq = 1; + # Replace replacement strings with {= perl expr =} + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + @command = merge_rpl_parts(@command); + + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \257< and \257> + # in options that can contain replacement strings: + # @command, --transferfile, --return, + # --tagstring, --workdir, --results + for(@command, @$transfer_files, @$return_files, + @$template_names, @$template_contents, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries, + @opt::filter) { + # Skip if undefined + defined($_) or next; + # Escape \257 => \257\256 + $Global::escape_string_present += s/\257/\257\256/g; + # Needs to match rightmost left parens (Perl defaults to leftmost) + # to deal with: {={==} and {={==}=} + # Replace {= -> \257< and =} -> \257> + # + # Complex way to do: + # s/{=(.*)=}/\257<$1\257>/g + # which would not work + s[\Q$Global::parensleft\E # Match {= + # Match . unless the next string is {= or =} + # needed to force matching the shortest {= =} + ((?:(?! \Q$Global::parensleft\E|\Q$Global::parensright\E ).)*?) + \Q$Global::parensright\E ] # Match =} + {\257<$1\257>}gxs; + for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { + # Replace long --rpl's before short ones, as a short may be a + # substring of a long: + # --rpl '% s/a/b/' --rpl '%% s/b/a/' + # + # Replace the shorthand string (--rpl) + # with the {= perl expr =} + # + # Avoid searching for shorthand strings inside existing {= perl expr =} + # + # Replace $$1 in {= perl expr =} with groupings in shorthand string + # + # --rpl '{/(\.\S+)/(\.\S+)} s/$$1/$$2/g;' + # echo {/.tar/.gz} ::: UU.tar.gz + my ($prefix,$grp_regexp,$postfix) = + $rpl =~ /^( [^(]* ) # Prefix - e.g. {%% + ( \(.*\) )? # Group capture regexp - e.g (.*) + ( [^)]* )$ # Postfix - e.g } + /xs; + $grp_regexp ||= ''; + my $rplval = $Global::rpl{$rpl}; + while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? ) + # Don't replace after \257 unless \257> + \Q$prefix\E $grp_regexp \Q$postfix\E} + { + # The start remains the same + my $unchanged = $1; + # Dummy entry to start at 1. + my @grp = (1); + # $2 = first ()-group in $grp_regexp + # Put $2 in $grp[1], Put $3 in $grp[2] + # so first ()-group in $grp_regexp is $grp[1]; + for(my $i = 2; defined $grp[$#grp]; $i++) { + push @grp, eval '$'.$i; + } + my $rv = $rplval; + # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2 + # in the code to be executed + $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx; + # prepend with $_pAr_gRp1 = perlquote($1), + my $set_args = ""; + for(my $i = 1;defined $grp[$i]; $i++) { + $set_args .= "\$_pAr_gRp$i = \"" . + ::perl_quote_scalar($grp[$i]) . "\";"; + } + $unchanged . "\257<" . $set_args . $rv . "\257>" + }gxes) { + } + # Do the same for the positional replacement strings + $posrpl = $rpl; + if($posrpl =~ s/^\{//) { + # Only do this if the shorthand start with { + $prefix=~s/^\{//; + # Don't replace after \257 unless \257> + while(s{( (?: ^|\257> ) (?: [^\257]*|[\257][^<>] )*? ) + \{(-?\d+) \s* \Q$prefix\E $grp_regexp \Q$postfix\E} + { + # The start remains the same + my $unchanged = $1; + my $position = $2; + # Dummy entry to start at 1. + my @grp = (1); + # $3 = first ()-group in $grp_regexp + # Put $3 in $grp[1], Put $4 in $grp[2] + # so first ()-group in $grp_regexp is $grp[1]; + for(my $i = 3; defined $grp[$#grp]; $i++) { + push @grp, eval '$'.$i; + } + my $rv = $rplval; + # replace $$1 with $_pAr_gRp1, $$2 with $_pAr_gRp2 + # in the code to be executed + $rv =~ s/\$\$ (\d+)/\$_pAr_gRp$1/gx; + # prepend with $_pAr_gRp1 = perlquote($1), + my $set_args = ""; + for(my $i = 1;defined $grp[$i]; $i++) { + $set_args .= "\$_pAr_gRp$i = \"" . + ::perl_quote_scalar($grp[$i]) . "\";"; + } + $unchanged . "\257<" . $position . $set_args . $rv . "\257>" + }gxes) { + } + } + } + } + # Add {} if no replacement strings in @command + ($replacecount_ref, $len_ref, @command) = + replacement_counts_and_lengths($transfer_files, $return_files, + $template_names, $template_contents, + @command); + if("@command" =~ /^[^ \t\n=]*\257</) { + # Replacement string is (part of) the command (and not just + # argument or variable definition V1={}) + # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2} + # Do no quote (Otherwise it will fail if the input contains spaces) + $Global::quote_replace = 0; + } + + if($opt::sqlmaster and $Global::sql->append()) { + $seq = $Global::sql->max_seq() + 1; + } + + return bless { + ('unget' => \@unget, + 'command' => \@command, + 'replacecount' => $replacecount_ref, + 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), + 'context_replace' => $context_replace, + 'len' => $len_ref, + 'max_number_of_args' => $max_number_of_args, + 'size' => undef, + 'transfer_files' => $transfer_files, + 'return_files' => $return_files, + 'template_names' => $template_names, + 'template_contents' => $template_contents, + 'seq' => $seq, + ) + }, ref($class) || $class; +} + +sub merge_rpl_parts($) { + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + # Input: + # @in = the @command as given by the user + # Uses: + # $Global::parensleft + # $Global::parensright + # Returns: + # @command with parts merged to keep {= and =} as one + my @in = @_; + my @out; + my $l = quotemeta($Global::parensleft); + my $r = quotemeta($Global::parensright); + + while(@in) { + my $s = shift @in; + $_ = $s; + # Remove matching (right most) parens + while(s/(.*)$l.*?$r/$1/os) {} + if(/$l/o) { + # Missing right parens + while(@in) { + $s .= " ".shift @in; + $_ = $s; + while(s/(.*)$l.*?$r/$1/os) {} + if(not /$l/o) { + last; + } + } + } + push @out, $s; + } + return @out; +} + +sub replacement_counts_and_lengths($$@) { + # Count the number of different replacement strings. + # Find the lengths of context for context groups and non-context + # groups. + # If no {} found in @command: add it to @command + # + # Input: + # \@transfer_files = array of filenames to transfer + # \@return_files = array of filenames to return + # \@template_names = array of names to copy to + # \@template_contents = array of contents to write + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $transfer_files = shift; + my $return_files = shift; + my $template_names = shift; + my $template_contents = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \257<( (?: [^\257]*|[\257][^<>] )*?)\257> /\000/xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \257< + while($c =~ s/ (\S*\000\S*) //xs) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$transfer_files, @$return_files, + @$template_names, @$template_contents, + @opt::filter, + $opt::tagstring, $opt::workdir, $opt::results, $opt::retries) { + # Options that can contain replacement strings + defined($_) or next; + my $t = $_; + while($t =~ s/ \257<( (?: [^\257]*|[\257][^<>] )* )\257> //xs) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\257<\257>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\257<\257>"); + } + } + } + return(\%replacecount,\%len,@command); +} + +sub get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + if($opt::sqlworker) { + # Get the sequence number from the SQL table + $self->set_seq($SQL::next_seq); + # Get the command from the SQL table + $self->{'command'} = $SQL::command_ref; + my @command; + # Recompute replace counts based on the read command + ($self->{'replacecount'}, + $self->{'len'}, @command) = + replacement_counts_and_lengths($self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_name'}, + $self->{'template_contents'}, + @$SQL::command_ref); + if("@command" =~ /^[^ \t\n=]*\257</) { + # Replacement string is (part of) the command (and not just + # argument or variable definition V1={}) + # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2} + # Do no quote (Otherwise it will fail if the input contains spaces) + $Global::quote_replace = 0; + } + } + + my $cmd_line = CommandLine->new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'transfer_files'}, + $self->{'return_files'}, + $self->{'template_names'}, + $self->{'template_contents'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + $cmd_line->populate(); + ::debug("run","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if(not $Global::no_more_input and ($opt::pipe or $opt::pipepart)) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe/--pipepart must have a command to pipe into ". + "(e.g. 'cat')."); + ::wait_and_exit(255); + } + } elsif($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? + return undef; + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget($) { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq($) { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq($$) { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args($) { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length($) { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $Global::cache_dir . "/tmp/sshlogin/" . ::hostname() . + "/linelen"; + my $cached_limit; + local $/ = undef; + if(open(my $fh, "<", $len_cache)) { + $cached_limit = <$fh>; + $cached_limit || ::die_bug("Cannot read $len_cache"); + close $fh; + } + if(not $cached_limit) { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + my $dir = ::dirname($len_cache); + -d $dir or eval { File::Path::mkpath($dir); }; + open(my $fh, ">", $len_cache.$$); + print $fh $cached_limit; + close $fh; + rename $len_cache.$$, $len_cache || ::die_bug("rename cache file"); + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + } + return int($Limits::Command::line_max_len); +} + +sub real_max_length() { + # Find the max_length of a command line + # Returns: + # The maximal command line length with 1 byte arguments + # return find_max(" c"); + return find_max("c"); +} + +sub find_max($) { + my $string = shift; + # This is slow on Cygwin, so give Cygwin users a warning + if($^O eq "cygwin" or $^O eq "msys") { + ::warning("Finding the maximal command line length. ". + "This may take up to 1 minute.") + } + # Use an upper bound of 100 MB if the shell allows for infinite + # long lengths + my $upper = 100_000_000; + my $lower; + # 1000 is supported everywhere, so the search can start anywhere 1..999 + # 324 makes the search much faster on Cygwin, so let us use that + my $len = 324; + do { + if($len > $upper) { return $len }; + $lower = $len; + $len *= 16; + ::debug("init", "Maxlen: $lower<$len<$upper(".($upper-$lower)."): "); + } while (is_acceptable_command_line_length($len,$string)); + # Then search for the actual max length between + # last successful length ($len/16) and upper bound + return binary_find_max(int($len/16),$len,$string); +} + + +# Prototype forwarding +sub binary_find_max($$$); +sub binary_find_max($$$) { + # Given a lower and upper bound find the max (length or args) of a + # command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper, $string) = (@_); + if($lower == $upper + or $lower == $upper-1 + or $lower/$upper > 0.99) { + # $lower is +- 1 or within 1%: Don't search more + return $lower; + } + # Unevenly split binary search which is faster for Microsoft Windows. + # Guessing too high is cheap. Guessing too low is expensive. + my $split = ($^O eq "cygwin" or $^O eq "msys") ? 0.93 : 0.5; + my $middle = int (($upper-$lower)*$split + $lower); + ::debug("init", "Maxlen: $lower<$middle<$upper(".($upper-$lower)."): "); + if (is_acceptable_command_line_length($middle,$string)) { + return binary_find_max($middle,$upper,$string); + } else { + return binary_find_max($lower,$middle,$string); + } +} + +{ + my $prg; + + sub is_acceptable_command_line_length($$) { + # Test if a command line of this length can run + # in the current environment + # If the string is " x" it tests how many args are allowed + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + my $string = shift; + if($Global::parallel_env) { + $len += length $Global::parallel_env; + } + # Force using non-built-in command + $prg ||= ::which("echo"); + ::qqx("$prg ".${string}x(($len-1-length $prg)/length $string)); + ::debug("init", "$len=$?\n"); + return not $?; + } +} + +sub tmux_length($) { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # tmux 2.0 has a 16kB limit + # tmux 2.1 has a 16kB limit + # tmux 2.2 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + local $/ = "\n"; + my $len = shift; + if($opt::tmux) { + $ENV{'PARALLEL_TMUX'} ||= "tmux"; + if(not ::which($ENV{'PARALLEL_TMUX'})) { + ::error($ENV{'PARALLEL_TMUX'}." not found in \$PATH."); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 30000, $len) { + my $tmpfile = ::tmpname("tms"); + my $tmuxcmd = $ENV{'PARALLEL_TMUX'}. + " -S $tmpfile new-session -d -n echo $l". + ("t"x$l). " && echo $l; rm -f $tmpfile"; + push @out, ::qqx($tmuxcmd); + ::rm($tmpfile); + } + ::debug("tmux","tmux-out ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the tmux wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = ::max(@out); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + + +package RecordQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($opt::sqlworker) { + # Open SQL table + $arg_sub_queue = SQLRecordQueue->new(); + } elsif(defined $colsep) { + # Open one file with colsep or CSV + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + my $ret = shift @{$self->{'unget'}}; + if($ret) { + map { $_->flush_cache() } @$ret; + } + return $ret; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if($ret) { + if(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Allow for \0 in position 0 because GNU Parallel uses "\0noarg" + # to mean no-string + ::warning("A NUL character in the input was replaced with \\0.", + "NUL cannot be passed through in the argument list.", + "Did you mean to use the --null option?"); + for(grep { index($_->orig(),"\0") > 0 } @$ret) { + # Replace \0 with \\0 + my $a = $_->orig(); + $a =~ s/\0/\\0/g; + $_->set_orig($a); + } + } + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + # \0noarg => nothing (not the empty string) + map { $_->set_orig("\0noarg"); } @$ret; + } + # Flush cached computed replacements in Arg-objects + # To fix: parallel --bar echo {%} ::: a b c ::: d e f + map { $_->flush_cache() } @$ret; + } + return $ret; +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordQueue-unget\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } + $self->{'arg_sub_queue'}->flush_cache(); +} + +sub arg_number($) { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new($) { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + if($opt::csv) { + # Parse CSV and put it into a record + chomp $line; + if(not $Global::csv->parse($line)) { + die "CSV has unexpected format: ^$line^"; + } + for($Global::csv->fields()) { + push @out_record, Arg->new($_); + } + } else { + # Split --colsep into record + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } + } else { + push @out_record, Arg->new(""); + } + } + return \@out_record; + } else { + return undef; + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && + $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $arg (@{$self->{'unget'}}) { + $arg->flush_cache(); + } + $self->{'arg_sub_queue'}->flush_cache(); +} + + +package SQLRecordQueue; + +sub new($) { + my $class = shift; + my @unget = (); + return bless { + 'unget' => \@unget, + }, ref($class) || $class; +} + +sub get($) { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + return $Global::sql->get_record(); +} + +sub unget($) { + my $self = shift; + ::debug("run", "SQLRecordQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + if(@{$self->{'unget'}}) { return 0; } + my $get = $self->get(); + if(defined $get) { + $self->unget($get); + } + my $empty = not $get; + ::debug("run", "SQLRecordQueue->empty $empty"); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new($$) { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh and -t ($Global::status_fd || *STDERR)) { + ::warning( + "Input is read from the terminal. You are either an expert", + "(in which case: YOU ARE AWESOME!) or maybe you forgot", + "::: or :::: or -a or to pipe data into parallel. If so", + "consider going through the tutorial: man parallel_tutorial", + "Press CTRL-D to exit."); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get($) { + my $self = shift; + if($opt::link) { + return $self->link_get(); + } else { + return $self->nest_get(); + } +} + +sub unget($) { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty($) { + my $self = shift; + my $empty = (not @Global::unget_argv) && + not @{$self->{'unget'}}; + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub flush_cache($) { + my $self = shift; + for my $record (@{$self->{'unget'}}, @{$self->{'arg_matrix'}}) { + for my $arg (@$record) { + $arg->flush_cache(); + } + } +} + +sub link_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + for my $i (0..$#{$self->{'fhs'}}) { + my $fh = $self->{'fhs'}[$i]; + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}[$i]}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}[$i]}, shift @{$self->{'arg_matrix'}[$i]}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}[$i]}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get($) { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. + if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push(@combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}], + # Is input source --link'ed to the next? + $opt::linkinputsource[$fhn+1]); + } + # Find only combinations with this new entry + $combarg[2*$fhno] = [$len,$len]; + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[2][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. $no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + if(@mapped) { + return shift @{$self->{'unget'}}; + } + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +{ + my $cr_count = 0; + my $nl_count = 0; + my $dos_crnl_determined; + sub read_arg_from_fh($) { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend; + my $arg; + my $half_record = 0; + do {{ + # This makes 10% faster + if(not defined ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(not $dos_crnl_determined and not defined $opt::d) { + # Warn if input has CR-NL and -d is not set + if($arg =~ /\r$/) { + $cr_count++; + } else { + $nl_count++; + } + if($cr_count == 3 or $nl_count == 3) { + $dos_crnl_determined = 1; + if($nl_count == 0 and $cr_count == 3) { + ::warning('The first three values end in CR-NL. '. + 'Consider using -d "\r\n"'); + } + } + } + if($opt::csv) { + # We need to read a full CSV line. + if(($arg =~ y/"/"/) % 2 ) { + # The number of " on the line is uneven: + # If we were in a half_record => we have a full record now + # If we were outside a half_record => + # we are in a half record now + $half_record = not $half_record; + } + if($half_record) { + # CSV half-record with quoting: + # col1,"col2 2""x3"" board newline <-this one + # cont",col3 + $prepend .= $arg; + redo; + } else { + # Now we have a full CSV record + } + } + # Remove delimiter + chomp $arg; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { + $arg = $prepend.$arg; # For line continuation + undef $prepend; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } + } +} + +# Prototype forwarding +sub expand_combinations(@); +sub expand_combinations(@) { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) + # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $link = shift; # This is linked to the next input source + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + my @rest = expand_combinations(@_); + if($link) { + # Linked to next col with --link/:::+/::::+ + # TODO BUG does not wrap values if not same number of vals + push(@p, map { [$$_[0], @$_] } + grep { $xmin <= $$_[0] and $$_[0] <= $xmax } @rest); + } else { + # If there are more columns: Compute those recursively + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new($) { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + # This hostgroup is not defined using -S + # Add it + ::warning("Adding hostgroups: @hostgroups"); + # Add sshlogin + for(grep { not defined $Global::hostgroups{$_} } @hostgroups) { + my $sshlogin = SSHLogin->new($_); + my $sshlogin_string = $sshlogin->string(); + $Global::host{$sshlogin_string} = $sshlogin; + $Global::hostgroups{$sshlogin_string} = 1; + } + } + } else { + # No hostgroup on the arg => any hostgroup + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub Q($) { + # Q alias for ::shell_quote_scalar + my $ret = ::Q($_[0]); + no warnings 'redefine'; + *Q = \&::Q; + return $ret; +} + +sub pQ($) { + # pQ alias for ::perl_quote_scalar + my $ret = ::pQ($_[0]); + no warnings 'redefine'; + *pQ = \&::pQ; + return $ret; +} + +sub hash($) { + $Global::use{"DBI"} ||= eval "use B; 1;"; + B::hash(@_); +} + +sub total_jobs() { + return $Global::JobQueue->total_jobs(); +} + +{ + my %perleval; + my $job; + sub skip() { + # shorthand for $job->skip(); + $job->skip(); + } + sub slot() { + # shorthand for $job->slot(); + $job->slot(); + } + sub seq() { + # shorthand for $job->seq(); + $job->seq(); + } + sub uq() { + # Do not quote this arg + $Global::unquote_arg = 1; + } + sub yyyy_mm_dd_hh_mm_ss() { + # ISO8601 2038-01-19T03:14:08 + ::strftime("%Y-%m-%dT%H:%M:%S", localtime(time())); + } + sub yyyy_mm_dd_hh_mm() { + # ISO8601 2038-01-19T03:14 + ::strftime("%Y-%m-%dT%H:%M", localtime(time())); + } + sub yyyy_mm_dd() { + # ISO8601 2038-01-19 + ::strftime("%Y-%m-%d", localtime(time())); + } + sub hh_mm_ss() { + # ISO8601 03:14:08 + ::strftime("%H:%M:%S", localtime(time())); + } + sub hh_mm() { + # ISO8601 03:14 + ::strftime("%H:%M", localtime(time())); + } + sub yyyymmddhhmmss() { + # ISO8601 20380119 + ISO8601 031408 + ::strftime("%Y%m%d%H%M%S", localtime(time())); + } + sub yyyymmddhhmm() { + # ISO8601 20380119 + ISO8601 0314 + ::strftime("%Y%m%d%H%M", localtime(time())); + } + sub yyyymmdd() { + # ISO8601 20380119 + ::strftime("%Y%m%d", localtime(time())); + } + sub hhmmss() { + # ISO8601 031408 + ::strftime("%H%M%S", localtime(time())); + } + sub hhmm() { + # ISO8601 0314 + ::strftime("%H%M", localtime(time())); + } + + sub replace($$$$) { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = shift; # should the string be quoted? + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + $job = shift; + # Positional replace treated as normal replace + $perlexpr =~ s/^(-?\d+)? *//; + if(not $Global::cache_replacement_eval + or + not $self->{'cache'}{$perlexpr}) { + # Only compute the value once + # Use $_ as the variable to change + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + # Trim the input + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@"); + ::wait_and_exit(255); + } + } + # Execute the function + $perleval{$perlexpr}->($job); + $self->{'cache'}{$perlexpr} = $_; + if($Global::unquote_arg) { + # uq() was called in perlexpr + $self->{'cache'}{'unquote'}{$perlexpr} = 1; + # Reset for next perlexpr + $Global::unquote_arg = 0; + } + } + # Return the value quoted if needed + if($self->{'cache'}{'unquote'}{$perlexpr}) { + return($self->{'cache'}{$perlexpr}); + } else { + return($quote ? Q($self->{'cache'}{$perlexpr}) + : $self->{'cache'}{$perlexpr}); + } + } +} + +sub flush_cache($) { + # Flush cache of computed values + my $self = shift; + $self->{'cache'} = undef; +} + +sub orig($) { + my $self = shift; + return $self->{'orig'}; +} + +sub set_orig($$) { + my $self = shift; + $self->{'orig'} = shift; +} + +sub trim_of($) { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? $_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr."); + ::wait_and_exit(255); + } + return wantarray ? @strings : "@strings"; +} + + +package TimeoutQueue; + +sub new($) { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + $delta_time = ::multiply_time_units($delta_time); + + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time($) { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time($$) { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian($) { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian($$) { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime($) { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts($) { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->is_timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + ::warning("This job was killed because it timed out:", + $job->replaced()); + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert($) { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package SQL; + +sub new($) { + my $class = shift; + my $dburl = shift; + $Global::use{"DBI"} ||= eval "use DBI; 1;"; + # +DBURL = append to this DBURL + my $append = $dburl=~s/^\+//; + my %options = parse_dburl(get_alias($dburl)); + my %driveralias = ("sqlite" => "SQLite", + "sqlite3" => "SQLite", + "pg" => "Pg", + "postgres" => "Pg", + "postgresql" => "Pg", + "csv" => "CSV", + "oracle" => "Oracle", + "ora" => "Oracle"); + my $driver = $driveralias{$options{'databasedriver'}} || + $options{'databasedriver'}; + my $database = $options{'database'}; + my $host = $options{'host'} ? ";host=".$options{'host'} : ""; + my $port = $options{'port'} ? ";port=".$options{'port'} : ""; + my $dsn = "DBI:$driver:dbname=$database$host$port"; + my $userid = $options{'user'}; + my $password = $options{'password'};; + if(not grep /$driver/, DBI->available_drivers) { + ::error("$driver not supported. Are you missing a perl DBD::$driver module?"); + ::wait_and_exit(255); + } + my $dbh; + if($driver eq "CSV") { + # CSV does not use normal dsn + if(-d $database) { + $dbh = DBI->connect("dbi:CSV:", "", "", { f_dir => "$database", }) + or die $DBI::errstr; + } else { + ::error("$database is not a directory."); + ::wait_and_exit(255); + } + } else { + $dbh = DBI->connect($dsn, $userid, $password, + { RaiseError => 1, AutoInactiveDestroy => 1 }) + or die $DBI::errstr; + } + $dbh->{'PrintWarn'} = $Global::debug || 0; + $dbh->{'PrintError'} = $Global::debug || 0; + $dbh->{'RaiseError'} = 1; + $dbh->{'ShowErrorStatement'} = 1; + $dbh->{'HandleError'} = sub {}; + if(not defined $options{'table'}) { + ::error("The DBURL ($dburl) must contain a table."); + ::wait_and_exit(255); + } + + return bless { + 'dbh' => $dbh, + 'driver' => $driver, + 'max_number_of_args' => undef, + 'table' => $options{'table'}, + 'append' => $append, + }, ref($class) || $class; +} + +# Prototype forwarding +sub get_alias($); +sub get_alias($) { + my $alias = shift; + $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: + if ($alias !~ /^:/) { + return $alias; + } + + # Find the alias + my $path; + if (-l $0) { + ($path) = readlink($0) =~ m|^(.*)/|; + } else { + ($path) = $0 =~ m|^(.*)/|; + } + + my @deprecated = ("$ENV{HOME}/.dburl.aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for (@deprecated) { + if(-r $_) { + ::warning("$_ is deprecated. ". + "Use .sql/aliases instead (read man sql)."); + } + } + my @urlalias=(); + check_permissions("$ENV{HOME}/.sql/aliases"); + check_permissions("$ENV{HOME}/.dburl.aliases"); + my @search = ("$ENV{HOME}/.sql/aliases", + "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for my $alias_file (@search) { + # local $/ needed if -0 set + local $/ = "\n"; + if(-r $alias_file) { + open(my $in, "<", $alias_file) || die; + push @urlalias, <$in>; + close $in; + } + } + my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; + # If we saw this before: we have an alias loop + if(grep {$_ eq $alias_part } @Private::seen_aliases) { + ::error("$alias_part is a cyclic alias."); + exit -1; + } else { + push @Private::seen_aliases, $alias_part; + } + + my $dburl; + for (@urlalias) { + /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } + } + + if($dburl) { + return get_alias($dburl.$rest); + } else { + ::error("$alias is not defined in @search"); + exit(-1); + } +} + +sub check_permissions($) { + my $file = shift; + + if(-e $file) { + if(not -o $file) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be owned by $username: ". + "chown $username $file"); + } + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); + if($mode & 077) { + my $username = (getpwuid($<))[0]; + ::warning("$file should be only be readable by $username: ". + "chmod 600 $file"); + } + } +} + +sub parse_dburl($) { + my $url = shift; + my %options = (); + # sql:mysql://[[user][:password]@][host][:port]/[database[/table][?query]] + + if($url=~m!^(?:sql:)? # You can prefix with 'sql:' + ((?:oracle|ora|mysql|pg|postgres|postgresql)(?:s|ssl|)| + (?:sqlite|sqlite2|sqlite3|csv)):// # Databasedriver ($1) + (?: + ([^:@/][^:@]*|) # Username ($2) + (?: + :([^@]*) # Password ($3) + )? + @)? + ([^:/]*)? # Hostname ($4) + (?: + : + ([^/]*)? # Port ($5) + )? + (?: + / + ([^/?]*)? # Database ($6) + )? + (?: + / + ([^?]*)? # Table ($7) + )? + (?: + \? + (.*)? # Query ($8) + )? + $!ix) { + $options{databasedriver} = ::undef_if_empty(lc(uri_unescape($1))); + $options{user} = ::undef_if_empty(uri_unescape($2)); + $options{password} = ::undef_if_empty(uri_unescape($3)); + $options{host} = ::undef_if_empty(uri_unescape($4)); + $options{port} = ::undef_if_empty(uri_unescape($5)); + $options{database} = ::undef_if_empty(uri_unescape($6)); + $options{table} = ::undef_if_empty(uri_unescape($7)); + $options{query} = ::undef_if_empty(uri_unescape($8)); + ::debug("sql", "dburl $url\n"); + ::debug("sql", "databasedriver ", $options{databasedriver}, + " user ", $options{user}, + " password ", $options{password}, " host ", $options{host}, + " port ", $options{port}, " database ", $options{database}, + " table ", $options{table}, " query ", $options{query}, "\n"); + } else { + ::error("$url is not a valid DBURL"); + exit 255; + } + return %options; +} + +sub uri_unescape($) { + # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm + # to avoid depending on URI::Escape + # This section is (C) Gisle Aas. + # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + foreach (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +sub run($) { + my $self = shift; + my $stmt = shift; + if($self->{'driver'} eq "CSV") { + $stmt=~ s/;$//; + if($stmt eq "BEGIN" or + $stmt eq "COMMIT") { + return undef; + } + } + my @retval; + my $dbh = $self->{'dbh'}; + ::debug("sql","$opt::sqlmaster$opt::sqlworker run $stmt\n"); + # Execute with the rest of the args - if any + my $rv; + my $sth; + my $lockretry = 0; + while($lockretry < 10) { + $sth = $dbh->prepare($stmt); + if($sth + and + eval { $rv = $sth->execute(@_) }) { + last; + } else { + if($@ =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/ + or + $DBI::errstr =~ /no such table|Table .* doesn.t exist|relation ".*" does not exist/) { + # This is fine: + # It is just a worker that reported back too late - + # another worker had finished the job first + # and the table was then dropped + $rv = $sth = 0; + last; + } + if($DBI::errstr =~ /locked/) { + ::debug("sql", "Lock retry: $lockretry"); + $lockretry++; + ::usleep(rand()*300); + } elsif(not $sth) { + # Try again + $lockretry++; + } else { + ::error($DBI::errstr); + ::wait_and_exit(255); + } + } + } + if($lockretry >= 10) { + ::die_bug("retry > 10: $DBI::errstr"); + } + if($rv < 0 and $DBI::errstr){ + ::error($DBI::errstr); + ::wait_and_exit(255); + } + return $sth; +} + +sub get($) { + my $self = shift; + my $sth = $self->run(@_); + my @retval; + # If $sth = 0 it means the table was dropped by another process + while($sth) { + my @row = $sth->fetchrow_array(); + @row or last; + push @retval, \@row; + } + return \@retval; +} + +sub table($) { + my $self = shift; + return $self->{'table'}; +} + +sub append($) { + my $self = shift; + return $self->{'append'}; +} + +sub update($) { + my $self = shift; + my $stmt = shift; + my $table = $self->table(); + $self->run("UPDATE $table $stmt",@_); +} + +sub output($) { + my $self = shift; + my $commandline = shift; + + $self->update("SET Stdout = ?, Stderr = ? WHERE Seq = ". + $commandline->seq(), + join("",@{$commandline->{'output'}{1}}), + join("",@{$commandline->{'output'}{2}})); +} + +sub max_number_of_args($) { + # Maximal number of args for this table + my $self = shift; + if(not $self->{'max_number_of_args'}) { + # Read the number of args from the SQL table + my $table = $self->table(); + my $v = $self->get("SELECT * FROM $table LIMIT 1;"); + my @reserved_columns = qw(Seq Host Starttime JobRuntime Send + Receive Exitval _Signal Command Stdout Stderr); + if(not $v) { + ::error("$table contains no records"); + } + # Count the number of Vx columns + $self->{'max_number_of_args'} = $#{$v->[0]} - $#reserved_columns; + } + return $self->{'max_number_of_args'}; +} + +sub set_max_number_of_args($$) { + my $self = shift; + $self->{'max_number_of_args'} = shift; +} + +sub create_table($) { + my $self = shift; + if($self->append()) { return; } + my $max_number_of_args = shift; + $self->set_max_number_of_args($max_number_of_args); + my $table = $self->table(); + $self->run(qq(DROP TABLE IF EXISTS $table;)); + # BIGINT and TEXT are not supported in these databases or are too small + my %vartype = ( + "Oracle" => { "BIGINT" => "NUMBER(19,0)", + "TEXT" => "CLOB", }, + "mysql" => { "TEXT" => "BLOB", }, + "CSV" => { "BIGINT" => "INT", + "FLOAT" => "REAL", }, + ); + my $BIGINT = $vartype{$self->{'driver'}}{"BIGINT"} || "BIGINT"; + my $TEXT = $vartype{$self->{'driver'}}{"TEXT"} || "TEXT"; + my $FLOAT = $vartype{$self->{'driver'}}{"FLOAT"} || "FLOAT(44)"; + my $v_def = join "", map { "V$_ $TEXT," } (1..$self->max_number_of_args()); + $self->run(qq{CREATE TABLE $table + (Seq $BIGINT, + Host $TEXT, + Starttime $FLOAT, + JobRuntime $FLOAT, + Send $BIGINT, + Receive $BIGINT, + Exitval $BIGINT, + _Signal $BIGINT, + Command $TEXT,}. + $v_def. + qq{Stdout $TEXT, + Stderr $TEXT);}); +} + +sub insert_records($) { + my $self = shift; + my $seq = shift; + my $command_ref = shift; + my $record_ref = shift; + my $table = $self->table(); + # For SQL encode the command with \257 space as split points + my $command = join("\257 ",@$command_ref); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + # Two extra value due to $seq, Exitval, Send + my $v_vals = join ",", map { "?" } (1..$self->max_number_of_args()+4); + $self->run("INSERT INTO $table (Seq,Command,Exitval,Send @v_cols) ". + "VALUES ($v_vals);", $seq, $command, -1000, + 0, @$record_ref[1..$#$record_ref]); +} + + +sub get_record($) { + my $self = shift; + my @retval; + my $table = $self->table(); + my @v_cols = map { ", V$_" } (1..$self->max_number_of_args()); + my $rand = "Reserved-".$$.rand(); + my $v; + my $more_pending; + + do { + if($self->{'driver'} eq "CSV") { + # Sub SELECT is not supported in CSV + # So to minimize the race condition below select a job at random + my $r = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Exitval = -1000 LIMIT 100;"); + $v = [ sort { rand() > 0.5 } @$r ]; + } else { + # Avoid race condition where multiple workers get the same job + # by setting Stdout to a unique string + # (SELECT * FROM (...) AS dummy) is needed due to sillyness in MySQL + $self->update("SET Stdout = ?,Exitval = ? ". + "WHERE Seq = (". + " SELECT * FROM (". + " SELECT min(Seq) FROM $table WHERE Exitval = -1000". + " ) AS dummy". + ") AND Exitval = -1000;", $rand, -1210); + # If a parallel worker overwrote the unique string this will get nothing + $v = $self->get("SELECT Seq, Command @v_cols FROM $table ". + "WHERE Stdout = ?;", $rand); + } + if($v->[0]) { + my $val_ref = $v->[0]; + # Mark record as taken + my $seq = shift @$val_ref; + # Save the sequence number to use when running the job + $SQL::next_seq = $seq; + $self->update("SET Exitval = ? WHERE Seq = ".$seq, -1220); + # Command is encoded with '\257 space' as splitting char + my @command = split /\257 /, shift @$val_ref; + $SQL::command_ref = \@command; + for (@$val_ref) { + push @retval, Arg->new($_); + } + } else { + # If the record was updated by another job in parallel, + # then we may not be done, so see if there are more jobs pending + $more_pending = + $self->get("SELECT Seq FROM $table WHERE Exitval = ?;", -1210); + } + } while (not $v->[0] and $more_pending->[0]); + + if(@retval) { + return \@retval; + } else { + return undef; + } +} + +sub total_jobs($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT count(*) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::total_jobs"); + } +} + +sub max_seq($) { + my $self = shift; + my $table = $self->table(); + my $v = $self->get("SELECT max(Seq) FROM $table;"); + if($v->[0]) { + return $v->[0]->[0]; + } else { + ::die_bug("SQL::max_seq"); + } +} + +sub finished($) { + # Check if there are any jobs left in the SQL table that do not + # have a "real" exitval + my $self = shift; + if($opt::wait or $Global::start_sqlworker) { + my $table = $self->table(); + my $rv = $self->get("select Seq,Exitval from $table ". + "where Exitval <= -1000 limit 1"); + return not $rv->[0]; + } else { + return 1; + } +} + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in $PARALLEL_HOME/semaphores/id-<name> Each +# file in $PARALLEL_HOME/semaphores/id-<name>/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new($) { + my $class = shift; + my $id = shift; + my $count = shift; + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id + my $parallel_locks = $Global::cache_dir . "/semaphores"; + -d $parallel_locks or ::mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if(-d $parallel_locks and -w $parallel_locks + and -r $parallel_locks and -x $parallel_locks) { + # skip + } else { + ::error("Semaphoredir must be writable: '$parallel_locks'"); + ::wait_and_exit(255); + } + + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub remove_dead_locks($) { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(kill 0, $pid) { + ::debug("sem", "Alive: $pid $d\n"); + } else { + ::debug("sem", "Dead: $d\n"); + ::rm($d); + } + } + } +} + +sub acquire($) { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + # Can we get a lock? + $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore."); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + exit(1); + last; + } + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release($) { + my $self = shift; + ::rm($self->{'pidfile'}); + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + ::rm($self->{'idfile'}); + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change($) { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + ::rm($old_pidfile); +} + +sub atomic_link_if_count_less_than($) { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or ::mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks($) { + my $self = shift; + if(-e $self->{'idfile'}) { + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock($) { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!", + "Will wait for a random while."); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. ". + "Consider using --semaphoretimeout."); + } + } + } + ::debug("run", "locked $self->{'lockfile'}"); +} + +sub unlock($) { + my $self = shift; + ::rm($self->{'lockfile'}); + close $self->{'lockfh'}; + ::debug("run", "unlocked\n"); +} + +# Keep perl -w happy + +$opt::x = $Semaphore::timeout = $Semaphore::wait = +$Job::file_descriptor_warning_printed = $Global::envdef = @Arg::arg = +$Global::max_slot_number = $opt::session; + +package main; + +sub main() { + save_stdin_stdout_stderr(); + save_original_signal_handler(); + parse_options(); + ::debug("init", "Open file descriptors: ", join(" ",keys %Global::fh), "\n"); + my $number_of_args; + if($Global::max_number_of_args) { + $number_of_args = $Global::max_number_of_args; + } elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; + } else { + $number_of_args = 1; + } + + my @command = @ARGV; + my @input_source_fh; + if($opt::pipepart) { + if($opt::tee) { + @input_source_fh = map { open_or_exit($_) } @opt::a; + # Remove the first: It will be the file piped. + shift @input_source_fh; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } else { + # -a is used for data - not for command line args + @input_source_fh = map { open_or_exit($_) } "/dev/null"; + } + } else { + @input_source_fh = map { open_or_exit($_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } + } + + if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $input_source_fh[0]; + <$fh>; + } + + set_input_source_header(\@command,\@input_source_fh); + if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. Remove hosts that are down + filter_hosts(); + } + + + if($opt::sqlmaster and $opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + + if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); + } + + $Global::JobQueue = JobQueue->new( + \@command, \@input_source_fh, $Global::ContextReplace, + $number_of_args, \@Global::transfer_files, \@Global::ret_files, + \@Global::template_names, \@Global::template_contents + ); + + if($opt::sqlmaster) { + # Create SQL table to hold joblog + output + # Figure out how many arguments are in a job + # (It is affected by --colsep, -N, $number_source_fh) + my $record_queue = $Global::JobQueue->{'commandlinequeue'}{'arg_queue'}; + my $record = $record_queue->get(); + my $no_of_values = $number_of_args * (1+$#{$record}); + $record_queue->unget($record); + $Global::sql->create_table($no_of_values); + if($opt::sqlworker) { + # Start a real --sqlworker in the background later + $Global::start_sqlworker = 1; + $opt::sqlworker = undef; + } + } + + if($opt::pipepart) { + pipepart_setup(); + } elsif($opt::pipe) { + if($opt::tee) { + pipe_tee_setup(); + } elsif($opt::shard or $opt::bin) { + pipe_shard_setup(); + } elsif($opt::groupby) { + pipe_group_by_setup(); + } + } + + if($opt::eta or $opt::bar or $opt::shuf or $Global::halt_pct) { + # Count the number of jobs or shuffle all jobs + # before starting any. + # Must be done after ungetting any --pipepart jobs. + $Global::JobQueue->total_jobs(); + } + # Compute $Global::max_jobs_running + # Must be done after ungetting any --pipepart jobs. + max_jobs_running(); + + init_run_jobs(); + my $sem; + if($Global::semaphore) { + $sem = acquire_semaphore(); + } + $SIG{TERM} = $Global::original_sig{TERM}; + $SIG{HUP} = \&start_no_new_jobs; + + if($opt::tee or $opt::shard or $opt::bin) { + # All jobs must be running in parallel for --tee/--shard/--bin + while(start_more_jobs()) {} + $Global::start_no_new_jobs = 1; + if(not $Global::JobQueue->empty()) { + if($opt::tee) { + ::error("--tee requires --jobs to be higher. Try --jobs 0."); + } elsif($opt::bin) { + ::error("--bin requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } elsif($opt::shard) { + ::error("--shard requires --jobs to be higher than the number of", + "arguments. Increase --jobs."); + } else { + ::die_bug("--bin/--shard/--tee should not get here"); + } + ::wait_and_exit(255); + } + } elsif($opt::pipe and not $opt::pipepart and not $opt::semaphore) { + # Fill all jobslots + while(start_more_jobs()) {} + spreadstdin(); + } else { + # Reap the finished jobs and start more + while(reapers() + start_more_jobs()) {} + } + ::debug("init", "Start draining\n"); + drain_job_queue(@command); + ::debug("init", "Done draining\n"); + reapers(); + ::debug("init", "Done reaping\n"); + if($Global::semaphore) { $sem->release(); } + cleanup(); + ::debug("init", "Halt\n"); + halt(); +} + +main(); diff --git a/src/sem.pod b/src/sem.pod new file mode 100644 index 0000000..15d4ea7 --- /dev/null +++ b/src/sem.pod @@ -0,0 +1,381 @@ +#!/usr/bin/perl -w + +# SPDX-FileCopyrightText: 2021-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GFDL-1.3-or-later +# SPDX-License-Identifier: CC-BY-SA-4.0 + +=head1 NAME + +sem - semaphore for executing shell command lines in parallel + +=head1 SYNOPSIS + +B<sem> [--fg] [--id <id>] [--semaphoretimeout <secs>] [-j <num>] [--wait] command + +=head1 DESCRIPTION + +GNU B<sem> is an alias for GNU B<parallel --semaphore>. + +GNU B<sem> acts as a counting semaphore. When GNU B<sem> is called +with command it starts the command in the background. When I<num> +number of commands are running in the background, GNU B<sem> waits for +one of these to complete before starting the command. + +GNU B<sem> does not read any arguments to build the command (no -a, +:::, and ::::). It simply waits for a semaphore to become available +and then runs the command given. + +Before looking at the options you may want to check out the examples +after the list of options. That will give you an idea of what GNU +B<sem> is capable of. + +=head1 OPTIONS + +=over 9 + +=item I<command> + +Command to execute. The command may be followed by arguments for the +command. + + +=item B<--bg> + +Run command in background thus GNU B<sem> will not wait for +completion of the command before exiting. This is the default. + +In toilet analogy: GNU B<sem> waits for a toilet to be available, +gives the toilet to a person, and exits immediately. + +See also: B<--fg> + + +=item B<--jobs> I<N> + +=item B<-j> I<N> + +=item B<--max-procs> I<N> + +=item B<-P> I<N> + +Run up to N commands in parallel. Default is 1 thus acting like a +mutex. + +In toilet analogy: B<-j> is the number of toilets. + + +=item B<--jobs> I<+N> + +=item B<-j> I<+N> + +=item B<--max-procs> I<+N> + +=item B<-P> I<+N> + +Add N to the number of CPU cores. Run up to this many jobs in +parallel. For compute intensive jobs B<-j> +0 is useful as it will run +number-of-cpu-cores jobs simultaneously. + + +=item B<--jobs> I<-N> + +=item B<-j> I<-N> + +=item B<--max-procs> I<-N> + +=item B<-P> I<-N> + +Subtract N from the number of CPU cores. Run up to this many jobs in +parallel. If the evaluated number is less than 1 then 1 will be used. +See also B<--use-cpus-instead-of-cores>. + + +=item B<--jobs> I<N>% + +=item B<-j> I<N>% + +=item B<--max-procs> I<N>% + +=item B<-P> I<N>% + +Multiply N% with the number of CPU cores. Run up to this many jobs in +parallel. If the evaluated number is less than 1 then 1 will be used. +See also B<--use-cpus-instead-of-cores>. + + +=item B<--jobs> I<procfile> + +=item B<-j> I<procfile> + +=item B<--max-procs> I<procfile> + +=item B<-P> I<procfile> + +Read parameter from file. Use the content of I<procfile> as parameter +for I<-j>. E.g. I<procfile> could contain the string 100% or +2 or +10. + + +=item B<--pipe> + +Pass stdin (standard input) to I<command>. + +If I<command> read from stdin (standard input), use B<--pipe>. + + +=item B<--semaphorename> I<name> + +=item B<--id> I<name> + +Use B<name> as the name of the semaphore. Default is the name of the +controlling tty (output from B<tty>). + +The default normally works as expected when used interactively, but +when used in a script I<name> should be set. I<$$> or I<my_task_name> +are often a good value. + +The semaphore is stored in ~/.parallel/semaphores/ + +In toilet analogy the name corresponds to different types of toilets: +e.g. male, female, customer, staff. + + +=item B<--fg> + +Do not put command in background. + +In toilet analogy: GNU B<sem> waits for a toilet to be available, +takes a person to the toilet, waits for the person to finish, and +exits. + + +=item B<--semaphoretimeout> I<secs> + +=item B<--st> I<secs> + +If I<secs> > 0: If the semaphore is not released within I<secs> +seconds, take it anyway. + +If I<secs> < 0: If the semaphore is not released within I<secs> +seconds, exit. + +In toilet analogy: I<secs> > 0: If no toilet becomes available within +I<secs> seconds, pee on the floor. I<secs> < 0: If no toilet becomes +available within I<secs> seconds, exit without doing anything. + + +=item B<--wait> + +Wait for all commands to complete. + +In toilet analogy: Wait until all toilets are empty, then exit. + + +=back + +=head1 UNDERSTANDING A SEMAPHORE + +Try the following example: + + sem -j 2 'sleep 1;echo 1 finished'; echo sem 1 exited + sem -j 2 'sleep 2;echo 2 finished'; echo sem 2 exited + sem -j 2 'sleep 3;echo 3 finished'; echo sem 3 exited + sem -j 2 'sleep 4;echo 4 finished'; echo sem 4 exited + sem --wait; echo sem --wait done + +In toilet analogy this uses 2 toilets (B<-j 2>). GNU B<sem> takes '1' +to a toilet, and exits immediately. While '1' is sleeping, another GNU +B<sem> takes '2' to a toilet, and exits immediately. + +While '1' and '2' are sleeping, another GNU B<sem> waits for a free +toilet. When '1' finishes, a toilet becomes available, and this GNU +B<sem> stops waiting, and takes '3' to a toilet, and exits +immediately. + +While '2' and '3' are sleeping, another GNU B<sem> waits for a free +toilet. When '2' finishes, a toilet becomes available, and this GNU +B<sem> stops waiting, and takes '4' to a toilet, and exits +immediately. + +Finally another GNU B<sem> waits for all toilets to become free. + + +=head1 EXAMPLE: Gzipping *.log + +Run one gzip process per CPU core. Block until a CPU core becomes +available. + + for i in *.log ; do + echo $i + sem -j+0 gzip $i ";" echo done + done + sem --wait + +=head1 EXAMPLE: Protecting pod2html from itself + +pod2html creates two files: pod2htmd.tmp and pod2htmi.tmp which it +does not clean up. It uses these two files for a short time. But if +you run multiple pod2html in parallel (e.g. in a Makefile with make +-j) there is a risk that two different instances of pod2html will +write to the files at the same time: + + # This may fail due to shared pod2htmd.tmp/pod2htmi.tmp files + foo.html: + pod2html foo.pod --outfile foo.html + + bar.html: + pod2html bar.pod --outfile bar.html + + $ make -j foo.html bar.html + +You need to protect pod2html from running twice at the same time. +B<sem> running as a mutex will make sure only one runs: + + foo.html: + sem --id pod2html pod2html foo.pod --outfile foo.html + + bar.html: + sem --id pod2html pod2html bar.pod --outfile bar.html + + clean: foo.html bar.html + sem --id pod2html --wait + rm -f pod2htmd.tmp pod2htmi.tmp + + $ make -j foo.html bar.html clean + +=head1 BUGS + +None known. + + +=head1 REPORTING BUGS + +Report bugs to <bug-parallel@gnu.org>. + + +=head1 AUTHOR + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 2 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=back + +=over 9 + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as +LICENCES/CC-BY-SA-4.0.txt + + +=head1 DEPENDENCIES + +GNU B<sem> uses Perl, and the Perl modules Getopt::Long, +Symbol, Fcntl. + + +=head1 SEE ALSO + +B<parallel>(1) + +=cut @@ -0,0 +1,1274 @@ +#!/usr/bin/perl -w + +# Copyright (C) 2008-2010 Ole Tange, http://ole.tange.dk +# +# Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and +# Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/> +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA +# +# SPDX-FileCopyrightText: 2008-2022 Ole Tange, http://ole.tange.dk and Free Software and Foundation, Inc. +# SPDX-License-Identifier: GPL-3.0-or-later +# SPDX-License-Identifier: GFDL-1.3-or-later + +=head1 NAME + +sql - execute a command on a database determined by a dburl + +=head1 SYNOPSIS + +B<sql> [options] I<dburl> [I<commands>] + +B<sql> [options] I<dburl> < commandfile + +B<#!/usr/bin/sql> B<--shebang> [options] I<dburl> + +=head1 DESCRIPTION + +GNU B<sql> aims to give a simple, unified interface for accessing +databases through all the different databases' command line +clients. So far the focus has been on giving a common way to specify +login information (protocol, username, password, hostname, and port +number), size (database and table size), and running queries. + +The database is addressed using a DBURL. If I<commands> are left out +you will get that database's interactive shell. + +GNU B<sql> is often used in combination with GNU B<parallel>. + +=over 9 + +=item I<dburl> + +A DBURL has the following syntax: +[sql:]vendor:// +[[user][:password]@][host][:port]/[database][?sqlquery] + +See the section DBURL below. + +=item I<commands> + +The SQL commands to run. Each argument will have a newline +appended. + +Example: "SELECT * FROM foo;" "SELECT * FROM bar;" + +If the arguments contain '\n' or '\x0a' this will be replaced with a +newline: + +Example: "SELECT * FROM foo;\n SELECT * FROM bar;" + +If no commands are given SQL is read from the keyboard or STDIN. + +Example: echo 'SELECT * FROM foo;' | sql mysql:/// + + +=item B<--csv> (beta testing) + +CSV output. + + +=item B<--db-size> + +=item B<--dbsize> + +Size of database. Show the size of the database on disk. For Oracle +this requires access to read the table I<dba_data_files> - the user +I<system> has that. + + +=item B<--help> + +=item B<-h> + +Print a summary of the options to GNU B<sql> and exit. + + +=item B<--html> + +HTML output. Turn on HTML tabular output. + + +=item B<--json> (beta testing) + +=item B<--pretty> (beta testing) + +Pretty JSON output. + + +=item B<--list-databases> + +=item B<--listdbs> + +=item B<--show-databases> + +=item B<--showdbs> + +List the databases (table spaces) in the database. + + +=item B<--listproc> + +=item B<--proclist> + +=item B<--show-processlist> + +Show the list of running queries. + + +=item B<--list-tables> + +=item B<--show-tables> + +=item B<--table-list> + +List the tables in the database. + + +=item B<--noheaders> + +=item B<--no-headers> + +=item B<-n> + +Remove headers and footers and print only tuples. Bug in Oracle: it +still prints number of rows found. + + +=item B<-p> I<pass-through> + +The string following -p will be given to the database connection +program as arguments. Multiple -p's will be joined with +space. Example: pass '-U' and the user name to the program: + +I<-p "-U scott"> can also be written I<-p -U -p scott>. + + +=item B<--precision> <I<rfc3339|h|m|s|ms|u|ns>> + +Precision of timestamps. + +Specifiy the format of the output timestamps: rfc3339, h, m, s, ms, u +or ns. + + +=item B<-r> + +Try 3 times. Short version of I<--retries 3>. + + +=item B<--retries> I<ntimes> + +Try I<ntimes> times. If the client program returns with an error, +retry the command. Default is I<--retries 1>. + + +=item B<--sep> I<string> + +=item B<-s> I<string> + +Field separator. Use I<string> as separator between columns. + + +=item B<--skip-first-line> + +Do not use the first line of input (used by GNU B<sql> itself +when called with B<--shebang>). + + +=item B<--table-size> + +=item B<--tablesize> + +Size of tables. Show the size of the tables in the database. + + +=item B<--verbose> + +=item B<-v> + +Print which command is sent. + + +=item B<--version> + +=item B<-V> + +Print the version GNU B<sql> and exit. + + +=item B<--shebang> + +=item B<-Y> + +GNU B<sql> can be called as a shebang (#!) command as the first line of a script. Like this: + + #!/usr/bin/sql -Y mysql:/// + + SELECT * FROM foo; + +For this to work B<--shebang> or B<-Y> must be set as the first option. + +=back + +=head1 DBURL + +A DBURL has the following syntax: +[sql:]vendor:// +[[user][:password]@][host][:port]/[database][?sqlquery] + +To quote special characters use %-encoding specified in +http://tools.ietf.org/html/rfc3986#section-2.1 (E.g. a password +containing '/' would contain '%2F'). + +Examples: + + mysql://scott:tiger@my.example.com/mydb + influxdb://scott:tiger@influxdb.example.com/foo + sql:oracle://scott:tiger@ora.example.com/xe + postgresql://scott:tiger@pg.example.com/pgdb + pg:/// + postgresqlssl://scott@pg.example.com:3333/pgdb + sql:sqlite2:////tmp/db.sqlite?SELECT * FROM foo; + sqlite3:///../db.sqlite3?SELECT%20*%20FROM%20foo; + +Currently supported vendors: MySQL (mysql), MySQL with SSL (mysqls, +mysqlssl), Oracle (oracle, ora), PostgreSQL (postgresql, pg, pgsql, +postgres), PostgreSQL with SSL (postgresqlssl, pgs, pgsqlssl, +postgresssl, pgssl, postgresqls, pgsqls, postgress), SQLite2 (sqlite, +sqlite2), SQLite3 (sqlite3), InfluxDB 1.x (influx, influxdb), InfluxDB +with SSL (influxdbssl, influxdbs, influxs, influxssl) + +Aliases must start with ':' and are read from +/etc/sql/aliases and ~/.sql/aliases. The user's own +~/.sql/aliases should only be readable by the user. + +Example of aliases: + + :myalias1 pg://scott:tiger@pg.example.com/pgdb + :myalias2 ora://scott:tiger@ora.example.com/xe + # Short form of mysql://`whoami`:nopassword@localhost:3306/`whoami` + :myalias3 mysql:/// + # Short form of mysql://`whoami`:nopassword@localhost:33333/mydb + :myalias4 mysql://:33333/mydb + # Alias for an alias + :m :myalias4 + # the sortest alias possible + : sqlite2:////tmp/db.sqlite + # Including an SQL query + :query sqlite:////tmp/db.sqlite?SELECT * FROM foo; + +=head1 EXAMPLES + +=head2 Get an interactive prompt + +The most basic use of GNU B<sql> is to get an interactive prompt: + +B<sql sql:oracle://scott:tiger@ora.example.com/xe> + +If you have setup an alias you can do: + +B<sql :myora> + + +=head2 Run a query + +To run a query directly from the command line: + +B<sql :myalias "SELECT * FROM foo;"> + +Oracle requires newlines after each statement. This can be done like +this: + +B<sql :myora "SELECT * FROM foo;" "SELECT * FROM bar;"> + +Or this: + +B<sql :myora "SELECT * FROM foo;\nSELECT * FROM bar;"> + + +=head2 Copy a PostgreSQL database + +To copy a PostgreSQL database use pg_dump to generate the dump and GNU +B<sql> to import it: + +B<pg_dump pg_database | sql pg://scott:tiger@pg.example.com/pgdb> + + +=head2 Empty all tables in a MySQL database + +Using GNU B<parallel> it is easy to empty all tables without dropping them: + +B<sql -n mysql:/// 'show tables' | parallel sql mysql:/// DELETE FROM {};> + + +=head2 Drop all tables in a PostgreSQL database + +To drop all tables in a PostgreSQL database do: + +B<sql -n pg:/// '\dt' | parallel --colsep '\|' -r sql pg:/// DROP TABLE {2};> + + +=head2 Run as a script + +Instead of doing: + +B<sql mysql:/// < sqlfile> + +you can combine the sqlfile with the DBURL to make a +UNIX-script. Create a script called I<demosql>: + +B<#!/usr/bin/sql -Y mysql:///> + +B<SELECT * FROM foo;> + +Then do: + +B<chmod +x demosql; ./demosql> + + +=head2 Use --colsep to process multiple columns + +Use GNU B<parallel>'s B<--colsep> to separate columns: + +B<sql -s '\t' :myalias 'SELECT * FROM foo;' | parallel --colsep '\t' do_stuff {4} {1}> + + +=head2 Retry if the connection fails + +If the access to the database fails occasionally B<--retries> can help +make sure the query succeeds: + +B<sql --retries 5 :myalias 'SELECT * FROM really_big_foo;'> + + +=head2 Get info about the running database system + +Show how big the database is: + +B<sql --db-size :myalias> + +List the tables: + +B<sql --list-tables :myalias> + +List the size of the tables: + +B<sql --table-size :myalias> + +List the running processes: + +B<sql --show-processlist :myalias> + + +=head1 REPORTING BUGS + +GNU B<sql> is part of GNU B<parallel>. Report bugs to <bug-parallel@gnu.org>. + + +=head1 AUTHOR + +When using GNU B<sql> for a publication please cite: + +O. Tange (2011): GNU SQL - A Command Line Tool for Accessing Different +Databases Using DBURLs, ;login: The USENIX Magazine, April 2011:29-32. + +Copyright (C) 2008-2010 Ole Tange http://ole.tange.dk + +Copyright (C) 2010-2022 Ole Tange, http://ole.tange.dk and Free +Software Foundation, Inc. + +=head1 LICENSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +at your option any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +=head2 Documentation license I + +Permission is granted to copy, distribute and/or modify this +documentation under the terms of the GNU Free Documentation License, +Version 1.3 or any later version published by the Free Software +Foundation; with no Invariant Sections, with no Front-Cover Texts, and +with no Back-Cover Texts. A copy of the license is included in the +file LICENSES/GFDL-1.3-or-later.txt. + + +=head2 Documentation license II + +You are free: + +=over 9 + +=item B<to Share> + +to copy, distribute and transmit the work + +=item B<to Remix> + +to adapt the work + +=back + +Under the following conditions: + +=over 9 + +=item B<Attribution> + +You must attribute the work in the manner specified by the author or +licensor (but not in any way that suggests that they endorse you or +your use of the work). + +=item B<Share Alike> + +If you alter, transform, or build upon this work, you may distribute +the resulting work only under the same, similar or a compatible +license. + +=back + +With the understanding that: + +=over 9 + +=item B<Waiver> + +Any of the above conditions can be waived if you get permission from +the copyright holder. + +=item B<Public Domain> + +Where the work or any of its elements is in the public domain under +applicable law, that status is in no way affected by the license. + +=item B<Other Rights> + +In no way are any of the following rights affected by the license: + +=over 9 + +=item * + +Your fair dealing or fair use rights, or other applicable +copyright exceptions and limitations; + +=item * + +The author's moral rights; + +=item * + +Rights other persons may have either in the work itself or in +how the work is used, such as publicity or privacy rights. + +=back + +=item B<Notice> + +For any reuse or distribution, you must make clear to others the +license terms of this work. + +=back + +A copy of the full license is included in the file as cc-by-sa.txt. + +=head1 DEPENDENCIES + +GNU B<sql> uses Perl. If B<mysql> is installed, MySQL dburls will +work. If B<psql> is installed, PostgreSQL dburls will work. If +B<sqlite> is installed, SQLite2 dburls will work. If B<sqlite3> is +installed, SQLite3 dburls will work. If B<sqlplus> is installed, +Oracle dburls will work. If B<rlwrap> is installed, GNU B<sql> will +have a command history for Oracle. + + +=head1 FILES + +~/.sql/aliases - user's own aliases with DBURLs + +/etc/sql/aliases - common aliases with DBURLs + + +=head1 SEE ALSO + +B<mysql>(1), B<psql>(1), B<rlwrap>(1), B<sqlite>(1), B<sqlite3>(1), +B<sqlplus>(1), B<influx>(1) + +=cut + +use Getopt::Long; +use strict; +use File::Temp qw/tempfile tempdir/; + +parse_options(); + +my $pass_through_options = (defined $::opt_p) ? join(" ",@{$::opt_p}) : ""; +my $dburl_or_alias = shift; +if (not defined $dburl_or_alias) { Usage("No DBURL given"); exit -1; } +my %dburl = parse_dburl(get_alias($dburl_or_alias)); + +my $interactive_command; +my $batch_command; + +my $database_driver = database_driver_alias($dburl{'databasedriver'}); +if($database_driver eq "mysql" or + $database_driver eq "mysqlssl") { + ($batch_command,$interactive_command) = + mysql_commands($database_driver,%dburl); +} elsif($database_driver eq "postgresql" or + $database_driver eq "postgresqlssl") { + ($batch_command,$interactive_command) = + postgresql_commands($database_driver,%dburl); +} elsif($database_driver eq "oracle") { + ($batch_command,$interactive_command) = + oracle_commands($database_driver,%dburl); +} elsif($database_driver eq "sqlite" or + $database_driver eq "sqlite3") { + ($batch_command,$interactive_command) = + sqlite_commands($database_driver,%dburl); +} elsif($database_driver eq "influx" or + $database_driver eq "influxssl") { + ($batch_command,$interactive_command) = + influx_commands($database_driver,%dburl); +} + +my $err; +my $retries; +$retries ||= defined $::opt_retries ? $::opt_retries : undef; +$retries ||= defined $::opt_retry ? $::opt_retry * 3 : undef; +$retries ||= 1; + +if(defined $::opt_processlist) { + unshift @ARGV, processlist($database_driver,%dburl); +} + +if(defined $::opt_tablelist) { + unshift @ARGV, tablelist($database_driver,%dburl); +} + +if(defined $::opt_dblist) { + unshift @ARGV, dblist($database_driver,%dburl); +} + +if(defined $::opt_dbsize) { + unshift @ARGV, dbsize($database_driver,%dburl); +} + +if(defined $::opt_tablesize) { + unshift @ARGV, tablesize($database_driver,%dburl); +} + +my $queryfile = ""; +if($dburl{'query'}) { + my $fh; + ($fh,$queryfile) = tempfile(SUFFIX => ".sql"); + print $fh $dburl{'query'},"\n"; + close $fh; + $batch_command = "(cat $queryfile;rm $queryfile; cat) | $batch_command"; +} + +sub shell_quote($) { + # Quote for other shells (Bourne compatibles) + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted as needed by the shell + my $s = $_[0]; + if($s =~ /[^-_.+a-z0-9\/]/i) { + $s =~ s/'/'"'"'/g; # "-quote single quotes + $s = "'$s'"; # '-quote entire string + $s =~ s/^''//; # Remove unneeded '' at ends + $s =~ s/''$//; # (faster than s/^''|''$//g) + return $s; + } elsif ($s eq "") { + return "''"; + } else { + # No quoting needed + return $s; + } +} + +do { + if(@ARGV) { + # SQL Commands given as arguments: + # Run those commands + $::opt_debug and print "[ | $batch_command]\n"; + $::opt_verbose and print "[ | $batch_command]\n"; + if($database_driver eq "influx" or $database_driver eq "influxssl") { + # Influx currently cannot read commands from stdin + for(@ARGV) { + s/\\n/\n/g; + s/\\x0a/\n/gi; + $::opt_debug and print "[$batch_command -execute $_]\n"; + system("$batch_command -execute ".shell_quote($_)); + } + } else { + open(M,"| $batch_command") || + die("mysql/psql/sqlplus/influx not in path"); + for(@ARGV) { + s/\\n/\n/g; + s/\\x0a/\n/gi; + print M "$_\n"; + } + close M; + } + } else { + if (is_stdin_terminal()) { + # Run interactively + $::opt_debug and print "[$interactive_command]\n"; + $::opt_verbose and print "[$interactive_command]\n"; + system("$interactive_command"); + } else { + # Let the command read from stdin + $::opt_debug and print "[$batch_command]\n"; + $::opt_verbose and print "[$batch_command]\n"; + if($database_driver eq "influx" or $database_driver eq "influxssl") { + # Influx currently cannot read commands from stdin + while(<STDIN>) { + s/\\n/\n/g; + s/\\x0a/\n/gi; + $::opt_debug and print "[$batch_command -execute $_]\n"; + system("$batch_command -execute ".shell_quote($_)); + } + } else{ + system("$batch_command"); + } + } + } + $err = $?>>8; +} while (--$retries and $err); + +$queryfile and unlink $queryfile; + +$Global::Initfile && unlink $Global::Initfile; +exit ($err); + +sub parse_options { + $Global::version = 20221122; + $Global::progname = 'sql'; + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0]=~/^-Y/ or $ARGV[0]=~/^--shebang /)) { + # Program is called from #! line in script + $ARGV[0]=~s/^-Y //; # remove -Y if on its own + $ARGV[0]=~s/^-Y/-/; # remove -Y if bundled with other options + $ARGV[0]=~s/^--shebang //; # remove --shebang if it is set + my $argfile = pop @ARGV; + # exec myself to split @ARGV into separate fields + exec "$0 --skip-first-line < $argfile @ARGV"; + } + Getopt::Long::Configure ("bundling","require_order"); + GetOptions("passthrough|p=s@" => \$::opt_p, + "sep|s=s" => \$::opt_s, + "html" => \$::opt_html, + "show-processlist|proclist|listproc|showqueries|show-queries" + => \$::opt_processlist, + "show-tables|showtables|listtables|list-tables|". + "tablelist|table-list|show-measurements|showmeasurements|". + "list-measurements|listmeasurements" => \$::opt_tablelist, + "dblist|". + "listdb|listdbs|list-db|list-dbs|list-database|". + "list-databases|listdatabases|listdatabase|showdb|". + "showdbs|show-db|show-dbs|show-database|show-databases|". + "showdatabases|showdatabase" => \$::opt_dblist, + "db-size|dbsize" => \$::opt_dbsize, + "table-size|tablesize" => \$::opt_tablesize, + "json|j" => \$::opt_json, + "pretty" => \$::opt_pretty, + "csv" => \$::opt_csv, + "precision=s" => \$::opt_precision, + "noheaders|no-headers|n" => \$::opt_n, + "r" => \$::opt_retry, + "retries=s" => \$::opt_retries, + "debug|D" => \$::opt_debug, + # Shebang #!/usr/bin/parallel -Yotheroptions + "Y|shebang" => \$::opt_shebang, + "skip-first-line" => \$::opt_skip_first_line, + # GNU requirements + "help|h" => \$::opt_help, + "version|V" => \$::opt_version, + "verbose|v" => \$::opt_verbose, + ) || die_usage(); + + if(defined $::opt_help) { die_usage(); } + if(defined $::opt_version) { version(); exit(0); } + $Global::debug = $::opt_debug; +} + +sub database_driver_alias { + my $driver = shift; + my %database_driver_alias = ("mysql" => "mysql", + "mysqls" => "mysqlssl", + "mysqlssl" => "mysqlssl", + "oracle" => "oracle", + "ora" => "oracle", + "oracles" => "oraclessl", + "oras" => "oraclessl", + "oraclessl" => "oraclessl", + "orassl" => "oraclessl", + "postgresql" => "postgresql", + "pgsql" => "postgresql", + "postgres" => "postgresql", + "pg" => "postgresql", + "postgresqlssl" => "postgresqlssl", + "pgsqlssl" => "postgresqlssl", + "postgresssl" => "postgresqlssl", + "pgssl" => "postgresqlssl", + "postgresqls" => "postgresqlssl", + "pgsqls" => "postgresqlssl", + "postgress" => "postgresqlssl", + "pgs" => "postgresqlssl", + "sqlite" => "sqlite", + "sqlite2" => "sqlite", + "sqlite3" => "sqlite3", + "influx" => "influx", + "influxdb" => "influx", + "influxssl" => "influxssl", + "influxdbssl" => "influxssl", + "influxs" => "influxssl", + "influxdbs" => "influxssl", + ); + return $database_driver_alias{$driver}; +} + +sub mysql_commands { + my ($database_driver,%opt) = (@_); + find_command_in_path("mysql") || die ("mysql not in path"); + if(defined($::opt_s)) { die "Field separator not implemented for mysql" } + my $password = defined($opt{'password'}) ? "--password=".$opt{'password'} : ""; + my $host = defined($opt{'host'}) ? "--host=".$opt{'host'} : ""; + my $port = defined($opt{'port'}) ? "--port=".$opt{'port'} : ""; + my $user = defined($opt{'user'}) ? "--user=".$opt{'user'} : ""; + my $database = defined($opt{'database'}) ? $opt{'database'} : $ENV{'USER'}; + my $html = defined($::opt_html) ? "--html" : ""; + my $no_headers = defined($::opt_n) ? "--skip-column-names" : ""; + my $ssl = ""; + if ($database_driver eq "mysqlssl") { $ssl="--ssl"; } + my($credential_fh,$tmp) = tempfile(SUFFIX => ".sql"); + chmod (0600,$tmp); + print $credential_fh ("[client]\n", + $user && "user=$opt{'user'}\n", + $password && "password=$opt{'password'}\n", + $host && "host=$opt{'host'}\n"); + close $credential_fh; + + # Prepend with a remover of the credential tempfile + # -C: Compression if both ends support it + $batch_command = + "((sleep 1; rm $tmp) & ". + "mysql --defaults-extra-file=$tmp -C $pass_through_options ". + "$no_headers $html $ssl $host $user $port $database)"; + $interactive_command = $batch_command; + return($batch_command,$interactive_command); +} + +sub postgresql_commands { + my ($database_driver,%opt) = (@_); + find_command_in_path("psql") || die ("psql not in path"); + my $sep = ($::opt_s) ? "-A --field-separator '$::opt_s'" : ""; + my $password = defined($opt{'password'}) ? + "PGPASSWORD=".$opt{'password'} : ""; + my $host = defined($opt{'host'}) ? "-h ".$opt{'host'} : ""; + my $port = defined($opt{'port'}) ? "-p ".$opt{'port'} : ""; + my $user = defined($opt{'user'}) ? "-U ".$opt{'user'} : ""; + my $database = defined($opt{'database'}) ? "-d ".$opt{'database'} : ""; + my $html = defined($::opt_html) ? "--html" : ""; + my $no_headers = defined($::opt_n) ? "--tuples-only" : ""; + my $ssl = ""; + if ($database_driver eq "postgresqlssl") { $ssl="PGSSLMODE=require"; } + $batch_command = + "$ssl $password psql $pass_through_options $sep $no_headers ". + "$html $host $user $port $database"; + $interactive_command = $batch_command; + + return($batch_command,$interactive_command); +} + +sub oracle_commands { + my ($database_driver,%opt) = (@_); + # oracle://user:pass@grum:1521/XE becomes: + # sqlplus 'user/pass@(DESCRIPTION = (ADDRESS = (PROTOCOL = TCP)(HOST = grum)(PORT = 1521)) (CONNECT_DATA =(SERVER = DEDICATED)(SERVICE_NAME = XE) ))' + my $sqlplus = find_command_in_path("sqlplus") || + find_command_in_path("sqlplus64") or + die("sqlplus/sqlplus64 not in path"); + + # Readline support: if rlwrap installed run rlwrap sqlplus + my $rlwrap = find_command_in_path("rlwrap"); + + # Set good defaults in the inifile + $Global::Initfile = "/tmp/$$.sql.init"; + open(INIT,">".$Global::Initfile) || die; + print INIT "SET LINESIZE 32767\n"; + $::opt_debug and print "SET LINESIZE 32767\n"; + print INIT "SET WRAP OFF\n"; + $::opt_debug and print "SET WRAP OFF\n"; + if(defined($::opt_html)) { + print INIT "SET MARK HTML ON\n"; + $::opt_debug and print "SET MARK HTML ON\n"; + } + if(defined($::opt_n)) { + print INIT "SET PAGES 0\n"; + $::opt_debug and print "SET PAGES 0\n"; + } else { + print INIT "SET PAGES 50000\n"; + $::opt_debug and print "SET PAGES 50000\n"; + } + if(defined($::opt_s)) { + print INIT "SET COLSEP $::opt_s\n"; + $::opt_debug and print "SET COLSEP $::opt_s\n"; + } + close INIT; + + my $password = defined($opt{'password'}) ? "/".$opt{'password'} : ""; + my $host = defined($opt{'host'}) ? $opt{'host'} : "localhost"; + my $port = defined($opt{'port'}) ? $opt{'port'} : "1521"; + my $user = defined($opt{'user'}) ? $opt{'user'} : ""; + # Database is called service in Oracle lingo + my $service = defined($opt{'database'}) ? "(SERVICE_NAME = ".$opt{'database'}.")" : ""; + my $tns = "(DESCRIPTION = (ADDRESS = (PROTOCOL = TCP)(HOST = $host)(PORT = $port)) ". + "(CONNECT_DATA =(SERVER = DEDICATED)$service ))"; + my $ssl = ""; + # -L: Do not re-ask for password if it is wrong + my $common_options = "-L $pass_through_options ". + "'$user$password\@$tns' \@$Global::Initfile"; + my $batch_command = "$sqlplus -S ".$common_options; + my $interactive_command = "$rlwrap $sqlplus ".$common_options; + return($batch_command,$interactive_command); +} + +sub sqlite_commands { + my ($database_driver,%opt) = (@_); + if(not find_command_in_path($database_driver)) { + print STDERR "Database driver '$database_driver' not supported\n"; + exit -1; + } + my $sep = defined($::opt_s) ? "-separator '$::opt_s'" : ""; + my $password = defined($opt{'password'}) ? "--password=".$opt{'password'} : ""; + my $host = defined($opt{'host'}) ? "--host=".$opt{'host'} : ""; + my $port = defined($opt{'port'}) ? "--port=".$opt{'port'} : ""; + my $user = defined($opt{'user'}) ? "--user=".$opt{'user'} : ""; + my $database = defined($opt{'database'}) ? $opt{'database'} : ""; + my $html = defined($::opt_html) ? "-html" : ""; + my $no_headers = defined($::opt_n) ? "-noheader" : "-header"; + my $ssl = ""; + $batch_command = + "$database_driver $pass_through_options $sep ". + "$no_headers $html $database"; + $interactive_command = $batch_command; + return($batch_command,$interactive_command); +} + +sub influx_commands { + my ($database_driver,%opt) = (@_); + my $influx = find_command_in_path("influx") || + die ("influx not in path"); + if(defined($::opt_s)) { + die "Field separator not implemented for influx"; + } + my $password = + defined($opt{'password'}) ? "-password=".$opt{'password'} : ""; + my $host = defined($opt{'host'}) ? "-host=".$opt{'host'} : ""; + my $port = defined($opt{'port'}) ? "-port=".$opt{'port'} : ""; + my $user = defined($opt{'user'}) ? "-username=".$opt{'user'} : ""; + my $database = defined($opt{'database'}) ? + "-database $opt{'database'}" : "-database $ENV{'USER'}"; + + my $format = defined($::opt_json) ? "-format json" : + defined($::opt_pretty) ? "-format json -pretty" : + defined($::opt_csv) ? "-format csv" : ""; + my $precision = defined($::opt_precision) ? + "-precision $::opt_precision" : ""; + + my $no_headers = defined($::opt_n) ? "--skip-column-names" : ""; + my $ssl = ""; + if($database_driver eq "influxssl") { $ssl="--ssl"; } + + $batch_command = + "$influx $pass_through_options $no_headers $format ". + "$precision $ssl $host $user $password $port $database"; + $interactive_command = $batch_command; + return($batch_command,$interactive_command); +} + +# Return the code for 'show processlist' in the chosen database dialect +sub processlist { + my $dbdriver = shift; + my %dburl = @_; + my %statement = + ("mysql" => "show processlist;", + "postgresql" => ("SELECT ". + " datname AS database,". + " usename AS username,". + " now()-xact_start AS runtime,". + " current_query ". + "FROM pg_stat_activity ". + "ORDER BY runtime DESC;"), + "oracle" => ("SELECT ". + ' CPU_TIME/100000, SYS.V_$SQL.SQL_TEXT, USERNAME '. + "FROM ". + ' SYS.V_$SQL, SYS.V_$SESSION '. + "WHERE ". + ' SYS.V_$SQL.SQL_ID = SYS.V_$SESSION.SQL_ID(+) '. + "AND username IS NOT NULL ". + "ORDER BY CPU_TIME DESC;"), + "influx" => "show queries;", + ); + if($statement{$dbdriver}) { + return $statement{$dbdriver}; + } else { + print STDERR "processlist is not implemented for $dbdriver\n"; + exit 1; + } +} + +# Return the code for 'show tables' in the chosen database dialect +sub tablelist { + my $dbdriver = shift; + my %dburl = @_; + my %statement = + ("mysql" => "show tables;", + "postgresql" => '\dt', + "oracle" => ("SELECT object_name ". + "FROM user_objects ". + "WHERE object_type = 'TABLE';"), + "sqlite" => ".tables", + "sqlite3" => ".tables", + "influx" => "show measurements;", + ); + if($statement{$dbdriver}) { + return $statement{$dbdriver}; + } else { + print STDERR "tablelist is not implemented for $dbdriver\n"; + exit 1; + } +} + +# Return the code for 'show databases' in the chosen database dialect +sub dblist { + my $dbdriver = shift; + my %dburl = @_; + my %statement = + ("mysql" => "show databases;", + "postgresql" => ("SELECT datname FROM pg_database ". + "WHERE datname NOT IN ('template0','template1','postgres') ". + "ORDER BY datname ASC;"), + "oracle" => ("select * from user_tablespaces;"), + "influx" => "show databases;", + ); + if($statement{$dbdriver}) { + return $statement{$dbdriver}; + } else { + print STDERR "dblist is not implemented for $dbdriver\n"; + exit 1; + } +} + +# Return the code for 'show database size' in the chosen database dialect +sub dbsize { + my $dbdriver = shift; + my %dburl = @_; + my %statement; + if(defined $dburl{'database'}) { + %statement = + ("mysql" => ( + 'SELECT '. + ' table_schema "database", '. + ' SUM(data_length+index_length) "bytes", '. + ' SUM(data_length+index_length)/1024/1024 "megabytes" '. + 'FROM information_schema.TABLES '. + "WHERE table_schema = '$dburl{'database'}'". + 'GROUP BY table_schema;'), + "postgresql" => ( + "SELECT '$dburl{'database'}' AS database, ". + "pg_database_size('$dburl{'database'}') AS bytes, ". + "pg_size_pretty(pg_database_size('$dburl{'database'}')) AS human_readabable "), + "sqlite" => ( + "SELECT ".(undef_as_zero(-s $dburl{'database'}))." AS bytes;"), + "sqlite3" => ( + "SELECT ".(undef_as_zero(-s $dburl{'database'}))." AS bytes;"), + ); + } else { + %statement = + ("mysql" => ( + 'SELECT '. + ' table_schema "database", '. + ' SUM(data_length+index_length) "bytes", '. + ' SUM(data_length+index_length)/1024/1024 "megabytes" '. + 'FROM information_schema.TABLES '. + 'GROUP BY table_schema;'), + "postgresql" => ( + 'SELECT datname AS database, pg_database_size(datname) AS bytes, '. + 'pg_size_pretty(pg_database_size(datname)) AS human_readabable '. + 'FROM (SELECT datname FROM pg_database) a;'), + "sqlite" => ( + "SELECT 0 AS bytes;"), + "sqlite3" => ( + "SELECT 0 AS bytes;"), + ); + } + if($statement{$dbdriver}) { + return $statement{$dbdriver}; + } else { + print STDERR "dbsize is not implemented for $dbdriver\n"; + exit 1; + } +} + + +# Return the code for 'show table size' in the chosen database dialect +sub tablesize { + my $dbdriver = shift; + my $database = shift; + my %statement = + ("postgresql" => ( + "SELECT relname, relpages*8 AS kb, reltuples::int AS \"live+dead rows\" ". + "FROM pg_class c ". + "ORDER BY relpages DESC;"), + "mysql" => ( + "select table_name, TABLE_ROWS, DATA_LENGTH,INDEX_LENGTH from INFORMATION_SCHEMA.tables;"), + ); + if($statement{$dbdriver}) { + return $statement{$dbdriver}; + } else { + print STDERR "table size is not implemented for $dbdriver\n"; + exit 1; + } +} + +sub is_stdin_terminal { + return (-t STDIN); +} + +sub find_command_in_path { + # Find the command if it exists in the current path + my $command = shift; + my $path = `which $command`; + chomp $path; + return $path; +} + +sub Usage { + if(@_) { + print "Error:\n"; + print map{ "$_\n" } @_; + print "\n"; + } + print "sql [-hnr] [--table-size] [--db-size] [-p pass-through] [-s string] dburl [command]\n"; +} + +sub get_alias { + my $alias = shift; + $alias =~ s/^(sql:)*//; # Accept aliases prepended with sql: + if ($alias !~ /^:/) { + return $alias; + } + + # Find the alias + my $path; + if (-l $0) { + ($path) = readlink($0) =~ m|^(.*)/|; + } else { + ($path) = $0 =~ m|^(.*)/|; + } + + my @deprecated = ("$ENV{HOME}/.dburl.aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for (@deprecated) { + if(-r $_) { + print STDERR "$_ is deprecated. Use .sql/aliases instead (read man sql)\n"; + } + } + my @urlalias=(); + check_permissions("$ENV{HOME}/.sql/aliases"); + check_permissions("$ENV{HOME}/.dburl.aliases"); + my @search = ("$ENV{HOME}/.sql/aliases", + "$ENV{HOME}/.dburl.aliases", "/etc/sql/aliases", + "$path/dburl.aliases", "$path/dburl.aliases.dist"); + for my $alias_file (@search) { + if(-r $alias_file) { + push @urlalias, `cat "$alias_file"`; + } + } + my ($alias_part,$rest) = $alias=~/(:\w*)(.*)/; + # If we saw this before: we have an alias loop + if(grep {$_ eq $alias_part } @Private::seen_aliases) { + print STDERR "$alias_part is a cyclic alias\n"; + exit -1; + } else { + push @Private::seen_aliases, $alias_part; + } + + my $dburl; + for (@urlalias) { + /^$alias_part\s+(\S+.*)/ and do { $dburl = $1; last; } + } + + if($dburl) { + return get_alias($dburl.$rest); + } else { + Usage("$alias is not defined in @search"); + exit(-1); + } +} + +sub check_permissions { + my $file = shift; + + if(-e $file) { + if(not -o $file) { + my $username = (getpwuid($<))[0]; + print STDERR "$file should be owned by $username: chown $username $file\n"; + } + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) = stat($file); + if($mode & 077) { + my $username = (getpwuid($<))[0]; + print STDERR "$file should be only be readable by $username: chmod 600 $file\n"; + } + } +} + +sub parse_dburl { + my $url = shift; + my %options = (); + # sql:mysql://[[user][:password]@][host][:port]/[database[?sql query]] + + if($url=~m!(?:sql:)? # You can prefix with 'sql:' + ((?:oracle|ora|mysql|pg|postgres|postgresql|influx|influxdb)(?:s|ssl|)| + (?:sqlite|sqlite2|sqlite3)):// # Databasedriver ($1) + (?: + ([^:@/][^:@]*|) # Username ($2) + (?: + :([^@]*) # Password ($3) + )? + @)? + ([^:/]*)? # Hostname ($4) + (?: + : + ([^/]*)? # Port ($5) + )? + (?: + / + ([^?/]*)? # Database ($6) + )? + (?: + /? + \? + (.*)? # Query ($7) + )? + !x) { + $options{databasedriver} = undef_if_empty(uri_unescape($1)); + $options{user} = undef_if_empty(uri_unescape($2)); + $options{password} = undef_if_empty(uri_unescape($3)); + $options{host} = undef_if_empty(uri_unescape($4)); + $options{port} = undef_if_empty(uri_unescape($5)); + $options{database} = undef_if_empty(uri_unescape($6)) + || $options{user}; + $options{query} = undef_if_empty(uri_unescape($7)); + debug("dburl $url\n"); + debug("databasedriver ",$options{databasedriver}, " user ", $options{user}, + " password ", $options{password}, " host ", $options{host}, + " port ", $options{port}, " database ", $options{database}, + " query ",$options{query}, "\n"); + } else { + Usage("$url is not a valid DBURL"); + exit -1; + } + return %options; +} + +sub uri_unescape { + # Copied from http://cpansearch.perl.org/src/GAAS/URI-1.55/URI/Escape.pm + # to avoid depending on URI::Escape + # This section is (C) Gisle Aas. + # Note from RFC1630: "Sequences which start with a percent sign + # but are not followed by two hexadecimal characters are reserved + # for future extension" + my $str = shift; + if (@_ && wantarray) { + # not executed for the common case of a single argument + my @str = ($str, @_); # need to copy + foreach (@str) { + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + } + return @str; + } + $str =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $str; + $str; +} + +sub undef_if_empty { + if(defined($_[0]) and $_[0] eq "") { + return undef; + } + return $_[0]; +} + +sub undef_as_zero { + my $a = shift; + return $a ? $a : 0; +} + +sub version { + # Returns: N/A + print join("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2009,2010,2011,2012,2013,2014,2015,2016,2017", + "Ole Tange and Free Software Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: http://www.gnu.org/software/${Global::progname}\n", + "When using GNU $Global::progname for a publication please cite:\n", + "O. Tange (2011): GNU SQL - A Command Line Tool for Accessing Different", + "Databases Using DBURLs, ;login: The USENIX Magazine, April 2011:29-32.\n" + ); +} + +sub die_usage { + # Returns: N/A + usage(); + exit(255); +} + +sub usage { + # Returns: N/A + print "Usage:\n"; + print "$Global::progname [options] dburl [sqlcommand]\n"; + print "$Global::progname [options] dburl < sql_command_file\n"; + print "\n"; + print "See 'man $Global::progname' for the options\n"; +} + +sub debug { + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + print @_; +} + +$::opt_skip_first_line = $::opt_shebang = 0; |