diff --git a/niceload b/niceload
new file mode 100755
index 0000000..ba7af53
--- /dev/null
+++ b/niceload
@@ -0,0 +1,882 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2004,2005,2006,2006,2008,2009,2010 Ole Tange,
+# http://ole.tange.dk
+#
+# Copyright (C) 2010,2011,2012,2013,2014,2015 Ole Tange,
+# http://ole.tange.dk and Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see
+# or write to the Free Software Foundation, Inc., 51 Franklin St,
+# Fifth Floor, Boston, MA 02110-1301 USA
+
+use strict;
+use Getopt::Long;
+$Global::progname="niceload";
+$Global::version = 20150522;
+Getopt::Long::Configure("bundling","require_order");
+get_options_from_array(\@ARGV) || die_usage();
+if($opt::version) {
+ version();
+ exit 0;
+}
+if($opt::help) {
+ help();
+ exit 0;
+}
+if($opt::factor and $opt::suspend) {
+ # You cannot have --suspend and --factor
+ help();
+ exit;
+}
+
+if(not (defined $opt::start_io or defined $opt::run_io
+ or defined $opt::start_load or defined $opt::run_load
+ or defined $opt::start_mem or defined $opt::run_mem
+ or defined $opt::start_noswap or defined $opt::run_noswap
+ or defined $opt::io or defined $opt::load
+ or defined $opt::mem or defined $opt::noswap)) {
+ # Default is --runload=1
+ $opt::run_load = 1;
+}
+
+if(not defined $opt::start_io) { $opt::start_io = $opt::io; }
+if(not defined $opt::run_io) { $opt::run_io = $opt::io; }
+if(not defined $opt::start_load) { $opt::start_load = $opt::load; }
+if(not defined $opt::run_load) { $opt::run_load = $opt::load; }
+if(not defined $opt::start_mem) { $opt::start_mem = $opt::mem; }
+if(not defined $opt::run_mem) { $opt::run_mem = $opt::mem; }
+if(not defined $opt::start_noswap) { $opt::start_noswap = $opt::noswap; }
+if(not defined $opt::run_noswap) { $opt::run_noswap = $opt::noswap; }
+
+if(defined $opt::load) { multiply_binary_prefix($opt::load); }
+
+my $limit = Limit->new();
+my $process = Process->new($opt::nice,@ARGV);
+$::exitstatus = 0;
+if(@opt::prg) {
+ # Find all pids of prg
+ my $out = `pidof -x @opt::prg`;
+ $process->set_pid(split /\s+/,$out);
+ $::resume_process = $process;
+ $SIG{TERM} = $SIG{INT} = \&resume;
+} elsif(@opt::pid) {
+ $process->set_pid(@opt::pid);
+ $::resume_process = $process;
+ $SIG{TERM} = $SIG{INT} = \&resume;
+} elsif (@ARGV) {
+ # Wait until limit is below start_limit and run_limit
+ while($limit->over_start_limit()
+ or
+ ($limit->hard() and $limit->over_run_limit())) {
+ $limit->sleep_for_recheck();
+ }
+ $process->start();
+}
+
+while($process->is_alive()) {
+ if($limit->over_run_limit()) {
+ $process->suspend();
+ $limit->sleep_for_recheck();
+ if(not $limit->hard()) {
+ $process->resume();
+ $limit->sleep_while_running();
+ }
+ } else {
+ $process->resume();
+ $limit->sleep_while_running();
+ }
+}
+
+exit($::exitstatus);
+
+sub resume {
+ $::resume_process->resume();
+ exit(0);
+}
+
+sub uniq {
+ # Remove duplicates and return unique values
+ return keys %{{ map { $_ => 1 } @_ }};
+}
+
+sub multiply_binary_prefix {
+ # Evalualte numbers with binary prefix
+ # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
+ # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80
+ # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
+ # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
+ # 13G = 13*1024*1024*1024 = 13958643712
+ my $s = shift;
+ $s =~ s/k/*1000/g;
+ $s =~ s/M/*1000*1000/g;
+ $s =~ s/G/*1000*1000*1000/g;
+ $s =~ s/T/*1000*1000*1000*1000/g;
+ $s =~ s/P/*1000*1000*1000*1000*1000/g;
+ $s =~ s/E/*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
+
+ $s =~ s/Ki?/*1024/gi;
+ $s =~ s/Mi?/*1024*1024/gi;
+ $s =~ s/Gi?/*1024*1024*1024/gi;
+ $s =~ s/Ti?/*1024*1024*1024*1024/gi;
+ $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi;
+ $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s = eval $s;
+ return $s;
+}
+
+sub get_options_from_array {
+ # Run GetOptions on @array
+ # Returns:
+ # true if parsing worked
+ # false if parsing failed
+ # @array is changed
+ my $array_ref = shift;
+ # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
+ # supported everywhere
+ my @save_argv;
+ my $this_is_ARGV = (\@::ARGV == $array_ref);
+ if(not $this_is_ARGV) {
+ @save_argv = @::ARGV;
+ @::ARGV = @{$array_ref};
+ }
+ my @retval = GetOptions
+ ("debug|D" => \$opt::debug,
+ "factor|f=s" => \$opt::factor,
+ "hard|H" => \$opt::hard,
+ "soft|S" => \$opt::soft,
+ "sensor=s" => \$opt::sensor,
+
+ "si|sio|startio|start-io=s" => \$opt::start_io,
+ "ri|rio|runio|run-io=s" => \$opt::run_io,
+ "io|I=s" => \$opt::io,
+
+ "sl|startload|start-load=s" => \$opt::start_load,
+ "rl|runload|run-load=s" => \$opt::run_load,
+ "load|L|l=s" => \$opt::load,
+
+ "sm|startmem|start-mem=s" => \$opt::start_mem,
+ "rm|runmem|run-mem=s" => \$opt::run_mem,
+ "mem|M=s" => \$opt::mem,
+
+ "sn|startnoswap|start-noswap|start-no-swap" => \$opt::start_noswap,
+ "rn|runnoswap|run-noswap|run-no-swap" => \$opt::run_noswap,
+ "noswap|N" => \$opt::noswap,
+
+ # niceload -l -1 --sensor 'cat /sys/class/power_supply/BAT0/status /proc/acpi/battery/BAT0/state 2>/dev/null |grep -i -q discharging; echo $?'
+ "battery|B" => \$opt::battery,
+
+ "nice|n=i" => \$opt::nice,
+ "program|prg=s" => \@opt::prg,
+ "process|pid|p=s" => \@opt::pid,
+ "suspend|s=s" => \$opt::suspend,
+ "recheck|t=s" => \$opt::recheck,
+ "quote|q" => \$opt::quote,
+ "help|h" => \$opt::help,
+ "verbose|v" => \$opt::verbose,
+ "version|V" => \$opt::version,
+ );
+ if(not $this_is_ARGV) {
+ @{$array_ref} = @::ARGV;
+ @::ARGV = @save_argv;
+ }
+ return @retval;
+}
+
+
+sub die_usage {
+ help();
+ exit 1;
+}
+
+
+sub help {
+ print q{
+Usage:
+ niceload [-v] [-n niceness] [-L loadavg] [-I io] [-N] [-M mem]
+ [-s suspend_sec|-f factor] [-H] [-S]
+ command or -p pid
+};
+}
+
+
+sub die_bug {
+ my $bugid = shift;
+ print STDERR
+ ("$Global::progname: This should not happen. You have found a bug.\n",
+ "Please contact and include:\n",
+ "* The version number: $Global::version\n",
+ "* The bugid: $bugid\n",
+ "* The command line being run\n",
+ "* The files being read (put the files on a webserver if they are big)\n",
+ "\n",
+ "If you get the error on smaller/fewer files, please include those instead.\n");
+ exit(255);
+}
+
+
+sub usleep {
+ # Sleep this many milliseconds.
+ my $secs = shift;
+ ::debug("Sleeping ",$secs," millisecs\n");
+ select(undef, undef, undef, $secs/1000);
+}
+
+
+sub debug {
+ if($opt::debug) {
+ print STDERR @_;
+ }
+}
+
+
+sub my_dump {
+ # Returns:
+ # ascii expression of object if Data::Dump(er) is installed
+ # error code otherwise
+ my @dump_this = (@_);
+ eval "use Data::Dump qw(dump);";
+ if ($@) {
+ # Data::Dump not installed
+ eval "use Data::Dumper;";
+ if ($@) {
+ my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
+ "Not dumping output\n";
+ print STDERR $err;
+ return $err;
+ } else {
+ return Dumper(@dump_this);
+ }
+ } else {
+ eval "use Data::Dump qw(dump);";
+ return (Data::Dump::dump(@dump_this));
+ }
+}
+
+
+sub version {
+ # Returns: N/A
+ print join("\n",
+ "GNU $Global::progname $Global::version",
+ "Copyright (C) 2004,2005,2006,2007,2008,2009 Ole Tange",
+ "Copyright (C) 2010,2011 Ole Tange and Free Software Foundation, Inc.",
+ "License GPLv3+: GNU GPL version 3 or later ",
+ "This is free software: you are free to change and redistribute it.",
+ "GNU $Global::progname comes with no warranty.",
+ "",
+ "Web site: http://www.gnu.org/software/parallel\n"
+ );
+}
+
+
+sub max {
+ # Returns:
+ # Maximum value of array
+ my $max;
+ for (@_) {
+ # Skip undefs
+ defined $_ or next;
+ defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
+ $max = ($max > $_) ? $max : $_;
+ }
+ return $max;
+}
+
+sub min {
+ # Returns:
+ # Minimum value of array
+ my $min;
+ for (@_) {
+ # Skip undefs
+ defined $_ or next;
+ defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
+ $min = ($min < $_) ? $min : $_;
+ }
+ return $min;
+}
+
+
+package Process;
+
+sub new {
+ my $class = shift;
+ my $nice = shift;
+ my @ARGV = @_;
+ if($nice) {
+ unshift(@ARGV, "nice", "-n", $nice);
+ }
+ return bless {
+ 'running' => 0, # Is the process running now?
+ 'command' => [@ARGV],
+ }, ref($class) || $class;
+}
+
+sub pgrp {
+ my $self = shift;
+ my @pgrp;
+ if(not $self->{'pgrp'}) {
+ for(@{$self->{'pids'}}) {
+ push @pgrp,-getpgrp($_);
+ }
+ @pgrp = ::uniq(@pgrp);
+ @{$self->{'pgrp'}} = @pgrp;
+ }
+ return @{$self->{'pgrp'}};
+}
+
+sub set_pid {
+ my $self = shift;
+ push(@{$self->{'pids'}},@_);
+ $self->{'running'} = 1;
+ $::exitstatus = 0;
+}
+
+sub start {
+ # Start the program
+ my $self = shift;
+ ::debug("Starting @{$self->{'command'}}\n");
+ $self->{'running'} = 1;
+ if($self->{'pid'} = fork) {
+ # set signal handler to kill children if parent is killed
+ push @{$self->{'pids'}}, $self->{'pid'};
+ $Global::process = $self;
+ $SIG{CHLD} = \&REAPER;
+ $SIG{INT}=\&kill_child_INT;
+ $SIG{TSTP}=\&kill_child_TSTP;
+ $SIG{CONT}=\&kill_child_CONT;
+ sleep 1; # Give child time to setpgrp(0,0);
+ } else {
+ setpgrp(0,0);
+ ::debug("Child pid: $$, pgrp: ",getpgrp $$,"\n");
+ ::debug("@{$self->{'command'}}\n");
+ if($opt::quote) {
+ system(@{$self->{'command'}});
+ } else {
+ system("@{$self->{'command'}}");
+ }
+ $::exitstatus = $? >> 8;
+ $::exitsignal = $? & 127;
+ ::debug("Child exit $::exitstatus\n");
+ exit($::exitstatus);
+ }
+}
+
+use POSIX ":sys_wait_h";
+use POSIX qw(:sys_wait_h);
+
+sub REAPER {
+ my $stiff;
+ while (($stiff = waitpid(-1, &WNOHANG)) > 0) {
+ # do something with $stiff if you want
+ $::exitstatus = $? >> 8;
+ $::exitsignal = $? & 127;
+ }
+ $SIG{CHLD} = \&REAPER; # install *after* calling waitpid
+}
+
+
+sub kill_child_CONT {
+ my $self = $Global::process;
+ ::debug("SIGCONT received. Killing @{$self->{'pgrp'}}\n");
+ kill CONT => $self->pgrp();
+}
+
+
+sub kill_child_TSTP {
+ my $self = $Global::process;
+ ::debug("SIGTSTP received. Killing $self->{'pid'} and self ($$)\n");
+ kill TSTP => $self->pgrp();
+ kill STOP => -$$;
+ kill STOP => $$;
+}
+
+
+sub kill_child_INT {
+ my $self = $Global::process;
+ ::debug("SIGINT received.\n");
+ if(not @opt::pid) {
+ ::debug("Killing $self->{'pid'} Exit\n");
+ kill INT => $self->pgrp();
+ } else {
+ ::debug("Continue pids $self->{'pid'} Exit\n");
+ kill CONT => $self->pgrp();
+ }
+ exit;
+}
+
+
+sub resume {
+ my $self = shift;
+ ::debug("Resume @{$self->{'pids'}}\n");
+ if(not $self->{'running'}) {
+ # - = PID group
+ map { kill "CONT", -$_ } @{$self->{'pids'}};
+ # If using -p it is not in a group
+ map { kill "CONT", $_ } @{$self->{'pids'}};
+ $self->{'running'} = 1;
+ }
+}
+
+
+sub suspend {
+ my $self = shift;
+ ::debug("Suspend @{$self->{'pids'}}\n");
+ if($self->{'running'}) {
+ # - = PID group
+ map { kill "STOP", -$_ } @{$self->{'pids'}};
+ # If using -p it is not in a group
+ map { kill "STOP", $_ } @{$self->{'pids'}};
+ $self->{'running'} = 0;
+ }
+}
+
+
+sub is_alive {
+ # The process is dead if none of the pids exist
+ my $self = shift;
+ my ($exists) = 0;
+ for my $pid (@{$self->{'pids'}}) {
+ if(kill 0 => $pid) { $exists++ }
+ }
+ ::debug("is_alive: $exists\n");
+ return $exists;
+}
+
+
+package Limit;
+
+sub new {
+ my $class = shift;
+ my %limits = @_;
+ my $hard = $opt::soft ? 0 : $opt::hard;
+ my $runio = $opt::run_io ? ::multiply_binary_prefix($opt::run_io) : 0;
+ my $startio = $opt::start_io ? ::multiply_binary_prefix($opt::start_io) : 0;
+ my $runload = $opt::run_load ? ::multiply_binary_prefix($opt::run_load) : 0;
+ my $startload = $opt::start_load ? ::multiply_binary_prefix($opt::start_load) : 0;
+ my $runmem = $opt::run_mem ? ::multiply_binary_prefix($opt::run_mem) : 0;
+ my $startmem = $opt::start_mem ? ::multiply_binary_prefix($opt::start_mem) : 0;
+ my $runnoswap = $opt::run_noswap ? ::multiply_binary_prefix($opt::run_noswap) : 0;
+ my $startnoswap = $opt::start_noswap ? ::multiply_binary_prefix($opt::start_noswap) : 0;
+ my $recheck = $opt::recheck ? ::multiply_binary_prefix($opt::recheck) : 1; # Default
+ my $runtime = $opt::suspend ? ::multiply_binary_prefix($opt::suspend) : 1; # Default
+
+ return bless {
+ 'hard' => $hard,
+ 'recheck' => $recheck,
+ 'runio' => $runio,
+ 'startio' => $startio,
+ 'runload' => $runload,
+ 'startload' => $startload,
+ 'runmem' => $runmem,
+ 'startmem' => $startmem,
+ 'runnoswap' => $runnoswap,
+ 'startnoswap' => $startnoswap,
+ 'factor' => $opt::factor || 1,
+ 'recheck' => $recheck,
+ 'runtime' => $runtime,
+ 'over_run_limit' => 1,
+ 'over_start_limit' => 1,
+ 'verbose' => $opt::verbose,
+ }, ref($class) || $class;
+}
+
+
+sub over_run_limit {
+ my $self = shift;
+ my $status = 0;
+ if($self->{'runmem'}) {
+ # mem should be between 0-10ish
+ # 100% available => 0 (1-1)
+ # 50% available => 1 (2-1)
+ # 10% available => 9 (10-1)
+ my $mem = $self->mem_status();
+ ::debug("Run memory: $self->{'runmem'}/$mem\n");
+ $status += (::max(1,$self->{'runmem'}/$mem)-1);
+ }
+ if($self->{'runload'}) {
+ # If used with other limits load should be between 0-10ish
+ no warnings 'numeric';
+ my $load = $self->load_status();
+ if($self->{'runload'} > 0) {
+ # Stop if the load is above the limit
+ $status += ::max(0,$load - $self->{'runload'});
+ } else {
+ # Stop if the load is below the limit (for sensor)
+ $status += ::max(0,-$load - $self->{'runload'});
+ }
+ }
+ if($self->{'runnoswap'}) {
+ # swap should be between 0-10ish
+ # swap in or swap out or no swap = 0
+ # else log(swapin*swapout)
+ my $swap = $self->swap_status();
+ $status += log(::max(1, $swap - $self->{'runnoswap'}));
+ }
+ if($self->{'runio'}) {
+ my $io = $self->io_status();
+ $status += ::max(0,$io - $self->{'runio'});
+ }
+ $self->{'over_run_limit'} = $status;
+ if(not $opt::recheck) {
+ $self->{'recheck'} = $self->{'factor'} * $self->{'over_run_limit'};
+ }
+ ::debug("over_run_limit: $status\n");
+ return $self->{'over_run_limit'};
+}
+
+sub over_start_limit {
+ my $self = shift;
+ my $status = 0;
+ if($self->{'startmem'}) {
+ # mem should be between 0-10ish
+ # 100% available => 0 (1-1)
+ # 50% available => 1 (2-1)
+ # 10% available => 9 (10-1)
+ my $mem = $self->mem_status();
+ ::debug("Start memory: $self->{'startmem'}/$mem\n");
+ $status += (::max(1,$self->{'startmem'}/$mem)-1);
+ }
+ if($self->{'startload'}) {
+ # load should be between 0-10ish
+ # 0 load => 0
+ no warnings 'numeric';
+ my $load = $self->load_status();
+ if($self->{'startload'} > 0) {
+ # Stop if the load is above the limit
+ $status += ::max(0,$load - $self->{'startload'});
+ } else {
+ # Stop if the load is below the limit (for sensor)
+ $status += ::max(0,-$load - $self->{'startload'});
+ }
+ }
+ if($self->{'startnoswap'}) {
+ # swap should be between 0-10ish
+ # swap in or swap out or no swap = 0
+ # else log(swapin*swapout)
+ my $swap = $self->swap_status();
+ $status += log(::max(1, $swap - $self->{'startnoswap'}));
+ }
+ if($self->{'startio'}) {
+ my $io = $self->io_status();
+ $status += ::max(0,$io - $self->{'startio'});
+ }
+ $self->{'over_start_limit'} = $status;
+ if(not $opt::recheck) {
+ $self->{'recheck'} = $self->{'factor'} * $self->{'over_start_limit'};
+ }
+ ::debug("over_start_limit: $status\n");
+ return $self->{'over_start_limit'};
+}
+
+
+sub hard {
+ my $self = shift;
+ return $self->{'hard'};
+}
+
+
+sub verbose {
+ my $self = shift;
+ return $self->{'verbose'};
+}
+
+
+sub sleep_for_recheck {
+ my $self = shift;
+ if($self->{'recheck'} < 0.5) {
+ # Never sleep less than 0.5 sec
+ $self->{'recheck'} = 0.5;
+ }
+ if($self->verbose()) {
+ $self->{'recheck'} = int($self->{'recheck'}*100)/100;
+ print STDERR "Sleeping $self->{'recheck'}s\n";
+ }
+ ::debug("recheck in $self->{'recheck'}s\n");
+ ::usleep(1000*$self->{'recheck'});
+}
+
+
+sub sleep_while_running {
+ my $self = shift;
+ ::debug("check in $self->{'runtime'}s\n");
+ if($self->verbose()) {
+ $self->{'runtime'} = int($self->{'runtime'}*100)/100;
+ print STDERR "Running $self->{'runtime'}s\n";
+ }
+ ::usleep(1000*$self->{'runtime'});
+}
+
+
+sub nonblockGetLines {
+ # An non-blocking filehandle read that returns an array of lines read
+ # Returns: ($eof,@lines)
+ # Example: --sensor 'vmstat 1 | perl -ane '\''$|=1; 4..0 and print $F[8],"\n"'\'
+ my ($fh,$timeout) = @_;
+
+ $timeout = 0 unless defined $timeout;
+ my $rfd = '';
+ $::nonblockGetLines_last{$fh} = ''
+ unless defined $::nonblockGetLines_last{$fh};
+
+ vec($rfd,fileno($fh),1) = 1;
+ return unless select($rfd, undef, undef, $timeout)>=0;
+ # I'm not sure the following is necessary?
+ return unless vec($rfd,fileno($fh),1);
+ my $buf = '';
+ my $n = sysread($fh,$buf,1024*1024);
+
+ my $eof = eof($fh);
+ # If we're done, make sure to send the last unfinished line
+ return ($eof,$::nonblockGetLines_last{$fh}) unless $n;
+ # Prepend the last unfinished line
+ $buf = $::nonblockGetLines_last{$fh}.$buf;
+ # And save any newly unfinished lines
+ $::nonblockGetLines_last{$fh} =
+ (substr($buf,-1) !~ /[\r\n]/ && $buf =~ s/([^\r\n]*)$//)
+ ? $1 : '';
+ $buf ? ($eof,split(/\n/,$buf)) : ($eof);
+}
+
+sub read_sensor {
+ my $self = shift;
+ ::debug("read_sensor");
+ my $fh = $self->{'sensor_fh'};
+ if(not $fh) {
+ # Start the sensor
+ open($fh, "-|", $opt::sensor) || ::die_bug("Cannot open: $opt::sensor");
+ $self->{'sensor_fh'} = $fh;
+ }
+ # Read as much as we can (non_block)
+ my ($eof,@lines) = nonblockGetLines($fh);
+
+ # new load = last full line
+ foreach my $line (@lines) {
+ if(defined $line) {
+ ::debug("Pipe saw: $eof [$line]\n");
+ $Global::last_sensor_reading = $line;
+ }
+ }
+ if($eof) {
+ # End of file => Restart the sensor
+ close $fh;
+ open($fh, "-|", $opt::sensor) || ::die_bug("Cannot open: $opt::sensor");
+ $self->{'sensor_fh'} = $fh;
+ }
+
+ return $Global::last_sensor_reading;
+}
+
+sub load_status {
+ # Returns:
+ # loadavg or sensor measurement
+ my $self = shift;
+
+ if($opt::sensor) {
+ if(not defined $self->{'load_status'} or
+ $self->{'load_status_cache_time'} + $self->{'recheck'} < time) {
+ $self->{'load_status'} = $self->read_sensor();
+ while (not defined $self->{'load_status'}) {
+ sleep 1;
+ $self->{'load_status'} = $self->read_sensor();
+ }
+ $self->{'load_status_cache_time'} = time - 0.001;
+ }
+ } else {
+ # Normal load avg
+ # Cache for some seconds
+ if(not defined $self->{'load_status'} or
+ $self->{'load_status_cache_time'} + $self->{'recheck'} < time) {
+ $self->{'load_status'} = load_status_linux() if $^O ne 'darwin';
+ $self->{'load_status'} = load_status_darwin() if $^O eq 'darwin';
+ $self->{'load_status_cache_time'} = time;
+ }
+ }
+ ::debug("load_status: ".$self->{'load_status'}."\n");
+ return $self->{'load_status'};
+}
+
+sub undef_as_zero {
+ my $a = shift;
+ return $a ? $a : 0;
+}
+
+
+sub load_status_linux {
+ my ($loadavg);
+ if(open(IN,"/proc/loadavg")) {
+ # Linux specific (but fast)
+ my $upString = ;
+ if($upString =~ m/^(\d+\.\d+)/) {
+ $loadavg = $1;
+ } else {
+ ::die_bug("proc_loadavg");
+ }
+ close IN;
+ } elsif (open(IN,"uptime|")) {
+ my $upString = ;
+ if($upString =~ m/averages?.\s*(\d+\.\d+)/) {
+ $loadavg = $1;
+ } else {
+ ::die_bug("uptime");
+ }
+ close IN;
+ }
+ return $loadavg;
+}
+
+sub load_status_darwin {
+ my $loadavg = `sysctl vm.loadavg`;
+ if($loadavg =~ /vm\.loadavg: { ([0-9.]+) ([0-9.]+) ([0-9.]+) }/) {
+ $loadavg = $1;
+ } elsif (open(IN,"LANG=C uptime|")) {
+ my $upString = ;
+ if($upString =~ m/averages?.\s*(\d+\.\d+)/) {
+ $loadavg = $1;
+ } else {
+ ::die_bug("uptime");
+ }
+ close IN;
+ }
+ return $loadavg;
+}
+
+
+sub swap_status {
+ # Returns:
+ # (swap in)*(swap out) kb
+ my $self = shift;
+ my $status;
+ # Cache for some seconds
+ if(not defined $self->{'swap_status'} or
+ $self->{'swap_status_cache_time'}+$self->{'recheck'} < time) {
+ $status = swap_status_linux() if $^O ne 'darwin';
+ $status = swap_status_darwin() if $^O eq 'darwin';
+ $self->{'swap_status'} = ::max($status,0);
+ $self->{'swap_status_cache_time'} = time;
+ }
+ ::debug("swap_status: $self->{'swap_status'}\n");
+ return $self->{'swap_status'};
+}
+
+
+sub swap_status_linux {
+ my $swap_activity;
+ $swap_activity = "vmstat 1 2 | tail -n1 | awk '{print \$7*\$8}'";
+ # Run swap_activity measuring.
+ return qx{ $swap_activity };
+}
+
+sub swap_status_darwin {
+ # Mach Virtual Memory Statistics: (page size of 4096 bytes, cache hits 0%)
+ # free active spec inactive wire faults copy 0fill reactive pageins pageout
+ # 298987 251463 162637 69437 265724 29730558 299022 2308237 1 110058 0
+ # 298991 251479 162637 69437 265726 43 4 16 0 0 0
+ my ($pagesize, $pageins, $pageouts);
+ my @vm_stat = `vm_stat 1 | head -n4`;
+ $pagesize = $1 if $vm_stat[0] =~ m/page size of (\d+) bytes/;
+ $pageins = (split(/\s+/,$vm_stat[3]))[9];
+ $pageouts = (split(/\s+/,$vm_stat[3]))[10];
+ return ($pageins*$pageouts*$pagesize)/1024;
+}
+
+
+sub mem_status {
+ # Returns:
+ # number of bytes (free+cache)
+ my $self = shift;
+ # Cache for one second
+ if(not defined $self->{'mem_status'} or
+ $self->{'mem_status_cache_time'}+$self->{'recheck'} < time) {
+ $self->{'mem_status'} = mem_status_linux() if $^O ne 'darwin';
+ $self->{'mem_status'} = mem_status_darwin() if $^O eq 'darwin';
+ $self->{'mem_status_cache_time'} = time;
+ }
+ ::debug("mem_status: $self->{'mem_status'}\n");
+ return $self->{'mem_status'};
+}
+
+
+sub mem_status_linux {
+ # total used free shared buffers cached
+ # Mem: 3366496 2901664 464832 0 179228 1850692
+ # -/+ buffers/cache: 871744 2494752
+ # Swap: 6445476 1396860 5048616
+ my @free = `free`;
+ my $free = (split(/\s+/,$free[2]))[3];
+ return $free*1024;
+}
+
+sub mem_status_darwin {
+ # Mach Virtual Memory Statistics: (page size of 4096 bytes, cache hits 0%)
+ # free active spec inactive wire faults copy 0fill reactive pageins pageout
+ # 298987 251463 162637 69437 265724 29730558 299022 2308237 1 110058 0
+ # 298991 251479 162637 69437 265726 43 4 16 0 0 0
+ my ($pagesize, $pages_free, $pages_speculative);
+ my @vm_stat = `vm_stat 1 | head -n4`;
+ $pagesize = $1 if $vm_stat[0] =~ m/page size of (\d+) bytes/;
+ $pages_free = (split(/\s+/,$vm_stat[3]))[0];
+ $pages_speculative = (split(/\s+/,$vm_stat[3]))[2];
+ return ($pages_free+$pages_speculative)*$pagesize;
+}
+
+
+sub io_status {
+ # Returns:
+ # max percent for all devices
+ my $self = shift;
+ # Cache for one second
+ if(not defined $self->{'io_status'} or
+ $self->{'io_status_cache_time'}+$self->{'recheck'} < time) {
+ $self->{'io_status'} = io_status_linux() if $^O ne 'darwin';
+ $self->{'io_status'} = io_status_darwin() if $^O eq 'darwin';
+ $self->{'io_status_cache_time'} = time;
+ }
+ ::debug("io_status: $self->{'io_status'}\n");
+ return $self->{'io_status'};
+}
+
+
+sub io_status_linux {
+ # Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util
+ # sda 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
+ my @iostat_out = `LANG=C iostat -x 1 2`;
+ # throw away all execpt the last Device:-section
+ my @iostat;
+ for(reverse @iostat_out) {
+ /Device:/ and last;
+ push @iostat, (split(/\s+/,$_))[13];
+ }
+ my $io = ::max(@iostat);
+ return undef_as_zero($io)/10;
+}
+
+sub io_status_darwin {
+ # disk0 disk1 disk2
+ # KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s
+ # 14.95 15 0.22 11.18 35 0.38 2.00 0 0.00
+ # 0.00 0 0.00 0.00 0 0.00 0.00 0 0.00
+ my @iostat_out = `LANG=C iostat -d -w 1 -c 2`;
+ # return the MB/s of the last second (not the %util)
+ my @iostat = split(/\s+/, $iostat_out[3]);
+ my $io = $iostat[3] + $iostat[6] + $iostat[9];
+ return ::min($io, 10);
+}
+
+$::exitsignal = $::exitstatus = $opt::battery = 0; # Dummy
diff --git a/parallel b/parallel
index c207889..9a92e45 100755
--- a/parallel
+++ b/parallel
@@ -1,7 +1,7 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
-# Copyright (C) 2007,2008,2009,2010,2011,2012 Ole Tange and Free Software
-# Foundation, Inc.
+# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange
+# and Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -26,124 +26,102 @@ use POSIX qw(:sys_wait_h setsid ceil :errno_h);
use Symbol qw(gensym);
# tempfile used in Job::start
use File::Temp qw(tempfile tempdir);
+# mkpath used in openresultsfile
+use File::Path;
# GetOptions used in get_options_from_array
use Getopt::Long;
# Used to ensure code quality
use strict;
+use File::Basename;
-$::oodebug=0;
-$SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X
-if(not $ENV{SHELL}) {
- # $ENV{SHELL} is sometimes not set on Mac OS X
- print STDERR ("parallel: Warning: \$SHELL not set. Using /bin/sh\n");
- $ENV{SHELL} = "/bin/sh";
-}
-%Global::original_sig = %SIG;
-$SIG{TERM} = sub {}; # Dummy until jobs really start
-open $Global::original_stderr, ">&STDERR" or ::die_bug("Can't dup STDERR: $!");
-
+save_stdin_stdout_stderr();
+save_original_signal_handler();
parse_options();
+::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
my $number_of_args;
if($Global::max_number_of_args) {
$number_of_args=$Global::max_number_of_args;
-} elsif ($::opt_X or $::opt_m) {
+} elsif ($opt::X or $opt::m or $opt::xargs) {
$number_of_args = undef;
} else {
$number_of_args = 1;
}
-my $command = "";
-if(@ARGV) {
- if($Global::quoting) {
- $command = shell_quote(@ARGV);
- } else {
- $command = join(" ", @ARGV);
+my @command = @ARGV;
+
+my @input_source_fh;
+if($opt::pipepart) {
+ @input_source_fh = map { open_or_exit($_) } "/dev/null";
+} else {
+ @input_source_fh = map { open_or_exit($_) } @opt::a;
+ if(not @input_source_fh and not $opt::pipe) {
+ @input_source_fh = (*STDIN);
}
}
-my @fhlist;
-@fhlist = map { open_or_exit($_) } @::opt_a;
-if(not @fhlist) {
- @fhlist = (*STDIN);
-}
-if($::opt_skip_first_line) {
+if($opt::skip_first_line) {
# Skip the first line for the first file handle
- my $fh = $fhlist[0];
+ my $fh = $input_source_fh[0];
<$fh>;
}
-if($::opt_header and not $::opt_pipe) {
- my $fh = $fhlist[0];
+if($opt::header and not $opt::pipe) {
# split with colsep or \t
- # TODO should $header force $colsep = \t if undef?
- my $delimiter = $::opt_colsep;
+ # $header force $colsep = \t if undef?
+ my $delimiter = $opt::colsep;
+ $delimiter ||= "\t";
my $id = 1;
- for my $fh (@fhlist) {
+ for my $fh (@input_source_fh) {
my $line = <$fh>;
chomp($line);
- ::debug("Delimiter: '$delimiter'");
+ ::debug("init", "Delimiter: '$delimiter'");
for my $s (split /$delimiter/o, $line) {
- ::debug("Colname: '$s'");
- $command =~ s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
+ ::debug("init", "Colname: '$s'");
+ # Replace {colname} with {2}
+ # TODO accept configurable short hands
+ # TODO how to deal with headers in {=...=}
+ for(@command) {
+ s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
+ }
+ $Global::input_source_header{$id} = $s;
$id++;
}
}
+} else {
+ my $id = 1;
+ for my $fh (@input_source_fh) {
+ $Global::input_source_header{$id} = $id;
+ $id++;
+ }
}
-if($::opt_nonall or $::opt_onall) {
- # Copy all @fhlist into tempfiles
- my @argfiles = ();
- for my $fh (@fhlist) {
- my ($outfh,$name) = ::tempfile(SUFFIX => ".all");
- print $outfh (<$fh>);
- close $outfh;
- push @argfiles, $name;
- }
- if(@::opt_basefile) { setup_basefile(); }
- # for each sshlogin do:
- # parallel -S $sshlogin $command :::: @argfiles
- #
- # Pass some of the options to the sub-parallels, not all of them as
- # -P should only go to the first, and -S should not be copied at all.
- my $options =
- join(" ",
- ((defined $::opt_P) ? "-P $::opt_P" : ""),
- ((defined $::opt_u) ? "-u" : ""),
- ((defined $::opt_group) ? "-g" : ""),
- ((defined $::opt_D) ? "-D" : ""),
- );
- my $suboptions =
- join(" ",
- ((defined $::opt_u) ? "-u" : ""),
- ((defined $::opt_group) ? "-g" : ""),
- ((defined $::opt_colsep) ? "--colsep ".shell_quote($::opt_colsep) : ""),
- ((defined @::opt_v) ? "-vv" : ""),
- ((defined $::opt_D) ? "-D" : ""),
- ((defined $::opt_timeout) ? "--timeout ".$::opt_timeout : ""),
- );
- ::debug("| parallel");
- open(PARALLEL,"| $0 $options") ||
- ::die_bug("This does not run GNU Parallel: $0 $options");
- for my $sshlogin (values %Global::host) {
- print PARALLEL "$0 $suboptions -j1 ".
- ((defined $::opt_tag) ?
- "--tagstring ".shell_quote_scalar($sshlogin->string()) : "").
- " -S ". shell_quote_scalar($sshlogin->string())." ".
- shell_quote_scalar($command)." :::: @argfiles\n";
- }
- close PARALLEL;
- $Global::exitstatus = $? >> 8;
- debug("--onall exitvalue ",$?);
- if(@::opt_basefile) { cleanup_basefile(); }
- unlink(@argfiles);
+if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
+ # Parallel check all hosts are up. Remove hosts that are down
+ filter_hosts();
+}
+
+if($opt::nonall or $opt::onall) {
+ onall(\@input_source_fh,@command);
wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
}
+# TODO --transfer foo/./bar --cleanup
+# multiple --transfer and --basefile with different /./
+
$Global::JobQueue = JobQueue->new(
- $command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
-if($::opt_eta) {
- # Count the number of jobs before starting any
+ \@command,\@input_source_fh,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
+
+if($opt::eta or $opt::bar or $opt::shuf) {
+ # Count the number of jobs or shuffle all jobs
+ # before starting any
$Global::JobQueue->total_jobs();
}
+if($opt::pipepart) {
+ @Global::cat_partials = map { pipe_part_files($_) } @opt::a;
+ # Unget the command as many times as there are parts
+ $Global::JobQueue->{'commandlinequeue'}->unget(
+ map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials
+ );
+}
for my $sshlogin (values %Global::host) {
$sshlogin->max_jobs_running();
}
@@ -155,244 +133,545 @@ if($Global::semaphore) {
}
$SIG{TERM} = \&start_no_new_jobs;
start_more_jobs();
-if($::opt_pipe) {
- spreadstdin(@fhlist);
+if(not $opt::pipepart) {
+ if($opt::pipe) {
+ spreadstdin();
+ }
}
-::debug("Start draining\n");
+::debug("init", "Start draining\n");
drain_job_queue();
-::debug("Done draining\n");
+::debug("init", "Done draining\n");
reaper();
+::debug("init", "Done reaping\n");
+if($opt::pipe and @opt::a) {
+ for my $job (@Global::tee_jobs) {
+ unlink $job->fh(2,"name");
+ $job->set_fh(2,"name","");
+ $job->print();
+ unlink $job->fh(1,"name");
+ }
+}
+::debug("init", "Cleaning\n");
cleanup();
if($Global::semaphore) {
$sem->release();
}
-if($::opt_halt_on_error) {
- wait_and_exit($Global::halt_on_error_exitstatus);
+for(keys %Global::sshmaster) {
+ # If 'ssh -M's are running: kill them
+ kill "TERM", $_;
+}
+::debug("init", "Halt\n");
+if($opt::halt) {
+ wait_and_exit($Global::halt_exitstatus);
} else {
wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
}
sub __PIPE_MODE__ {}
-sub spreadstdin {
- # read a record
- # Spawn a job and print the record to it.
- my @fhlist = @_; # Filehandles to read from (Defaults to STDIN)
- my $record;
+sub pipe_part_files {
+ # Input:
+ # $file = the file to read
+ # Returns:
+ # @commands that will cat_partial each part
+ my ($file) = @_;
my $buf = "";
+ my $header = find_header(\$buf,open_or_exit($file));
+ # find positions
+ my @pos = find_split_positions($file,$opt::blocksize,length $header);
+ # Make @cat_partials
+ my @cat_partials = ();
+ for(my $i=0; $i<$#pos; $i++) {
+ push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]);
+ }
+ # Remote exec should look like:
+ # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\)
+ # ssh -tt not allowed. Remote will die due to broken pipe anyway.
+ return @cat_partials;
+}
+
+sub find_header {
+ # Input:
+ # $buf_ref = reference to read-in buffer
+ # $fh = filehandle to read from
+ # Uses:
+ # $opt::header
+ # $opt::blocksize
+ # Returns:
+ # $header string
+ my ($buf_ref, $fh) = @_;
my $header = "";
- if($::opt_header) {
- my $non_greedy_regexp = $::opt_header;
- # ? , * , + , {} => ?? , *? , +? , {}?
- $non_greedy_regexp =~ s/(\?|\*|\+|\})/$1\?/g;
- while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) {
- if($buf=~s/^(.*?$non_greedy_regexp)//) {
- $header = $1; last;
+ if($opt::header) {
+ if($opt::header eq ":") { $opt::header = "(.*\n)"; }
+ # Number = number of lines
+ $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
+ while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) {
+ if($$buf_ref=~s/^($opt::header)//) {
+ $header = $1;
+ last;
}
}
}
- my ($recstart,$recend,$recerror);
- if(defined($::opt_recstart) and defined($::opt_recend)) {
+ return $header;
+}
+
+sub find_split_positions {
+ # Input:
+ # $file = the file to read
+ # $block = (minimal) --block-size of each chunk
+ # $headerlen = length of header to be skipped
+ # Uses:
+ # $opt::recstart
+ # $opt::recend
+ # Returns:
+ # @positions of block start/end
+ my($file, $block, $headerlen) = @_;
+ my $size = -s $file;
+ $block = int $block;
+ # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
+ # The optimal dd blocksize for freebsd = 2^15..2^17
+ my $dd_block_size = 131072; # 2^17
+ my @pos;
+ my ($recstart,$recend) = recstartrecend();
+ my $recendrecstart = $recend.$recstart;
+ my $fh = ::open_or_exit($file);
+ push(@pos,$headerlen);
+ for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
+ my $buf;
+ seek($fh, $pos, 0) || die;
+ while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
+ if($opt::regexp) {
+ # If match /$recend$recstart/ => Record position
+ if($buf =~ /^(.*$recend)$recstart/os) {
+ # Start looking for next record _after_ this match
+ $pos += length($1);
+ push(@pos,$pos);
+ last;
+ }
+ } else {
+ # If match $recend$recstart => Record position
+ my $i = index64(\$buf,$recendrecstart);
+ if($i != -1) {
+ # Start looking for next record _after_ this match
+ $pos += $i + length($recendrecstart);
+ push(@pos,$pos);
+ last;
+ }
+ }
+ }
+ }
+ push(@pos,$size);
+ close $fh;
+ return @pos;
+}
+
+sub cat_partial {
+ # Input:
+ # $file = the file to read
+ # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
+ # Returns:
+ # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout
+ my($file, @start_end) = @_;
+ my($start, $i);
+ # Convert start_end to start_len
+ my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end;
+ return "<". shell_quote_scalar($file) .
+ q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } .
+ " @start_len";
+}
+
+sub spreadstdin {
+ # read a record
+ # Spawn a job and print the record to it.
+ # Uses:
+ # $opt::blocksize
+ # STDIN
+ # $opt::r
+ # $Global::max_lines
+ # $Global::max_number_of_args
+ # $opt::regexp
+ # $Global::start_no_new_jobs
+ # $opt::roundrobin
+ # %Global::running
+ # Returns: N/A
+
+ my $buf = "";
+ my ($recstart,$recend) = recstartrecend();
+ my $recendrecstart = $recend.$recstart;
+ my $chunk_number = 1;
+ my $one_time_through;
+ my $two_gb = 2**31-1;
+ my $blocksize = $opt::blocksize;
+ my $in = *STDIN;
+ my $header = find_header(\$buf,$in);
+ while(1) {
+ my $anything_written = 0;
+ if(not read($in,substr($buf,length $buf,0),$blocksize)) {
+ # End-of-file
+ $chunk_number != 1 and last;
+ # Force the while-loop once if everything was read by header reading
+ $one_time_through++ and last;
+ }
+ if($opt::r) {
+ # Remove empty lines
+ $buf =~ s/^\s*\n//gm;
+ if(length $buf == 0) {
+ next;
+ }
+ }
+ if($Global::max_lines and not $Global::max_number_of_args) {
+ # Read n-line records
+ my $n_lines = $buf =~ tr/\n/\n/;
+ my $last_newline_pos = rindex64(\$buf,"\n");
+ while($n_lines % $Global::max_lines) {
+ $n_lines--;
+ $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
+ }
+ # Chop at $last_newline_pos as that is where n-line record ends
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$buf,
+ $recstart,$recend,$last_newline_pos+1);
+ shorten(\$buf,$last_newline_pos+1);
+ } elsif($opt::regexp) {
+ if($Global::max_number_of_args) {
+ # -N => (start..*?end){n}
+ # -L -N => (start..*?end){n*l}
+ my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
+ while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) {
+ # Copy to modifiable variable
+ my $b = $1;
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$b,
+ $recstart,$recend,length $1);
+ }
+ } else {
+ # Find the last recend-recstart in $buf
+ if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
+ # Copy to modifiable variable
+ my $b = $1;
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$b,
+ $recstart,$recend,length $1);
+ }
+ }
+ } else {
+ if($Global::max_number_of_args) {
+ # -N => (start..*?end){n}
+ my $i = 0;
+ my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
+ while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) {
+ $i += length $recend; # find the actual splitting location
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$buf,
+ $recstart,$recend,$i);
+ shorten(\$buf,$i);
+ }
+ } else {
+ # Find the last recend+recstart in $buf
+ my $i = rindex64(\$buf,$recendrecstart);
+ if($i != -1) {
+ $i += length $recend; # find the actual splitting location
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$buf,
+ $recstart,$recend,$i);
+ shorten(\$buf,$i);
+ }
+ }
+ }
+ if(not $anything_written and not eof($in)) {
+ # Nothing was written - maybe the block size < record size?
+ # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
+ if($blocksize < $two_gb) {
+ my $old_blocksize = $blocksize;
+ $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
+ ::warning("A record was longer than $old_blocksize. " .
+ "Increasing to --blocksize $blocksize\n");
+ }
+ }
+ }
+ ::debug("init", "Done reading input\n");
+
+ # If there is anything left in the buffer write it
+ write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf);
+
+ $Global::start_no_new_jobs ||= 1;
+ if($opt::roundrobin) {
+ for my $job (values %Global::running) {
+ close $job->fh(0,"w");
+ }
+ my %incomplete_jobs = %Global::running;
+ my $sleep = 1;
+ while(keys %incomplete_jobs) {
+ my $something_written = 0;
+ for my $pid (keys %incomplete_jobs) {
+ my $job = $incomplete_jobs{$pid};
+ if($job->stdin_buffer_length()) {
+ $something_written += $job->non_block_write();
+ } else {
+ delete $incomplete_jobs{$pid}
+ }
+ }
+ if($something_written) {
+ $sleep = $sleep/2+0.001;
+ }
+ $sleep = ::reap_usleep($sleep);
+ }
+ }
+}
+
+sub recstartrecend {
+ # Uses:
+ # $opt::recstart
+ # $opt::recend
+ # Returns:
+ # $recstart,$recend with default values and regexp conversion
+ my($recstart,$recend);
+ if(defined($opt::recstart) and defined($opt::recend)) {
# If both --recstart and --recend is given then both must match
- $recstart = $::opt_recstart;
- $recend = $::opt_recend;
- $recerror = "parallel: Warning: --recend and --recstart unmatched. Is --blocksize too small?";
- } elsif(defined($::opt_recstart)) {
+ $recstart = $opt::recstart;
+ $recend = $opt::recend;
+ } elsif(defined($opt::recstart)) {
# If --recstart is given it must match start of record
- $recstart = $::opt_recstart;
+ $recstart = $opt::recstart;
$recend = "";
- $recerror = "parallel: Warning: --recstart unmatched. Is --blocksize too small?";
- } elsif(defined($::opt_recend)) {
+ } elsif(defined($opt::recend)) {
# If --recend is given then it must match end of record
$recstart = "";
- $recend = $::opt_recend;
- $recerror = "parallel: Warning: --recend unmatched. Is --blocksize too small?";
+ $recend = $opt::recend;
}
- if($::opt_regexp) {
+ if($opt::regexp) {
# If $recstart/$recend contains '|' this should only apply to the regexp
$recstart = "(?:".$recstart.")";
$recend = "(?:".$recend.")";
} else {
# $recstart/$recend = printf strings (\n)
- $recstart =~ s/\\([rnt'"\\])/"qq|\\$1|"/gee;
- $recend =~ s/\\([rnt'"\\])/"qq|\\$1|"/gee;
+ $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
+ $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
}
- my $recendrecstart = $recend.$recstart;
- # Force the while-loop once if everything was read by header reading
- my $force_one_time_through = 0;
- for my $in (@fhlist) {
- while(!$force_one_time_through++ or read($in,substr($buf,length $buf,0),$::opt_blocksize)) {
- # substr above = append to $buf
- if($::opt_r) {
- # Remove empty lines
- $buf=~s/^\s*\n//gm;
- if(length $buf == 0) {
- next;
- }
- }
- if($::opt_regexp) {
- if($Global::max_number_of_args) {
- # -N => (start..*?end){n}
- while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) {
- $record = $header.$1;
- ::debug("Read record -N: ".length($record)."\n");
- write_record_to_pipe(\$record,$recstart,$recend);
- }
- } else {
- # Find the last recend-recstart in $buf
- if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
- $record = $header.$1;
- ::debug("Matched record: ".length($record)."/".length($buf)."\n");
- write_record_to_pipe(\$record,$recstart,$recend);
- }
- }
- } else {
- if($Global::max_number_of_args) {
- # -N => (start..*?end){n}
- my $i = 0;
- while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) {
- $i += length $recend; # find the actual splitting location
- my $record = $header.substr($buf,0,$i);
- substr($buf,0,$i) = "";
- ::debug("Read record: ".length($record)."\n");
- write_record_to_pipe(\$record,$recstart,$recend);
- }
- } else {
- # Find the last recend-recstart in $buf
- my $i = rindex($buf,$recendrecstart);
- if($i != -1) {
- $i += length $recend; # find the actual splitting location
- my $record = $header.substr($buf,0,$i);
- substr($buf,0,$i) = "";
- # ::debug("Read record: ".length($record)."\n");
- write_record_to_pipe(\$record,$recstart,$recend);
- }
- }
- }
- }
-}
-
- # If there is anything left in the buffer write it
- substr($buf,0,0) = $header;
- write_record_to_pipe(\$buf,$recstart,$recend);
-
- ::debug("Done reading input\n");
- flush_and_close_pipes();
- ::debug("Done flushing to children\n");
- $Global::start_no_new_jobs = 1;
+ return ($recstart,$recend);
}
sub nindex {
# See if string is in buffer N times
# Returns:
# the position where the Nth copy is found
- my $buf_ref = shift;
- my $str = shift;
- my $n = shift;
+ my ($buf_ref, $str, $n) = @_;
my $i = 0;
+ my $two_gb = 2**31-1;
for(1..$n) {
- $i = index($$buf_ref,$str,$i+1);
+ $i = index64($buf_ref,$str,$i+1);
if($i == -1) { last }
}
return $i;
}
-sub flush_and_close_pipes {
- # Flush that that is cached to the open pipes
- # and close them.
- my $flush_done;
- my $sleep = 0.05;
- do {
- $flush_done = 1;
- # Make sure everything is written to the jobs
- for my $job (values %Global::running) {
- if($job->remaining()) {
- if($job->complete_write()) {
- # Some data was written - reset sleep timer
- $sleep = 0.05;
- }
- $flush_done = 0;
+{
+ my @robin_queue;
+
+ sub round_robin_write {
+ # Input:
+ # $header_ref = ref to $header string
+ # $block_ref = ref to $block to be written
+ # $recstart = record start string
+ # $recend = record end string
+ # $endpos = end position of $block
+ # Uses:
+ # %Global::running
+ # Returns:
+ # $something_written = amount of bytes written
+ my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_;
+ my $something_written = 0;
+ my $block_passed = 0;
+ my $sleep = 1;
+ while(not $block_passed) {
+ # Continue flushing existing buffers
+ # until one is empty and a new block is passed
+ # Make a queue to spread the blocks evenly
+ if(not @robin_queue) {
+ push @robin_queue, (sort { $a->seq() <=> $b->seq() }
+ values %Global::running);
}
+ while(my $job = shift @robin_queue) {
+ if($job->stdin_buffer_length() > 0) {
+ $something_written += $job->non_block_write();
+ } else {
+ $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend);
+ $block_passed = 1;
+ $job->set_virgin(0);
+ $something_written += $job->non_block_write();
+ last;
+ }
+ }
+ $sleep = ::reap_usleep($sleep);
}
- $sleep = ::reap_usleep($sleep);
- } while (not $flush_done);
- for my $job (values %Global::running) {
- my $fh = $job->stdin();
- close $fh;
+ return $something_written;
}
}
-sub write_record_to_pipe {
- my $record_ref = shift;
- my $recstart = shift;
- my $recend = shift;
- if(length $$record_ref == 0) { return; }
- if($::opt_remove_rec_sep) {
- # Remove record separator
- $$record_ref =~ s/$recend$recstart//gos;
- $$record_ref =~ s/^$recstart//os;
- $$record_ref =~ s/$recend$//os;
+sub index64 {
+ # Do index on strings > 2GB.
+ # index in Perl < v5.22 does not work for > 2GB
+ # Input:
+ # as index except STR which must be passed as a reference
+ # Output:
+ # as index
+ my $ref = shift;
+ my $match = shift;
+ my $pos = shift || 0;
+ my $block_size = 2**31-1;
+ my $strlen = length($$ref);
+ # No point in doing extra work if we don't need to.
+ if($strlen < $block_size or $] > 5.022) {
+ return index($$ref, $match, $pos);
}
- # Keep the pipes hot, but if nothing happens sleep should back off
- my $sleep = 0.00001; # 0.00001 ms - better performance on highend
- write_record: while(1) {
- # Sorting according to sequence is necessary for -k to work
- for my $job (sort { $a->seq() <=> $b->seq() } values %Global::running) {
- ::debug("Looking at ",$job->seq(),"-",$job->remaining(),"-",$job->datawritten(),"\n");
- if($job->remaining()) {
- # Part of the job's last record has not finished being written
- if($job->complete_write()) {
- # Something got written - reset sleep timer
- $sleep = 0.00001;
- }
- } else {
- if($job->datawritten() > 0) {
- # There is no data remaining and we have written data before:
- # So this means we have completed writing a block.
- # close stdin
- # This will cause the job to finish and when it dies we will spawn another job
- my $fh = $job->stdin();
- close $fh;
- } else {
- $job->write($record_ref);
- # Something got written - reset sleep timer
- $sleep = 0.00001;
- last write_record;
- }
- }
- }
- # Maybe this should be in an if statement: if sleep > 0.001: start more
- start_more_jobs(); # These jobs may not be started because of loadavg
- $sleep = ::reap_usleep($sleep);
- }
- return;
+
+ my $matchlen = length($match);
+ my $ret;
+ my $offset = $pos;
+ while($offset < $strlen) {
+ $ret = index(
+ substr($$ref, $offset, $block_size),
+ $match, $pos-$offset);
+ if($ret != -1) {
+ return $ret + $offset;
+ }
+ $offset += ($block_size - $matchlen - 1);
+ }
+ return -1;
+}
+
+sub rindex64 {
+ # Do rindex on strings > 2GB.
+ # rindex in Perl < v5.22 does not work for > 2GB
+ # Input:
+ # as rindex except STR which must be passed as a reference
+ # Output:
+ # as rindex
+ my $ref = shift;
+ my $match = shift;
+ my $pos = shift;
+ my $block_size = 2**31-1;
+ my $strlen = length($$ref);
+ # Default: search from end
+ $pos = defined $pos ? $pos : $strlen;
+ # No point in doing extra work if we don't need to.
+ if($strlen < $block_size) {
+ return rindex($$ref, $match, $pos);
+ }
+
+ my $matchlen = length($match);
+ my $ret;
+ my $offset = $pos - $block_size + $matchlen;
+ if($offset < 0) {
+ # The offset is less than a $block_size
+ # Set the $offset to 0 and
+ # Adjust block_size accordingly
+ $block_size = $block_size + $offset;
+ $offset = 0;
+ }
+ while($offset >= 0) {
+ $ret = rindex(
+ substr($$ref, $offset, $block_size),
+ $match);
+ if($ret != -1) {
+ return $ret + $offset;
+ }
+ $offset -= ($block_size - $matchlen - 1);
+ }
+ return -1;
+}
+
+sub shorten {
+ # Do: substr($buf,0,$i) = "";
+ # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
+ # Input:
+ # $buf_ref = \$buf
+ # $i = position to shorten to
+ # Returns: N/A
+ my ($buf_ref, $i) = @_;
+ my $two_gb = 2**31-1;
+ while($i > $two_gb) {
+ substr($$buf_ref,0,$two_gb) = "";
+ $i -= $two_gb;
+ }
+ substr($$buf_ref,0,$i) = "";
+}
+
+sub write_record_to_pipe {
+ # Fork then
+ # Write record from pos 0 .. $endpos to pipe
+ # Input:
+ # $chunk_number = sequence number - to see if already run
+ # $header_ref = reference to header string to prepend
+ # $record_ref = reference to record to write
+ # $recstart = start string of record
+ # $recend = end string of record
+ # $endpos = position in $record_ref where record ends
+ # Uses:
+ # $Global::job_already_run
+ # $opt::roundrobin
+ # @Global::virgin_jobs
+ # Returns:
+ # Number of chunks written (0 or 1)
+ my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_;
+ if($endpos == 0) { return 0; }
+ if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
+ if($opt::roundrobin) {
+ return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos);
+ }
+ # If no virgin found, backoff
+ my $sleep = 0.0001; # 0.01 ms - better performance on highend
+ while(not @Global::virgin_jobs) {
+ ::debug("pipe", "No virgin jobs");
+ $sleep = ::reap_usleep($sleep);
+ # Jobs may not be started because of loadavg
+ # or too little time between each ssh login.
+ start_more_jobs();
+ }
+ my $job = shift @Global::virgin_jobs;
+ # Job is no longer virgin
+ $job->set_virgin(0);
+ # We ignore the removed rec_sep which is technically wrong.
+ $job->add_transfersize($endpos + length $$header_ref);
+ if(fork()) {
+ # Skip
+ } else {
+ # Chop of at $endpos as we do not know how many rec_sep will
+ # be removed.
+ substr($$record_ref,$endpos,length $$record_ref) = "";
+ # Remove rec_sep
+ if($opt::remove_rec_sep) {
+ Job::remove_rec_sep($record_ref,$recstart,$recend);
+ }
+ $job->write($header_ref);
+ $job->write($record_ref);
+ close $job->fh(0,"w");
+ exit(0);
+ }
+ close $job->fh(0,"w");
+ return 1;
}
sub __SEM_MODE__ {}
sub acquire_semaphore {
# Acquires semaphore. If needed: spawns to the background
+ # Uses:
+ # @Global::host
# Returns:
# The semaphore to be released when jobs is complete
$Global::host{':'} = SSHLogin->new(":");
my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
$sem->acquire();
- debug("run");
if($Semaphore::fg) {
# skip
} else {
- # If run in the background, the PID will change
- # therefore release and re-acquire the semaphore
- $sem->release();
if(fork()) {
exit(0);
} else {
- # child
- # Get a semaphore for this pid
- ::die_bug("Can't start a new session: $!") if setsid() == -1;
- $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
- $sem->acquire();
+ # If run in the background, the PID will change
+ $sem->pid_change();
}
}
return $sem;
@@ -401,123 +680,164 @@ sub acquire_semaphore {
sub __PARSE_OPTIONS__ {}
sub options_hash {
- # Returns a hash of the GetOptions config
+ # Returns:
+ # %hash = the GetOptions config
return
- ("debug|D" => \$::opt_D,
- "xargs" => \$::opt_xargs,
- "m" => \$::opt_m,
- "X" => \$::opt_X,
- "v" => \@::opt_v,
- "joblog=s" => \$::opt_joblog,
- "resume" => \$::opt_resume,
- "silent" => \$::opt_silent,
- #"silent-error|silenterror" => \$::opt_silent_error,
- "keep-order|keeporder|k" => \$::opt_k,
- "group" => \$::opt_group,
- "g" => \$::opt_retired,
- "ungroup|u" => \$::opt_u,
- "null|0" => \$::opt_0,
- "quote|q" => \$::opt_q,
- "I=s" => \$::opt_I,
- "extensionreplace|er=s" => \$::opt_U,
- "U=s" => \$::opt_retired,
- "basenamereplace|bnr=s" => \$::opt_basenamereplace,
- "dirnamereplace|dnr=s" => \$::opt_dirnamereplace,
- "basenameextensionreplace|bner=s" => \$::opt_basenameextensionreplace,
- "seqreplace=s" => \$::opt_seqreplace,
- "jobs|j=s" => \$::opt_P,
- "load=s" => \$::opt_load,
- "noswap" => \$::opt_noswap,
- "max-line-length-allowed" => \$::opt_max_line_length_allowed,
- "number-of-cpus" => \$::opt_number_of_cpus,
- "number-of-cores" => \$::opt_number_of_cores,
- "use-cpus-instead-of-cores" => \$::opt_use_cpus_instead_of_cores,
- "shellquote|shell_quote|shell-quote" => \$::opt_shellquote,
- "nice=i" => \$::opt_nice,
- "timeout=i" => \$::opt_timeout,
- "tag" => \$::opt_tag,
- "tagstring=s" => \$::opt_tagstring,
- "onall" => \$::opt_onall,
- "nonall" => \$::opt_nonall,
- "sshlogin|S=s" => \@::opt_sshlogin,
- "sshloginfile|slf=s" => \@::opt_sshloginfile,
- "controlmaster|M" => \$::opt_controlmaster,
- "return=s" => \@::opt_return,
- "trc=s" => \@::opt_trc,
- "transfer" => \$::opt_transfer,
- "cleanup" => \$::opt_cleanup,
- "basefile|bf=s" => \@::opt_basefile,
- "B=s" => \$::opt_retired,
- "workdir|wd=s" => \$::opt_workdir,
- "W=s" => \$::opt_retired,
- "tmpdir=s" => \$::opt_tmpdir,
- "tempdir=s" => \$::opt_tmpdir,
- "tty" => \$::opt_tty,
- "T" => \$::opt_retired,
- "halt-on-error|halt=i" => \$::opt_halt_on_error,
- "H=i" => \$::opt_retired,
- "retries=i" => \$::opt_retries,
- "dry-run|dryrun" => \$::opt_dryrun,
- "progress" => \$::opt_progress,
- "eta" => \$::opt_eta,
- "arg-sep|argsep=s" => \$::opt_arg_sep,
- "arg-file-sep|argfilesep=s" => \$::opt_arg_file_sep,
- "trim=s" => \$::opt_trim,
- "profile|J=s" => \@::opt_profile,
- "pipe|spreadstdin" => \$::opt_pipe,
- "recstart=s" => \$::opt_recstart,
- "recend=s" => \$::opt_recend,
- "regexp|regex" => \$::opt_regexp,
- "remove-rec-sep|removerecsep|rrs" => \$::opt_remove_rec_sep,
- "files|output-as-files|outputasfiles" => \$::opt_files,
- "block|block-size|blocksize=s" => \$::opt_blocksize,
- "tollef" => \$::opt_tollef,
- "gnu" => \$::opt_gnu,
- "xapply" => \$::opt_xapply,
- "bibtex" => \$::opt_bibtex,
+ ("debug|D=s" => \$opt::D,
+ "xargs" => \$opt::xargs,
+ "m" => \$opt::m,
+ "X" => \$opt::X,
+ "v" => \@opt::v,
+ "joblog=s" => \$opt::joblog,
+ "results|result|res=s" => \$opt::results,
+ "resume" => \$opt::resume,
+ "resume-failed|resumefailed" => \$opt::resume_failed,
+ "silent" => \$opt::silent,
+ "keep-order|keeporder|k" => \$opt::keeporder,
+ "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
+ "group" => \$opt::group,
+ "g" => \$opt::retired,
+ "ungroup|u" => \$opt::ungroup,
+ "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer,
+ "tmux" => \$opt::tmux,
+ "null|0" => \$opt::0,
+ "quote|q" => \$opt::q,
+ # Replacement strings
+ "parens=s" => \$opt::parens,
+ "rpl=s" => \@opt::rpl,
+ "plus" => \$opt::plus,
+ "I=s" => \$opt::I,
+ "extensionreplace|er=s" => \$opt::U,
+ "U=s" => \$opt::retired,
+ "basenamereplace|bnr=s" => \$opt::basenamereplace,
+ "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
+ "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
+ "seqreplace=s" => \$opt::seqreplace,
+ "slotreplace=s" => \$opt::slotreplace,
+ "jobs|j=s" => \$opt::jobs,
+ "delay=f" => \$opt::delay,
+ "sshdelay=f" => \$opt::sshdelay,
+ "load=s" => \$opt::load,
+ "noswap" => \$opt::noswap,
+ "max-line-length-allowed" => \$opt::max_line_length_allowed,
+ "number-of-cpus" => \$opt::number_of_cpus,
+ "number-of-cores" => \$opt::number_of_cores,
+ "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
+ "shellquote|shell_quote|shell-quote" => \$opt::shellquote,
+ "nice=i" => \$opt::nice,
+ "tag" => \$opt::tag,
+ "tagstring|tag-string=s" => \$opt::tagstring,
+ "onall" => \$opt::onall,
+ "nonall" => \$opt::nonall,
+ "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
+ "sshlogin|S=s" => \@opt::sshlogin,
+ "sshloginfile|slf=s" => \@opt::sshloginfile,
+ "controlmaster|M" => \$opt::controlmaster,
+ "return=s" => \@opt::return,
+ "trc=s" => \@opt::trc,
+ "transfer" => \$opt::transfer,
+ "cleanup" => \$opt::cleanup,
+ "basefile|bf=s" => \@opt::basefile,
+ "B=s" => \$opt::retired,
+ "ctrlc|ctrl-c" => \$opt::ctrlc,
+ "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc,
+ "workdir|work-dir|wd=s" => \$opt::workdir,
+ "W=s" => \$opt::retired,
+ "tmpdir=s" => \$opt::tmpdir,
+ "tempdir=s" => \$opt::tmpdir,
+ "use-compress-program|compress-program=s" => \$opt::compress_program,
+ "use-decompress-program|decompress-program=s" => \$opt::decompress_program,
+ "compress" => \$opt::compress,
+ "tty" => \$opt::tty,
+ "T" => \$opt::retired,
+ "H=i" => \$opt::retired,
+ "dry-run|dryrun" => \$opt::dryrun,
+ "progress" => \$opt::progress,
+ "eta" => \$opt::eta,
+ "bar" => \$opt::bar,
+ "shuf" => \$opt::shuf,
+ "arg-sep|argsep=s" => \$opt::arg_sep,
+ "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
+ "trim=s" => \$opt::trim,
+ "env=s" => \@opt::env,
+ "recordenv|record-env" => \$opt::record_env,
+ "plain" => \$opt::plain,
+ "profile|J=s" => \@opt::profile,
+ "pipe|spreadstdin" => \$opt::pipe,
+ "robin|round-robin|roundrobin" => \$opt::roundrobin,
+ "recstart=s" => \$opt::recstart,
+ "recend=s" => \$opt::recend,
+ "regexp|regex" => \$opt::regexp,
+ "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
+ "files|output-as-files|outputasfiles" => \$opt::files,
+ "block|block-size|blocksize=s" => \$opt::blocksize,
+ "tollef" => \$opt::retired,
+ "gnu" => \$opt::ignored_option,
+ "xapply" => \$opt::xapply,
+ "bibtex" => \$opt::bibtex,
+ "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
+ # Termination and retries
+ "halt-on-error|halt=s" => \$opt::halt,
+ "memfree=s" => \$opt::memfree,
+ "retries=i" => \$opt::retries,
+ "timeout=s" => \$opt::timeout,
# xargs-compatibility - implemented, man, testsuite
- "max-procs|P=s" => \$::opt_P,
- "delimiter|d=s" => \$::opt_d,
- "max-chars|s=i" => \$::opt_s,
- "arg-file|a=s" => \@::opt_a,
- "no-run-if-empty|r" => \$::opt_r,
- "replace|i:s" => \$::opt_i,
- "E=s" => \$::opt_E,
- "eof|e:s" => \$::opt_E,
- "max-args|n=i" => \$::opt_n,
- "max-replace-args|N=i" => \$::opt_N,
- "colsep|col-sep|C=s" => \$::opt_colsep,
- "help|h" => \$::opt_help,
- "L=f" => \$::opt_L,
- "max-lines|l:f" => \$::opt_l,
- "interactive|p" => \$::opt_p,
- "verbose|t" => \$::opt_verbose,
- "version|V" => \$::opt_version,
- "minversion|min-version=i" => \$::opt_minversion,
- "show-limits|showlimits" => \$::opt_show_limits,
- "exit|x" => \$::opt_x,
+ "max-procs|P=s" => \$opt::jobs,
+ "delimiter|d=s" => \$opt::d,
+ "max-chars|s=i" => \$opt::max_chars,
+ "arg-file|a=s" => \@opt::a,
+ "no-run-if-empty|r" => \$opt::r,
+ "replace|i:s" => \$opt::i,
+ "E=s" => \$opt::eof,
+ "eof|e:s" => \$opt::eof,
+ "max-args|n=i" => \$opt::max_args,
+ "max-replace-args|N=i" => \$opt::max_replace_args,
+ "colsep|col-sep|C=s" => \$opt::colsep,
+ "help|h" => \$opt::help,
+ "L=f" => \$opt::L,
+ "max-lines|l:f" => \$opt::max_lines,
+ "interactive|p" => \$opt::p,
+ "verbose|t" => \$opt::verbose,
+ "version|V" => \$opt::version,
+ "minversion|min-version=i" => \$opt::minversion,
+ "show-limits|showlimits" => \$opt::show_limits,
+ "exit|x" => \$opt::x,
# Semaphore
- "semaphore" => \$::opt_semaphore,
- "semaphoretimeout=i" => \$::opt_semaphoretimeout,
- "semaphorename|id=s" => \$::opt_semaphorename,
- "fg" => \$::opt_fg,
- "bg" => \$::opt_bg,
- "wait" => \$::opt_wait,
+ "semaphore" => \$opt::semaphore,
+ "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
+ "semaphorename|id=s" => \$opt::semaphorename,
+ "fg" => \$opt::fg,
+ "bg" => \$opt::bg,
+ "wait" => \$opt::wait,
# Shebang #!/usr/bin/parallel --shebang
- "shebang|hashbang" => \$::opt_shebang,
- "Y" => \$::opt_retired,
- "skip-first-line" => \$::opt_skip_first_line,
- "header=s" => \$::opt_header,
+ "shebang|hashbang" => \$opt::shebang,
+ "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles,
+ "Y" => \$opt::retired,
+ "skip-first-line" => \$opt::skip_first_line,
+ "header=s" => \$opt::header,
+ "cat" => \$opt::cat,
+ "fifo" => \$opt::fifo,
+ "pipepart|pipe-part" => \$opt::pipepart,
+ "hgrp|hostgroup|hostgroups" => \$opt::hostgroups,
);
}
sub get_options_from_array {
# Run GetOptions on @array
+ # Input:
+ # $array_ref = ref to @ARGV to parse
+ # @keep_only = Keep only these options
+ # Uses:
+ # @ARGV
# Returns:
# true if parsing worked
# false if parsing failed
- # @array is changed
- my $array_ref = shift;
+ # @$array_ref is changed
+ my ($array_ref, @keep_only) = @_;
+ if(not @$array_ref) {
+ # Empty array: No need to look more at that
+ return 1;
+ }
# A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
# supported everywhere
my @save_argv;
@@ -526,342 +846,598 @@ sub get_options_from_array {
@save_argv = @::ARGV;
@::ARGV = @{$array_ref};
}
- my @retval = GetOptions(options_hash());
- if(not $this_is_ARGV) {
+ # If @keep_only set: Ignore all values except @keep_only
+ my %options = options_hash();
+ if(@keep_only) {
+ my (%keep,@dummy);
+ @keep{@keep_only} = @keep_only;
+ for my $k (grep { not $keep{$_} } keys %options) {
+ # Store the value of the option in @dummy
+ $options{$k} = \@dummy;
+ }
+ }
+ my $retval = GetOptions(%options);
+ if(not $this_is_ARGV) {
@{$array_ref} = @::ARGV;
@::ARGV = @save_argv;
}
- return @retval;
+ return $retval;
}
sub parse_options {
# Returns: N/A
- # Defaults:
- $Global::version = 20120503;
- $Global::progname = 'parallel';
- $Global::infinity = 2**31;
- $Global::debug = 0;
- $Global::verbose = 0;
- $Global::grouped = 1;
- $Global::keeporder = 0;
- $Global::quoting = 0;
- $Global::replace{'{}'} = '{}';
- $Global::replace{'{.}'} = '{.}';
- $Global::replace{'{/}'} = '{/}';
- $Global::replace{'{//}'} = '{//}';
- $Global::replace{'{/.}'} = '{/.}';
- $Global::replace{'{#}'} = '{#}';
- $/="\n";
- $Global::ignore_empty = 0;
- $Global::interactive = 0;
- $Global::stderr_verbose = 0;
- $Global::default_simultaneous_sshlogins = 9;
- $Global::exitstatus = 0;
- $Global::halt_on_error_exitstatus = 0;
- $Global::arg_sep = ":::";
- $Global::arg_file_sep = "::::";
- $Global::trim = 'n';
- $Global::max_jobs_running = 0;
- $Global::job_already_run = '';
-
+ init_globals();
@ARGV=read_options();
- if(defined $::opt_retired) {
- print STDERR "$Global::progname: -g has been retired. Use --group.\n";
- print STDERR "$Global::progname: -B has been retired. Use --bf.\n";
- print STDERR "$Global::progname: -T has been retired. Use --tty.\n";
- print STDERR "$Global::progname: -U has been retired. Use --er.\n";
- print STDERR "$Global::progname: -W has been retired. Use --wd.\n";
- print STDERR "$Global::progname: -Y has been retired. Use --shebang.\n";
- print STDERR "$Global::progname: -H has been retired. Use --halt.\n";
- ::wait_and_exit(255);
- }
- if(defined @::opt_v) { $Global::verbose = $#::opt_v+1; } # Convert -v -v to v=2
- $Global::debug = (defined $::opt_D);
- if(defined $::opt_X) { $Global::ContextReplace = 1; }
- if(defined $::opt_silent) { $Global::verbose = 0; }
- if(defined $::opt_k) { $Global::keeporder = 1; }
- if(defined $::opt_group) { $Global::grouped = 1; }
- if(defined $::opt_u) { $Global::grouped = 0; }
- if(defined $::opt_0) { $/ = "\0"; }
- if(defined $::opt_d) { my $e="sprintf \"$::opt_d\""; $/ = eval $e; }
- if(defined $::opt_p) { $Global::interactive = $::opt_p; }
- if(defined $::opt_q) { $Global::quoting = 1; }
- if(defined $::opt_r) { $Global::ignore_empty = 1; }
- if(defined $::opt_verbose) { $Global::stderr_verbose = 1; }
- if(defined $::opt_I) { $Global::replace{'{}'} = $::opt_I; }
- if(defined $::opt_U) { $Global::replace{'{.}'} = $::opt_U; }
- if(defined $::opt_i) {
- $Global::replace{'{}'} = $::opt_i eq "" ? "{}" : $::opt_i;
- }
- if(defined $::opt_basenamereplace) { $Global::replace{'{/}'} = $::opt_basenamereplace; }
- if(defined $::opt_dirnamereplace) { $Global::replace{'{//}'} = $::opt_dirnamereplace; }
- if(defined $::opt_basenameextensionreplace) {
- $Global::replace{'{/.}'} = $::opt_basenameextensionreplace;
- }
- if(defined $::opt_seqreplace) {
- $Global::replace{'{#}'} = $::opt_seqreplace;
- }
- if(defined $::opt_E) { $Global::end_of_file_string = $::opt_E; }
- if(defined $::opt_n) { $Global::max_number_of_args = $::opt_n; }
- if(defined $::opt_timeout) { $Global::timeoutq = TimeoutQueue->new($::opt_timeout); }
- if(defined $::opt_tmpdir) { $ENV{'TMPDIR'} = $::opt_tmpdir; }
- if(defined $::opt_help) { die_usage(); }
- if(defined $::opt_colsep) { $Global::trim = 'lr'; }
- if(defined $::opt_header) { $::opt_colsep = defined $::opt_colsep ? $::opt_colsep : "\t"; }
- if(defined $::opt_trim) { $Global::trim = $::opt_trim; }
- if(defined $::opt_arg_sep) { $Global::arg_sep = $::opt_arg_sep; }
- if(defined $::opt_arg_file_sep) { $Global::arg_file_sep = $::opt_arg_file_sep; }
- if(defined $::opt_number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); }
- if(defined $::opt_number_of_cores) {
+ # no-* overrides *
+ if($opt::nokeeporder) { $opt::keeporder = undef; }
+ if($opt::noctrlc) { $opt::ctrlc = undef; }
+
+ if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
+ $Global::debug = $opt::D;
+ $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh";
+ if(defined $opt::X) { $Global::ContextReplace = 1; }
+ if(defined $opt::silent) { $Global::verbose = 0; }
+ if(defined $opt::0) { $/ = "\0"; }
+ if(defined $opt::d) { $/ = unquote_printf($opt::d) }
+ if(defined $opt::tagstring) { $opt::tagstring = unquote_printf($opt::tagstring); }
+ if(defined $opt::p) { $Global::interactive = $opt::p; }
+ if(defined $opt::q) { $Global::quoting = 1; }
+ if(defined $opt::r) { $Global::ignore_empty = 1; }
+ if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
+ parse_replacement_string_options();
+ if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
+ if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; }
+ if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); }
+ if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
+ if(defined $opt::help) { die_usage(); }
+ if(defined $opt::colsep) { $Global::trim = 'lr'; }
+ if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; }
+ if(defined $opt::trim) { $Global::trim = $opt::trim; }
+ if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
+ if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; }
+ if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); }
+ if(defined $opt::number_of_cores) {
print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
}
- if(defined $::opt_max_line_length_allowed) {
+ if(defined $opt::max_line_length_allowed) {
print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
}
- if(defined $::opt_version) { version(); wait_and_exit(0); }
- if(defined $::opt_bibtex) { bibtex(); wait_and_exit(0); }
- if(defined $::opt_show_limits) { show_limits(); }
- if(defined @::opt_sshlogin) { @Global::sshlogin = @::opt_sshlogin; }
- if(defined @::opt_sshloginfile) { read_sshloginfiles(@::opt_sshloginfile); }
- if(defined @::opt_return) { push @Global::ret_files, @::opt_return; }
- if(not defined $::opt_recstart and
- not defined $::opt_recend) { $::opt_recend = "\n"; }
- if(not defined $::opt_blocksize) { $::opt_blocksize = "1M"; }
- $::opt_blocksize = multiply_binary_prefix($::opt_blocksize);
- if(defined $::opt_semaphore) { $Global::semaphore = 1; }
- if(defined $::opt_semaphoretimeout) { $Global::semaphore = 1; }
- if(defined $::opt_semaphorename) { $Global::semaphore = 1; }
- if(defined $::opt_fg) { $Global::semaphore = 1; }
- if(defined $::opt_bg) { $Global::semaphore = 1; }
- if(defined $::opt_wait) { $Global::semaphore = 1; }
- if(defined $::opt_minversion) {
+ if(defined $opt::version) { version(); wait_and_exit(0); }
+ if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); }
+ if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
+ if(defined $opt::show_limits) { show_limits(); }
+ if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
+ if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
+ if(@opt::return) { push @Global::ret_files, @opt::return; }
+ if(not defined $opt::recstart and
+ not defined $opt::recend) { $opt::recend = "\n"; }
+ if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
+ $opt::blocksize = multiply_binary_prefix($opt::blocksize);
+ if($opt::blocksize > 2**31-1) {
+ warning("--blocksize >= 2G causes problems. Using 2G-1\n");
+ $opt::blocksize = 2**31-1;
+ }
+ $opt::memfree = multiply_binary_prefix($opt::memfree);
+ if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
+ if(defined $opt::halt and
+ $opt::halt =~ /%/) { $opt::halt /= 100; }
+ if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
+ ::error("--timeout must be seconds or percentage\n");
+ wait_and_exit(255);
+ }
+ if(defined $opt::minversion) {
print $Global::version,"\n";
- if($Global::version < $::opt_minversion) {
+ if($Global::version < $opt::minversion) {
wait_and_exit(255);
} else {
wait_and_exit(0);
}
}
- if(defined $::opt_nonall) {
- # Append a dummy empty argument
- push @ARGV, ":::", "";
+ if(not defined $opt::delay) {
+ # Set --delay to --sshdelay if not set
+ $opt::delay = $opt::sshdelay;
}
- if(defined $::opt_tty) {
+ if($opt::compress_program) {
+ $opt::compress = 1;
+ $opt::decompress_program ||= $opt::compress_program." -dc";
+ }
+ if($opt::compress) {
+ my ($compress, $decompress) = find_compression_program();
+ $opt::compress_program ||= $compress;
+ $opt::decompress_program ||= $decompress;
+ }
+ if(defined $opt::nonall) {
+ # Append a dummy empty argument
+ push @ARGV, $Global::arg_sep, "";
+ }
+ if(defined $opt::tty) {
# Defaults for --tty: -j1 -u
# Can be overridden with -jXXX -g
- if(not defined $::opt_P) {
- $::opt_P = 1;
+ if(not defined $opt::jobs) {
+ $opt::jobs = 1;
}
- if(not defined $::opt_group) {
- $Global::grouped = 0;
+ if(not defined $opt::group) {
+ $opt::ungroup = 1;
}
}
- if(defined @::opt_trc) {
- push @Global::ret_files, @::opt_trc;
- $::opt_transfer = 1;
- $::opt_cleanup = 1;
+ if(@opt::trc) {
+ push @Global::ret_files, @opt::trc;
+ $opt::transfer = 1;
+ $opt::cleanup = 1;
}
- if($::opt_tollef and not $::opt_gnu) {
- # Behave like tollef parallel (from moreutils)
- if(defined $::opt_l) {
- $::opt_load = $::opt_l;
- $::opt_l = undef;
- }
- if(not defined $::opt_arg_sep) {
- $Global::arg_sep = "--";
- }
- }
- if(defined $::opt_l) {
- if($::opt_l eq "-0") {
+ if(defined $opt::max_lines) {
+ if($opt::max_lines eq "-0") {
# -l -0 (swallowed -0)
- $::opt_l = 1;
- $::opt_0 = 1;
+ $opt::max_lines = 1;
+ $opt::0 = 1;
$/ = "\0";
- } elsif ($::opt_l == 0) {
+ } elsif ($opt::max_lines == 0) {
# If not given (or if 0 is given) => 1
- $::opt_l = 1;
+ $opt::max_lines = 1;
+ }
+ $Global::max_lines = $opt::max_lines;
+ if(not $opt::pipe) {
+ # --pipe -L means length of record - not max_number_of_args
+ $Global::max_number_of_args ||= $Global::max_lines;
}
- $Global::max_lines = $::opt_l;
- $Global::max_number_of_args ||= $Global::max_lines;
}
# Read more than one arg at a time (-L, -N)
- if(defined $::opt_L) {
- $Global::max_lines = $::opt_L;
- $Global::max_number_of_args ||= $Global::max_lines;
+ if(defined $opt::L) {
+ $Global::max_lines = $opt::L;
+ if(not $opt::pipe) {
+ # --pipe -L means length of record - not max_number_of_args
+ $Global::max_number_of_args ||= $Global::max_lines;
+ }
}
- if(defined $::opt_N) {
- $Global::max_number_of_args = $::opt_N;
+ if(defined $opt::max_replace_args) {
+ $Global::max_number_of_args = $opt::max_replace_args;
$Global::ContextReplace = 1;
}
- if((defined $::opt_L or defined $::opt_N)
+ if((defined $opt::L or defined $opt::max_replace_args)
and
- not ($::opt_xargs or $::opt_m)) {
+ not ($opt::xargs or $opt::m)) {
$Global::ContextReplace = 1;
}
-
- for (keys %Global::replace) {
- $Global::replace{$_} = ::maybe_quote($Global::replace{$_});
+ if(defined $opt::tag and not defined $opt::tagstring) {
+ # Default = {}
+ $opt::tagstring = $Global::parensleft.$Global::parensright;
}
- %Global::replace_rev = reverse %Global::replace;
- if(defined $::opt_tag and not defined $::opt_tagstring) {
- $::opt_tagstring = $Global::replace{'{}'};
+ if(defined $opt::pipepart and
+ (defined $opt::L or defined $opt::max_lines
+ or defined $opt::max_replace_args)) {
+ ::error("--pipepart is incompatible with --max-replace-args, ",
+ "--max-lines, and -L.\n");
+ wait_and_exit(255);
}
-
if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) {
# Deal with ::: and ::::
@ARGV=read_args_from_command_line();
}
+ parse_semaphore();
- # Semaphore defaults
- # Must be done before computing number of processes and max_line_length
- # because when running as a semaphore GNU Parallel does not read args
- $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
- if($Global::semaphore) {
- # A semaphore does not take input from neither stdin nor file
- @::opt_a = ("/dev/null");
- push(@Global::unget_argv, [Arg->new("")]);
- $Semaphore::timeout = $::opt_semaphoretimeout || 0;
- if(defined $::opt_semaphorename) {
- $Semaphore::name = $::opt_semaphorename;
- } else {
- $Semaphore::name = `tty`;
- chomp $Semaphore::name;
- }
- $Semaphore::fg = $::opt_fg;
- $Semaphore::wait = $::opt_wait;
- $Global::default_simultaneous_sshlogins = 1;
- if(not defined $::opt_P) {
- $::opt_P = 1;
- }
- if($Global::interactive and $::opt_bg) {
- print STDERR "$Global::progname: Jobs running in the ".
- "background cannot be interactive.\n";
+ if(defined $opt::eta) {
+ $opt::progress = $opt::eta;
+ }
+ if(defined $opt::bar) {
+ $opt::progress = $opt::bar;
+ }
+ if(defined $opt::retired) {
+ ::error("-g has been retired. Use --group.\n");
+ ::error("-B has been retired. Use --bf.\n");
+ ::error("-T has been retired. Use --tty.\n");
+ ::error("-U has been retired. Use --er.\n");
+ ::error("-W has been retired. Use --wd.\n");
+ ::error("-Y has been retired. Use --shebang.\n");
+ ::error("-H has been retired. Use --halt.\n");
+ ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n");
::wait_and_exit(255);
- }
- }
- if(defined $::opt_eta) {
- $::opt_progress = $::opt_eta;
}
+ citation_notice();
parse_sshlogin();
+ parse_env_var();
- if(remote_hosts() and ($::opt_X or $::opt_m or $::opt_xargs)) {
+ if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
# As we do not know the max line length on the remote machine
# long commands generated by xargs may fail
- # If opt_N is set, it is probably safe
- print STDERR ("$Global::progname: Warning: using -X or -m ",
- "with --sshlogin may fail\n");
+ # If $opt::max_replace_args is set, it is probably safe
+ ::warning("Using -X or -m with --sshlogin may fail.\n");
}
- if(not defined $::opt_P) {
- $::opt_P = "100%";
+ if(not defined $opt::jobs) {
+ $opt::jobs = "100%";
}
open_joblog();
}
-sub open_joblog {
- my $append = 0;
- if($::opt_resume and not $::opt_joblog) {
- print STDERR ("$Global::progname: --resume requires --joblog\n");
- ::wait_and_exit(255);
+sub init_globals {
+ # Defaults:
+ $Global::version = 20150522;
+ $Global::progname = 'parallel';
+ $Global::infinity = 2**31;
+ $Global::debug = 0;
+ $Global::verbose = 0;
+ $Global::quoting = 0;
+ # Read only table with default --rpl values
+ %Global::replace =
+ (
+ '{}' => '',
+ '{#}' => '1 $_=$job->seq()',
+ '{%}' => '1 $_=$job->slot()',
+ '{/}' => 's:.*/::',
+ '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);',
+ '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
+ '{.}' => 's:\.[^/.]+$::',
+ );
+ %Global::plus =
+ (
+ # {} = {+/}/{/}
+ # = {.}.{+.} = {+/}/{/.}.{+.}
+ # = {..}.{+..} = {+/}/{/..}.{+..}
+ # = {...}.{+...} = {+/}/{/...}.{+...}
+ '{+/}' => 's:/[^/]*$::',
+ '{+.}' => 's:.*\.::',
+ '{+..}' => 's:.*\.([^.]*\.):$1:',
+ '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
+ '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
+ '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
+ '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
+ '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
+ );
+ # Modifiable copy of %Global::replace
+ %Global::rpl = %Global::replace;
+ $/ = "\n";
+ $Global::ignore_empty = 0;
+ $Global::interactive = 0;
+ $Global::stderr_verbose = 0;
+ $Global::default_simultaneous_sshlogins = 9;
+ $Global::exitstatus = 0;
+ $Global::halt_exitstatus = 0;
+ $Global::arg_sep = ":::";
+ $Global::arg_file_sep = "::::";
+ $Global::trim = 'n';
+ $Global::max_jobs_running = 0;
+ $Global::job_already_run = '';
+ $ENV{'TMPDIR'} ||= "/tmp";
+ if(not $ENV{HOME}) {
+ # $ENV{HOME} is sometimes not set if called from PHP
+ ::warning("\$HOME not set. Using /tmp\n");
+ $ENV{HOME} = "/tmp";
}
- if($::opt_joblog) {
- if($::opt_resume) {
- if(open(JOBLOG, $::opt_joblog)) {
- # Read the joblog
- $append = ; # If there is a header: Open as append later
- while() {
- if(/^(\d+)/) {
- # This is 30% faster than set_job_already_run($1);
- vec($Global::job_already_run,$1,1) = 1;
- } else {
- print STDERR ("$Global::progname: Format of '$::opt_joblog' is wrong\n");
- ::wait_and_exit(255);
- }
- }
- close JOBLOG;
- }
+}
+
+sub parse_replacement_string_options {
+ # Deal with --rpl
+ # Uses:
+ # %Global::rpl
+ # $Global::parensleft
+ # $Global::parensright
+ # $opt::parens
+ # $Global::parensleft
+ # $Global::parensright
+ # $opt::plus
+ # %Global::plus
+ # $opt::I
+ # $opt::U
+ # $opt::i
+ # $opt::basenamereplace
+ # $opt::dirnamereplace
+ # $opt::seqreplace
+ # $opt::slotreplace
+ # $opt::basenameextensionreplace
+
+ sub rpl {
+ # Modify %Global::rpl
+ # Replace $old with $new
+ my ($old,$new) = @_;
+ if($old ne $new) {
+ $Global::rpl{$new} = $Global::rpl{$old};
+ delete $Global::rpl{$old};
}
- if($append) {
- # Append to joblog
- if(not open($Global::joblog,">>$::opt_joblog")) {
- print STDERR ("$Global::progname: Cannot append to ",
- "--joblog $::opt_joblog\n");
- ::wait_and_exit(255);
- }
- } else {
- # Overwrite the joblog
- if(not open($Global::joblog,">$::opt_joblog")) {
- print STDERR ("$Global::progname: Cannot write to ",
- "--joblog $::opt_joblog\n");
- ::wait_and_exit(255);
- } else {
- print $Global::joblog
- join("\t", "Seq", "Host", "Starttime", "Runtime",
- "Send", "Receive", "Exitval", "Signal", "Command"
- ). "\n";
- }
+ }
+ my $parens = "{==}";
+ if(defined $opt::parens) { $parens = $opt::parens; }
+ my $parenslen = 0.5*length $parens;
+ $Global::parensleft = substr($parens,0,$parenslen);
+ $Global::parensright = substr($parens,$parenslen);
+ if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
+ if(defined $opt::I) { rpl('{}',$opt::I); }
+ if(defined $opt::U) { rpl('{.}',$opt::U); }
+ if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
+ if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
+ if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
+ if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
+ if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
+ if(defined $opt::basenameextensionreplace) {
+ rpl('{/.}',$opt::basenameextensionreplace);
+ }
+ for(@opt::rpl) {
+ # Create $Global::rpl entries for --rpl options
+ # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
+ my ($shorthand,$long) = split/ /,$_,2;
+ $Global::rpl{$shorthand} = $long;
+ }
+}
+
+sub parse_semaphore {
+ # Semaphore defaults
+ # Must be done before computing number of processes and max_line_length
+ # because when running as a semaphore GNU Parallel does not read args
+ # Uses:
+ # $opt::semaphore
+ # $Global::semaphore
+ # $opt::semaphoretimeout
+ # $Semaphore::timeout
+ # $opt::semaphorename
+ # $Semaphore::name
+ # $opt::fg
+ # $Semaphore::fg
+ # $opt::wait
+ # $Semaphore::wait
+ # $opt::bg
+ # @opt::a
+ # @Global::unget_argv
+ # $Global::default_simultaneous_sshlogins
+ # $opt::jobs
+ # $Global::interactive
+ $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
+ if(defined $opt::semaphore) { $Global::semaphore = 1; }
+ if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
+ if(defined $opt::semaphorename) { $Global::semaphore = 1; }
+ if(defined $opt::fg) { $Global::semaphore = 1; }
+ if(defined $opt::bg) { $Global::semaphore = 1; }
+ if(defined $opt::wait) { $Global::semaphore = 1; }
+ if($Global::semaphore) {
+ # A semaphore does not take input from neither stdin nor file
+ @opt::a = ("/dev/null");
+ push(@Global::unget_argv, [Arg->new("")]);
+ $Semaphore::timeout = $opt::semaphoretimeout || 0;
+ if(defined $opt::semaphorename) {
+ $Semaphore::name = $opt::semaphorename;
+ } else {
+ $Semaphore::name = `tty`;
+ chomp $Semaphore::name;
+ }
+ $Semaphore::fg = $opt::fg;
+ $Semaphore::wait = $opt::wait;
+ $Global::default_simultaneous_sshlogins = 1;
+ if(not defined $opt::jobs) {
+ $opt::jobs = 1;
+ }
+ if($Global::interactive and $opt::bg) {
+ ::error("Jobs running in the ".
+ "background cannot be interactive.\n");
+ ::wait_and_exit(255);
}
}
}
-sub read_options {
- # Read options from command line, profile and $PARALLEL
- # Returns:
- # @ARGV without --options
- # This must be done first as this may exec myself
- if(defined $ARGV[0] and ($ARGV[0]=~/^--shebang / or
- $ARGV[0]=~/^--hashbang /)) {
- # Program is called from #! line in script
- $ARGV[0]=~s/^--shebang *//; # remove --shebang if it is set
- $ARGV[0]=~s/^--hashbang *//; # remove --hashbang if it is set
- my $argfile = pop @ARGV;
- # exec myself to split $ARGV[0] into separate fields
- exec "$0 --skip-first-line -a $argfile @ARGV";
+sub record_env {
+ # Record current %ENV-keys in ~/.parallel/ignored_vars
+ # Returns: N/A
+ my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars";
+ if(open(my $vars_fh, ">", $ignore_filename)) {
+ print $vars_fh map { $_,"\n" } keys %ENV;
+ } else {
+ ::error("Cannot write to $ignore_filename.\n");
+ ::wait_and_exit(255);
}
+}
- Getopt::Long::Configure("bundling","pass_through");
- # Check if there is a --profile to set @::opt_profile
- GetOptions("profile|J=s" => \@::opt_profile) || die_usage();
- # Add options from .parallel/config and other profiles
- my @ARGV_profile = ();
- my @ARGV_env = ();
- my @config_profiles = (
- "/etc/parallel/config",
- $ENV{'HOME'}."/.parallel/config",
- $ENV{'HOME'}."/.parallelrc");
- my @profiles = @config_profiles;
- if(@::opt_profile) {
- # --profile overrides default profiles
- @profiles = ();
- for my $profile (@::opt_profile) {
- push @profiles, $ENV{'HOME'}."/.parallel/".$profile;
+sub parse_env_var {
+ # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen
+ #
+ # Bash functions must be parsed to export them remotely
+ # Pre-shellshock style bash function:
+ # myfunc=() {...
+ # Post-shellshock style bash function (v1):
+ # BASH_FUNC_myfunc()=() {...
+ # Post-shellshock style bash function (v2):
+ # BASH_FUNC_myfunc%%=() {...
+ #
+ # Uses:
+ # $Global::envvar = eval string that will set variables in both bash and csh
+ # $Global::envwarn = If functions are used: Give warning in csh
+ # $Global::envvarlen = length of $Global::envvar
+ # @opt::env
+ # $Global::shell
+ # %ENV
+ # Returns: N/A
+ $Global::envvar = "";
+
+ $Global::envvarlen = length $Global::envvar;
+}
+
+sub open_joblog {
+ # Open joblog as specified by --joblog
+ # Uses:
+ # $opt::resume
+ # $opt::resume_failed
+ # $opt::joblog
+ # $opt::results
+ # $Global::job_already_run
+ # %Global::fd
+ my $append = 0;
+ if(($opt::resume or $opt::resume_failed)
+ and
+ not ($opt::joblog or $opt::results)) {
+ ::error("--resume and --resume-failed require --joblog or --results.\n");
+ ::wait_and_exit(255);
+ }
+ if($opt::joblog) {
+ if($opt::resume || $opt::resume_failed) {
+ if(open(my $joblog_fh, "<", $opt::joblog)) {
+ # Read the joblog
+ $append = <$joblog_fh>; # If there is a header: Open as append later
+ my $joblog_regexp;
+ if($opt::resume_failed) {
+ # Make a regexp that only matches commands with exit+signal=0
+ # 4 host 1360490623.067 3.445 1023 1222 0 0 command
+ $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
+ } else {
+ # Just match the job number
+ $joblog_regexp='^(\d+)';
+ }
+ while(<$joblog_fh>) {
+ if(/$joblog_regexp/o) {
+ # This is 30% faster than set_job_already_run($1);
+ vec($Global::job_already_run,($1||0),1) = 1;
+ } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
+ ::error("Format of '$opt::joblog' is wrong: $_");
+ ::wait_and_exit(255);
+ }
+ }
+ close $joblog_fh;
+ }
+ }
+ if($append) {
+ # Append to joblog
+ if(not open($Global::joblog, ">>", $opt::joblog)) {
+ ::error("Cannot append to --joblog $opt::joblog.\n");
+ ::wait_and_exit(255);
+ }
+ } else {
+ if($opt::joblog eq "-") {
+ # Use STDOUT as joblog
+ $Global::joblog = $Global::fd{1};
+ } elsif(not open($Global::joblog, ">", $opt::joblog)) {
+ # Overwrite the joblog
+ ::error("Cannot write to --joblog $opt::joblog.\n");
+ ::wait_and_exit(255);
+ }
+ print $Global::joblog
+ join("\t", "Seq", "Host", "Starttime", "JobRuntime",
+ "Send", "Receive", "Exitval", "Signal", "Command"
+ ). "\n";
}
}
- for my $profile (@profiles) {
- if(-r $profile) {
- open (IN, "<", $profile) || ::die_bug("read-profile: $profile");
- while() {
- /^\s*\#/ and next;
- chomp;
- push @ARGV_profile, shell_unquote(split/(?'/dev/null , 1 2 3 , {1..3} , lz4 lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2
+ # sort -nk4 jl
+ my @prg = qw(lz4 pigz lzop plzip pbzip2 pxz gzip lzma xz bzip2 lzip);
+ for my $p (@prg) {
+ if(which($p)) {
+ return ("$p -c -1","$p -dc");
+ }
+ }
+ # Fall back to cat
+ return ("cat","cat");
+}
+
+
+sub read_options {
+ # Read options from command line, profile and $PARALLEL
+ # Uses:
+ # $opt::shebang_wrap
+ # $opt::shebang
+ # @ARGV
+ # $opt::plain
+ # @opt::profile
+ # $ENV{'HOME'}
+ # $ENV{'PARALLEL'}
+ # Returns:
+ # @ARGV_no_opt = @ARGV without --options
+
+ # This must be done first as this may exec myself
+ if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
+ $ARGV[0] =~ /^--shebang-?wrap/ or
+ $ARGV[0] =~ /^--hashbang/)) {
+ # Program is called from #! line in script
+ # remove --shebang-wrap if it is set
+ $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
+ # remove --shebang if it is set
+ $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
+ # remove --hashbang if it is set
+ $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
+ if($opt::shebang) {
+ my $argfile = shell_quote_scalar(pop @ARGV);
+ # exec myself to split $ARGV[0] into separate fields
+ exec "$0 --skip-first-line -a $argfile @ARGV";
+ }
+ if($opt::shebang_wrap) {
+ my @options;
+ my @parser;
+ if ($^O eq 'freebsd') {
+ # FreeBSD's #! puts different values in @ARGV than Linux' does.
+ my @nooptions = @ARGV;
+ get_options_from_array(\@nooptions);
+ while($#ARGV > $#nooptions) {
+ push @options, shift @ARGV;
+ }
+ while(@ARGV and $ARGV[0] ne ":::") {
+ push @parser, shift @ARGV;
+ }
+ if(@ARGV and $ARGV[0] eq ":::") {
+ shift @ARGV;
+ }
+ } else {
+ @options = shift @ARGV;
+ }
+ my $script = shell_quote_scalar(shift @ARGV);
+ # exec myself to split $ARGV[0] into separate fields
+ exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV";
+ }
+ }
+
+ Getopt::Long::Configure("bundling","require_order");
+ my @ARGV_copy = @ARGV;
+ # Check if there is a --profile to set @opt::profile
+ get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
+ my @ARGV_profile = ();
+ my @ARGV_env = ();
+ if(not $opt::plain) {
+ # Add options from .parallel/config and other profiles
+ my @config_profiles = (
+ "/etc/parallel/config",
+ $ENV{'HOME'}."/.parallel/config",
+ $ENV{'HOME'}."/.parallelrc");
+ my @profiles = @config_profiles;
+ if(@opt::profile) {
+ # --profile overrides default profiles
+ @profiles = ();
+ for my $profile (@opt::profile) {
+ if(-r $profile) {
+ push @profiles, $profile;
+ } else {
+ push @profiles, $ENV{'HOME'}."/.parallel/".$profile;
+ }
+ }
+ }
+ for my $profile (@profiles) {
+ if(-r $profile) {
+ open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile");
+ while(<$in_fh>) {
+ /^\s*\#/ and next;
+ chomp;
+ push @ARGV_profile, shellwords($_);
+ }
+ close $in_fh;
+ } else {
+ if(grep /^$profile$/, @config_profiles) {
+ # config file is not required to exist
+ } else {
+ ::error("$profile not readable.\n");
+ wait_and_exit(255);
+ }
+ }
+ }
+ # Add options from shell variable $PARALLEL
+ if($ENV{'PARALLEL'}) {
+ @ARGV_env = shellwords($ENV{'PARALLEL'});
+ }
}
Getopt::Long::Configure("bundling","require_order");
get_options_from_array(\@ARGV_profile) || die_usage();
- # Add options from shell variable $PARALLEL
- $ENV{'PARALLEL'} and @ARGV_env = shell_unquote(split/(? ".arg");
+ my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
unlink($name);
# Put args into argfile
print $outfh map { $_,$/ } @group;
seek $outfh, 0, 0;
+ exit_if_disk_full();
# Append filehandle to -a
- push @::opt_a, $outfh;
- } elsif($group eq $Global::arg_file_sep) {
- # Group of file names on the command line.
- # Append args into -a
- push @::opt_a, @group;
+ push @opt::a, $outfh;
} else {
::die_bug("Unknown command line group: $group");
}
@@ -933,103 +1520,175 @@ sub read_args_from_command_line {
sub cleanup {
# Returns: N/A
- if(@::opt_basefile) { cleanup_basefile(); }
+ unlink keys %Global::unlink;
+ map { rmdir $_ } keys %Global::unlink;
+ if(@opt::basefile) { cleanup_basefile(); }
}
sub __QUOTING_ARGUMENTS_FOR_SHELL__ {}
sub shell_quote {
+ # Input:
+ # @strings = strings to be quoted
+ # Output:
+ # @shell_quoted_strings = string quoted with \ as needed by the shell
my @strings = (@_);
for my $a (@strings) {
- $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'])/\\$1/g;
+ $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
$a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
}
return wantarray ? @strings : "@strings";
}
+sub shell_quote_empty {
+ # Inputs:
+ # @strings = strings to be quoted
+ # Returns:
+ # @quoted_strings = empty strings quoted as ''.
+ my @strings = shell_quote(@_);
+ for my $a (@strings) {
+ if($a eq "") {
+ $a = "''";
+ }
+ }
+ return wantarray ? @strings : "@strings";
+}
+
sub shell_quote_scalar {
# Quote the string so shell will not expand any special chars
+ # Inputs:
+ # $string = string to be quoted
# Returns:
- # string quoted with \ as needed by the shell
- my $a = shift;
- $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'])/\\$1/g;
- $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
+ # $shell_quoted = string quoted with \ as needed by the shell
+ my $a = $_[0];
+ if(defined $a) {
+ # Solaris sh wants ^ quoted.
+ # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
+ # This is 1% faster than the above
+ $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go;
+ $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \'
+ }
return $a;
}
-sub maybe_quote {
- # If $Global::quoting then quote the string so shell will not expand any special chars
- # Else do not quote
+sub shell_quote_file {
+ # Quote the string so shell will not expand any special chars and prepend ./ if needed
+ # Input:
+ # $filename = filename to be shell quoted
# Returns:
- # if $Global::quoting string quoted with \ as needed by the shell
- # else string unaltered
- if($Global::quoting) {
- return shell_quote_scalar(@_);
- } else {
- return "@_";
+ # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed
+ my $a = shell_quote_scalar(shift);
+ if(defined $a) {
+ if($a =~ m:^/: or $a =~ m:^\./:) {
+ # /abs/path or ./rel/path => skip
+ } else {
+ # rel/path => ./rel/path
+ $a = "./".$a;
+ }
}
+ return $a;
}
-sub maybe_unquote {
- # If $Global::quoting then unquote the string as shell would
- # Else do not unquote
+sub shellwords {
+ # Input:
+ # $string = shell line
# Returns:
- # if $Global::quoting string unquoted as done by the shell
- # else string unaltered
- if($Global::quoting) {
- return shell_unquote(@_);
- } else {
- return "@_";
- }
+ # @shell_words = $string split into words as shell would do
+ $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
+ return Text::ParseWords::shellwords(@_);
}
-sub shell_unquote {
- # Unquote strings from shell_quote
+sub perl_quote_scalar {
+ # Quote the string so perl's eval will not expand any special chars
+ # Inputs:
+ # $string = string to be quoted
# Returns:
- # string with shell quoting removed
- my @strings = (@_);
- my $arg;
- for $arg (@strings) {
- if(not defined $arg) {
- $arg = "";
- }
- $arg =~ s/'\n'/\n/g; # filenames with '\n' is quoted using \'
- $arg =~ s/\\([\002-\011\013-\032])/$1/g;
- $arg =~ s/\\([\#\?\`\(\)\{\}\*\>\<\~\|\; \"\!\$\&\'])/$1/g;
- $arg =~ s/\\\\/\\/g;
+ # $shell_quoted = string quoted with \ as needed by perl's eval
+ my $a = $_[0];
+ if(defined $a) {
+ $a =~ s/[\\\"\$\@]/\\$&/go;
}
- return wantarray ? @strings : "@strings";
+ return $a;
+}
+
+sub unquote_printf {
+ # Convert \t \n \r \000 \0
+ $_ = shift;
+ s/\\t/\t/g;
+ s/\\n/\n/g;
+ s/\\r/\r/g;
+ s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
+ s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
+ return $_;
}
sub __FILEHANDLES__ {}
+
+sub save_stdin_stdout_stderr {
+ # Remember the original STDIN, STDOUT and STDERR
+ # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
+ # Uses:
+ # %Global::fd
+ # $Global::original_stderr
+ # $Global::original_stdin
+ # Returns: N/A
+
+ # Find file descriptors that are already opened (by the shell)
+ for my $fdno (1..61) {
+ # /dev/fd/62 and above are used by bash for <(cmd)
+ my $fh;
+ # 2-argument-open is used to be compatible with old perl 5.8.0
+ # bug #43570: Perl 5.8.0 creates 61 files
+ if(open($fh,">&=$fdno")) {
+ $Global::fd{$fdno}=$fh;
+ }
+ }
+ open $Global::original_stderr, ">&", "STDERR" or
+ ::die_bug("Can't dup STDERR: $!");
+ open $Global::status_fd, ">&", "STDERR" or
+ ::die_bug("Can't dup STDERR: $!");
+ open $Global::original_stdin, "<&", "STDIN" or
+ ::die_bug("Can't dup STDIN: $!");
+}
+
sub enough_file_handles {
- # check that we have enough filehandles available for starting
+ # Check that we have enough filehandles available for starting
# another job
+ # Uses:
+ # $opt::ungroup
+ # %Global::fd
# Returns:
# 1 if ungrouped (thus not needing extra filehandles)
# 0 if too few filehandles
# 1 if enough filehandles
- if($Global::grouped) {
+ if(not $opt::ungroup) {
my %fh;
my $enough_filehandles = 1;
- # We need a filehandle for STDOUT and STDERR
- # perl uses 7 filehandles for something?
+ # perl uses 7 filehandles for something?
# open3 uses 2 extra filehandles temporarily
- for my $i (1..8) {
- $enough_filehandles &&= open($fh{$i},"&STDOUT" or
- ::die_bug("Can't dup STDOUT: $!");
- open $Global::original_stderr, ">&STDERR" or
- ::die_bug("Can't dup STDERR: $!");
- open $Global::original_stdin, "<&STDIN" or
- ::die_bug("Can't dup STDIN: $!");
$Global::total_running = 0;
$Global::total_started = 0;
$Global::tty_taken = 0;
$SIG{USR1} = \&list_running_jobs;
$SIG{USR2} = \&toggle_progress;
- if(@::opt_basefile) { setup_basefile(); }
+ if(@opt::basefile) { setup_basefile(); }
}
-sub start_more_jobs {
- # Returns:
- # number of jobs started
- my $jobs_started = 0;
- if(not $Global::start_no_new_jobs) {
- if($Global::max_procs_file) {
- my $mtime = (stat($Global::max_procs_file))[9];
- if($mtime > $Global::max_procs_file_last_mod) {
- $Global::max_procs_file_last_mod = $mtime;
- for my $sshlogin (values %Global::host) {
- $sshlogin->set_max_jobs_running(undef);
- }
- }
- }
- if($Global::max_load_file) {
- my $mtime = (stat($Global::max_load_file))[9];
- if($mtime > $Global::max_load_file_last_mod) {
- $Global::max_load_file_last_mod = $mtime;
- for my $sshlogin (values %Global::host) {
- $sshlogin->set_max_loadavg(undef);
- }
- }
- }
+{
+ my $last_time;
+ my %last_mtime;
+ my $max_procs_file_last_mod;
- for my $sshlogin (values %Global::host) {
- debug("Running jobs before on ".$sshlogin->string().": ".$sshlogin->jobs_running()."\n");
- if($::opt_load and $sshlogin->loadavg_too_high()) {
- # The load is too high or unknown
- next;
- }
- if($::opt_noswap and $sshlogin->swapping()) {
- # The server is swapping
- next;
- }
- while ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
- if($Global::JobQueue->empty() and not $::opt_pipe) {
- last;
- }
- debug($sshlogin->string()." has ".$sshlogin->jobs_running()
- . " out of " . $sshlogin->max_jobs_running()
- . " jobs running. Start another.\n");
- if(start_another_job($sshlogin) == 0) {
- # No more jobs to start on this $sshlogin
- debug("No jobs started on ".$sshlogin->string()."\n");
- last;
- }
- debug("Job started on ".$sshlogin->string()."\n");
- $sshlogin->inc_jobs_running();
- $jobs_started++;
- }
- debug("Running jobs after on ".$sshlogin->string().": ".$sshlogin->jobs_running()
- ." of ".$sshlogin->max_jobs_running() ."\n");
- }
- }
- return $jobs_started;
-}
-
-sub start_another_job {
- # Grab a job from Global::JobQueue, start it at sshlogin
- # and remember the pid, the STDOUT and the STDERR handles
- # Returns:
- # 1 if another jobs was started
- # 0 otherwise
- my $sshlogin = shift;
- # Do we have enough file handles to start another job?
- if(enough_file_handles()) {
- if($Global::JobQueue->empty() and not $::opt_pipe) {
- # No more commands to run
- debug("Not starting: JobQueue empty\n");
- return 0;
- } else {
- my $job;
- do {
- $job = get_job_with_sshlogin($sshlogin);
- if(not defined $job) {
- # No command available for that sshlogin
- debug("Not starting: no jobs available for ".$sshlogin->string()."\n");
- return 0;
- }
- } while ($job->is_already_in_joblog());
- debug("Command to run on '".$job->sshlogin()."': '".$job->replaced()."'\n");
- if($job->start()) {
- $Global::running{$job->pid()} = $job;
- debug("Started as seq ",$job->seq()," pid:",$job->pid(),"\n");
- return 1;
- } else {
- # If interactive says: Dont run the job, then skip it and run the next
- return start_another_job($sshlogin);
- }
- }
- } else {
- # No more file handles
- debug("Not starting: no more file handles\n");
- return 0;
- }
-}
-
-sub drain_job_queue {
- # Returns: N/A
- $Private::first_completed ||= time;
- if($::opt_progress) {
- print $Global::original_stderr init_progress();
- }
- my $last_header="";
- my $sleep = 0.2;
- do {
- while($Global::total_running > 0) {
- debug("jobs running: ", $Global::total_running, "==", scalar
- keys %Global::running," slots: ", $Global::max_jobs_running,
- " Memory usage:".my_memory_usage()." ");
- if($::opt_pipe) {
- # When using --pipe sometimes file handles are not closed properly
- for my $job (values %Global::running) {
- my $fh = $job->stdin();
- close $fh;
+ sub changed_procs_file {
+ # If --jobs is a file and it is modfied:
+ # Force recomputing of max_jobs_running for each $sshlogin
+ # Uses:
+ # $Global::max_procs_file
+ # %Global::host
+ # Returns: N/A
+ if($Global::max_procs_file) {
+ # --jobs filename
+ my $mtime = (stat($Global::max_procs_file))[9];
+ $max_procs_file_last_mod ||= 0;
+ if($mtime > $max_procs_file_last_mod) {
+ # file changed: Force re-computing max_jobs_running
+ $max_procs_file_last_mod = $mtime;
+ for my $sshlogin (values %Global::host) {
+ $sshlogin->set_max_jobs_running(undef);
}
}
- if($::opt_progress) {
- my %progress = progress();
- if($last_header ne $progress{'header'}) {
- print $Global::original_stderr "\n",$progress{'header'},"\n";
- $last_header = $progress{'header'};
- }
- print $Global::original_stderr "\r",$progress{'status'};
- }
- # Sometimes SIGCHLD is not registered, so force reaper
- $sleep = ::reap_usleep($sleep);
- }
- if(not $Global::JobQueue->empty()) {
- start_more_jobs(); # These jobs may not be started because of loadavg
- $sleep = ::reap_usleep($sleep);
- }
- } while ($Global::total_running > 0
- or
- not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
+ }
+ }
- if($::opt_progress) {
- print $Global::original_stderr "\n";
+ sub changed_sshloginfile {
+ # If --slf is changed:
+ # reload --slf
+ # filter_hosts
+ # setup_basefile
+ # Uses:
+ # @opt::sshloginfile
+ # @Global::sshlogin
+ # %Global::host
+ # $opt::filter_hosts
+ # Returns: N/A
+ if(@opt::sshloginfile) {
+ # Is --sshloginfile changed?
+ for my $slf (@opt::sshloginfile) {
+ my $actual_file = expand_slf_shorthand($slf);
+ my $mtime = (stat($actual_file))[9];
+ $last_mtime{$actual_file} ||= $mtime;
+ if($mtime - $last_mtime{$actual_file} > 1) {
+ ::debug("run","--sshloginfile $actual_file changed. reload\n");
+ $last_mtime{$actual_file} = $mtime;
+ # Reload $slf
+ # Empty sshlogins
+ @Global::sshlogin = ();
+ for (values %Global::host) {
+ # Don't start new jobs on any host
+ # except the ones added back later
+ $_->set_max_jobs_running(0);
+ }
+ # This will set max_jobs_running on the SSHlogins
+ read_sshloginfile($actual_file);
+ parse_sshlogin();
+ $opt::filter_hosts and filter_hosts();
+ setup_basefile();
+ }
+ }
+ }
+ }
+
+ sub start_more_jobs {
+ # Run start_another_job() but only if:
+ # * not $Global::start_no_new_jobs set
+ # * not JobQueue is empty
+ # * not load on server is too high
+ # * not server swapping
+ # * not too short time since last remote login
+ # Uses:
+ # %Global::host
+ # $Global::start_no_new_jobs
+ # $Global::JobQueue
+ # $opt::pipe
+ # $opt::load
+ # $opt::noswap
+ # $opt::delay
+ # $Global::newest_starttime
+ # Returns:
+ # $jobs_started = number of jobs started
+ my $jobs_started = 0;
+ my $jobs_started_this_round = 0;
+ if($Global::start_no_new_jobs) {
+ return $jobs_started;
+ }
+ if(time - ($last_time||0) > 1) {
+ # At most do this every second
+ $last_time = time;
+ changed_procs_file();
+ changed_sshloginfile();
+ }
+ do {
+ $jobs_started_this_round = 0;
+ # This will start 1 job on each --sshlogin (if possible)
+ # thus distribute the jobs on the --sshlogins round robin
+ for my $sshlogin (values %Global::host) {
+ if($Global::JobQueue->empty() and not $opt::pipe) {
+ # No more jobs in the queue
+ last;
+ }
+ debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
+ $sshlogin->jobs_running(), "\n");
+ if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
+ if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) {
+ # It has been too short since last start
+ next;
+ }
+ if($opt::load and $sshlogin->loadavg_too_high()) {
+ # The load is too high or unknown
+ next;
+ }
+ if($opt::noswap and $sshlogin->swapping()) {
+ # The server is swapping
+ next;
+ }
+ if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
+ # The server has not enough mem free
+ ::debug("mem", "Not starting job: not enough mem\n");
+ next;
+ }
+ if($sshlogin->too_fast_remote_login()) {
+ # It has been too short since
+ next;
+ }
+ debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(),
+ " out of ", $sshlogin->max_jobs_running(),
+ " jobs running. Start another.\n");
+ if(start_another_job($sshlogin) == 0) {
+ # No more jobs to start on this $sshlogin
+ debug("run","No jobs started on ", $sshlogin->string(), "\n");
+ next;
+ }
+ $sshlogin->inc_jobs_running();
+ $sshlogin->set_last_login_at(::now());
+ $jobs_started++;
+ $jobs_started_this_round++;
+ }
+ debug("run","Running jobs after on ", $sshlogin->string(), ": ",
+ $sshlogin->jobs_running(), " of ",
+ $sshlogin->max_jobs_running(), "\n");
+ }
+ } while($jobs_started_this_round);
+
+ return $jobs_started;
}
}
-sub toggle_progress {
- # Turn on/off progress view
- # Returns: N/A
- $::opt_progress = not $::opt_progress;
- if($::opt_progress) {
- print $Global::original_stderr init_progress();
+{
+ my $no_more_file_handles_warned;
+
+ sub start_another_job {
+ # If there are enough filehandles
+ # and JobQueue not empty
+ # and not $job is in joblog
+ # Then grab a job from Global::JobQueue,
+ # start it at sshlogin
+ # mark it as virgin_job
+ # Inputs:
+ # $sshlogin = the SSHLogin to start the job on
+ # Uses:
+ # $Global::JobQueue
+ # $opt::pipe
+ # $opt::results
+ # $opt::resume
+ # @Global::virgin_jobs
+ # Returns:
+ # 1 if another jobs was started
+ # 0 otherwise
+ my $sshlogin = shift;
+ # Do we have enough file handles to start another job?
+ if(enough_file_handles()) {
+ if($Global::JobQueue->empty() and not $opt::pipe) {
+ # No more commands to run
+ debug("start", "Not starting: JobQueue empty\n");
+ return 0;
+ } else {
+ my $job;
+ # Skip jobs already in job log
+ # Skip jobs already in results
+ do {
+ $job = get_job_with_sshlogin($sshlogin);
+ if(not defined $job) {
+ # No command available for that sshlogin
+ debug("start", "Not starting: no jobs available for ",
+ $sshlogin->string(), "\n");
+ return 0;
+ }
+ } while ($job->is_already_in_joblog()
+ or
+ ($opt::results and $opt::resume and $job->is_already_in_results()));
+ debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
+ $job->replaced(),"'\n");
+ if($job->start()) {
+ if($opt::pipe) {
+ push(@Global::virgin_jobs,$job);
+ }
+ debug("start", "Started as seq ", $job->seq(),
+ " pid:", $job->pid(), "\n");
+ return 1;
+ } else {
+ # Not enough processes to run the job.
+ # Put it back on the queue.
+ $Global::JobQueue->unget($job);
+ # Count down the number of jobs to run for this SSHLogin.
+ my $max = $sshlogin->max_jobs_running();
+ if($max > 1) { $max--; } else {
+ ::error("No more processes: cannot run a single job. Something is wrong.\n");
+ ::wait_and_exit(255);
+ }
+ $sshlogin->set_max_jobs_running($max);
+ # Sleep up to 300 ms to give other processes time to die
+ ::usleep(rand()*300);
+ ::warning("No more processes: ",
+ "Decreasing number of running jobs to $max. ",
+ "Raising ulimit -u or /etc/security/limits.conf may help.\n");
+ return 0;
+ }
+ }
+ } else {
+ # No more file handles
+ $no_more_file_handles_warned++ or
+ ::warning("No more file handles. ",
+ "Raising ulimit -n or /etc/security/limits.conf may help.\n");
+ return 0;
+ }
}
}
sub init_progress {
+ # Uses:
+ # $opt::bar
# Returns:
# list of computers for progress output
$|=1;
+ if($opt::bar) {
+ return("","");
+ }
my %progress = progress();
return ("\nComputers / CPU cores / Max jobs to run\n",
$progress{'workerlist'});
}
+sub drain_job_queue {
+ # Uses:
+ # $opt::progress
+ # $Global::total_running
+ # $Global::max_jobs_running
+ # %Global::running
+ # $Global::JobQueue
+ # %Global::host
+ # $Global::start_no_new_jobs
+ # Returns: N/A
+ if($opt::progress) {
+ ::status(init_progress());
+ }
+ my $last_header = "";
+ my $sleep = 0.2;
+ do {
+ while($Global::total_running > 0) {
+ debug($Global::total_running, "==", scalar
+ keys %Global::running," slots: ", $Global::max_jobs_running);
+ if($opt::pipe) {
+ # When using --pipe sometimes file handles are not closed properly
+ for my $job (values %Global::running) {
+ close $job->fh(0,"w");
+ }
+ }
+ if($opt::progress) {
+ my %progress = progress();
+ if($last_header ne $progress{'header'}) {
+ ::status("\n", $progress{'header'}, "\n");
+ $last_header = $progress{'header'};
+ }
+ ::status("\r",$progress{'status'});
+ }
+ if($Global::total_running < $Global::max_jobs_running
+ and not $Global::JobQueue->empty()) {
+ # These jobs may not be started because of loadavg
+ # or too little time between each ssh login.
+ if(start_more_jobs() > 0) {
+ # Exponential back-on if jobs were started
+ $sleep = $sleep/2+0.001;
+ }
+ }
+ # Exponential back-off sleeping
+ $sleep = ::reap_usleep($sleep);
+ }
+ if(not $Global::JobQueue->empty()) {
+ # These jobs may not be started:
+ # * because there the --filter-hosts has removed all
+ if(not %Global::host) {
+ ::error("There are no hosts left to run on.\n");
+ ::wait_and_exit(255);
+ }
+ # * because of loadavg
+ # * because of too little time between each ssh login.
+ start_more_jobs();
+ $sleep = ::reap_usleep($sleep);
+ if($Global::max_jobs_running == 0) {
+ ::warning("There are no job slots available. Increase --jobs.\n");
+ }
+ }
+ } while ($Global::total_running > 0
+ or
+ not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
+ if($opt::progress) {
+ my %progress = progress();
+ ::status("\r", $progress{'status'}, "\n");
+ }
+}
+
+sub toggle_progress {
+ # Turn on/off progress view
+ # Uses:
+ # $opt::progress
+ # Returns: N/A
+ $opt::progress = not $opt::progress;
+ if($opt::progress) {
+ ::status(init_progress());
+ }
+}
+
sub progress {
+ # Uses:
+ # $opt::bar
+ # $opt::eta
+ # %Global::host
+ # $Global::total_started
# Returns:
- # list of workers
- # header that will fit on the screen
- # status message that will fit on the screen
+ # $workerlist = list of workers
+ # $header = that will fit on the screen
+ # $status = message that will fit on the screen
+ if($opt::bar) {
+ return ("workerlist" => "", "header" => "", "status" => bar());
+ }
+ my $eta = "";
+ my ($status,$header)=("","");
+ if($opt::eta) {
+ my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
+ compute_eta();
+ $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
+ $this_eta, $left, $avgtime);
+ }
my $termcols = terminal_columns();
- my ($status, $header) = ("x"x($termcols+1),"");
my @workers = sort keys %Global::host;
my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers;
my $workerno = 1;
@@ -1252,34 +2101,8 @@ sub progress {
($Global::host{$w}->ncpus() || "-")." / ".
$Global::host{$w}->max_jobs_running()."\n";
}
- my $eta = "";
- if($::opt_eta) {
- my $completed = 0;
- for(@workers) { $completed += $Global::host{$_}->jobs_completed() }
- if($completed) {
- my $total = $Global::JobQueue->total_jobs();
- my $left = $total - $completed;
- my $pctcomplete = $completed / $total;
- my $timepassed = (time - $Private::first_completed);
- my $avgtime = $timepassed / $completed;
- $Private::smoothed_avg_time ||= $avgtime;
- # Smooth the eta so it does not jump wildly
- $Private::smoothed_avg_time = (1 - $pctcomplete) *
- $Private::smoothed_avg_time + $pctcomplete * $avgtime;
- my $this_eta;
- $Private::last_time ||= $timepassed;
- if($timepassed != $Private::last_time
- or not defined $Private::last_eta) {
- $Private::last_time = $timepassed;
- $this_eta = $left * $Private::smoothed_avg_time;
- $Private::last_eta = $this_eta;
- } else {
- $this_eta = $Private::last_eta;
- }
- $eta = sprintf("ETA: %ds %dleft %.2favg ", $this_eta, $left, $avgtime);
- }
- }
-
+ $status = "x"x($termcols+1);
+ # Select an output format that will fit on a single line
if(length $status > $termcols) {
# sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
$header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
@@ -1393,41 +2216,141 @@ sub progress {
return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
}
-sub terminal_columns {
- # Get the number of columns of the display
- # Returns:
- # number of columns of the screen
- if(not $Private::columns) {
- $Private::columns = $ENV{'COLUMNS'};
- if(not $Private::columns) {
- my $resize = qx{ resize 2>/dev/null };
- $resize =~ /COLUMNS=(\d+);/ and do { $Private::columns = $1; };
- }
- $Private::columns ||= 80;
+{
+ my ($total, $first_completed, $smoothed_avg_time);
+
+ sub compute_eta {
+ # Calculate important numbers for ETA
+ # Returns:
+ # $total = number of jobs in total
+ # $completed = number of jobs completed
+ # $left = number of jobs left
+ # $pctcomplete = percent of jobs completed
+ # $avgtime = averaged time
+ # $eta = smoothed eta
+ $total ||= $Global::JobQueue->total_jobs();
+ my $completed = 0;
+ for(values %Global::host) { $completed += $_->jobs_completed() }
+ my $left = $total - $completed;
+ if(not $completed) {
+ return($total, $completed, $left, 0, 0, 0);
+ }
+ my $pctcomplete = $completed / $total;
+ $first_completed ||= time;
+ my $timepassed = (time - $first_completed);
+ my $avgtime = $timepassed / $completed;
+ $smoothed_avg_time ||= $avgtime;
+ # Smooth the eta so it does not jump wildly
+ $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
+ $pctcomplete * $avgtime;
+ my $eta = int($left * $smoothed_avg_time);
+ return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
+ }
+}
+
+{
+ my ($rev,$reset);
+
+ sub bar {
+ # Return:
+ # $status = bar with eta, completed jobs, arg and pct
+ $rev ||= "\033[7m";
+ $reset ||= "\033[0m";
+ my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
+ compute_eta();
+ my $arg = $Global::newest_job ?
+ $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : "";
+ # These chars mess up display in the terminal
+ $arg =~ tr/[\011-\016\033\302-\365]//d;
+ my $bar_text =
+ sprintf("%d%% %d:%d=%ds %s",
+ $pctcomplete*100, $completed, $left, $eta, $arg);
+ my $terminal_width = terminal_columns();
+ my $s = sprintf("%-${terminal_width}s",
+ substr($bar_text." "x$terminal_width,
+ 0,$terminal_width));
+ my $width = int($terminal_width * $pctcomplete);
+ substr($s,$width,0) = $reset;
+ my $zenity = sprintf("%-${terminal_width}s",
+ substr("# $eta sec $arg",
+ 0,$terminal_width));
+ $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
+ "\r" . $rev . $s . $reset;
+ return $s;
+ }
+}
+
+{
+ my ($columns,$last_column_time);
+
+ sub terminal_columns {
+ # Get the number of columns of the terminal.
+ # Only update once per second.
+ # Returns:
+ # number of columns of the screen
+ if(not $columns or $last_column_time < time) {
+ $last_column_time = time;
+ $columns = $ENV{'COLUMNS'};
+ if(not $columns) {
+ my $stty = qx{ stty -a /dev/null' };
+ $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
+ }
+ $columns ||= 80;
+ }
+ return $columns;
}
- return $Private::columns;
}
sub get_job_with_sshlogin {
+ # Input:
+ # $sshlogin = which host should the job be run on?
+ # Uses:
+ # $opt::hostgroups
+ # $Global::JobQueue
# Returns:
- # next command to run with ssh command wrapping if remote
- # next command to run with no wrapping (clean_command)
+ # $job = next job object for $sshlogin if any available
my $sshlogin = shift;
+ my $job;
- if($::oodebug and $Global::JobQueue->empty()) {
- Carp::confess("get_job_with_sshlogin should never be called if empty");
+ if ($opt::hostgroups) {
+ my @other_hostgroup_jobs = ();
+
+ while($job = $Global::JobQueue->get()) {
+ if($sshlogin->in_hostgroups($job->hostgroups())) {
+ # Found a job to be run on a hostgroup of this
+ # $sshlogin
+ last;
+ } else {
+ # This job was not in the hostgroups of $sshlogin
+ push @other_hostgroup_jobs, $job;
+ }
+ }
+ $Global::JobQueue->unget(@other_hostgroup_jobs);
+ if(not defined $job) {
+ # No more jobs
+ return undef;
+ }
+ } else {
+ $job = $Global::JobQueue->get();
+ if(not defined $job) {
+ # No more jobs
+ ::debug("start", "No more jobs: JobQueue empty\n");
+ return undef;
+ }
}
- my $job = $Global::JobQueue->get();
- if(not defined $job) {
- # No more jobs
- ::debug("No more jobs: JobQueue empty\n");
- return undef;
- }
-
- if($::oodebug and not defined $job->{'commandline'}) {
- Carp::confess("get_job_with_sshlogin job->commandline should never be empty");
- }
my $clean_command = $job->replaced();
if($clean_command =~ /^\s*$/) {
# Do not run empty lines
@@ -1438,13 +2361,13 @@ sub get_job_with_sshlogin {
}
}
$job->set_sshlogin($sshlogin);
- if($::opt_retries and $clean_command and
+ if($opt::retries and $clean_command and
$job->failed_here()) {
# This command with these args failed for this sshlogin
my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
- #::my_dump(($no_of_failed_sshlogins,$min_failures));
- if($no_of_failed_sshlogins == keys %Global::host and
- $job->failed_here() == $min_failures) {
+ # Only look at the Global::host that have > 0 jobslots
+ if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host
+ and $job->failed_here() == $min_failures) {
# It failed the same or more times on another host:
# run it on this host
} else {
@@ -1467,80 +2390,154 @@ sub get_job_with_sshlogin {
sub __REMOTE_SSH__ {}
sub read_sshloginfiles {
+ # Read a list of --slf's
+ # Input:
+ # @files = files or symbolic file names to read
# Returns: N/A
- for (@_) {
- read_sshloginfile($_);
+ for my $s (@_) {
+ read_sshloginfile(expand_slf_shorthand($s));
}
}
+sub expand_slf_shorthand {
+ # Expand --slf shorthand into a read file name
+ # Input:
+ # $file = file or symbolic file name to read
+ # Returns:
+ # $file = actual file name to read
+ my $file = shift;
+ if($file eq "-") {
+ # skip: It is stdin
+ } elsif($file eq "..") {
+ $file = $ENV{'HOME'}."/.parallel/sshloginfile";
+ } elsif($file eq ".") {
+ $file = "/etc/parallel/sshloginfile";
+ } elsif(not -r $file) {
+ if(not -r $ENV{'HOME'}."/.parallel/".$file) {
+ # Try prepending ~/.parallel
+ ::error("Cannot open $file.\n");
+ ::wait_and_exit(255);
+ } else {
+ $file = $ENV{'HOME'}."/.parallel/".$file;
+ }
+ }
+ return $file;
+}
+
sub read_sshloginfile {
+ # Read sshloginfile into @Global::sshlogin
+ # Input:
+ # $file = file to read
+ # Uses:
+ # @Global::sshlogin
# Returns: N/A
my $file = shift;
my $close = 1;
- if($file eq "..") {
- $file = $ENV{'HOME'}."/.parallel/sshloginfile";
- }
- if($file eq ".") {
- $file = "/etc/parallel/sshloginfile";
- }
+ my $in_fh;
+ ::debug("init","--slf ",$file);
if($file eq "-") {
- *IN = *STDIN;
+ $in_fh = *STDIN;
$close = 0;
} else {
- if(not open(IN, $file)) {
- print $Global::original_stderr "Cannot open $file\n";
- exit(255);
+ if(not open($in_fh, "<", $file)) {
+ # Try the filename
+ ::error("Cannot open $file.\n");
+ ::wait_and_exit(255);
}
}
- while() {
+ while(<$in_fh>) {
chomp;
/^\s*#/ and next;
/^\s*$/ and next;
push @Global::sshlogin, $_;
}
if($close) {
- close IN;
+ close $in_fh;
}
}
sub parse_sshlogin {
+ # Parse @Global::sshlogin into %Global::host.
+ # Keep only hosts that are in one of the given ssh hostgroups.
+ # Uses:
+ # @Global::sshlogin
+ # $Global::minimal_command_line_length
+ # %Global::host
+ # $opt::transfer
+ # @opt::return
+ # $opt::cleanup
+ # @opt::basefile
+ # @opt::trc
# Returns: N/A
my @login;
if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
for my $sshlogin (@Global::sshlogin) {
# Split up -S sshlogin,sshlogin
- for my $s (split /,/, $sshlogin) {
+ for my $s (split /,|\n/, $sshlogin) {
if ($s eq ".." or $s eq "-") {
- read_sshloginfile($s);
+ # This may add to @Global::sshlogin - possibly bug
+ read_sshloginfile(expand_slf_shorthand($s));
} else {
+ $s =~ s/\s*$//;
push (@login, $s);
}
}
}
- for my $sshlogin_string (@login) {
- my $sshlogin = SSHLogin->new($sshlogin_string);
- $sshlogin->set_maxlength(Limits::Command::max_length());
- $Global::host{$sshlogin->string()} = $sshlogin;
+ $Global::minimal_command_line_length = 8_000_000;
+ my @allowed_hostgroups;
+ for my $ncpu_sshlogin_string (::uniq(@login)) {
+ my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
+ my $sshlogin_string = $sshlogin->string();
+ if($sshlogin_string eq "") {
+ # This is an ssh group: -S @webservers
+ push @allowed_hostgroups, $sshlogin->hostgroups();
+ next;
+ }
+ if($Global::host{$sshlogin_string}) {
+ # This sshlogin has already been added:
+ # It is probably a host that has come back
+ # Set the max_jobs_running back to the original
+ debug("run","Already seen $sshlogin_string\n");
+ if($sshlogin->{'ncpus'}) {
+ # If ncpus set by '#/' of the sshlogin, overwrite it:
+ $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
+ }
+ $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
+ next;
+ }
+ if($sshlogin_string eq ":") {
+ $sshlogin->set_maxlength(Limits::Command::max_length());
+ } else {
+ # If all chars needs to be quoted, every other character will be \
+ $sshlogin->set_maxlength(int(Limits::Command::max_length()/2));
+ }
+ $Global::minimal_command_line_length =
+ ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
+ $Global::host{$sshlogin_string} = $sshlogin;
}
- #debug("sshlogin: ", my_dump(%Global::host),"\n");
- if($::opt_transfer or @::opt_return or $::opt_cleanup or @::opt_basefile) {
+ if(@allowed_hostgroups) {
+ # Remove hosts that are not in these groups
+ while (my ($string, $sshlogin) = each %Global::host) {
+ if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
+ delete $Global::host{$string};
+ }
+ }
+ }
+
+ # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
+ if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) {
if(not remote_hosts()) {
# There are no remote hosts
- if(defined @::opt_trc) {
- print $Global::original_stderr
- "parallel: Warning: --trc ignored as there are no remote --sshlogin\n";
- } elsif (defined $::opt_transfer) {
- print $Global::original_stderr
- "parallel: Warning: --transfer ignored as there are no remote --sshlogin\n";
- } elsif (defined @::opt_return) {
- print $Global::original_stderr
- "parallel: Warning: --return ignored as there are no remote --sshlogin\n";
- } elsif (defined $::opt_cleanup) {
- print $Global::original_stderr
- "parallel: Warning: --cleanup ignored as there are no remote --sshlogin\n";
- } elsif (defined @::opt_basefile) {
- print $Global::original_stderr
- "parallel: Warning: --basefile ignored as there are no remote --sshlogin\n";
+ if(@opt::trc) {
+ ::warning("--trc ignored as there are no remote --sshlogin.\n");
+ } elsif (defined $opt::transfer) {
+ ::warning("--transfer ignored as there are no remote --sshlogin.\n");
+ } elsif (@opt::return) {
+ ::warning("--return ignored as there are no remote --sshlogin.\n");
+ } elsif (defined $opt::cleanup) {
+ ::warning("--cleanup ignored as there are no remote --sshlogin.\n");
+ } elsif (@opt::basefile) {
+ ::warning("--basefile ignored as there are no remote --sshlogin.\n");
}
}
}
@@ -1548,6 +2545,8 @@ sub parse_sshlogin {
sub remote_hosts {
# Return sshlogins that are not ':'
+ # Uses:
+ # %Global::host
# Returns:
# list of sshlogins with ':' removed
return grep !/^:$/, keys %Global::host;
@@ -1556,74 +2555,431 @@ sub remote_hosts {
sub setup_basefile {
# Transfer basefiles to each $sshlogin
# This needs to be done before first jobs on $sshlogin is run
+ # Uses:
+ # %Global::host
+ # @opt::basefile
# Returns: N/A
my $cmd = "";
+ my $rsync_destdir;
+ my $workdir;
for my $sshlogin (values %Global::host) {
- if($sshlogin->string() eq ":") { next }
- my $sshcmd = $sshlogin->sshcommand();
- my $serverlogin = $sshlogin->serverlogin();
- my $rsync_opt = "-rlDzR -e".shell_quote_scalar($sshcmd);
- for my $file (@::opt_basefile) {
- my $f = $file;
- my $relpath = ($f !~ m:^/:); # Is the path relative?
- # Use different subdirs depending on abs or rel path
- my $rsync_destdir = ($relpath ? "./" : "/");
- $f =~ s:/\./:/:g; # Rsync treats /./ special. We dont want that
- $f = shell_quote_scalar($f);
- $cmd .= "rsync $rsync_opt $f $serverlogin:$rsync_destdir &";
- }
+ if($sshlogin->string() eq ":") { next }
+ for my $file (@opt::basefile) {
+ if($file !~ m:^/: and $opt::workdir eq "...") {
+ ::error("Work dir '...' will not work with relative basefiles.\n");
+ ::wait_and_exit(255);
+ }
+ $workdir ||= Job->new("")->workdir();
+ $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&";
+ }
}
$cmd .= "wait;";
- debug("basesetup: $cmd\n");
+ debug("init", "basesetup: $cmd\n");
print `$cmd`;
}
sub cleanup_basefile {
# Remove the basefiles transferred
+ # Uses:
+ # %Global::host
+ # @opt::basefile
# Returns: N/A
- my $cmd="";
+ my $cmd = "";
+ my $workdir = Job->new("")->workdir();
for my $sshlogin (values %Global::host) {
if($sshlogin->string() eq ":") { next }
- my $sshcmd = $sshlogin->sshcommand();
- my $serverlogin = $sshlogin->serverlogin();
- for my $file (@::opt_basefile) {
- $cmd .= "$sshcmd $serverlogin rm -f ".shell_quote_scalar(shell_quote_scalar($file))."&";
+ for my $file (@opt::basefile) {
+ $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&";
}
}
$cmd .= "wait;";
- debug("basecleanup: $cmd\n");
+ debug("init", "basecleanup: $cmd\n");
print `$cmd`;
}
+sub filter_hosts {
+ # Remove down --sshlogins from active duty.
+ # Find ncpus, ncores, maxlen, time-to-login for each host.
+ # Uses:
+ # %Global::host
+ # $Global::minimal_command_line_length
+ # $opt::use_cpus_instead_of_cores
+ # Returns: N/A
+
+ my ($ncores_ref, $ncpus_ref, $time_to_login_ref, $maxlen_ref,
+ $echo_ref, $down_hosts_ref) =
+ parse_host_filtering(parallelized_host_filtering());
+
+ delete @Global::host{@$down_hosts_ref};
+ @$down_hosts_ref and ::warning("Removed @$down_hosts_ref\n");
+
+ $Global::minimal_command_line_length = 8_000_000;
+ while (my ($sshlogin, $obj) = each %Global::host) {
+ if($sshlogin eq ":") { next }
+ $ncpus_ref->{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
+ $ncores_ref->{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
+ $time_to_login_ref->{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin());
+ $maxlen_ref->{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin());
+ if($opt::use_cpus_instead_of_cores) {
+ $obj->set_ncpus($ncpus_ref->{$sshlogin});
+ } else {
+ $obj->set_ncpus($ncores_ref->{$sshlogin});
+ }
+ $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
+ $obj->set_maxlength($maxlen_ref->{$sshlogin});
+ $Global::minimal_command_line_length =
+ ::min($Global::minimal_command_line_length,
+ int($maxlen_ref->{$sshlogin}/2));
+ ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus_ref->{$sshlogin},
+ " ncores:", $ncores_ref->{$sshlogin},
+ " time_to_login:", $time_to_login_ref->{$sshlogin},
+ " maxlen:", $maxlen_ref->{$sshlogin},
+ " min_max_len:", $Global::minimal_command_line_length,"\n");
+ }
+}
+
+sub parse_host_filtering {
+ # Input:
+ # @lines = output from parallelized_host_filtering()
+ # Returns:
+ # \%ncores = number of cores of {host}
+ # \%ncpus = number of cpus of {host}
+ # \%time_to_login = time_to_login on {host}
+ # \%maxlen = max command len on {host}
+ # \%echo = echo received from {host}
+ # \@down_hosts = list of hosts with no answer
+ my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
+
+ for (@_) {
+ chomp;
+ my @col = split /\t/, $_;
+ if(defined $col[6]) {
+ # This is a line from --joblog
+ # seq host time spent sent received exit signal command
+ # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
+ if($col[0] eq "Seq" and $col[1] eq "Host" and
+ $col[2] eq "Starttime") {
+ # Header => skip
+ next;
+ }
+ # Get server from: eval true server\;
+ $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
+ my $host = $1;
+ $host =~ tr/\\//d;
+ $Global::host{$host} or next;
+ if($col[6] eq "255" or $col[7] eq "15") {
+ # exit == 255 or signal == 15: ssh failed
+ # Remove sshlogin
+ ::debug("init", "--filtered $host\n");
+ push(@down_hosts, $host);
+ } elsif($col[6] eq "127") {
+ # signal == 127: parallel not installed remote
+ # Set ncpus and ncores = 1
+ ::warning("Could not figure out ",
+ "number of cpus on $host. Using 1.\n");
+ $ncores{$host} = 1;
+ $ncpus{$host} = 1;
+ $maxlen{$host} = Limits::Command::max_length();
+ } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
+ # Remember how log it took to log in
+ # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
+ $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
+ } else {
+ ::die_bug("host check unmatched long jobline: $_");
+ }
+ } elsif($Global::host{$col[0]}) {
+ # This output from --number-of-cores, --number-of-cpus,
+ # --max-line-length-allowed
+ # ncores: server 8
+ # ncpus: server 2
+ # maxlen: server 131071
+ if(not $ncores{$col[0]}) {
+ $ncores{$col[0]} = $col[1];
+ } elsif(not $ncpus{$col[0]}) {
+ $ncpus{$col[0]} = $col[1];
+ } elsif(not $maxlen{$col[0]}) {
+ $maxlen{$col[0]} = $col[1];
+ } elsif(not $echo{$col[0]}) {
+ $echo{$col[0]} = $col[1];
+ } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
+ # Skip these:
+ # perl: warning: Setting locale failed.
+ # perl: warning: Please check that your locale settings:
+ # LANGUAGE = (unset),
+ # LC_ALL = (unset),
+ # LANG = "en_US.UTF-8"
+ # are supported and installed on your system.
+ # perl: warning: Falling back to the standard locale ("C").
+ } else {
+ ::die_bug("host check too many col0: $_");
+ }
+ } else {
+ ::die_bug("host check unmatched short jobline ($col[0]): $_");
+ }
+ }
+ @down_hosts = uniq(@down_hosts);
+ return(\%ncores, \%ncpus, \%time_to_login, \%maxlen, \%echo, \@down_hosts);
+}
+
+sub parallelized_host_filtering {
+ # Uses:
+ # $Global::envvar
+ # %Global::host
+ # Returns:
+ # text entries with:
+ # * joblog line
+ # * hostname \t number of cores
+ # * hostname \t number of cpus
+ # * hostname \t max-line-length-allowed
+ # * hostname \t empty
+ my(@cores, @cpus, @maxline, @echo);
+ my $envvar = ::shell_quote_scalar($Global::envvar);
+ while (my ($host, $sshlogin) = each %Global::host) {
+ if($host eq ":") { next }
+ # The 'true' is used to get the $host out later
+ my $sshcmd = "true $host; exec " . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
+ push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0");
+ push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0");
+ push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0");
+ # 'echo' is used to get the best possible value for an ssh login time
+ push(@echo, $host."\t".$sshcmd." echo\n\0");
+ }
+ my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh");
+ print $fh @cores, @cpus, @maxline, @echo;
+ close $fh;
+ # --timeout 5: Setting up an SSH connection and running a simple
+ # command should never take > 5 sec.
+ # --delay 0.1: If multiple sshlogins use the same proxy the delay
+ # will make it less likely to overload the ssh daemon.
+ # --retries 3: If the ssh daemon it overloaded, try 3 times
+ # -s 16000: Half of the max line on UnixWare
+ # TODO sh -c wrapper to work in csh
+ my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null";
+ ::debug("init", $cmd, "\n");
+ my @out;
+ my $prepend = "";
+ open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
+ for(<$host_fh>) {
+ if(/\'$/) {
+ # if last char = ' then append next line
+ # This may be due to quoting of $Global::envvar
+ $prepend .= $_;
+ next;
+ }
+ $_ = $prepend . $_;
+ $prepend = "";
+ push @out, $_;
+ }
+ close $host_fh;
+ $Global::debug or unlink $tmpfile;
+ return @out;
+}
+
+sub onall {
+ # Runs @command on all hosts.
+ # Uses parallel to run @command on each host.
+ # --jobs = number of hosts to run on simultaneously.
+ # For each host a parallel command with the args will be running.
+ # Uses:
+ # $Global::quoting
+ # @opt::basefile
+ # $opt::jobs
+ # $opt::linebuffer
+ # $opt::ungroup
+ # $opt::group
+ # $opt::keeporder
+ # $opt::D
+ # $opt::plain
+ # $opt::max_chars
+ # $opt::linebuffer
+ # $opt::files
+ # $opt::colsep
+ # $opt::timeout
+ # $opt::plain
+ # $opt::retries
+ # $opt::max_chars
+ # $opt::arg_sep
+ # $opt::arg_file_sep
+ # @opt::v
+ # @opt::env
+ # %Global::host
+ # $Global::exitstatus
+ # $Global::debug
+ # $Global::joblog
+ # $opt::tag
+ # $opt::joblog
+ # Input:
+ # @command = command to run on all hosts
+ # Returns: N/A
+ sub tmp_joblog {
+ # Input:
+ # $joblog = filename of joblog - undef if none
+ # Returns:
+ # $tmpfile = temp file for joblog - undef if none
+ my $joblog = shift;
+ if(not defined $joblog) {
+ return undef;
+ }
+ my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
+ close $fh;
+ return $tmpfile;
+ }
+ my ($input_source_fh_ref,@command) = @_;
+ if($Global::quoting) {
+ @command = shell_quote_empty(@command);
+ }
+
+ # Copy all @input_source_fh (-a and :::) into tempfiles
+ my @argfiles = ();
+ for my $fh (@$input_source_fh_ref) {
+ my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1);
+ print $outfh (<$fh>);
+ close $outfh;
+ push @argfiles, $name;
+ }
+ if(@opt::basefile) { setup_basefile(); }
+ # for each sshlogin do:
+ # parallel -S $sshlogin $command :::: @argfiles
+ #
+ # Pass some of the options to the sub-parallels, not all of them as
+ # -P should only go to the first, and -S should not be copied at all.
+ my $options =
+ join(" ",
+ ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
+ ((defined $opt::linebuffer) ? "--linebuffer" : ""),
+ ((defined $opt::ungroup) ? "-u" : ""),
+ ((defined $opt::group) ? "-g" : ""),
+ ((defined $opt::keeporder) ? "--keeporder" : ""),
+ ((defined $opt::D) ? "-D $opt::D" : ""),
+ ((defined $opt::plain) ? "--plain" : ""),
+ ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
+ );
+ my $suboptions =
+ join(" ",
+ ((defined $opt::ungroup) ? "-u" : ""),
+ ((defined $opt::linebuffer) ? "--linebuffer" : ""),
+ ((defined $opt::group) ? "-g" : ""),
+ ((defined $opt::files) ? "--files" : ""),
+ ((defined $opt::keeporder) ? "--keeporder" : ""),
+ ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
+ ((@opt::v) ? "-vv" : ""),
+ ((defined $opt::D) ? "-D $opt::D" : ""),
+ ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
+ ((defined $opt::plain) ? "--plain" : ""),
+ ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
+ ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
+ ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
+ ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
+ (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
+ );
+ ::debug("init", "| $0 $options\n");
+ open(my $parallel_fh, "|-", "$0 --will-cite -j0 $options") ||
+ ::die_bug("This does not run GNU Parallel: $0 $options");
+ my @joblogs;
+ for my $host (sort keys %Global::host) {
+ my $sshlogin = $Global::host{$host};
+ my $joblog = tmp_joblog($opt::joblog);
+ if($joblog) {
+ push @joblogs, $joblog;
+ $joblog = "--joblog $joblog";
+ }
+ my $quad = $opt::arg_file_sep || "::::";
+ ::debug("init", "$0 $suboptions -j1 $joblog ",
+ ((defined $opt::tag) ?
+ "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
+ " -S ", shell_quote_scalar($sshlogin->string())," ",
+ join(" ",shell_quote(@command))," $quad @argfiles\n");
+ print $parallel_fh "$0 $suboptions -j1 $joblog ",
+ ((defined $opt::tag) ?
+ "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
+ " -S ", shell_quote_scalar($sshlogin->string())," ",
+ join(" ",shell_quote(@command))," $quad @argfiles\n";
+ }
+ close $parallel_fh;
+ $Global::exitstatus = $? >> 8;
+ debug("init", "--onall exitvalue ", $?);
+ if(@opt::basefile) { cleanup_basefile(); }
+ $Global::debug or unlink(@argfiles);
+ my %seen;
+ for my $joblog (@joblogs) {
+ # Append to $joblog
+ open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
+ # Skip first line (header);
+ <$fh>;
+ print $Global::joblog (<$fh>);
+ close $fh;
+ unlink($joblog);
+ }
+}
+
sub __SIGNAL_HANDLING__ {}
-sub list_running_jobs {
+sub save_original_signal_handler {
+ # Remember the original signal handler
+ # Uses:
+ # %Global::original_sig
# Returns: N/A
- for my $v (values %Global::running) {
- print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n";
+ $SIG{INT} = sub {
+ if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
+ wait_and_exit(255);
+ };
+ $SIG{TERM} = sub {
+ if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
+ wait_and_exit(255);
+ };
+ %Global::original_sig = %SIG;
+ $SIG{TERM} = sub {}; # Dummy until jobs really start
+ $SIG{ALRM} = 'IGNORE';
+}
+
+sub list_running_jobs {
+ # Print running jobs on tty
+ # Uses:
+ # %Global::running
+ # Returns: N/A
+ for my $job (values %Global::running) {
+ ::status("$Global::progname: ",$job->replaced(),"\n");
}
}
sub start_no_new_jobs {
+ # Start no more jobs
+ # Uses:
+ # %Global::original_sig
+ # %Global::unlink
+ # $Global::start_no_new_jobs
# Returns: N/A
$SIG{TERM} = $Global::original_sig{TERM};
- print $Global::original_stderr
+ unlink keys %Global::unlink;
+ ::status
("$Global::progname: SIGTERM received. No new jobs will be started.\n",
"$Global::progname: Waiting for these ", scalar(keys %Global::running),
" jobs to finish. Send SIGTERM again to stop now.\n");
list_running_jobs();
- $Global::start_no_new_jobs++;
+ $Global::start_no_new_jobs ||= 1;
}
sub reaper {
# A job finished.
# Print the output.
# Start another job
- # Returns: N/A
+ # Uses:
+ # %Global::sshmaster
+ # %Global::running
+ # $Global::tty_taken
+ # @Global::slots
+ # $opt::timeout
+ # $Global::timeoutq
+ # $opt::halt
+ # $opt::keeporder
+ # $Global::total_running
+ # Returns:
+ # $children_reaped = number of children finished
my $stiff;
my $children_reaped = 0;
- debug("Reaper called ");
+ debug("run", "Reaper ");
while (($stiff = waitpid(-1, &WNOHANG)) > 0) {
+ # $stiff = pid of dead process
$children_reaped++;
if($Global::sshmaster{$stiff}) {
# This is one of the ssh -M: ignore
@@ -1634,91 +2990,152 @@ sub reaper {
$job or next;
$job->set_exitstatus($? >> 8);
$job->set_exitsignal($? & 127);
- debug("died (".$job->exitstatus()."): ".$job->seq());
- $job->set_endtime();
+ debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
+ $job->set_endtime(::now());
if($stiff == $Global::tty_taken) {
# The process that died had the tty => release it
$Global::tty_taken = 0;
}
if(not $job->should_be_retried()) {
- # Force printing now if the job failed and we are going to exit
- my $print_now = ($job->exitstatus() and
- $::opt_halt_on_error and $::opt_halt_on_error == 2);
- if($Global::keeporder and not $print_now) {
- $Private::print_later{$job->seq()} = $job;
- $Private::job_end_sequence ||= 1;
- debug("Looking for: $Private::job_end_sequence ".
- "Current: ".$job->seq()."\n");
- while($Private::print_later{$Private::job_end_sequence}) {
- debug("Found job end $Private::job_end_sequence");
- $Private::print_later{$Private::job_end_sequence}->print();
- delete $Private::print_later{$Private::job_end_sequence};
- $Private::job_end_sequence++;
- }
- } else {
+ # The job is done
+ # Free the jobslot
+ push @Global::slots, $job->slot();
+ if($opt::timeout) {
+ # Update average runtime for timeout
+ $Global::timeoutq->update_median_runtime($job->runtime());
+ }
+ # Force printing now if --halt forces us to exit
+ my $print_now =
+ ($opt::halt and
+ (($opt::halt == 2 and $job->exitstatus())
+ or
+ ($opt::halt == -2 and not $job->exitstatus())));
+ if($opt::keeporder and not $print_now) {
+ $job->print_earlier_jobs();
+ } else {
$job->print();
}
- if($job->exitstatus()) {
- # The jobs had a exit status <> 0, so error
- $Global::exitstatus++;
- if($::opt_halt_on_error) {
- if($::opt_halt_on_error == 1) {
- # If halt on error == 1 we should gracefully exit
- print $Global::original_stderr
- ("$Global::progname: Starting no more jobs. ",
- "Waiting for ", scalar(keys %Global::running),
- " jobs to finish. This job failed:\n",
- $job->replaced(),"\n");
- $Global::start_no_new_jobs++;
- $Global::halt_on_error_exitstatus = $job->exitstatus();
- } elsif($::opt_halt_on_error == 2) {
- # If halt on error == 2 we should exit immediately
- print $Global::original_stderr
- ("$Global::progname: This job failed:\n",
- $job->replaced(),"\n");
- exit ($job->exitstatus());
- }
- }
- }
+ $job->should_we_halt();
}
my $sshlogin = $job->sshlogin();
$sshlogin->dec_jobs_running();
$sshlogin->inc_jobs_completed();
$Global::total_running--;
delete $Global::running{$stiff};
- start_more_jobs();
+ start_more_jobs();
+ if($opt::progress) {
+ my %progress = progress();
+ ::status("\r",$progress{'status'});
+ }
}
- debug("Reaper exit\n");
+ debug("run", "done ");
return $children_reaped;
}
-sub timeout {
- # SIGALRM was received. Check if there was a timeout
- # @Global::timeout is sorted by timeout
- while (@Global::timeouts) {
- my $t = $Global::timeouts[0];
- if($t->timed_out()) {
- $t->kill();
- shift @Global::timeouts;
- } else {
- # Because they are sorted by timeout
- last;
- }
- }
-}
sub __USAGE__ {}
+sub killall {
+ # Kill all jobs
+ # Send all jobs TERM
+ # Wait
+ # Send all jobs TERM
+ # Wait
+ # Send all jobs KILL
+ # Send all (grand*)children KILL
+
+ $Global::start_no_new_jobs ||= 1;
+ # pids of the all children and (grand*)children
+ # before we start the blood bath
+ my @family_pids = family_pids(keys %Global::running);
+ # Send jobs TERM
+ ::debug("kill","TERM ", join(' ',keys %Global::running),"\n");
+ kill "TERM", keys %Global::running;
+ # Wait up to 200 ms
+ my $sleepsum = 0;
+ my $sleep = 0.001;
+ for (; kill(0, keys %Global::running) and $sleepsum < 200;
+ $sleepsum += $sleep) {
+ # This can change %Global::running
+ ::debug("kill","Slept $sleepsum\n");
+ if((my $stiff = waitpid(-1, &WNOHANG)) > 0) {
+ ::debug("kill","reaped $stiff\n");
+
+ $Global::total_running--;
+ delete $Global::running{$stiff};
+ $sleep = $sleep/2+0.001;
+ }
+ $sleep *= 1.1;
+ ::usleep($sleep);
+ }
+ # Send jobs TERM (again)
+ ::debug("kill","TERM ", join(' ',keys %Global::running),"\n");
+ kill "TERM", keys %Global::running;
+ # Wait up to 200 ms
+ $sleepsum = 0;
+ $sleep = 0.001;
+ for (; kill(0, keys %Global::running) and $sleepsum < 200;
+ $sleepsum += $sleep) {
+ # This can change %Global::running
+ if((my $stiff = waitpid(-1, &WNOHANG)) > 0) {
+ $Global::total_running--;
+ delete $Global::running{$stiff};
+ $sleep = $sleep/2+0.001;
+ }
+ $sleep *= 1.1;
+ ::usleep($sleep);
+ }
+ # Send jobs KILL
+ ::debug("kill","KILL ", join(' ',keys %Global::running),"\n");
+ kill "KILL", keys %Global::running;
+ # Send all (grand*)children KILL (if there are any left)
+ ::debug("kill","KILL @family_pids\n");
+ kill "KILL", @family_pids;
+}
+
+sub family_pids {
+ # Find the pids with this->pid as (grand)*parent
+ # Input:
+ # @parents = pids of parents
+ # Returns:
+ # @pids = pids of (grand)*children
+ my @parents = @_;
+ my @pids;
+
+ my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();
+
+ my @more = @parents;
+ # While more (grand)*children
+ while(@more) {
+ my @m;
+ push @pids, @more;
+ for my $parent (@more) {
+ if($children_of_ref->{$parent}) {
+ # add the children of this parent
+ push @m, @{$children_of_ref->{$parent}};
+ }
+ }
+ @more = @m;
+ }
+ return (@pids);
+}
+
sub wait_and_exit {
# If we do not wait, we sometimes get segfault
# Returns: N/A
+ my $error = shift;
+ unlink keys %Global::unlink;
+ if($error) {
+ # Kill all without printing
+ killall();
+ }
for (keys %Global::unkilled_children) {
kill 9, $_;
waitpid($_,0);
delete $Global::unkilled_children{$_};
}
wait();
- exit(shift);
+ exit($error);
}
sub die_usage {
@@ -1732,36 +3149,96 @@ sub usage {
print join
("\n",
"Usage:",
+ "",
"$Global::progname [options] [command [arguments]] < list_of_arguments",
"$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
"cat ... | $Global::progname --pipe [options] [command [arguments]]",
"",
- "-j n Run n jobs in parallel",
- "-k Keep same order",
- "-X Multiple arguments with context replace",
- "--colsep regexp Split input on regexp for positional replacements",
- "{} {.} {/} {/.} {#} Replacement strings",
- "{3} {3.} {3/} {3/.} Positional replacement strings",
+ "-j n Run n jobs in parallel",
+ "-k Keep same order",
+ "-X Multiple arguments with context replace",
+ "--colsep regexp Split input on regexp for positional replacements",
+ "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
+ "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
+ "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
+ " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
"",
- "-S sshlogin Example: foo\@server.example.com",
- "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
- "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
- "--onall Run the given command with argument on all sshlogins",
- "--nonall Run the given command with no arguments on all sshlogins",
+ "-S sshlogin Example: foo\@server.example.com",
+ "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
+ "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
+ "--onall Run the given command with argument on all sshlogins",
+ "--nonall Run the given command with no arguments on all sshlogins",
"",
- "--pipe Split stdin (standard input) to multiple jobs.",
- "--recend str Record end separator for --pipe.",
- "--recstart str Record start separator for --pipe.",
+ "--pipe Split stdin (standard input) to multiple jobs.",
+ "--recend str Record end separator for --pipe.",
+ "--recstart str Record start separator for --pipe.",
"",
"See 'man $Global::progname' for details",
"",
- "When using GNU Parallel for a publication please cite:",
+ "Academic tradition requires you to cite works you base your article on.",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite:",
"",
- "O. Tange (2011): GNU Parallel - The Command-Line Power Tool,",
- ";login: The USENIX Magazine, February 2011:42-47.",
+ " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,",
+ " ;login: The USENIX Magazine, February 2011:42-47.",
+ "",
+ "This helps funding further development; and it won't cost you a cent.",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n",
"");
}
+
+sub citation_notice {
+ # if --will-cite or --plain: do nothing
+ # if stderr redirected: do nothing
+ # if ~/.parallel/will-cite: do nothing
+ # else: print citation notice to stderr
+ if($opt::willcite
+ or
+ $opt::plain
+ or
+ not -t $Global::original_stderr
+ or
+ -e $ENV{'HOME'}."/.parallel/will-cite") {
+ # skip
+ } else {
+ ::status
+ ("Academic tradition requires you to cite works you base your article on.\n",
+ "When using programs that use GNU Parallel to process data for publication\n",
+ "please cite:\n",
+ "\n",
+ " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n",
+ " ;login: The USENIX Magazine, February 2011:42-47.\n",
+ "\n",
+ "This helps funding further development; and it won't cost you a cent.\n",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n",
+ "\n",
+ "To silence the citation notice: run 'parallel --bibtex'.\n\n",
+ );
+ }
+}
+
+sub status {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ print $fh @w;
+ flush $fh;
+}
+
+sub warning {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ my $prog = $Global::progname || "parallel";
+ print $fh $prog, ": Warning: ", @w;
+}
+
+sub error {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ my $prog = $Global::progname || "parallel";
+ print $fh $prog, ": Error: ", @w;
+}
+
sub die_bug {
my $bugid = shift;
print STDERR
@@ -1778,43 +3255,71 @@ sub die_bug {
sub version {
# Returns: N/A
- if($::opt_tollef and not $::opt_gnu) {
- print "WARNING: YOU ARE USING --tollef. USE --gnu FOR GNU PARALLEL\n\n";
- }
print join("\n",
"GNU $Global::progname $Global::version",
- "Copyright (C) 2007,2008,2009,2010,2011,2012 Ole Tange and Free Software Foundation, Inc.",
+ "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange",
+ "and Free Software Foundation, Inc.",
"License GPLv3+: GNU GPL version 3 or later ",
"This is free software: you are free to change and redistribute it.",
"GNU $Global::progname comes with no warranty.",
"",
"Web site: http://www.gnu.org/software/${Global::progname}\n",
- "When using GNU Parallel for a publication please cite:\n",
- "O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ",
- ";login: The USENIX Magazine, February 2011:42-47.\n",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite as described in 'parallel --bibtex'.\n",
);
}
sub bibtex {
# Returns: N/A
- if($::opt_tollef and not $::opt_gnu) {
- print "WARNING: YOU ARE USING --tollef. USE --gnu FOR GNU PARALLEL\n\n";
- }
print join("\n",
+ "Academic tradition requires you to cite works you base your article on.",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite:",
+ "",
"\@article{Tange2011a,",
- " title = {GNU Parallel - The Command-Line Power Tool},",
- " author = {O. Tange},",
- " address = {Frederiksberg, Denmark},",
- " journal = {;login: The USENIX Magazine},",
- " month = {Feb},",
- " number = {1},",
- " volume = {36},",
- " url = {http://www.gnu.org/s/parallel},",
- " year = {2011},",
- " pages = {42-47}",
+ " title = {GNU Parallel - The Command-Line Power Tool},",
+ " author = {O. Tange},",
+ " address = {Frederiksberg, Denmark},",
+ " journal = {;login: The USENIX Magazine},",
+ " month = {Feb},",
+ " number = {1},",
+ " volume = {36},",
+ " url = {http://www.gnu.org/s/parallel},",
+ " year = {2011},",
+ " pages = {42-47}",
+ " doi = {10.5281/zenodo.16303}",
"}",
"",
+ "(Feel free to use \\nocite{Tange2011a})",
+ "",
+ "This helps funding further development; and it won't cost you a cent.",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
+ "",
+ "If you send a copy of your published article to tange\@gnu.org, it will be",
+ "mentioned in the release notes of next version of GNU Parallel.\n\n",
);
+ while(not -e $ENV{'HOME'}."/.parallel/will-cite") {
+ print "\nType: 'will cite' and press enter.\n> ";
+ my $input = ;
+ if($input =~ /will cite/i) {
+ mkdir $ENV{'HOME'}."/.parallel";
+ if(open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")) {
+ close $fh;
+ print "\nThank you for your support. It is much appreciated. The citation\n",
+ "notice is now silenced. You may also use '--will-cite'.\n",
+ "If you use '--will-cite' in scripts you are expected to pay\n",
+ "the 10000 EUR, because you are making it harder to see the\n",
+ "citation notice.\n\n";
+ } else {
+ print "\nThank you for your support. It is much appreciated. The citation\n",
+ "cannot permanently be silenced. Use '--will-cite' instead.\n",
+ "If you use '--will-cite' in scripts you are expected to pay\n",
+ "the 10000 EUR, because you are making it harder to see the\n",
+ "citation notice.\n\n";
+ last;
+ }
+ }
+ }
}
sub show_limits {
@@ -1829,6 +3334,40 @@ sub show_limits {
sub __GENERIC_COMMON_FUNCTION__ {}
+sub tmpfile {
+ # Create tempfile as $TMPDIR/parXXXXX
+ # Returns:
+ # $filehandle = opened file handle
+ # $filename = file name created
+ return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
+}
+
+sub tmpname {
+ # Select a name that does not exist
+ # Do not create the file as that may cause problems
+ # if you ssh to localhost (or a shared file system) under a different name
+ my $name = shift;
+ my($tmpname);
+ do {
+ $tmpname = $ENV{'TMPDIR'}."/".$name.
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
+ } while($Global::unlink{$tmpname}++ or -e $tmpname);
+ return $tmpname;
+}
+
+sub tmpfifo {
+ # Securely make a fifo by securely making a dir with a fifo in it
+ use POSIX qw(mkfifo);
+ my $tmpfifo = tmpname("fif",@_);
+ mkfifo($tmpfifo,0600);
+ return $tmpfifo;
+}
+
+sub uniq {
+ # Remove duplicates and return unique values
+ return keys %{{ map { $_ => 1 } @_ }};
+}
+
sub min {
# Returns:
# Minimum value of array
@@ -1877,82 +3416,375 @@ sub undef_as_empty {
return $a ? $a : "";
}
-sub hostname {
- if(not $Private::hostname) {
- my $hostname = `hostname`;
- chomp($hostname);
- $Private::hostname = $hostname || "nohostname";
+sub multiply_binary_prefix {
+ # Evalualte numbers with binary prefix
+ # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
+ # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
+ # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
+ # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
+ # 13G = 13*1024*1024*1024 = 13958643712
+ # Input:
+ # $s = string with prefixes
+ # Returns:
+ # $value = int with prefixes multiplied
+ my $s = shift;
+ if(not $s) {
+ return $s;
}
- return $Private::hostname;
+ $s =~ s/ki/*1024/gi;
+ $s =~ s/mi/*1024*1024/gi;
+ $s =~ s/gi/*1024*1024*1024/gi;
+ $s =~ s/ti/*1024*1024*1024*1024/gi;
+ $s =~ s/pi/*1024*1024*1024*1024*1024/gi;
+ $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+
+ $s =~ s/K/*1024/g;
+ $s =~ s/M/*1024*1024/g;
+ $s =~ s/G/*1024*1024*1024/g;
+ $s =~ s/T/*1024*1024*1024*1024/g;
+ $s =~ s/P/*1024*1024*1024*1024*1024/g;
+ $s =~ s/E/*1024*1024*1024*1024*1024*1024/g;
+ $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
+ $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
+ $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
+
+ $s =~ s/k/*1000/g;
+ $s =~ s/m/*1000*1000/g;
+ $s =~ s/g/*1000*1000*1000/g;
+ $s =~ s/t/*1000*1000*1000*1000/g;
+ $s =~ s/p/*1000*1000*1000*1000*1000/g;
+ $s =~ s/e/*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
+
+ $s = eval $s;
+ ::debug($s);
+ return $s;
}
-sub reap_usleep {
- # Reap dead children.
- # If no children: Sleep specified amount with exponential backoff
- # Returns:
- # 0.00001 if children reaped (0.00001 ms works best on highend)
- # $ms*1.1 if no children reaped
- my $ms = shift;
- if(reaper()) {
- return 0.00001;
- } else {
- usleep($ms);
- return (($ms < 1000) ? ($ms * 1.1) : ($ms)); # exponential back off
+{
+ my ($disk_full_fh, $b8193, $error_printed);
+ sub exit_if_disk_full {
+ # Checks if $TMPDIR is full by writing 8kb to a tmpfile
+ # If the disk is full: Exit immediately.
+ # Returns:
+ # N/A
+ if(not $disk_full_fh) {
+ my $name;
+ ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
+ # Separate unlink due to NFS dealing badly with File::Temp
+ unlink $name;
+ $b8193 = "x"x8193;
+ }
+ # Linux does not discover if a disk is full if writing <= 8192
+ # Tested on:
+ # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
+ # ntfs reiserfs tmpfs ubifs vfat xfs
+ # TODO this should be tested on different OS similar to this:
+ #
+ # doit() {
+ # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
+ # seq 100000 | parallel --tmpdir /mnt/loop/ true &
+ # seq 6900000 > /mnt/loop/i && echo seq OK
+ # seq 6980868 > /mnt/loop/i
+ # seq 10000 > /mnt/loop/ii
+ # sleep 3
+ # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
+ # echo >&2
+ # }
+ print $disk_full_fh $b8193;
+ if(not $disk_full_fh
+ or
+ tell $disk_full_fh != 8193) {
+ # On raspbian the disk can be full except for 10 chars.
+ if(not $error_printed) {
+ ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n");
+ ::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
+ $error_printed = 1;
+ }
+ ::wait_and_exit(255);
+ }
+ truncate $disk_full_fh, 0;
+ seek($disk_full_fh, 0, 0) || die;
}
}
+sub spacefree {
+ # Remove comments and spaces
+ # Inputs:
+ # $spaces = keep 1 space?
+ # $s = string to remove spaces from
+ # Returns:
+ # $s = with spaces removed
+ my $spaces = shift;
+ my $s = shift;
+ $s =~ s/#.*//mg;
+ if($spaces) {
+ $s =~ s/\s+/ /mg;
+ } else {
+ $s =~ s/\s//mg;
+ }
+ return $s;
+}
+
+{
+ my $hostname;
+ sub hostname {
+ if(not $hostname) {
+ $hostname = `hostname`;
+ chomp($hostname);
+ $hostname ||= "nohostname";
+ }
+ return $hostname;
+ }
+}
+
+sub which {
+ # Input:
+ # @programs = programs to find the path to
+ # Returns:
+ # @full_path = full paths to @programs. Nothing if not found
+ my @which;
+ for my $prg (@_) {
+ push(@which, grep { not -d $_ and -x $_ }
+ map { $_."/".$prg } split(":",$ENV{'PATH'}));
+ }
+ return @which;
+}
+
+{
+ my ($regexp,%fakename);
+
+ sub parent_shell {
+ # Input:
+ # $pid = pid to see if (grand)*parent is a shell
+ # Returns:
+ # $shellpath = path to shell - undef if no shell found
+ my $pid = shift;
+ if(not $regexp) {
+ # All shells known to mankind
+ #
+ # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
+ # posh rbash rush rzsh sash sh static-sh tcsh yash zsh
+ my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh
+ ksh93 mksh pdksh posh rbash rush rzsh
+ sash sh static-sh tcsh yash zsh -sh -csh),
+ '-sh (sh)' # sh on FreeBSD
+ );
+ # Can be formatted as:
+ # [sh] -sh sh busybox sh -sh (sh)
+ # /bin/sh /sbin/sh /opt/csw/sh
+ # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh
+ my $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
+ $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| [^(])';
+ %fakename = (
+ # sh disguises itself as -sh (sh) on FreeBSD
+ "-sh (sh)" => ["sh"],
+ # csh and tcsh disguise themselves as -sh/-csh
+ "-sh" => ["csh", "tcsh"],
+ "-csh" => ["tcsh", "csh"],
+ );
+ }
+ my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
+ my $shellpath;
+ my $testpid = $pid;
+ while($testpid) {
+ ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n");
+ if($name_of_ref->{$testpid} =~ /$regexp/o) {
+ ::debug("init", "which ".($3||$6)." => ");
+ $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0];
+ ::debug("init", "shell path $shellpath\n");
+ $shellpath and last;
+ }
+ if($testpid == $parent_of_ref->{$testpid}) {
+ # In Solaris zones, the PPID of the zsched process is itself
+ last;
+ }
+ $testpid = $parent_of_ref->{$testpid};
+ }
+ return $shellpath;
+ }
+}
+
+{
+ my %pid_parentpid_cmd;
+
+ sub pid_table {
+ # Returns:
+ # %children_of = { pid -> children of pid }
+ # %parent_of = { pid -> pid of parent }
+ # %name_of = { pid -> commandname }
+
+ if(not %pid_parentpid_cmd) {
+ # Filter for SysV-style `ps`
+ my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
+ q(s/^.{$s}//; print "@F[1,2] $_"' );
+ # Crazy msys: ' is not accepted on the cmd line, but " are treated as '
+ my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
+ q(s/^.{$s}//; print qq{@F[1,2] $_}" );
+ # BSD-style `ps`
+ my $bsd = q(ps -o pid,ppid,command -ax);
+ %pid_parentpid_cmd =
+ (
+ 'aix' => $sysv,
+ 'cygwin' => $sysv,
+ 'darwin' => $bsd,
+ 'dec_osf' => $sysv,
+ 'dragonfly' => $bsd,
+ 'freebsd' => $bsd,
+ 'gnu' => $sysv,
+ 'hpux' => $sysv,
+ 'linux' => $sysv,
+ 'mirbsd' => $bsd,
+ 'msys' => $msys,
+ 'MSWin32' => $sysv,
+ 'netbsd' => $bsd,
+ 'nto' => $sysv,
+ 'openbsd' => $bsd,
+ 'solaris' => $sysv,
+ 'svr5' => $sysv,
+ 'syllable' => "echo ps not supported",
+ );
+ }
+ $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
+
+ my (@pidtable,%parent_of,%children_of,%name_of);
+ # Table with pid -> children of pid
+ @pidtable = `$pid_parentpid_cmd{$^O}`;
+ my $p=$$;
+ for (@pidtable) {
+ # must match: 24436 21224 busybox ash
+ # must match: 24436 21224 <>
+ # or: perl -e 'while($0=" "){}'
+ if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
+ or
+ $^O eq "darwin" and /^\s*(\S+)\s+(\S+)\s+()$/) {
+ $parent_of{$1} = $2;
+ push @{$children_of{$2}}, $1;
+ $name_of{$1} = $3;
+ } else {
+ ::die_bug("pidtable format: $_");
+ }
+ }
+ return(\%children_of, \%parent_of, \%name_of);
+ }
+}
+
+sub now {
+ # Returns time since epoch as in seconds with 3 decimals
+ # Uses:
+ # @Global::use
+ # Returns:
+ # $time = time now with millisecond accuracy
+ if(not $Global::use{"Time::HiRes"}) {
+ if(eval "use Time::HiRes qw ( time );") {
+ eval "sub TimeHiRestime { return Time::HiRes::time };";
+ } else {
+ eval "sub TimeHiRestime { return time() };";
+ }
+ $Global::use{"Time::HiRes"} = 1;
+ }
+
+ return (int(TimeHiRestime()*1000))/1000;
+}
+
sub usleep {
# Sleep this many milliseconds.
- my $secs = shift;
- ::debug("Sleeping ",$secs," millisecs\n");
- select(undef, undef, undef, $secs/1000);
- if($::opt_timeout) {
- ::debug(my_dump($Global::timeoutq));
- $Global::timeoutq->process_timeouts();
+ # Input:
+ # $ms = milliseconds to sleep
+ my $ms = shift;
+ ::debug(int($ms),"ms ");
+ select(undef, undef, undef, $ms/1000);
+}
+
+sub reap_usleep {
+ # Reap dead children.
+ # If no dead children: Sleep specified amount with exponential backoff
+ # Input:
+ # $ms = milliseconds to sleep
+ # Returns:
+ # $ms/2+0.001 if children reaped
+ # $ms*1.1 if no children reaped
+ my $ms = shift;
+ if(reaper()) {
+ # Sleep exponentially shorter (1/2^n) if a job finished
+ return $ms/2+0.001;
+ } else {
+ if($opt::timeout) {
+ $Global::timeoutq->process_timeouts();
+ }
+ if($opt::memfree) {
+ kill_youngster_if_not_enough_mem();
+ }
+ # When a child dies, wake up from sleep (or select(,,,))
+ $SIG{CHLD} = sub { kill "ALRM", $$ };
+ usleep($ms);
+ # --compress needs $SIG{CHLD} undefined
+ delete $SIG{CHLD};
+ exit_if_disk_full();
+ if($opt::linebuffer) {
+ for my $job (values %Global::running) {
+ $job->print();
+ }
+ }
+ # Sleep exponentially longer (1.1^n) if a job did not finish,
+ # though at most 1000 ms.
+ return (($ms < 1000) ? ($ms * 1.1) : ($ms));
}
}
-sub multiply_binary_prefix {
- # Evalualte numbers with binary prefix
- # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
- # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80
- # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
- # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
- # 13G = 13*1024*1024*1024 = 13958643712
- my $s = shift;
- $s =~ s/k/*1000/g;
- $s =~ s/M/*1000*1000/g;
- $s =~ s/G/*1000*1000*1000/g;
- $s =~ s/T/*1000*1000*1000*1000/g;
- $s =~ s/P/*1000*1000*1000*1000*1000/g;
- $s =~ s/E/*1000*1000*1000*1000*1000*1000/g;
- $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g;
- $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
- $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
+sub kill_youngster_if_not_enough_mem {
+ # Check each $sshlogin if there is enough mem.
+ # If less than 50% enough free mem: kill off the youngest child
+ # Put the child back in the queue.
+ # Uses:
+ # %Global::running
+ my %jobs_of;
+ my @sshlogins;
- $s =~ s/Ki?/*1024/gi;
- $s =~ s/Mi?/*1024*1024/gi;
- $s =~ s/Gi?/*1024*1024*1024/gi;
- $s =~ s/Ti?/*1024*1024*1024*1024/gi;
- $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi;
- $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi;
- $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi;
- $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
- $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
- $s = eval $s;
- return $s;
+ for my $job (values %Global::running) {
+ if(not $jobs_of{$job->sshlogin()}) {
+ push @sshlogins, $job->sshlogin();
+ }
+ push @{$jobs_of{$job->sshlogin()}}, $job;
+ }
+ for my $sshlogin (@sshlogins) {
+ for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
+ if($sshlogin->memfree() < $opt::memfree * 0.5) {
+ ::debug("mem","\n",map { $_->seq()." " } (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}));
+ ::debug("mem","\n", $job->seq(), "killed ",
+ $sshlogin->memfree()," < ",$opt::memfree * 0.5);
+ $job->kill();
+ $sshlogin->memfree_recompute();
+ } else {
+ last;
+ }
+ }
+ ::debug("mem","Free mem OK ", $sshlogin->memfree()," > ",$opt::memfree * 0.5);
+ }
}
sub __DEBUGGING__ {}
sub debug {
+ # Uses:
+ # $Global::debug
+ # %Global::fd
# Returns: N/A
$Global::debug or return;
@_ = grep { defined $_ ? $_ : "" } @_;
- if($Global::original_stdout) {
- print $Global::original_stdout @_;
- } else {
- print @_;
+ if($Global::debug eq "all" or $Global::debug eq $_[0]) {
+ if($Global::fd{1}) {
+ # Original stdout was saved
+ my $stdout = $Global::fd{1};
+ print $stdout @_[1..$#_];
+ } else {
+ print @_[1..$#_];
+ }
}
}
@@ -1981,7 +3813,7 @@ sub my_memory_usage {
sub my_size {
# Returns:
- # size of object if Devel::Size is installed
+ # $size = size of object if Devel::Size is installed
# -1 otherwise
my @size_this = (@_);
eval "use Devel::Size qw(size total_size)";
@@ -2004,7 +3836,7 @@ sub my_dump {
if ($@) {
my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
"Not dumping output\n";
- print $Global::original_stderr $err;
+ ::status($err);
return $err;
} else {
return Dumper(@dump_this);
@@ -2018,8 +3850,19 @@ sub my_dump {
}
}
-sub __OBJECT_ORIENTED_PARTS__ {}
+sub my_croak {
+ eval "use Carp; 1";
+ $Carp::Verbose = 1;
+ croak(@_);
+}
+sub my_carp {
+ eval "use Carp; 1";
+ $Carp::Verbose = 1;
+ carp(@_);
+}
+
+sub __OBJECT_ORIENTED_PARTS__ {}
package SSHLogin;
@@ -2027,29 +3870,50 @@ sub new {
my $class = shift;
my $sshlogin_string = shift;
my $ncpus;
- if($sshlogin_string =~ s:^(\d*)/:: and $1) {
- # Override default autodetected ncpus unless zero or missing
+ my %hostgroups;
+ # SSHLogins can have these formats:
+ # @grp+grp/ncpu//usr/bin/ssh user@server
+ # ncpu//usr/bin/ssh user@server
+ # /usr/bin/ssh user@server
+ # user@server
+ # ncpu/user@server
+ # @grp+grp/user@server
+ if($sshlogin_string =~ s:^\@([^/]+)/?::) {
+ # Look for SSHLogin hostgroups
+ %hostgroups = map { $_ => 1 } split(/\+/, $1);
+ }
+ if ($sshlogin_string =~ s:^(\d+)/::) {
+ # Override default autodetected ncpus unless missing
$ncpus = $1;
}
my $string = $sshlogin_string;
+ # An SSHLogin is always in the hostgroup of its $string-name
+ $hostgroups{$string} = 1;
+ @Global::hostgroups{keys %hostgroups} = values %hostgroups;
my @unget = ();
+ my $no_slash_string = $string;
+ $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
return bless {
'string' => $string,
'jobs_running' => 0,
'jobs_completed' => 0,
'maxlength' => undef,
'max_jobs_running' => undef,
+ 'orig_max_jobs_running' => undef,
'ncpus' => $ncpus,
+ 'hostgroups' => \%hostgroups,
'sshcommand' => undef,
'serverlogin' => undef,
'control_path_dir' => undef,
'control_path' => undef,
+ 'time_to_login' => undef,
+ 'last_login_at' => undef,
'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" .
- $$."-".$string,
+ $no_slash_string,
'loadavg' => undef,
'last_loadavg_update' => 0,
'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" .
- $$."-".$string,
+ $no_slash_string,
'swap_activity' => undef,
}, ref($class) || $class;
}
@@ -2068,7 +3932,6 @@ sub string {
sub jobs_running {
my $self = shift;
-
return ($self->{'jobs_running'} || "0");
}
@@ -2082,11 +3945,6 @@ sub dec_jobs_running {
$self->{'jobs_running'}--;
}
-#sub set_jobs_running {
-# my $self = shift;
-# $self->{'jobs_running'} = shift;
-#}
-
sub set_maxlength {
my $self = shift;
$self->{'maxlength'} = shift;
@@ -2102,6 +3960,21 @@ sub jobs_completed {
return $self->{'jobs_completed'};
}
+sub in_hostgroups {
+ # Input:
+ # @hostgroups = the hostgroups to look for
+ # Returns:
+ # true if intersection of @hostgroups and the hostgroups of this
+ # SSHLogin is non-empty
+ my $self = shift;
+ return grep { defined $self->{'hostgroups'}{$_} } @_;
+}
+
+sub hostgroups {
+ my $self = shift;
+ return keys %{$self->{'hostgroups'}};
+}
+
sub inc_jobs_completed {
my $self = shift;
$self->{'jobs_completed'}++;
@@ -2117,6 +3990,78 @@ sub set_max_jobs_running {
# max_jobs_running could be resat if -j is a changed file
$Global::max_jobs_running += $self->{'max_jobs_running'};
}
+ # Initialize orig to the first non-zero value that comes around
+ $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
+}
+
+sub memfree {
+ # Returns:
+ # $memfree in bytes
+ my $self = shift;
+ $self->memfree_recompute();
+ return (not defined $self->{'memfree'} or $self->{'memfree'})
+}
+
+sub memfree_recompute {
+ my $self = shift;
+ my $script = memfreescript();
+
+ # TODO add sshlogin and backgrounding
+ $self->{'memfree'} = qx{ $script };
+ #::debug("mem","New free:",$self->{'memfree'}," ");
+}
+
+{
+ my $script;
+
+ sub memfreescript {
+ # Returns:
+ # shellscript for giving available memory in bytes
+ if(not $script) {
+ my %script_of = (
+ # $ free
+ # total used free shared buffers cached
+ # Mem: 8075152 4922780 3152372 338856 233356 1658604
+ # -/+ buffers/cache: 3030820 5044332
+ # Swap: 8286204 116924 8169280
+ "linux" => q{ print (1024*((grep /buffers.cache/, `free`)[0] =~ /buffers.cache:\s+\S+\s+(\S+)/)[0]) },
+ # $ vmstat 1 1
+ # procs memory page faults cpu
+ # r b w avm free re at pi po fr de sr in sy cs us sy id
+ # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
+ "hpux" => q{ print (((reverse `vmstat 1 1`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) },
+ # $ vmstat 1 2
+ # kthr memory page disk faults cpu
+ # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
+ # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
+ # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
+ #
+ # The last free is really free
+ "solaris" => q{ print (((reverse `vmstat 1 2`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) },
+ "freebsd" => q{
+ for(qx{/sbin/sysctl -a}) {
+ if (/^([^:]+):\s+(.+)\s*$/s) {
+ $sysctl->{$1} = $2;
+ }
+ }
+ print $sysctl->{"hw.pagesize"} *
+ ($sysctl->{"vm.stats.vm.v_cache_count"}
+ + $sysctl->{"vm.stats.vm.v_inactive_count"}
+ + $sysctl->{"vm.stats.vm.v_free_count"});
+ },
+ );
+ my $perlscript = "";
+ # Make a perl script that detects the OS ($^O) and runs
+ # the appropriate command
+ for my $os (keys %script_of) {
+ $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
+ }
+ $perlscript =~ s/[\t\n ]+/ /g;
+ $perlscript = "perl -e " . ::shell_quote_scalar($perlscript);
+ $script = $Global::envvar. " " .$perlscript;
+ }
+ return $script
+ }
}
sub swapping {
@@ -2134,35 +4079,31 @@ sub swap_activity {
# Should we update the swap_activity file?
my $update_swap_activity_file = 0;
if(-r $self->{'swap_activity_file'}) {
- open(SWAP,"<".$self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
- my $swap_out = ;
- close SWAP;
+ open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
+ my $swap_out = <$swap_fh>;
+ close $swap_fh;
if($swap_out =~ /^(\d+)$/) {
$self->{'swap_activity'} = $1;
- ::debug("New swap_activity: ".$self->{'swap_activity'});
+ ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
}
- ::debug("Last update: ".$self->{'last_swap_activity_update'});
+ ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
if(time - $self->{'last_swap_activity_update'} > 10) {
# last swap activity update was started 10 seconds ago
- ::debug("Older than 10 sec: ".$self->{'swap_activity_file'});
+ ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
$update_swap_activity_file = 1;
}
} else {
- ::debug("No swap_activity file: ".$self->{'swap_activity_file'});
+ ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
$self->{'swap_activity'} = undef;
$update_swap_activity_file = 1;
}
if($update_swap_activity_file) {
- ::debug("Updating swap_activity file".$self->{'swap_activity_file'});
+ ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
$self->{'last_swap_activity_update'} = time;
-e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
-e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
my $swap_activity;
- # If the (remote) machine is Mac we should use vm_stat
- # swap_in and swap_out on GNU/Linux is $7 and $8
- # swap_in and swap_out on Mac is $10 and $11
- $swap_activity = q[ { vmstat 1 2> /dev/null || vm_stat 1; } | ].
- q[ awk 'NR!=4{next} NF==16{print $7*$8} NF==11{print $10*$11} {exit}' ];
+ $swap_activity = swapactivityscript();
if($self->{'string'} ne ":") {
$swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
::shell_quote_scalar($swap_activity);
@@ -2171,12 +4112,171 @@ sub swap_activity {
# As the command can take long to run if run remote
# save it to a tmp file before moving it to the correct file
my $file = $self->{'swap_activity_file'};
- my $tmpfile = $self->{'swap_activity_file'}.$$;
- qx{ ($swap_activity > $tmpfile; mv $tmpfile $file) & };
+ my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
+ ::debug("swap", "\n", $swap_activity, "\n");
+ qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
}
return $self->{'swap_activity'};
}
+{
+ my $script;
+
+ sub swapactivityscript {
+ # Returns:
+ # shellscript for detecting swap activity
+ #
+ # arguments for vmstat are OS dependant
+ # swap_in and swap_out are in different columns depending on OS
+ #
+ if(not $script) {
+ my %vmstat = (
+ # linux: $7*$8
+ # $ vmstat 1 2
+ # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
+ # r b swpd free buff cache si so bi bo in cs us sy id wa
+ # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
+ # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
+ 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
+
+ # solaris: $6*$7
+ # $ vmstat -S 1 2
+ # kthr memory page disk faults cpu
+ # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
+ # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
+ # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
+ 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
+
+ # darwin (macosx): $21*$22
+ # $ vm_stat -c 2 1
+ # Mach Virtual Memory Statistics: (page size of 4096 bytes)
+ # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
+ # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
+ # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
+ 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
+
+ # ultrix: $12*$13
+ # $ vmstat -S 1 2
+ # procs faults cpu memory page disk
+ # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
+ # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
+ # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
+ 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
+
+ # aix: $6*$7
+ # $ vmstat 1 2
+ # System configuration: lcpu=1 mem=2048MB
+ #
+ # kthr memory page faults cpu
+ # ----- ----------- ------------------------ ------------ -----------
+ # r b avm fre re pi po fr sr cy in sy cs us sy id wa
+ # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
+ # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
+ 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
+
+ # freebsd: $8*$9
+ # $ vmstat -H 1 2
+ # procs memory page disks faults cpu
+ # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
+ # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
+ # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
+ 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
+
+ # mirbsd: $8*$9
+ # $ vmstat 1 2
+ # procs memory page disks traps cpu
+ # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
+ # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
+ # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
+ 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
+
+ # netbsd: $7*$8
+ # $ vmstat 1 2
+ # procs memory page disks faults cpu
+ # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
+ # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
+ # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
+ 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
+
+ # openbsd: $8*$9
+ # $ vmstat 1 2
+ # procs memory page disks traps cpu
+ # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
+ # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
+ # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
+ 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
+
+ # hpux: $8*$9
+ # $ vmstat 1 2
+ # procs memory page faults cpu
+ # r b w avm free re at pi po fr de sr in sy cs us sy id
+ # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
+ # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
+ 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
+
+ # dec_osf (tru64): $11*$12
+ # $ vmstat 1 2
+ # Virtual Memory Statistics: (pagesize = 8192)
+ # procs memory pages intr cpu
+ # r w u act free wire fault cow zero react pin pout in sy cs us sy id
+ # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
+ # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
+ 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
+
+ # gnu (hurd): $7*$8
+ # $ vmstat -k 1 2
+ # (pagesize: 4, size: 512288, swap size: 894972)
+ # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
+ # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
+ # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
+ 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
+
+ # -nto (qnx has no swap)
+ #-irix
+ #-svr5 (scosysv)
+ );
+ my $perlscript = "";
+ # Make a perl script that detects the OS ($^O) and runs
+ # the appropriate vmstat command
+ for my $os (keys %vmstat) {
+ $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
+ $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
+ $vmstat{$os}[1] . '}"` }';
+ }
+ $perlscript = "perl -e " . ::shell_quote_scalar($perlscript);
+ $script = $Global::envvar. " " .$perlscript;
+ }
+ return $script;
+ }
+}
+
+sub too_fast_remote_login {
+ my $self = shift;
+ if($self->{'last_login_at'} and $self->{'time_to_login'}) {
+ # sshd normally allows 10 simultaneous logins
+ # A login takes time_to_login
+ # So time_to_login/5 should be safe
+ # If now <= last_login + time_to_login/5: Then it is too soon.
+ my $too_fast = (::now() <= $self->{'last_login_at'}
+ + $self->{'time_to_login'}/5);
+ ::debug("run", "Too fast? $too_fast ");
+ return $too_fast;
+ } else {
+ # No logins so far (or time_to_login not computed): it is not too fast
+ return 0;
+ }
+}
+
+sub last_login_at {
+ my $self = shift;
+ return $self->{'last_login_at'};
+}
+
+sub set_last_login_at {
+ my $self = shift;
+ $self->{'last_login_at'} = shift;
+}
+
sub loadavg_too_high {
my $self = shift;
my $loadavg = $self->loadavg();
@@ -2184,65 +4284,156 @@ sub loadavg_too_high {
$loadavg > $self->max_loadavg());
}
+{
+ my $cmd;
+ sub loadavg_cmd {
+ if(not $cmd) {
+ # aix => "ps -ae -o state,command" # state wrong
+ # bsd => "ps ax -o state,command"
+ # sysv => "ps -ef -o s -o comm"
+ # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
+ # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
+ # awk '{print $2,$1}'
+ # dec_osf => bsd
+ # dragonfly => bsd
+ # freebsd => bsd
+ # gnu => bsd
+ # hpux => ps -el|awk '{print $2,$14,$15}'
+ # irix => ps -ef -o state -o comm
+ # linux => bsd
+ # minix => ps el|awk '{print \$1,\$11}'
+ # mirbsd => bsd
+ # netbsd => bsd
+ # openbsd => bsd
+ # solaris => sysv
+ # svr5 => sysv
+ # ultrix => ps -ax | awk '{print $3,$5}'
+ # unixware => ps -el|awk '{print $2,$14,$15}'
+ my $ps = q{
+ $sysv="ps -ef -o s -o comm";
+ $sysv2="ps -ef -o state -o comm";
+ $bsd="ps ax -o state,command";
+ $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
+ $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
+ /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
+ awk '{print $2,$1}' };
+ $dummy="echo S COMMAND;echo R dummy";
+ %ps=(
+ 'aix' => "uptime",
+ 'cygwin' => $cygwin,
+ 'darwin' => $bsd,
+ 'dec_osf' => $sysv2,
+ 'dragonfly' => $bsd,
+ 'freebsd' => $bsd,
+ 'gnu' => $bsd,
+ 'hpux' => $psel,
+ 'irix' => $sysv2,
+ 'linux' => $bsd,
+ 'minix' => "ps el|awk '{print \$1,\$11}'",
+ 'mirbsd' => $bsd,
+ 'msys' => $sysv,
+ 'MSWin32' => $sysv,
+ 'netbsd' => $bsd,
+ 'nto' => $dummy,
+ 'openbsd' => $bsd,
+ 'solaris' => $sysv,
+ 'svr5' => $psel,
+ 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
+ );
+ print `$ps{$^O}`;
+ };
+ $ps =~ s/[ \t\n]+/ /g;
+ $cmd = "perl -e ".::shell_quote_scalar($ps);
+ }
+ return $cmd;
+ }
+}
+
+
sub loadavg {
# If the currently know loadavg is too old:
# Recompute a new one in the background
+ # The load average is computed as the number of processes waiting for disk
+ # or CPU right now. So it is the server load this instant and not averaged over
+ # several minutes. This is needed so GNU Parallel will at most start one job
+ # that will push the load over the limit.
+ #
# Returns:
- # last load average computed
+ # $last_loadavg = last load average computed (undef if none)
my $self = shift;
# Should we update the loadavg file?
my $update_loadavg_file = 0;
- if(-r $self->{'loadavg_file'}) {
- open(UPTIME,"<".$self->{'loadavg_file'}) || ::die_bug("loadavg_file-r");
+ if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
local $/ = undef;
- my $uptime_out = ;
- close UPTIME;
- # load average: 0.76, 1.53, 1.45
- if($uptime_out =~ /load average: (\d+.\d+)/) {
- $self->{'loadavg'} = $1;
- ::debug("New loadavg: ".$self->{'loadavg'});
- } else {
- ::die_bug("loadavg_invalid_content: $uptime_out");
+ my $load_out = <$load_fh>;
+ close $load_fh;
+ # Count lines starting with D,O,R but command does not start with [
+ my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
+ if($load > 0) {
+ # load is overestimated by 1
+ $self->{'loadavg'} = $load - 1;
+ ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
+ } elsif ($load_out=~/average: (\d+.\d+)/) {
+ # AIX does not support instant load average
+ # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
+ $self->{'loadavg'} = $1;
+ } else {
+ ::die_bug("loadavg_invalid_content: " .
+ $self->{'loadavg_file'} . "\n$load_out");
}
- ::debug("Last update: ".$self->{'last_loadavg_update'});
- if(time - $self->{'last_loadavg_update'} > 10) {
- # last loadavg was started 10 seconds ago
- ::debug("Older than 10 sec: ".$self->{'loadavg_file'});
+ # Because of instant load average, it should not be delayed 10 secs
+ # The instant load does not give 2 R if there is only 1 cpu.
+# ::debug("load", "Last update: ", $self->{'last_loadavg_update'});
+# if(time - $self->{'last_loadavg_update'} > 10) {
+# # last loadavg was started 10 seconds ago
+# ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ",
+# $self->{'loadavg_file'});
$update_loadavg_file = 1;
- }
+# }
} else {
- ::debug("No loadavg file: ".$self->{'loadavg_file'});
+ ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
$self->{'loadavg'} = undef;
$update_loadavg_file = 1;
}
if($update_loadavg_file) {
- ::debug("Updating loadavg file".$self->{'loadavg_file'}."\n");
+ ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
$self->{'last_loadavg_update'} = time;
-e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
-e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
- my $uptime;
- if($self->{'string'} eq ":") {
- $uptime = "LANG=C uptime";
- } else {
- $uptime = $self->sshcommand() . " " . $self->serverlogin() . " LANG=C uptime";
- }
- # Run uptime.
+ my $cmd = "";
+ if($self->{'string'} ne ":") {
+ $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
+ ::shell_quote_scalar(loadavg_cmd());
+ } else {
+ $cmd .= loadavg_cmd();
+ }
# As the command can take long to run if run remote
# save it to a tmp file before moving it to the correct file
+ ::debug("load", "Cmd: ", $cmd);
my $file = $self->{'loadavg_file'};
- my $tmpfile = $self->{'loadavg_file'}.$$;
- qx{ ($uptime > $tmpfile && mv $tmpfile $file) & };
+ my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa");
+ qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
}
return $self->{'loadavg'};
}
sub max_loadavg {
my $self = shift;
+ # If --load is a file it might be changed
+ if($Global::max_load_file) {
+ my $mtime = (stat($Global::max_load_file))[9];
+ if($mtime > $Global::max_load_file_last_mod) {
+ $Global::max_load_file_last_mod = $mtime;
+ for my $sshlogin (values %Global::host) {
+ $sshlogin->set_max_loadavg(undef);
+ }
+ }
+ }
if(not defined $self->{'max_loadavg'}) {
$self->{'max_loadavg'} =
- $self->compute_max_loadavg($::opt_load);
+ $self->compute_max_loadavg($opt::load);
}
- ::debug("max_loadavg: ".$self->string()." ".$self->{'max_loadavg'});
+ ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
return $self->{'max_loadavg'};
}
@@ -2278,16 +4469,16 @@ sub compute_max_loadavg {
} elsif (-f $loadspec) {
$Global::max_load_file = $loadspec;
$Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
- if(open(IN, $Global::max_load_file)) {
- my $opt_load_file = join("",);
- close IN;
+ if(open(my $in_fh, "<", $Global::max_load_file)) {
+ my $opt_load_file = join("",<$in_fh>);
+ close $in_fh;
$load = $self->compute_max_loadavg($opt_load_file);
} else {
- print $Global::original_stderr "Cannot open $loadspec\n";
- exit(255);
+ ::error("Cannot open $loadspec.\n");
+ ::wait_and_exit(255);
}
} else {
- print $Global::original_stderr "Parsing of --load failed\n";
+ ::error("Parsing of --load failed.\n");
::die_usage();
}
if($load < 0.01) {
@@ -2297,14 +4488,30 @@ sub compute_max_loadavg {
return $load;
}
+sub time_to_login {
+ my $self = shift;
+ return $self->{'time_to_login'};
+}
+
+sub set_time_to_login {
+ my $self = shift;
+ $self->{'time_to_login'} = shift;
+}
+
sub max_jobs_running {
my $self = shift;
if(not defined $self->{'max_jobs_running'}) {
- $self->set_max_jobs_running($self->compute_number_of_processes($::opt_P));
+ my $nproc = $self->compute_number_of_processes($opt::jobs);
+ $self->set_max_jobs_running($nproc);
}
return $self->{'max_jobs_running'};
}
+sub orig_max_jobs_running {
+ my $self = shift;
+ return $self->{'orig_max_jobs_running'};
+}
+
sub compute_number_of_processes {
# Number of processes wanted and limited by system resources
# Returns:
@@ -2315,150 +4522,203 @@ sub compute_number_of_processes {
if(not defined $wanted_processes) {
$wanted_processes = $Global::default_simultaneous_sshlogins;
}
- ::debug("Wanted procs: $wanted_processes\n");
+ ::debug("load", "Wanted procs: $wanted_processes\n");
my $system_limit =
$self->processes_available_by_system_limit($wanted_processes);
- $system_limit < 1 and ::die_bug('$system_limit < 1');
- ::debug("Limited to procs: $system_limit\n");
+ ::debug("load", "Limited to procs: $system_limit\n");
return $system_limit;
}
-sub processes_available_by_system_limit {
- # If the wanted number of processes is bigger than the system limits:
- # Limit them to the system limits
- # Limits are: File handles, number of input lines, processes,
- # and taking > 1 second to spawn 10 extra processes
- # Returns:
- # Number of processes
- my $self = shift;
- my $wanted_processes = shift;
-
- my $system_limit = 0;
- my @jobs = ();
- my $job;
- my @args = ();
- my $arg;
- my $more_filehandles = 1;
- my $max_system_proc_reached = 0;
- my $slow_spawining_warning_printed = 0;
- my $time = time;
- my %fh;
+{
my @children;
+ my $max_system_proc_reached;
+ my $more_filehandles;
+ my %fh;
+ my $tmpfhname;
+ my $count_jobs_already_read;
+ my @jobs;
+ my $job;
+ my @args;
+ my $arg;
- # Reserve filehandles
- # perl uses 7 filehandles for something?
- # parallel uses 1 for memory_usage
- for my $i (1..8) {
- open($fh{"init-$i"},"next_seq();
- my $wait_time_for_getting_args = 0;
- my $start_time = time;
- while(1) {
- $system_limit >= $wanted_processes and last;
- not $more_filehandles and last;
- $max_system_proc_reached and last;
- my $before_getting_arg = time;
- if($Global::semaphore) {
- # Skip
- } elsif(defined $::opt_retries and $count_jobs_already_read) {
- # For retries we may need to run all jobs on this sshlogin
- # so include the already read jobs for this sshlogin
- $count_jobs_already_read--;
- } else {
- if($::opt_X or $::opt_m) {
- # The arguments may have to be re-spread over several jobslots
- # So pessimistically only read one arg per jobslot
- # instead of a full commandline
- if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
- if($Global::JobQueue->empty()) {
- last;
- } else {
- ($job) = $Global::JobQueue->get();
- push(@jobs, $job);
- }
- } else {
- ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
- push(@args, $arg);
- }
- } else {
- # If there are no more command lines, then we have a process
- # per command line, so no need to go further
- $Global::JobQueue->empty() and last;
- ($job) = $Global::JobQueue->get();
- push(@jobs, $job);
- }
- }
- $wait_time_for_getting_args += time - $before_getting_arg;
- $system_limit++;
- # Every simultaneous process uses 2 filehandles when grouping
- $more_filehandles = open($fh{$system_limit*10}," 10 and
- $forktime > 1 and
- $forktime > $system_limit * 0.01
- and not $slow_spawining_warning_printed) {
- # It took more than 0.01 second to fork a processes on avg.
- # Give the user a warning. He can press Ctrl-C if this
- # sucks.
- print $Global::original_stderr
- ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n",
- "Consider adjusting -j. Press CTRL-C to stop.\n");
- $slow_spawining_warning_printed = 1;
- }
+ }
}
- if($system_limit < $wanted_processes and not $more_filehandles) {
- print $Global::original_stderr
- ("parallel: Warning: Only enough filehandles to run ",
- $system_limit, " jobs in parallel. ",
- "Raising ulimit -n may help.\n");
+
+ sub get_args_or_jobs {
+ # Get an arg or a job (depending on mode)
+ if($Global::semaphore or $opt::pipe) {
+ # Skip: No need to get args
+ return 1;
+ } elsif(defined $opt::retries and $count_jobs_already_read) {
+ # For retries we may need to run all jobs on this sshlogin
+ # so include the already read jobs for this sshlogin
+ $count_jobs_already_read--;
+ return 1;
+ } else {
+ if($opt::X or $opt::m) {
+ # The arguments may have to be re-spread over several jobslots
+ # So pessimistically only read one arg per jobslot
+ # instead of a full commandline
+ if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
+ if($Global::JobQueue->empty()) {
+ return 0;
+ } else {
+ $job = $Global::JobQueue->get();
+ push(@jobs, $job);
+ return 1;
+ }
+ } else {
+ $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
+ push(@args, $arg);
+ return 1;
+ }
+ } else {
+ # If there are no more command lines, then we have a process
+ # per command line, so no need to go further
+ if($Global::JobQueue->empty()) {
+ return 0;
+ } else {
+ $job = $Global::JobQueue->get();
+ push(@jobs, $job);
+ return 1;
+ }
+ }
+ }
}
- if($system_limit < $wanted_processes and $max_system_proc_reached) {
- print $Global::original_stderr
- ("parallel: Warning: Only enough available processes to run ",
- $system_limit, " jobs in parallel.\n");
+
+ sub cleanup {
+ # Cleanup: Close the files
+ for (values %fh) { close $_ }
+ # Cleanup: Kill the children
+ for my $pid (@children) {
+ kill 9, $pid;
+ waitpid($pid,0);
+ delete $Global::unkilled_children{$pid};
+ }
+ # Cleanup: Unget the command_lines or the @args
+ $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
+ $Global::JobQueue->unget(@jobs);
+ @jobs = undef;
}
- if($Global::JobQueue->empty()) {
- $system_limit ||= 1;
+
+ sub processes_available_by_system_limit {
+ # If the wanted number of processes is bigger than the system limits:
+ # Limit them to the system limits
+ # Limits are: File handles, number of input lines, processes,
+ # and taking > 1 second to spawn 10 extra processes
+ # Returns:
+ # Number of processes
+ my $self = shift;
+ my $wanted_processes = shift;
+ my $system_limit = 0;
+ my $slow_spawining_warning_printed = 0;
+ my $time = time;
+ $more_filehandles = 1;
+ $tmpfhname = "TmpFhNamE";
+
+ # perl uses 7 filehandles for something?
+ # parallel uses 1 for memory_usage
+ # parallel uses 4 for ?
+ reserve_filehandles(12);
+ # Two processes for load avg and ?
+ reserve_process();
+ reserve_process();
+
+ # For --retries count also jobs already run
+ $count_jobs_already_read = $Global::JobQueue->next_seq();
+ my $wait_time_for_getting_args = 0;
+ my $start_time = time;
+ while(1) {
+ $system_limit >= $wanted_processes and last;
+ not $more_filehandles and last;
+ $max_system_proc_reached and last;
+
+ my $before_getting_arg = time;
+ get_args_or_jobs() or last;
+ $wait_time_for_getting_args += time - $before_getting_arg;
+ $system_limit++;
+
+ # Every simultaneous process uses 2 filehandles to write to
+ # and 2 filehandles to read from
+ reserve_filehandles(4);
+
+ # System process limit
+ reserve_process();
+
+ my $forktime = time - $time - $wait_time_for_getting_args;
+ ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
+ $forktime,
+ " (processes so far: ", $system_limit,")\n");
+ if($system_limit > 10 and
+ $forktime > 1 and
+ $forktime > $system_limit * 0.01
+ and not $slow_spawining_warning_printed) {
+ # It took more than 0.01 second to fork a processes on avg.
+ # Give the user a warning. He can press Ctrl-C if this
+ # sucks.
+ ::warning("Starting $system_limit processes took > $forktime sec.\n",
+ "Consider adjusting -j. Press CTRL-C to stop.\n");
+ $slow_spawining_warning_printed = 1;
+ }
+ }
+ cleanup();
+
+ if($system_limit < $wanted_processes) {
+ # The system_limit is less than the wanted_processes
+ if($system_limit < 1 and not $Global::JobQueue->empty()) {
+ ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n",
+ "or /proc/sys/kernel/pid_max may help.\n");
+ ::wait_and_exit(255);
+ }
+ if(not $more_filehandles) {
+ ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n",
+ "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ",
+ "raising ulimit -n or /etc/security/limits.conf may help.\n");
+ }
+ if($max_system_proc_reached) {
+ ::warning("Only enough available processes to run ", $system_limit,
+ " jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n",
+ "or /proc/sys/kernel/pid_max may help.\n");
+ }
+ }
+ if($] == 5.008008 and $system_limit > 1000) {
+ # https://savannah.gnu.org/bugs/?36942
+ $system_limit = 1000;
+ }
+ if($Global::JobQueue->empty()) {
+ $system_limit ||= 1;
+ }
+ if($self->string() ne ":" and
+ $system_limit > $Global::default_simultaneous_sshlogins) {
+ $system_limit =
+ $self->simultaneous_sshlogin_limit($system_limit);
+ }
+ return $system_limit;
}
- # Cleanup: Close the files
- for (values %fh) { close $_ }
- # Cleanup: Kill the children
- for my $pid (@children) {
- kill 9, $pid;
- waitpid($pid,0);
- delete $Global::unkilled_children{$pid};
- }
- #wait();
- # Cleanup: Unget the command_lines or the @args
- $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
- $Global::JobQueue->unget(@jobs);
- if($self->string() ne ":" and
- $system_limit > $Global::default_simultaneous_sshlogins) {
- $system_limit =
- $self->simultaneous_sshlogin_limit($system_limit);
- }
- return $system_limit;
}
sub simultaneous_sshlogin_limit {
@@ -2467,20 +4727,23 @@ sub simultaneous_sshlogin_limit {
# min($wanted_processes,$working_simultaneous_ssh_logins-1)
my $self = shift;
my $wanted_processes = shift;
+ if($self->{'time_to_login'}) {
+ return $wanted_processes;
+ }
+
# Try twice because it guesses wrong sometimes
# Choose the minimal
my $ssh_limit =
::min($self->simultaneous_sshlogin($wanted_processes),
- $self->simultaneous_sshlogin($wanted_processes));
+ $self->simultaneous_sshlogin($wanted_processes));
if($ssh_limit < $wanted_processes) {
my $serverlogin = $self->serverlogin();
- print $Global::original_stderr
- ("parallel: Warning: ssh to $serverlogin only allows ",
- "for $ssh_limit simultaneous logins.\n",
- "You may raise this by changing ",
- "/etc/ssh/sshd_config:MaxStartup on $serverlogin\n",
- "Using only ",$ssh_limit-1," connections ",
- "to avoid race conditions\n");
+ ::warning("ssh to $serverlogin only allows ",
+ "for $ssh_limit simultaneous logins.\n",
+ "You may raise this by changing ",
+ "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n",
+ "Using only ",$ssh_limit-1," connections ",
+ "to avoid race conditions.\n");
}
# Race condition can cause problem if using all sshs.
if($ssh_limit > 1) { $ssh_limit -= 1; }
@@ -2497,12 +4760,14 @@ sub simultaneous_sshlogin {
my $wanted_processes = shift;
my $sshcmd = $self->sshcommand();
my $serverlogin = $self->serverlogin();
- my $cmd = "$sshcmd $serverlogin echo simultaneouslogin 2>&1 &"x$wanted_processes;
- ::debug("Trying $wanted_processes logins at $serverlogin");
- open (SIMUL, "($cmd)|grep simultaneouslogin | wc -l|") or
+ my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
+ # TODO sh -c wrapper to work for csh
+ my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes;
+ ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
+ open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
::die_bug("simultaneouslogin");
- my $ssh_limit = ;
- close SIMUL;
+ my $ssh_limit = <$simul_fh>;
+ close $simul_fh;
chomp $ssh_limit;
return $ssh_limit;
}
@@ -2530,7 +4795,8 @@ sub user_requested_processes {
my $j = $1;
$processes =
$self->ncpus() - $j;
- } elsif ($opt_P =~ /^(\d+)\%$/) {
+ } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
+ # E.g. -P 10.5%
my $j = $1;
$processes =
$self->ncpus() * $j / 100;
@@ -2542,22 +4808,19 @@ sub user_requested_processes {
}
} elsif (-f $opt_P) {
$Global::max_procs_file = $opt_P;
- $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9];
- if(open(IN, $Global::max_procs_file)) {
- my $opt_P_file = join("",);
- close IN;
+ if(open(my $in_fh, "<", $Global::max_procs_file)) {
+ my $opt_P_file = join("",<$in_fh>);
+ close $in_fh;
$processes = $self->user_requested_processes($opt_P_file);
} else {
- print $Global::original_stderr "Cannot open $opt_P\n";
- exit(255);
+ ::error("Cannot open $opt_P.\n");
+ ::wait_and_exit(255);
}
} else {
- print $Global::original_stderr "Parsing of --jobs/-j/--max-procs/-P failed\n";
+ ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n");
::die_usage();
}
- if($processes < 1) {
- $processes = 1;
- }
+ $processes = ::ceil($processes);
}
return $processes;
}
@@ -2568,25 +4831,26 @@ sub ncpus {
my $sshcmd = $self->sshcommand();
my $serverlogin = $self->serverlogin();
if($serverlogin eq ":") {
- if($::opt_use_cpus_instead_of_cores) {
+ if($opt::use_cpus_instead_of_cores) {
$self->{'ncpus'} = no_of_cpus();
} else {
$self->{'ncpus'} = no_of_cores();
}
} else {
my $ncpu;
- if($::opt_use_cpus_instead_of_cores) {
- $ncpu = qx(echo|$sshcmd $serverlogin parallel --number-of-cpus);
+ my $sqe = ::shell_quote_scalar($Global::envvar);
+ if($opt::use_cpus_instead_of_cores) {
+ $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus);
} else {
- $ncpu = qx(echo|$sshcmd $serverlogin parallel --number-of-cores);
+ ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n));
+ $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores);
}
chomp $ncpu;
if($ncpu =~ /^\s*[0-9]+\s*$/s) {
$self->{'ncpus'} = $ncpu;
} else {
- print $Global::original_stderr
- ("parallel: Warning: Could not figure out ",
- "number of cpus on $serverlogin ($ncpu). Using 1\n");
+ ::warning("Could not figure out ",
+ "number of cpus on $serverlogin ($ncpu). Using 1.\n");
$self->{'ncpus'} = 1;
}
}
@@ -2597,31 +4861,57 @@ sub ncpus {
sub no_of_cpus {
# Returns:
# Number of physical CPUs
- local $/="\n"; # If delimiter is set, then $/ will be wrong
+ local $/ = "\n"; # If delimiter is set, then $/ will be wrong
my $no_of_cpus;
if ($^O eq 'linux') {
- $no_of_cpus = no_of_cpus_gnu_linux();
+ $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux();
} elsif ($^O eq 'freebsd') {
$no_of_cpus = no_of_cpus_freebsd();
+ } elsif ($^O eq 'netbsd') {
+ $no_of_cpus = no_of_cpus_netbsd();
+ } elsif ($^O eq 'openbsd') {
+ $no_of_cpus = no_of_cpus_openbsd();
+ } elsif ($^O eq 'gnu') {
+ $no_of_cpus = no_of_cpus_hurd();
+ } elsif ($^O eq 'darwin') {
+ $no_of_cpus = no_of_cpus_darwin();
} elsif ($^O eq 'solaris') {
$no_of_cpus = no_of_cpus_solaris();
} elsif ($^O eq 'aix') {
$no_of_cpus = no_of_cpus_aix();
- } elsif ($^O eq 'darwin') {
- $no_of_cpus = no_of_cpus_darwin();
+ } elsif ($^O eq 'hpux') {
+ $no_of_cpus = no_of_cpus_hpux();
+ } elsif ($^O eq 'nto') {
+ $no_of_cpus = no_of_cpus_qnx();
+ } elsif ($^O eq 'svr5') {
+ $no_of_cpus = no_of_cpus_openserver();
+ } elsif ($^O eq 'irix') {
+ $no_of_cpus = no_of_cpus_irix();
+ } elsif ($^O eq 'dec_osf') {
+ $no_of_cpus = no_of_cpus_tru64();
} else {
- $no_of_cpus = (no_of_cpus_freebsd()
+ $no_of_cpus = (no_of_cpus_gnu_linux()
+ || no_of_cpus_freebsd()
+ || no_of_cpus_netbsd()
+ || no_of_cpus_openbsd()
+ || no_of_cpus_hurd()
|| no_of_cpus_darwin()
|| no_of_cpus_solaris()
|| no_of_cpus_aix()
- || no_of_cpus_gnu_linux()
+ || no_of_cpus_hpux()
+ || no_of_cpus_qnx()
+ || no_of_cpus_openserver()
+ || no_of_cpus_irix()
+ || no_of_cpus_tru64()
+ # Number of cores is better than no guess for #CPUs
+ || nproc()
);
}
if($no_of_cpus) {
chomp $no_of_cpus;
return $no_of_cpus;
} else {
- warn("parallel: Cannot figure out number of cpus. Using 1");
+ ::warning("Cannot figure out number of cpus. Using 1.\n");
return 1;
}
}
@@ -2629,52 +4919,102 @@ sub no_of_cpus {
sub no_of_cores {
# Returns:
# Number of CPU cores
- local $/="\n"; # If delimiter is set, then $/ will be wrong
+ local $/ = "\n"; # If delimiter is set, then $/ will be wrong
my $no_of_cores;
if ($^O eq 'linux') {
$no_of_cores = no_of_cores_gnu_linux();
} elsif ($^O eq 'freebsd') {
$no_of_cores = no_of_cores_freebsd();
+ } elsif ($^O eq 'netbsd') {
+ $no_of_cores = no_of_cores_netbsd();
+ } elsif ($^O eq 'openbsd') {
+ $no_of_cores = no_of_cores_openbsd();
+ } elsif ($^O eq 'gnu') {
+ $no_of_cores = no_of_cores_hurd();
+ } elsif ($^O eq 'darwin') {
+ $no_of_cores = no_of_cores_darwin();
} elsif ($^O eq 'solaris') {
$no_of_cores = no_of_cores_solaris();
} elsif ($^O eq 'aix') {
$no_of_cores = no_of_cores_aix();
- } elsif ($^O eq 'darwin') {
- $no_of_cores = no_of_cores_darwin();
+ } elsif ($^O eq 'hpux') {
+ $no_of_cores = no_of_cores_hpux();
+ } elsif ($^O eq 'nto') {
+ $no_of_cores = no_of_cores_qnx();
+ } elsif ($^O eq 'svr5') {
+ $no_of_cores = no_of_cores_openserver();
+ } elsif ($^O eq 'irix') {
+ $no_of_cores = no_of_cores_irix();
+ } elsif ($^O eq 'dec_osf') {
+ $no_of_cores = no_of_cores_tru64();
} else {
- $no_of_cores = (no_of_cores_freebsd()
+ $no_of_cores = (no_of_cores_gnu_linux()
+ || no_of_cores_freebsd()
+ || no_of_cores_netbsd()
+ || no_of_cores_openbsd()
+ || no_of_cores_hurd()
|| no_of_cores_darwin()
|| no_of_cores_solaris()
|| no_of_cores_aix()
- || no_of_cores_gnu_linux()
+ || no_of_cores_hpux()
+ || no_of_cores_qnx()
+ || no_of_cores_openserver()
+ || no_of_cores_irix()
+ || no_of_cores_tru64()
+ || nproc()
);
}
if($no_of_cores) {
chomp $no_of_cores;
return $no_of_cores;
} else {
- warn("parallel: Cannot figure out number of CPU cores. Using 1");
+ ::warning("Cannot figure out number of CPU cores. Using 1.\n");
return 1;
}
}
+sub nproc {
+ # Returns:
+ # Number of cores using `nproc`
+ my $no_of_cores = qx{ sh -c 'nproc 2>/dev/null' };
+ return $no_of_cores;
+}
+
sub no_of_cpus_gnu_linux {
# Returns:
# Number of physical CPUs on GNU/Linux
# undef if not GNU/Linux
my $no_of_cpus;
+ my $no_of_cores;
+ my $no_of_active_cores;
if(-e "/proc/cpuinfo") {
$no_of_cpus = 0;
+ $no_of_cores = 0;
my %seen;
- open(IN,"cat /proc/cpuinfo|") || return undef;
- while() {
- if(/^physical id.*[:](.*)/ and not $seen{$1}++) {
- $no_of_cpus++;
- }
- }
- close IN;
+ if(open(my $in_fh, "<", "/proc/cpuinfo")) {
+ while(<$in_fh>) {
+ if(/^physical id.*[:](.*)/ and not $seen{$1}++) {
+ $no_of_cpus++;
+ }
+ /^processor.*[:]/i and $no_of_cores++;
+ }
+ close $in_fh;
+ }
}
- return $no_of_cpus;
+ if(-e "/proc/self/status") {
+ # if 'taskset' is used to limit number of cores
+ if(open(my $in_fh, "<", "/proc/self/status")) {
+ while(<$in_fh>) {
+ if(/^Cpus_allowed:\s*(\S+)/) {
+ my $a = $1;
+ $a =~ tr/,//d;
+ $no_of_active_cores = unpack ("%32b*", pack ("H*",$a));
+ }
+ }
+ close $in_fh;
+ }
+ }
+ return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores));
}
sub no_of_cores_gnu_linux {
@@ -2682,37 +5022,29 @@ sub no_of_cores_gnu_linux {
# Number of CPU cores on GNU/Linux
# undef if not GNU/Linux
my $no_of_cores;
+ my $no_of_active_cores;
if(-e "/proc/cpuinfo") {
$no_of_cores = 0;
- open(IN,"cat /proc/cpuinfo|") || return undef;
- while() {
- /^processor.*[:]/ and $no_of_cores++;
+ open(my $in_fh, "<", "/proc/cpuinfo") || return undef;
+ while(<$in_fh>) {
+ /^processor.*[:]/i and $no_of_cores++;
}
- close IN;
+ close $in_fh;
}
- return $no_of_cores;
-}
-
-sub no_of_cpus_darwin {
- # Returns:
- # Number of physical CPUs on Mac Darwin
- # undef if not Mac Darwin
- my $no_of_cpus =
- (`sysctl -n hw.physicalcpu 2>/dev/null`
- or
- `sysctl -a hw 2>/dev/null | grep -w physicalcpu | awk '{ print \$2 }'`);
- return $no_of_cpus;
-}
-
-sub no_of_cores_darwin {
- # Returns:
- # Number of CPU cores on Mac Darwin
- # undef if not Mac Darwin
- my $no_of_cores =
- (`sysctl -n hw.logicalcpu 2>/dev/null`
- or
- `sysctl -a hw 2>/dev/null | grep -w logicalcpu | awk '{ print \$2 }'`);
- return $no_of_cores;
+ if(-e "/proc/self/status") {
+ # if 'taskset' is used to limit number of cores
+ if(open(my $in_fh, "<", "/proc/self/status")) {
+ while(<$in_fh>) {
+ if(/^Cpus_allowed:\s*(\S+)/) {
+ my $a = $1;
+ $a =~ tr/,//d;
+ $no_of_active_cores = unpack ("%32b*", pack ("H*",$a));
+ }
+ }
+ close $in_fh;
+ }
+ }
+ return (::min($no_of_cores,$no_of_active_cores));
}
sub no_of_cpus_freebsd {
@@ -2720,9 +5052,9 @@ sub no_of_cpus_freebsd {
# Number of physical CPUs on FreeBSD
# undef if not FreeBSD
my $no_of_cpus =
- (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'`
+ (qx{ sh -c 'sysctl -a dev.cpu 2>/dev/null' | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }
or
- `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`);
+ qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' });
chomp $no_of_cpus;
return $no_of_cpus;
}
@@ -2732,13 +5064,89 @@ sub no_of_cores_freebsd {
# Number of CPU cores on FreeBSD
# undef if not FreeBSD
my $no_of_cores =
- (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`
+ (qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' }
or
- `sysctl -a hw 2>/dev/null | grep -w logicalcpu | awk '{ print \$2 }'`);
+ qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' });
chomp $no_of_cores;
return $no_of_cores;
}
+sub no_of_cpus_netbsd {
+ # Returns:
+ # Number of physical CPUs on NetBSD
+ # undef if not NetBSD
+ my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_netbsd {
+ # Returns:
+ # Number of CPU cores on NetBSD
+ # undef if not NetBSD
+ my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_openbsd {
+ # Returns:
+ # Number of physical CPUs on OpenBSD
+ # undef if not OpenBSD
+ my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_openbsd {
+ # Returns:
+ # Number of CPU cores on OpenBSD
+ # undef if not OpenBSD
+ my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_hurd {
+ # Returns:
+ # Number of physical CPUs on HURD
+ # undef if not HURD
+ my $no_of_cpus = qx{ nproc };
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_hurd {
+ # Returns:
+ # Number of physical CPUs on HURD
+ # undef if not HURD
+ my $no_of_cores = `nproc`;
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_darwin {
+ # Returns:
+ # Number of physical CPUs on Mac Darwin
+ # undef if not Mac Darwin
+ my $no_of_cpus =
+ (qx{ sh -c 'sysctl -n hw.physicalcpu 2>/dev/null' }
+ or
+ qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' });
+ return $no_of_cpus;
+}
+
+sub no_of_cores_darwin {
+ # Returns:
+ # Number of CPU cores on Mac Darwin
+ # undef if not Mac Darwin
+ my $no_of_cores =
+ (qx{ sh -c 'sysctl -n hw.logicalcpu 2>/dev/null' }
+ or
+ qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' });
+ return $no_of_cores;
+}
+
sub no_of_cpus_solaris {
# Returns:
# Number of physical CPUs on Solaris
@@ -2750,7 +5158,7 @@ sub no_of_cpus_solaris {
}
}
if(-x "/usr/sbin/prtconf") {
- my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`;
+ my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance };
if($#prtconf >= 0) {
return $#prtconf +1;
}
@@ -2769,7 +5177,7 @@ sub no_of_cores_solaris {
}
}
if(-x "/usr/sbin/prtconf") {
- my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`;
+ my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance };
if($#prtconf >= 0) {
return $#prtconf +1;
}
@@ -2783,11 +5191,11 @@ sub no_of_cpus_aix {
# undef if not AIX
my $no_of_cpus = 0;
if(-x "/usr/sbin/lscfg") {
- open(IN,"/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' ' |")
+ open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")
|| return undef;
- $no_of_cpus = ;
+ $no_of_cpus = <$in_fh>;
chomp ($no_of_cpus);
- close IN;
+ close $in_fh;
}
return $no_of_cpus;
}
@@ -2798,15 +5206,111 @@ sub no_of_cores_aix {
# undef if not AIX
my $no_of_cores;
if(-x "/usr/bin/vmstat") {
- open(IN,"/usr/bin/vmstat 1 1|") || return undef;
- while() {
+ open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef;
+ while(<$in_fh>) {
/lcpu=([0-9]*) / and $no_of_cores = $1;
}
- close IN;
+ close $in_fh;
}
return $no_of_cores;
}
+sub no_of_cpus_hpux {
+ # Returns:
+ # Number of physical CPUs on HP-UX
+ # undef if not HP-UX
+ my $no_of_cpus =
+ qx{ sh -c '/usr/bin/mpsched -s 2>&1' | grep 'Locality Domain Count' | awk '{ print \$4 }'};
+ return $no_of_cpus;
+}
+
+sub no_of_cores_hpux {
+ # Returns:
+ # Number of CPU cores on HP-UX
+ # undef if not HP-UX
+ my $no_of_cores =
+ qx{ sh -c '/usr/bin/mpsched -s 2>&1' | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1\n"'};
+ return $no_of_cores;
+}
+
+sub no_of_cpus_qnx {
+ # Returns:
+ # Number of physical CPUs on QNX
+ # undef if not QNX
+ # BUG: It is not known how to calculate this.
+ my $no_of_cpus = 0;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_qnx {
+ # Returns:
+ # Number of CPU cores on QNX
+ # undef if not QNX
+ # BUG: It is not known how to calculate this.
+ my $no_of_cores = 0;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_openserver {
+ # Returns:
+ # Number of physical CPUs on SCO OpenServer
+ # undef if not SCO OpenServer
+ my $no_of_cpus = 0;
+ if(-x "/usr/sbin/psrinfo") {
+ my @psrinfo = `/usr/sbin/psrinfo`;
+ if($#psrinfo >= 0) {
+ return $#psrinfo +1;
+ }
+ }
+ return $no_of_cpus;
+}
+
+sub no_of_cores_openserver {
+ # Returns:
+ # Number of CPU cores on SCO OpenServer
+ # undef if not SCO OpenServer
+ my $no_of_cores = 0;
+ if(-x "/usr/sbin/psrinfo") {
+ my @psrinfo = `/usr/sbin/psrinfo`;
+ if($#psrinfo >= 0) {
+ return $#psrinfo +1;
+ }
+ }
+ return $no_of_cores;
+}
+
+sub no_of_cpus_irix {
+ # Returns:
+ # Number of physical CPUs on IRIX
+ # undef if not IRIX
+ my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_irix {
+ # Returns:
+ # Number of CPU cores on IRIX
+ # undef if not IRIX
+ my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_tru64 {
+ # Returns:
+ # Number of physical CPUs on Tru64
+ # undef if not Tru64
+ my $no_of_cpus = `sizer -pr`;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_tru64 {
+ # Returns:
+ # Number of CPU cores on Tru64
+ # undef if not Tru64
+ my $no_of_cores = `sizer -pr`;
+ return $no_of_cores;
+}
+
sub sshcommand {
my $self = shift;
if (not defined $self->{'sshcommand'}) {
@@ -2839,22 +5343,29 @@ sub sshcommand_of_sshlogin {
$sshcmd = $1; $serverlogin = $2;
} else {
# Normal ssh
- if($::opt_controlmaster) {
+ if($opt::controlmaster) {
# Use control_path to make ssh faster
my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
$sshcmd = "ssh -S ".$control_path;
$serverlogin = $self->{'string'};
- my $master = "ssh -MTS $control_path $serverlogin sleep 1";
if(not $self->{'control_path'}{$control_path}++) {
# Master is not running for this control_path
# Start it
my $pid = fork();
if($pid) {
- $Global::sshmaster{$pid}++;
+ $Global::sshmaster{$pid} ||= 1;
} else {
- ::debug($master,"\n");
- `$master`;
- ::wait_and_exit(0);
+ $SIG{'TERM'} = undef;
+ # Ignore the 'foo' being printed
+ open(STDOUT,">","/dev/null");
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument"
+ open(STDERR,">","/dev/null");
+ open(STDIN,"<","/dev/null");
+ # Run a sleep that outputs data, so it will discover if the ssh connection closes.
+ my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}');
+ my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep);
+ exec(@master);
}
}
} else {
@@ -2880,18 +5391,110 @@ sub control_path_dir {
return $self->{'control_path_dir'};
}
+sub rsync_transfer_cmd {
+ # Command to run to transfer a file
+ # Input:
+ # $file = filename of file to transfer
+ # $workdir = destination dir
+ # Returns:
+ # $cmd = rsync command to run to transfer $file ("" if unreadable)
+ my $self = shift;
+ my $file = shift;
+ my $workdir = shift;
+ if(not -r $file) {
+ ::warning($file, " is not readable and will not be transferred.\n");
+ return "true";
+ }
+ my $rsync_destdir;
+ if($file =~ m:^/:) {
+ # rsync /foo/bar /
+ $rsync_destdir = "/";
+ } else {
+ $rsync_destdir = ::shell_quote_file($workdir);
+ }
+ $file = ::shell_quote_file($file);
+ my $sshcmd = $self->sshcommand();
+ my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd);
+ my $serverlogin = $self->serverlogin();
+ # Make dir if it does not exist
+ return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" .
+ rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )";
+}
+
+sub cleanup_cmd {
+ # Command to run to remove the remote file
+ # Input:
+ # $file = filename to remove
+ # $workdir = destination dir
+ # Returns:
+ # $cmd = ssh command to run to remove $file and empty parent dirs
+ my $self = shift;
+ my $file = shift;
+ my $workdir = shift;
+ my $f = $file;
+ if($f =~ m:/\./:) {
+ # foo/bar/./baz/quux => workdir/baz/quux
+ # /foo/bar/./baz/quux => workdir/baz/quux
+ $f =~ s:.*/\./:$workdir/:;
+ } elsif($f =~ m:^[^/]:) {
+ # foo/bar => workdir/foo/bar
+ $f = $workdir."/".$f;
+ }
+ my @subdirs = split m:/:, ::dirname($f);
+ my @rmdir;
+ my $dir = "";
+ for(@subdirs) {
+ $dir .= $_."/";
+ unshift @rmdir, ::shell_quote_file($dir);
+ }
+ my $rmdir = @rmdir ? "sh -c 'rmdir @rmdir 2>/dev/null';" : "";
+ if(defined $opt::workdir and $opt::workdir eq "...") {
+ $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
+ }
+
+ $f = ::shell_quote_file($f);
+ my $sshcmd = $self->sshcommand();
+ my $serverlogin = $self->serverlogin();
+ return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)");
+}
+
+{
+ my $rsync;
+
+ sub rsync {
+ # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
+ # If the version >= 3.1.0: downgrade to protocol 30
+ if(not $rsync) {
+ my @out = `rsync --version`;
+ for (@out) {
+ if(/version (\d+.\d+)(.\d+)?/) {
+ if($1 >= 3.1) {
+ # Version 3.1.0 or later: Downgrade to protocol 30
+ $rsync = "rsync --protocol 30";
+ } else {
+ $rsync = "rsync";
+ }
+ }
+ }
+ $rsync or ::die_bug("Cannot figure out version of rsync: @out");
+ }
+ return $rsync;
+ }
+}
+
package JobQueue;
sub new {
my $class = shift;
- my $command = shift;
+ my $commandref = shift;
my $read_from = shift;
my $context_replace = shift;
my $max_number_of_args = shift;
my $return_files = shift;
- my $commandlinequeue = CommandLineQueue->new(
- $command,$read_from,$context_replace,$max_number_of_args,$return_files);
+ my $commandlinequeue = CommandLineQueue->new
+ ($commandref, $read_from, $context_replace, $max_number_of_args,
+ $return_files);
my @unget = ();
return bless {
'unget' => \@unget,
@@ -2926,7 +5529,7 @@ sub empty {
my $self = shift;
my $empty = (not @{$self->{'unget'}})
&& $self->{'commandlinequeue'}->empty();
- ::debug("JobQueue->empty $empty\n");
+ ::debug("run", "JobQueue->empty $empty ");
return $empty;
}
@@ -2935,10 +5538,32 @@ sub total_jobs {
if(not defined $self->{'total_jobs'}) {
my $job;
my @queue;
+ my $start = time;
+ while($job = $self->get()) {
+ if(time - $start > 10) {
+ ::warning("Reading ".scalar(@queue)." arguments took longer than 10 seconds.\n");
+ $opt::eta && ::warning("Consider removing --eta.\n");
+ $opt::bar && ::warning("Consider removing --bar.\n");
+ $opt::shuf && ::warning("Consider removing --shuf.\n");
+ last;
+ }
+ push @queue, $job;
+ }
while($job = $self->get()) {
push @queue, $job;
}
- $self->unget(@queue);
+ if($opt::shuf) {
+ my $i = @queue;
+ while (--$i) {
+ my $j = int rand($i+1);
+ @queue[$i,$j] = @queue[$j,$i];
+ }
+ my $seq = 1;
+ for my $job (@queue) {
+ $job->{'commandline'}->set_seq($seq++);
+ }
+ }
+ $self->unget(@queue);
$self->{'total_jobs'} = $#queue+1;
}
return $self->{'total_jobs'};
@@ -2960,17 +5585,14 @@ package Job;
sub new {
my $class = shift;
- my $commandline = shift;
+ my $commandlineref = shift;
return bless {
- 'commandline' => $commandline, # The commandline with no args
+ 'commandline' => $commandlineref, # CommandLine object
'workdir' => undef, # --workdir
- 'stdin' => undef, # filehandle for stdin (used for --pipe)
- 'stdout' => undef, # filehandle for stdout (used for --group)
+ # filehandle for stdin (used for --pipe)
# filename for writing stdout to (used for --files)
- 'stdoutfilename' => undef,
- 'stderr' => undef, # filehandle for stderr (used for --group)
- 'remaining' => "", # remaining data not sent to stdin (used for --pipe)
- 'datawritten' => 0, # amount of data sent via stdin (used for --pipe)
+ # remaining data not sent to stdin (used for --pipe)
+ # amount of data sent via stdin (used for --pipe)
'transfersize' => 0, # size of files using --transfer
'returnsize' => 0, # size of files using --return
'pid' => undef,
@@ -2983,12 +5605,13 @@ sub new {
'exitsignal' => undef,
# Timestamp for timeout if any
'timeout' => undef,
+ 'virgin' => 1,
}, ref($class) || $class;
}
sub replaced {
my $self = shift;
- $self->{'commandline'} or Carp::croak("cmdline empty");
+ $self->{'commandline'} or ::die_bug("commandline empty");
return $self->{'commandline'}->replaced();
}
@@ -2997,87 +5620,385 @@ sub seq {
return $self->{'commandline'}->seq();
}
-sub set_stdout {
+sub set_seq {
my $self = shift;
- $self->{'stdout'} = shift;
+ return $self->{'commandline'}->set_seq(shift);
}
-sub stdout {
+sub slot {
my $self = shift;
- return $self->{'stdout'};
+ return $self->{'commandline'}->slot();
}
-sub set_stdoutfilename {
- my $self = shift;
- $self->{'stdoutfilename'} = shift;
+{
+ my($cattail);
+
+ sub cattail {
+ # Returns:
+ # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
+ if(not $cattail) {
+ $cattail = q{
+ # cat followed by tail (possibly with rm as soon at the file is opened)
+ # If $writerpid dead: finish after this round
+ use Fcntl;
+ $|=1;
+
+ my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
+ if($read_file) {
+ open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
+ } else {
+ *IN = *STDIN;
+ }
+ while(! -s $comfile) {
+ # Writer has not opened the buffer file, so we cannot remove it yet
+ $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
+ usleep($sleep);
+ }
+ # The writer and we have both opened the file, so it is safe to unlink it
+ unlink $unlink_file;
+ unlink $comfile;
+
+ my $first_round = 1;
+ my $flags;
+ fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+ $flags |= O_NONBLOCK; # Add non-blocking to the flags
+ fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+
+ while(1) {
+ # clear EOF
+ seek(IN,0,1);
+ my $writer_running = kill 0, $writerpid;
+ $read = sysread(IN,$buf,32768);
+ if($read) {
+ if($first_round) {
+ # Only start the command if there any input to process
+ $first_round = 0;
+ open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
+ }
+
+ # Blocking print
+ while($buf) {
+ my $bytes_written = syswrite(OUT,$buf);
+ # syswrite may be interrupted by SIGHUP
+ substr($buf,0,$bytes_written) = "";
+ }
+ # Something printed: Wait less next time
+ $sleep /= 2;
+ } else {
+ if(eof(IN) and not $writer_running) {
+ # Writer dead: There will never be more to read => exit
+ exit;
+ }
+ # TODO This could probably be done more efficiently using select(2)
+ # Nothing read: Wait longer before next read
+ # Up to 100 milliseconds
+ $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
+ usleep($sleep);
+ }
+ }
+
+ sub usleep {
+ # Sleep this many milliseconds.
+ my $secs = shift;
+ select(undef, undef, undef, $secs/1000);
+ }
+ };
+ $cattail =~ s/#.*//mg;
+ $cattail =~ s/\s+/ /g;
+ }
+ return $cattail;
+ }
}
-sub stdoutfilename {
+sub openoutputfiles {
+ # Open files for STDOUT and STDERR
+ # Set file handles in $self->fh
my $self = shift;
- return $self->{'stdoutfilename'};
+ my ($outfhw, $errfhw, $outname, $errname);
+ if($opt::results) {
+ my $args_as_dirname = $self->{'commandline'}->args_as_dirname();
+ # Output in: prefix/name1/val1/name2/val2/stdout
+ my $dir = $opt::results."/".$args_as_dirname;
+ if(eval{ File::Path::mkpath($dir); }) {
+ # OK
+ } else {
+ # mkpath failed: Argument probably too long.
+ # Set $Global::max_file_length, which will keep the individual
+ # dir names shorter than the max length
+ max_file_name_length($opt::results);
+ $args_as_dirname = $self->{'commandline'}->args_as_dirname();
+ # prefix/name1/val1/name2/val2/
+ $dir = $opt::results."/".$args_as_dirname;
+ File::Path::mkpath($dir);
+ }
+ # prefix/name1/val1/name2/val2/stdout
+ $outname = "$dir/stdout";
+ if(not open($outfhw, "+>", $outname)) {
+ ::error("Cannot write to `$outname'.\n");
+ ::wait_and_exit(255);
+ }
+ # prefix/name1/val1/name2/val2/stderr
+ $errname = "$dir/stderr";
+ if(not open($errfhw, "+>", $errname)) {
+ ::error("Cannot write to `$errname'.\n");
+ ::wait_and_exit(255);
+ }
+ $self->set_fh(1,"unlink","");
+ $self->set_fh(2,"unlink","");
+ } elsif(not $opt::ungroup) {
+ # To group we create temporary files for STDOUT and STDERR
+ # To avoid the cleanup unlink the files immediately (but keep them open)
+ if(@Global::tee_jobs) {
+ # files must be removed when the tee is done
+ } elsif($opt::files) {
+ ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
+ ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
+ # --files => only remove stderr
+ $self->set_fh(1,"unlink","");
+ $self->set_fh(2,"unlink",$errname);
+ } else {
+ ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
+ ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
+ $self->set_fh(1,"unlink",$outname);
+ $self->set_fh(2,"unlink",$errname);
+ }
+ } else {
+ # --ungroup
+ open($outfhw,">&",$Global::fd{1}) || die;
+ open($errfhw,">&",$Global::fd{2}) || die;
+ # File name must be empty as it will otherwise be printed
+ $outname = "";
+ $errname = "";
+ $self->set_fh(1,"unlink",$outname);
+ $self->set_fh(2,"unlink",$errname);
+ }
+ # Set writing FD
+ $self->set_fh(1,'w',$outfhw);
+ $self->set_fh(2,'w',$errfhw);
+ $self->set_fh(1,'name',$outname);
+ $self->set_fh(2,'name',$errname);
+ if($opt::compress) {
+ $self->filter_through_compress();
+ } elsif(not $opt::ungroup) {
+ $self->grouped();
+ }
+ if($opt::linebuffer) {
+ $self->set_non_blocking();
+ }
}
-sub stderr {
+sub grouped {
my $self = shift;
- return $self->{'stderr'};
+ # Set reading FD if using --group (--ungroup does not need)
+ for my $fdno (1,2) {
+ # Re-open the file for reading
+ # so fdw can be closed seperately
+ # and fdr can be seeked seperately (for --line-buffer)
+ open(my $fdr,"<", $self->fh($fdno,'name')) ||
+ ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
+ $self->set_fh($fdno,'r',$fdr);
+ # Unlink if required
+ $Global::debug or unlink $self->fh($fdno,"unlink");
+ }
}
-sub set_stderr {
- my $self = shift;
- $self->{'stderr'} = shift;
+sub empty_input_wrapper {
+ # If no input: exit(0)
+ # If some input: Pass input as input to command on STDIN
+ # This avoids starting the command if there is no input.
+ # Input:
+ # $command = command to pipe data to
+ # Returns:
+ # $wrapped_command = the wrapped command
+ my $command = shift;
+ my $script = '$c="'.::perl_quote_scalar($command).'";'.
+ ::spacefree(0,q{
+ if(sysread(STDIN, $buf, 1)) {
+ open($fh, "|-", $c) || die;
+ syswrite($fh, $buf);
+ while($read = sysread(STDIN, $buf, 32768)) {
+ syswrite($fh, $buf);
+ }
+ close $fh;
+ exit ($?&127 ? 128+($?&127) : 1+$?>>8)
+ }
+ });
+ ::debug("run",'Empty wrap: perl -e '.::shell_quote_scalar($script)."\n");
+ return 'perl -e '.::shell_quote_scalar($script);
}
-sub stdin {
+sub filter_through_compress {
my $self = shift;
- return $self->{'stdin'};
+ # Send stdout to stdin for $opt::compress_program(1)
+ # Send stderr to stdin for $opt::compress_program(2)
+ # cattail get pid: $pid = $self->fh($fdno,'rpid');
+ my $cattail = cattail();
+
+ for my $fdno (1,2) {
+ # Make a communication file.
+ my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
+ close $fh;
+ # Compressor: (echo > $comfile; compress pipe) > output
+ # When the echo is written to $comfile, it is known that output file is opened,
+ # thus output file can then be removed by the decompressor.
+ my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".empty_input_wrapper($opt::compress_program).") >".
+ $self->fh($fdno,'name')) || die $?;
+ $self->set_fh($fdno,'w',$fdw);
+ $self->set_fh($fdno,'wpid',$wpid);
+ # Decompressor: open output; -s $comfile > 0: rm $comfile output; decompress output > stdout
+ my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
+ $opt::decompress_program, $wpid,
+ $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?;
+ $self->set_fh($fdno,'r',$fdr);
+ $self->set_fh($fdno,'rpid',$rpid);
+ }
}
-sub set_stdin {
+sub set_non_blocking {
my $self = shift;
- my $stdin = shift;
- # set non-blocking
- fcntl($stdin, ::F_SETFL, ::O_NONBLOCK) or
- ::die_bug("Couldn't set flags for HANDLE: $!");
- $self->{'stdin'} = $stdin;
+ $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+ for my $fdno (1,2) {
+ my $fdr = $self->fh($fdno,'r');
+ my $flags;
+ fcntl($fdr, &::F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+ $flags |= &::O_NONBLOCK; # Add non-blocking to the flags
+ fcntl($fdr, &::F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+ }
+}
+
+sub max_file_name_length {
+ # Figure out the max length of a subdir
+ # TODO and the max total length
+ # Ext4 = 255,130816
+ my $testdir = shift;
+
+ my $upper = 8_000_000;
+ my $len = 8;
+ my $dir = "x"x$len;
+ do {
+ rmdir($testdir."/".$dir);
+ $len *= 16;
+ $dir = "x"x$len;
+ } while ($len < $upper and mkdir $testdir."/".$dir);
+ # Then search for the actual max length between $len/16 and $len
+ my $min = $len/16;
+ my $max = $len;
+ while($max-$min > 5) {
+ # If we are within 5 chars of the exact value:
+ # it is not worth the extra time to find the exact value
+ my $test = int(($min+$max)/2);
+ $dir = "x"x$test;
+ if(mkdir $testdir."/".$dir) {
+ rmdir($testdir."/".$dir);
+ $min = $test;
+ } else {
+ $max = $test;
+ }
+ }
+ $Global::max_file_length = $min;
+ return $min;
+}
+
+sub set_fh {
+ # Set file handle
+ my ($self, $fd_no, $key, $fh) = @_;
+ $self->{'fd'}{$fd_no,$key} = $fh;
+}
+
+sub fh {
+ # Get file handle
+ my ($self, $fd_no, $key) = @_;
+ return $self->{'fd'}{$fd_no,$key};
}
sub write {
my $self = shift;
my $remaining_ref = shift;
- if(length($$remaining_ref)) {
- $self->{'remaining'} .= $$remaining_ref;
- $self->complete_write();
+ my $stdin_fh = $self->fh(0,"w");
+
+ my $len = length $$remaining_ref;
+ # syswrite may not write all in one go,
+ # so make sure everything is written.
+ while($len) {
+ my $written = syswrite($stdin_fh,$$remaining_ref);
+ substr($$remaining_ref,0,$written) = "";
+ $len -= $written;
}
}
-sub complete_write {
+sub set_stdin_buffer {
+ # Copy stdin buffer from $block_ref up to $endpos
+ # Prepend with $header_ref
+ # Remove $recstart and $recend if needed
+ # Input:
+ # $header_ref = ref to $header to prepend
+ # $block_ref = ref to $block to pass on
+ # $endpos = length of $block to pass on
+ # $recstart = --recstart regexp
+ # $recend = --recend regexp
# Returns:
- # number of bytes written (see syswrite)
+ # N/A
my $self = shift;
- my $in = $self->{'stdin'};
- my $len = syswrite($in,$self->{'remaining'});
- if (!defined($len) && $! == &::EAGAIN) {
- # write would block;
- } else {
- # Remove the part that was written
- substr($self->{'remaining'},0,$len) = "";
- $self->{'datawritten'} += $len;
+ my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_;
+ $self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos);
+ if($opt::remove_rec_sep) {
+ remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend);
}
- return $len;
+ $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'};
+ $self->{'stdin_buffer_pos'} = 0;
+ $self->add_transfersize($self->{'stdin_buffer_length'});
}
-sub remaining {
+sub stdin_buffer_length {
my $self = shift;
- if(defined $self->{'remaining'}) {
- return length $self->{'remaining'};
- } else {
- return undef;
- }
+ return $self->{'stdin_buffer_length'};
}
-sub datawritten {
+sub remove_rec_sep {
+ my ($block_ref,$recstart,$recend) = @_;
+ # Remove record separator
+ $$block_ref =~ s/$recend$recstart//gos;
+ $$block_ref =~ s/^$recstart//os;
+ $$block_ref =~ s/$recend$//os;
+}
+
+sub non_block_write {
my $self = shift;
- return $self->{'datawritten'};
+ my $something_written = 0;
+ use POSIX qw(:errno_h);
+ # for loop used to avoid copying substr: $buf will be an alias for the substr
+ for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) {
+ my $in = $self->fh(0,"w");
+ my $rv = syswrite($in, $buf);
+ if (!defined($rv) && $! == EAGAIN) {
+ # would block
+ $something_written = 0;
+ } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) {
+ # incomplete write
+ # Remove the written part
+ $self->{'stdin_buffer_pos'} += $rv;
+ $something_written = $rv;
+ } else {
+ # successfully wrote everything
+ my $a = "";
+ $self->set_stdin_buffer(\$a,\$a,"","");
+ $something_written = $rv;
+ }
+ }
+
+ ::debug("pipe", "Non-block: ", $something_written);
+ return $something_written;
+}
+
+
+sub virgin {
+ my $self = shift;
+ return $self->{'virgin'};
+}
+
+sub set_virgin {
+ my $self = shift;
+ $self->{'virgin'} = shift;
}
sub pid {
@@ -3091,92 +6012,122 @@ sub set_pid {
}
sub starttime {
+ # Returns:
+ # UNIX-timestamp this job started
my $self = shift;
- return $self->{'starttime'};
+ return sprintf("%.3f",$self->{'starttime'});
}
sub set_starttime {
my $self = shift;
- my $starttime = shift || time;
+ my $starttime = shift || ::now();
$self->{'starttime'} = $starttime;
}
sub runtime {
+ # Returns:
+ # Run time in seconds
my $self = shift;
- return $self->{'endtime'}-$self->{'starttime'};
+ return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000);
}
sub endtime {
+ # Returns:
+ # UNIX-timestamp this job ended
+ # 0 if not ended yet
my $self = shift;
- return $self->{'endtime'};
+ return ($self->{'endtime'} || 0);
}
sub set_endtime {
my $self = shift;
- my $endtime = shift || time;
+ my $endtime = shift;
$self->{'endtime'} = $endtime;
}
-
-sub set_timeout {
+sub timedout {
+ # Is the job timedout?
+ # Input:
+ # $delta_time = time that the job may run
+ # Returns:
+ # True or false
my $self = shift;
my $delta_time = shift;
- $self->{'timeout'} = time + $delta_time;
-}
-
-sub timeout {
- my $self = shift;
- return $self->{'timeout'};
-}
-
-sub timedout {
- my $self = shift;
- return time > $self->{'timeout'};
+ return time > $self->{'starttime'} + $delta_time;
}
sub kill {
- # kill the jobs
+ # Kill the job.
+ # Send the signals to (grand)*children and pid.
+ # If no signals: TERM TERM KILL
+ # Wait 200 ms after each TERM.
+ # Input:
+ # @signals = signals to send
my $self = shift;
+ my @signals = @_;
my @family_pids = $self->family_pids();
+
# Record this jobs as failed
- $self->set_exitstatus(1);
+ $self->set_exitstatus(-1);
# Send two TERMs to give time to clean up
- for my $signal ("TERM", "TERM", "KILL") {
+ ::debug("run", "Kill seq ", $self->seq(), " signal '@signals'\n");
+ my @send_signals = @signals || ("TERM", "TERM", "KILL");
+ for my $signal (@send_signals) {
my $alive = 0;
for my $pid (@family_pids) {
if(kill 0, $pid) {
# The job still running
kill $signal, $pid;
$alive = 1;
+ ::debug("run","$pid is alive\n");
+ }
+ }
+ # If a signal was given as input, do not do the sleep below
+ @signals and next;
+
+ if($signal eq "TERM" and $alive) {
+ # Wait up to 200 ms between TERMs - but only if any pids are alive
+ my $sleep = 1;
+ for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200;
+ $sleepsum += $sleep) {
+ $sleep = ::reap_usleep($sleep);
}
}
- # Wait 200 ms between TERMs - but only if any pids are alive
- if($signal eq "TERM" and $alive) { ::usleep(200); }
}
}
sub family_pids {
# Find the pids with this->pid as (grand)*parent
- # TODO test this on different OS as 'ps' is known to be different
+ # Returns:
+ # @pids = pids of (grand)*children
my $self = shift;
my $pid = $self->pid();
- my $script = q{
- family_pids() {
- for CHILDPID in `ps --ppid "$@" -o pid --no-headers`; do
- family_pids $CHILDPID &
- done
- echo "$@"
- }
- } .
- "family_pids $pid; wait";
- my @pids = qx{$script};
- chomp(@pids);
- return ($pid,@pids);
-}
+ my @pids;
+ my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();
+
+ my @more = ($pid);
+ # While more (grand)*children
+ while(@more) {
+ my @m;
+ push @pids, @more;
+ for my $parent (@more) {
+ if($children_of_ref->{$parent}) {
+ # add the children of this parent
+ push @m, @{$children_of_ref->{$parent}};
+ }
+ }
+ @more = @m;
+ }
+ return (@pids);
+}
sub failed {
# return number of times failed for this $sshlogin
+ # Input:
+ # $sshlogin
+ # Returns:
+ # Number of times failed for $sshlogin
my $self = shift;
my $sshlogin = shift;
return $self->{'failed'}{$sshlogin};
@@ -3184,6 +6135,8 @@ sub failed {
sub failed_here {
# return number of times failed for the current $sshlogin
+ # Returns:
+ # Number of times failed for this sshlogin
my $self = shift;
return $self->{'failed'}{$self->sshlogin()};
}
@@ -3220,21 +6173,147 @@ sub min_failed {
# the minimal number of times this command has failed
my $self = shift;
my $min_failures =
- ::min(map { $self->{'failed'}{$_} }
- keys %{$self->{'failed'}});
+ ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
return ($number_of_sshlogins_failed_on,$min_failures);
}
sub total_failed {
# Returns:
- # the number of times this command has failed
+ # $total_failures = the number of times this command has failed
my $self = shift;
my $total_failures = 0;
for (values %{$self->{'failed'}}) {
$total_failures += $_;
}
- return ($total_failures);
+ return $total_failures;
+}
+
+{
+ my $script;
+
+ sub postpone_exit_and_cleanup {
+ # Command to remove files and dirs (given as args) without
+ # affecting the exit value in $?/$status.
+ if(not $script) {
+ $script = "perl -e '".
+ ::spacefree(0,q{
+ $bash=shift;
+ $csh=shift;
+ for(@ARGV){
+ unlink;
+ rmdir;
+ }
+ if($bash=~s/h//) {
+ exit $bash;
+ }
+ exit $csh;
+ }).
+ "' ".'"$?h" "$status" ';
+ }
+ return $script
+ }
+}
+
+sub wrapped {
+ # Wrap command with:
+ # * --shellquote
+ # * --nice
+ # * --cat
+ # * --fifo
+ # * --sshlogin
+ # * --pipepart (@Global::cat_partials)
+ # * --pipe
+ # * --tmux
+ # The ordering of the wrapping is important:
+ # * --nice/--cat/--fifo should be done on the remote machine
+ # * --pipepart/--pipe should be done on the local machine inside --tmux
+ # Uses:
+ # $Global::envvar
+ # $opt::shellquote
+ # $opt::nice
+ # $Global::shell
+ # $opt::cat
+ # $opt::fifo
+ # @Global::cat_partials
+ # $opt::pipe
+ # $opt::tmux
+ # Returns:
+ # $self->{'wrapped'} = the command wrapped with the above
+ my $self = shift;
+ if(not defined $self->{'wrapped'}) {
+ my $command = $self->replaced();
+ if($opt::shellquote) {
+ # Prepend echo
+ # and quote twice
+ $command = "echo " .
+ ::shell_quote_scalar(::shell_quote_scalar($command));
+ }
+ if($opt::nice) {
+ # Prepend \nice -n19 $SHELL -c
+ # and quote.
+ # The '\' before nice is needed to avoid tcsh's built-in
+ my $sshlogin = $self->sshlogin();
+ my $serverlogin = $sshlogin->serverlogin();
+ if($serverlogin eq ":") {
+ # Local use $Global::shell
+ $command = '\nice'. " -n". $opt::nice. " ".
+ $Global::shell. " -c ".
+ ::shell_quote_scalar($command);
+ } else {
+ # Remote systems use $SHELL
+ $command = '\nice'. " -n". $opt::nice.
+ ' $SHELL -c '.
+ ::shell_quote_scalar($command);
+ }
+ }
+ if($opt::cat) {
+# Append 'unlink {} without affecting $?'
+ $command =
+ $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0).
+ $command.";". postpone_exit_and_cleanup().
+ '$PARALLEL_TMP';
+ } elsif($opt::fifo) {
+ # Prepend 'mkfifo {}; ('
+ # Append ') & _PID=$!; cat > {}; wait $_PID; '
+ # (This makes it fail in csh, but give the correct exit code in bash)
+ # Append 'unlink {} without affecting $?'
+ # Set $ENV{PARALLEL_TMP} when starting a job
+ # Set $ENV{PARALLEL_TMP} in the remote wrapper
+ # mkfifo $PARALLEL_TMP;
+ # {} = $PARALLEL_TMP;
+ # (...) &
+ # cat > $PARALLEL_TMP; wait \$_PID; cleanup $PARALLEL_TMP
+ # perl -e 'open($fifo,">",shift); while(read){print FIFO};unlink $fifo;waitpid($pid,0);exit $?' $! $PARALLEL_FIFO
+ $command =
+ "mkfifo \$PARALLEL_TMP\n (".
+ $command.";".
+ ') & _PID=$!; cat > $PARALLEL_TMP; wait $_PID; '.
+ postpone_exit_and_cleanup().
+ '$PARALLEL_TMP';
+ }
+ # Wrap with ssh + tranferring of files
+ $command = $self->sshlogin_wrap($command);
+ if(@Global::cat_partials) {
+ # Prepend:
+ # < /tmp/foo perl -e 'while(@ARGV) {
+ # sysseek(STDIN,shift,0) || die; $left = shift;
+ # while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){
+ # $left -= $read; syswrite(STDOUT,$buf);
+ # }
+ # }' 0 0 0 11 |
+ $command = (shift @Global::cat_partials). " | ($command)";
+ } elsif($opt::pipe) {
+ # Wrap with EOF-detector to avoid starting $command if EOF.
+ $command = empty_input_wrapper($command);
+ }
+ if($opt::tmux) {
+ # Wrap command with 'tmux'
+ $command = $self->tmux_wrap($command);
+ }
+ $self->{'wrapped'} = $command;
+ }
+ return $self->{'wrapped'};
}
sub set_sshlogin {
@@ -3242,6 +6321,7 @@ sub set_sshlogin {
my $sshlogin = shift;
$self->{'sshlogin'} = $sshlogin;
delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
+ delete $self->{'wrapped'};
}
sub sshlogin {
@@ -3249,18 +6329,258 @@ sub sshlogin {
return $self->{'sshlogin'};
}
+sub string_zip_base64 {
+ # Pipe string through 'bzip2 -9' and base64 encode it into 1000
+ # byte blocks.
+ # 1000 bytes is the largest word size csh supports
+ # Input:
+ # @strings = to be encoded
+ # Returns:
+ # @base64 = 1000 byte block
+ my($zipin_fh, $zipout_fh,@base64);
+ ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
+ if(fork) {
+ close $zipin_fh;
+ $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
+ # Split base64 encoded into 1000 byte blocks
+ @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
+ close $zipout_fh;
+ } else {
+ close $zipout_fh;
+ print $zipin_fh @_;
+ close $zipin_fh;
+ exit;
+ }
+ ::debug("base64","Orig:@_\nAs base64:@base64\n");
+ return @base64;
+}
+
+sub base64_zip_eval {
+ # Script that:
+ # * reads base64 strings from @ARGV
+ # * decodes them
+ # * pipes through 'bzip2 -dc'
+ # * evals the result
+ # Reverse of string_zip_base64 + eval
+ # Will be wrapped in ' so single quote is forbidden
+ # Returns:
+ # $script = 1-liner for perl -e
+ my $script = ::spacefree(0,q{
+ @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
+ eval "@GNU_Parallel";
+
+ $SIG{CHLD}="IGNORE";
+ # Search for bzip2. Not found => use default path
+ my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
+ # $in = stdin on $zip, $out = stdout from $zip
+ my($in, $out,$eval);
+ open3($in,$out,">&STDERR",$zip,"-dc");
+ if(my $perlpid = fork) {
+ close $in;
+ $eval = join "", <$out>;
+ close $out;
+ } else {
+ close $out;
+ # Pipe decoded base64 into 'bzip2 -dc'
+ print $in (decode_base64(join"",@ARGV));
+ close $in;
+ exit;
+ }
+ wait;
+ eval $eval;
+ });
+ ::debug("base64",$script,"\n");
+ return $script;
+}
+
sub sshlogin_wrap {
# Wrap the command with the commands needed to run remotely
+ # Input:
+ # $command = command to run
+ # Returns:
+ # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
+ sub monitor_parent_sshd_script {
+ # This script is to solve the problem of
+ # * not mixing STDERR and STDOUT
+ # * terminating with ctrl-c
+ # If its parent is ssh: all good
+ # If its parent is init(1): ssh died, so kill children
+ my $monitor_parent_sshd_script;
+
+ if(not $monitor_parent_sshd_script) {
+ $monitor_parent_sshd_script =
+ # This will be packed in ', so only use "
+ ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
+ '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
+ q{
+ # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
+ do {
+ $ENV{PARALLEL_TMP} = $tmpdir."/par".
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
+ } while(-e $ENV{PARALLEL_TMP});
+ $SIG{CHLD} = sub { $done = 1; };
+ $pid = fork;
+ unless($pid) {
+ # Make own process group to be able to kill HUP it later
+ setpgrp;
+ exec $shell, "-c", ($bashfunc."@ARGV");
+ die "exec: $!\n";
+ }
+ do {
+ # Parent is not init (ppid=1), so sshd is alive
+ # Exponential sleep up to 1 sec
+ $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
+ select(undef, undef, undef, $s);
+ } until ($done || getppid == 1);
+ # Kill HUP the process group if job not done
+ kill(SIGHUP, -${pid}) unless $done;
+ wait;
+ exit ($?&127 ? 128+($?&127) : 1+$?>>8)
+ });
+ }
+ return $monitor_parent_sshd_script;
+ }
+
+ sub vars_to_export {
+ # Uses:
+ # @opt::env
+ my @vars = ("parallel_bash_environment");
+ for my $varstring (@opt::env) {
+ # Split up --env VAR1,VAR2
+ push @vars, split /,/, $varstring;
+ }
+ for (@vars) {
+ if(-r $_ and not -d) {
+ # Read as environment definition bug #44041
+ # TODO parse this
+ my $fh = ::open_or_exit($_);
+ $Global::envdef = join("",<$fh>);
+ close $fh;
+ }
+ }
+ if(grep { /^_$/ } @vars) {
+ # --env _
+ # Include all vars that are not in a clean environment
+ if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) {
+ my @ignore = <$vars_fh>;
+ chomp @ignore;
+ my %ignore;
+ @ignore{@ignore} = @ignore;
+ close $vars_fh;
+ push @vars, grep { not defined $ignore{$_} } keys %ENV;
+ @vars = grep { not /^_$/ } @vars;
+ } else {
+ ::error("Run '$Global::progname --record-env' in a clean environment first.\n");
+ ::wait_and_exit(255);
+ }
+ }
+ # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
+ # So --env myfunc should also look for BASH_FUNC_myfunc()
+ push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
+ map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
+ # Keep only defined variables
+ return grep { defined($ENV{$_}) } @vars;
+ }
+
+ sub env_as_eval {
+ # Returns:
+ # $eval = '$ENV{"..."}=...; ...'
+ my @vars = vars_to_export();
+ my $csh_friendly = not grep { /\n/ } @ENV{@vars};
+ my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
+ my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars;
+ # eval of @envset will set %ENV
+ my $envset = join"", map {
+ '$ENV{"'.::perl_quote_scalar($_).'"}="'. ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
+
+ # running @bashfunc on the command line, will set the functions
+ my @bashfunc = map {
+ my $v=$_; s/BASH_FUNC_(.*)(\(\)|%%)/$1/; "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
+ # eval $bashfuncset will set $bashfunc
+ my $bashfuncset;
+ if(@bashfunc) {
+ # Functions are not supported for all shells
+ if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) {
+ ::warning("Shell functions may not be supported in $Global::shell\n");
+ }
+ $bashfuncset =
+ '@bash_functions=qw('."@bash_functions".");".
+ ::spacefree(1,q{
+ if($ENV{"SHELL"}=~/csh/) {
+ print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
+ exec "false";
+ }
+ }).
+ "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
+ } else {
+ $bashfuncset = '$bashfunc = "";'
+ }
+ if($ENV{"parallel_bash_environment"}) {
+ $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
+ }
+ ::debug("base64",$envset,$bashfuncset,"\n");
+ return $csh_friendly,$envset,$bashfuncset;
+ }
+
my $self = shift;
+ my $command = shift;
+ # TODO test that *sh -c 'parallel --env' use *sh
if(not defined $self->{'sshlogin_wrap'}) {
my $sshlogin = $self->sshlogin();
- my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
- my $next_command_line = $self->replaced();
- my ($pre,$post,$cleanup)=("","","");
+ my $quoted_remote_command;
+ $ENV{'PARALLEL_SEQ'} = $self->seq();
+ $ENV{'PARALLEL_PID'} = $$;
if($serverlogin eq ":") {
- $self->{'sshlogin_wrap'} = $next_command_line;
+ if(@opt::env) {
+ # Prepend with environment setter, which sets functions in zsh
+ my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
+ my $env_command = $envset.$bashfuncset.
+ '@ARGV="'.::perl_quote_scalar($command).'";'.
+ "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
+ if(length $env_command > 999
+ or
+ not $csh_friendly
+ or
+ $command =~ /\n/) {
+ # csh does not deal well with > 1000 chars in one word
+ # csh does not deal well with $ENV with \n
+ $env_command = "perl -e '".base64_zip_eval()."' ".
+ join" ",string_zip_base64($env_command);
+ $self->{'sshlogin_wrap'} = $env_command;
+ } else {
+ $self->{'sshlogin_wrap'} = "perl -e ".::shell_quote_scalar($env_command);
+ }
+ } else {
+ $self->{'sshlogin_wrap'} = $command;
+ }
} else {
+ my $pwd = "";
+ if($opt::workdir) {
+ # Create remote workdir if needed. Then cd to it.
+ my $wd = $self->workdir();
+ $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
+ qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
+ }
+ my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
+ my $remote_command = $pwd.$envset.$bashfuncset.
+ '@ARGV="'.::perl_quote_scalar($command).'";'. monitor_parent_sshd_script();
+ $quoted_remote_command = "perl -e ".::shell_quote_scalar($remote_command);
+ if(length $quoted_remote_command > 999
+ or
+ not $csh_friendly
+ or
+ $command =~ /\n/) {
+ # csh does not deal well with > 1000 chars in one word
+ # csh does not deal well with $ENV with \n
+ $quoted_remote_command = "perl -e \\''".base64_zip_eval()."'\\' ".
+ join" ",string_zip_base64($remote_command);
+ } else {
+ $quoted_remote_command = ::shell_quote_scalar($quoted_remote_command);
+ }
+
+ my $sshcmd = $sshlogin->sshcommand();
+ my ($pre,$post,$cleanup)=("","","");
# --transfer
$pre .= $self->sshtransfer();
# --return
@@ -3271,24 +6591,12 @@ sub sshlogin_wrap {
# We need to save the exit status of the job
$post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;';
}
- # If the remote login shell is (t)csh then use 'setenv'
- # otherwise use 'export'
- my $parallel_env =
- q{'eval `echo $SHELL | grep -E "/(t)?csh" > /dev/null}
- . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\;}
- . q{ setenv PARALLEL_PID '$PARALLEL_PID'}
- . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\;}
- . q{PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;'};
- if($::opt_workdir) {
- $self->{'sshlogin_wrap'} =
- ($pre . "$sshcmd $serverlogin $parallel_env "
- . ::shell_quote_scalar("cd ".$self->workdir()." && ")
- . ::shell_quote_scalar($next_command_line).";".$post);
- } else {
- $self->{'sshlogin_wrap'} =
- ($pre . "$sshcmd $serverlogin $parallel_env "
- . ::shell_quote_scalar($next_command_line).";".$post);
- }
+ $self->{'sshlogin_wrap'} =
+ ($pre
+ . "$sshcmd $serverlogin exec "
+ . $quoted_remote_command
+ . ";"
+ . $post);
}
}
return $self->{'sshlogin_wrap'};
@@ -3296,10 +6604,12 @@ sub sshlogin_wrap {
sub transfer {
# Files to transfer
+ # Returns:
+ # @transfer - File names of files to transfer
my $self = shift;
my @transfer = ();
$self->{'transfersize'} = 0;
- if($::opt_transfer) {
+ if($opt::transfer) {
for my $record (@{$self->{'commandline'}{'arg_list'}}) {
# Merge arguments from records into args
for my $arg (@$record) {
@@ -3319,55 +6629,39 @@ sub transfersize {
return $self->{'transfersize'};
}
-sub sshtransfer {
+sub add_transfersize {
my $self = shift;
+ my $transfersize = shift;
+ $self->{'transfersize'} += $transfersize;
+}
+
+sub sshtransfer {
+ # Returns for each transfer file:
+ # rsync $file remote:$workdir
+ my $self = shift;
+ my @pre;
my $sshlogin = $self->sshlogin();
- my $sshcmd = $sshlogin->sshcommand();
- my $serverlogin = $sshlogin->serverlogin();
- my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd);
- my $pre = "";
+ my $workdir = $self->workdir();
for my $file ($self->transfer()) {
- $file =~ s:/\./:/:g; # Rsync treats /./ special. We dont want that
- $file =~ s:^\./::g; # Remove ./ if any
- my $relpath = ($file !~ m:^/:); # Is the path relative?
- # Use different subdirs depending on abs or rel path
- # Abs path: rsync -rlDzR /home/tange/dir/subdir/file.gz server:/
- # Rel path: rsync -rlDzR ./subdir/file.gz server:.parallel/tmp/tempid/
- # Rel path: rsync -rlDzR ./subdir/file.gz server:$workdir/
- my $remote_workdir = $self->workdir($file);
- my $rsync_destdir = ($relpath ? $remote_workdir : "/");
- if($relpath) {
- $file = "./".$file;
- }
- if(-r $file) {
- my $mkremote_workdir =
- $remote_workdir eq "." ? "true" :
- "ssh $serverlogin mkdir -p $rsync_destdir";
- $pre .= "$mkremote_workdir; rsync $rsync_opt "
- . ::shell_quote_scalar($file)." $serverlogin:$rsync_destdir;";
- } else {
- print $Global::original_stderr
- "parallel: Warning: "
- . $file . " is not readable and will not be transferred\n";
- }
+ push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
}
- return $pre;
+ return join("",@pre);
}
sub return {
# Files to return
- # Quoted and with {...} substituted
+ # Non-quoted and with {...} substituted
+ # Returns:
+ # @non_quoted_filenames
my $self = shift;
- my @return = ();
- for my $return (@{$self->{'commandline'}{'return_files'}}) {
- CORE::push @return,
- $self->{'commandline'}->replace_placeholders($return,1);
- }
- return @return;
+ return $self->{'commandline'}->
+ replace_placeholders($self->{'commandline'}{'return_files'},0,0);
}
sub returnsize {
# This is called after the job has finished
+ # Returns:
+ # $number_of_bytes transferred in return
my $self = shift;
for my $file ($self->return()) {
if(-e $file) {
@@ -3377,7 +6671,15 @@ sub returnsize {
return $self->{'returnsize'};
}
+sub add_returnsize {
+ my $self = shift;
+ my $returnsize = shift;
+ $self->{'returnsize'} += $returnsize;
+}
+
sub sshreturn {
+ # Returns for each return-file:
+ # rsync remote:$workdir/$file .
my $self = shift;
my $sshlogin = $self->sshlogin();
my $sshcmd = $sshlogin->sshcommand();
@@ -3385,24 +6687,32 @@ sub sshreturn {
my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd);
my $pre = "";
for my $file ($self->return()) {
- $file =~ s:/\./:/:g; # Rsync treats /./ special. We dont want that
$file =~ s:^\./::g; # Remove ./ if any
my $relpath = ($file !~ m:^/:); # Is the path relative?
- # Use different subdirs depending on abs or rel path
-
- # Return or cleanup
- my @cmd = ();
- my $rsync_destdir = ($relpath ? "./" : "/");
- my $ret_file = $file;
- my $remove = $::opt_cleanup ? "--remove-source-files" : "";
- # If relative path: prepend workdir/./ to avoid problems
- # if the dir contains ':' and to get the right relative return path
- my $replaced = ($relpath ? $self->workdir()."/./" : "") . $file;
+ my $cd = "";
+ my $wd = "";
+ if($relpath) {
+ # rsync -avR /foo/./bar/baz.c remote:/tmp/
+ # == (on old systems)
+ # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
+ $wd = ::shell_quote_file($self->workdir()."/");
+ }
+ # Only load File::Basename if actually needed
+ $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
+ # dir/./file means relative to dir, so remove dir on remote
+ $file =~ m:(.*)/\./:;
+ my $basedir = $1 ? ::shell_quote_file($1."/") : "";
+ my $nobasedir = $file;
+ $nobasedir =~ s:.*/\./::;
+ $cd = ::shell_quote_file(::dirname($nobasedir));
+ my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync");
+ my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file)));
# --return
- # Abs path: rsync -rlDzR server:/home/tange/dir/subdir/file.gz /
- # Rel path: rsync -rlDzR server:./subsir/file.gz ./
- $pre .= "rsync $rsync_opt $remove $serverlogin:".
- ::shell_quote_scalar($replaced) . " ".$rsync_destdir.";";
+ # mkdir -p /home/tange/dir/subdir/;
+ # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync"
+ # server:file.gz /home/tange/dir/subdir/
+ $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:".
+ $basename . " ".$basedir.$cd.";";
}
return $pre;
}
@@ -3416,21 +6726,14 @@ sub sshcleanup {
my $sshcmd = $sshlogin->sshcommand();
my $serverlogin = $sshlogin->serverlogin();
my $workdir = $self->workdir();
- my $removeworkdir = "";
my $cleancmd = "";
for my $file ($self->cleanup()) {
my @subworkdirs = parentdirs_of($file);
- $file = ::shell_quote_scalar($file);
- if(@subworkdirs) {
- $removeworkdir = "; rmdir 2>/dev/null ".
- join(" ",map { ::shell_quote_scalar($workdir."/".$_) }
- @subworkdirs);
- }
- my $relpath = ($file !~ m:^/:); # Is the path relative?
- my $cleandir = ($relpath ? $workdir."/" : "");
- $cleancmd .= "$sshcmd $serverlogin rm -f "
- . ::shell_quote_scalar($cleandir.$file.$removeworkdir).";";
+ $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
+ }
+ if(defined $opt::workdir and $opt::workdir eq "...") {
+ $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';';
}
return $cleancmd;
}
@@ -3439,9 +6742,10 @@ sub cleanup {
# Returns:
# Files to remove at cleanup
my $self = shift;
- if($::opt_cleanup) {
+ if($opt::cleanup) {
my @transfer = $self->transfer();
- return @transfer;
+ my @return = $self->return();
+ return (@transfer,@return);
} else {
return ();
}
@@ -3453,13 +6757,13 @@ sub workdir {
my $self = shift;
if(not defined $self->{'workdir'}) {
my $workdir;
- if(defined $::opt_workdir) {
- if($::opt_workdir eq ".") {
+ if(defined $opt::workdir) {
+ if($opt::workdir eq ".") {
# . means current dir
my $home = $ENV{'HOME'};
eval 'use Cwd';
my $cwd = cwd();
- $::opt_workdir = $cwd;
+ $workdir = $cwd;
if($home) {
# If homedir exists: remove the homedir from
# workdir if cwd starts with homedir
@@ -3475,16 +6779,19 @@ sub workdir {
my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
if($parent_dev == $home_dev and $parent_ino == $home_ino) {
# dev and ino is the same: We found the homedir.
- $::opt_workdir = join("/",@dir_parts);
+ $workdir = join("/",@dir_parts);
last;
}
}
}
- } elsif($::opt_workdir eq "...") {
+ if($workdir eq "") {
+ $workdir = ".";
+ }
+ } elsif($opt::workdir eq "...") {
$workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
. "-" . $self->seq();
} else {
- $workdir = $::opt_workdir;
+ $workdir = $opt::workdir;
# Rsync treats /./ special. We dont want that
$workdir =~ s:/\./:/:g; # Remove /./
$workdir =~ s:/+$::; # Remove ending / if any
@@ -3493,7 +6800,7 @@ sub workdir {
} else {
$workdir = ".";
}
- $self->{'workdir'} = $workdir;
+ $self->{'workdir'} = ::shell_quote_scalar($workdir);
}
return $self->{'workdir'};
}
@@ -3516,93 +6823,218 @@ sub start {
# Returns:
# job-object or undef if job not to run
my $job = shift;
- my $command = $job->sshlogin_wrap();
+ # Get the shell command to be executed (possibly with ssh infront).
+ my $command = $job->wrapped();
if($Global::interactive or $Global::stderr_verbose) {
- if($Global::interactive) {
- print $Global::original_stderr "$command ?...";
- open(TTY,"/dev/tty") || ::die_bug("interactive-tty");
- my $answer = ;
- close TTY;
- my $run_yes = ($answer =~ /^\s*y/i);
- if (not $run_yes) {
- $command = "true"; # Run the command 'true'
- }
- } else {
- print $Global::original_stderr "$command\n";
- }
+ $command = interactive_start($command);
}
-
- local (*IN,*OUT,*ERR);
my $pid;
- if($Global::grouped) {
- my ($outfh,$errfh,$name);
- # To group we create temporary files for STDOUT and STDERR
- # To avoid the cleanup unlink the files immediately (but keep them open)
- ($outfh,$name) = ::tempfile(SUFFIX => ".par");
- $job->set_stdoutfilename($name);
- $::opt_files or unlink $name;
- ($errfh,$name) = ::tempfile(SUFFIX => ".par");
- unlink $name;
+ $job->openoutputfiles();
+ my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
+ local (*IN,*OUT,*ERR);
+ open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!");
+ open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!");
- open OUT, '>&', $outfh or ::die_bug("Can't redirect STDOUT: $!");
- open ERR, '>&', $errfh or ::die_bug("Can't dup STDOUT: $!");
- $job->set_stdout($outfh);
- $job->set_stderr($errfh);
- } else {
- (*OUT,*ERR)=(*STDOUT,*STDERR);
+ if($opt::ungroup) {
+ print_dryrun_and_verbose($stdout_fh,$job,$command);
}
-
- if(($::opt_dryrun or $Global::verbose) and not $Global::grouped) {
- if($Global::verbose <= 1) {
- print OUT $job->replaced(),"\n";
- } else {
- # Verbose level > 1: Print the rsync and stuff
- print OUT $command,"\n";
- }
- }
- if($::opt_dryrun) {
+ if($opt::dryrun) {
$command = "true";
}
- $Global::total_running++;
- $Global::total_started++;
$ENV{'PARALLEL_SEQ'} = $job->seq();
$ENV{'PARALLEL_PID'} = $$;
- ::debug("$Global::total_running processes. Starting ("
- . $job->seq() . "): $command\n");
- if($::opt_pipe) {
- my ($in);
- $pid = ::open3($in, ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) ||
- ::die_bug("open3-pipe");
- $job->set_stdin($in);
- } elsif(@::opt_a and not $Global::stdin_in_opt_a and $job->seq() == 1) {
- # Give STDIN to the first job if using -a
+ $ENV{'PARALLEL_TMP'} = ::tmpname("par");
+ ::debug("run", $Global::total_running, " processes . Starting (",
+ $job->seq(), "): $command\n");
+ if($opt::pipe) {
+ my ($stdin_fh);
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-pipe");
+ 1;
+ };
+ $job->set_fh(0,"w",$stdin_fh);
+ } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1
+ and $job->sshlogin()->string() eq ":") {
+ # Give STDIN to the first job if using -a (but only if running
+ # locally - otherwise CTRL-C does not work for other jobs Bug#36585)
*IN = *STDIN;
- $pid = ::open3("<&IN", ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) ||
- ::die_bug("open3-a");
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-a");
+ 1;
+ };
# Re-open to avoid complaining
- open STDIN, "<&", $Global::original_stdin
+ open(STDIN, "<&", $Global::original_stdin)
or ::die_bug("dup-\$Global::original_stdin: $!");
- } elsif ($::opt_tty and not $Global::tty_taken and -c "/dev/tty" and
- open(DEVTTY, "/dev/tty")) {
+ } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
+ open(my $devtty_fh, "<", "/dev/tty")) {
# Give /dev/tty to the command if no one else is using it
- *IN = *DEVTTY;
- $pid = ::open3("<&IN", ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) ||
- ::die_bug("open3-/dev/tty");
- $Global::tty_taken = $pid;
- close DEVTTY;
+ *IN = $devtty_fh;
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-/dev/tty");
+ $Global::tty_taken = $pid;
+ close $devtty_fh;
+ 1;
+ };
} else {
- $pid = ::open3(::gensym, ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) ||
- ::die_bug("open3-gensym");
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-gensym");
+ 1;
+ };
}
- $job->set_pid($pid);
- $job->set_starttime();
- if($::opt_timeout) {
- # Timeout must be set before inserting into queue
- $job->set_timeout($::opt_timeout);
- $Global::timeoutq->insert($job);
+ if($pid) {
+ # A job was started
+ $Global::total_running++;
+ $Global::total_started++;
+ $job->set_pid($pid);
+ $job->set_starttime();
+ $Global::running{$job->pid()} = $job;
+ if($opt::timeout) {
+ $Global::timeoutq->insert($job);
+ }
+ $Global::newest_job = $job;
+ $Global::newest_starttime = ::now();
+ return $job;
+ } else {
+ # No more processes
+ ::debug("run", "Cannot spawn more jobs.\n");
+ return undef;
}
- return $job;
+}
+
+sub interactive_start {
+ my $command = shift;
+ if($Global::interactive) {
+ ::status("$command ?...");
+ open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
+ my $answer = <$tty_fh>;
+ close $tty_fh;
+ my $run_yes = ($answer =~ /^\s*y/i);
+ if (not $run_yes) {
+ $command = "true"; # Run the command 'true'
+ }
+ } else {
+ print $Global::original_stderr "$command\n";
+ }
+ return $command;
+}
+
+sub print_dryrun_and_verbose {
+ # For $opt::ungroup we print these ASAP
+ # For $opt::group they are part of print()
+ my $stdout_fh = shift;
+ my $job = shift;
+ my $command = shift;
+ if($opt::dryrun or $Global::verbose) {
+ if($Global::verbose <= 1) {
+ print $stdout_fh $job->replaced(),"\n";
+ } else {
+ # Verbose level > 1: Print the rsync and stuff
+ print $stdout_fh $command,"\n";
+ }
+ }
+}
+
+{
+ my $tmuxsocket;
+
+ sub tmux_wrap {
+ # Wrap command with tmux for session pPID
+ # Input:
+ # $actual_command = the actual command being run (incl ssh wrap)
+ my $self = shift;
+ my $actual_command = shift;
+ # Temporary file name. Used for fifo to communicate exit val
+ my $tmpfifo=::tmpname("tmx");
+
+ if(length($tmpfifo) >=100) {
+ ::error("tmux does not support sockets with path > 100\n");
+ ::wait_and_exit(255);
+ }
+ my $visual_command = $self->replaced();
+ my $title = $visual_command;
+ if($visual_command =~ /\0/) {
+ ::error("Command line contains NUL. tmux is confused by NUL.\n");
+ ::wait_and_exit(255);
+ }
+ # ; causes problems
+ # ascii 194-245 annoys tmux
+ $title =~ tr/[\011-\016;\302-\365]//d;
+ $title = ::shell_quote_scalar($title);
+
+ my $l_act = length($actual_command);
+ my $l_tit = length($title);
+ my $l_fifo = length($tmpfifo);
+ # The line to run contains a 118 chars extra code + the title 2x
+ my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
+
+ while($l_tit < 1000 and
+ (
+ (890 < $l_tot and $l_tot < 1350)
+ or
+ (9250 < $l_tot and $l_tot < 9800)
+ )) {
+ # tmux blocks for certain lengths:
+ # 900 < title + command < 1200
+ # 9250 < title + command < 9800
+ # but only if title < 1000, so expand the title with 75 spaces
+ # The measured lengths are:
+ # 996 < (title + whole command) < 1127
+ # 9331 < (title + whole command) < 9636
+ $title = $title.('\ 'x75);
+ $l_tit = length($title);
+ $l_tot = 2 * $l_tit + $l_act + $l_fifo;
+ }
+
+ my $tmux;
+ $ENV{'TMUX'} ||= "tmux";
+ if(not $tmuxsocket) {
+ $tmuxsocket = ::tmpname("tms");
+ ::status("See output with: $ENV{'TMUX'} -S $tmuxsocket attach\n");
+ }
+ # TODO sh -c wrapper for >&
+ $tmux = $ENV{'TMUX'}." -S $tmuxsocket new-session -s p$$ -d 'sleep .2' >&/dev/null;" .
+ $ENV{'TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
+
+ ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
+ $Limits::Command::line_max_len, " tot ",
+ $l_tot, "\n");
+
+ return "mkfifo $tmpfifo && $tmux ".
+ # Run in tmux
+ ::shell_quote_scalar
+ (
+ "(".$actual_command.');'.
+ # The triple print is needed - otherwise the testsuite fails
+ q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
+ "echo $title; echo \007Job finished at: `date`;sleep 10"
+ ).
+ # Run outside tmux
+ # Read a / separated line: 0h/2 for csh, 2/0 for bash.
+ # If csh the first will be 0h, so use the second as exit value.
+ # Otherwise just use the first value as exit value.
+ q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
+ }
+}
+
+sub is_already_in_results {
+ # Do we already have results for this job?
+ # Returns:
+ # $job_already_run = bool whether there is output for this or not
+ my $job = $_[0];
+ my $args_as_dirname = $job->{'commandline'}->args_as_dirname();
+ # prefix/name1/val1/name2/val2/
+ my $dir = $opt::results."/".$args_as_dirname;
+ ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n");
+ return -e "$dir/stdout";
}
sub is_already_in_joblog {
@@ -3621,7 +7053,7 @@ sub should_be_retried {
# 0 - do not retry
# 1 - job queued for retry
my $self = shift;
- if (not $::opt_retries) {
+ if (not $opt::retries) {
return 0;
}
if(not $self->exitstatus()) {
@@ -3631,15 +7063,41 @@ sub should_be_retried {
} else {
# The job failed. Should it be retried?
$self->add_failed_here();
- if($self->total_failed() == $::opt_retries) {
+ if($self->total_failed() == $opt::retries) {
# This has been retried enough
return 0;
} else {
# This command should be retried
+ $self->set_endtime(undef);
+ $self->reset_exitstatus();
$Global::JobQueue->unget($self);
- ::debug("Retry ".$self->seq()."\n");
+ ::debug("run", "Retry ", $self->seq(), "\n");
return 1;
}
+}
+}
+
+{
+ my (%print_later,$job_end_sequence);
+
+ sub print_earlier_jobs {
+ # Print jobs completed earlier
+ # Returns: N/A
+ my $job = shift;
+ $print_later{$job->seq()} = $job;
+ $job_end_sequence ||= 1;
+ ::debug("run", "Looking for: $job_end_sequence ",
+ "Current: ", $job->seq(), "\n");
+ for(my $j = $print_later{$job_end_sequence};
+ $j or vec($Global::job_already_run,$job_end_sequence,1);
+ $job_end_sequence++,
+ $j = $print_later{$job_end_sequence}) {
+ ::debug("run", "Found job end $job_end_sequence");
+ if($j) {
+ $j->print();
+ delete $print_later{$job_end_sequence};
+ }
+ }
}
}
@@ -3648,95 +7106,303 @@ sub print {
# Returns: N/A
my $self = shift;
- ::debug(">>joboutput ".$self->replaced()."\n");
- # Only relevant for grouping
- $Global::grouped or return;
- my $out = $self->stdout();
- my $err = $self->stderr();
- my $command = $self->sshlogin_wrap();
-
- if($Global::joblog) {
- my $cmd;
- if($Global::verbose <= 1) {
- $cmd = $self->replaced();
- } else {
- # Verbose level > 1: Print the rsync and stuff
- $cmd = $command;
- }
- printf $Global::joblog
- join("\t", $self->seq(), $self->sshlogin()->string(),
- $self->starttime(), $self->runtime(),
- $self->transfersize(), $self->returnsize(),
- $self->exitstatus(), $self->exitsignal(), $cmd
- ). "\n";
- flush $Global::joblog;
- $self->set_job_in_joblog();
+ ::debug("print", ">>joboutput ", $self->replaced(), "\n");
+ if($opt::dryrun) {
+ # Nothing was printed to this job:
+ # cleanup tmp files if --files was set
+ unlink $self->fh(1,"name");
}
-
- if(($::opt_dryrun or $Global::verbose) and $Global::grouped) {
- if($Global::verbose <= 1) {
- print STDOUT $self->replaced(),"\n";
- } else {
- # Verbose level > 1: Print the rsync and stuff
- print STDOUT $command,"\n";
- }
- # If STDOUT and STDERR are merged,
- # we want the command to be printed first
- # so flush to avoid STDOUT being buffered
- flush STDOUT;
- }
- seek $err, 0, 0;
- if($Global::debug) {
- print STDERR "ERR:\n";
- }
- if($::opt_tag or defined $::opt_tagstring) {
- my $tag = $self->tag();
- while(<$err>) {
- print STDERR $tag,$_;
- }
+ if($opt::pipe and $self->virgin()) {
+ # Skip --joblog, --dryrun, --verbose
} else {
- my $buf;
- while(sysread($err,$buf,1000_000)) {
- print STDERR $buf;
+ if($opt::ungroup and $Global::joblog and defined $self->{'exitstatus'}) {
+ # Add to joblog when finished
+ $self->print_joblog();
+ # Printing is only relevant for grouped/--line-buffer output.
+ $opt::ungroup and return;
+ }
+
+ # Check for disk full
+ ::exit_if_disk_full();
+
+ if(($opt::dryrun or $Global::verbose)
+ and
+ not $self->{'verbose_printed'}) {
+ $self->{'verbose_printed'}++;
+ if($Global::verbose <= 1) {
+ print STDOUT $self->replaced(),"\n";
+ } else {
+ # Verbose level > 1: Print the rsync and stuff
+ print STDOUT $self->wrapped(),"\n";
+ }
+ # If STDOUT and STDERR are merged,
+ # we want the command to be printed first
+ # so flush to avoid STDOUT being buffered
+ flush STDOUT;
}
}
- flush STDERR;
-
- if($::opt_files) {
- print STDOUT $self->{'stdoutfilename'},"\n";
- } else {
- my $buf;
- seek $out, 0, 0;
- if($Global::debug) {
- print STDOUT "OUT:\n";
+ for my $fdno (sort { $a <=> $b } keys %Global::fd) {
+ # Sort by file descriptor numerically: 1,2,3,..,9,10,11
+ $fdno == 0 and next;
+ my $out_fd = $Global::fd{$fdno};
+ my $in_fh = $self->fh($fdno,"r");
+ if(not $in_fh) {
+ if(not $Job::file_descriptor_warning_printed{$fdno}++) {
+ # ::warning("File descriptor $fdno not defined\n");
+ }
+ next;
}
- if($::opt_tag or defined $::opt_tagstring) {
+ ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
+ if($opt::files) {
+ $self->files_print($fdno,$in_fh,$out_fd);
+ } elsif($opt::linebuffer) {
+ # Line buffered print out
+ $self->linebuffer_print($fdno,$in_fh,$out_fd);
+ } elsif($opt::tag or defined $opt::tagstring) {
+ $self->tag_print($fdno,$in_fh,$out_fd);
+ } else {
+ $self->normal_print($fdno,$in_fh,$out_fd);
+ }
+ flush $out_fd;
+ }
+ ::debug("print", "<{'exitstatus'}
+ and not ($self->virgin() and $opt::pipe)) {
+ # Add to joblog when finished
+ $self->print_joblog();
+ }
+}
+
+sub files_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+
+ # If the job is dead: close printing fh. Needed for --compress
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ if($opt::compress) {
+ # Kill the decompressor which will not be needed
+ CORE::kill "TERM", $self->fh($fdno,"rpid");
+ }
+ close $in_fh;
+
+ if($opt::pipe and $self->virgin()) {
+ # Nothing was printed to this job:
+ # cleanup unused tmp files if --files was set
+ for my $fdno (1,2) {
+ unlink $self->fh($fdno,"name");
+ unlink $self->fh($fdno,"unlink");
+ }
+ } elsif($fdno == 1 and $self->fh($fdno,"name")) {
+ print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
+ $self->add_returnsize(-s $self->fh($fdno,"name"));
+ }
+}
+
+sub linebuffer_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+ my $partial = \$self->{'partial_line',$fdno};
+
+ if(defined $self->{'exitstatus'}) {
+ # If the job is dead: close printing fh. Needed for --compress
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ if($opt::compress) {
+ # Blocked reading in final round
+ $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+ for my $fdno (1,2) {
+ my $fdr = $self->fh($fdno,'r');
+ my $flags;
+ fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+ $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags
+ fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+ }
+ }
+ }
+ # This seek will clear EOF
+ seek $in_fh, tell($in_fh), 0;
+ # The read is non-blocking: The $in_fh is set to non-blocking.
+ # 32768 --tag = 5.1s
+ # 327680 --tag = 4.4s
+ # 1024000 --tag = 4.4s
+ # 3276800 --tag = 4.3s
+ # 10240000 --tag = 4.3s
+ # 32768000 --tag = 4.7s
+ my $outputlength = 0;
+ while(read($in_fh,substr($$partial,length $$partial),3276800)) {
+ # Append to $$partial
+ # Find the last \n
+ my $i = ::rindex64($partial,"\n");
+ if($i != -1) {
+ # One or more complete lines were found
+ if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) {
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # This is a crappy way of ignoring it.
+ $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
+ # Length of partial line has changed: Find the last \n again
+ $i = ::rindex64($partial,"\n");
+ }
+ $outputlength += $i+1;
+ if($opt::tag or defined $opt::tagstring) {
+ # Replace ^ with $tag within the full line
+ my $tag = $self->tag();
+ substr($$partial,0,$i+1) =~ s/^/$tag/gm;
+ # Length of partial line has changed: Find the last \n again
+ $i = ::rindex64($partial,"\n");
+ }
+ # Print up to and including the last \n
+ print $out_fd substr($$partial,0,$i+1);
+ # Remove the printed part
+ substr($$partial,0,$i+1) = "";
+ }
+ }
+ $self->add_returnsize($outputlength);
+ if(defined $self->{'exitstatus'}) {
+ # If the job is dead: print the remaining partial line
+ # read remaining
+ $self->add_returnsize(length $$partial);
+ if($$partial and ($opt::tag or defined $opt::tagstring)) {
my $tag = $self->tag();
- while(<$out>) {
- print STDOUT $tag,$_;
- }
+ $$partial =~ s/^/$tag/gm;
+ }
+ print $out_fd $$partial;
+ # Release the memory
+ $$partial = undef;
+ if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) {
+ # decompress still running
} else {
- my $buf;
- while(sysread($out,$buf,1000_000)) {
- print STDOUT $buf;
+ # decompress done: close fh
+ close $in_fh;
+ if($? and $opt::compress) {
+ ::error($opt::decompress_program." failed.\n");
+ $self->set_exitstatus(255);
}
}
- flush STDOUT;
- ::debug("<fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ seek $in_fh, 0, 0;
+ # $in_fh is now ready for reading at position 0
+ my $tag = $self->tag();
+ if($fdno == 2) {
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # This is a crappy way of ignoring it.
+ while(<$in_fh>) {
+ if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) {
+ # Skip
+ } else {
+ $self->add_returnsize(length $_);
+ print $out_fd $tag,$_;
+ }
+ # At most run the loop once
+ last;
+ }
+ }
+ my $outputlength = 0;
+ while(<$in_fh>) {
+ print $out_fd $tag,$_;
+ $outputlength += length $_;
+ }
+ if($fdno == 1) {
+ $self->add_returnsize($outputlength);
+ }
+ close $in_fh;
+ if($? and $opt::compress) {
+ ::error($opt::decompress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+}
+
+sub normal_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+ my $buf;
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ seek $in_fh, 0, 0;
+ # $in_fh is now ready for reading at position 0
+ if($fdno == 2) {
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # This is a crappy way of ignoring it.
+ sysread($in_fh,$buf,1_000);
+ $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
+ print $out_fd $buf;
+ $self->add_returnsize(length $buf);
+ }
+ my $outputlength = 0;
+ while(sysread($in_fh,$buf,32768)) {
+ print $out_fd $buf;
+ $outputlength += length $buf;
+ }
+ if($fdno == 1) {
+ $self->add_returnsize($outputlength);
+ }
+ close $in_fh;
+ if($? and $opt::compress) {
+ ::error($opt::decompress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+}
+
+sub print_joblog {
+ my $self = shift;
+ my $cmd;
+ if($Global::verbose <= 1) {
+ $cmd = $self->replaced();
+ } else {
+ # Verbose level > 1: Print the rsync and stuff
+ $cmd = "@command";
+ }
+ print $Global::joblog
+ join("\t", $self->seq(), $self->sshlogin()->string(),
+ $self->starttime(), sprintf("%10.3f",$self->runtime()),
+ $self->transfersize(), $self->returnsize(),
+ $self->exitstatus(), $self->exitsignal(), $cmd
+ ). "\n";
+ flush $Global::joblog;
+ $self->set_job_in_joblog();
}
sub tag {
my $self = shift;
- if(not defined $self->{'tag'}) {
- $self->{'tag'} = $self->{'commandline'}->
- replace_placeholders($::opt_tagstring,0)."\t";
+ if($opt::tag or defined $opt::tagstring) {
+ if(not defined $self->{'tag'}) {
+ $self->{'tag'} = $self->{'commandline'}->
+ replace_placeholders([$opt::tagstring],0,0)."\t";
+ }
+ } else {
+ return "";
}
return $self->{'tag'};
}
+sub hostgroups {
+ my $self = shift;
+ if(not defined $self->{'hostgroups'}) {
+ $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
+ }
+ return @{$self->{'hostgroups'}};
+}
+
sub exitstatus {
my $self = shift;
return $self->{'exitstatus'};
@@ -3755,6 +7421,11 @@ sub set_exitstatus {
}
}
+sub reset_exitstatus {
+ my $self = shift;
+ $self->{'exitstatus'} = undef;
+}
+
sub exitsignal {
my $self = shift;
return $self->{'exitsignal'};
@@ -3767,70 +7438,96 @@ sub set_exitsignal {
}
+{
+ my $status_printed;
+
+ sub should_we_halt {
+ # Should we halt? Immediately? Gracefully?
+ # Returns: N/A
+ my $job = shift;
+ if($job->exitstatus() or $job->exitsignal()) {
+ $Global::exitstatus++;
+ $Global::total_failed++;
+ if($opt::halt) {
+ if($opt::halt == 1
+ or
+ ($opt::halt > 0 and $opt::halt < 1 and $Global::total_failed > 3
+ and
+ $Global::total_failed / $Global::total_started > $opt::halt)) {
+ # If halt on error == 1 or --halt 10%
+ # we should gracefully exit
+ ::status
+ ("$Global::progname: Starting no more jobs. ",
+ "Waiting for ", scalar(keys %Global::running),
+ " jobs to finish. This job failed:\n",
+ $job->replaced(),"\n");
+ $Global::start_no_new_jobs ||= 1;
+ $Global::halt_exitstatus = $job->exitstatus();
+ } elsif($opt::halt == 2) {
+ # If halt on error == 2 we should exit immediately
+ if(not $status_printed++) {
+ ::status
+ ("$Global::progname: This job failed:\n",
+ $job->replaced(),"\n");
+ }
+ ::killall();
+ ::wait_and_exit($job->exitstatus());
+ }
+ }
+ } else {
+ if($opt::halt) {
+ if($opt::halt == -1) {
+ # If halt on error == -1
+ # we should gracefully exit
+ ::status
+ ("$Global::progname: Starting no more jobs. ",
+ "Waiting for ", scalar(keys %Global::running),
+ " jobs to finish. This job succeeded:\n",
+ $job->replaced(),"\n");
+ $Global::start_no_new_jobs ||= 1;
+ $Global::halt_exitstatus = $job->exitstatus();
+ } elsif($opt::halt == -2) {
+ # If halt on error == -2 we should exit immediately
+ ::status
+ ("$Global::progname: This job succeeded:\n",
+ $job->replaced(),"\n");
+ ::killall();
+ ::wait_and_exit($job->exitstatus());
+ }
+ }
+ }
+ }
+}
+
package CommandLine;
sub new {
my $class = shift;
my $seq = shift;
- my $command = ::undef_as_empty(shift);
+ my $commandref = shift;
+ $commandref || die;
my $arg_queue = shift;
my $context_replace = shift;
- my $max_number_of_args = shift; # for -N and normal (-N1)
+ my $max_number_of_args = shift; # for -N and normal (-n1)
my $return_files = shift;
- my $len = {
- '{}' => 0, # Total length of all {} replaced with all args
- '{/}' => 0, # Total length of all {/} replaced with all args
- '{//}' => 0, # Total length of all {//} replaced with all args
- '{.}' => 0, # Total length of all {.} replaced with all args
- '{/.}' => 0, # Total length of all {/.} replaced with all args
- 'no_args' => undef, # Length of command w/ all replacement args removed
- 'context' => undef, # Length of context of an additional arg
- };
- my($sum,%replacecount);
- ($sum,$len->{'no_args'},$len->{'context'},$len->{'contextgroups'},
- %replacecount) = number_of_replacements($command,$context_replace);
- if($sum == 0) {
- if($command eq "") {
- $command = $Global::replace{'{}'};
- } else {
- # Add {} to the command if there are no {...}'s
- $command .=" ".$Global::replace{'{}'};
- }
- ($sum,$len->{'no_args'},$len->{'context'},$len->{'contextgroups'},
- %replacecount) = number_of_replacements($command,$context_replace);
- }
- if(defined $::opt_tagstring) {
- my ($dummy1,$dummy2,$dummy3,$dummy4,%repcount) =
- number_of_replacements($::opt_tagstring,$context_replace);
- # Merge %repcount with %replacecount to get the keys
- # for replacing replacement strings in $::opt_tagstring
- # The number, however, does not matter.
- for (keys %repcount) {
- $replacecount{$_} ||= 0;
- }
- }
-
- my %positional_replace;
- my %multi_replace;
- for my $used (keys %replacecount) {
- if($used =~ /^{(\d+)(\D*)}$/) {
- $positional_replace{$1} = '\{'.$2.'\}';
- } else {
- $multi_replace{$used} = $used;
- }
+ my $replacecount_ref = shift;
+ my $len_ref = shift;
+ my %replacecount = %$replacecount_ref;
+ my %len = %$len_ref;
+ for (keys %$replacecount_ref) {
+ # Total length of this replacement string {} replaced with all args
+ $len{$_} = 0;
}
return bless {
- 'command' => $command,
+ 'command' => $commandref,
'seq' => $seq,
- 'len' => $len,
+ 'len' => \%len,
'arg_list' => [],
'arg_queue' => $arg_queue,
'max_number_of_args' => $max_number_of_args,
'replacecount' => \%replacecount,
'context_replace' => $context_replace,
'return_files' => $return_files,
- 'positional_replace' => \%positional_replace,
- 'multi_replace' => \%multi_replace,
'replaced' => undef,
}, ref($class) || $class;
}
@@ -3840,23 +7537,69 @@ sub seq {
return $self->{'seq'};
}
+sub set_seq {
+ my $self = shift;
+ $self->{'seq'} = shift;
+}
+
+{
+ my $max_slot_number;
+
+ sub slot {
+ # Find the number of a free job slot and return it
+ # Uses:
+ # @Global::slots - list with free jobslots
+ # Returns:
+ # $jobslot = number of jobslot
+ my $self = shift;
+ if(not $self->{'slot'}) {
+ if(not @Global::slots) {
+ # $Global::max_slot_number will typically be $Global::max_jobs_running
+ push @Global::slots, ++$max_slot_number;
+ }
+ $self->{'slot'} = shift @Global::slots;
+ }
+ return $self->{'slot'};
+ }
+}
+
sub populate {
# Add arguments from arg_queue until the number of arguments or
# max line length is reached
+ # Uses:
+ # $Global::minimal_command_line_length
+ # $opt::cat
+ # $opt::fifo
+ # $Global::JobQueue
+ # $opt::m
+ # $opt::X
+ # $CommandLine::already_spread
+ # $Global::max_jobs_running
+ # Returns: N/A
my $self = shift;
- if($::opt_pipe) {
- # Do no read any args
- $self->push([Arg->new("")]);
- return;
- }
my $next_arg;
+ my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length();
+
+ if($opt::cat) {
+ # $PARALLEL_TMP will point to a tempfile that will be used as {}
+ $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
+ unget([Arg->new('$PARALLEL_TMP')]);
+ }
+ if($opt::fifo) {
+ # $PARALLEL_TMP will point to a tempfile that will be used as {}
+ $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
+ unget([Arg->new('$PARALLEL_TMP')]);
+ }
while (not $self->{'arg_queue'}->empty()) {
$next_arg = $self->{'arg_queue'}->get();
if(not defined $next_arg) {
next;
}
$self->push($next_arg);
- if($self->len() >= Limits::Command::max_length()) {
+ if($self->len() >= $max_len) {
+ # Command length is now > max_length
+ # If there are arguments: remove the last
+ # If there are no arguments: Error
# TODO stuff about -x opt_x
if($self->number_of_args() > 1) {
# There is something to work on
@@ -3864,13 +7607,15 @@ sub populate {
last;
} else {
my $args = join(" ", map { $_->orig() } @$next_arg);
- print STDERR ("$Global::progname: Command line too ",
- "long (", $self->len(), " >= ",
- Limits::Command::max_length(),
- ") at number ",
- $self->{'arg_queue'}->arg_number(),
- ": ".
- (substr($args,0,50))."...\n");
+ ::error("Command line too long (",
+ $self->len(), " >= ",
+ $max_len,
+ ") at input ",
+ $self->{'arg_queue'}->arg_number(),
+ ": ".
+ ((length $args > 50) ?
+ (substr($args,0,50))."...\n" :
+ $args."\n"));
$self->{'arg_queue'}->unget($self->pop());
::wait_and_exit(255);
}
@@ -3882,11 +7627,11 @@ sub populate {
}
}
}
- if(($::opt_m or $::opt_X) and not $CommandLine::already_spread
+ if(($opt::m or $opt::X) and not $CommandLine::already_spread
and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
# -m or -X and EOF => Spread the arguments over all jobslots
# (unless they are already spread)
- $CommandLine::already_spread++;
+ $CommandLine::already_spread ||= 1;
if($self->number_of_args() > 1) {
$self->{'max_number_of_args'} =
::ceil($self->number_of_args()/$Global::max_jobs_running);
@@ -3902,29 +7647,22 @@ sub populate {
sub push {
# Add one or more records as arguments
+ # Returns: N/A
my $self = shift;
my $record = shift;
push @{$self->{'arg_list'}}, $record;
- #::my_dump($record);
- my $arg_no = ($self->number_of_args()-1) * ($#$record+1);
+ my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
+ my $rep;
for my $arg (@$record) {
- $arg_no++;
if(defined $arg) {
- if($self->{'positional_replace'}{$arg_no}) {
- # TODO probably bug here if both {1.} and {1} are used
- for my $used (keys %{$self->{'replacecount'}}) {
- # {} {/} {//} {.} or {/.}
- my $replacementfunction =
- $self->{'positional_replace'}{$arg_no};
- # Find the single replacements
- $self->{'len'}{$used} +=
- length $arg->replace($replacementfunction);
- }
- }
- for my $used (keys %{$self->{'multi_replace'}}) {
- # Add to the multireplacement
- $self->{'len'}{$used} += length $arg->replace($used);
+ for my $perlexpr (keys %{$self->{'replacecount'}}) {
+ # 50% faster than below
+ $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self);
+ # $rep = $arg->replace($perlexpr,$quote_arg,$self);
+ # $self->{'len'}{$perlexpr} += length $rep;
+ # ::debug("length", "Length: ", length $rep,
+ # "(", $perlexpr, "=>", $rep, ")\n");
}
}
}
@@ -3932,13 +7670,16 @@ sub push {
sub pop {
# Remove last argument
+ # Returns:
+ # the last record
my $self = shift;
my $record = pop @{$self->{'arg_list'}};
+ my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
for my $arg (@$record) {
if(defined $arg) {
- for my $replacement_string (keys %{$self->{'replacecount'}}) {
- $self->{'len'}{$replacement_string} -=
- length $arg->replace($replacement_string);
+ for my $perlexpr (keys %{$self->{'replacecount'}}) {
+ $self->{'len'}{$perlexpr} -=
+ length $arg->replace($perlexpr,$quote_arg,$self);
}
}
}
@@ -3946,7 +7687,9 @@ sub pop {
}
sub pop_all {
- # Remove all arguments
+ # Remove all arguments and zeros the length of replacement strings
+ # Returns:
+ # all records
my $self = shift;
my @popped = @{$self->{'arg_list'}};
for my $replacement_string (keys %{$self->{'replacecount'}}) {
@@ -3957,11 +7700,53 @@ sub pop_all {
}
sub number_of_args {
+ # The number of records
+ # Returns:
+ # number of records
my $self = shift;
- # This is really number of records
+ # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd
+ # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az
+ # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq
+ # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue.
+ #
+ # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG
+ # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq
+ # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq
+ # eagdoq oapq.
+ #
+ # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz
+ # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq
+ # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq
+ # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq
+ # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq
+ # eagdoq oapq U daf13'qp ftq eagdoq oapq
+ # tffb://qz.iuwubqpum.ads/iuwu/DAF13
+ #
+ # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk
+ # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq
+ # tmp fa nq daf13'qp.
+ #
+ # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita
+ # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq
+ # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz.
+ #
+ # This is really the number of records
return $#{$self->{'arg_list'}}+1;
}
+sub number_of_recargs {
+ # The number of args in records
+ # Returns:
+ # number of args records
+ my $self = shift;
+ my $sum = 0;
+ my $nrec = scalar @{$self->{'arg_list'}};
+ if($nrec) {
+ $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
+ }
+ return $sum;
+}
+
sub args_as_string {
# Returns:
# all unmodified arguments joined with ' ' (similar to {})
@@ -3970,356 +7755,292 @@ sub args_as_string {
map { @$_ } @{$self->{'arg_list'}});
}
+sub args_as_dirname {
+ # Returns:
+ # all unmodified arguments joined with '/' (similar to {})
+ # \t \0 \\ and / are quoted as: \t \0 \\ \_
+ # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
+ my $self = shift;
+ my @res = ();
+
+ for my $rec_ref (@{$self->{'arg_list'}}) {
+ # If headers are used, sort by them.
+ # Otherwise keep the order from the command line.
+ my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
+ for my $n (@header_indexes_sorted) {
+ CORE::push(@res,
+ $Global::input_source_header{$n},
+ map { my $s = $_;
+ # \t \0 \\ and / are quoted as: \t \0 \\ \_
+ $s =~ s/\\/\\\\/g;
+ $s =~ s/\t/\\t/g;
+ $s =~ s/\0/\\0/g;
+ $s =~ s:/:\\_:g;
+ if($Global::max_file_length) {
+ # Keep each subdir shorter than the longest
+ # allowed file name
+ $s = substr($s,0,$Global::max_file_length);
+ }
+ $s; }
+ $rec_ref->[$n-1]->orig());
+ }
+ }
+ return join "/", @res;
+}
+
+sub header_indexes_sorted {
+ # Sort headers first by number then by name.
+ # E.g.: 1a 1b 11a 11b
+ # Returns:
+ # Indexes of %Global::input_source_header sorted
+ my $max_col = shift;
+
+ no warnings 'numeric';
+ for my $col (1 .. $max_col) {
+ # Make sure the header is defined. If it is not: use column number
+ if(not defined $Global::input_source_header{$col}) {
+ $Global::input_source_header{$col} = $col;
+ }
+ }
+ my @header_indexes_sorted = sort {
+ # Sort headers numerically then asciibetically
+ $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
+ or
+ $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
+ } 1 .. $max_col;
+ return @header_indexes_sorted;
+}
+
sub len {
+ # Uses:
+ # $opt::shellquote
# The length of the command line with args substituted
my $self = shift;
my $len = 0;
# Add length of the original command with no args
- $len += $self->{'len'}{'no_args'};
+ # Length of command w/ all replacement args removed
+ $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
+ ::debug("length", "noncontext + command: $len\n");
+ my $recargs = $self->number_of_recargs();
if($self->{'context_replace'}) {
- $len += $self->number_of_args()*$self->{'len'}{'context'};
+ # Context is duplicated for each arg
+ $len += $recargs * $self->{'len'}{'context'};
for my $replstring (keys %{$self->{'replacecount'}}) {
- if(defined $self->{'len'}{$replstring}) {
- $len += $self->{'len'}{$replstring} *
- $self->{'replacecount'}{$replstring};
- }
+ # If the replacements string is more than once: mulitply its length
+ $len += $self->{'len'}{$replstring} *
+ $self->{'replacecount'}{$replstring};
+ ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
+ $self->{'replacecount'}{$replstring}, "\n");
}
- $len += ($self->number_of_args()-1) * $self->{'len'}{'contextgroups'};
+ # echo 11 22 33 44 55 66 77 88 99 1010
+ # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
+ # 5 + ctxgrp*arg
+ ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
+ " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
+ # Add space between context groups
+ $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
} else {
# Each replacement string may occur several times
# Add the length for each time
+ $len += 1*$self->{'len'}{'context'};
+ ::debug("length", "context+noncontext + command: $len\n");
for my $replstring (keys %{$self->{'replacecount'}}) {
- if(defined $self->{'len'}{$replstring}) {
- $len += $self->{'len'}{$replstring} *
- $self->{'replacecount'}{$replstring};
- }
- if($Global::replace{$replstring}) {
- # This is a multi replacestring ({} {/} {//} {.} {/.})
- # Add each space between two arguments
- my $number_of_args = ($#{$self->{'arg_list'}[0]}+1) *
- $self->number_of_args();
- $len += ($number_of_args-1) *
- $self->{'replacecount'}{$replstring};
- }
+ # (space between regargs + length of replacement)
+ # * number this replacement is used
+ $len += ($recargs -1 + $self->{'len'}{$replstring}) *
+ $self->{'replacecount'}{$replstring};
}
}
- if($::opt_nice) {
+ if($opt::nice) {
# Pessimistic length if --nice is set
# Worse than worst case: every char needs to be quoted with \
$len *= 2;
}
- if($::opt_shellquote) {
+ if($Global::quoting) {
+ # Pessimistic length if -q is set
+ # Worse than worst case: every char needs to be quoted with \
+ $len *= 2;
+ }
+ if($opt::shellquote) {
# Pessimistic length if --shellquote is set
# Worse than worst case: every char needs to be quoted with \ twice
$len *= 4;
}
+ # If we are using --env, add the prefix for that, too.
+ $len += $Global::envvarlen;
+
return $len;
}
-sub multi_regexp {
- if(not $CommandLine::multi_regexp) {
- $CommandLine::multi_regexp =
- "(?:".
- join("|",map {my $a=$_; $a =~ s/(\W)/\\$1/g; $a}
- ($Global::replace{"{}"},
- $Global::replace{"{.}"},
- $Global::replace{"{/}"},
- $Global::replace{"{//}"},
- $Global::replace{"{/.}"})
- ).")";
- }
- return $CommandLine::multi_regexp;
-}
-
-sub number_of_replacements {
- # Returns:
- # sum_of_count, length_of_command_with_no_args,
- # length_of_context { 'replacementstring' => count }
- my $command = shift;
- my $context_replace = shift;
- my %count = ();
- my $sum = 0;
- my $cmd = $command;
- my $multi_regexp = multi_regexp();
- my $replacement_regexp =
- "(?:". ::maybe_quote('\{') .
- '\d+(?:|\.|/\.|/|//)?' . # {n} {n.} {n/.} {n/} {n//}
- ::maybe_quote('\}') .
- '|'.
- join("|",map {$a=$_;$a=~s/(\W)/\\$1/g; $a} values %Global::replace).
- ")";
- my %c = ();
- $cmd =~ s/($replacement_regexp)/$c{$1}++;"\0"/ogex;
- for my $k (keys %c) {
- if(defined $Global::replace_rev{$k}) {
- $count{$Global::replace_rev{$k}} = $c{$k};
- } else {
- $count{::maybe_unquote($k)} = $c{$k};
- }
- $sum += $c{$k};
- }
- my $number_of_context_groups = 0;
- my $no_args;
- my $context;
- if($context_replace) {
- $cmd = $command;
- while($cmd =~ s/\S*$multi_regexp\S*//o) {
- $number_of_context_groups++;
- }
- $no_args = length $cmd;
- $context = length($command) - $no_args;
- } else {
- $cmd = $command;
- $cmd =~ s/$multi_regexp//go;
- $cmd =~ s/$replacement_regexp//go;
- $no_args = length($cmd);
- $context = length($command) - $no_args;
- }
- for my $k (keys %count) {
- if(defined $Global::replace{$k}) {
- # {} {/} {//} {.} {/.} {#}
- $context -= (length $Global::replace{$k}) * $count{$k};
- } else {
- # {n}
- $context -= (length $k) * $count{$k};
- }
- }
- return ($sum,$no_args,$context,$number_of_context_groups,%count);
-}
-
sub replaced {
+ # Uses:
+ # $Global::noquote
+ # $Global::quoting
+ # Returns:
+ # $replaced = command with place holders replaced and prepended
my $self = shift;
if(not defined $self->{'replaced'}) {
- $self->{'replaced'} = $self->replace_placeholders($self->{'command'},0);
- if($self->{'replaced'} =~ /^\s*(-\S+)/) {
- # Is this really a command in $PATH starting with '-'?
- my $cmd = $1;
- if(not grep { -e $_."/".$cmd } split(":",$ENV{'PATH'})) {
- print STDERR "parallel: Error:"
- . " Command ($cmd) starts with '-'."
- . " Is this a wrong option?\n";
- ::wait_and_exit(255);
- }
+ # Don't quote arguments if the input is the full command line
+ my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
+ # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
+ $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
+ $self->{'replaced'} = $self->
+ replace_placeholders($self->{'command'},$Global::quoting,
+ $quote_arg);
+ my $len = length $self->{'replaced'};
+ if ($len != $self->len()) {
+ ::debug("length", $len, " != ", $self->len(),
+ " ", $self->{'replaced'}, "\n");
+ } else {
+ ::debug("length", $len, " == ", $self->len(),
+ " ", $self->{'replaced'}, "\n");
}
- if($::opt_nice) {
- # Prepend nice -n19 $SHELL -c
- # and quote
- $self->{'replaced'} = nice() ." -n" . $::opt_nice . " "
- . $ENV{SHELL}." -c "
- . ::shell_quote_scalar($self->{'replaced'});
- }
- if($::opt_shellquote) {
- # Prepend echo
- # and quote twice
- $self->{'replaced'} = "echo " .
- ::shell_quote_scalar(::shell_quote_scalar($self->{'replaced'}));
- }
- }
- if($::oodebug and length($self->{'replaced'}) != ($self->len())) {
- ::my_dump($self);
- Carp::cluck("replaced len=" . length($self->{'replaced'})
- . " computed=" . ($self->len()));
}
return $self->{'replaced'};
}
-sub nice {
- # Returns:
- # path to nice
- # Needed because tcsh's built-in nice does not support 'nice -n19'
- if(not $Global::path_to_nice) {
- $Global::path_to_nice = "nice";
- for my $n ((split/:/, $ENV{'PATH'}), "/bin", "/usr/bin") {
- if(-x $n."/nice") {
- $Global::path_to_nice = $n."/nice";
- last;
- }
- }
- }
- return $Global::path_to_nice;
-}
+{
+ my @target;
+ my $context_replace;
+ my @arg;
+ my $perl_expressions_as_re;
-sub replace_placeholders {
- my $self = shift;
- my $target = shift;
- my $quoteall = shift;
- my $context_replace = $self->{'context_replace'};
- my $replaced;
-
- if($self->{'context_replace'}) {
- $replaced = $self->context_replace_placeholders($target,$quoteall);
- } else {
- $replaced = $self->simple_replace_placeholders($target,$quoteall);
- }
- return $replaced;
-}
-
-sub context_replace_placeholders {
- my $self = shift;
- my $target = shift;
- my $quoteall = shift;
- # -X = context replace
- # maybe multiple input sources
- # maybe --xapply
- # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
-
- my @args=();
- my @used_multi;
- my %replace;
-
- for my $record (@{$self->{'arg_list'}}) {
- # Merge arguments from records into args for easy access
- CORE::push @args, @$record;
- }
-
- # Replacement functions
- my @rep = qw({} {/} {//} {.} {/.});
- # Inner part of replacement functions
- my @rep_inner = ('', '/', '//', '.', '/.');
- # Regexp for replacement functions
- my $rep_regexp = "(?:". join('|', map { $_=~s/(\W)/\\$1/g; $_} @rep) . ")";
- # Regexp for inner replacement functions
- my $rep_inner_regexp = "(?:". join('|', map { $_=~s/(\W)/\\$1/g; $_} @rep_inner) . ")";
- # Seq replace string: {#}
- my $rep_seq_regexp = '(?:'.::maybe_quote('\{\#\}').")";
- # Normal replace strings
- my $rep_str_regexp = multi_regexp();
- # Positional replace strings
- my $rep_str_pos_regexp = ::maybe_quote('\{').'\d+'.$rep_inner_regexp.::maybe_quote('\}');
-
- # Fish out the words that have replacement strings in them
- my $tt = $target;
- my %word;
- while($tt =~ s/(\S*(?:$rep_str_regexp|$rep_str_pos_regexp|$rep_seq_regexp)\S*)/\0/o) {
- $word{$1}++;
- }
- # For each word: Generate the replacement string for that word.
- for my $origword (keys %word) {
- my @pos_replacements=();
- my @replacements=();
- my $w;
- my $word = $origword; # Make a local modifyable copy
-
- # replace {#} if it exists
- $word =~ s/$rep_seq_regexp/$self->seq()/geo;
- if($word =~ /$rep_str_pos_regexp/o) {
- # There are positional replacement strings
- my @argset;
- if($#{$self->{'arg_list'}->[0]} == 0) {
- # Only one input source: Treat it as a set
- @argset = [ @args ];
+ sub fish_out_words_containing_replacement_strings {
+ my %word;
+ for (@target) {
+ my $tt = $_;
+ ::debug("replace", "Target: $tt");
+ # Command line template:
+ # a{1}b{}c{}d
+ # becomes:
+ # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d
+ # becomes:
+ # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d
+ # Input A B C (no context) becomes:
+ # A B C => aAbA B CcA B Cd
+ # Input A B C (context -X) becomes:
+ # A B C => aAbAcAd aAbBcBd aAbCcCd
+ if($context_replace) {
+ while($tt =~ s/([^\s\257]* # before {=
+ (?:
+ \257< # {=
+ [^\257]*? # The perl expression
+ \257> # =}
+ [^\s\257]* # after =}
+ )+)/ /x) {
+ # $1 = pre \257 perlexpr \257 post
+ $word{"$1"} ||= 1;
+ }
} else {
- @argset = @{$self->{'arg_list'}};
- }
- # Match 1..n where n = max args in a argset
- my $pos_regexp = "(?:".join("|", 1 .. $#{$argset[0]}+1).")";
- my $pos_inner_regexp = ::maybe_quote('\{') .
- "($pos_regexp)($rep_inner_regexp)" .
- ::maybe_quote('\}');
- for my $argset (@argset) {
- # Replace all positional arguments - e.g. {7/.}
- # with the replacement function - e.g. {/.}
- # of that argument
- if(defined $self->{'max_number_of_args'}) {
- # Fill up if we have a half completed line, so {n} will be empty
- while($#$argset < $self->{'max_number_of_args'}) {
- CORE::push @$argset, Arg->new("");
- }
- }
- $w = $word;
- $w =~ s/$pos_inner_regexp/$argset->[$1-1]->replace('{'.$2.'}')/geo;
- CORE::push @pos_replacements, $w;
- }
- }
- if(not @pos_replacements) {
- @pos_replacements = ($word);
- }
-
- if($word =~ m:$rep_str_regexp:) {
- # There are normal replacement strings
- for my $w (@pos_replacements) {
- for my $arg (@args) {
- my $wmulti = $w;
- $wmulti =~ s/($rep_str_regexp)/$arg->replace($Global::replace_rev{$1})/geo;
- CORE::push @replacements, $wmulti;
+ while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) {
+ # $f = \257 perlexpr \257
+ $word{$1} ||= 1;
}
}
}
- if(@replacements) {
- CORE::push @{$replace{$origword}}, @replacements;
- } else {
- CORE::push @{$replace{$origword}}, @pos_replacements;
+ return keys %word;
+ }
+
+ sub flatten_arg_list {
+ my $arglist_ref = shift;
+ @arg = ();
+ for my $record (@$arglist_ref) {
+ # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
+ # Merge arg-objects from records into @arg for easy access
+ CORE::push @arg, @$record;
}
+ # Add one arg if empty to allow {#} and {%} to be computed only once
+ if(not @arg) { @arg = (Arg->new("")); }
}
- # Substitute the replace strings with the replacement values
- # Must be sorted by length if a short word is a substring of a long word
- my $regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_}
- sort { length $b <=> length $a } keys %word);
- $target =~ s/($regexp)/join(" ",@{$replace{$1}})/ge;
- return $target;
-}
-sub simple_replace_placeholders {
- # no context (no -X)
- # maybe multiple input sources
- # maybe --xapply
- my $self = shift;
- my $target = shift;
- my $quoteall = shift;
- my @args=();
- my @used_multi;
- my %replace;
+ sub replace_placeholders {
+ # Replace foo{}bar with fooargbar
+ # Input:
+ # $targetref = command as shell words
+ # $quote = should everything be quoted?
+ # $quote_arg = should replaced arguments be quoted?
+ # Returns:
+ # @target with placeholders replaced
+ my $self = shift;
+ my $targetref = shift;
+ my $quote = shift;
+ my $quote_arg = shift;
+ my %replace;
+ $context_replace = $self->{'context_replace'};
+ @target = @$targetref;
+ ::debug("replace", "Replace @target\n");
+ # -X = context replace
+ # maybe multiple input sources
+ # maybe --xapply
+ if(not @target) {
+ # @target is empty: Return empty array
+ return @target;
+ }
+ # Fish out the words that have replacement strings in them
+ my @word = fish_out_words_containing_replacement_strings();
+ flatten_arg_list($self->{'arg_list'});
- for my $record (@{$self->{'arg_list'}}) {
- # Merge arguments from records into args for easy access
- CORE::push @args, @$record;
- }
- # Which replace strings are used?
- # {#} {} {/} {//} {.} {/.} {n} {n/} {n//} {n.} {n/.}
- for my $used (keys %{$self->{'replacecount'}}) {
- # What are the replacement values for the replace strings?
- if(grep { $used eq $_ } qw({} {/} {//} {.} {/.})) {
- # {} {/} {//} {.} {/.}
- $replace{$Global::replace{$used}} =
- join(" ", map { $_->replace($used) } @args);
- } elsif($used =~ /^\{(\d+)(|\/|\/\/|\.|\/\.)\}$/) {
- # {n} {n/} {n//} {n.} {n/.}
- my $positional = $1; # number if any
- my $replacementfunction = "{".::undef_as_empty($2)."}"; # {} {/} {//} {.} or {/.}
- # If -q then the replacementstrings will be quoted, too
- # {1.} -> \{1.\}
- $Global::replace{$used} ||= ::maybe_quote($used);
- if(defined $args[$positional-1]) {
- # we have a matching argument for {n}
- $replace{$Global::replace{$used}} =
- $args[$positional-1]->replace($replacementfunction);
- } else {
- if($positional <= $self->{'max_number_of_args'}) {
- # Fill up if we have a half completed line
- $replace{$Global::replace{$used}} = "";
+ # Number of arguments - used for positional arguments
+ my $n = $#arg+1;
+
+ # This is actually a CommandLine-object,
+ # but it looks nice to be able to say {= $job->slot() =}
+ my $job = $self;
+ for my $word (@word) {
+ # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF
+ ::debug("replace", "Replacing in $word\n");
+ my $normal_replace;
+
+ # for each arg:
+ # replace replacement strings with replacement in the word value
+ # push to replace word value
+ $perl_expressions_as_re ||=
+ join("|", map {s/^-?\d+//; "\Q$_\E"} keys %{$self->{'replacecount'}});
+ for my $arg (@arg) {
+ my $val = $word;
+ # Replace {= perl expr =} with value for each arg
+ $val =~ s{\257<(-?\d+)?($perl_expressions_as_re)\257>}
+ {
+ if($1) {
+ # Positional replace
+ # Find the relevant arg and replace it
+ ($arg[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
+ $arg[$1 > 0 ? $1-1 : $n+$1]->
+ replace($2,$quote_arg,$self)
+ : "");
+ } else {
+ # Normal replace
+ $normal_replace ||= 1;
+ ($arg ? $arg->replace($2,$quote_arg,$self) : "");
+ }
+ }goxe;
+ if($quote) {
+ CORE::push(@{$replace{::shell_quote_scalar($word)}},
+ ::shell_quote_scalar($val));
+ } else {
+ CORE::push(@{$replace{$word}}, $val);
}
+ # No normal replacements => only run once
+ $normal_replace or last;
}
- } elsif($used eq "{#}") {
- # {#}
- $replace{$Global::replace{$used}} = $self->seq();
- } else {
- ::die_bug('simple_replace_placeholders_20110530');
}
- }
- # Substitute the replace strings with the replacement values
- my $regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_} keys %replace);
- if($regexp) {
- if($quoteall) {
- # This is for --return: The whole expression must be
- # quoted - not just the replacements
- %replace = map { $_ => ::shell_unquote($replace{$_}) } keys %replace;
- $target =~ s/($regexp)/$replace{$1}/g;
- $target = ::shell_quote_scalar($target);
- } else {
- $target =~ s/($regexp)/$replace{$1}/g;
+
+ if($quote) {
+ @target = ::shell_quote(@target);
}
+ # ::debug("replace", "%replace=",::my_dump(%replace),"\n");
+ if(%replace) {
+ # Substitute the replace strings with the replacement values
+ # Must be sorted by length if a short word is a substring of a long word
+ my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s }
+ sort { length $b <=> length $a } keys %replace);
+ for(@target) {
+ s/($regexp)/join(" ",@{$replace{$1}})/ge;
+ }
+ }
+ ::debug("replace", "Return @target\n");
+ return wantarray ? @target : "@target";
}
- return $target;
}
@@ -4327,17 +8048,81 @@ package CommandLineQueue;
sub new {
my $class = shift;
- my $command = shift;
+ my $commandref = shift;
my $read_from = shift;
my $context_replace = shift;
my $max_number_of_args = shift;
my $return_files = shift;
my @unget = ();
+ my ($count,$posrpl,$perlexpr);
+ my ($replacecount_ref, $len_ref);
+ my @command = @$commandref;
+ my $dummy = '';
+ # If the first command start with '-' it is probably an option
+ if($command[0] =~ /^\s*(-\S+)/) {
+ # Is this really a command in $PATH starting with '-'?
+ my $cmd = $1;
+ if(not ::which($cmd)) {
+ ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n");
+ ::wait_and_exit(255);
+ }
+ }
+ # Replace replacement strings with {= perl expr =}
+ @command = merge_rpl_parts(@command);
+
+ # Protect matching inside {= perl expr =}
+ # by replacing {= and =} with \257< and \257>
+ # in @command, --return and --tagstring (if used)
+ for(@command,@$return_files,
+ (defined $opt::tagstring ? $opt::tagstring : $dummy)) {
+ # Disallow \257 to avoid nested {= {= =} =}
+ if(/\257/) {
+ ::error("Command cannot contain the character \257. Use a function for that.\n");
+ ::wait_and_exit(255);
+ }
+ # Needs to match rightmost left parens (Perl defaults to leftmost)
+ # to deal with: {={==}
+ while(s{([^\257]*) \Q$Global::parensleft\E ([^\257]*?) \Q$Global::parensright\E }
+ {$1\257<$2\257>}gx) {}
+ for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
+ # Replace long --rpl's before short ones, as a short may be a
+ # substring of a long:
+ # --rpl '% s/a/b/' --rpl '%% s/b/a/'
+ # Replace the short hand string (--rpl)
+ # with the {= perl expr =}
+ # Avoid replacing inside existing {= perl expr =}
+ while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
+ \Q$rpl\E}
+ {$1\257<$Global::rpl{$rpl}\257>}xg) {
+ }
+ # Do the same for the positional replacement strings
+ # A bit harder as we have to put in the position number
+ $posrpl = $rpl;
+ if($posrpl =~ s/^\{//) {
+ # Only do this if the shorthand start with {
+ s{\{(-?\d+)\Q$posrpl\E}
+ {\257<$1 $Global::rpl{$rpl}\257>}g;
+ }
+ }
+ }
+ # Add {} if no replacement strings in @command
+ ($replacecount_ref, $len_ref, @command) =
+ replacement_counts_and_lengths($return_files,@command);
+ if("@command" =~ /^[^ \t\n=]*\257) {
+ # Replacement string is (part of) the command (and not just
+ # argument or variable definition V1={})
+ # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
+ # Do no quote (Otherwise it will fail if the input contains spaces)
+ $Global::noquote = 1;
+ }
+
return bless {
'unget' => \@unget,
- 'command' => $command,
- 'arg_queue' => RecordQueue->new($read_from,$::opt_colsep),
+ 'command' => \@command,
+ 'replacecount' => $replacecount_ref,
+ 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
'context_replace' => $context_replace,
+ 'len' => $len_ref,
'max_number_of_args' => $max_number_of_args,
'size' => undef,
'return_files' => $return_files,
@@ -4345,27 +8130,151 @@ sub new {
}, ref($class) || $class;
}
+sub merge_rpl_parts {
+ # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
+ # Input:
+ # @in = the @command as given by the user
+ # Uses:
+ # $Global::parensleft
+ # $Global::parensright
+ # Returns:
+ # @command with parts merged to keep {= and =} as one
+ my @in = @_;
+ my @out;
+ my $l = quotemeta($Global::parensleft);
+ my $r = quotemeta($Global::parensright);
+
+ while(@in) {
+ my $s = shift @in;
+ $_ = $s;
+ # Remove matching (right most) parens
+ while(s/(.*)$l.*?$r/$1/o) {}
+ if(/$l/o) {
+ # Missing right parens
+ while(@in) {
+ $s .= " ".shift @in;
+ $_ = $s;
+ while(s/(.*)$l.*?$r/$1/o) {}
+ if(not /$l/o) {
+ last;
+ }
+ }
+ }
+ push @out, $s;
+ }
+ return @out;
+}
+
+sub replacement_counts_and_lengths {
+ # Count the number of different replacement strings.
+ # Find the lengths of context for context groups and non-context
+ # groups.
+ # If no {} found in @command: add it to @command
+ #
+ # Input:
+ # \@return_files = array of filenames to return
+ # @command = command template
+ # Output:
+ # \%replacecount, \%len, @command
+ my $return_files = shift;
+ my @command = @_;
+ my (%replacecount,%len);
+ my $sum = 0;
+ while($sum == 0) {
+ # Count how many times each replacement string is used
+ my @cmd = @command;
+ my $contextlen = 0;
+ my $noncontextlen = 0;
+ my $contextgroups = 0;
+ for my $c (@cmd) {
+ while($c =~ s/ \257<([^\257]*?)\257> /\000/x) {
+ # %replacecount = { "perlexpr" => number of times seen }
+ # e.g { "s/a/b/" => 2 }
+ $replacecount{$1}++;
+ $sum++;
+ }
+ # Measure the length of the context around the {= perl expr =}
+ # Use that {=...=} has been replaced with \000 above
+ # So there is no need to deal with \257<
+ while($c =~ s/ (\S*\000\S*) //x) {
+ my $w = $1;
+ $w =~ tr/\000//d; # Remove all \000's
+ $contextlen += length($w);
+ $contextgroups++;
+ }
+ # All {= perl expr =} have been removed: The rest is non-context
+ $noncontextlen += length $c;
+ }
+ for(@$return_files) {
+ my $t = $_;
+ while($t =~ s/ \257<([^\257]*)\257> //x) {
+ # %replacecount = { "perlexpr" => number of times seen }
+ # e.g { "$_++" => 2 }
+ # But for tagstring we just need to mark it as seen
+ $replacecount{$1} ||= 1;
+ }
+ }
+ if($opt::tagstring) {
+ my $t = $opt::tagstring;
+ while($t =~ s/ \257<([^\257]*)\257> //x) {
+ # %replacecount = { "perlexpr" => number of times seen }
+ # e.g { "$_++" => 2 }
+ # But for tagstring we just need to mark it as seen
+ $replacecount{$1} ||= 1;
+ }
+ }
+ if($opt::bar) {
+ # If the command does not contain {} force it to be computed
+ # as it is being used by --bar
+ $replacecount{""} ||= 1;
+ }
+
+ $len{'context'} = 0+$contextlen;
+ $len{'noncontext'} = $noncontextlen;
+ $len{'contextgroups'} = $contextgroups;
+ $len{'noncontextgroups'} = @cmd-$contextgroups;
+ ::debug("length", "@command Context: ", $len{'context'},
+ " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
+ " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
+ if($sum == 0) {
+ if(not @command) {
+ # Default command = {}
+ @command = ("\257<\257>");
+ } elsif(($opt::pipe or $opt::pipepart)
+ and not $opt::fifo and not $opt::cat) {
+ # With --pipe / --pipe-part you can have no replacement
+ last;
+ } else {
+ # Append {} to the command if there are no {...}'s and no {=...=}
+ push @command, ("\257<\257>");
+ }
+ }
+ }
+ return(\%replacecount,\%len,@command);
+}
+
sub get {
my $self = shift;
if(@{$self->{'unget'}}) {
my $cmd_line = shift @{$self->{'unget'}};
return ($cmd_line);
} else {
- my $cmd_line;
- $cmd_line = CommandLine->new($self->seq(),
- $self->{'command'},
- $self->{'arg_queue'},
- $self->{'context_replace'},
- $self->{'max_number_of_args'},
- $self->{'return_files'},
+ my $cmd_line = CommandLine->new($self->seq(),
+ $self->{'command'},
+ $self->{'arg_queue'},
+ $self->{'context_replace'},
+ $self->{'max_number_of_args'},
+ $self->{'return_files'},
+ $self->{'replacecount'},
+ $self->{'len'},
);
$cmd_line->populate();
- ::debug("cmd_line->number_of_args ".$cmd_line->number_of_args()."\n");
- if($::opt_pipe) {
+ ::debug("init","cmd_line->number_of_args ",
+ $cmd_line->number_of_args(), "\n");
+ if($opt::pipe or $opt::pipepart) {
if($cmd_line->replaced() eq "") {
# Empty command - pipe requires a command
- print STDERR "$Global::progname: --pipe must have a ".
- "command to pipe into (e.g. 'cat')\n";
+ ::error("--pipe must have a command to pipe into (e.g. 'cat').\n");
::wait_and_exit(255);
}
} else {
@@ -4390,7 +8299,7 @@ sub unget {
sub empty {
my $self = shift;
my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty();
- ::debug("CommandLineQueue->empty $empty\n");
+ ::debug("run", "CommandLineQueue->empty $empty");
return $empty;
}
@@ -4410,22 +8319,6 @@ sub quote_args {
return $self->{'command'};
}
-sub size {
- my $self = shift;
- if(not $self->{'size'}) {
- my @all_lines = ();
- while(not $self->{'arg_queue'}->empty()) {
- push @all_lines, CommandLine->new($self->{'command'},
- $self->{'arg_queue'},
- $self->{'context_replace'},
- $self->{'max_number_of_args'});
- }
- $self->{'size'} = @all_lines;
- $self->unget(@all_lines);
- }
- return $self->{'size'};
-}
-
package Limits::Command;
@@ -4435,21 +8328,30 @@ sub max_length {
# Returns:
# number of chars on the longest command line allowed
if(not $Limits::Command::line_max_len) {
- if($::opt_s) {
- if(is_acceptable_command_line_length($::opt_s)) {
- $Limits::Command::line_max_len = $::opt_s;
- } else {
- # -s is too long: Find the correct
- $Limits::Command::line_max_len = binary_find_max_length(0,$::opt_s);
- }
- if($::opt_s <= $Limits::Command::line_max_len) {
- $Limits::Command::line_max_len = $::opt_s;
- } else {
- print STDERR "$Global::progname: value for -s option ",
- "should be < $Limits::Command::line_max_len\n";
- }
+ # Disk cache of max command line length
+ my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname();
+ my $cached_limit;
+ if(-e $len_cache) {
+ open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
+ $cached_limit = <$fh>;
+ close $fh;
} else {
- $Limits::Command::line_max_len = real_max_length();
+ $cached_limit = real_max_length();
+ # If $HOME is write protected: Do not fail
+ mkdir($ENV{'HOME'} . "/.parallel");
+ mkdir($ENV{'HOME'} . "/.parallel/tmp");
+ open(my $fh, ">", $len_cache);
+ print $fh $cached_limit;
+ close $fh;
+ }
+ $Limits::Command::line_max_len = tmux_length($cached_limit);
+ if($opt::max_chars) {
+ if($opt::max_chars <= $cached_limit) {
+ $Limits::Command::line_max_len = $opt::max_chars;
+ } else {
+ ::warning("Value for -s option ",
+ "should be < $cached_limit.\n");
+ }
}
}
return $Limits::Command::line_max_len;
@@ -4477,7 +8379,7 @@ sub binary_find_max_length {
my ($lower, $upper) = (@_);
if($lower == $upper or $lower == $upper-1) { return $lower; }
my $middle = int (($upper-$lower)/2 + $lower);
- ::debug("Maxlen: $lower,$upper,$middle\n");
+ ::debug("init", "Maxlen: $lower,$upper,$middle : ");
if (is_acceptable_command_line_length($middle)) {
return binary_find_max_length($middle,$upper);
} else {
@@ -4492,16 +8394,49 @@ sub is_acceptable_command_line_length {
# 1 otherwise
my $len = shift;
- $CommandMaxLength::is_acceptable_command_line_length++;
- ::debug("$CommandMaxLength::is_acceptable_command_line_length $len\n");
local *STDERR;
- open (STDERR,">/dev/null");
+ open (STDERR, ">", "/dev/null");
system "true "."x"x$len;
close STDERR;
- ::debug("$len $?\n");
+ ::debug("init", "$len=$? ");
return not $?;
}
+sub tmux_length {
+ # If $opt::tmux set, find the limit for tmux
+ # tmux 1.8 has a 2kB limit
+ # tmux 1.9 has a 16kB limit
+ # Input:
+ # $len = maximal command line length
+ # Returns:
+ # $tmux_len = maximal length runable in tmux
+ my $len = shift;
+ if($opt::tmux) {
+ $ENV{'TMUX'} ||= "tmux";
+ if(not ::which($ENV{'TMUX'})) {
+ ::error($ENV{'TMUX'}." not found in \$PATH.\n");
+ ::wait_and_exit(255);
+ }
+ my @out;
+ for my $l (1, 2020, 16320, 100000, $len) {
+ my $tmpfile = ::tmpname("tms");
+ my $tmuxcmd = "sh -c '".$ENV{'TMUX'}." -S $tmpfile new-session -d -n echo $l".
+ ("x"x$l). " 2>/dev/null' && echo $l; rm -f $tmpfile";
+ push @out, qx{ $tmuxcmd };
+ unlink $tmpfile;
+ }
+ ::debug("tmux","tmux-length ",@out);
+ chomp @out;
+ # The arguments is given 3 times on the command line
+ # and the wrapping is around 30 chars
+ # (29 for tmux1.9, 33 for tmux1.8)
+ my $tmux_len = (::max(@out));
+ $len = ::min($len,int($tmux_len/4-33));
+ ::debug("tmux","tmux-length ",$len);
+ }
+ return $len;
+}
+
package RecordQueue;
@@ -4530,13 +8465,13 @@ sub get {
# reference to array of Arg-objects
my $self = shift;
if(@{$self->{'unget'}}) {
+ $self->{'arg_number'}++;
return shift @{$self->{'unget'}};
}
- $self->{'arg_number'}++;
my $ret = $self->{'arg_sub_queue'}->get();
if(defined $Global::max_number_of_args
and $Global::max_number_of_args == 0) {
- ::debug("Read 1 but return 0 args\n");
+ ::debug("run", "Read 1 but return 0 args\n");
return [Arg->new("")];
} else {
return $ret;
@@ -4545,8 +8480,8 @@ sub get {
sub unget {
my $self = shift;
- ::debug("RecordQueue-unget '@_'\n");
- $self->{'arg_number'}--;
+ ::debug("run", "RecordQueue-unget '@_'\n");
+ $self->{'arg_number'} -= @_;
unshift @{$self->{'unget'}}, @_;
}
@@ -4554,7 +8489,7 @@ sub empty {
my $self = shift;
my $empty = not @{$self->{'unget'}};
$empty &&= $self->{'arg_sub_queue'}->empty();
- ::debug("RecordQueue->empty $empty\n");
+ ::debug("run", "RecordQueue->empty $empty");
return $empty;
}
@@ -4592,11 +8527,11 @@ sub get {
if(defined $in_record) {
my @out_record = ();
for my $arg (@$in_record) {
- ::debug("RecordColQueue::arg $arg\n");
+ ::debug("run", "RecordColQueue::arg $arg\n");
my $line = $arg->orig();
- ::debug("line='$line'\n");
+ ::debug("run", "line='$line'\n");
if($line ne "") {
- for my $s (split /$::opt_colsep/o, $line, -1) {
+ for my $s (split /$opt::colsep/o, $line, -1) {
push @out_record, Arg->new($s);
}
} else {
@@ -4611,14 +8546,14 @@ sub get {
sub unget {
my $self = shift;
- ::debug("RecordColQueue-unget '@_'\n");
+ ::debug("run", "RecordColQueue-unget '@_'\n");
unshift @{$self->{'unget'}}, @_;
}
sub empty {
my $self = shift;
my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty());
- ::debug("RecordColQueue->empty $empty");
+ ::debug("run", "RecordColQueue->empty $empty");
return $empty;
}
@@ -4632,9 +8567,9 @@ sub new {
my $fhs = shift;
for my $fh (@$fhs) {
if(-t $fh) {
- print STDERR "$Global::progname: Input is read from the terminal. ".
- "Only experts do this on purpose. ".
- "Press CTRL-D to exit.\n";
+ ::warning("Input is read from the terminal.\n");
+ ::warning("Only experts do this on purpose. ".
+ "Press CTRL-D to exit.\n");
}
}
return bless {
@@ -4646,7 +8581,7 @@ sub new {
sub get {
my $self = shift;
- if($::opt_xapply) {
+ if($opt::xapply) {
return $self->xapply_get();
} else {
return $self->nest_get();
@@ -4655,7 +8590,7 @@ sub get {
sub unget {
my $self = shift;
- ::debug("MultifileQueue-unget '@_'\n");
+ ::debug("run", "MultifileQueue-unget '@_'\n");
unshift @{$self->{'unget'}}, @_;
}
@@ -4666,7 +8601,7 @@ sub empty {
for my $fh (@{$self->{'fhs'}}) {
$empty &&= eof($fh);
}
- ::debug("MultifileQueue->empty $empty\n");
+ ::debug("run", "MultifileQueue->empty $empty ");
return $empty;
}
@@ -4681,10 +8616,16 @@ sub xapply_get {
for my $fh (@{$self->{'fhs'}}) {
my $arg = read_arg_from_fh($fh);
if(defined $arg) {
+ # Record $arg for recycling at end of file
+ push @{$self->{'arg_matrix'}{$fh}}, $arg;
push @record, $arg;
$empty = 0;
} else {
- push @record, Arg->new("");
+ ::debug("run", "EOA ");
+ # End of file: Recycle arguments
+ push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
+ # return last @{$args->{'args'}{$fh}};
+ push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
}
}
if($empty) {
@@ -4780,22 +8721,22 @@ sub read_arg_from_fh {
my $prepend = undef;
my $arg;
do {{
- if(eof($fh)) {
+ # This makes 10% faster
+ if(not ($arg = <$fh>)) {
if(defined $prepend) {
return Arg->new($prepend);
} else {
return undef;
}
}
- $arg = <$fh>;
- ::debug("read $arg\n");
+# ::debug("run", "read $arg\n");
# Remove delimiter
$arg =~ s:$/$::;
if($Global::end_of_file_string and
$arg eq $Global::end_of_file_string) {
# Ignore the rest of input file
- while (<$fh>) {}
- ::debug("EOF-string $arg\n");
+ close $fh;
+ ::debug("run", "EOF-string ($arg) met\n");
if(defined $prepend) {
return Arg->new($prepend);
} else {
@@ -4818,7 +8759,7 @@ sub read_arg_from_fh {
redo;
}
}
- }} while (1 == 0); # Dummy loop for redo
+ }} while (1 == 0); # Dummy loop {{}} for redo
if(defined $arg) {
return Arg->new($arg);
} else {
@@ -4829,7 +8770,7 @@ sub read_arg_from_fh {
sub expand_combinations {
# Input:
# ([xmin,xmax], [ymin,ymax], ...)
- # Returns ([x,y,...],[x,y,...])
+ # Returns: ([x,y,...],[x,y,...])
# where xmin <= x <= xmax and ymin <= y <= ymax
my $minmax_ref = shift;
my $xmin = $$minmax_ref[0];
@@ -4855,44 +8796,66 @@ package Arg;
sub new {
my $class = shift;
my $orig = shift;
- if($::oodebug and not defined $orig) {
- Carp::cluck($orig);
+ my @hostgroups;
+ if($opt::hostgroups) {
+ if($orig =~ s:@(.+)::) {
+ # We found hostgroups on the arg
+ @hostgroups = split(/\+/, $1);
+ if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
+ ::warning("No such hostgroup (@hostgroups)\n");
+ @hostgroups = (keys %Global::hostgroups);
+ }
+ } else {
+ @hostgroups = (keys %Global::hostgroups);
+ }
}
return bless {
'orig' => $orig,
+ 'hostgroups' => \@hostgroups,
}, ref($class) || $class;
}
sub replace {
+ # Calculates the corresponding value for a given perl expression
+ # Returns:
+ # The calculated string (quoted if asked for)
my $self = shift;
- my $replacement_string = shift; # {} {/} {//} {.} {/.}
- if(not defined $self->{$replacement_string}) {
- my $s;
+ my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
+ my $quote = (shift) ? 1 : 0; # should the string be quoted?
+ # This is actually a CommandLine-object,
+ # but it looks nice to be able to say {= $job->slot() =}
+ my $job = shift;
+ $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace
+ if(not defined $self->{"rpl",0,$perlexpr}) {
+ local $_;
if($Global::trim eq "n") {
- $s = $self->{'orig'};
+ $_ = $self->{'orig'};
} else {
- $s = trim_of($self->{'orig'});
+ $_ = trim_of($self->{'orig'});
}
- if($replacement_string eq "{}") {
- # skip
- } elsif($replacement_string eq "{.}") {
- $s =~ s:\.[^/\.]*$::; # Remove .ext from argument
- } elsif($replacement_string eq "{/}") {
- $s =~ s:^.*/([^/]+)/?$:$1:; # Remove dir from argument. If ending in /, remove final /
- } elsif($replacement_string eq "{//}") {
- # Only load File::Basename if actually needed
- $Global::use{"File::Basename"} ||= eval "use File::Basename;";
- $s = dirname($s); # Keep dir from argument.
- } elsif($replacement_string eq "{/.}") {
- $s =~ s:^.*/([^/]+)/?$:$1:; # Remove dir from argument. If ending in /, remove final /
- $s =~ s:\.[^/\.]*$::; # Remove .ext from argument
+ ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
+ if(not $Global::perleval{$perlexpr}) {
+ # Make an anonymous function of the $perlexpr
+ # And more importantly: Compile it only once
+ if($Global::perleval{$perlexpr} =
+ eval('sub { no strict; no warnings; my $job = shift; '.
+ $perlexpr.' }')) {
+ # All is good
+ } else {
+ # The eval failed. Maybe $perlexpr is invalid perl?
+ ::error("Cannot use $perlexpr: $@\n");
+ ::wait_and_exit(255);
+ }
}
- if($Global::JobQueue->quote_args()) {
- $s = ::shell_quote_scalar($s);
- }
- $self->{$replacement_string} = $s;
+ # Execute the function
+ $Global::perleval{$perlexpr}->($job);
+ $self->{"rpl",0,$perlexpr} = $_;
}
- return $self->{$replacement_string};
+ if(not defined $self->{"rpl",$quote,$perlexpr}) {
+ $self->{"rpl",1,$perlexpr} =
+ ::shell_quote_scalar($self->{"rpl",0,$perlexpr});
+ }
+ return $self->{"rpl",$quote,$perlexpr};
}
sub orig {
@@ -4913,14 +8876,13 @@ sub trim_of {
if($Global::trim eq "n") {
# skip
} elsif($Global::trim eq "l") {
- for $arg (@strings) { $arg =~ s/^\s+//; }
+ for my $arg (@strings) { $arg =~ s/^\s+//; }
} elsif($Global::trim eq "r") {
- for $arg (@strings) { $arg =~ s/\s+$//; }
+ for my $arg (@strings) { $arg =~ s/\s+$//; }
} elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
- for $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
+ for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
} else {
- print STDERR "$Global::progname: --trim must be one of: r l ".
- "rl lr\n";
+ ::error("--trim must be one of: r l rl lr.\n");
::wait_and_exit(255);
}
return wantarray ? @strings : "@strings";
@@ -4932,26 +8894,82 @@ package TimeoutQueue;
sub new {
my $class = shift;
my $delta_time = shift;
-
+ my ($pct);
+ if($delta_time =~ /(\d+(\.\d+)?)%/) {
+ # Timeout in percent
+ $pct = $1/100;
+ $delta_time = 1_000_000;
+ }
return bless {
'queue' => [],
'delta_time' => $delta_time,
+ 'pct' => $pct,
+ 'remedian_idx' => 0,
+ 'remedian_arr' => [],
+ 'remedian' => undef,
}, ref($class) || $class;
}
+sub delta_time {
+ my $self = shift;
+ return $self->{'delta_time'};
+}
+
+sub set_delta_time {
+ my $self = shift;
+ $self->{'delta_time'} = shift;
+}
+
+sub remedian {
+ my $self = shift;
+ return $self->{'remedian'};
+}
+
+sub set_remedian {
+ # Set median of the last 999^3 (=997002999) values using Remedian
+ #
+ # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
+ # robust averaging method for large data sets." Journal of the
+ # American Statistical Association 85.409 (1990): 97-104.
+ my $self = shift;
+ my $val = shift;
+ my $i = $self->{'remedian_idx'}++;
+ my $rref = $self->{'remedian_arr'};
+ $rref->[0][$i%999] = $val;
+ $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
+ $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
+ $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
+}
+
+sub update_median_runtime {
+ # Update delta_time based on runtime of finished job if timeout is
+ # a percentage
+ my $self = shift;
+ my $runtime = shift;
+ if($self->{'pct'}) {
+ $self->set_remedian($runtime);
+ $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
+ ::debug("run", "Timeout: $self->{'delta_time'}s ");
+ }
+}
+
sub process_timeouts {
# Check if there was a timeout
my $self = shift;
- # @Global::timeout is sorted by timeout
+ # $self->{'queue'} is sorted by start time
while (@{$self->{'queue'}}) {
my $job = $self->{'queue'}[0];
- if($job->timedout()) {
+ if($job->endtime()) {
+ # Job already finished. No need to timeout the job
+ # This could be because of --keep-order
+ shift @{$self->{'queue'}};
+ } elsif($job->timedout($self->{'delta_time'})) {
# Need to shift off queue before kill
- # because kill calls usleep -> process_timeouts
+ # because kill calls usleep that calls process_timeouts
shift @{$self->{'queue'}};
$job->kill();
} else {
- # Because they are sorted by timeout
+ # Because they are sorted by start time the rest are later
last;
}
}
@@ -4960,24 +8978,7 @@ sub process_timeouts {
sub insert {
my $self = shift;
my $in = shift;
- my $lower = 0;
- my $upper = $#{$self->{'queue'}};
- my $looking = int(($lower + $upper)/2);
- my $in_time = $in->timeout();
-
- # Find the position between $lower and $upper
- while($lower < $upper) {
- if($self->{'queue'}[$looking]->timeout() < $in_time) {
- # Upper half
- $lower = $looking+1;
- } else {
- # Lower half
- $upper = $looking;
- }
- $looking = int(($lower + $upper)/2);
- }
- # splice at position $looking
- splice @{$self->{'queue'}}, $looking, 0, $in;
+ push @{$self->{'queue'}}, $in;
}
@@ -4997,12 +8998,12 @@ sub new {
my $class = shift;
my $id = shift;
my $count = shift;
- $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
- $id="id-".$id; # To distinguish it from a process id
+ $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
+ $id = "id-".$id; # To distinguish it from a process id
my $parallel_dir = $ENV{'HOME'}."/.parallel";
- -d $parallel_dir or mkdir $parallel_dir;
+ -d $parallel_dir or mkdir_or_die($parallel_dir);
my $parallel_locks = $parallel_dir."/semaphores";
- -d $parallel_locks or mkdir $parallel_locks;
+ -d $parallel_locks or mkdir_or_die($parallel_locks);
my $lockdir = "$parallel_locks/$id";
my $lockfile = $lockdir.".lock";
if($count < 1) { ::die_bug("semaphore-count: $count"); }
@@ -5018,46 +9019,61 @@ sub new {
}, ref($class) || $class;
}
+sub remove_dead_locks {
+ my $self = shift;
+ my $lockdir = $self->{'lockdir'};
+
+ for my $d (glob "$lockdir/*") {
+ $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
+ my ($pid, $host) = ($1, $2);
+ if($host eq ::hostname()) {
+ if(not kill 0, $pid) {
+ ::debug("sem", "Dead: $d\n");
+ unlink $d;
+ } else {
+ ::debug("sem", "Alive: $d\n");
+ }
+ }
+ }
+}
+
sub acquire {
my $self = shift;
my $sleep = 1; # 1 ms
my $start_time = time;
while(1) {
+ # Can we get a lock?
$self->atomic_link_if_count_less_than() and last;
- ::debug("Remove dead locks");
- my $lockdir = $self->{'lockdir'};
- for my $d (<$lockdir/*>) {
- ::debug("Lock $d $lockdir\n");
- $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
- my ($pid, $host) = ($1,$2);
- if($host eq ::hostname()) {
- if(not kill 0, $1) {
- ::debug("Dead: $d");
- unlink $d;
- } else {
- ::debug("Alive: $d");
- }
- }
- }
- # try again
- $self->atomic_link_if_count_less_than() and last;
+ $self->remove_dead_locks();
# Retry slower and slower up to 1 second
$sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
# Random to avoid every sleeping job waking up at the same time
::usleep(rand()*$sleep);
- if(defined($::opt_timeout) and
- $start_time + $::opt_timeout > time) {
- # Acquire the lock anyway
- if(not -e $self->{'idfile'}) {
- open (A, ">", $self->{'idfile'}) or
- ::die_bug("write_idfile: $self->{'idfile'}");
- close A;
+ if($opt::semaphoretimeout) {
+ if($opt::semaphoretimeout > 0
+ and
+ time - $start_time > $opt::semaphoretimeout) {
+ # Timeout: Take the semaphore anyway
+ ::warning("Semaphore timed out. Stealing the semaphore.\n");
+ if(not -e $self->{'idfile'}) {
+ open (my $fh, ">", $self->{'idfile'}) or
+ ::die_bug("timeout_write_idfile: $self->{'idfile'}");
+ close $fh;
+ }
+ link $self->{'idfile'}, $self->{'pidfile'};
+ last;
+ }
+ if($opt::semaphoretimeout < 0
+ and
+ time - $start_time > -$opt::semaphoretimeout) {
+ # Timeout: Exit
+ ::warning("Semaphore timed out. Exiting.\n");
+ exit(1);
+ last;
}
- link $self->{'idfile'}, $self->{'pidfile'};
- last;
}
}
- ::debug("acquired $self->{'pid'}\n");
+ ::debug("sem", "acquired $self->{'pid'}\n");
}
sub release {
@@ -5072,7 +9088,20 @@ sub release {
}
$self->unlock();
}
- ::debug("released $self->{'pid'}\n");
+ ::debug("run", "released $self->{'pid'}\n");
+}
+
+sub pid_change {
+ # This should do what release()+acquire() would do without having
+ # to re-acquire the semaphore
+ my $self = shift;
+
+ my $old_pidfile = $self->{'pidfile'};
+ $self->{'pid'} = $$;
+ $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
+ my $retval = link $self->{'idfile'}, $self->{'pidfile'};
+ ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
+ unlink $old_pidfile;
}
sub atomic_link_if_count_less_than {
@@ -5080,25 +9109,26 @@ sub atomic_link_if_count_less_than {
my $self = shift;
my $retval = 0;
$self->lock();
- ::debug($self->nlinks()."<".$self->{'count'});
- if($self->nlinks() < $self->{'count'}) {
- -d $self->{'lockdir'} || mkdir $self->{'lockdir'};
+ my $nlinks = $self->nlinks();
+ ::debug("sem","$nlinks<$self->{'count'} ");
+ if($nlinks < $self->{'count'}) {
+ -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'});
if(not -e $self->{'idfile'}) {
- open (A, ">", $self->{'idfile'}) or
+ open (my $fh, ">", $self->{'idfile'}) or
::die_bug("write_idfile: $self->{'idfile'}");
- close A;
+ close $fh;
}
$retval = link $self->{'idfile'}, $self->{'pidfile'};
+ ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
}
$self->unlock();
- ::debug("atomic $retval");
+ ::debug("sem", "atomic $retval");
return $retval;
}
sub nlinks {
my $self = shift;
if(-e $self->{'idfile'}) {
- ::debug("nlinks".((stat(_))[3])."\n");
return (stat(_))[3];
} else {
return 0;
@@ -5108,36 +9138,90 @@ sub nlinks {
sub lock {
my $self = shift;
my $sleep = 100; # 100 ms
- open $self->{'lockfh'}, ">", $self->{'lockfile'}
- or ::die_bug("Can't open semaphore file $self->{'lockfile'}: $!");
- chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
- $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock);";
- while(not flock $self->{'lockfh'}, LOCK_EX()|LOCK_NB()) {
- if ($! =~ m/Function not implemented/) {
- print $Global::original_stderr
- ("parallel: Warning: flock: $!");
- print "parallel: Will wait for a random while\n";
- ::usleep(rand(5000));
- last;
+ my $total_sleep = 0;
+ $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+ my $locked = 0;
+ while(not $locked) {
+ if(tell($self->{'lockfh'}) == -1) {
+ # File not open
+ open($self->{'lockfh'}, ">", $self->{'lockfile'})
+ or ::debug("run", "Cannot open $self->{'lockfile'}");
}
-
- ::debug("Cannot lock $self->{'lockfile'}");
- # TODO if timeout: last
+ if($self->{'lockfh'}) {
+ # File is open
+ chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
+ if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
+ # The file is locked: No need to retry
+ $locked = 1;
+ last;
+ } else {
+ if ($! =~ m/Function not implemented/) {
+ ::warning("flock: $!");
+ ::warning("Will wait for a random while\n");
+ ::usleep(rand(5000));
+ # File cannot be locked: No need to retry
+ $locked = 2;
+ last;
+ }
+ }
+ }
+ # Locking failed in first round
+ # Sleep and try again
$sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
# Random to avoid every sleeping job waking up at the same time
::usleep(rand()*$sleep);
+ $total_sleep += $sleep;
+ if($opt::semaphoretimeout) {
+ if($opt::semaphoretimeout > 0
+ and
+ $total_sleep/1000 > $opt::semaphoretimeout) {
+ # Timeout: Take the semaphore anyway
+ ::warning("Semaphore timed out. Taking the semaphore.");
+ $locked = 3;
+ last;
+ }
+ if($opt::semaphoretimeout < 0
+ and
+ $total_sleep/1000 > -$opt::semaphoretimeout) {
+ # Timeout: Exit
+ ::warning("Semaphore timed out. Exiting.");
+ $locked = 4;
+ last;
+ }
+ } else {
+ if($total_sleep/1000 > 30) {
+ ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout.");
+ }
+ }
}
- ::debug("locked $self->{'lockfile'}");
+ ::debug("run", "locked $self->{'lockfile'}");
}
sub unlock {
my $self = shift;
unlink $self->{'lockfile'};
close $self->{'lockfh'};
- ::debug("unlocked\n");
+ ::debug("run", "unlocked\n");
+}
+
+sub mkdir_or_die {
+ # If dir is not writable: die
+ my $dir = shift;
+ my @dir_parts = split(m:/:,$dir);
+ my ($ddir,$part);
+ while(defined ($part = shift @dir_parts)) {
+ $part eq "" and next;
+ $ddir .= "/".$part;
+ -d $ddir and next;
+ mkdir $ddir;
+ }
+ if(not -w $dir) {
+ ::error("Cannot write to $dir: $!\n");
+ ::wait_and_exit(255);
+ }
}
# Keep perl -w happy
-$::opt_x = $Semaphore::timeout = $Semaphore::wait = $::opt_shebang =
-0;
-
+$opt::ctrlc = $opt::x = $Semaphore::timeout = $Semaphore::wait =
+$opt::ignored_option = $Job::file_descriptor_warning_printed =
+$Global::envdef = 0;
diff --git a/sem b/sem
new file mode 100755
index 0000000..9a92e45
--- /dev/null
+++ b/sem
@@ -0,0 +1,9227 @@
+#!/usr/bin/env perl
+
+# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange
+# and Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see
+# or write to the Free Software Foundation, Inc., 51 Franklin St,
+# Fifth Floor, Boston, MA 02110-1301 USA
+
+# open3 used in Job::start
+use IPC::Open3;
+# &WNOHANG used in reaper
+use POSIX qw(:sys_wait_h setsid ceil :errno_h);
+# gensym used in Job::start
+use Symbol qw(gensym);
+# tempfile used in Job::start
+use File::Temp qw(tempfile tempdir);
+# mkpath used in openresultsfile
+use File::Path;
+# GetOptions used in get_options_from_array
+use Getopt::Long;
+# Used to ensure code quality
+use strict;
+use File::Basename;
+
+save_stdin_stdout_stderr();
+save_original_signal_handler();
+parse_options();
+::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n");
+my $number_of_args;
+if($Global::max_number_of_args) {
+ $number_of_args=$Global::max_number_of_args;
+} elsif ($opt::X or $opt::m or $opt::xargs) {
+ $number_of_args = undef;
+} else {
+ $number_of_args = 1;
+}
+
+my @command = @ARGV;
+
+my @input_source_fh;
+if($opt::pipepart) {
+ @input_source_fh = map { open_or_exit($_) } "/dev/null";
+} else {
+ @input_source_fh = map { open_or_exit($_) } @opt::a;
+ if(not @input_source_fh and not $opt::pipe) {
+ @input_source_fh = (*STDIN);
+ }
+}
+
+if($opt::skip_first_line) {
+ # Skip the first line for the first file handle
+ my $fh = $input_source_fh[0];
+ <$fh>;
+}
+if($opt::header and not $opt::pipe) {
+ # split with colsep or \t
+ # $header force $colsep = \t if undef?
+ my $delimiter = $opt::colsep;
+ $delimiter ||= "\t";
+ my $id = 1;
+ for my $fh (@input_source_fh) {
+ my $line = <$fh>;
+ chomp($line);
+ ::debug("init", "Delimiter: '$delimiter'");
+ for my $s (split /$delimiter/o, $line) {
+ ::debug("init", "Colname: '$s'");
+ # Replace {colname} with {2}
+ # TODO accept configurable short hands
+ # TODO how to deal with headers in {=...=}
+ for(@command) {
+ s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g;
+ }
+ $Global::input_source_header{$id} = $s;
+ $id++;
+ }
+ }
+} else {
+ my $id = 1;
+ for my $fh (@input_source_fh) {
+ $Global::input_source_header{$id} = $id;
+ $id++;
+ }
+}
+
+if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) {
+ # Parallel check all hosts are up. Remove hosts that are down
+ filter_hosts();
+}
+
+if($opt::nonall or $opt::onall) {
+ onall(\@input_source_fh,@command);
+ wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
+}
+
+# TODO --transfer foo/./bar --cleanup
+# multiple --transfer and --basefile with different /./
+
+$Global::JobQueue = JobQueue->new(
+ \@command,\@input_source_fh,$Global::ContextReplace,$number_of_args,\@Global::ret_files);
+
+if($opt::eta or $opt::bar or $opt::shuf) {
+ # Count the number of jobs or shuffle all jobs
+ # before starting any
+ $Global::JobQueue->total_jobs();
+}
+if($opt::pipepart) {
+ @Global::cat_partials = map { pipe_part_files($_) } @opt::a;
+ # Unget the command as many times as there are parts
+ $Global::JobQueue->{'commandlinequeue'}->unget(
+ map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials
+ );
+}
+for my $sshlogin (values %Global::host) {
+ $sshlogin->max_jobs_running();
+}
+
+init_run_jobs();
+my $sem;
+if($Global::semaphore) {
+ $sem = acquire_semaphore();
+}
+$SIG{TERM} = \&start_no_new_jobs;
+start_more_jobs();
+if(not $opt::pipepart) {
+ if($opt::pipe) {
+ spreadstdin();
+ }
+}
+::debug("init", "Start draining\n");
+drain_job_queue();
+::debug("init", "Done draining\n");
+reaper();
+::debug("init", "Done reaping\n");
+if($opt::pipe and @opt::a) {
+ for my $job (@Global::tee_jobs) {
+ unlink $job->fh(2,"name");
+ $job->set_fh(2,"name","");
+ $job->print();
+ unlink $job->fh(1,"name");
+ }
+}
+::debug("init", "Cleaning\n");
+cleanup();
+if($Global::semaphore) {
+ $sem->release();
+}
+for(keys %Global::sshmaster) {
+ # If 'ssh -M's are running: kill them
+ kill "TERM", $_;
+}
+::debug("init", "Halt\n");
+if($opt::halt) {
+ wait_and_exit($Global::halt_exitstatus);
+} else {
+ wait_and_exit(min(undef_as_zero($Global::exitstatus),254));
+}
+
+sub __PIPE_MODE__ {}
+
+sub pipe_part_files {
+ # Input:
+ # $file = the file to read
+ # Returns:
+ # @commands that will cat_partial each part
+ my ($file) = @_;
+ my $buf = "";
+ my $header = find_header(\$buf,open_or_exit($file));
+ # find positions
+ my @pos = find_split_positions($file,$opt::blocksize,length $header);
+ # Make @cat_partials
+ my @cat_partials = ();
+ for(my $i=0; $i<$#pos; $i++) {
+ push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]);
+ }
+ # Remote exec should look like:
+ # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\)
+ # ssh -tt not allowed. Remote will die due to broken pipe anyway.
+ return @cat_partials;
+}
+
+sub find_header {
+ # Input:
+ # $buf_ref = reference to read-in buffer
+ # $fh = filehandle to read from
+ # Uses:
+ # $opt::header
+ # $opt::blocksize
+ # Returns:
+ # $header string
+ my ($buf_ref, $fh) = @_;
+ my $header = "";
+ if($opt::header) {
+ if($opt::header eq ":") { $opt::header = "(.*\n)"; }
+ # Number = number of lines
+ $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e;
+ while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) {
+ if($$buf_ref=~s/^($opt::header)//) {
+ $header = $1;
+ last;
+ }
+ }
+ }
+ return $header;
+}
+
+sub find_split_positions {
+ # Input:
+ # $file = the file to read
+ # $block = (minimal) --block-size of each chunk
+ # $headerlen = length of header to be skipped
+ # Uses:
+ # $opt::recstart
+ # $opt::recend
+ # Returns:
+ # @positions of block start/end
+ my($file, $block, $headerlen) = @_;
+ my $size = -s $file;
+ $block = int $block;
+ # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20
+ # The optimal dd blocksize for freebsd = 2^15..2^17
+ my $dd_block_size = 131072; # 2^17
+ my @pos;
+ my ($recstart,$recend) = recstartrecend();
+ my $recendrecstart = $recend.$recstart;
+ my $fh = ::open_or_exit($file);
+ push(@pos,$headerlen);
+ for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) {
+ my $buf;
+ seek($fh, $pos, 0) || die;
+ while(read($fh,substr($buf,length $buf,0),$dd_block_size)) {
+ if($opt::regexp) {
+ # If match /$recend$recstart/ => Record position
+ if($buf =~ /^(.*$recend)$recstart/os) {
+ # Start looking for next record _after_ this match
+ $pos += length($1);
+ push(@pos,$pos);
+ last;
+ }
+ } else {
+ # If match $recend$recstart => Record position
+ my $i = index64(\$buf,$recendrecstart);
+ if($i != -1) {
+ # Start looking for next record _after_ this match
+ $pos += $i + length($recendrecstart);
+ push(@pos,$pos);
+ last;
+ }
+ }
+ }
+ }
+ push(@pos,$size);
+ close $fh;
+ return @pos;
+}
+
+sub cat_partial {
+ # Input:
+ # $file = the file to read
+ # ($start, $end, [$start2, $end2, ...]) = start byte, end byte
+ # Returns:
+ # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout
+ my($file, @start_end) = @_;
+ my($start, $i);
+ # Convert start_end to start_len
+ my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end;
+ return "<". shell_quote_scalar($file) .
+ q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } .
+ " @start_len";
+}
+
+sub spreadstdin {
+ # read a record
+ # Spawn a job and print the record to it.
+ # Uses:
+ # $opt::blocksize
+ # STDIN
+ # $opt::r
+ # $Global::max_lines
+ # $Global::max_number_of_args
+ # $opt::regexp
+ # $Global::start_no_new_jobs
+ # $opt::roundrobin
+ # %Global::running
+ # Returns: N/A
+
+ my $buf = "";
+ my ($recstart,$recend) = recstartrecend();
+ my $recendrecstart = $recend.$recstart;
+ my $chunk_number = 1;
+ my $one_time_through;
+ my $two_gb = 2**31-1;
+ my $blocksize = $opt::blocksize;
+ my $in = *STDIN;
+ my $header = find_header(\$buf,$in);
+ while(1) {
+ my $anything_written = 0;
+ if(not read($in,substr($buf,length $buf,0),$blocksize)) {
+ # End-of-file
+ $chunk_number != 1 and last;
+ # Force the while-loop once if everything was read by header reading
+ $one_time_through++ and last;
+ }
+ if($opt::r) {
+ # Remove empty lines
+ $buf =~ s/^\s*\n//gm;
+ if(length $buf == 0) {
+ next;
+ }
+ }
+ if($Global::max_lines and not $Global::max_number_of_args) {
+ # Read n-line records
+ my $n_lines = $buf =~ tr/\n/\n/;
+ my $last_newline_pos = rindex64(\$buf,"\n");
+ while($n_lines % $Global::max_lines) {
+ $n_lines--;
+ $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1);
+ }
+ # Chop at $last_newline_pos as that is where n-line record ends
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$buf,
+ $recstart,$recend,$last_newline_pos+1);
+ shorten(\$buf,$last_newline_pos+1);
+ } elsif($opt::regexp) {
+ if($Global::max_number_of_args) {
+ # -N => (start..*?end){n}
+ # -L -N => (start..*?end){n*l}
+ my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
+ while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) {
+ # Copy to modifiable variable
+ my $b = $1;
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$b,
+ $recstart,$recend,length $1);
+ }
+ } else {
+ # Find the last recend-recstart in $buf
+ if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) {
+ # Copy to modifiable variable
+ my $b = $1;
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$b,
+ $recstart,$recend,length $1);
+ }
+ }
+ } else {
+ if($Global::max_number_of_args) {
+ # -N => (start..*?end){n}
+ my $i = 0;
+ my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1);
+ while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) {
+ $i += length $recend; # find the actual splitting location
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$buf,
+ $recstart,$recend,$i);
+ shorten(\$buf,$i);
+ }
+ } else {
+ # Find the last recend+recstart in $buf
+ my $i = rindex64(\$buf,$recendrecstart);
+ if($i != -1) {
+ $i += length $recend; # find the actual splitting location
+ $anything_written +=
+ write_record_to_pipe($chunk_number++,\$header,\$buf,
+ $recstart,$recend,$i);
+ shorten(\$buf,$i);
+ }
+ }
+ }
+ if(not $anything_written and not eof($in)) {
+ # Nothing was written - maybe the block size < record size?
+ # Increase blocksize exponentially up to 2GB-1 (2GB causes problems)
+ if($blocksize < $two_gb) {
+ my $old_blocksize = $blocksize;
+ $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb);
+ ::warning("A record was longer than $old_blocksize. " .
+ "Increasing to --blocksize $blocksize\n");
+ }
+ }
+ }
+ ::debug("init", "Done reading input\n");
+
+ # If there is anything left in the buffer write it
+ write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf);
+
+ $Global::start_no_new_jobs ||= 1;
+ if($opt::roundrobin) {
+ for my $job (values %Global::running) {
+ close $job->fh(0,"w");
+ }
+ my %incomplete_jobs = %Global::running;
+ my $sleep = 1;
+ while(keys %incomplete_jobs) {
+ my $something_written = 0;
+ for my $pid (keys %incomplete_jobs) {
+ my $job = $incomplete_jobs{$pid};
+ if($job->stdin_buffer_length()) {
+ $something_written += $job->non_block_write();
+ } else {
+ delete $incomplete_jobs{$pid}
+ }
+ }
+ if($something_written) {
+ $sleep = $sleep/2+0.001;
+ }
+ $sleep = ::reap_usleep($sleep);
+ }
+ }
+}
+
+sub recstartrecend {
+ # Uses:
+ # $opt::recstart
+ # $opt::recend
+ # Returns:
+ # $recstart,$recend with default values and regexp conversion
+ my($recstart,$recend);
+ if(defined($opt::recstart) and defined($opt::recend)) {
+ # If both --recstart and --recend is given then both must match
+ $recstart = $opt::recstart;
+ $recend = $opt::recend;
+ } elsif(defined($opt::recstart)) {
+ # If --recstart is given it must match start of record
+ $recstart = $opt::recstart;
+ $recend = "";
+ } elsif(defined($opt::recend)) {
+ # If --recend is given then it must match end of record
+ $recstart = "";
+ $recend = $opt::recend;
+ }
+
+ if($opt::regexp) {
+ # If $recstart/$recend contains '|' this should only apply to the regexp
+ $recstart = "(?:".$recstart.")";
+ $recend = "(?:".$recend.")";
+ } else {
+ # $recstart/$recend = printf strings (\n)
+ $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
+ $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee;
+ }
+ return ($recstart,$recend);
+}
+
+sub nindex {
+ # See if string is in buffer N times
+ # Returns:
+ # the position where the Nth copy is found
+ my ($buf_ref, $str, $n) = @_;
+ my $i = 0;
+ my $two_gb = 2**31-1;
+ for(1..$n) {
+ $i = index64($buf_ref,$str,$i+1);
+ if($i == -1) { last }
+ }
+ return $i;
+}
+
+{
+ my @robin_queue;
+
+ sub round_robin_write {
+ # Input:
+ # $header_ref = ref to $header string
+ # $block_ref = ref to $block to be written
+ # $recstart = record start string
+ # $recend = record end string
+ # $endpos = end position of $block
+ # Uses:
+ # %Global::running
+ # Returns:
+ # $something_written = amount of bytes written
+ my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_;
+ my $something_written = 0;
+ my $block_passed = 0;
+ my $sleep = 1;
+ while(not $block_passed) {
+ # Continue flushing existing buffers
+ # until one is empty and a new block is passed
+ # Make a queue to spread the blocks evenly
+ if(not @robin_queue) {
+ push @robin_queue, (sort { $a->seq() <=> $b->seq() }
+ values %Global::running);
+ }
+ while(my $job = shift @robin_queue) {
+ if($job->stdin_buffer_length() > 0) {
+ $something_written += $job->non_block_write();
+ } else {
+ $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend);
+ $block_passed = 1;
+ $job->set_virgin(0);
+ $something_written += $job->non_block_write();
+ last;
+ }
+ }
+ $sleep = ::reap_usleep($sleep);
+ }
+ return $something_written;
+ }
+}
+
+sub index64 {
+ # Do index on strings > 2GB.
+ # index in Perl < v5.22 does not work for > 2GB
+ # Input:
+ # as index except STR which must be passed as a reference
+ # Output:
+ # as index
+ my $ref = shift;
+ my $match = shift;
+ my $pos = shift || 0;
+ my $block_size = 2**31-1;
+ my $strlen = length($$ref);
+ # No point in doing extra work if we don't need to.
+ if($strlen < $block_size or $] > 5.022) {
+ return index($$ref, $match, $pos);
+ }
+
+ my $matchlen = length($match);
+ my $ret;
+ my $offset = $pos;
+ while($offset < $strlen) {
+ $ret = index(
+ substr($$ref, $offset, $block_size),
+ $match, $pos-$offset);
+ if($ret != -1) {
+ return $ret + $offset;
+ }
+ $offset += ($block_size - $matchlen - 1);
+ }
+ return -1;
+}
+
+sub rindex64 {
+ # Do rindex on strings > 2GB.
+ # rindex in Perl < v5.22 does not work for > 2GB
+ # Input:
+ # as rindex except STR which must be passed as a reference
+ # Output:
+ # as rindex
+ my $ref = shift;
+ my $match = shift;
+ my $pos = shift;
+ my $block_size = 2**31-1;
+ my $strlen = length($$ref);
+ # Default: search from end
+ $pos = defined $pos ? $pos : $strlen;
+ # No point in doing extra work if we don't need to.
+ if($strlen < $block_size) {
+ return rindex($$ref, $match, $pos);
+ }
+
+ my $matchlen = length($match);
+ my $ret;
+ my $offset = $pos - $block_size + $matchlen;
+ if($offset < 0) {
+ # The offset is less than a $block_size
+ # Set the $offset to 0 and
+ # Adjust block_size accordingly
+ $block_size = $block_size + $offset;
+ $offset = 0;
+ }
+ while($offset >= 0) {
+ $ret = rindex(
+ substr($$ref, $offset, $block_size),
+ $match);
+ if($ret != -1) {
+ return $ret + $offset;
+ }
+ $offset -= ($block_size - $matchlen - 1);
+ }
+ return -1;
+}
+
+sub shorten {
+ # Do: substr($buf,0,$i) = "";
+ # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks
+ # Input:
+ # $buf_ref = \$buf
+ # $i = position to shorten to
+ # Returns: N/A
+ my ($buf_ref, $i) = @_;
+ my $two_gb = 2**31-1;
+ while($i > $two_gb) {
+ substr($$buf_ref,0,$two_gb) = "";
+ $i -= $two_gb;
+ }
+ substr($$buf_ref,0,$i) = "";
+}
+
+sub write_record_to_pipe {
+ # Fork then
+ # Write record from pos 0 .. $endpos to pipe
+ # Input:
+ # $chunk_number = sequence number - to see if already run
+ # $header_ref = reference to header string to prepend
+ # $record_ref = reference to record to write
+ # $recstart = start string of record
+ # $recend = end string of record
+ # $endpos = position in $record_ref where record ends
+ # Uses:
+ # $Global::job_already_run
+ # $opt::roundrobin
+ # @Global::virgin_jobs
+ # Returns:
+ # Number of chunks written (0 or 1)
+ my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_;
+ if($endpos == 0) { return 0; }
+ if(vec($Global::job_already_run,$chunk_number,1)) { return 1; }
+ if($opt::roundrobin) {
+ return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos);
+ }
+ # If no virgin found, backoff
+ my $sleep = 0.0001; # 0.01 ms - better performance on highend
+ while(not @Global::virgin_jobs) {
+ ::debug("pipe", "No virgin jobs");
+ $sleep = ::reap_usleep($sleep);
+ # Jobs may not be started because of loadavg
+ # or too little time between each ssh login.
+ start_more_jobs();
+ }
+ my $job = shift @Global::virgin_jobs;
+ # Job is no longer virgin
+ $job->set_virgin(0);
+ # We ignore the removed rec_sep which is technically wrong.
+ $job->add_transfersize($endpos + length $$header_ref);
+ if(fork()) {
+ # Skip
+ } else {
+ # Chop of at $endpos as we do not know how many rec_sep will
+ # be removed.
+ substr($$record_ref,$endpos,length $$record_ref) = "";
+ # Remove rec_sep
+ if($opt::remove_rec_sep) {
+ Job::remove_rec_sep($record_ref,$recstart,$recend);
+ }
+ $job->write($header_ref);
+ $job->write($record_ref);
+ close $job->fh(0,"w");
+ exit(0);
+ }
+ close $job->fh(0,"w");
+ return 1;
+}
+
+sub __SEM_MODE__ {}
+
+sub acquire_semaphore {
+ # Acquires semaphore. If needed: spawns to the background
+ # Uses:
+ # @Global::host
+ # Returns:
+ # The semaphore to be released when jobs is complete
+ $Global::host{':'} = SSHLogin->new(":");
+ my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running());
+ $sem->acquire();
+ if($Semaphore::fg) {
+ # skip
+ } else {
+ if(fork()) {
+ exit(0);
+ } else {
+ # If run in the background, the PID will change
+ $sem->pid_change();
+ }
+ }
+ return $sem;
+}
+
+sub __PARSE_OPTIONS__ {}
+
+sub options_hash {
+ # Returns:
+ # %hash = the GetOptions config
+ return
+ ("debug|D=s" => \$opt::D,
+ "xargs" => \$opt::xargs,
+ "m" => \$opt::m,
+ "X" => \$opt::X,
+ "v" => \@opt::v,
+ "joblog=s" => \$opt::joblog,
+ "results|result|res=s" => \$opt::results,
+ "resume" => \$opt::resume,
+ "resume-failed|resumefailed" => \$opt::resume_failed,
+ "silent" => \$opt::silent,
+ "keep-order|keeporder|k" => \$opt::keeporder,
+ "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder,
+ "group" => \$opt::group,
+ "g" => \$opt::retired,
+ "ungroup|u" => \$opt::ungroup,
+ "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer,
+ "tmux" => \$opt::tmux,
+ "null|0" => \$opt::0,
+ "quote|q" => \$opt::q,
+ # Replacement strings
+ "parens=s" => \$opt::parens,
+ "rpl=s" => \@opt::rpl,
+ "plus" => \$opt::plus,
+ "I=s" => \$opt::I,
+ "extensionreplace|er=s" => \$opt::U,
+ "U=s" => \$opt::retired,
+ "basenamereplace|bnr=s" => \$opt::basenamereplace,
+ "dirnamereplace|dnr=s" => \$opt::dirnamereplace,
+ "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace,
+ "seqreplace=s" => \$opt::seqreplace,
+ "slotreplace=s" => \$opt::slotreplace,
+ "jobs|j=s" => \$opt::jobs,
+ "delay=f" => \$opt::delay,
+ "sshdelay=f" => \$opt::sshdelay,
+ "load=s" => \$opt::load,
+ "noswap" => \$opt::noswap,
+ "max-line-length-allowed" => \$opt::max_line_length_allowed,
+ "number-of-cpus" => \$opt::number_of_cpus,
+ "number-of-cores" => \$opt::number_of_cores,
+ "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores,
+ "shellquote|shell_quote|shell-quote" => \$opt::shellquote,
+ "nice=i" => \$opt::nice,
+ "tag" => \$opt::tag,
+ "tagstring|tag-string=s" => \$opt::tagstring,
+ "onall" => \$opt::onall,
+ "nonall" => \$opt::nonall,
+ "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts,
+ "sshlogin|S=s" => \@opt::sshlogin,
+ "sshloginfile|slf=s" => \@opt::sshloginfile,
+ "controlmaster|M" => \$opt::controlmaster,
+ "return=s" => \@opt::return,
+ "trc=s" => \@opt::trc,
+ "transfer" => \$opt::transfer,
+ "cleanup" => \$opt::cleanup,
+ "basefile|bf=s" => \@opt::basefile,
+ "B=s" => \$opt::retired,
+ "ctrlc|ctrl-c" => \$opt::ctrlc,
+ "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc,
+ "workdir|work-dir|wd=s" => \$opt::workdir,
+ "W=s" => \$opt::retired,
+ "tmpdir=s" => \$opt::tmpdir,
+ "tempdir=s" => \$opt::tmpdir,
+ "use-compress-program|compress-program=s" => \$opt::compress_program,
+ "use-decompress-program|decompress-program=s" => \$opt::decompress_program,
+ "compress" => \$opt::compress,
+ "tty" => \$opt::tty,
+ "T" => \$opt::retired,
+ "H=i" => \$opt::retired,
+ "dry-run|dryrun" => \$opt::dryrun,
+ "progress" => \$opt::progress,
+ "eta" => \$opt::eta,
+ "bar" => \$opt::bar,
+ "shuf" => \$opt::shuf,
+ "arg-sep|argsep=s" => \$opt::arg_sep,
+ "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep,
+ "trim=s" => \$opt::trim,
+ "env=s" => \@opt::env,
+ "recordenv|record-env" => \$opt::record_env,
+ "plain" => \$opt::plain,
+ "profile|J=s" => \@opt::profile,
+ "pipe|spreadstdin" => \$opt::pipe,
+ "robin|round-robin|roundrobin" => \$opt::roundrobin,
+ "recstart=s" => \$opt::recstart,
+ "recend=s" => \$opt::recend,
+ "regexp|regex" => \$opt::regexp,
+ "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep,
+ "files|output-as-files|outputasfiles" => \$opt::files,
+ "block|block-size|blocksize=s" => \$opt::blocksize,
+ "tollef" => \$opt::retired,
+ "gnu" => \$opt::ignored_option,
+ "xapply" => \$opt::xapply,
+ "bibtex" => \$opt::bibtex,
+ "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite,
+ # Termination and retries
+ "halt-on-error|halt=s" => \$opt::halt,
+ "memfree=s" => \$opt::memfree,
+ "retries=i" => \$opt::retries,
+ "timeout=s" => \$opt::timeout,
+ # xargs-compatibility - implemented, man, testsuite
+ "max-procs|P=s" => \$opt::jobs,
+ "delimiter|d=s" => \$opt::d,
+ "max-chars|s=i" => \$opt::max_chars,
+ "arg-file|a=s" => \@opt::a,
+ "no-run-if-empty|r" => \$opt::r,
+ "replace|i:s" => \$opt::i,
+ "E=s" => \$opt::eof,
+ "eof|e:s" => \$opt::eof,
+ "max-args|n=i" => \$opt::max_args,
+ "max-replace-args|N=i" => \$opt::max_replace_args,
+ "colsep|col-sep|C=s" => \$opt::colsep,
+ "help|h" => \$opt::help,
+ "L=f" => \$opt::L,
+ "max-lines|l:f" => \$opt::max_lines,
+ "interactive|p" => \$opt::p,
+ "verbose|t" => \$opt::verbose,
+ "version|V" => \$opt::version,
+ "minversion|min-version=i" => \$opt::minversion,
+ "show-limits|showlimits" => \$opt::show_limits,
+ "exit|x" => \$opt::x,
+ # Semaphore
+ "semaphore" => \$opt::semaphore,
+ "semaphoretimeout|st=i" => \$opt::semaphoretimeout,
+ "semaphorename|id=s" => \$opt::semaphorename,
+ "fg" => \$opt::fg,
+ "bg" => \$opt::bg,
+ "wait" => \$opt::wait,
+ # Shebang #!/usr/bin/parallel --shebang
+ "shebang|hashbang" => \$opt::shebang,
+ "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles,
+ "Y" => \$opt::retired,
+ "skip-first-line" => \$opt::skip_first_line,
+ "header=s" => \$opt::header,
+ "cat" => \$opt::cat,
+ "fifo" => \$opt::fifo,
+ "pipepart|pipe-part" => \$opt::pipepart,
+ "hgrp|hostgroup|hostgroups" => \$opt::hostgroups,
+ );
+}
+
+sub get_options_from_array {
+ # Run GetOptions on @array
+ # Input:
+ # $array_ref = ref to @ARGV to parse
+ # @keep_only = Keep only these options
+ # Uses:
+ # @ARGV
+ # Returns:
+ # true if parsing worked
+ # false if parsing failed
+ # @$array_ref is changed
+ my ($array_ref, @keep_only) = @_;
+ if(not @$array_ref) {
+ # Empty array: No need to look more at that
+ return 1;
+ }
+ # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not
+ # supported everywhere
+ my @save_argv;
+ my $this_is_ARGV = (\@::ARGV == $array_ref);
+ if(not $this_is_ARGV) {
+ @save_argv = @::ARGV;
+ @::ARGV = @{$array_ref};
+ }
+ # If @keep_only set: Ignore all values except @keep_only
+ my %options = options_hash();
+ if(@keep_only) {
+ my (%keep,@dummy);
+ @keep{@keep_only} = @keep_only;
+ for my $k (grep { not $keep{$_} } keys %options) {
+ # Store the value of the option in @dummy
+ $options{$k} = \@dummy;
+ }
+ }
+ my $retval = GetOptions(%options);
+ if(not $this_is_ARGV) {
+ @{$array_ref} = @::ARGV;
+ @::ARGV = @save_argv;
+ }
+ return $retval;
+}
+
+sub parse_options {
+ # Returns: N/A
+ init_globals();
+ @ARGV=read_options();
+
+ # no-* overrides *
+ if($opt::nokeeporder) { $opt::keeporder = undef; }
+ if($opt::noctrlc) { $opt::ctrlc = undef; }
+
+ if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2
+ $Global::debug = $opt::D;
+ $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh";
+ if(defined $opt::X) { $Global::ContextReplace = 1; }
+ if(defined $opt::silent) { $Global::verbose = 0; }
+ if(defined $opt::0) { $/ = "\0"; }
+ if(defined $opt::d) { $/ = unquote_printf($opt::d) }
+ if(defined $opt::tagstring) { $opt::tagstring = unquote_printf($opt::tagstring); }
+ if(defined $opt::p) { $Global::interactive = $opt::p; }
+ if(defined $opt::q) { $Global::quoting = 1; }
+ if(defined $opt::r) { $Global::ignore_empty = 1; }
+ if(defined $opt::verbose) { $Global::stderr_verbose = 1; }
+ parse_replacement_string_options();
+ if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; }
+ if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; }
+ if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); }
+ if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; }
+ if(defined $opt::help) { die_usage(); }
+ if(defined $opt::colsep) { $Global::trim = 'lr'; }
+ if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; }
+ if(defined $opt::trim) { $Global::trim = $opt::trim; }
+ if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; }
+ if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; }
+ if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); }
+ if(defined $opt::number_of_cores) {
+ print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0);
+ }
+ if(defined $opt::max_line_length_allowed) {
+ print Limits::Command::real_max_length(),"\n"; wait_and_exit(0);
+ }
+ if(defined $opt::version) { version(); wait_and_exit(0); }
+ if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); }
+ if(defined $opt::record_env) { record_env(); wait_and_exit(0); }
+ if(defined $opt::show_limits) { show_limits(); }
+ if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; }
+ if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); }
+ if(@opt::return) { push @Global::ret_files, @opt::return; }
+ if(not defined $opt::recstart and
+ not defined $opt::recend) { $opt::recend = "\n"; }
+ if(not defined $opt::blocksize) { $opt::blocksize = "1M"; }
+ $opt::blocksize = multiply_binary_prefix($opt::blocksize);
+ if($opt::blocksize > 2**31-1) {
+ warning("--blocksize >= 2G causes problems. Using 2G-1\n");
+ $opt::blocksize = 2**31-1;
+ }
+ $opt::memfree = multiply_binary_prefix($opt::memfree);
+ if(defined $opt::controlmaster) { $opt::noctrlc = 1; }
+ if(defined $opt::halt and
+ $opt::halt =~ /%/) { $opt::halt /= 100; }
+ if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) {
+ ::error("--timeout must be seconds or percentage\n");
+ wait_and_exit(255);
+ }
+ if(defined $opt::minversion) {
+ print $Global::version,"\n";
+ if($Global::version < $opt::minversion) {
+ wait_and_exit(255);
+ } else {
+ wait_and_exit(0);
+ }
+ }
+ if(not defined $opt::delay) {
+ # Set --delay to --sshdelay if not set
+ $opt::delay = $opt::sshdelay;
+ }
+ if($opt::compress_program) {
+ $opt::compress = 1;
+ $opt::decompress_program ||= $opt::compress_program." -dc";
+ }
+ if($opt::compress) {
+ my ($compress, $decompress) = find_compression_program();
+ $opt::compress_program ||= $compress;
+ $opt::decompress_program ||= $decompress;
+ }
+ if(defined $opt::nonall) {
+ # Append a dummy empty argument
+ push @ARGV, $Global::arg_sep, "";
+ }
+ if(defined $opt::tty) {
+ # Defaults for --tty: -j1 -u
+ # Can be overridden with -jXXX -g
+ if(not defined $opt::jobs) {
+ $opt::jobs = 1;
+ }
+ if(not defined $opt::group) {
+ $opt::ungroup = 1;
+ }
+ }
+ if(@opt::trc) {
+ push @Global::ret_files, @opt::trc;
+ $opt::transfer = 1;
+ $opt::cleanup = 1;
+ }
+ if(defined $opt::max_lines) {
+ if($opt::max_lines eq "-0") {
+ # -l -0 (swallowed -0)
+ $opt::max_lines = 1;
+ $opt::0 = 1;
+ $/ = "\0";
+ } elsif ($opt::max_lines == 0) {
+ # If not given (or if 0 is given) => 1
+ $opt::max_lines = 1;
+ }
+ $Global::max_lines = $opt::max_lines;
+ if(not $opt::pipe) {
+ # --pipe -L means length of record - not max_number_of_args
+ $Global::max_number_of_args ||= $Global::max_lines;
+ }
+ }
+
+ # Read more than one arg at a time (-L, -N)
+ if(defined $opt::L) {
+ $Global::max_lines = $opt::L;
+ if(not $opt::pipe) {
+ # --pipe -L means length of record - not max_number_of_args
+ $Global::max_number_of_args ||= $Global::max_lines;
+ }
+ }
+ if(defined $opt::max_replace_args) {
+ $Global::max_number_of_args = $opt::max_replace_args;
+ $Global::ContextReplace = 1;
+ }
+ if((defined $opt::L or defined $opt::max_replace_args)
+ and
+ not ($opt::xargs or $opt::m)) {
+ $Global::ContextReplace = 1;
+ }
+ if(defined $opt::tag and not defined $opt::tagstring) {
+ # Default = {}
+ $opt::tagstring = $Global::parensleft.$Global::parensright;
+ }
+ if(defined $opt::pipepart and
+ (defined $opt::L or defined $opt::max_lines
+ or defined $opt::max_replace_args)) {
+ ::error("--pipepart is incompatible with --max-replace-args, ",
+ "--max-lines, and -L.\n");
+ wait_and_exit(255);
+ }
+ if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) {
+ # Deal with ::: and ::::
+ @ARGV=read_args_from_command_line();
+ }
+ parse_semaphore();
+
+ if(defined $opt::eta) {
+ $opt::progress = $opt::eta;
+ }
+ if(defined $opt::bar) {
+ $opt::progress = $opt::bar;
+ }
+ if(defined $opt::retired) {
+ ::error("-g has been retired. Use --group.\n");
+ ::error("-B has been retired. Use --bf.\n");
+ ::error("-T has been retired. Use --tty.\n");
+ ::error("-U has been retired. Use --er.\n");
+ ::error("-W has been retired. Use --wd.\n");
+ ::error("-Y has been retired. Use --shebang.\n");
+ ::error("-H has been retired. Use --halt.\n");
+ ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n");
+ ::wait_and_exit(255);
+ }
+ citation_notice();
+
+ parse_sshlogin();
+ parse_env_var();
+
+ if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) {
+ # As we do not know the max line length on the remote machine
+ # long commands generated by xargs may fail
+ # If $opt::max_replace_args is set, it is probably safe
+ ::warning("Using -X or -m with --sshlogin may fail.\n");
+ }
+
+ if(not defined $opt::jobs) {
+ $opt::jobs = "100%";
+ }
+ open_joblog();
+}
+
+sub init_globals {
+ # Defaults:
+ $Global::version = 20150522;
+ $Global::progname = 'parallel';
+ $Global::infinity = 2**31;
+ $Global::debug = 0;
+ $Global::verbose = 0;
+ $Global::quoting = 0;
+ # Read only table with default --rpl values
+ %Global::replace =
+ (
+ '{}' => '',
+ '{#}' => '1 $_=$job->seq()',
+ '{%}' => '1 $_=$job->slot()',
+ '{/}' => 's:.*/::',
+ '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);',
+ '{/.}' => 's:.*/::; s:\.[^/.]+$::;',
+ '{.}' => 's:\.[^/.]+$::',
+ );
+ %Global::plus =
+ (
+ # {} = {+/}/{/}
+ # = {.}.{+.} = {+/}/{/.}.{+.}
+ # = {..}.{+..} = {+/}/{/..}.{+..}
+ # = {...}.{+...} = {+/}/{/...}.{+...}
+ '{+/}' => 's:/[^/]*$::',
+ '{+.}' => 's:.*\.::',
+ '{+..}' => 's:.*\.([^.]*\.):$1:',
+ '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:',
+ '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::',
+ '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
+ '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::',
+ '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::',
+ );
+ # Modifiable copy of %Global::replace
+ %Global::rpl = %Global::replace;
+ $/ = "\n";
+ $Global::ignore_empty = 0;
+ $Global::interactive = 0;
+ $Global::stderr_verbose = 0;
+ $Global::default_simultaneous_sshlogins = 9;
+ $Global::exitstatus = 0;
+ $Global::halt_exitstatus = 0;
+ $Global::arg_sep = ":::";
+ $Global::arg_file_sep = "::::";
+ $Global::trim = 'n';
+ $Global::max_jobs_running = 0;
+ $Global::job_already_run = '';
+ $ENV{'TMPDIR'} ||= "/tmp";
+ if(not $ENV{HOME}) {
+ # $ENV{HOME} is sometimes not set if called from PHP
+ ::warning("\$HOME not set. Using /tmp\n");
+ $ENV{HOME} = "/tmp";
+ }
+}
+
+sub parse_replacement_string_options {
+ # Deal with --rpl
+ # Uses:
+ # %Global::rpl
+ # $Global::parensleft
+ # $Global::parensright
+ # $opt::parens
+ # $Global::parensleft
+ # $Global::parensright
+ # $opt::plus
+ # %Global::plus
+ # $opt::I
+ # $opt::U
+ # $opt::i
+ # $opt::basenamereplace
+ # $opt::dirnamereplace
+ # $opt::seqreplace
+ # $opt::slotreplace
+ # $opt::basenameextensionreplace
+
+ sub rpl {
+ # Modify %Global::rpl
+ # Replace $old with $new
+ my ($old,$new) = @_;
+ if($old ne $new) {
+ $Global::rpl{$new} = $Global::rpl{$old};
+ delete $Global::rpl{$old};
+ }
+ }
+ my $parens = "{==}";
+ if(defined $opt::parens) { $parens = $opt::parens; }
+ my $parenslen = 0.5*length $parens;
+ $Global::parensleft = substr($parens,0,$parenslen);
+ $Global::parensright = substr($parens,$parenslen);
+ if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); }
+ if(defined $opt::I) { rpl('{}',$opt::I); }
+ if(defined $opt::U) { rpl('{.}',$opt::U); }
+ if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); }
+ if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); }
+ if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); }
+ if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); }
+ if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); }
+ if(defined $opt::basenameextensionreplace) {
+ rpl('{/.}',$opt::basenameextensionreplace);
+ }
+ for(@opt::rpl) {
+ # Create $Global::rpl entries for --rpl options
+ # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;"
+ my ($shorthand,$long) = split/ /,$_,2;
+ $Global::rpl{$shorthand} = $long;
+ }
+}
+
+sub parse_semaphore {
+ # Semaphore defaults
+ # Must be done before computing number of processes and max_line_length
+ # because when running as a semaphore GNU Parallel does not read args
+ # Uses:
+ # $opt::semaphore
+ # $Global::semaphore
+ # $opt::semaphoretimeout
+ # $Semaphore::timeout
+ # $opt::semaphorename
+ # $Semaphore::name
+ # $opt::fg
+ # $Semaphore::fg
+ # $opt::wait
+ # $Semaphore::wait
+ # $opt::bg
+ # @opt::a
+ # @Global::unget_argv
+ # $Global::default_simultaneous_sshlogins
+ # $opt::jobs
+ # $Global::interactive
+ $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem'
+ if(defined $opt::semaphore) { $Global::semaphore = 1; }
+ if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; }
+ if(defined $opt::semaphorename) { $Global::semaphore = 1; }
+ if(defined $opt::fg) { $Global::semaphore = 1; }
+ if(defined $opt::bg) { $Global::semaphore = 1; }
+ if(defined $opt::wait) { $Global::semaphore = 1; }
+ if($Global::semaphore) {
+ # A semaphore does not take input from neither stdin nor file
+ @opt::a = ("/dev/null");
+ push(@Global::unget_argv, [Arg->new("")]);
+ $Semaphore::timeout = $opt::semaphoretimeout || 0;
+ if(defined $opt::semaphorename) {
+ $Semaphore::name = $opt::semaphorename;
+ } else {
+ $Semaphore::name = `tty`;
+ chomp $Semaphore::name;
+ }
+ $Semaphore::fg = $opt::fg;
+ $Semaphore::wait = $opt::wait;
+ $Global::default_simultaneous_sshlogins = 1;
+ if(not defined $opt::jobs) {
+ $opt::jobs = 1;
+ }
+ if($Global::interactive and $opt::bg) {
+ ::error("Jobs running in the ".
+ "background cannot be interactive.\n");
+ ::wait_and_exit(255);
+ }
+ }
+}
+
+sub record_env {
+ # Record current %ENV-keys in ~/.parallel/ignored_vars
+ # Returns: N/A
+ my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars";
+ if(open(my $vars_fh, ">", $ignore_filename)) {
+ print $vars_fh map { $_,"\n" } keys %ENV;
+ } else {
+ ::error("Cannot write to $ignore_filename.\n");
+ ::wait_and_exit(255);
+ }
+}
+
+sub parse_env_var {
+ # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen
+ #
+ # Bash functions must be parsed to export them remotely
+ # Pre-shellshock style bash function:
+ # myfunc=() {...
+ # Post-shellshock style bash function (v1):
+ # BASH_FUNC_myfunc()=() {...
+ # Post-shellshock style bash function (v2):
+ # BASH_FUNC_myfunc%%=() {...
+ #
+ # Uses:
+ # $Global::envvar = eval string that will set variables in both bash and csh
+ # $Global::envwarn = If functions are used: Give warning in csh
+ # $Global::envvarlen = length of $Global::envvar
+ # @opt::env
+ # $Global::shell
+ # %ENV
+ # Returns: N/A
+ $Global::envvar = "";
+
+ $Global::envvarlen = length $Global::envvar;
+}
+
+sub open_joblog {
+ # Open joblog as specified by --joblog
+ # Uses:
+ # $opt::resume
+ # $opt::resume_failed
+ # $opt::joblog
+ # $opt::results
+ # $Global::job_already_run
+ # %Global::fd
+ my $append = 0;
+ if(($opt::resume or $opt::resume_failed)
+ and
+ not ($opt::joblog or $opt::results)) {
+ ::error("--resume and --resume-failed require --joblog or --results.\n");
+ ::wait_and_exit(255);
+ }
+ if($opt::joblog) {
+ if($opt::resume || $opt::resume_failed) {
+ if(open(my $joblog_fh, "<", $opt::joblog)) {
+ # Read the joblog
+ $append = <$joblog_fh>; # If there is a header: Open as append later
+ my $joblog_regexp;
+ if($opt::resume_failed) {
+ # Make a regexp that only matches commands with exit+signal=0
+ # 4 host 1360490623.067 3.445 1023 1222 0 0 command
+ $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t';
+ } else {
+ # Just match the job number
+ $joblog_regexp='^(\d+)';
+ }
+ while(<$joblog_fh>) {
+ if(/$joblog_regexp/o) {
+ # This is 30% faster than set_job_already_run($1);
+ vec($Global::job_already_run,($1||0),1) = 1;
+ } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) {
+ ::error("Format of '$opt::joblog' is wrong: $_");
+ ::wait_and_exit(255);
+ }
+ }
+ close $joblog_fh;
+ }
+ }
+ if($append) {
+ # Append to joblog
+ if(not open($Global::joblog, ">>", $opt::joblog)) {
+ ::error("Cannot append to --joblog $opt::joblog.\n");
+ ::wait_and_exit(255);
+ }
+ } else {
+ if($opt::joblog eq "-") {
+ # Use STDOUT as joblog
+ $Global::joblog = $Global::fd{1};
+ } elsif(not open($Global::joblog, ">", $opt::joblog)) {
+ # Overwrite the joblog
+ ::error("Cannot write to --joblog $opt::joblog.\n");
+ ::wait_and_exit(255);
+ }
+ print $Global::joblog
+ join("\t", "Seq", "Host", "Starttime", "JobRuntime",
+ "Send", "Receive", "Exitval", "Signal", "Command"
+ ). "\n";
+ }
+ }
+}
+
+sub find_compression_program {
+ # Find a fast compression program
+ # Returns:
+ # $compress_program = compress program with options
+ # $decompress_program = decompress program with options
+
+ # Search for these. Sorted by speed on 16 core
+ # parallel -j1 --joblog jl --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: gz '>'/dev/null , 1 2 3 , {1..3} , lz4 lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2
+ # sort -nk4 jl
+ my @prg = qw(lz4 pigz lzop plzip pbzip2 pxz gzip lzma xz bzip2 lzip);
+ for my $p (@prg) {
+ if(which($p)) {
+ return ("$p -c -1","$p -dc");
+ }
+ }
+ # Fall back to cat
+ return ("cat","cat");
+}
+
+
+sub read_options {
+ # Read options from command line, profile and $PARALLEL
+ # Uses:
+ # $opt::shebang_wrap
+ # $opt::shebang
+ # @ARGV
+ # $opt::plain
+ # @opt::profile
+ # $ENV{'HOME'}
+ # $ENV{'PARALLEL'}
+ # Returns:
+ # @ARGV_no_opt = @ARGV without --options
+
+ # This must be done first as this may exec myself
+ if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or
+ $ARGV[0] =~ /^--shebang-?wrap/ or
+ $ARGV[0] =~ /^--hashbang/)) {
+ # Program is called from #! line in script
+ # remove --shebang-wrap if it is set
+ $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//);
+ # remove --shebang if it is set
+ $opt::shebang = ($ARGV[0] =~ s/^--shebang *//);
+ # remove --hashbang if it is set
+ $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//);
+ if($opt::shebang) {
+ my $argfile = shell_quote_scalar(pop @ARGV);
+ # exec myself to split $ARGV[0] into separate fields
+ exec "$0 --skip-first-line -a $argfile @ARGV";
+ }
+ if($opt::shebang_wrap) {
+ my @options;
+ my @parser;
+ if ($^O eq 'freebsd') {
+ # FreeBSD's #! puts different values in @ARGV than Linux' does.
+ my @nooptions = @ARGV;
+ get_options_from_array(\@nooptions);
+ while($#ARGV > $#nooptions) {
+ push @options, shift @ARGV;
+ }
+ while(@ARGV and $ARGV[0] ne ":::") {
+ push @parser, shift @ARGV;
+ }
+ if(@ARGV and $ARGV[0] eq ":::") {
+ shift @ARGV;
+ }
+ } else {
+ @options = shift @ARGV;
+ }
+ my $script = shell_quote_scalar(shift @ARGV);
+ # exec myself to split $ARGV[0] into separate fields
+ exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV";
+ }
+ }
+
+ Getopt::Long::Configure("bundling","require_order");
+ my @ARGV_copy = @ARGV;
+ # Check if there is a --profile to set @opt::profile
+ get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage();
+ my @ARGV_profile = ();
+ my @ARGV_env = ();
+ if(not $opt::plain) {
+ # Add options from .parallel/config and other profiles
+ my @config_profiles = (
+ "/etc/parallel/config",
+ $ENV{'HOME'}."/.parallel/config",
+ $ENV{'HOME'}."/.parallelrc");
+ my @profiles = @config_profiles;
+ if(@opt::profile) {
+ # --profile overrides default profiles
+ @profiles = ();
+ for my $profile (@opt::profile) {
+ if(-r $profile) {
+ push @profiles, $profile;
+ } else {
+ push @profiles, $ENV{'HOME'}."/.parallel/".$profile;
+ }
+ }
+ }
+ for my $profile (@profiles) {
+ if(-r $profile) {
+ open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile");
+ while(<$in_fh>) {
+ /^\s*\#/ and next;
+ chomp;
+ push @ARGV_profile, shellwords($_);
+ }
+ close $in_fh;
+ } else {
+ if(grep /^$profile$/, @config_profiles) {
+ # config file is not required to exist
+ } else {
+ ::error("$profile not readable.\n");
+ wait_and_exit(255);
+ }
+ }
+ }
+ # Add options from shell variable $PARALLEL
+ if($ENV{'PARALLEL'}) {
+ @ARGV_env = shellwords($ENV{'PARALLEL'});
+ }
+ }
+ Getopt::Long::Configure("bundling","require_order");
+ get_options_from_array(\@ARGV_profile) || die_usage();
+ get_options_from_array(\@ARGV_env) || die_usage();
+ get_options_from_array(\@ARGV) || die_usage();
+
+ # Prepend non-options to @ARGV (such as commands like 'nice')
+ unshift @ARGV, @ARGV_profile, @ARGV_env;
+ return @ARGV;
+}
+
+sub read_args_from_command_line {
+ # Arguments given on the command line after:
+ # ::: ($Global::arg_sep)
+ # :::: ($Global::arg_file_sep)
+ # Removes the arguments from @ARGV and:
+ # - puts filenames into -a
+ # - puts arguments into files and add the files to -a
+ # Input:
+ # @::ARGV = command option ::: arg arg arg :::: argfiles
+ # Uses:
+ # $Global::arg_sep
+ # $Global::arg_file_sep
+ # $opt::internal_pipe_means_argfiles
+ # $opt::pipe
+ # @opt::a
+ # Returns:
+ # @argv_no_argsep = @::ARGV without ::: and :::: and following args
+ my @new_argv = ();
+ for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) {
+ if($arg eq $Global::arg_sep
+ or
+ $arg eq $Global::arg_file_sep) {
+ my $group = $arg; # This group of arguments is args or argfiles
+ my @group;
+ while(defined ($arg = shift @ARGV)) {
+ if($arg eq $Global::arg_sep
+ or
+ $arg eq $Global::arg_file_sep) {
+ # exit while loop if finding new separator
+ last;
+ } else {
+ # If not hitting ::: or ::::
+ # Append it to the group
+ push @group, $arg;
+ }
+ }
+
+ if($group eq $Global::arg_file_sep
+ or ($opt::internal_pipe_means_argfiles and $opt::pipe)
+ ) {
+ # Group of file names on the command line.
+ # Append args into -a
+ push @opt::a, @group;
+ } elsif($group eq $Global::arg_sep) {
+ # Group of arguments on the command line.
+ # Put them into a file.
+ # Create argfile
+ my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg");
+ unlink($name);
+ # Put args into argfile
+ print $outfh map { $_,$/ } @group;
+ seek $outfh, 0, 0;
+ exit_if_disk_full();
+ # Append filehandle to -a
+ push @opt::a, $outfh;
+ } else {
+ ::die_bug("Unknown command line group: $group");
+ }
+ if(defined($arg)) {
+ # $arg is ::: or ::::
+ redo;
+ } else {
+ # $arg is undef -> @ARGV empty
+ last;
+ }
+ }
+ push @new_argv, $arg;
+ }
+ # Output: @ARGV = command to run with options
+ return @new_argv;
+}
+
+sub cleanup {
+ # Returns: N/A
+ unlink keys %Global::unlink;
+ map { rmdir $_ } keys %Global::unlink;
+ if(@opt::basefile) { cleanup_basefile(); }
+}
+
+sub __QUOTING_ARGUMENTS_FOR_SHELL__ {}
+
+sub shell_quote {
+ # Input:
+ # @strings = strings to be quoted
+ # Output:
+ # @shell_quoted_strings = string quoted with \ as needed by the shell
+ my @strings = (@_);
+ for my $a (@strings) {
+ $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
+ $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \'
+ }
+ return wantarray ? @strings : "@strings";
+}
+
+sub shell_quote_empty {
+ # Inputs:
+ # @strings = strings to be quoted
+ # Returns:
+ # @quoted_strings = empty strings quoted as ''.
+ my @strings = shell_quote(@_);
+ for my $a (@strings) {
+ if($a eq "") {
+ $a = "''";
+ }
+ }
+ return wantarray ? @strings : "@strings";
+}
+
+sub shell_quote_scalar {
+ # Quote the string so shell will not expand any special chars
+ # Inputs:
+ # $string = string to be quoted
+ # Returns:
+ # $shell_quoted = string quoted with \ as needed by the shell
+ my $a = $_[0];
+ if(defined $a) {
+ # Solaris sh wants ^ quoted.
+ # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g;
+ # This is 1% faster than the above
+ $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go;
+ $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \'
+ }
+ return $a;
+}
+
+sub shell_quote_file {
+ # Quote the string so shell will not expand any special chars and prepend ./ if needed
+ # Input:
+ # $filename = filename to be shell quoted
+ # Returns:
+ # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed
+ my $a = shell_quote_scalar(shift);
+ if(defined $a) {
+ if($a =~ m:^/: or $a =~ m:^\./:) {
+ # /abs/path or ./rel/path => skip
+ } else {
+ # rel/path => ./rel/path
+ $a = "./".$a;
+ }
+ }
+ return $a;
+}
+
+sub shellwords {
+ # Input:
+ # $string = shell line
+ # Returns:
+ # @shell_words = $string split into words as shell would do
+ $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;";
+ return Text::ParseWords::shellwords(@_);
+}
+
+sub perl_quote_scalar {
+ # Quote the string so perl's eval will not expand any special chars
+ # Inputs:
+ # $string = string to be quoted
+ # Returns:
+ # $shell_quoted = string quoted with \ as needed by perl's eval
+ my $a = $_[0];
+ if(defined $a) {
+ $a =~ s/[\\\"\$\@]/\\$&/go;
+ }
+ return $a;
+}
+
+sub unquote_printf {
+ # Convert \t \n \r \000 \0
+ $_ = shift;
+ s/\\t/\t/g;
+ s/\\n/\n/g;
+ s/\\r/\r/g;
+ s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge;
+ s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge;
+ return $_;
+}
+
+sub __FILEHANDLES__ {}
+
+
+sub save_stdin_stdout_stderr {
+ # Remember the original STDIN, STDOUT and STDERR
+ # and file descriptors opened by the shell (e.g. 3>/tmp/foo)
+ # Uses:
+ # %Global::fd
+ # $Global::original_stderr
+ # $Global::original_stdin
+ # Returns: N/A
+
+ # Find file descriptors that are already opened (by the shell)
+ for my $fdno (1..61) {
+ # /dev/fd/62 and above are used by bash for <(cmd)
+ my $fh;
+ # 2-argument-open is used to be compatible with old perl 5.8.0
+ # bug #43570: Perl 5.8.0 creates 61 files
+ if(open($fh,">&=$fdno")) {
+ $Global::fd{$fdno}=$fh;
+ }
+ }
+ open $Global::original_stderr, ">&", "STDERR" or
+ ::die_bug("Can't dup STDERR: $!");
+ open $Global::status_fd, ">&", "STDERR" or
+ ::die_bug("Can't dup STDERR: $!");
+ open $Global::original_stdin, "<&", "STDIN" or
+ ::die_bug("Can't dup STDIN: $!");
+}
+
+sub enough_file_handles {
+ # Check that we have enough filehandles available for starting
+ # another job
+ # Uses:
+ # $opt::ungroup
+ # %Global::fd
+ # Returns:
+ # 1 if ungrouped (thus not needing extra filehandles)
+ # 0 if too few filehandles
+ # 1 if enough filehandles
+ if(not $opt::ungroup) {
+ my %fh;
+ my $enough_filehandles = 1;
+ # perl uses 7 filehandles for something?
+ # open3 uses 2 extra filehandles temporarily
+ # We need a filehandle for each redirected file descriptor
+ # (normally just STDOUT and STDERR)
+ for my $i (1..(7+2+keys %Global::fd)) {
+ $enough_filehandles &&= open($fh{$i}, "<", "/dev/null");
+ }
+ for (values %fh) { close $_; }
+ return $enough_filehandles;
+ } else {
+ # Ungrouped does not need extra file handles
+ return 1;
+ }
+}
+
+sub open_or_exit {
+ # Open a file name or exit if the file cannot be opened
+ # Inputs:
+ # $file = filehandle or filename to open
+ # Uses:
+ # $Global::stdin_in_opt_a
+ # $Global::original_stdin
+ # Returns:
+ # $fh = file handle to read-opened file
+ my $file = shift;
+ if($file eq "-") {
+ $Global::stdin_in_opt_a = 1;
+ return ($Global::original_stdin || *STDIN);
+ }
+ if(ref $file eq "GLOB") {
+ # This is an open filehandle
+ return $file;
+ }
+ my $fh = gensym;
+ if(not open($fh, "<", $file)) {
+ ::error("Cannot open input file `$file': No such file or directory.\n");
+ wait_and_exit(255);
+ }
+ return $fh;
+}
+
+sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {}
+
+# Variable structure:
+#
+# $Global::running{$pid} = Pointer to Job-object
+# @Global::virgin_jobs = Pointer to Job-object that have received no input
+# $Global::host{$sshlogin} = Pointer to SSHLogin-object
+# $Global::total_running = total number of running jobs
+# $Global::total_started = total jobs started
+# $Global::tty_taken = is the tty in use by a running job?
+# $Global::max_procs_file = filename if --jobs is given a filename
+# $Global::JobQueue = JobQueue object for the queue of jobs
+# $Global::timeoutq = queue of times where jobs timeout
+# $Global::newest_job = Job object of the most recent job started
+# $Global::newest_starttime = timestamp of $Global::newest_job
+# @Global::sshlogin
+# $Global::minimal_command_line_length = minimum length supported by all sshlogins
+# $Global::start_no_new_jobs = should more jobs be started?
+# $Global::original_stderr = file handle for STDERR when the program started
+# $Global::total_started = total number of jobs started
+# $Global::envvar = string to set the shell environment variables
+# $Global::joblog = filehandle of joblog
+# $Global::debug = Is debugging on?
+# $Global::exitstatus = status code of GNU Parallel
+# $Global::quoting = quote the command to run
+
+sub init_run_jobs {
+ # Set Global variables and progress signal handlers
+ # Do the copying of basefiles
+ # Returns: N/A
+ $Global::total_running = 0;
+ $Global::total_started = 0;
+ $Global::tty_taken = 0;
+ $SIG{USR1} = \&list_running_jobs;
+ $SIG{USR2} = \&toggle_progress;
+ if(@opt::basefile) { setup_basefile(); }
+}
+
+{
+ my $last_time;
+ my %last_mtime;
+ my $max_procs_file_last_mod;
+
+ sub changed_procs_file {
+ # If --jobs is a file and it is modfied:
+ # Force recomputing of max_jobs_running for each $sshlogin
+ # Uses:
+ # $Global::max_procs_file
+ # %Global::host
+ # Returns: N/A
+ if($Global::max_procs_file) {
+ # --jobs filename
+ my $mtime = (stat($Global::max_procs_file))[9];
+ $max_procs_file_last_mod ||= 0;
+ if($mtime > $max_procs_file_last_mod) {
+ # file changed: Force re-computing max_jobs_running
+ $max_procs_file_last_mod = $mtime;
+ for my $sshlogin (values %Global::host) {
+ $sshlogin->set_max_jobs_running(undef);
+ }
+ }
+ }
+ }
+
+ sub changed_sshloginfile {
+ # If --slf is changed:
+ # reload --slf
+ # filter_hosts
+ # setup_basefile
+ # Uses:
+ # @opt::sshloginfile
+ # @Global::sshlogin
+ # %Global::host
+ # $opt::filter_hosts
+ # Returns: N/A
+ if(@opt::sshloginfile) {
+ # Is --sshloginfile changed?
+ for my $slf (@opt::sshloginfile) {
+ my $actual_file = expand_slf_shorthand($slf);
+ my $mtime = (stat($actual_file))[9];
+ $last_mtime{$actual_file} ||= $mtime;
+ if($mtime - $last_mtime{$actual_file} > 1) {
+ ::debug("run","--sshloginfile $actual_file changed. reload\n");
+ $last_mtime{$actual_file} = $mtime;
+ # Reload $slf
+ # Empty sshlogins
+ @Global::sshlogin = ();
+ for (values %Global::host) {
+ # Don't start new jobs on any host
+ # except the ones added back later
+ $_->set_max_jobs_running(0);
+ }
+ # This will set max_jobs_running on the SSHlogins
+ read_sshloginfile($actual_file);
+ parse_sshlogin();
+ $opt::filter_hosts and filter_hosts();
+ setup_basefile();
+ }
+ }
+ }
+ }
+
+ sub start_more_jobs {
+ # Run start_another_job() but only if:
+ # * not $Global::start_no_new_jobs set
+ # * not JobQueue is empty
+ # * not load on server is too high
+ # * not server swapping
+ # * not too short time since last remote login
+ # Uses:
+ # %Global::host
+ # $Global::start_no_new_jobs
+ # $Global::JobQueue
+ # $opt::pipe
+ # $opt::load
+ # $opt::noswap
+ # $opt::delay
+ # $Global::newest_starttime
+ # Returns:
+ # $jobs_started = number of jobs started
+ my $jobs_started = 0;
+ my $jobs_started_this_round = 0;
+ if($Global::start_no_new_jobs) {
+ return $jobs_started;
+ }
+ if(time - ($last_time||0) > 1) {
+ # At most do this every second
+ $last_time = time;
+ changed_procs_file();
+ changed_sshloginfile();
+ }
+ do {
+ $jobs_started_this_round = 0;
+ # This will start 1 job on each --sshlogin (if possible)
+ # thus distribute the jobs on the --sshlogins round robin
+ for my $sshlogin (values %Global::host) {
+ if($Global::JobQueue->empty() and not $opt::pipe) {
+ # No more jobs in the queue
+ last;
+ }
+ debug("run", "Running jobs before on ", $sshlogin->string(), ": ",
+ $sshlogin->jobs_running(), "\n");
+ if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) {
+ if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) {
+ # It has been too short since last start
+ next;
+ }
+ if($opt::load and $sshlogin->loadavg_too_high()) {
+ # The load is too high or unknown
+ next;
+ }
+ if($opt::noswap and $sshlogin->swapping()) {
+ # The server is swapping
+ next;
+ }
+ if($opt::memfree and $sshlogin->memfree() < $opt::memfree) {
+ # The server has not enough mem free
+ ::debug("mem", "Not starting job: not enough mem\n");
+ next;
+ }
+ if($sshlogin->too_fast_remote_login()) {
+ # It has been too short since
+ next;
+ }
+ debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(),
+ " out of ", $sshlogin->max_jobs_running(),
+ " jobs running. Start another.\n");
+ if(start_another_job($sshlogin) == 0) {
+ # No more jobs to start on this $sshlogin
+ debug("run","No jobs started on ", $sshlogin->string(), "\n");
+ next;
+ }
+ $sshlogin->inc_jobs_running();
+ $sshlogin->set_last_login_at(::now());
+ $jobs_started++;
+ $jobs_started_this_round++;
+ }
+ debug("run","Running jobs after on ", $sshlogin->string(), ": ",
+ $sshlogin->jobs_running(), " of ",
+ $sshlogin->max_jobs_running(), "\n");
+ }
+ } while($jobs_started_this_round);
+
+ return $jobs_started;
+ }
+}
+
+{
+ my $no_more_file_handles_warned;
+
+ sub start_another_job {
+ # If there are enough filehandles
+ # and JobQueue not empty
+ # and not $job is in joblog
+ # Then grab a job from Global::JobQueue,
+ # start it at sshlogin
+ # mark it as virgin_job
+ # Inputs:
+ # $sshlogin = the SSHLogin to start the job on
+ # Uses:
+ # $Global::JobQueue
+ # $opt::pipe
+ # $opt::results
+ # $opt::resume
+ # @Global::virgin_jobs
+ # Returns:
+ # 1 if another jobs was started
+ # 0 otherwise
+ my $sshlogin = shift;
+ # Do we have enough file handles to start another job?
+ if(enough_file_handles()) {
+ if($Global::JobQueue->empty() and not $opt::pipe) {
+ # No more commands to run
+ debug("start", "Not starting: JobQueue empty\n");
+ return 0;
+ } else {
+ my $job;
+ # Skip jobs already in job log
+ # Skip jobs already in results
+ do {
+ $job = get_job_with_sshlogin($sshlogin);
+ if(not defined $job) {
+ # No command available for that sshlogin
+ debug("start", "Not starting: no jobs available for ",
+ $sshlogin->string(), "\n");
+ return 0;
+ }
+ } while ($job->is_already_in_joblog()
+ or
+ ($opt::results and $opt::resume and $job->is_already_in_results()));
+ debug("start", "Command to run on '", $job->sshlogin()->string(), "': '",
+ $job->replaced(),"'\n");
+ if($job->start()) {
+ if($opt::pipe) {
+ push(@Global::virgin_jobs,$job);
+ }
+ debug("start", "Started as seq ", $job->seq(),
+ " pid:", $job->pid(), "\n");
+ return 1;
+ } else {
+ # Not enough processes to run the job.
+ # Put it back on the queue.
+ $Global::JobQueue->unget($job);
+ # Count down the number of jobs to run for this SSHLogin.
+ my $max = $sshlogin->max_jobs_running();
+ if($max > 1) { $max--; } else {
+ ::error("No more processes: cannot run a single job. Something is wrong.\n");
+ ::wait_and_exit(255);
+ }
+ $sshlogin->set_max_jobs_running($max);
+ # Sleep up to 300 ms to give other processes time to die
+ ::usleep(rand()*300);
+ ::warning("No more processes: ",
+ "Decreasing number of running jobs to $max. ",
+ "Raising ulimit -u or /etc/security/limits.conf may help.\n");
+ return 0;
+ }
+ }
+ } else {
+ # No more file handles
+ $no_more_file_handles_warned++ or
+ ::warning("No more file handles. ",
+ "Raising ulimit -n or /etc/security/limits.conf may help.\n");
+ return 0;
+ }
+ }
+}
+
+sub init_progress {
+ # Uses:
+ # $opt::bar
+ # Returns:
+ # list of computers for progress output
+ $|=1;
+ if($opt::bar) {
+ return("","");
+ }
+ my %progress = progress();
+ return ("\nComputers / CPU cores / Max jobs to run\n",
+ $progress{'workerlist'});
+}
+
+sub drain_job_queue {
+ # Uses:
+ # $opt::progress
+ # $Global::total_running
+ # $Global::max_jobs_running
+ # %Global::running
+ # $Global::JobQueue
+ # %Global::host
+ # $Global::start_no_new_jobs
+ # Returns: N/A
+ if($opt::progress) {
+ ::status(init_progress());
+ }
+ my $last_header = "";
+ my $sleep = 0.2;
+ do {
+ while($Global::total_running > 0) {
+ debug($Global::total_running, "==", scalar
+ keys %Global::running," slots: ", $Global::max_jobs_running);
+ if($opt::pipe) {
+ # When using --pipe sometimes file handles are not closed properly
+ for my $job (values %Global::running) {
+ close $job->fh(0,"w");
+ }
+ }
+ if($opt::progress) {
+ my %progress = progress();
+ if($last_header ne $progress{'header'}) {
+ ::status("\n", $progress{'header'}, "\n");
+ $last_header = $progress{'header'};
+ }
+ ::status("\r",$progress{'status'});
+ }
+ if($Global::total_running < $Global::max_jobs_running
+ and not $Global::JobQueue->empty()) {
+ # These jobs may not be started because of loadavg
+ # or too little time between each ssh login.
+ if(start_more_jobs() > 0) {
+ # Exponential back-on if jobs were started
+ $sleep = $sleep/2+0.001;
+ }
+ }
+ # Exponential back-off sleeping
+ $sleep = ::reap_usleep($sleep);
+ }
+ if(not $Global::JobQueue->empty()) {
+ # These jobs may not be started:
+ # * because there the --filter-hosts has removed all
+ if(not %Global::host) {
+ ::error("There are no hosts left to run on.\n");
+ ::wait_and_exit(255);
+ }
+ # * because of loadavg
+ # * because of too little time between each ssh login.
+ start_more_jobs();
+ $sleep = ::reap_usleep($sleep);
+ if($Global::max_jobs_running == 0) {
+ ::warning("There are no job slots available. Increase --jobs.\n");
+ }
+ }
+ } while ($Global::total_running > 0
+ or
+ not $Global::start_no_new_jobs and not $Global::JobQueue->empty());
+ if($opt::progress) {
+ my %progress = progress();
+ ::status("\r", $progress{'status'}, "\n");
+ }
+}
+
+sub toggle_progress {
+ # Turn on/off progress view
+ # Uses:
+ # $opt::progress
+ # Returns: N/A
+ $opt::progress = not $opt::progress;
+ if($opt::progress) {
+ ::status(init_progress());
+ }
+}
+
+sub progress {
+ # Uses:
+ # $opt::bar
+ # $opt::eta
+ # %Global::host
+ # $Global::total_started
+ # Returns:
+ # $workerlist = list of workers
+ # $header = that will fit on the screen
+ # $status = message that will fit on the screen
+ if($opt::bar) {
+ return ("workerlist" => "", "header" => "", "status" => bar());
+ }
+ my $eta = "";
+ my ($status,$header)=("","");
+ if($opt::eta) {
+ my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) =
+ compute_eta();
+ $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ",
+ $this_eta, $left, $avgtime);
+ }
+ my $termcols = terminal_columns();
+ my @workers = sort keys %Global::host;
+ my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers;
+ my $workerno = 1;
+ my %workerno = map { ($_=>$workerno++) } @workers;
+ my $workerlist = "";
+ for my $w (@workers) {
+ $workerlist .=
+ $workerno{$w}.":".$sshlogin{$w} ." / ".
+ ($Global::host{$w}->ncpus() || "-")." / ".
+ $Global::host{$w}->max_jobs_running()."\n";
+ }
+ $status = "x"x($termcols+1);
+ # Select an output format that will fit on a single line
+ if(length $status > $termcols) {
+ # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs
+ $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete";
+ $status = $eta .
+ join(" ",map
+ {
+ if($Global::total_started) {
+ my $completed = ($Global::host{$_}->jobs_completed()||0);
+ my $running = $Global::host{$_}->jobs_running();
+ my $time = $completed ? (time-$^T)/($completed) : "0";
+ sprintf("%s:%d/%d/%d%%/%.1fs ",
+ $sshlogin{$_}, $running, $completed,
+ ($running+$completed)*100
+ / $Global::total_started, $time);
+ }
+ } @workers);
+ }
+ if(length $status > $termcols) {
+ # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs
+ $header = "Computer:jobs running/jobs completed/%of started jobs";
+ $status = $eta .
+ join(" ",map
+ {
+ my $completed = ($Global::host{$_}->jobs_completed()||0);
+ my $running = $Global::host{$_}->jobs_running();
+ my $time = $completed ? (time-$^T)/($completed) : "0";
+ sprintf("%s:%d/%d/%d%%/%.1fs ",
+ $workerno{$_}, $running, $completed,
+ ($running+$completed)*100
+ / $Global::total_started, $time);
+ } @workers);
+ }
+ if(length $status > $termcols) {
+ # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX%
+ $header = "Computer:jobs running/jobs completed/%of started jobs";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d/%d/%d%%",
+ $sshlogin{$_},
+ $Global::host{$_}->jobs_running(),
+ ($Global::host{$_}->jobs_completed()||0),
+ ($Global::host{$_}->jobs_running()+
+ ($Global::host{$_}->jobs_completed()||0))*100
+ / $Global::total_started) }
+ @workers);
+ }
+ if(length $status > $termcols) {
+ # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX%
+ $header = "Computer:jobs running/jobs completed/%of started jobs";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d/%d/%d%%",
+ $workerno{$_},
+ $Global::host{$_}->jobs_running(),
+ ($Global::host{$_}->jobs_completed()||0),
+ ($Global::host{$_}->jobs_running()+
+ ($Global::host{$_}->jobs_completed()||0))*100
+ / $Global::total_started) }
+ @workers);
+ }
+ if(length $status > $termcols) {
+ # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX
+ $header = "Computer:jobs running/jobs completed";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d/%d",
+ $sshlogin{$_}, $Global::host{$_}->jobs_running(),
+ ($Global::host{$_}->jobs_completed()||0)) }
+ @workers);
+ }
+ if(length $status > $termcols) {
+ # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX
+ $header = "Computer:jobs running/jobs completed";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d/%d",
+ $sshlogin{$_}, $Global::host{$_}->jobs_running(),
+ ($Global::host{$_}->jobs_completed()||0)) }
+ @workers);
+ }
+ if(length $status > $termcols) {
+ # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX
+ $header = "Computer:jobs running/jobs completed";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d/%d",
+ $workerno{$_}, $Global::host{$_}->jobs_running(),
+ ($Global::host{$_}->jobs_completed()||0)) }
+ @workers);
+ }
+ if(length $status > $termcols) {
+ # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX
+ $header = "Computer:jobs completed";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d",
+ $sshlogin{$_},
+ ($Global::host{$_}->jobs_completed()||0)) }
+ @workers);
+ }
+ if(length $status > $termcols) {
+ # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX
+ $header = "Computer:jobs completed";
+ $status = $eta .
+ join(" ",map
+ { sprintf("%s:%d",
+ $workerno{$_},
+ ($Global::host{$_}->jobs_completed()||0)) }
+ @workers);
+ }
+ return ("workerlist" => $workerlist, "header" => $header, "status" => $status);
+}
+
+{
+ my ($total, $first_completed, $smoothed_avg_time);
+
+ sub compute_eta {
+ # Calculate important numbers for ETA
+ # Returns:
+ # $total = number of jobs in total
+ # $completed = number of jobs completed
+ # $left = number of jobs left
+ # $pctcomplete = percent of jobs completed
+ # $avgtime = averaged time
+ # $eta = smoothed eta
+ $total ||= $Global::JobQueue->total_jobs();
+ my $completed = 0;
+ for(values %Global::host) { $completed += $_->jobs_completed() }
+ my $left = $total - $completed;
+ if(not $completed) {
+ return($total, $completed, $left, 0, 0, 0);
+ }
+ my $pctcomplete = $completed / $total;
+ $first_completed ||= time;
+ my $timepassed = (time - $first_completed);
+ my $avgtime = $timepassed / $completed;
+ $smoothed_avg_time ||= $avgtime;
+ # Smooth the eta so it does not jump wildly
+ $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time +
+ $pctcomplete * $avgtime;
+ my $eta = int($left * $smoothed_avg_time);
+ return($total, $completed, $left, $pctcomplete, $avgtime, $eta);
+ }
+}
+
+{
+ my ($rev,$reset);
+
+ sub bar {
+ # Return:
+ # $status = bar with eta, completed jobs, arg and pct
+ $rev ||= "\033[7m";
+ $reset ||= "\033[0m";
+ my($total, $completed, $left, $pctcomplete, $avgtime, $eta) =
+ compute_eta();
+ my $arg = $Global::newest_job ?
+ $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : "";
+ # These chars mess up display in the terminal
+ $arg =~ tr/[\011-\016\033\302-\365]//d;
+ my $bar_text =
+ sprintf("%d%% %d:%d=%ds %s",
+ $pctcomplete*100, $completed, $left, $eta, $arg);
+ my $terminal_width = terminal_columns();
+ my $s = sprintf("%-${terminal_width}s",
+ substr($bar_text." "x$terminal_width,
+ 0,$terminal_width));
+ my $width = int($terminal_width * $pctcomplete);
+ substr($s,$width,0) = $reset;
+ my $zenity = sprintf("%-${terminal_width}s",
+ substr("# $eta sec $arg",
+ 0,$terminal_width));
+ $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header
+ "\r" . $rev . $s . $reset;
+ return $s;
+ }
+}
+
+{
+ my ($columns,$last_column_time);
+
+ sub terminal_columns {
+ # Get the number of columns of the terminal.
+ # Only update once per second.
+ # Returns:
+ # number of columns of the screen
+ if(not $columns or $last_column_time < time) {
+ $last_column_time = time;
+ $columns = $ENV{'COLUMNS'};
+ if(not $columns) {
+ my $stty = qx{ stty -a /dev/null' };
+ $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; };
+ }
+ $columns ||= 80;
+ }
+ return $columns;
+ }
+}
+
+sub get_job_with_sshlogin {
+ # Input:
+ # $sshlogin = which host should the job be run on?
+ # Uses:
+ # $opt::hostgroups
+ # $Global::JobQueue
+ # Returns:
+ # $job = next job object for $sshlogin if any available
+ my $sshlogin = shift;
+ my $job;
+
+ if ($opt::hostgroups) {
+ my @other_hostgroup_jobs = ();
+
+ while($job = $Global::JobQueue->get()) {
+ if($sshlogin->in_hostgroups($job->hostgroups())) {
+ # Found a job to be run on a hostgroup of this
+ # $sshlogin
+ last;
+ } else {
+ # This job was not in the hostgroups of $sshlogin
+ push @other_hostgroup_jobs, $job;
+ }
+ }
+ $Global::JobQueue->unget(@other_hostgroup_jobs);
+ if(not defined $job) {
+ # No more jobs
+ return undef;
+ }
+ } else {
+ $job = $Global::JobQueue->get();
+ if(not defined $job) {
+ # No more jobs
+ ::debug("start", "No more jobs: JobQueue empty\n");
+ return undef;
+ }
+ }
+
+ my $clean_command = $job->replaced();
+ if($clean_command =~ /^\s*$/) {
+ # Do not run empty lines
+ if(not $Global::JobQueue->empty()) {
+ return get_job_with_sshlogin($sshlogin);
+ } else {
+ return undef;
+ }
+ }
+ $job->set_sshlogin($sshlogin);
+ if($opt::retries and $clean_command and
+ $job->failed_here()) {
+ # This command with these args failed for this sshlogin
+ my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed();
+ # Only look at the Global::host that have > 0 jobslots
+ if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host
+ and $job->failed_here() == $min_failures) {
+ # It failed the same or more times on another host:
+ # run it on this host
+ } else {
+ # If it failed fewer times on another host:
+ # Find another job to run
+ my $nextjob;
+ if(not $Global::JobQueue->empty()) {
+ # This can potentially recurse for all args
+ no warnings 'recursion';
+ $nextjob = get_job_with_sshlogin($sshlogin);
+ }
+ # Push the command back on the queue
+ $Global::JobQueue->unget($job);
+ return $nextjob;
+ }
+ }
+ return $job;
+}
+
+sub __REMOTE_SSH__ {}
+
+sub read_sshloginfiles {
+ # Read a list of --slf's
+ # Input:
+ # @files = files or symbolic file names to read
+ # Returns: N/A
+ for my $s (@_) {
+ read_sshloginfile(expand_slf_shorthand($s));
+ }
+}
+
+sub expand_slf_shorthand {
+ # Expand --slf shorthand into a read file name
+ # Input:
+ # $file = file or symbolic file name to read
+ # Returns:
+ # $file = actual file name to read
+ my $file = shift;
+ if($file eq "-") {
+ # skip: It is stdin
+ } elsif($file eq "..") {
+ $file = $ENV{'HOME'}."/.parallel/sshloginfile";
+ } elsif($file eq ".") {
+ $file = "/etc/parallel/sshloginfile";
+ } elsif(not -r $file) {
+ if(not -r $ENV{'HOME'}."/.parallel/".$file) {
+ # Try prepending ~/.parallel
+ ::error("Cannot open $file.\n");
+ ::wait_and_exit(255);
+ } else {
+ $file = $ENV{'HOME'}."/.parallel/".$file;
+ }
+ }
+ return $file;
+}
+
+sub read_sshloginfile {
+ # Read sshloginfile into @Global::sshlogin
+ # Input:
+ # $file = file to read
+ # Uses:
+ # @Global::sshlogin
+ # Returns: N/A
+ my $file = shift;
+ my $close = 1;
+ my $in_fh;
+ ::debug("init","--slf ",$file);
+ if($file eq "-") {
+ $in_fh = *STDIN;
+ $close = 0;
+ } else {
+ if(not open($in_fh, "<", $file)) {
+ # Try the filename
+ ::error("Cannot open $file.\n");
+ ::wait_and_exit(255);
+ }
+ }
+ while(<$in_fh>) {
+ chomp;
+ /^\s*#/ and next;
+ /^\s*$/ and next;
+ push @Global::sshlogin, $_;
+ }
+ if($close) {
+ close $in_fh;
+ }
+}
+
+sub parse_sshlogin {
+ # Parse @Global::sshlogin into %Global::host.
+ # Keep only hosts that are in one of the given ssh hostgroups.
+ # Uses:
+ # @Global::sshlogin
+ # $Global::minimal_command_line_length
+ # %Global::host
+ # $opt::transfer
+ # @opt::return
+ # $opt::cleanup
+ # @opt::basefile
+ # @opt::trc
+ # Returns: N/A
+ my @login;
+ if(not @Global::sshlogin) { @Global::sshlogin = (":"); }
+ for my $sshlogin (@Global::sshlogin) {
+ # Split up -S sshlogin,sshlogin
+ for my $s (split /,|\n/, $sshlogin) {
+ if ($s eq ".." or $s eq "-") {
+ # This may add to @Global::sshlogin - possibly bug
+ read_sshloginfile(expand_slf_shorthand($s));
+ } else {
+ $s =~ s/\s*$//;
+ push (@login, $s);
+ }
+ }
+ }
+ $Global::minimal_command_line_length = 8_000_000;
+ my @allowed_hostgroups;
+ for my $ncpu_sshlogin_string (::uniq(@login)) {
+ my $sshlogin = SSHLogin->new($ncpu_sshlogin_string);
+ my $sshlogin_string = $sshlogin->string();
+ if($sshlogin_string eq "") {
+ # This is an ssh group: -S @webservers
+ push @allowed_hostgroups, $sshlogin->hostgroups();
+ next;
+ }
+ if($Global::host{$sshlogin_string}) {
+ # This sshlogin has already been added:
+ # It is probably a host that has come back
+ # Set the max_jobs_running back to the original
+ debug("run","Already seen $sshlogin_string\n");
+ if($sshlogin->{'ncpus'}) {
+ # If ncpus set by '#/' of the sshlogin, overwrite it:
+ $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus());
+ }
+ $Global::host{$sshlogin_string}->set_max_jobs_running(undef);
+ next;
+ }
+ if($sshlogin_string eq ":") {
+ $sshlogin->set_maxlength(Limits::Command::max_length());
+ } else {
+ # If all chars needs to be quoted, every other character will be \
+ $sshlogin->set_maxlength(int(Limits::Command::max_length()/2));
+ }
+ $Global::minimal_command_line_length =
+ ::min($Global::minimal_command_line_length, $sshlogin->maxlength());
+ $Global::host{$sshlogin_string} = $sshlogin;
+ }
+ if(@allowed_hostgroups) {
+ # Remove hosts that are not in these groups
+ while (my ($string, $sshlogin) = each %Global::host) {
+ if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) {
+ delete $Global::host{$string};
+ }
+ }
+ }
+
+ # debug("start", "sshlogin: ", my_dump(%Global::host),"\n");
+ if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) {
+ if(not remote_hosts()) {
+ # There are no remote hosts
+ if(@opt::trc) {
+ ::warning("--trc ignored as there are no remote --sshlogin.\n");
+ } elsif (defined $opt::transfer) {
+ ::warning("--transfer ignored as there are no remote --sshlogin.\n");
+ } elsif (@opt::return) {
+ ::warning("--return ignored as there are no remote --sshlogin.\n");
+ } elsif (defined $opt::cleanup) {
+ ::warning("--cleanup ignored as there are no remote --sshlogin.\n");
+ } elsif (@opt::basefile) {
+ ::warning("--basefile ignored as there are no remote --sshlogin.\n");
+ }
+ }
+ }
+}
+
+sub remote_hosts {
+ # Return sshlogins that are not ':'
+ # Uses:
+ # %Global::host
+ # Returns:
+ # list of sshlogins with ':' removed
+ return grep !/^:$/, keys %Global::host;
+}
+
+sub setup_basefile {
+ # Transfer basefiles to each $sshlogin
+ # This needs to be done before first jobs on $sshlogin is run
+ # Uses:
+ # %Global::host
+ # @opt::basefile
+ # Returns: N/A
+ my $cmd = "";
+ my $rsync_destdir;
+ my $workdir;
+ for my $sshlogin (values %Global::host) {
+ if($sshlogin->string() eq ":") { next }
+ for my $file (@opt::basefile) {
+ if($file !~ m:^/: and $opt::workdir eq "...") {
+ ::error("Work dir '...' will not work with relative basefiles.\n");
+ ::wait_and_exit(255);
+ }
+ $workdir ||= Job->new("")->workdir();
+ $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&";
+ }
+ }
+ $cmd .= "wait;";
+ debug("init", "basesetup: $cmd\n");
+ print `$cmd`;
+}
+
+sub cleanup_basefile {
+ # Remove the basefiles transferred
+ # Uses:
+ # %Global::host
+ # @opt::basefile
+ # Returns: N/A
+ my $cmd = "";
+ my $workdir = Job->new("")->workdir();
+ for my $sshlogin (values %Global::host) {
+ if($sshlogin->string() eq ":") { next }
+ for my $file (@opt::basefile) {
+ $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&";
+ }
+ }
+ $cmd .= "wait;";
+ debug("init", "basecleanup: $cmd\n");
+ print `$cmd`;
+}
+
+sub filter_hosts {
+ # Remove down --sshlogins from active duty.
+ # Find ncpus, ncores, maxlen, time-to-login for each host.
+ # Uses:
+ # %Global::host
+ # $Global::minimal_command_line_length
+ # $opt::use_cpus_instead_of_cores
+ # Returns: N/A
+
+ my ($ncores_ref, $ncpus_ref, $time_to_login_ref, $maxlen_ref,
+ $echo_ref, $down_hosts_ref) =
+ parse_host_filtering(parallelized_host_filtering());
+
+ delete @Global::host{@$down_hosts_ref};
+ @$down_hosts_ref and ::warning("Removed @$down_hosts_ref\n");
+
+ $Global::minimal_command_line_length = 8_000_000;
+ while (my ($sshlogin, $obj) = each %Global::host) {
+ if($sshlogin eq ":") { next }
+ $ncpus_ref->{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin());
+ $ncores_ref->{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin());
+ $time_to_login_ref->{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin());
+ $maxlen_ref->{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin());
+ if($opt::use_cpus_instead_of_cores) {
+ $obj->set_ncpus($ncpus_ref->{$sshlogin});
+ } else {
+ $obj->set_ncpus($ncores_ref->{$sshlogin});
+ }
+ $obj->set_time_to_login($time_to_login_ref->{$sshlogin});
+ $obj->set_maxlength($maxlen_ref->{$sshlogin});
+ $Global::minimal_command_line_length =
+ ::min($Global::minimal_command_line_length,
+ int($maxlen_ref->{$sshlogin}/2));
+ ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus_ref->{$sshlogin},
+ " ncores:", $ncores_ref->{$sshlogin},
+ " time_to_login:", $time_to_login_ref->{$sshlogin},
+ " maxlen:", $maxlen_ref->{$sshlogin},
+ " min_max_len:", $Global::minimal_command_line_length,"\n");
+ }
+}
+
+sub parse_host_filtering {
+ # Input:
+ # @lines = output from parallelized_host_filtering()
+ # Returns:
+ # \%ncores = number of cores of {host}
+ # \%ncpus = number of cpus of {host}
+ # \%time_to_login = time_to_login on {host}
+ # \%maxlen = max command len on {host}
+ # \%echo = echo received from {host}
+ # \@down_hosts = list of hosts with no answer
+ my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts);
+
+ for (@_) {
+ chomp;
+ my @col = split /\t/, $_;
+ if(defined $col[6]) {
+ # This is a line from --joblog
+ # seq host time spent sent received exit signal command
+ # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores
+ if($col[0] eq "Seq" and $col[1] eq "Host" and
+ $col[2] eq "Starttime") {
+ # Header => skip
+ next;
+ }
+ # Get server from: eval true server\;
+ $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]");
+ my $host = $1;
+ $host =~ tr/\\//d;
+ $Global::host{$host} or next;
+ if($col[6] eq "255" or $col[7] eq "15") {
+ # exit == 255 or signal == 15: ssh failed
+ # Remove sshlogin
+ ::debug("init", "--filtered $host\n");
+ push(@down_hosts, $host);
+ } elsif($col[6] eq "127") {
+ # signal == 127: parallel not installed remote
+ # Set ncpus and ncores = 1
+ ::warning("Could not figure out ",
+ "number of cpus on $host. Using 1.\n");
+ $ncores{$host} = 1;
+ $ncpus{$host} = 1;
+ $maxlen{$host} = Limits::Command::max_length();
+ } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) {
+ # Remember how log it took to log in
+ # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo
+ $time_to_login{$host} = ::min($time_to_login{$host},$col[3]);
+ } else {
+ ::die_bug("host check unmatched long jobline: $_");
+ }
+ } elsif($Global::host{$col[0]}) {
+ # This output from --number-of-cores, --number-of-cpus,
+ # --max-line-length-allowed
+ # ncores: server 8
+ # ncpus: server 2
+ # maxlen: server 131071
+ if(not $ncores{$col[0]}) {
+ $ncores{$col[0]} = $col[1];
+ } elsif(not $ncpus{$col[0]}) {
+ $ncpus{$col[0]} = $col[1];
+ } elsif(not $maxlen{$col[0]}) {
+ $maxlen{$col[0]} = $col[1];
+ } elsif(not $echo{$col[0]}) {
+ $echo{$col[0]} = $col[1];
+ } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) {
+ # Skip these:
+ # perl: warning: Setting locale failed.
+ # perl: warning: Please check that your locale settings:
+ # LANGUAGE = (unset),
+ # LC_ALL = (unset),
+ # LANG = "en_US.UTF-8"
+ # are supported and installed on your system.
+ # perl: warning: Falling back to the standard locale ("C").
+ } else {
+ ::die_bug("host check too many col0: $_");
+ }
+ } else {
+ ::die_bug("host check unmatched short jobline ($col[0]): $_");
+ }
+ }
+ @down_hosts = uniq(@down_hosts);
+ return(\%ncores, \%ncpus, \%time_to_login, \%maxlen, \%echo, \@down_hosts);
+}
+
+sub parallelized_host_filtering {
+ # Uses:
+ # $Global::envvar
+ # %Global::host
+ # Returns:
+ # text entries with:
+ # * joblog line
+ # * hostname \t number of cores
+ # * hostname \t number of cpus
+ # * hostname \t max-line-length-allowed
+ # * hostname \t empty
+ my(@cores, @cpus, @maxline, @echo);
+ my $envvar = ::shell_quote_scalar($Global::envvar);
+ while (my ($host, $sshlogin) = each %Global::host) {
+ if($host eq ":") { next }
+ # The 'true' is used to get the $host out later
+ my $sshcmd = "true $host; exec " . $sshlogin->sshcommand()." ".$sshlogin->serverlogin();
+ push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0");
+ push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0");
+ push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0");
+ # 'echo' is used to get the best possible value for an ssh login time
+ push(@echo, $host."\t".$sshcmd." echo\n\0");
+ }
+ my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh");
+ print $fh @cores, @cpus, @maxline, @echo;
+ close $fh;
+ # --timeout 5: Setting up an SSH connection and running a simple
+ # command should never take > 5 sec.
+ # --delay 0.1: If multiple sshlogins use the same proxy the delay
+ # will make it less likely to overload the ssh daemon.
+ # --retries 3: If the ssh daemon it overloaded, try 3 times
+ # -s 16000: Half of the max line on UnixWare
+ # TODO sh -c wrapper to work in csh
+ my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null";
+ ::debug("init", $cmd, "\n");
+ my @out;
+ my $prepend = "";
+ open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd");
+ for(<$host_fh>) {
+ if(/\'$/) {
+ # if last char = ' then append next line
+ # This may be due to quoting of $Global::envvar
+ $prepend .= $_;
+ next;
+ }
+ $_ = $prepend . $_;
+ $prepend = "";
+ push @out, $_;
+ }
+ close $host_fh;
+ $Global::debug or unlink $tmpfile;
+ return @out;
+}
+
+sub onall {
+ # Runs @command on all hosts.
+ # Uses parallel to run @command on each host.
+ # --jobs = number of hosts to run on simultaneously.
+ # For each host a parallel command with the args will be running.
+ # Uses:
+ # $Global::quoting
+ # @opt::basefile
+ # $opt::jobs
+ # $opt::linebuffer
+ # $opt::ungroup
+ # $opt::group
+ # $opt::keeporder
+ # $opt::D
+ # $opt::plain
+ # $opt::max_chars
+ # $opt::linebuffer
+ # $opt::files
+ # $opt::colsep
+ # $opt::timeout
+ # $opt::plain
+ # $opt::retries
+ # $opt::max_chars
+ # $opt::arg_sep
+ # $opt::arg_file_sep
+ # @opt::v
+ # @opt::env
+ # %Global::host
+ # $Global::exitstatus
+ # $Global::debug
+ # $Global::joblog
+ # $opt::tag
+ # $opt::joblog
+ # Input:
+ # @command = command to run on all hosts
+ # Returns: N/A
+ sub tmp_joblog {
+ # Input:
+ # $joblog = filename of joblog - undef if none
+ # Returns:
+ # $tmpfile = temp file for joblog - undef if none
+ my $joblog = shift;
+ if(not defined $joblog) {
+ return undef;
+ }
+ my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log");
+ close $fh;
+ return $tmpfile;
+ }
+ my ($input_source_fh_ref,@command) = @_;
+ if($Global::quoting) {
+ @command = shell_quote_empty(@command);
+ }
+
+ # Copy all @input_source_fh (-a and :::) into tempfiles
+ my @argfiles = ();
+ for my $fh (@$input_source_fh_ref) {
+ my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1);
+ print $outfh (<$fh>);
+ close $outfh;
+ push @argfiles, $name;
+ }
+ if(@opt::basefile) { setup_basefile(); }
+ # for each sshlogin do:
+ # parallel -S $sshlogin $command :::: @argfiles
+ #
+ # Pass some of the options to the sub-parallels, not all of them as
+ # -P should only go to the first, and -S should not be copied at all.
+ my $options =
+ join(" ",
+ ((defined $opt::jobs) ? "-P $opt::jobs" : ""),
+ ((defined $opt::linebuffer) ? "--linebuffer" : ""),
+ ((defined $opt::ungroup) ? "-u" : ""),
+ ((defined $opt::group) ? "-g" : ""),
+ ((defined $opt::keeporder) ? "--keeporder" : ""),
+ ((defined $opt::D) ? "-D $opt::D" : ""),
+ ((defined $opt::plain) ? "--plain" : ""),
+ ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
+ );
+ my $suboptions =
+ join(" ",
+ ((defined $opt::ungroup) ? "-u" : ""),
+ ((defined $opt::linebuffer) ? "--linebuffer" : ""),
+ ((defined $opt::group) ? "-g" : ""),
+ ((defined $opt::files) ? "--files" : ""),
+ ((defined $opt::keeporder) ? "--keeporder" : ""),
+ ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""),
+ ((@opt::v) ? "-vv" : ""),
+ ((defined $opt::D) ? "-D $opt::D" : ""),
+ ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""),
+ ((defined $opt::plain) ? "--plain" : ""),
+ ((defined $opt::retries) ? "--retries ".$opt::retries : ""),
+ ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""),
+ ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""),
+ ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""),
+ (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""),
+ );
+ ::debug("init", "| $0 $options\n");
+ open(my $parallel_fh, "|-", "$0 --will-cite -j0 $options") ||
+ ::die_bug("This does not run GNU Parallel: $0 $options");
+ my @joblogs;
+ for my $host (sort keys %Global::host) {
+ my $sshlogin = $Global::host{$host};
+ my $joblog = tmp_joblog($opt::joblog);
+ if($joblog) {
+ push @joblogs, $joblog;
+ $joblog = "--joblog $joblog";
+ }
+ my $quad = $opt::arg_file_sep || "::::";
+ ::debug("init", "$0 $suboptions -j1 $joblog ",
+ ((defined $opt::tag) ?
+ "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
+ " -S ", shell_quote_scalar($sshlogin->string())," ",
+ join(" ",shell_quote(@command))," $quad @argfiles\n");
+ print $parallel_fh "$0 $suboptions -j1 $joblog ",
+ ((defined $opt::tag) ?
+ "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""),
+ " -S ", shell_quote_scalar($sshlogin->string())," ",
+ join(" ",shell_quote(@command))," $quad @argfiles\n";
+ }
+ close $parallel_fh;
+ $Global::exitstatus = $? >> 8;
+ debug("init", "--onall exitvalue ", $?);
+ if(@opt::basefile) { cleanup_basefile(); }
+ $Global::debug or unlink(@argfiles);
+ my %seen;
+ for my $joblog (@joblogs) {
+ # Append to $joblog
+ open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog");
+ # Skip first line (header);
+ <$fh>;
+ print $Global::joblog (<$fh>);
+ close $fh;
+ unlink($joblog);
+ }
+}
+
+sub __SIGNAL_HANDLING__ {}
+
+sub save_original_signal_handler {
+ # Remember the original signal handler
+ # Uses:
+ # %Global::original_sig
+ # Returns: N/A
+ $SIG{INT} = sub {
+ if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
+ wait_and_exit(255);
+ };
+ $SIG{TERM} = sub {
+ if($opt::tmux) { qx { tmux kill-session -t p$$ }; }
+ wait_and_exit(255);
+ };
+ %Global::original_sig = %SIG;
+ $SIG{TERM} = sub {}; # Dummy until jobs really start
+ $SIG{ALRM} = 'IGNORE';
+}
+
+sub list_running_jobs {
+ # Print running jobs on tty
+ # Uses:
+ # %Global::running
+ # Returns: N/A
+ for my $job (values %Global::running) {
+ ::status("$Global::progname: ",$job->replaced(),"\n");
+ }
+}
+
+sub start_no_new_jobs {
+ # Start no more jobs
+ # Uses:
+ # %Global::original_sig
+ # %Global::unlink
+ # $Global::start_no_new_jobs
+ # Returns: N/A
+ $SIG{TERM} = $Global::original_sig{TERM};
+ unlink keys %Global::unlink;
+ ::status
+ ("$Global::progname: SIGTERM received. No new jobs will be started.\n",
+ "$Global::progname: Waiting for these ", scalar(keys %Global::running),
+ " jobs to finish. Send SIGTERM again to stop now.\n");
+ list_running_jobs();
+ $Global::start_no_new_jobs ||= 1;
+}
+
+sub reaper {
+ # A job finished.
+ # Print the output.
+ # Start another job
+ # Uses:
+ # %Global::sshmaster
+ # %Global::running
+ # $Global::tty_taken
+ # @Global::slots
+ # $opt::timeout
+ # $Global::timeoutq
+ # $opt::halt
+ # $opt::keeporder
+ # $Global::total_running
+ # Returns:
+ # $children_reaped = number of children finished
+ my $stiff;
+ my $children_reaped = 0;
+ debug("run", "Reaper ");
+ while (($stiff = waitpid(-1, &WNOHANG)) > 0) {
+ # $stiff = pid of dead process
+ $children_reaped++;
+ if($Global::sshmaster{$stiff}) {
+ # This is one of the ssh -M: ignore
+ next;
+ }
+ my $job = $Global::running{$stiff};
+ # '-a <(seq 10)' will give us a pid not in %Global::running
+ $job or next;
+ $job->set_exitstatus($? >> 8);
+ $job->set_exitsignal($? & 127);
+ debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")");
+ $job->set_endtime(::now());
+ if($stiff == $Global::tty_taken) {
+ # The process that died had the tty => release it
+ $Global::tty_taken = 0;
+ }
+
+ if(not $job->should_be_retried()) {
+ # The job is done
+ # Free the jobslot
+ push @Global::slots, $job->slot();
+ if($opt::timeout) {
+ # Update average runtime for timeout
+ $Global::timeoutq->update_median_runtime($job->runtime());
+ }
+ # Force printing now if --halt forces us to exit
+ my $print_now =
+ ($opt::halt and
+ (($opt::halt == 2 and $job->exitstatus())
+ or
+ ($opt::halt == -2 and not $job->exitstatus())));
+ if($opt::keeporder and not $print_now) {
+ $job->print_earlier_jobs();
+ } else {
+ $job->print();
+ }
+ $job->should_we_halt();
+ }
+ my $sshlogin = $job->sshlogin();
+ $sshlogin->dec_jobs_running();
+ $sshlogin->inc_jobs_completed();
+ $Global::total_running--;
+ delete $Global::running{$stiff};
+ start_more_jobs();
+ if($opt::progress) {
+ my %progress = progress();
+ ::status("\r",$progress{'status'});
+ }
+ }
+ debug("run", "done ");
+ return $children_reaped;
+}
+
+sub __USAGE__ {}
+
+sub killall {
+ # Kill all jobs
+ # Send all jobs TERM
+ # Wait
+ # Send all jobs TERM
+ # Wait
+ # Send all jobs KILL
+ # Send all (grand*)children KILL
+
+ $Global::start_no_new_jobs ||= 1;
+ # pids of the all children and (grand*)children
+ # before we start the blood bath
+ my @family_pids = family_pids(keys %Global::running);
+ # Send jobs TERM
+ ::debug("kill","TERM ", join(' ',keys %Global::running),"\n");
+ kill "TERM", keys %Global::running;
+ # Wait up to 200 ms
+ my $sleepsum = 0;
+ my $sleep = 0.001;
+ for (; kill(0, keys %Global::running) and $sleepsum < 200;
+ $sleepsum += $sleep) {
+ # This can change %Global::running
+ ::debug("kill","Slept $sleepsum\n");
+ if((my $stiff = waitpid(-1, &WNOHANG)) > 0) {
+ ::debug("kill","reaped $stiff\n");
+
+ $Global::total_running--;
+ delete $Global::running{$stiff};
+ $sleep = $sleep/2+0.001;
+ }
+ $sleep *= 1.1;
+ ::usleep($sleep);
+ }
+ # Send jobs TERM (again)
+ ::debug("kill","TERM ", join(' ',keys %Global::running),"\n");
+ kill "TERM", keys %Global::running;
+ # Wait up to 200 ms
+ $sleepsum = 0;
+ $sleep = 0.001;
+ for (; kill(0, keys %Global::running) and $sleepsum < 200;
+ $sleepsum += $sleep) {
+ # This can change %Global::running
+ if((my $stiff = waitpid(-1, &WNOHANG)) > 0) {
+ $Global::total_running--;
+ delete $Global::running{$stiff};
+ $sleep = $sleep/2+0.001;
+ }
+ $sleep *= 1.1;
+ ::usleep($sleep);
+ }
+ # Send jobs KILL
+ ::debug("kill","KILL ", join(' ',keys %Global::running),"\n");
+ kill "KILL", keys %Global::running;
+ # Send all (grand*)children KILL (if there are any left)
+ ::debug("kill","KILL @family_pids\n");
+ kill "KILL", @family_pids;
+}
+
+sub family_pids {
+ # Find the pids with this->pid as (grand)*parent
+ # Input:
+ # @parents = pids of parents
+ # Returns:
+ # @pids = pids of (grand)*children
+ my @parents = @_;
+ my @pids;
+
+ my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();
+
+ my @more = @parents;
+ # While more (grand)*children
+ while(@more) {
+ my @m;
+ push @pids, @more;
+ for my $parent (@more) {
+ if($children_of_ref->{$parent}) {
+ # add the children of this parent
+ push @m, @{$children_of_ref->{$parent}};
+ }
+ }
+ @more = @m;
+ }
+ return (@pids);
+}
+
+sub wait_and_exit {
+ # If we do not wait, we sometimes get segfault
+ # Returns: N/A
+ my $error = shift;
+ unlink keys %Global::unlink;
+ if($error) {
+ # Kill all without printing
+ killall();
+ }
+ for (keys %Global::unkilled_children) {
+ kill 9, $_;
+ waitpid($_,0);
+ delete $Global::unkilled_children{$_};
+ }
+ wait();
+ exit($error);
+}
+
+sub die_usage {
+ # Returns: N/A
+ usage();
+ wait_and_exit(255);
+}
+
+sub usage {
+ # Returns: N/A
+ print join
+ ("\n",
+ "Usage:",
+ "",
+ "$Global::progname [options] [command [arguments]] < list_of_arguments",
+ "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...",
+ "cat ... | $Global::progname --pipe [options] [command [arguments]]",
+ "",
+ "-j n Run n jobs in parallel",
+ "-k Keep same order",
+ "-X Multiple arguments with context replace",
+ "--colsep regexp Split input on regexp for positional replacements",
+ "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings",
+ "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings",
+ "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =",
+ " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}",
+ "",
+ "-S sshlogin Example: foo\@server.example.com",
+ "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins",
+ "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup",
+ "--onall Run the given command with argument on all sshlogins",
+ "--nonall Run the given command with no arguments on all sshlogins",
+ "",
+ "--pipe Split stdin (standard input) to multiple jobs.",
+ "--recend str Record end separator for --pipe.",
+ "--recstart str Record start separator for --pipe.",
+ "",
+ "See 'man $Global::progname' for details",
+ "",
+ "Academic tradition requires you to cite works you base your article on.",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite:",
+ "",
+ " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,",
+ " ;login: The USENIX Magazine, February 2011:42-47.",
+ "",
+ "This helps funding further development; and it won't cost you a cent.",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n",
+ "");
+}
+
+
+sub citation_notice {
+ # if --will-cite or --plain: do nothing
+ # if stderr redirected: do nothing
+ # if ~/.parallel/will-cite: do nothing
+ # else: print citation notice to stderr
+ if($opt::willcite
+ or
+ $opt::plain
+ or
+ not -t $Global::original_stderr
+ or
+ -e $ENV{'HOME'}."/.parallel/will-cite") {
+ # skip
+ } else {
+ ::status
+ ("Academic tradition requires you to cite works you base your article on.\n",
+ "When using programs that use GNU Parallel to process data for publication\n",
+ "please cite:\n",
+ "\n",
+ " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n",
+ " ;login: The USENIX Magazine, February 2011:42-47.\n",
+ "\n",
+ "This helps funding further development; and it won't cost you a cent.\n",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n",
+ "\n",
+ "To silence the citation notice: run 'parallel --bibtex'.\n\n",
+ );
+ }
+}
+
+sub status {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ print $fh @w;
+ flush $fh;
+}
+
+sub warning {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ my $prog = $Global::progname || "parallel";
+ print $fh $prog, ": Warning: ", @w;
+}
+
+sub error {
+ my @w = @_;
+ my $fh = $Global::status_fd || *STDERR;
+ my $prog = $Global::progname || "parallel";
+ print $fh $prog, ": Error: ", @w;
+}
+
+sub die_bug {
+ my $bugid = shift;
+ print STDERR
+ ("$Global::progname: This should not happen. You have found a bug.\n",
+ "Please contact and include:\n",
+ "* The version number: $Global::version\n",
+ "* The bugid: $bugid\n",
+ "* The command line being run\n",
+ "* The files being read (put the files on a webserver if they are big)\n",
+ "\n",
+ "If you get the error on smaller/fewer files, please include those instead.\n");
+ ::wait_and_exit(255);
+}
+
+sub version {
+ # Returns: N/A
+ print join("\n",
+ "GNU $Global::progname $Global::version",
+ "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange",
+ "and Free Software Foundation, Inc.",
+ "License GPLv3+: GNU GPL version 3 or later ",
+ "This is free software: you are free to change and redistribute it.",
+ "GNU $Global::progname comes with no warranty.",
+ "",
+ "Web site: http://www.gnu.org/software/${Global::progname}\n",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite as described in 'parallel --bibtex'.\n",
+ );
+}
+
+sub bibtex {
+ # Returns: N/A
+ print join("\n",
+ "Academic tradition requires you to cite works you base your article on.",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite:",
+ "",
+ "\@article{Tange2011a,",
+ " title = {GNU Parallel - The Command-Line Power Tool},",
+ " author = {O. Tange},",
+ " address = {Frederiksberg, Denmark},",
+ " journal = {;login: The USENIX Magazine},",
+ " month = {Feb},",
+ " number = {1},",
+ " volume = {36},",
+ " url = {http://www.gnu.org/s/parallel},",
+ " year = {2011},",
+ " pages = {42-47}",
+ " doi = {10.5281/zenodo.16303}",
+ "}",
+ "",
+ "(Feel free to use \\nocite{Tange2011a})",
+ "",
+ "This helps funding further development; and it won't cost you a cent.",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
+ "",
+ "If you send a copy of your published article to tange\@gnu.org, it will be",
+ "mentioned in the release notes of next version of GNU Parallel.\n\n",
+ );
+ while(not -e $ENV{'HOME'}."/.parallel/will-cite") {
+ print "\nType: 'will cite' and press enter.\n> ";
+ my $input = ;
+ if($input =~ /will cite/i) {
+ mkdir $ENV{'HOME'}."/.parallel";
+ if(open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")) {
+ close $fh;
+ print "\nThank you for your support. It is much appreciated. The citation\n",
+ "notice is now silenced. You may also use '--will-cite'.\n",
+ "If you use '--will-cite' in scripts you are expected to pay\n",
+ "the 10000 EUR, because you are making it harder to see the\n",
+ "citation notice.\n\n";
+ } else {
+ print "\nThank you for your support. It is much appreciated. The citation\n",
+ "cannot permanently be silenced. Use '--will-cite' instead.\n",
+ "If you use '--will-cite' in scripts you are expected to pay\n",
+ "the 10000 EUR, because you are making it harder to see the\n",
+ "citation notice.\n\n";
+ last;
+ }
+ }
+ }
+}
+
+sub show_limits {
+ # Returns: N/A
+ print("Maximal size of command: ",Limits::Command::real_max_length(),"\n",
+ "Maximal used size of command: ",Limits::Command::max_length(),"\n",
+ "\n",
+ "Execution of will continue now, and it will try to read its input\n",
+ "and run commands; if this is not what you wanted to happen, please\n",
+ "press CTRL-D or CTRL-C\n");
+}
+
+sub __GENERIC_COMMON_FUNCTION__ {}
+
+sub tmpfile {
+ # Create tempfile as $TMPDIR/parXXXXX
+ # Returns:
+ # $filehandle = opened file handle
+ # $filename = file name created
+ return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_);
+}
+
+sub tmpname {
+ # Select a name that does not exist
+ # Do not create the file as that may cause problems
+ # if you ssh to localhost (or a shared file system) under a different name
+ my $name = shift;
+ my($tmpname);
+ do {
+ $tmpname = $ENV{'TMPDIR'}."/".$name.
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
+ } while($Global::unlink{$tmpname}++ or -e $tmpname);
+ return $tmpname;
+}
+
+sub tmpfifo {
+ # Securely make a fifo by securely making a dir with a fifo in it
+ use POSIX qw(mkfifo);
+ my $tmpfifo = tmpname("fif",@_);
+ mkfifo($tmpfifo,0600);
+ return $tmpfifo;
+}
+
+sub uniq {
+ # Remove duplicates and return unique values
+ return keys %{{ map { $_ => 1 } @_ }};
+}
+
+sub min {
+ # Returns:
+ # Minimum value of array
+ my $min;
+ for (@_) {
+ # Skip undefs
+ defined $_ or next;
+ defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef
+ $min = ($min < $_) ? $min : $_;
+ }
+ return $min;
+}
+
+sub max {
+ # Returns:
+ # Maximum value of array
+ my $max;
+ for (@_) {
+ # Skip undefs
+ defined $_ or next;
+ defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef
+ $max = ($max > $_) ? $max : $_;
+ }
+ return $max;
+}
+
+sub sum {
+ # Returns:
+ # Sum of values of array
+ my @args = @_;
+ my $sum = 0;
+ for (@args) {
+ # Skip undefs
+ $_ and do { $sum += $_; }
+ }
+ return $sum;
+}
+
+sub undef_as_zero {
+ my $a = shift;
+ return $a ? $a : 0;
+}
+
+sub undef_as_empty {
+ my $a = shift;
+ return $a ? $a : "";
+}
+
+sub multiply_binary_prefix {
+ # Evalualte numbers with binary prefix
+ # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80
+ # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80
+ # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80
+ # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24
+ # 13G = 13*1024*1024*1024 = 13958643712
+ # Input:
+ # $s = string with prefixes
+ # Returns:
+ # $value = int with prefixes multiplied
+ my $s = shift;
+ if(not $s) {
+ return $s;
+ }
+ $s =~ s/ki/*1024/gi;
+ $s =~ s/mi/*1024*1024/gi;
+ $s =~ s/gi/*1024*1024*1024/gi;
+ $s =~ s/ti/*1024*1024*1024*1024/gi;
+ $s =~ s/pi/*1024*1024*1024*1024*1024/gi;
+ $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+ $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi;
+
+ $s =~ s/K/*1024/g;
+ $s =~ s/M/*1024*1024/g;
+ $s =~ s/G/*1024*1024*1024/g;
+ $s =~ s/T/*1024*1024*1024*1024/g;
+ $s =~ s/P/*1024*1024*1024*1024*1024/g;
+ $s =~ s/E/*1024*1024*1024*1024*1024*1024/g;
+ $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g;
+ $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g;
+ $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g;
+
+ $s =~ s/k/*1000/g;
+ $s =~ s/m/*1000*1000/g;
+ $s =~ s/g/*1000*1000*1000/g;
+ $s =~ s/t/*1000*1000*1000*1000/g;
+ $s =~ s/p/*1000*1000*1000*1000*1000/g;
+ $s =~ s/e/*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g;
+ $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g;
+
+ $s = eval $s;
+ ::debug($s);
+ return $s;
+}
+
+{
+ my ($disk_full_fh, $b8193, $error_printed);
+ sub exit_if_disk_full {
+ # Checks if $TMPDIR is full by writing 8kb to a tmpfile
+ # If the disk is full: Exit immediately.
+ # Returns:
+ # N/A
+ if(not $disk_full_fh) {
+ my $name;
+ ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df");
+ # Separate unlink due to NFS dealing badly with File::Temp
+ unlink $name;
+ $b8193 = "x"x8193;
+ }
+ # Linux does not discover if a disk is full if writing <= 8192
+ # Tested on:
+ # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos
+ # ntfs reiserfs tmpfs ubifs vfat xfs
+ # TODO this should be tested on different OS similar to this:
+ #
+ # doit() {
+ # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop
+ # seq 100000 | parallel --tmpdir /mnt/loop/ true &
+ # seq 6900000 > /mnt/loop/i && echo seq OK
+ # seq 6980868 > /mnt/loop/i
+ # seq 10000 > /mnt/loop/ii
+ # sleep 3
+ # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/
+ # echo >&2
+ # }
+ print $disk_full_fh $b8193;
+ if(not $disk_full_fh
+ or
+ tell $disk_full_fh != 8193) {
+ # On raspbian the disk can be full except for 10 chars.
+ if(not $error_printed) {
+ ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n");
+ ::error("Change \$TMPDIR with --tmpdir or use --compress.\n");
+ $error_printed = 1;
+ }
+ ::wait_and_exit(255);
+ }
+ truncate $disk_full_fh, 0;
+ seek($disk_full_fh, 0, 0) || die;
+ }
+}
+
+sub spacefree {
+ # Remove comments and spaces
+ # Inputs:
+ # $spaces = keep 1 space?
+ # $s = string to remove spaces from
+ # Returns:
+ # $s = with spaces removed
+ my $spaces = shift;
+ my $s = shift;
+ $s =~ s/#.*//mg;
+ if($spaces) {
+ $s =~ s/\s+/ /mg;
+ } else {
+ $s =~ s/\s//mg;
+ }
+ return $s;
+}
+
+{
+ my $hostname;
+ sub hostname {
+ if(not $hostname) {
+ $hostname = `hostname`;
+ chomp($hostname);
+ $hostname ||= "nohostname";
+ }
+ return $hostname;
+ }
+}
+
+sub which {
+ # Input:
+ # @programs = programs to find the path to
+ # Returns:
+ # @full_path = full paths to @programs. Nothing if not found
+ my @which;
+ for my $prg (@_) {
+ push(@which, grep { not -d $_ and -x $_ }
+ map { $_."/".$prg } split(":",$ENV{'PATH'}));
+ }
+ return @which;
+}
+
+{
+ my ($regexp,%fakename);
+
+ sub parent_shell {
+ # Input:
+ # $pid = pid to see if (grand)*parent is a shell
+ # Returns:
+ # $shellpath = path to shell - undef if no shell found
+ my $pid = shift;
+ if(not $regexp) {
+ # All shells known to mankind
+ #
+ # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh
+ # posh rbash rush rzsh sash sh static-sh tcsh yash zsh
+ my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh
+ ksh93 mksh pdksh posh rbash rush rzsh
+ sash sh static-sh tcsh yash zsh -sh -csh),
+ '-sh (sh)' # sh on FreeBSD
+ );
+ # Can be formatted as:
+ # [sh] -sh sh busybox sh -sh (sh)
+ # /bin/sh /sbin/sh /opt/csw/sh
+ # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh
+ my $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")";
+ $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| [^(])';
+ %fakename = (
+ # sh disguises itself as -sh (sh) on FreeBSD
+ "-sh (sh)" => ["sh"],
+ # csh and tcsh disguise themselves as -sh/-csh
+ "-sh" => ["csh", "tcsh"],
+ "-csh" => ["tcsh", "csh"],
+ );
+ }
+ my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table();
+ my $shellpath;
+ my $testpid = $pid;
+ while($testpid) {
+ ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n");
+ if($name_of_ref->{$testpid} =~ /$regexp/o) {
+ ::debug("init", "which ".($3||$6)." => ");
+ $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0];
+ ::debug("init", "shell path $shellpath\n");
+ $shellpath and last;
+ }
+ if($testpid == $parent_of_ref->{$testpid}) {
+ # In Solaris zones, the PPID of the zsched process is itself
+ last;
+ }
+ $testpid = $parent_of_ref->{$testpid};
+ }
+ return $shellpath;
+ }
+}
+
+{
+ my %pid_parentpid_cmd;
+
+ sub pid_table {
+ # Returns:
+ # %children_of = { pid -> children of pid }
+ # %parent_of = { pid -> pid of parent }
+ # %name_of = { pid -> commandname }
+
+ if(not %pid_parentpid_cmd) {
+ # Filter for SysV-style `ps`
+ my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
+ q(s/^.{$s}//; print "@F[1,2] $_"' );
+ # Crazy msys: ' is not accepted on the cmd line, but " are treated as '
+ my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;).
+ q(s/^.{$s}//; print qq{@F[1,2] $_}" );
+ # BSD-style `ps`
+ my $bsd = q(ps -o pid,ppid,command -ax);
+ %pid_parentpid_cmd =
+ (
+ 'aix' => $sysv,
+ 'cygwin' => $sysv,
+ 'darwin' => $bsd,
+ 'dec_osf' => $sysv,
+ 'dragonfly' => $bsd,
+ 'freebsd' => $bsd,
+ 'gnu' => $sysv,
+ 'hpux' => $sysv,
+ 'linux' => $sysv,
+ 'mirbsd' => $bsd,
+ 'msys' => $msys,
+ 'MSWin32' => $sysv,
+ 'netbsd' => $bsd,
+ 'nto' => $sysv,
+ 'openbsd' => $bsd,
+ 'solaris' => $sysv,
+ 'svr5' => $sysv,
+ 'syllable' => "echo ps not supported",
+ );
+ }
+ $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing");
+
+ my (@pidtable,%parent_of,%children_of,%name_of);
+ # Table with pid -> children of pid
+ @pidtable = `$pid_parentpid_cmd{$^O}`;
+ my $p=$$;
+ for (@pidtable) {
+ # must match: 24436 21224 busybox ash
+ # must match: 24436 21224 <>
+ # or: perl -e 'while($0=" "){}'
+ if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/
+ or
+ $^O eq "darwin" and /^\s*(\S+)\s+(\S+)\s+()$/) {
+ $parent_of{$1} = $2;
+ push @{$children_of{$2}}, $1;
+ $name_of{$1} = $3;
+ } else {
+ ::die_bug("pidtable format: $_");
+ }
+ }
+ return(\%children_of, \%parent_of, \%name_of);
+ }
+}
+
+sub now {
+ # Returns time since epoch as in seconds with 3 decimals
+ # Uses:
+ # @Global::use
+ # Returns:
+ # $time = time now with millisecond accuracy
+ if(not $Global::use{"Time::HiRes"}) {
+ if(eval "use Time::HiRes qw ( time );") {
+ eval "sub TimeHiRestime { return Time::HiRes::time };";
+ } else {
+ eval "sub TimeHiRestime { return time() };";
+ }
+ $Global::use{"Time::HiRes"} = 1;
+ }
+
+ return (int(TimeHiRestime()*1000))/1000;
+}
+
+sub usleep {
+ # Sleep this many milliseconds.
+ # Input:
+ # $ms = milliseconds to sleep
+ my $ms = shift;
+ ::debug(int($ms),"ms ");
+ select(undef, undef, undef, $ms/1000);
+}
+
+sub reap_usleep {
+ # Reap dead children.
+ # If no dead children: Sleep specified amount with exponential backoff
+ # Input:
+ # $ms = milliseconds to sleep
+ # Returns:
+ # $ms/2+0.001 if children reaped
+ # $ms*1.1 if no children reaped
+ my $ms = shift;
+ if(reaper()) {
+ # Sleep exponentially shorter (1/2^n) if a job finished
+ return $ms/2+0.001;
+ } else {
+ if($opt::timeout) {
+ $Global::timeoutq->process_timeouts();
+ }
+ if($opt::memfree) {
+ kill_youngster_if_not_enough_mem();
+ }
+ # When a child dies, wake up from sleep (or select(,,,))
+ $SIG{CHLD} = sub { kill "ALRM", $$ };
+ usleep($ms);
+ # --compress needs $SIG{CHLD} undefined
+ delete $SIG{CHLD};
+ exit_if_disk_full();
+ if($opt::linebuffer) {
+ for my $job (values %Global::running) {
+ $job->print();
+ }
+ }
+ # Sleep exponentially longer (1.1^n) if a job did not finish,
+ # though at most 1000 ms.
+ return (($ms < 1000) ? ($ms * 1.1) : ($ms));
+ }
+}
+
+sub kill_youngster_if_not_enough_mem {
+ # Check each $sshlogin if there is enough mem.
+ # If less than 50% enough free mem: kill off the youngest child
+ # Put the child back in the queue.
+ # Uses:
+ # %Global::running
+ my %jobs_of;
+ my @sshlogins;
+
+ for my $job (values %Global::running) {
+ if(not $jobs_of{$job->sshlogin()}) {
+ push @sshlogins, $job->sshlogin();
+ }
+ push @{$jobs_of{$job->sshlogin()}}, $job;
+ }
+ for my $sshlogin (@sshlogins) {
+ for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) {
+ if($sshlogin->memfree() < $opt::memfree * 0.5) {
+ ::debug("mem","\n",map { $_->seq()." " } (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}));
+ ::debug("mem","\n", $job->seq(), "killed ",
+ $sshlogin->memfree()," < ",$opt::memfree * 0.5);
+ $job->kill();
+ $sshlogin->memfree_recompute();
+ } else {
+ last;
+ }
+ }
+ ::debug("mem","Free mem OK ", $sshlogin->memfree()," > ",$opt::memfree * 0.5);
+ }
+}
+
+sub __DEBUGGING__ {}
+
+sub debug {
+ # Uses:
+ # $Global::debug
+ # %Global::fd
+ # Returns: N/A
+ $Global::debug or return;
+ @_ = grep { defined $_ ? $_ : "" } @_;
+ if($Global::debug eq "all" or $Global::debug eq $_[0]) {
+ if($Global::fd{1}) {
+ # Original stdout was saved
+ my $stdout = $Global::fd{1};
+ print $stdout @_[1..$#_];
+ } else {
+ print @_[1..$#_];
+ }
+ }
+}
+
+sub my_memory_usage {
+ # Returns:
+ # memory usage if found
+ # 0 otherwise
+ use strict;
+ use FileHandle;
+
+ my $pid = $$;
+ if(-e "/proc/$pid/stat") {
+ my $fh = FileHandle->new(";
+ chomp $data;
+ $fh->close;
+
+ my @procinfo = split(/\s+/,$data);
+
+ return undef_as_zero($procinfo[22]);
+ } else {
+ return 0;
+ }
+}
+
+sub my_size {
+ # Returns:
+ # $size = size of object if Devel::Size is installed
+ # -1 otherwise
+ my @size_this = (@_);
+ eval "use Devel::Size qw(size total_size)";
+ if ($@) {
+ return -1;
+ } else {
+ return total_size(@_);
+ }
+}
+
+sub my_dump {
+ # Returns:
+ # ascii expression of object if Data::Dump(er) is installed
+ # error code otherwise
+ my @dump_this = (@_);
+ eval "use Data::Dump qw(dump);";
+ if ($@) {
+ # Data::Dump not installed
+ eval "use Data::Dumper;";
+ if ($@) {
+ my $err = "Neither Data::Dump nor Data::Dumper is installed\n".
+ "Not dumping output\n";
+ ::status($err);
+ return $err;
+ } else {
+ return Dumper(@dump_this);
+ }
+ } else {
+ # Create a dummy Data::Dump:dump as Hans Schou sometimes has
+ # it undefined
+ eval "sub Data::Dump:dump {}";
+ eval "use Data::Dump qw(dump);";
+ return (Data::Dump::dump(@dump_this));
+ }
+}
+
+sub my_croak {
+ eval "use Carp; 1";
+ $Carp::Verbose = 1;
+ croak(@_);
+}
+
+sub my_carp {
+ eval "use Carp; 1";
+ $Carp::Verbose = 1;
+ carp(@_);
+}
+
+sub __OBJECT_ORIENTED_PARTS__ {}
+
+package SSHLogin;
+
+sub new {
+ my $class = shift;
+ my $sshlogin_string = shift;
+ my $ncpus;
+ my %hostgroups;
+ # SSHLogins can have these formats:
+ # @grp+grp/ncpu//usr/bin/ssh user@server
+ # ncpu//usr/bin/ssh user@server
+ # /usr/bin/ssh user@server
+ # user@server
+ # ncpu/user@server
+ # @grp+grp/user@server
+ if($sshlogin_string =~ s:^\@([^/]+)/?::) {
+ # Look for SSHLogin hostgroups
+ %hostgroups = map { $_ => 1 } split(/\+/, $1);
+ }
+ if ($sshlogin_string =~ s:^(\d+)/::) {
+ # Override default autodetected ncpus unless missing
+ $ncpus = $1;
+ }
+ my $string = $sshlogin_string;
+ # An SSHLogin is always in the hostgroup of its $string-name
+ $hostgroups{$string} = 1;
+ @Global::hostgroups{keys %hostgroups} = values %hostgroups;
+ my @unget = ();
+ my $no_slash_string = $string;
+ $no_slash_string =~ s/[^-a-z0-9:]/_/gi;
+ return bless {
+ 'string' => $string,
+ 'jobs_running' => 0,
+ 'jobs_completed' => 0,
+ 'maxlength' => undef,
+ 'max_jobs_running' => undef,
+ 'orig_max_jobs_running' => undef,
+ 'ncpus' => $ncpus,
+ 'hostgroups' => \%hostgroups,
+ 'sshcommand' => undef,
+ 'serverlogin' => undef,
+ 'control_path_dir' => undef,
+ 'control_path' => undef,
+ 'time_to_login' => undef,
+ 'last_login_at' => undef,
+ 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" .
+ $no_slash_string,
+ 'loadavg' => undef,
+ 'last_loadavg_update' => 0,
+ 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" .
+ $no_slash_string,
+ 'swap_activity' => undef,
+ }, ref($class) || $class;
+}
+
+sub DESTROY {
+ my $self = shift;
+ # Remove temporary files if they are created.
+ unlink $self->{'loadavg_file'};
+ unlink $self->{'swap_activity_file'};
+}
+
+sub string {
+ my $self = shift;
+ return $self->{'string'};
+}
+
+sub jobs_running {
+ my $self = shift;
+ return ($self->{'jobs_running'} || "0");
+}
+
+sub inc_jobs_running {
+ my $self = shift;
+ $self->{'jobs_running'}++;
+}
+
+sub dec_jobs_running {
+ my $self = shift;
+ $self->{'jobs_running'}--;
+}
+
+sub set_maxlength {
+ my $self = shift;
+ $self->{'maxlength'} = shift;
+}
+
+sub maxlength {
+ my $self = shift;
+ return $self->{'maxlength'};
+}
+
+sub jobs_completed {
+ my $self = shift;
+ return $self->{'jobs_completed'};
+}
+
+sub in_hostgroups {
+ # Input:
+ # @hostgroups = the hostgroups to look for
+ # Returns:
+ # true if intersection of @hostgroups and the hostgroups of this
+ # SSHLogin is non-empty
+ my $self = shift;
+ return grep { defined $self->{'hostgroups'}{$_} } @_;
+}
+
+sub hostgroups {
+ my $self = shift;
+ return keys %{$self->{'hostgroups'}};
+}
+
+sub inc_jobs_completed {
+ my $self = shift;
+ $self->{'jobs_completed'}++;
+}
+
+sub set_max_jobs_running {
+ my $self = shift;
+ if(defined $self->{'max_jobs_running'}) {
+ $Global::max_jobs_running -= $self->{'max_jobs_running'};
+ }
+ $self->{'max_jobs_running'} = shift;
+ if(defined $self->{'max_jobs_running'}) {
+ # max_jobs_running could be resat if -j is a changed file
+ $Global::max_jobs_running += $self->{'max_jobs_running'};
+ }
+ # Initialize orig to the first non-zero value that comes around
+ $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'};
+}
+
+sub memfree {
+ # Returns:
+ # $memfree in bytes
+ my $self = shift;
+ $self->memfree_recompute();
+ return (not defined $self->{'memfree'} or $self->{'memfree'})
+}
+
+sub memfree_recompute {
+ my $self = shift;
+ my $script = memfreescript();
+
+ # TODO add sshlogin and backgrounding
+ $self->{'memfree'} = qx{ $script };
+ #::debug("mem","New free:",$self->{'memfree'}," ");
+}
+
+{
+ my $script;
+
+ sub memfreescript {
+ # Returns:
+ # shellscript for giving available memory in bytes
+ if(not $script) {
+ my %script_of = (
+ # $ free
+ # total used free shared buffers cached
+ # Mem: 8075152 4922780 3152372 338856 233356 1658604
+ # -/+ buffers/cache: 3030820 5044332
+ # Swap: 8286204 116924 8169280
+ "linux" => q{ print (1024*((grep /buffers.cache/, `free`)[0] =~ /buffers.cache:\s+\S+\s+(\S+)/)[0]) },
+ # $ vmstat 1 1
+ # procs memory page faults cpu
+ # r b w avm free re at pi po fr de sr in sy cs us sy id
+ # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99
+ "hpux" => q{ print (((reverse `vmstat 1 1`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) },
+ # $ vmstat 1 2
+ # kthr memory page disk faults cpu
+ # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id
+ # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97
+ # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92
+ #
+ # The last free is really free
+ "solaris" => q{ print (((reverse `vmstat 1 2`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) },
+ "freebsd" => q{
+ for(qx{/sbin/sysctl -a}) {
+ if (/^([^:]+):\s+(.+)\s*$/s) {
+ $sysctl->{$1} = $2;
+ }
+ }
+ print $sysctl->{"hw.pagesize"} *
+ ($sysctl->{"vm.stats.vm.v_cache_count"}
+ + $sysctl->{"vm.stats.vm.v_inactive_count"}
+ + $sysctl->{"vm.stats.vm.v_free_count"});
+ },
+ );
+ my $perlscript = "";
+ # Make a perl script that detects the OS ($^O) and runs
+ # the appropriate command
+ for my $os (keys %script_of) {
+ $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}';
+ }
+ $perlscript =~ s/[\t\n ]+/ /g;
+ $perlscript = "perl -e " . ::shell_quote_scalar($perlscript);
+ $script = $Global::envvar. " " .$perlscript;
+ }
+ return $script
+ }
+}
+
+sub swapping {
+ my $self = shift;
+ my $swapping = $self->swap_activity();
+ return (not defined $swapping or $swapping)
+}
+
+sub swap_activity {
+ # If the currently known swap activity is too old:
+ # Recompute a new one in the background
+ # Returns:
+ # last swap activity computed
+ my $self = shift;
+ # Should we update the swap_activity file?
+ my $update_swap_activity_file = 0;
+ if(-r $self->{'swap_activity_file'}) {
+ open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
+ my $swap_out = <$swap_fh>;
+ close $swap_fh;
+ if($swap_out =~ /^(\d+)$/) {
+ $self->{'swap_activity'} = $1;
+ ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
+ }
+ ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
+ if(time - $self->{'last_swap_activity_update'} > 10) {
+ # last swap activity update was started 10 seconds ago
+ ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
+ $update_swap_activity_file = 1;
+ }
+ } else {
+ ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
+ $self->{'swap_activity'} = undef;
+ $update_swap_activity_file = 1;
+ }
+ if($update_swap_activity_file) {
+ ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
+ $self->{'last_swap_activity_update'} = time;
+ -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
+ -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
+ my $swap_activity;
+ $swap_activity = swapactivityscript();
+ if($self->{'string'} ne ":") {
+ $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " .
+ ::shell_quote_scalar($swap_activity);
+ }
+ # Run swap_activity measuring.
+ # As the command can take long to run if run remote
+ # save it to a tmp file before moving it to the correct file
+ my $file = $self->{'swap_activity_file'};
+ my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp");
+ ::debug("swap", "\n", $swap_activity, "\n");
+ qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
+ }
+ return $self->{'swap_activity'};
+}
+
+{
+ my $script;
+
+ sub swapactivityscript {
+ # Returns:
+ # shellscript for detecting swap activity
+ #
+ # arguments for vmstat are OS dependant
+ # swap_in and swap_out are in different columns depending on OS
+ #
+ if(not $script) {
+ my %vmstat = (
+ # linux: $7*$8
+ # $ vmstat 1 2
+ # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu----
+ # r b swpd free buff cache si so bi bo in cs us sy id wa
+ # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1
+ # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0
+ 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'],
+
+ # solaris: $6*$7
+ # $ vmstat -S 1 2
+ # kthr memory page disk faults cpu
+ # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id
+ # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97
+ # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98
+ 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'],
+
+ # darwin (macosx): $21*$22
+ # $ vm_stat -c 2 1
+ # Mach Virtual Memory Statistics: (page size of 4096 bytes)
+ # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts
+ # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0
+ # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0
+ 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'],
+
+ # ultrix: $12*$13
+ # $ vmstat -S 1 2
+ # procs faults cpu memory page disk
+ # r b w in sy cs us sy id avm fre si so pi po fr de sr s0
+ # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0
+ # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0
+ 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'],
+
+ # aix: $6*$7
+ # $ vmstat 1 2
+ # System configuration: lcpu=1 mem=2048MB
+ #
+ # kthr memory page faults cpu
+ # ----- ----------- ------------------------ ------------ -----------
+ # r b avm fre re pi po fr sr cy in sy cs us sy id wa
+ # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0
+ # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5
+ 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'],
+
+ # freebsd: $8*$9
+ # $ vmstat -H 1 2
+ # procs memory page disks faults cpu
+ # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id
+ # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99
+ # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99
+ 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'],
+
+ # mirbsd: $8*$9
+ # $ vmstat 1 2
+ # procs memory page disks traps cpu
+ # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id
+ # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96
+ # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100
+ 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
+
+ # netbsd: $7*$8
+ # $ vmstat 1 2
+ # procs memory page disks faults cpu
+ # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id
+ # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100
+ # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100
+ 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'],
+
+ # openbsd: $8*$9
+ # $ vmstat 1 2
+ # procs memory page disks traps cpu
+ # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id
+ # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99
+ # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99
+ 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'],
+
+ # hpux: $8*$9
+ # $ vmstat 1 2
+ # procs memory page faults cpu
+ # r b w avm free re at pi po fr de sr in sy cs us sy id
+ # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83
+ # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105
+ 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'],
+
+ # dec_osf (tru64): $11*$12
+ # $ vmstat 1 2
+ # Virtual Memory Statistics: (pagesize = 8192)
+ # procs memory pages intr cpu
+ # r w u act free wire fault cow zero react pin pout in sy cs us sy id
+ # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94
+ # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98
+ 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'],
+
+ # gnu (hurd): $7*$8
+ # $ vmstat -k 1 2
+ # (pagesize: 4, size: 512288, swap size: 894972)
+ # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree
+ # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972
+ # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972
+ 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'],
+
+ # -nto (qnx has no swap)
+ #-irix
+ #-svr5 (scosysv)
+ );
+ my $perlscript = "";
+ # Make a perl script that detects the OS ($^O) and runs
+ # the appropriate vmstat command
+ for my $os (keys %vmstat) {
+ $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$
+ $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' .
+ $vmstat{$os}[1] . '}"` }';
+ }
+ $perlscript = "perl -e " . ::shell_quote_scalar($perlscript);
+ $script = $Global::envvar. " " .$perlscript;
+ }
+ return $script;
+ }
+}
+
+sub too_fast_remote_login {
+ my $self = shift;
+ if($self->{'last_login_at'} and $self->{'time_to_login'}) {
+ # sshd normally allows 10 simultaneous logins
+ # A login takes time_to_login
+ # So time_to_login/5 should be safe
+ # If now <= last_login + time_to_login/5: Then it is too soon.
+ my $too_fast = (::now() <= $self->{'last_login_at'}
+ + $self->{'time_to_login'}/5);
+ ::debug("run", "Too fast? $too_fast ");
+ return $too_fast;
+ } else {
+ # No logins so far (or time_to_login not computed): it is not too fast
+ return 0;
+ }
+}
+
+sub last_login_at {
+ my $self = shift;
+ return $self->{'last_login_at'};
+}
+
+sub set_last_login_at {
+ my $self = shift;
+ $self->{'last_login_at'} = shift;
+}
+
+sub loadavg_too_high {
+ my $self = shift;
+ my $loadavg = $self->loadavg();
+ return (not defined $loadavg or
+ $loadavg > $self->max_loadavg());
+}
+
+{
+ my $cmd;
+ sub loadavg_cmd {
+ if(not $cmd) {
+ # aix => "ps -ae -o state,command" # state wrong
+ # bsd => "ps ax -o state,command"
+ # sysv => "ps -ef -o s -o comm"
+ # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \
+ # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
+ # awk '{print $2,$1}'
+ # dec_osf => bsd
+ # dragonfly => bsd
+ # freebsd => bsd
+ # gnu => bsd
+ # hpux => ps -el|awk '{print $2,$14,$15}'
+ # irix => ps -ef -o state -o comm
+ # linux => bsd
+ # minix => ps el|awk '{print \$1,\$11}'
+ # mirbsd => bsd
+ # netbsd => bsd
+ # openbsd => bsd
+ # solaris => sysv
+ # svr5 => sysv
+ # ultrix => ps -ax | awk '{print $3,$5}'
+ # unixware => ps -el|awk '{print $2,$14,$15}'
+ my $ps = q{
+ $sysv="ps -ef -o s -o comm";
+ $sysv2="ps -ef -o state -o comm";
+ $bsd="ps ax -o state,command";
+ $psel="ps -el|awk '{ print \$2,\$14,\$15 }'";
+ $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n";
+ /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status |
+ awk '{print $2,$1}' };
+ $dummy="echo S COMMAND;echo R dummy";
+ %ps=(
+ 'aix' => "uptime",
+ 'cygwin' => $cygwin,
+ 'darwin' => $bsd,
+ 'dec_osf' => $sysv2,
+ 'dragonfly' => $bsd,
+ 'freebsd' => $bsd,
+ 'gnu' => $bsd,
+ 'hpux' => $psel,
+ 'irix' => $sysv2,
+ 'linux' => $bsd,
+ 'minix' => "ps el|awk '{print \$1,\$11}'",
+ 'mirbsd' => $bsd,
+ 'msys' => $sysv,
+ 'MSWin32' => $sysv,
+ 'netbsd' => $bsd,
+ 'nto' => $dummy,
+ 'openbsd' => $bsd,
+ 'solaris' => $sysv,
+ 'svr5' => $psel,
+ 'ultrix' => "ps -ax | awk '{print \$3,\$5}'",
+ );
+ print `$ps{$^O}`;
+ };
+ $ps =~ s/[ \t\n]+/ /g;
+ $cmd = "perl -e ".::shell_quote_scalar($ps);
+ }
+ return $cmd;
+ }
+}
+
+
+sub loadavg {
+ # If the currently know loadavg is too old:
+ # Recompute a new one in the background
+ # The load average is computed as the number of processes waiting for disk
+ # or CPU right now. So it is the server load this instant and not averaged over
+ # several minutes. This is needed so GNU Parallel will at most start one job
+ # that will push the load over the limit.
+ #
+ # Returns:
+ # $last_loadavg = last load average computed (undef if none)
+ my $self = shift;
+ # Should we update the loadavg file?
+ my $update_loadavg_file = 0;
+ if(open(my $load_fh, "<", $self->{'loadavg_file'})) {
+ local $/ = undef;
+ my $load_out = <$load_fh>;
+ close $load_fh;
+ # Count lines starting with D,O,R but command does not start with [
+ my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm);
+ if($load > 0) {
+ # load is overestimated by 1
+ $self->{'loadavg'} = $load - 1;
+ ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n");
+ } elsif ($load_out=~/average: (\d+.\d+)/) {
+ # AIX does not support instant load average
+ # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55
+ $self->{'loadavg'} = $1;
+ } else {
+ ::die_bug("loadavg_invalid_content: " .
+ $self->{'loadavg_file'} . "\n$load_out");
+ }
+ # Because of instant load average, it should not be delayed 10 secs
+ # The instant load does not give 2 R if there is only 1 cpu.
+# ::debug("load", "Last update: ", $self->{'last_loadavg_update'});
+# if(time - $self->{'last_loadavg_update'} > 10) {
+# # last loadavg was started 10 seconds ago
+# ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ",
+# $self->{'loadavg_file'});
+ $update_loadavg_file = 1;
+# }
+ } else {
+ ::debug("load", "No loadavg file: ", $self->{'loadavg_file'});
+ $self->{'loadavg'} = undef;
+ $update_loadavg_file = 1;
+ }
+ if($update_loadavg_file) {
+ ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n");
+ $self->{'last_loadavg_update'} = time;
+ -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
+ -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
+ my $cmd = "";
+ if($self->{'string'} ne ":") {
+ $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " .
+ ::shell_quote_scalar(loadavg_cmd());
+ } else {
+ $cmd .= loadavg_cmd();
+ }
+ # As the command can take long to run if run remote
+ # save it to a tmp file before moving it to the correct file
+ ::debug("load", "Cmd: ", $cmd);
+ my $file = $self->{'loadavg_file'};
+ my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa");
+ qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & };
+ }
+ return $self->{'loadavg'};
+}
+
+sub max_loadavg {
+ my $self = shift;
+ # If --load is a file it might be changed
+ if($Global::max_load_file) {
+ my $mtime = (stat($Global::max_load_file))[9];
+ if($mtime > $Global::max_load_file_last_mod) {
+ $Global::max_load_file_last_mod = $mtime;
+ for my $sshlogin (values %Global::host) {
+ $sshlogin->set_max_loadavg(undef);
+ }
+ }
+ }
+ if(not defined $self->{'max_loadavg'}) {
+ $self->{'max_loadavg'} =
+ $self->compute_max_loadavg($opt::load);
+ }
+ ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'});
+ return $self->{'max_loadavg'};
+}
+
+sub set_max_loadavg {
+ my $self = shift;
+ $self->{'max_loadavg'} = shift;
+}
+
+sub compute_max_loadavg {
+ # Parse the max loadaverage that the user asked for using --load
+ # Returns:
+ # max loadaverage
+ my $self = shift;
+ my $loadspec = shift;
+ my $load;
+ if(defined $loadspec) {
+ if($loadspec =~ /^\+(\d+)$/) {
+ # E.g. --load +2
+ my $j = $1;
+ $load =
+ $self->ncpus() + $j;
+ } elsif ($loadspec =~ /^-(\d+)$/) {
+ # E.g. --load -2
+ my $j = $1;
+ $load =
+ $self->ncpus() - $j;
+ } elsif ($loadspec =~ /^(\d+)\%$/) {
+ my $j = $1;
+ $load =
+ $self->ncpus() * $j / 100;
+ } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) {
+ $load = $1;
+ } elsif (-f $loadspec) {
+ $Global::max_load_file = $loadspec;
+ $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9];
+ if(open(my $in_fh, "<", $Global::max_load_file)) {
+ my $opt_load_file = join("",<$in_fh>);
+ close $in_fh;
+ $load = $self->compute_max_loadavg($opt_load_file);
+ } else {
+ ::error("Cannot open $loadspec.\n");
+ ::wait_and_exit(255);
+ }
+ } else {
+ ::error("Parsing of --load failed.\n");
+ ::die_usage();
+ }
+ if($load < 0.01) {
+ $load = 0.01;
+ }
+ }
+ return $load;
+}
+
+sub time_to_login {
+ my $self = shift;
+ return $self->{'time_to_login'};
+}
+
+sub set_time_to_login {
+ my $self = shift;
+ $self->{'time_to_login'} = shift;
+}
+
+sub max_jobs_running {
+ my $self = shift;
+ if(not defined $self->{'max_jobs_running'}) {
+ my $nproc = $self->compute_number_of_processes($opt::jobs);
+ $self->set_max_jobs_running($nproc);
+ }
+ return $self->{'max_jobs_running'};
+}
+
+sub orig_max_jobs_running {
+ my $self = shift;
+ return $self->{'orig_max_jobs_running'};
+}
+
+sub compute_number_of_processes {
+ # Number of processes wanted and limited by system resources
+ # Returns:
+ # Number of processes
+ my $self = shift;
+ my $opt_P = shift;
+ my $wanted_processes = $self->user_requested_processes($opt_P);
+ if(not defined $wanted_processes) {
+ $wanted_processes = $Global::default_simultaneous_sshlogins;
+ }
+ ::debug("load", "Wanted procs: $wanted_processes\n");
+ my $system_limit =
+ $self->processes_available_by_system_limit($wanted_processes);
+ ::debug("load", "Limited to procs: $system_limit\n");
+ return $system_limit;
+}
+
+{
+ my @children;
+ my $max_system_proc_reached;
+ my $more_filehandles;
+ my %fh;
+ my $tmpfhname;
+ my $count_jobs_already_read;
+ my @jobs;
+ my $job;
+ my @args;
+ my $arg;
+
+ sub reserve_filehandles {
+ # Reserves filehandle
+ my $n = shift;
+ for (1..$n) {
+ $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null");
+ }
+ }
+
+ sub reserve_process {
+ # Spawn a dummy process
+ my $child;
+ if($child = fork()) {
+ push @children, $child;
+ $Global::unkilled_children{$child} = 1;
+ } elsif(defined $child) {
+ # This is the child
+ # The child takes one process slot
+ # It will be killed later
+ $SIG{TERM} = $Global::original_sig{TERM};
+ sleep 10000000;
+ exit(0);
+ } else {
+ # Failed to spawn
+ $max_system_proc_reached = 1;
+ }
+ }
+
+ sub get_args_or_jobs {
+ # Get an arg or a job (depending on mode)
+ if($Global::semaphore or $opt::pipe) {
+ # Skip: No need to get args
+ return 1;
+ } elsif(defined $opt::retries and $count_jobs_already_read) {
+ # For retries we may need to run all jobs on this sshlogin
+ # so include the already read jobs for this sshlogin
+ $count_jobs_already_read--;
+ return 1;
+ } else {
+ if($opt::X or $opt::m) {
+ # The arguments may have to be re-spread over several jobslots
+ # So pessimistically only read one arg per jobslot
+ # instead of a full commandline
+ if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) {
+ if($Global::JobQueue->empty()) {
+ return 0;
+ } else {
+ $job = $Global::JobQueue->get();
+ push(@jobs, $job);
+ return 1;
+ }
+ } else {
+ $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get();
+ push(@args, $arg);
+ return 1;
+ }
+ } else {
+ # If there are no more command lines, then we have a process
+ # per command line, so no need to go further
+ if($Global::JobQueue->empty()) {
+ return 0;
+ } else {
+ $job = $Global::JobQueue->get();
+ push(@jobs, $job);
+ return 1;
+ }
+ }
+ }
+ }
+
+ sub cleanup {
+ # Cleanup: Close the files
+ for (values %fh) { close $_ }
+ # Cleanup: Kill the children
+ for my $pid (@children) {
+ kill 9, $pid;
+ waitpid($pid,0);
+ delete $Global::unkilled_children{$pid};
+ }
+ # Cleanup: Unget the command_lines or the @args
+ $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args);
+ $Global::JobQueue->unget(@jobs);
+ @jobs = undef;
+ }
+
+ sub processes_available_by_system_limit {
+ # If the wanted number of processes is bigger than the system limits:
+ # Limit them to the system limits
+ # Limits are: File handles, number of input lines, processes,
+ # and taking > 1 second to spawn 10 extra processes
+ # Returns:
+ # Number of processes
+ my $self = shift;
+ my $wanted_processes = shift;
+ my $system_limit = 0;
+ my $slow_spawining_warning_printed = 0;
+ my $time = time;
+ $more_filehandles = 1;
+ $tmpfhname = "TmpFhNamE";
+
+ # perl uses 7 filehandles for something?
+ # parallel uses 1 for memory_usage
+ # parallel uses 4 for ?
+ reserve_filehandles(12);
+ # Two processes for load avg and ?
+ reserve_process();
+ reserve_process();
+
+ # For --retries count also jobs already run
+ $count_jobs_already_read = $Global::JobQueue->next_seq();
+ my $wait_time_for_getting_args = 0;
+ my $start_time = time;
+ while(1) {
+ $system_limit >= $wanted_processes and last;
+ not $more_filehandles and last;
+ $max_system_proc_reached and last;
+
+ my $before_getting_arg = time;
+ get_args_or_jobs() or last;
+ $wait_time_for_getting_args += time - $before_getting_arg;
+ $system_limit++;
+
+ # Every simultaneous process uses 2 filehandles to write to
+ # and 2 filehandles to read from
+ reserve_filehandles(4);
+
+ # System process limit
+ reserve_process();
+
+ my $forktime = time - $time - $wait_time_for_getting_args;
+ ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ",
+ $forktime,
+ " (processes so far: ", $system_limit,")\n");
+ if($system_limit > 10 and
+ $forktime > 1 and
+ $forktime > $system_limit * 0.01
+ and not $slow_spawining_warning_printed) {
+ # It took more than 0.01 second to fork a processes on avg.
+ # Give the user a warning. He can press Ctrl-C if this
+ # sucks.
+ ::warning("Starting $system_limit processes took > $forktime sec.\n",
+ "Consider adjusting -j. Press CTRL-C to stop.\n");
+ $slow_spawining_warning_printed = 1;
+ }
+ }
+ cleanup();
+
+ if($system_limit < $wanted_processes) {
+ # The system_limit is less than the wanted_processes
+ if($system_limit < 1 and not $Global::JobQueue->empty()) {
+ ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n",
+ "or /proc/sys/kernel/pid_max may help.\n");
+ ::wait_and_exit(255);
+ }
+ if(not $more_filehandles) {
+ ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n",
+ "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ",
+ "raising ulimit -n or /etc/security/limits.conf may help.\n");
+ }
+ if($max_system_proc_reached) {
+ ::warning("Only enough available processes to run ", $system_limit,
+ " jobs in parallel. Raising ulimit -u or /etc/security/limits.conf\n",
+ "or /proc/sys/kernel/pid_max may help.\n");
+ }
+ }
+ if($] == 5.008008 and $system_limit > 1000) {
+ # https://savannah.gnu.org/bugs/?36942
+ $system_limit = 1000;
+ }
+ if($Global::JobQueue->empty()) {
+ $system_limit ||= 1;
+ }
+ if($self->string() ne ":" and
+ $system_limit > $Global::default_simultaneous_sshlogins) {
+ $system_limit =
+ $self->simultaneous_sshlogin_limit($system_limit);
+ }
+ return $system_limit;
+ }
+}
+
+sub simultaneous_sshlogin_limit {
+ # Test by logging in wanted number of times simultaneously
+ # Returns:
+ # min($wanted_processes,$working_simultaneous_ssh_logins-1)
+ my $self = shift;
+ my $wanted_processes = shift;
+ if($self->{'time_to_login'}) {
+ return $wanted_processes;
+ }
+
+ # Try twice because it guesses wrong sometimes
+ # Choose the minimal
+ my $ssh_limit =
+ ::min($self->simultaneous_sshlogin($wanted_processes),
+ $self->simultaneous_sshlogin($wanted_processes));
+ if($ssh_limit < $wanted_processes) {
+ my $serverlogin = $self->serverlogin();
+ ::warning("ssh to $serverlogin only allows ",
+ "for $ssh_limit simultaneous logins.\n",
+ "You may raise this by changing ",
+ "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n",
+ "Using only ",$ssh_limit-1," connections ",
+ "to avoid race conditions.\n");
+ }
+ # Race condition can cause problem if using all sshs.
+ if($ssh_limit > 1) { $ssh_limit -= 1; }
+ return $ssh_limit;
+}
+
+sub simultaneous_sshlogin {
+ # Using $sshlogin try to see if we can do $wanted_processes
+ # simultaneous logins
+ # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l
+ # Returns:
+ # Number of succesful logins
+ my $self = shift;
+ my $wanted_processes = shift;
+ my $sshcmd = $self->sshcommand();
+ my $serverlogin = $self->serverlogin();
+ my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : "";
+ # TODO sh -c wrapper to work for csh
+ my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes;
+ ::debug("init", "Trying $wanted_processes logins at $serverlogin\n");
+ open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or
+ ::die_bug("simultaneouslogin");
+ my $ssh_limit = <$simul_fh>;
+ close $simul_fh;
+ chomp $ssh_limit;
+ return $ssh_limit;
+}
+
+sub set_ncpus {
+ my $self = shift;
+ $self->{'ncpus'} = shift;
+}
+
+sub user_requested_processes {
+ # Parse the number of processes that the user asked for using -j
+ # Returns:
+ # the number of processes to run on this sshlogin
+ my $self = shift;
+ my $opt_P = shift;
+ my $processes;
+ if(defined $opt_P) {
+ if($opt_P =~ /^\+(\d+)$/) {
+ # E.g. -P +2
+ my $j = $1;
+ $processes =
+ $self->ncpus() + $j;
+ } elsif ($opt_P =~ /^-(\d+)$/) {
+ # E.g. -P -2
+ my $j = $1;
+ $processes =
+ $self->ncpus() - $j;
+ } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) {
+ # E.g. -P 10.5%
+ my $j = $1;
+ $processes =
+ $self->ncpus() * $j / 100;
+ } elsif ($opt_P =~ /^(\d+)$/) {
+ $processes = $1;
+ if($processes == 0) {
+ # -P 0 = infinity (or at least close)
+ $processes = $Global::infinity;
+ }
+ } elsif (-f $opt_P) {
+ $Global::max_procs_file = $opt_P;
+ if(open(my $in_fh, "<", $Global::max_procs_file)) {
+ my $opt_P_file = join("",<$in_fh>);
+ close $in_fh;
+ $processes = $self->user_requested_processes($opt_P_file);
+ } else {
+ ::error("Cannot open $opt_P.\n");
+ ::wait_and_exit(255);
+ }
+ } else {
+ ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n");
+ ::die_usage();
+ }
+ $processes = ::ceil($processes);
+ }
+ return $processes;
+}
+
+sub ncpus {
+ my $self = shift;
+ if(not defined $self->{'ncpus'}) {
+ my $sshcmd = $self->sshcommand();
+ my $serverlogin = $self->serverlogin();
+ if($serverlogin eq ":") {
+ if($opt::use_cpus_instead_of_cores) {
+ $self->{'ncpus'} = no_of_cpus();
+ } else {
+ $self->{'ncpus'} = no_of_cores();
+ }
+ } else {
+ my $ncpu;
+ my $sqe = ::shell_quote_scalar($Global::envvar);
+ if($opt::use_cpus_instead_of_cores) {
+ $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus);
+ } else {
+ ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n));
+ $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores);
+ }
+ chomp $ncpu;
+ if($ncpu =~ /^\s*[0-9]+\s*$/s) {
+ $self->{'ncpus'} = $ncpu;
+ } else {
+ ::warning("Could not figure out ",
+ "number of cpus on $serverlogin ($ncpu). Using 1.\n");
+ $self->{'ncpus'} = 1;
+ }
+ }
+ }
+ return $self->{'ncpus'};
+}
+
+sub no_of_cpus {
+ # Returns:
+ # Number of physical CPUs
+ local $/ = "\n"; # If delimiter is set, then $/ will be wrong
+ my $no_of_cpus;
+ if ($^O eq 'linux') {
+ $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux();
+ } elsif ($^O eq 'freebsd') {
+ $no_of_cpus = no_of_cpus_freebsd();
+ } elsif ($^O eq 'netbsd') {
+ $no_of_cpus = no_of_cpus_netbsd();
+ } elsif ($^O eq 'openbsd') {
+ $no_of_cpus = no_of_cpus_openbsd();
+ } elsif ($^O eq 'gnu') {
+ $no_of_cpus = no_of_cpus_hurd();
+ } elsif ($^O eq 'darwin') {
+ $no_of_cpus = no_of_cpus_darwin();
+ } elsif ($^O eq 'solaris') {
+ $no_of_cpus = no_of_cpus_solaris();
+ } elsif ($^O eq 'aix') {
+ $no_of_cpus = no_of_cpus_aix();
+ } elsif ($^O eq 'hpux') {
+ $no_of_cpus = no_of_cpus_hpux();
+ } elsif ($^O eq 'nto') {
+ $no_of_cpus = no_of_cpus_qnx();
+ } elsif ($^O eq 'svr5') {
+ $no_of_cpus = no_of_cpus_openserver();
+ } elsif ($^O eq 'irix') {
+ $no_of_cpus = no_of_cpus_irix();
+ } elsif ($^O eq 'dec_osf') {
+ $no_of_cpus = no_of_cpus_tru64();
+ } else {
+ $no_of_cpus = (no_of_cpus_gnu_linux()
+ || no_of_cpus_freebsd()
+ || no_of_cpus_netbsd()
+ || no_of_cpus_openbsd()
+ || no_of_cpus_hurd()
+ || no_of_cpus_darwin()
+ || no_of_cpus_solaris()
+ || no_of_cpus_aix()
+ || no_of_cpus_hpux()
+ || no_of_cpus_qnx()
+ || no_of_cpus_openserver()
+ || no_of_cpus_irix()
+ || no_of_cpus_tru64()
+ # Number of cores is better than no guess for #CPUs
+ || nproc()
+ );
+ }
+ if($no_of_cpus) {
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+ } else {
+ ::warning("Cannot figure out number of cpus. Using 1.\n");
+ return 1;
+ }
+}
+
+sub no_of_cores {
+ # Returns:
+ # Number of CPU cores
+ local $/ = "\n"; # If delimiter is set, then $/ will be wrong
+ my $no_of_cores;
+ if ($^O eq 'linux') {
+ $no_of_cores = no_of_cores_gnu_linux();
+ } elsif ($^O eq 'freebsd') {
+ $no_of_cores = no_of_cores_freebsd();
+ } elsif ($^O eq 'netbsd') {
+ $no_of_cores = no_of_cores_netbsd();
+ } elsif ($^O eq 'openbsd') {
+ $no_of_cores = no_of_cores_openbsd();
+ } elsif ($^O eq 'gnu') {
+ $no_of_cores = no_of_cores_hurd();
+ } elsif ($^O eq 'darwin') {
+ $no_of_cores = no_of_cores_darwin();
+ } elsif ($^O eq 'solaris') {
+ $no_of_cores = no_of_cores_solaris();
+ } elsif ($^O eq 'aix') {
+ $no_of_cores = no_of_cores_aix();
+ } elsif ($^O eq 'hpux') {
+ $no_of_cores = no_of_cores_hpux();
+ } elsif ($^O eq 'nto') {
+ $no_of_cores = no_of_cores_qnx();
+ } elsif ($^O eq 'svr5') {
+ $no_of_cores = no_of_cores_openserver();
+ } elsif ($^O eq 'irix') {
+ $no_of_cores = no_of_cores_irix();
+ } elsif ($^O eq 'dec_osf') {
+ $no_of_cores = no_of_cores_tru64();
+ } else {
+ $no_of_cores = (no_of_cores_gnu_linux()
+ || no_of_cores_freebsd()
+ || no_of_cores_netbsd()
+ || no_of_cores_openbsd()
+ || no_of_cores_hurd()
+ || no_of_cores_darwin()
+ || no_of_cores_solaris()
+ || no_of_cores_aix()
+ || no_of_cores_hpux()
+ || no_of_cores_qnx()
+ || no_of_cores_openserver()
+ || no_of_cores_irix()
+ || no_of_cores_tru64()
+ || nproc()
+ );
+ }
+ if($no_of_cores) {
+ chomp $no_of_cores;
+ return $no_of_cores;
+ } else {
+ ::warning("Cannot figure out number of CPU cores. Using 1.\n");
+ return 1;
+ }
+}
+
+sub nproc {
+ # Returns:
+ # Number of cores using `nproc`
+ my $no_of_cores = qx{ sh -c 'nproc 2>/dev/null' };
+ return $no_of_cores;
+}
+
+sub no_of_cpus_gnu_linux {
+ # Returns:
+ # Number of physical CPUs on GNU/Linux
+ # undef if not GNU/Linux
+ my $no_of_cpus;
+ my $no_of_cores;
+ my $no_of_active_cores;
+ if(-e "/proc/cpuinfo") {
+ $no_of_cpus = 0;
+ $no_of_cores = 0;
+ my %seen;
+ if(open(my $in_fh, "<", "/proc/cpuinfo")) {
+ while(<$in_fh>) {
+ if(/^physical id.*[:](.*)/ and not $seen{$1}++) {
+ $no_of_cpus++;
+ }
+ /^processor.*[:]/i and $no_of_cores++;
+ }
+ close $in_fh;
+ }
+ }
+ if(-e "/proc/self/status") {
+ # if 'taskset' is used to limit number of cores
+ if(open(my $in_fh, "<", "/proc/self/status")) {
+ while(<$in_fh>) {
+ if(/^Cpus_allowed:\s*(\S+)/) {
+ my $a = $1;
+ $a =~ tr/,//d;
+ $no_of_active_cores = unpack ("%32b*", pack ("H*",$a));
+ }
+ }
+ close $in_fh;
+ }
+ }
+ return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores));
+}
+
+sub no_of_cores_gnu_linux {
+ # Returns:
+ # Number of CPU cores on GNU/Linux
+ # undef if not GNU/Linux
+ my $no_of_cores;
+ my $no_of_active_cores;
+ if(-e "/proc/cpuinfo") {
+ $no_of_cores = 0;
+ open(my $in_fh, "<", "/proc/cpuinfo") || return undef;
+ while(<$in_fh>) {
+ /^processor.*[:]/i and $no_of_cores++;
+ }
+ close $in_fh;
+ }
+ if(-e "/proc/self/status") {
+ # if 'taskset' is used to limit number of cores
+ if(open(my $in_fh, "<", "/proc/self/status")) {
+ while(<$in_fh>) {
+ if(/^Cpus_allowed:\s*(\S+)/) {
+ my $a = $1;
+ $a =~ tr/,//d;
+ $no_of_active_cores = unpack ("%32b*", pack ("H*",$a));
+ }
+ }
+ close $in_fh;
+ }
+ }
+ return (::min($no_of_cores,$no_of_active_cores));
+}
+
+sub no_of_cpus_freebsd {
+ # Returns:
+ # Number of physical CPUs on FreeBSD
+ # undef if not FreeBSD
+ my $no_of_cpus =
+ (qx{ sh -c 'sysctl -a dev.cpu 2>/dev/null' | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' }
+ or
+ qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' });
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_freebsd {
+ # Returns:
+ # Number of CPU cores on FreeBSD
+ # undef if not FreeBSD
+ my $no_of_cores =
+ (qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' }
+ or
+ qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' });
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_netbsd {
+ # Returns:
+ # Number of physical CPUs on NetBSD
+ # undef if not NetBSD
+ my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_netbsd {
+ # Returns:
+ # Number of CPU cores on NetBSD
+ # undef if not NetBSD
+ my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_openbsd {
+ # Returns:
+ # Number of physical CPUs on OpenBSD
+ # undef if not OpenBSD
+ my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_openbsd {
+ # Returns:
+ # Number of CPU cores on OpenBSD
+ # undef if not OpenBSD
+ my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' };
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_hurd {
+ # Returns:
+ # Number of physical CPUs on HURD
+ # undef if not HURD
+ my $no_of_cpus = qx{ nproc };
+ chomp $no_of_cpus;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_hurd {
+ # Returns:
+ # Number of physical CPUs on HURD
+ # undef if not HURD
+ my $no_of_cores = `nproc`;
+ chomp $no_of_cores;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_darwin {
+ # Returns:
+ # Number of physical CPUs on Mac Darwin
+ # undef if not Mac Darwin
+ my $no_of_cpus =
+ (qx{ sh -c 'sysctl -n hw.physicalcpu 2>/dev/null' }
+ or
+ qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' });
+ return $no_of_cpus;
+}
+
+sub no_of_cores_darwin {
+ # Returns:
+ # Number of CPU cores on Mac Darwin
+ # undef if not Mac Darwin
+ my $no_of_cores =
+ (qx{ sh -c 'sysctl -n hw.logicalcpu 2>/dev/null' }
+ or
+ qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' });
+ return $no_of_cores;
+}
+
+sub no_of_cpus_solaris {
+ # Returns:
+ # Number of physical CPUs on Solaris
+ # undef if not Solaris
+ if(-x "/usr/sbin/psrinfo") {
+ my @psrinfo = `/usr/sbin/psrinfo`;
+ if($#psrinfo >= 0) {
+ return $#psrinfo +1;
+ }
+ }
+ if(-x "/usr/sbin/prtconf") {
+ my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance };
+ if($#prtconf >= 0) {
+ return $#prtconf +1;
+ }
+ }
+ return undef;
+}
+
+sub no_of_cores_solaris {
+ # Returns:
+ # Number of CPU cores on Solaris
+ # undef if not Solaris
+ if(-x "/usr/sbin/psrinfo") {
+ my @psrinfo = `/usr/sbin/psrinfo`;
+ if($#psrinfo >= 0) {
+ return $#psrinfo +1;
+ }
+ }
+ if(-x "/usr/sbin/prtconf") {
+ my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance };
+ if($#prtconf >= 0) {
+ return $#prtconf +1;
+ }
+ }
+ return undef;
+}
+
+sub no_of_cpus_aix {
+ # Returns:
+ # Number of physical CPUs on AIX
+ # undef if not AIX
+ my $no_of_cpus = 0;
+ if(-x "/usr/sbin/lscfg") {
+ open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '")
+ || return undef;
+ $no_of_cpus = <$in_fh>;
+ chomp ($no_of_cpus);
+ close $in_fh;
+ }
+ return $no_of_cpus;
+}
+
+sub no_of_cores_aix {
+ # Returns:
+ # Number of CPU cores on AIX
+ # undef if not AIX
+ my $no_of_cores;
+ if(-x "/usr/bin/vmstat") {
+ open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef;
+ while(<$in_fh>) {
+ /lcpu=([0-9]*) / and $no_of_cores = $1;
+ }
+ close $in_fh;
+ }
+ return $no_of_cores;
+}
+
+sub no_of_cpus_hpux {
+ # Returns:
+ # Number of physical CPUs on HP-UX
+ # undef if not HP-UX
+ my $no_of_cpus =
+ qx{ sh -c '/usr/bin/mpsched -s 2>&1' | grep 'Locality Domain Count' | awk '{ print \$4 }'};
+ return $no_of_cpus;
+}
+
+sub no_of_cores_hpux {
+ # Returns:
+ # Number of CPU cores on HP-UX
+ # undef if not HP-UX
+ my $no_of_cores =
+ qx{ sh -c '/usr/bin/mpsched -s 2>&1' | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1\n"'};
+ return $no_of_cores;
+}
+
+sub no_of_cpus_qnx {
+ # Returns:
+ # Number of physical CPUs on QNX
+ # undef if not QNX
+ # BUG: It is not known how to calculate this.
+ my $no_of_cpus = 0;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_qnx {
+ # Returns:
+ # Number of CPU cores on QNX
+ # undef if not QNX
+ # BUG: It is not known how to calculate this.
+ my $no_of_cores = 0;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_openserver {
+ # Returns:
+ # Number of physical CPUs on SCO OpenServer
+ # undef if not SCO OpenServer
+ my $no_of_cpus = 0;
+ if(-x "/usr/sbin/psrinfo") {
+ my @psrinfo = `/usr/sbin/psrinfo`;
+ if($#psrinfo >= 0) {
+ return $#psrinfo +1;
+ }
+ }
+ return $no_of_cpus;
+}
+
+sub no_of_cores_openserver {
+ # Returns:
+ # Number of CPU cores on SCO OpenServer
+ # undef if not SCO OpenServer
+ my $no_of_cores = 0;
+ if(-x "/usr/sbin/psrinfo") {
+ my @psrinfo = `/usr/sbin/psrinfo`;
+ if($#psrinfo >= 0) {
+ return $#psrinfo +1;
+ }
+ }
+ return $no_of_cores;
+}
+
+sub no_of_cpus_irix {
+ # Returns:
+ # Number of physical CPUs on IRIX
+ # undef if not IRIX
+ my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_irix {
+ # Returns:
+ # Number of CPU cores on IRIX
+ # undef if not IRIX
+ my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`;
+ return $no_of_cores;
+}
+
+sub no_of_cpus_tru64 {
+ # Returns:
+ # Number of physical CPUs on Tru64
+ # undef if not Tru64
+ my $no_of_cpus = `sizer -pr`;
+ return $no_of_cpus;
+}
+
+sub no_of_cores_tru64 {
+ # Returns:
+ # Number of CPU cores on Tru64
+ # undef if not Tru64
+ my $no_of_cores = `sizer -pr`;
+ return $no_of_cores;
+}
+
+sub sshcommand {
+ my $self = shift;
+ if (not defined $self->{'sshcommand'}) {
+ $self->sshcommand_of_sshlogin();
+ }
+ return $self->{'sshcommand'};
+}
+
+sub serverlogin {
+ my $self = shift;
+ if (not defined $self->{'serverlogin'}) {
+ $self->sshcommand_of_sshlogin();
+ }
+ return $self->{'serverlogin'};
+}
+
+sub sshcommand_of_sshlogin {
+ # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server')
+ # 'user@server' -> ('ssh','user@server')
+ # 'myssh user@server' -> ('myssh','user@server')
+ # 'myssh -l user server' -> ('myssh -l user','server')
+ # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server')
+ # Returns:
+ # sshcommand - defaults to 'ssh'
+ # login@host
+ my $self = shift;
+ my ($sshcmd, $serverlogin);
+ if($self->{'string'} =~ /(.+) (\S+)$/) {
+ # Own ssh command
+ $sshcmd = $1; $serverlogin = $2;
+ } else {
+ # Normal ssh
+ if($opt::controlmaster) {
+ # Use control_path to make ssh faster
+ my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p";
+ $sshcmd = "ssh -S ".$control_path;
+ $serverlogin = $self->{'string'};
+ if(not $self->{'control_path'}{$control_path}++) {
+ # Master is not running for this control_path
+ # Start it
+ my $pid = fork();
+ if($pid) {
+ $Global::sshmaster{$pid} ||= 1;
+ } else {
+ $SIG{'TERM'} = undef;
+ # Ignore the 'foo' being printed
+ open(STDOUT,">","/dev/null");
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument"
+ open(STDERR,">","/dev/null");
+ open(STDIN,"<","/dev/null");
+ # Run a sleep that outputs data, so it will discover if the ssh connection closes.
+ my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}');
+ my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep);
+ exec(@master);
+ }
+ }
+ } else {
+ $sshcmd = "ssh"; $serverlogin = $self->{'string'};
+ }
+ }
+ $self->{'sshcommand'} = $sshcmd;
+ $self->{'serverlogin'} = $serverlogin;
+}
+
+sub control_path_dir {
+ # Returns:
+ # path to directory
+ my $self = shift;
+ if(not defined $self->{'control_path_dir'}) {
+ -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
+ -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
+ $self->{'control_path_dir'} =
+ File::Temp::tempdir($ENV{'HOME'}
+ . "/.parallel/tmp/control_path_dir-XXXX",
+ CLEANUP => 1);
+ }
+ return $self->{'control_path_dir'};
+}
+
+sub rsync_transfer_cmd {
+ # Command to run to transfer a file
+ # Input:
+ # $file = filename of file to transfer
+ # $workdir = destination dir
+ # Returns:
+ # $cmd = rsync command to run to transfer $file ("" if unreadable)
+ my $self = shift;
+ my $file = shift;
+ my $workdir = shift;
+ if(not -r $file) {
+ ::warning($file, " is not readable and will not be transferred.\n");
+ return "true";
+ }
+ my $rsync_destdir;
+ if($file =~ m:^/:) {
+ # rsync /foo/bar /
+ $rsync_destdir = "/";
+ } else {
+ $rsync_destdir = ::shell_quote_file($workdir);
+ }
+ $file = ::shell_quote_file($file);
+ my $sshcmd = $self->sshcommand();
+ my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd);
+ my $serverlogin = $self->serverlogin();
+ # Make dir if it does not exist
+ return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" .
+ rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )";
+}
+
+sub cleanup_cmd {
+ # Command to run to remove the remote file
+ # Input:
+ # $file = filename to remove
+ # $workdir = destination dir
+ # Returns:
+ # $cmd = ssh command to run to remove $file and empty parent dirs
+ my $self = shift;
+ my $file = shift;
+ my $workdir = shift;
+ my $f = $file;
+ if($f =~ m:/\./:) {
+ # foo/bar/./baz/quux => workdir/baz/quux
+ # /foo/bar/./baz/quux => workdir/baz/quux
+ $f =~ s:.*/\./:$workdir/:;
+ } elsif($f =~ m:^[^/]:) {
+ # foo/bar => workdir/foo/bar
+ $f = $workdir."/".$f;
+ }
+ my @subdirs = split m:/:, ::dirname($f);
+ my @rmdir;
+ my $dir = "";
+ for(@subdirs) {
+ $dir .= $_."/";
+ unshift @rmdir, ::shell_quote_file($dir);
+ }
+ my $rmdir = @rmdir ? "sh -c 'rmdir @rmdir 2>/dev/null';" : "";
+ if(defined $opt::workdir and $opt::workdir eq "...") {
+ $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';';
+ }
+
+ $f = ::shell_quote_file($f);
+ my $sshcmd = $self->sshcommand();
+ my $serverlogin = $self->serverlogin();
+ return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)");
+}
+
+{
+ my $rsync;
+
+ sub rsync {
+ # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7.
+ # If the version >= 3.1.0: downgrade to protocol 30
+ if(not $rsync) {
+ my @out = `rsync --version`;
+ for (@out) {
+ if(/version (\d+.\d+)(.\d+)?/) {
+ if($1 >= 3.1) {
+ # Version 3.1.0 or later: Downgrade to protocol 30
+ $rsync = "rsync --protocol 30";
+ } else {
+ $rsync = "rsync";
+ }
+ }
+ }
+ $rsync or ::die_bug("Cannot figure out version of rsync: @out");
+ }
+ return $rsync;
+ }
+}
+
+
+package JobQueue;
+
+sub new {
+ my $class = shift;
+ my $commandref = shift;
+ my $read_from = shift;
+ my $context_replace = shift;
+ my $max_number_of_args = shift;
+ my $return_files = shift;
+ my $commandlinequeue = CommandLineQueue->new
+ ($commandref, $read_from, $context_replace, $max_number_of_args,
+ $return_files);
+ my @unget = ();
+ return bless {
+ 'unget' => \@unget,
+ 'commandlinequeue' => $commandlinequeue,
+ 'total_jobs' => undef,
+ }, ref($class) || $class;
+}
+
+sub get {
+ my $self = shift;
+
+ if(@{$self->{'unget'}}) {
+ my $job = shift @{$self->{'unget'}};
+ return ($job);
+ } else {
+ my $commandline = $self->{'commandlinequeue'}->get();
+ if(defined $commandline) {
+ my $job = Job->new($commandline);
+ return $job;
+ } else {
+ return undef;
+ }
+ }
+}
+
+sub unget {
+ my $self = shift;
+ unshift @{$self->{'unget'}}, @_;
+}
+
+sub empty {
+ my $self = shift;
+ my $empty = (not @{$self->{'unget'}})
+ && $self->{'commandlinequeue'}->empty();
+ ::debug("run", "JobQueue->empty $empty ");
+ return $empty;
+}
+
+sub total_jobs {
+ my $self = shift;
+ if(not defined $self->{'total_jobs'}) {
+ my $job;
+ my @queue;
+ my $start = time;
+ while($job = $self->get()) {
+ if(time - $start > 10) {
+ ::warning("Reading ".scalar(@queue)." arguments took longer than 10 seconds.\n");
+ $opt::eta && ::warning("Consider removing --eta.\n");
+ $opt::bar && ::warning("Consider removing --bar.\n");
+ $opt::shuf && ::warning("Consider removing --shuf.\n");
+ last;
+ }
+ push @queue, $job;
+ }
+ while($job = $self->get()) {
+ push @queue, $job;
+ }
+ if($opt::shuf) {
+ my $i = @queue;
+ while (--$i) {
+ my $j = int rand($i+1);
+ @queue[$i,$j] = @queue[$j,$i];
+ }
+ my $seq = 1;
+ for my $job (@queue) {
+ $job->{'commandline'}->set_seq($seq++);
+ }
+ }
+ $self->unget(@queue);
+ $self->{'total_jobs'} = $#queue+1;
+ }
+ return $self->{'total_jobs'};
+}
+
+sub next_seq {
+ my $self = shift;
+
+ return $self->{'commandlinequeue'}->seq();
+}
+
+sub quote_args {
+ my $self = shift;
+ return $self->{'commandlinequeue'}->quote_args();
+}
+
+
+package Job;
+
+sub new {
+ my $class = shift;
+ my $commandlineref = shift;
+ return bless {
+ 'commandline' => $commandlineref, # CommandLine object
+ 'workdir' => undef, # --workdir
+ # filehandle for stdin (used for --pipe)
+ # filename for writing stdout to (used for --files)
+ # remaining data not sent to stdin (used for --pipe)
+ # amount of data sent via stdin (used for --pipe)
+ 'transfersize' => 0, # size of files using --transfer
+ 'returnsize' => 0, # size of files using --return
+ 'pid' => undef,
+ # hash of { SSHLogins => number of times the command failed there }
+ 'failed' => undef,
+ 'sshlogin' => undef,
+ # The commandline wrapped with rsync and ssh
+ 'sshlogin_wrap' => undef,
+ 'exitstatus' => undef,
+ 'exitsignal' => undef,
+ # Timestamp for timeout if any
+ 'timeout' => undef,
+ 'virgin' => 1,
+ }, ref($class) || $class;
+}
+
+sub replaced {
+ my $self = shift;
+ $self->{'commandline'} or ::die_bug("commandline empty");
+ return $self->{'commandline'}->replaced();
+}
+
+sub seq {
+ my $self = shift;
+ return $self->{'commandline'}->seq();
+}
+
+sub set_seq {
+ my $self = shift;
+ return $self->{'commandline'}->set_seq(shift);
+}
+
+sub slot {
+ my $self = shift;
+ return $self->{'commandline'}->slot();
+}
+
+{
+ my($cattail);
+
+ sub cattail {
+ # Returns:
+ # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink]
+ if(not $cattail) {
+ $cattail = q{
+ # cat followed by tail (possibly with rm as soon at the file is opened)
+ # If $writerpid dead: finish after this round
+ use Fcntl;
+ $|=1;
+
+ my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV;
+ if($read_file) {
+ open(IN,"<",$read_file) || die("cattail: Cannot open $read_file");
+ } else {
+ *IN = *STDIN;
+ }
+ while(! -s $comfile) {
+ # Writer has not opened the buffer file, so we cannot remove it yet
+ $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep);
+ usleep($sleep);
+ }
+ # The writer and we have both opened the file, so it is safe to unlink it
+ unlink $unlink_file;
+ unlink $comfile;
+
+ my $first_round = 1;
+ my $flags;
+ fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+ $flags |= O_NONBLOCK; # Add non-blocking to the flags
+ fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+
+ while(1) {
+ # clear EOF
+ seek(IN,0,1);
+ my $writer_running = kill 0, $writerpid;
+ $read = sysread(IN,$buf,32768);
+ if($read) {
+ if($first_round) {
+ # Only start the command if there any input to process
+ $first_round = 0;
+ open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd");
+ }
+
+ # Blocking print
+ while($buf) {
+ my $bytes_written = syswrite(OUT,$buf);
+ # syswrite may be interrupted by SIGHUP
+ substr($buf,0,$bytes_written) = "";
+ }
+ # Something printed: Wait less next time
+ $sleep /= 2;
+ } else {
+ if(eof(IN) and not $writer_running) {
+ # Writer dead: There will never be more to read => exit
+ exit;
+ }
+ # TODO This could probably be done more efficiently using select(2)
+ # Nothing read: Wait longer before next read
+ # Up to 100 milliseconds
+ $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep);
+ usleep($sleep);
+ }
+ }
+
+ sub usleep {
+ # Sleep this many milliseconds.
+ my $secs = shift;
+ select(undef, undef, undef, $secs/1000);
+ }
+ };
+ $cattail =~ s/#.*//mg;
+ $cattail =~ s/\s+/ /g;
+ }
+ return $cattail;
+ }
+}
+
+sub openoutputfiles {
+ # Open files for STDOUT and STDERR
+ # Set file handles in $self->fh
+ my $self = shift;
+ my ($outfhw, $errfhw, $outname, $errname);
+ if($opt::results) {
+ my $args_as_dirname = $self->{'commandline'}->args_as_dirname();
+ # Output in: prefix/name1/val1/name2/val2/stdout
+ my $dir = $opt::results."/".$args_as_dirname;
+ if(eval{ File::Path::mkpath($dir); }) {
+ # OK
+ } else {
+ # mkpath failed: Argument probably too long.
+ # Set $Global::max_file_length, which will keep the individual
+ # dir names shorter than the max length
+ max_file_name_length($opt::results);
+ $args_as_dirname = $self->{'commandline'}->args_as_dirname();
+ # prefix/name1/val1/name2/val2/
+ $dir = $opt::results."/".$args_as_dirname;
+ File::Path::mkpath($dir);
+ }
+ # prefix/name1/val1/name2/val2/stdout
+ $outname = "$dir/stdout";
+ if(not open($outfhw, "+>", $outname)) {
+ ::error("Cannot write to `$outname'.\n");
+ ::wait_and_exit(255);
+ }
+ # prefix/name1/val1/name2/val2/stderr
+ $errname = "$dir/stderr";
+ if(not open($errfhw, "+>", $errname)) {
+ ::error("Cannot write to `$errname'.\n");
+ ::wait_and_exit(255);
+ }
+ $self->set_fh(1,"unlink","");
+ $self->set_fh(2,"unlink","");
+ } elsif(not $opt::ungroup) {
+ # To group we create temporary files for STDOUT and STDERR
+ # To avoid the cleanup unlink the files immediately (but keep them open)
+ if(@Global::tee_jobs) {
+ # files must be removed when the tee is done
+ } elsif($opt::files) {
+ ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
+ ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
+ # --files => only remove stderr
+ $self->set_fh(1,"unlink","");
+ $self->set_fh(2,"unlink",$errname);
+ } else {
+ ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par");
+ ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par");
+ $self->set_fh(1,"unlink",$outname);
+ $self->set_fh(2,"unlink",$errname);
+ }
+ } else {
+ # --ungroup
+ open($outfhw,">&",$Global::fd{1}) || die;
+ open($errfhw,">&",$Global::fd{2}) || die;
+ # File name must be empty as it will otherwise be printed
+ $outname = "";
+ $errname = "";
+ $self->set_fh(1,"unlink",$outname);
+ $self->set_fh(2,"unlink",$errname);
+ }
+ # Set writing FD
+ $self->set_fh(1,'w',$outfhw);
+ $self->set_fh(2,'w',$errfhw);
+ $self->set_fh(1,'name',$outname);
+ $self->set_fh(2,'name',$errname);
+ if($opt::compress) {
+ $self->filter_through_compress();
+ } elsif(not $opt::ungroup) {
+ $self->grouped();
+ }
+ if($opt::linebuffer) {
+ $self->set_non_blocking();
+ }
+}
+
+sub grouped {
+ my $self = shift;
+ # Set reading FD if using --group (--ungroup does not need)
+ for my $fdno (1,2) {
+ # Re-open the file for reading
+ # so fdw can be closed seperately
+ # and fdr can be seeked seperately (for --line-buffer)
+ open(my $fdr,"<", $self->fh($fdno,'name')) ||
+ ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name'));
+ $self->set_fh($fdno,'r',$fdr);
+ # Unlink if required
+ $Global::debug or unlink $self->fh($fdno,"unlink");
+ }
+}
+
+sub empty_input_wrapper {
+ # If no input: exit(0)
+ # If some input: Pass input as input to command on STDIN
+ # This avoids starting the command if there is no input.
+ # Input:
+ # $command = command to pipe data to
+ # Returns:
+ # $wrapped_command = the wrapped command
+ my $command = shift;
+ my $script = '$c="'.::perl_quote_scalar($command).'";'.
+ ::spacefree(0,q{
+ if(sysread(STDIN, $buf, 1)) {
+ open($fh, "|-", $c) || die;
+ syswrite($fh, $buf);
+ while($read = sysread(STDIN, $buf, 32768)) {
+ syswrite($fh, $buf);
+ }
+ close $fh;
+ exit ($?&127 ? 128+($?&127) : 1+$?>>8)
+ }
+ });
+ ::debug("run",'Empty wrap: perl -e '.::shell_quote_scalar($script)."\n");
+ return 'perl -e '.::shell_quote_scalar($script);
+}
+
+sub filter_through_compress {
+ my $self = shift;
+ # Send stdout to stdin for $opt::compress_program(1)
+ # Send stderr to stdin for $opt::compress_program(2)
+ # cattail get pid: $pid = $self->fh($fdno,'rpid');
+ my $cattail = cattail();
+
+ for my $fdno (1,2) {
+ # Make a communication file.
+ my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac");
+ close $fh;
+ # Compressor: (echo > $comfile; compress pipe) > output
+ # When the echo is written to $comfile, it is known that output file is opened,
+ # thus output file can then be removed by the decompressor.
+ my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".empty_input_wrapper($opt::compress_program).") >".
+ $self->fh($fdno,'name')) || die $?;
+ $self->set_fh($fdno,'w',$fdw);
+ $self->set_fh($fdno,'wpid',$wpid);
+ # Decompressor: open output; -s $comfile > 0: rm $comfile output; decompress output > stdout
+ my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile,
+ $opt::decompress_program, $wpid,
+ $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?;
+ $self->set_fh($fdno,'r',$fdr);
+ $self->set_fh($fdno,'rpid',$rpid);
+ }
+}
+
+sub set_non_blocking {
+ my $self = shift;
+ $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+ for my $fdno (1,2) {
+ my $fdr = $self->fh($fdno,'r');
+ my $flags;
+ fcntl($fdr, &::F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+ $flags |= &::O_NONBLOCK; # Add non-blocking to the flags
+ fcntl($fdr, &::F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+ }
+}
+
+sub max_file_name_length {
+ # Figure out the max length of a subdir
+ # TODO and the max total length
+ # Ext4 = 255,130816
+ my $testdir = shift;
+
+ my $upper = 8_000_000;
+ my $len = 8;
+ my $dir = "x"x$len;
+ do {
+ rmdir($testdir."/".$dir);
+ $len *= 16;
+ $dir = "x"x$len;
+ } while ($len < $upper and mkdir $testdir."/".$dir);
+ # Then search for the actual max length between $len/16 and $len
+ my $min = $len/16;
+ my $max = $len;
+ while($max-$min > 5) {
+ # If we are within 5 chars of the exact value:
+ # it is not worth the extra time to find the exact value
+ my $test = int(($min+$max)/2);
+ $dir = "x"x$test;
+ if(mkdir $testdir."/".$dir) {
+ rmdir($testdir."/".$dir);
+ $min = $test;
+ } else {
+ $max = $test;
+ }
+ }
+ $Global::max_file_length = $min;
+ return $min;
+}
+
+sub set_fh {
+ # Set file handle
+ my ($self, $fd_no, $key, $fh) = @_;
+ $self->{'fd'}{$fd_no,$key} = $fh;
+}
+
+sub fh {
+ # Get file handle
+ my ($self, $fd_no, $key) = @_;
+ return $self->{'fd'}{$fd_no,$key};
+}
+
+sub write {
+ my $self = shift;
+ my $remaining_ref = shift;
+ my $stdin_fh = $self->fh(0,"w");
+
+ my $len = length $$remaining_ref;
+ # syswrite may not write all in one go,
+ # so make sure everything is written.
+ while($len) {
+ my $written = syswrite($stdin_fh,$$remaining_ref);
+ substr($$remaining_ref,0,$written) = "";
+ $len -= $written;
+ }
+}
+
+sub set_stdin_buffer {
+ # Copy stdin buffer from $block_ref up to $endpos
+ # Prepend with $header_ref
+ # Remove $recstart and $recend if needed
+ # Input:
+ # $header_ref = ref to $header to prepend
+ # $block_ref = ref to $block to pass on
+ # $endpos = length of $block to pass on
+ # $recstart = --recstart regexp
+ # $recend = --recend regexp
+ # Returns:
+ # N/A
+ my $self = shift;
+ my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_;
+ $self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos);
+ if($opt::remove_rec_sep) {
+ remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend);
+ }
+ $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'};
+ $self->{'stdin_buffer_pos'} = 0;
+ $self->add_transfersize($self->{'stdin_buffer_length'});
+}
+
+sub stdin_buffer_length {
+ my $self = shift;
+ return $self->{'stdin_buffer_length'};
+}
+
+sub remove_rec_sep {
+ my ($block_ref,$recstart,$recend) = @_;
+ # Remove record separator
+ $$block_ref =~ s/$recend$recstart//gos;
+ $$block_ref =~ s/^$recstart//os;
+ $$block_ref =~ s/$recend$//os;
+}
+
+sub non_block_write {
+ my $self = shift;
+ my $something_written = 0;
+ use POSIX qw(:errno_h);
+ # for loop used to avoid copying substr: $buf will be an alias for the substr
+ for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) {
+ my $in = $self->fh(0,"w");
+ my $rv = syswrite($in, $buf);
+ if (!defined($rv) && $! == EAGAIN) {
+ # would block
+ $something_written = 0;
+ } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) {
+ # incomplete write
+ # Remove the written part
+ $self->{'stdin_buffer_pos'} += $rv;
+ $something_written = $rv;
+ } else {
+ # successfully wrote everything
+ my $a = "";
+ $self->set_stdin_buffer(\$a,\$a,"","");
+ $something_written = $rv;
+ }
+ }
+
+ ::debug("pipe", "Non-block: ", $something_written);
+ return $something_written;
+}
+
+
+sub virgin {
+ my $self = shift;
+ return $self->{'virgin'};
+}
+
+sub set_virgin {
+ my $self = shift;
+ $self->{'virgin'} = shift;
+}
+
+sub pid {
+ my $self = shift;
+ return $self->{'pid'};
+}
+
+sub set_pid {
+ my $self = shift;
+ $self->{'pid'} = shift;
+}
+
+sub starttime {
+ # Returns:
+ # UNIX-timestamp this job started
+ my $self = shift;
+ return sprintf("%.3f",$self->{'starttime'});
+}
+
+sub set_starttime {
+ my $self = shift;
+ my $starttime = shift || ::now();
+ $self->{'starttime'} = $starttime;
+}
+
+sub runtime {
+ # Returns:
+ # Run time in seconds
+ my $self = shift;
+ return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000);
+}
+
+sub endtime {
+ # Returns:
+ # UNIX-timestamp this job ended
+ # 0 if not ended yet
+ my $self = shift;
+ return ($self->{'endtime'} || 0);
+}
+
+sub set_endtime {
+ my $self = shift;
+ my $endtime = shift;
+ $self->{'endtime'} = $endtime;
+}
+
+sub timedout {
+ # Is the job timedout?
+ # Input:
+ # $delta_time = time that the job may run
+ # Returns:
+ # True or false
+ my $self = shift;
+ my $delta_time = shift;
+ return time > $self->{'starttime'} + $delta_time;
+}
+
+sub kill {
+ # Kill the job.
+ # Send the signals to (grand)*children and pid.
+ # If no signals: TERM TERM KILL
+ # Wait 200 ms after each TERM.
+ # Input:
+ # @signals = signals to send
+ my $self = shift;
+ my @signals = @_;
+ my @family_pids = $self->family_pids();
+
+ # Record this jobs as failed
+ $self->set_exitstatus(-1);
+ # Send two TERMs to give time to clean up
+ ::debug("run", "Kill seq ", $self->seq(), " signal '@signals'\n");
+ my @send_signals = @signals || ("TERM", "TERM", "KILL");
+ for my $signal (@send_signals) {
+ my $alive = 0;
+ for my $pid (@family_pids) {
+ if(kill 0, $pid) {
+ # The job still running
+ kill $signal, $pid;
+ $alive = 1;
+ ::debug("run","$pid is alive\n");
+ }
+ }
+ # If a signal was given as input, do not do the sleep below
+ @signals and next;
+
+ if($signal eq "TERM" and $alive) {
+ # Wait up to 200 ms between TERMs - but only if any pids are alive
+ my $sleep = 1;
+ for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200;
+ $sleepsum += $sleep) {
+ $sleep = ::reap_usleep($sleep);
+ }
+ }
+ }
+}
+
+sub family_pids {
+ # Find the pids with this->pid as (grand)*parent
+ # Returns:
+ # @pids = pids of (grand)*children
+ my $self = shift;
+ my $pid = $self->pid();
+ my @pids;
+
+ my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table();
+
+ my @more = ($pid);
+ # While more (grand)*children
+ while(@more) {
+ my @m;
+ push @pids, @more;
+ for my $parent (@more) {
+ if($children_of_ref->{$parent}) {
+ # add the children of this parent
+ push @m, @{$children_of_ref->{$parent}};
+ }
+ }
+ @more = @m;
+ }
+ return (@pids);
+}
+
+sub failed {
+ # return number of times failed for this $sshlogin
+ # Input:
+ # $sshlogin
+ # Returns:
+ # Number of times failed for $sshlogin
+ my $self = shift;
+ my $sshlogin = shift;
+ return $self->{'failed'}{$sshlogin};
+}
+
+sub failed_here {
+ # return number of times failed for the current $sshlogin
+ # Returns:
+ # Number of times failed for this sshlogin
+ my $self = shift;
+ return $self->{'failed'}{$self->sshlogin()};
+}
+
+sub add_failed {
+ # increase the number of times failed for this $sshlogin
+ my $self = shift;
+ my $sshlogin = shift;
+ $self->{'failed'}{$sshlogin}++;
+}
+
+sub add_failed_here {
+ # increase the number of times failed for the current $sshlogin
+ my $self = shift;
+ $self->{'failed'}{$self->sshlogin()}++;
+}
+
+sub reset_failed {
+ # increase the number of times failed for this $sshlogin
+ my $self = shift;
+ my $sshlogin = shift;
+ delete $self->{'failed'}{$sshlogin};
+}
+
+sub reset_failed_here {
+ # increase the number of times failed for this $sshlogin
+ my $self = shift;
+ delete $self->{'failed'}{$self->sshlogin()};
+}
+
+sub min_failed {
+ # Returns:
+ # the number of sshlogins this command has failed on
+ # the minimal number of times this command has failed
+ my $self = shift;
+ my $min_failures =
+ ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}});
+ my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}};
+ return ($number_of_sshlogins_failed_on,$min_failures);
+}
+
+sub total_failed {
+ # Returns:
+ # $total_failures = the number of times this command has failed
+ my $self = shift;
+ my $total_failures = 0;
+ for (values %{$self->{'failed'}}) {
+ $total_failures += $_;
+ }
+ return $total_failures;
+}
+
+{
+ my $script;
+
+ sub postpone_exit_and_cleanup {
+ # Command to remove files and dirs (given as args) without
+ # affecting the exit value in $?/$status.
+ if(not $script) {
+ $script = "perl -e '".
+ ::spacefree(0,q{
+ $bash=shift;
+ $csh=shift;
+ for(@ARGV){
+ unlink;
+ rmdir;
+ }
+ if($bash=~s/h//) {
+ exit $bash;
+ }
+ exit $csh;
+ }).
+ "' ".'"$?h" "$status" ';
+ }
+ return $script
+ }
+}
+
+sub wrapped {
+ # Wrap command with:
+ # * --shellquote
+ # * --nice
+ # * --cat
+ # * --fifo
+ # * --sshlogin
+ # * --pipepart (@Global::cat_partials)
+ # * --pipe
+ # * --tmux
+ # The ordering of the wrapping is important:
+ # * --nice/--cat/--fifo should be done on the remote machine
+ # * --pipepart/--pipe should be done on the local machine inside --tmux
+ # Uses:
+ # $Global::envvar
+ # $opt::shellquote
+ # $opt::nice
+ # $Global::shell
+ # $opt::cat
+ # $opt::fifo
+ # @Global::cat_partials
+ # $opt::pipe
+ # $opt::tmux
+ # Returns:
+ # $self->{'wrapped'} = the command wrapped with the above
+ my $self = shift;
+ if(not defined $self->{'wrapped'}) {
+ my $command = $self->replaced();
+ if($opt::shellquote) {
+ # Prepend echo
+ # and quote twice
+ $command = "echo " .
+ ::shell_quote_scalar(::shell_quote_scalar($command));
+ }
+ if($opt::nice) {
+ # Prepend \nice -n19 $SHELL -c
+ # and quote.
+ # The '\' before nice is needed to avoid tcsh's built-in
+ my $sshlogin = $self->sshlogin();
+ my $serverlogin = $sshlogin->serverlogin();
+ if($serverlogin eq ":") {
+ # Local use $Global::shell
+ $command = '\nice'. " -n". $opt::nice. " ".
+ $Global::shell. " -c ".
+ ::shell_quote_scalar($command);
+ } else {
+ # Remote systems use $SHELL
+ $command = '\nice'. " -n". $opt::nice.
+ ' $SHELL -c '.
+ ::shell_quote_scalar($command);
+ }
+ }
+ if($opt::cat) {
+# Append 'unlink {} without affecting $?'
+ $command =
+ $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0).
+ $command.";". postpone_exit_and_cleanup().
+ '$PARALLEL_TMP';
+ } elsif($opt::fifo) {
+ # Prepend 'mkfifo {}; ('
+ # Append ') & _PID=$!; cat > {}; wait $_PID; '
+ # (This makes it fail in csh, but give the correct exit code in bash)
+ # Append 'unlink {} without affecting $?'
+ # Set $ENV{PARALLEL_TMP} when starting a job
+ # Set $ENV{PARALLEL_TMP} in the remote wrapper
+ # mkfifo $PARALLEL_TMP;
+ # {} = $PARALLEL_TMP;
+ # (...) &
+ # cat > $PARALLEL_TMP; wait \$_PID; cleanup $PARALLEL_TMP
+ # perl -e 'open($fifo,">",shift); while(read){print FIFO};unlink $fifo;waitpid($pid,0);exit $?' $! $PARALLEL_FIFO
+ $command =
+ "mkfifo \$PARALLEL_TMP\n (".
+ $command.";".
+ ') & _PID=$!; cat > $PARALLEL_TMP; wait $_PID; '.
+ postpone_exit_and_cleanup().
+ '$PARALLEL_TMP';
+ }
+ # Wrap with ssh + tranferring of files
+ $command = $self->sshlogin_wrap($command);
+ if(@Global::cat_partials) {
+ # Prepend:
+ # < /tmp/foo perl -e 'while(@ARGV) {
+ # sysseek(STDIN,shift,0) || die; $left = shift;
+ # while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){
+ # $left -= $read; syswrite(STDOUT,$buf);
+ # }
+ # }' 0 0 0 11 |
+ $command = (shift @Global::cat_partials). " | ($command)";
+ } elsif($opt::pipe) {
+ # Wrap with EOF-detector to avoid starting $command if EOF.
+ $command = empty_input_wrapper($command);
+ }
+ if($opt::tmux) {
+ # Wrap command with 'tmux'
+ $command = $self->tmux_wrap($command);
+ }
+ $self->{'wrapped'} = $command;
+ }
+ return $self->{'wrapped'};
+}
+
+sub set_sshlogin {
+ my $self = shift;
+ my $sshlogin = shift;
+ $self->{'sshlogin'} = $sshlogin;
+ delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong
+ delete $self->{'wrapped'};
+}
+
+sub sshlogin {
+ my $self = shift;
+ return $self->{'sshlogin'};
+}
+
+sub string_zip_base64 {
+ # Pipe string through 'bzip2 -9' and base64 encode it into 1000
+ # byte blocks.
+ # 1000 bytes is the largest word size csh supports
+ # Input:
+ # @strings = to be encoded
+ # Returns:
+ # @base64 = 1000 byte block
+ my($zipin_fh, $zipout_fh,@base64);
+ ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9");
+ if(fork) {
+ close $zipin_fh;
+ $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;";
+ # Split base64 encoded into 1000 byte blocks
+ @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),""));
+ close $zipout_fh;
+ } else {
+ close $zipout_fh;
+ print $zipin_fh @_;
+ close $zipin_fh;
+ exit;
+ }
+ ::debug("base64","Orig:@_\nAs base64:@base64\n");
+ return @base64;
+}
+
+sub base64_zip_eval {
+ # Script that:
+ # * reads base64 strings from @ARGV
+ # * decodes them
+ # * pipes through 'bzip2 -dc'
+ # * evals the result
+ # Reverse of string_zip_base64 + eval
+ # Will be wrapped in ' so single quote is forbidden
+ # Returns:
+ # $script = 1-liner for perl -e
+ my $script = ::spacefree(0,q{
+ @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64");
+ eval "@GNU_Parallel";
+
+ $SIG{CHLD}="IGNORE";
+ # Search for bzip2. Not found => use default path
+ my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2";
+ # $in = stdin on $zip, $out = stdout from $zip
+ my($in, $out,$eval);
+ open3($in,$out,">&STDERR",$zip,"-dc");
+ if(my $perlpid = fork) {
+ close $in;
+ $eval = join "", <$out>;
+ close $out;
+ } else {
+ close $out;
+ # Pipe decoded base64 into 'bzip2 -dc'
+ print $in (decode_base64(join"",@ARGV));
+ close $in;
+ exit;
+ }
+ wait;
+ eval $eval;
+ });
+ ::debug("base64",$script,"\n");
+ return $script;
+}
+
+sub sshlogin_wrap {
+ # Wrap the command with the commands needed to run remotely
+ # Input:
+ # $command = command to run
+ # Returns:
+ # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands
+ sub monitor_parent_sshd_script {
+ # This script is to solve the problem of
+ # * not mixing STDERR and STDOUT
+ # * terminating with ctrl-c
+ # If its parent is ssh: all good
+ # If its parent is init(1): ssh died, so kill children
+ my $monitor_parent_sshd_script;
+
+ if(not $monitor_parent_sshd_script) {
+ $monitor_parent_sshd_script =
+ # This will be packed in ', so only use "
+ ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'.
+ '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'.
+ q{
+ # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR
+ do {
+ $ENV{PARALLEL_TMP} = $tmpdir."/par".
+ join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5);
+ } while(-e $ENV{PARALLEL_TMP});
+ $SIG{CHLD} = sub { $done = 1; };
+ $pid = fork;
+ unless($pid) {
+ # Make own process group to be able to kill HUP it later
+ setpgrp;
+ exec $shell, "-c", ($bashfunc."@ARGV");
+ die "exec: $!\n";
+ }
+ do {
+ # Parent is not init (ppid=1), so sshd is alive
+ # Exponential sleep up to 1 sec
+ $s = $s < 1 ? 0.001 + $s * 1.03 : $s;
+ select(undef, undef, undef, $s);
+ } until ($done || getppid == 1);
+ # Kill HUP the process group if job not done
+ kill(SIGHUP, -${pid}) unless $done;
+ wait;
+ exit ($?&127 ? 128+($?&127) : 1+$?>>8)
+ });
+ }
+ return $monitor_parent_sshd_script;
+ }
+
+ sub vars_to_export {
+ # Uses:
+ # @opt::env
+ my @vars = ("parallel_bash_environment");
+ for my $varstring (@opt::env) {
+ # Split up --env VAR1,VAR2
+ push @vars, split /,/, $varstring;
+ }
+ for (@vars) {
+ if(-r $_ and not -d) {
+ # Read as environment definition bug #44041
+ # TODO parse this
+ my $fh = ::open_or_exit($_);
+ $Global::envdef = join("",<$fh>);
+ close $fh;
+ }
+ }
+ if(grep { /^_$/ } @vars) {
+ # --env _
+ # Include all vars that are not in a clean environment
+ if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) {
+ my @ignore = <$vars_fh>;
+ chomp @ignore;
+ my %ignore;
+ @ignore{@ignore} = @ignore;
+ close $vars_fh;
+ push @vars, grep { not defined $ignore{$_} } keys %ENV;
+ @vars = grep { not /^_$/ } @vars;
+ } else {
+ ::error("Run '$Global::progname --record-env' in a clean environment first.\n");
+ ::wait_and_exit(255);
+ }
+ }
+ # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2)
+ # So --env myfunc should also look for BASH_FUNC_myfunc()
+ push(@vars, "PARALLEL_PID", "PARALLEL_SEQ",
+ map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars);
+ # Keep only defined variables
+ return grep { defined($ENV{$_}) } @vars;
+ }
+
+ sub env_as_eval {
+ # Returns:
+ # $eval = '$ENV{"..."}=...; ...'
+ my @vars = vars_to_export();
+ my $csh_friendly = not grep { /\n/ } @ENV{@vars};
+ my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars;
+ my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars;
+ # eval of @envset will set %ENV
+ my $envset = join"", map {
+ '$ENV{"'.::perl_quote_scalar($_).'"}="'. ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions;
+
+ # running @bashfunc on the command line, will set the functions
+ my @bashfunc = map {
+ my $v=$_; s/BASH_FUNC_(.*)(\(\)|%%)/$1/; "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions;
+ # eval $bashfuncset will set $bashfunc
+ my $bashfuncset;
+ if(@bashfunc) {
+ # Functions are not supported for all shells
+ if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) {
+ ::warning("Shell functions may not be supported in $Global::shell\n");
+ }
+ $bashfuncset =
+ '@bash_functions=qw('."@bash_functions".");".
+ ::spacefree(1,q{
+ if($ENV{"SHELL"}=~/csh/) {
+ print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n";
+ exec "false";
+ }
+ }).
+ "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";';
+ } else {
+ $bashfuncset = '$bashfunc = "";'
+ }
+ if($ENV{"parallel_bash_environment"}) {
+ $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";';
+ }
+ ::debug("base64",$envset,$bashfuncset,"\n");
+ return $csh_friendly,$envset,$bashfuncset;
+ }
+
+ my $self = shift;
+ my $command = shift;
+ # TODO test that *sh -c 'parallel --env' use *sh
+ if(not defined $self->{'sshlogin_wrap'}) {
+ my $sshlogin = $self->sshlogin();
+ my $serverlogin = $sshlogin->serverlogin();
+ my $quoted_remote_command;
+ $ENV{'PARALLEL_SEQ'} = $self->seq();
+ $ENV{'PARALLEL_PID'} = $$;
+ if($serverlogin eq ":") {
+ if(@opt::env) {
+ # Prepend with environment setter, which sets functions in zsh
+ my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
+ my $env_command = $envset.$bashfuncset.
+ '@ARGV="'.::perl_quote_scalar($command).'";'.
+ "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;";
+ if(length $env_command > 999
+ or
+ not $csh_friendly
+ or
+ $command =~ /\n/) {
+ # csh does not deal well with > 1000 chars in one word
+ # csh does not deal well with $ENV with \n
+ $env_command = "perl -e '".base64_zip_eval()."' ".
+ join" ",string_zip_base64($env_command);
+ $self->{'sshlogin_wrap'} = $env_command;
+ } else {
+ $self->{'sshlogin_wrap'} = "perl -e ".::shell_quote_scalar($env_command);
+ }
+ } else {
+ $self->{'sshlogin_wrap'} = $command;
+ }
+ } else {
+ my $pwd = "";
+ if($opt::workdir) {
+ # Create remote workdir if needed. Then cd to it.
+ my $wd = $self->workdir();
+ $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}.
+ qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;};
+ }
+ my ($csh_friendly,$envset,$bashfuncset) = env_as_eval();
+ my $remote_command = $pwd.$envset.$bashfuncset.
+ '@ARGV="'.::perl_quote_scalar($command).'";'. monitor_parent_sshd_script();
+ $quoted_remote_command = "perl -e ".::shell_quote_scalar($remote_command);
+ if(length $quoted_remote_command > 999
+ or
+ not $csh_friendly
+ or
+ $command =~ /\n/) {
+ # csh does not deal well with > 1000 chars in one word
+ # csh does not deal well with $ENV with \n
+ $quoted_remote_command = "perl -e \\''".base64_zip_eval()."'\\' ".
+ join" ",string_zip_base64($remote_command);
+ } else {
+ $quoted_remote_command = ::shell_quote_scalar($quoted_remote_command);
+ }
+
+ my $sshcmd = $sshlogin->sshcommand();
+ my ($pre,$post,$cleanup)=("","","");
+ # --transfer
+ $pre .= $self->sshtransfer();
+ # --return
+ $post .= $self->sshreturn();
+ # --cleanup
+ $post .= $self->sshcleanup();
+ if($post) {
+ # We need to save the exit status of the job
+ $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;';
+ }
+ $self->{'sshlogin_wrap'} =
+ ($pre
+ . "$sshcmd $serverlogin exec "
+ . $quoted_remote_command
+ . ";"
+ . $post);
+ }
+ }
+ return $self->{'sshlogin_wrap'};
+}
+
+sub transfer {
+ # Files to transfer
+ # Returns:
+ # @transfer - File names of files to transfer
+ my $self = shift;
+ my @transfer = ();
+ $self->{'transfersize'} = 0;
+ if($opt::transfer) {
+ for my $record (@{$self->{'commandline'}{'arg_list'}}) {
+ # Merge arguments from records into args
+ for my $arg (@$record) {
+ CORE::push @transfer, $arg->orig();
+ # filesize
+ if(-e $arg->orig()) {
+ $self->{'transfersize'} += (stat($arg->orig()))[7];
+ }
+ }
+ }
+ }
+ return @transfer;
+}
+
+sub transfersize {
+ my $self = shift;
+ return $self->{'transfersize'};
+}
+
+sub add_transfersize {
+ my $self = shift;
+ my $transfersize = shift;
+ $self->{'transfersize'} += $transfersize;
+}
+
+sub sshtransfer {
+ # Returns for each transfer file:
+ # rsync $file remote:$workdir
+ my $self = shift;
+ my @pre;
+ my $sshlogin = $self->sshlogin();
+ my $workdir = $self->workdir();
+ for my $file ($self->transfer()) {
+ push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";";
+ }
+ return join("",@pre);
+}
+
+sub return {
+ # Files to return
+ # Non-quoted and with {...} substituted
+ # Returns:
+ # @non_quoted_filenames
+ my $self = shift;
+ return $self->{'commandline'}->
+ replace_placeholders($self->{'commandline'}{'return_files'},0,0);
+}
+
+sub returnsize {
+ # This is called after the job has finished
+ # Returns:
+ # $number_of_bytes transferred in return
+ my $self = shift;
+ for my $file ($self->return()) {
+ if(-e $file) {
+ $self->{'returnsize'} += (stat($file))[7];
+ }
+ }
+ return $self->{'returnsize'};
+}
+
+sub add_returnsize {
+ my $self = shift;
+ my $returnsize = shift;
+ $self->{'returnsize'} += $returnsize;
+}
+
+sub sshreturn {
+ # Returns for each return-file:
+ # rsync remote:$workdir/$file .
+ my $self = shift;
+ my $sshlogin = $self->sshlogin();
+ my $sshcmd = $sshlogin->sshcommand();
+ my $serverlogin = $sshlogin->serverlogin();
+ my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd);
+ my $pre = "";
+ for my $file ($self->return()) {
+ $file =~ s:^\./::g; # Remove ./ if any
+ my $relpath = ($file !~ m:^/:); # Is the path relative?
+ my $cd = "";
+ my $wd = "";
+ if($relpath) {
+ # rsync -avR /foo/./bar/baz.c remote:/tmp/
+ # == (on old systems)
+ # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/
+ $wd = ::shell_quote_file($self->workdir()."/");
+ }
+ # Only load File::Basename if actually needed
+ $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;";
+ # dir/./file means relative to dir, so remove dir on remote
+ $file =~ m:(.*)/\./:;
+ my $basedir = $1 ? ::shell_quote_file($1."/") : "";
+ my $nobasedir = $file;
+ $nobasedir =~ s:.*/\./::;
+ $cd = ::shell_quote_file(::dirname($nobasedir));
+ my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync");
+ my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file)));
+ # --return
+ # mkdir -p /home/tange/dir/subdir/;
+ # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync"
+ # server:file.gz /home/tange/dir/subdir/
+ $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:".
+ $basename . " ".$basedir.$cd.";";
+ }
+ return $pre;
+}
+
+sub sshcleanup {
+ # Return the sshcommand needed to remove the file
+ # Returns:
+ # ssh command needed to remove files from sshlogin
+ my $self = shift;
+ my $sshlogin = $self->sshlogin();
+ my $sshcmd = $sshlogin->sshcommand();
+ my $serverlogin = $sshlogin->serverlogin();
+ my $workdir = $self->workdir();
+ my $cleancmd = "";
+
+ for my $file ($self->cleanup()) {
+ my @subworkdirs = parentdirs_of($file);
+ $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";";
+ }
+ if(defined $opt::workdir and $opt::workdir eq "...") {
+ $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';';
+ }
+ return $cleancmd;
+}
+
+sub cleanup {
+ # Returns:
+ # Files to remove at cleanup
+ my $self = shift;
+ if($opt::cleanup) {
+ my @transfer = $self->transfer();
+ my @return = $self->return();
+ return (@transfer,@return);
+ } else {
+ return ();
+ }
+}
+
+sub workdir {
+ # Returns:
+ # the workdir on a remote machine
+ my $self = shift;
+ if(not defined $self->{'workdir'}) {
+ my $workdir;
+ if(defined $opt::workdir) {
+ if($opt::workdir eq ".") {
+ # . means current dir
+ my $home = $ENV{'HOME'};
+ eval 'use Cwd';
+ my $cwd = cwd();
+ $workdir = $cwd;
+ if($home) {
+ # If homedir exists: remove the homedir from
+ # workdir if cwd starts with homedir
+ # E.g. /home/foo/my/dir => my/dir
+ # E.g. /tmp/my/dir => /tmp/my/dir
+ my ($home_dev, $home_ino) = (stat($home))[0,1];
+ my $parent = "";
+ my @dir_parts = split(m:/:,$cwd);
+ my $part;
+ while(defined ($part = shift @dir_parts)) {
+ $part eq "" and next;
+ $parent .= "/".$part;
+ my ($parent_dev, $parent_ino) = (stat($parent))[0,1];
+ if($parent_dev == $home_dev and $parent_ino == $home_ino) {
+ # dev and ino is the same: We found the homedir.
+ $workdir = join("/",@dir_parts);
+ last;
+ }
+ }
+ }
+ if($workdir eq "") {
+ $workdir = ".";
+ }
+ } elsif($opt::workdir eq "...") {
+ $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$
+ . "-" . $self->seq();
+ } else {
+ $workdir = $opt::workdir;
+ # Rsync treats /./ special. We dont want that
+ $workdir =~ s:/\./:/:g; # Remove /./
+ $workdir =~ s:/+$::; # Remove ending / if any
+ $workdir =~ s:^\./::g; # Remove starting ./ if any
+ }
+ } else {
+ $workdir = ".";
+ }
+ $self->{'workdir'} = ::shell_quote_scalar($workdir);
+ }
+ return $self->{'workdir'};
+}
+
+sub parentdirs_of {
+ # Return:
+ # all parentdirs except . of this dir or file - sorted desc by length
+ my $d = shift;
+ my @parents = ();
+ while($d =~ s:/[^/]+$::) {
+ if($d ne ".") {
+ push @parents, $d;
+ }
+ }
+ return @parents;
+}
+
+sub start {
+ # Setup STDOUT and STDERR for a job and start it.
+ # Returns:
+ # job-object or undef if job not to run
+ my $job = shift;
+ # Get the shell command to be executed (possibly with ssh infront).
+ my $command = $job->wrapped();
+
+ if($Global::interactive or $Global::stderr_verbose) {
+ $command = interactive_start($command);
+ }
+ my $pid;
+ $job->openoutputfiles();
+ my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w"));
+ local (*IN,*OUT,*ERR);
+ open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!");
+ open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!");
+
+ if($opt::ungroup) {
+ print_dryrun_and_verbose($stdout_fh,$job,$command);
+ }
+ if($opt::dryrun) {
+ $command = "true";
+ }
+ $ENV{'PARALLEL_SEQ'} = $job->seq();
+ $ENV{'PARALLEL_PID'} = $$;
+ $ENV{'PARALLEL_TMP'} = ::tmpname("par");
+ ::debug("run", $Global::total_running, " processes . Starting (",
+ $job->seq(), "): $command\n");
+ if($opt::pipe) {
+ my ($stdin_fh);
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-pipe");
+ 1;
+ };
+ $job->set_fh(0,"w",$stdin_fh);
+ } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1
+ and $job->sshlogin()->string() eq ":") {
+ # Give STDIN to the first job if using -a (but only if running
+ # locally - otherwise CTRL-C does not work for other jobs Bug#36585)
+ *IN = *STDIN;
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-a");
+ 1;
+ };
+ # Re-open to avoid complaining
+ open(STDIN, "<&", $Global::original_stdin)
+ or ::die_bug("dup-\$Global::original_stdin: $!");
+ } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and
+ open(my $devtty_fh, "<", "/dev/tty")) {
+ # Give /dev/tty to the command if no one else is using it
+ *IN = $devtty_fh;
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-/dev/tty");
+ $Global::tty_taken = $pid;
+ close $devtty_fh;
+ 1;
+ };
+ } else {
+ # The eval is needed to catch exception from open3
+ eval {
+ $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) ||
+ ::die_bug("open3-gensym");
+ 1;
+ };
+ }
+ if($pid) {
+ # A job was started
+ $Global::total_running++;
+ $Global::total_started++;
+ $job->set_pid($pid);
+ $job->set_starttime();
+ $Global::running{$job->pid()} = $job;
+ if($opt::timeout) {
+ $Global::timeoutq->insert($job);
+ }
+ $Global::newest_job = $job;
+ $Global::newest_starttime = ::now();
+ return $job;
+ } else {
+ # No more processes
+ ::debug("run", "Cannot spawn more jobs.\n");
+ return undef;
+ }
+}
+
+sub interactive_start {
+ my $command = shift;
+ if($Global::interactive) {
+ ::status("$command ?...");
+ open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty");
+ my $answer = <$tty_fh>;
+ close $tty_fh;
+ my $run_yes = ($answer =~ /^\s*y/i);
+ if (not $run_yes) {
+ $command = "true"; # Run the command 'true'
+ }
+ } else {
+ print $Global::original_stderr "$command\n";
+ }
+ return $command;
+}
+
+sub print_dryrun_and_verbose {
+ # For $opt::ungroup we print these ASAP
+ # For $opt::group they are part of print()
+ my $stdout_fh = shift;
+ my $job = shift;
+ my $command = shift;
+ if($opt::dryrun or $Global::verbose) {
+ if($Global::verbose <= 1) {
+ print $stdout_fh $job->replaced(),"\n";
+ } else {
+ # Verbose level > 1: Print the rsync and stuff
+ print $stdout_fh $command,"\n";
+ }
+ }
+}
+
+{
+ my $tmuxsocket;
+
+ sub tmux_wrap {
+ # Wrap command with tmux for session pPID
+ # Input:
+ # $actual_command = the actual command being run (incl ssh wrap)
+ my $self = shift;
+ my $actual_command = shift;
+ # Temporary file name. Used for fifo to communicate exit val
+ my $tmpfifo=::tmpname("tmx");
+
+ if(length($tmpfifo) >=100) {
+ ::error("tmux does not support sockets with path > 100\n");
+ ::wait_and_exit(255);
+ }
+ my $visual_command = $self->replaced();
+ my $title = $visual_command;
+ if($visual_command =~ /\0/) {
+ ::error("Command line contains NUL. tmux is confused by NUL.\n");
+ ::wait_and_exit(255);
+ }
+ # ; causes problems
+ # ascii 194-245 annoys tmux
+ $title =~ tr/[\011-\016;\302-\365]//d;
+ $title = ::shell_quote_scalar($title);
+
+ my $l_act = length($actual_command);
+ my $l_tit = length($title);
+ my $l_fifo = length($tmpfifo);
+ # The line to run contains a 118 chars extra code + the title 2x
+ my $l_tot = 2 * $l_tit + $l_act + $l_fifo;
+
+ while($l_tit < 1000 and
+ (
+ (890 < $l_tot and $l_tot < 1350)
+ or
+ (9250 < $l_tot and $l_tot < 9800)
+ )) {
+ # tmux blocks for certain lengths:
+ # 900 < title + command < 1200
+ # 9250 < title + command < 9800
+ # but only if title < 1000, so expand the title with 75 spaces
+ # The measured lengths are:
+ # 996 < (title + whole command) < 1127
+ # 9331 < (title + whole command) < 9636
+ $title = $title.('\ 'x75);
+ $l_tit = length($title);
+ $l_tot = 2 * $l_tit + $l_act + $l_fifo;
+ }
+
+ my $tmux;
+ $ENV{'TMUX'} ||= "tmux";
+ if(not $tmuxsocket) {
+ $tmuxsocket = ::tmpname("tms");
+ ::status("See output with: $ENV{'TMUX'} -S $tmuxsocket attach\n");
+ }
+ # TODO sh -c wrapper for >&
+ $tmux = $ENV{'TMUX'}." -S $tmuxsocket new-session -s p$$ -d 'sleep .2' >&/dev/null;" .
+ $ENV{'TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title";
+
+ ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ",
+ $Limits::Command::line_max_len, " tot ",
+ $l_tot, "\n");
+
+ return "mkfifo $tmpfifo && $tmux ".
+ # Run in tmux
+ ::shell_quote_scalar
+ (
+ "(".$actual_command.');'.
+ # The triple print is needed - otherwise the testsuite fails
+ q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&".
+ "echo $title; echo \007Job finished at: `date`;sleep 10"
+ ).
+ # Run outside tmux
+ # Read a / separated line: 0h/2 for csh, 2/0 for bash.
+ # If csh the first will be 0h, so use the second as exit value.
+ # Otherwise just use the first value as exit value.
+ q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo;
+ }
+}
+
+sub is_already_in_results {
+ # Do we already have results for this job?
+ # Returns:
+ # $job_already_run = bool whether there is output for this or not
+ my $job = $_[0];
+ my $args_as_dirname = $job->{'commandline'}->args_as_dirname();
+ # prefix/name1/val1/name2/val2/
+ my $dir = $opt::results."/".$args_as_dirname;
+ ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n");
+ return -e "$dir/stdout";
+}
+
+sub is_already_in_joblog {
+ my $job = shift;
+ return vec($Global::job_already_run,$job->seq(),1);
+}
+
+sub set_job_in_joblog {
+ my $job = shift;
+ vec($Global::job_already_run,$job->seq(),1) = 1;
+}
+
+sub should_be_retried {
+ # Should this job be retried?
+ # Returns
+ # 0 - do not retry
+ # 1 - job queued for retry
+ my $self = shift;
+ if (not $opt::retries) {
+ return 0;
+ }
+ if(not $self->exitstatus()) {
+ # Completed with success. If there is a recorded failure: forget it
+ $self->reset_failed_here();
+ return 0
+ } else {
+ # The job failed. Should it be retried?
+ $self->add_failed_here();
+ if($self->total_failed() == $opt::retries) {
+ # This has been retried enough
+ return 0;
+ } else {
+ # This command should be retried
+ $self->set_endtime(undef);
+ $self->reset_exitstatus();
+ $Global::JobQueue->unget($self);
+ ::debug("run", "Retry ", $self->seq(), "\n");
+ return 1;
+ }
+}
+}
+
+{
+ my (%print_later,$job_end_sequence);
+
+ sub print_earlier_jobs {
+ # Print jobs completed earlier
+ # Returns: N/A
+ my $job = shift;
+ $print_later{$job->seq()} = $job;
+ $job_end_sequence ||= 1;
+ ::debug("run", "Looking for: $job_end_sequence ",
+ "Current: ", $job->seq(), "\n");
+ for(my $j = $print_later{$job_end_sequence};
+ $j or vec($Global::job_already_run,$job_end_sequence,1);
+ $job_end_sequence++,
+ $j = $print_later{$job_end_sequence}) {
+ ::debug("run", "Found job end $job_end_sequence");
+ if($j) {
+ $j->print();
+ delete $print_later{$job_end_sequence};
+ }
+ }
+ }
+}
+
+sub print {
+ # Print the output of the jobs
+ # Returns: N/A
+
+ my $self = shift;
+ ::debug("print", ">>joboutput ", $self->replaced(), "\n");
+ if($opt::dryrun) {
+ # Nothing was printed to this job:
+ # cleanup tmp files if --files was set
+ unlink $self->fh(1,"name");
+ }
+ if($opt::pipe and $self->virgin()) {
+ # Skip --joblog, --dryrun, --verbose
+ } else {
+ if($opt::ungroup and $Global::joblog and defined $self->{'exitstatus'}) {
+ # Add to joblog when finished
+ $self->print_joblog();
+ # Printing is only relevant for grouped/--line-buffer output.
+ $opt::ungroup and return;
+ }
+
+ # Check for disk full
+ ::exit_if_disk_full();
+
+ if(($opt::dryrun or $Global::verbose)
+ and
+ not $self->{'verbose_printed'}) {
+ $self->{'verbose_printed'}++;
+ if($Global::verbose <= 1) {
+ print STDOUT $self->replaced(),"\n";
+ } else {
+ # Verbose level > 1: Print the rsync and stuff
+ print STDOUT $self->wrapped(),"\n";
+ }
+ # If STDOUT and STDERR are merged,
+ # we want the command to be printed first
+ # so flush to avoid STDOUT being buffered
+ flush STDOUT;
+ }
+ }
+ for my $fdno (sort { $a <=> $b } keys %Global::fd) {
+ # Sort by file descriptor numerically: 1,2,3,..,9,10,11
+ $fdno == 0 and next;
+ my $out_fd = $Global::fd{$fdno};
+ my $in_fh = $self->fh($fdno,"r");
+ if(not $in_fh) {
+ if(not $Job::file_descriptor_warning_printed{$fdno}++) {
+ # ::warning("File descriptor $fdno not defined\n");
+ }
+ next;
+ }
+ ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n");
+ if($opt::files) {
+ $self->files_print($fdno,$in_fh,$out_fd);
+ } elsif($opt::linebuffer) {
+ # Line buffered print out
+ $self->linebuffer_print($fdno,$in_fh,$out_fd);
+ } elsif($opt::tag or defined $opt::tagstring) {
+ $self->tag_print($fdno,$in_fh,$out_fd);
+ } else {
+ $self->normal_print($fdno,$in_fh,$out_fd);
+ }
+ flush $out_fd;
+ }
+ ::debug("print", "<{'exitstatus'}
+ and not ($self->virgin() and $opt::pipe)) {
+ # Add to joblog when finished
+ $self->print_joblog();
+ }
+}
+
+sub files_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+
+ # If the job is dead: close printing fh. Needed for --compress
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ if($opt::compress) {
+ # Kill the decompressor which will not be needed
+ CORE::kill "TERM", $self->fh($fdno,"rpid");
+ }
+ close $in_fh;
+
+ if($opt::pipe and $self->virgin()) {
+ # Nothing was printed to this job:
+ # cleanup unused tmp files if --files was set
+ for my $fdno (1,2) {
+ unlink $self->fh($fdno,"name");
+ unlink $self->fh($fdno,"unlink");
+ }
+ } elsif($fdno == 1 and $self->fh($fdno,"name")) {
+ print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n";
+ $self->add_returnsize(-s $self->fh($fdno,"name"));
+ }
+}
+
+sub linebuffer_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+ my $partial = \$self->{'partial_line',$fdno};
+
+ if(defined $self->{'exitstatus'}) {
+ # If the job is dead: close printing fh. Needed for --compress
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ if($opt::compress) {
+ # Blocked reading in final round
+ $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+ for my $fdno (1,2) {
+ my $fdr = $self->fh($fdno,'r');
+ my $flags;
+ fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle
+ $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags
+ fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle
+ }
+ }
+ }
+ # This seek will clear EOF
+ seek $in_fh, tell($in_fh), 0;
+ # The read is non-blocking: The $in_fh is set to non-blocking.
+ # 32768 --tag = 5.1s
+ # 327680 --tag = 4.4s
+ # 1024000 --tag = 4.4s
+ # 3276800 --tag = 4.3s
+ # 10240000 --tag = 4.3s
+ # 32768000 --tag = 4.7s
+ my $outputlength = 0;
+ while(read($in_fh,substr($$partial,length $$partial),3276800)) {
+ # Append to $$partial
+ # Find the last \n
+ my $i = ::rindex64($partial,"\n");
+ if($i != -1) {
+ # One or more complete lines were found
+ if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) {
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # This is a crappy way of ignoring it.
+ $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
+ # Length of partial line has changed: Find the last \n again
+ $i = ::rindex64($partial,"\n");
+ }
+ $outputlength += $i+1;
+ if($opt::tag or defined $opt::tagstring) {
+ # Replace ^ with $tag within the full line
+ my $tag = $self->tag();
+ substr($$partial,0,$i+1) =~ s/^/$tag/gm;
+ # Length of partial line has changed: Find the last \n again
+ $i = ::rindex64($partial,"\n");
+ }
+ # Print up to and including the last \n
+ print $out_fd substr($$partial,0,$i+1);
+ # Remove the printed part
+ substr($$partial,0,$i+1) = "";
+ }
+ }
+ $self->add_returnsize($outputlength);
+ if(defined $self->{'exitstatus'}) {
+ # If the job is dead: print the remaining partial line
+ # read remaining
+ $self->add_returnsize(length $$partial);
+ if($$partial and ($opt::tag or defined $opt::tagstring)) {
+ my $tag = $self->tag();
+ $$partial =~ s/^/$tag/gm;
+ }
+ print $out_fd $$partial;
+ # Release the memory
+ $$partial = undef;
+ if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) {
+ # decompress still running
+ } else {
+ # decompress done: close fh
+ close $in_fh;
+ if($? and $opt::compress) {
+ ::error($opt::decompress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ }
+ }
+}
+
+sub tag_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+ my $buf;
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ seek $in_fh, 0, 0;
+ # $in_fh is now ready for reading at position 0
+ my $tag = $self->tag();
+ if($fdno == 2) {
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # This is a crappy way of ignoring it.
+ while(<$in_fh>) {
+ if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) {
+ # Skip
+ } else {
+ $self->add_returnsize(length $_);
+ print $out_fd $tag,$_;
+ }
+ # At most run the loop once
+ last;
+ }
+ }
+ my $outputlength = 0;
+ while(<$in_fh>) {
+ print $out_fd $tag,$_;
+ $outputlength += length $_;
+ }
+ if($fdno == 1) {
+ $self->add_returnsize($outputlength);
+ }
+ close $in_fh;
+ if($? and $opt::compress) {
+ ::error($opt::decompress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+}
+
+sub normal_print {
+ my $self = shift;
+ my ($fdno,$in_fh,$out_fd) = @_;
+ my $buf;
+ close $self->fh($fdno,"w");
+ if($? and $opt::compress) {
+ ::error($opt::compress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+ seek $in_fh, 0, 0;
+ # $in_fh is now ready for reading at position 0
+ if($fdno == 2) {
+ # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt
+ # This is a crappy way of ignoring it.
+ sysread($in_fh,$buf,1_000);
+ $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//;
+ print $out_fd $buf;
+ $self->add_returnsize(length $buf);
+ }
+ my $outputlength = 0;
+ while(sysread($in_fh,$buf,32768)) {
+ print $out_fd $buf;
+ $outputlength += length $buf;
+ }
+ if($fdno == 1) {
+ $self->add_returnsize($outputlength);
+ }
+ close $in_fh;
+ if($? and $opt::compress) {
+ ::error($opt::decompress_program." failed.\n");
+ $self->set_exitstatus(255);
+ }
+}
+
+sub print_joblog {
+ my $self = shift;
+ my $cmd;
+ if($Global::verbose <= 1) {
+ $cmd = $self->replaced();
+ } else {
+ # Verbose level > 1: Print the rsync and stuff
+ $cmd = "@command";
+ }
+ print $Global::joblog
+ join("\t", $self->seq(), $self->sshlogin()->string(),
+ $self->starttime(), sprintf("%10.3f",$self->runtime()),
+ $self->transfersize(), $self->returnsize(),
+ $self->exitstatus(), $self->exitsignal(), $cmd
+ ). "\n";
+ flush $Global::joblog;
+ $self->set_job_in_joblog();
+}
+
+sub tag {
+ my $self = shift;
+ if($opt::tag or defined $opt::tagstring) {
+ if(not defined $self->{'tag'}) {
+ $self->{'tag'} = $self->{'commandline'}->
+ replace_placeholders([$opt::tagstring],0,0)."\t";
+ }
+ } else {
+ return "";
+ }
+ return $self->{'tag'};
+}
+
+sub hostgroups {
+ my $self = shift;
+ if(not defined $self->{'hostgroups'}) {
+ $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'};
+ }
+ return @{$self->{'hostgroups'}};
+}
+
+sub exitstatus {
+ my $self = shift;
+ return $self->{'exitstatus'};
+}
+
+sub set_exitstatus {
+ my $self = shift;
+ my $exitstatus = shift;
+ if($exitstatus) {
+ # Overwrite status if non-zero
+ $self->{'exitstatus'} = $exitstatus;
+ } else {
+ # Set status but do not overwrite
+ # Status may have been set by --timeout
+ $self->{'exitstatus'} ||= $exitstatus;
+ }
+}
+
+sub reset_exitstatus {
+ my $self = shift;
+ $self->{'exitstatus'} = undef;
+}
+
+sub exitsignal {
+ my $self = shift;
+ return $self->{'exitsignal'};
+}
+
+sub set_exitsignal {
+ my $self = shift;
+ my $exitsignal = shift;
+ $self->{'exitsignal'} = $exitsignal;
+}
+
+
+{
+ my $status_printed;
+
+ sub should_we_halt {
+ # Should we halt? Immediately? Gracefully?
+ # Returns: N/A
+ my $job = shift;
+ if($job->exitstatus() or $job->exitsignal()) {
+ $Global::exitstatus++;
+ $Global::total_failed++;
+ if($opt::halt) {
+ if($opt::halt == 1
+ or
+ ($opt::halt > 0 and $opt::halt < 1 and $Global::total_failed > 3
+ and
+ $Global::total_failed / $Global::total_started > $opt::halt)) {
+ # If halt on error == 1 or --halt 10%
+ # we should gracefully exit
+ ::status
+ ("$Global::progname: Starting no more jobs. ",
+ "Waiting for ", scalar(keys %Global::running),
+ " jobs to finish. This job failed:\n",
+ $job->replaced(),"\n");
+ $Global::start_no_new_jobs ||= 1;
+ $Global::halt_exitstatus = $job->exitstatus();
+ } elsif($opt::halt == 2) {
+ # If halt on error == 2 we should exit immediately
+ if(not $status_printed++) {
+ ::status
+ ("$Global::progname: This job failed:\n",
+ $job->replaced(),"\n");
+ }
+ ::killall();
+ ::wait_and_exit($job->exitstatus());
+ }
+ }
+ } else {
+ if($opt::halt) {
+ if($opt::halt == -1) {
+ # If halt on error == -1
+ # we should gracefully exit
+ ::status
+ ("$Global::progname: Starting no more jobs. ",
+ "Waiting for ", scalar(keys %Global::running),
+ " jobs to finish. This job succeeded:\n",
+ $job->replaced(),"\n");
+ $Global::start_no_new_jobs ||= 1;
+ $Global::halt_exitstatus = $job->exitstatus();
+ } elsif($opt::halt == -2) {
+ # If halt on error == -2 we should exit immediately
+ ::status
+ ("$Global::progname: This job succeeded:\n",
+ $job->replaced(),"\n");
+ ::killall();
+ ::wait_and_exit($job->exitstatus());
+ }
+ }
+ }
+ }
+}
+
+package CommandLine;
+
+sub new {
+ my $class = shift;
+ my $seq = shift;
+ my $commandref = shift;
+ $commandref || die;
+ my $arg_queue = shift;
+ my $context_replace = shift;
+ my $max_number_of_args = shift; # for -N and normal (-n1)
+ my $return_files = shift;
+ my $replacecount_ref = shift;
+ my $len_ref = shift;
+ my %replacecount = %$replacecount_ref;
+ my %len = %$len_ref;
+ for (keys %$replacecount_ref) {
+ # Total length of this replacement string {} replaced with all args
+ $len{$_} = 0;
+ }
+ return bless {
+ 'command' => $commandref,
+ 'seq' => $seq,
+ 'len' => \%len,
+ 'arg_list' => [],
+ 'arg_queue' => $arg_queue,
+ 'max_number_of_args' => $max_number_of_args,
+ 'replacecount' => \%replacecount,
+ 'context_replace' => $context_replace,
+ 'return_files' => $return_files,
+ 'replaced' => undef,
+ }, ref($class) || $class;
+}
+
+sub seq {
+ my $self = shift;
+ return $self->{'seq'};
+}
+
+sub set_seq {
+ my $self = shift;
+ $self->{'seq'} = shift;
+}
+
+{
+ my $max_slot_number;
+
+ sub slot {
+ # Find the number of a free job slot and return it
+ # Uses:
+ # @Global::slots - list with free jobslots
+ # Returns:
+ # $jobslot = number of jobslot
+ my $self = shift;
+ if(not $self->{'slot'}) {
+ if(not @Global::slots) {
+ # $Global::max_slot_number will typically be $Global::max_jobs_running
+ push @Global::slots, ++$max_slot_number;
+ }
+ $self->{'slot'} = shift @Global::slots;
+ }
+ return $self->{'slot'};
+ }
+}
+
+sub populate {
+ # Add arguments from arg_queue until the number of arguments or
+ # max line length is reached
+ # Uses:
+ # $Global::minimal_command_line_length
+ # $opt::cat
+ # $opt::fifo
+ # $Global::JobQueue
+ # $opt::m
+ # $opt::X
+ # $CommandLine::already_spread
+ # $Global::max_jobs_running
+ # Returns: N/A
+ my $self = shift;
+ my $next_arg;
+ my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length();
+
+ if($opt::cat) {
+ # $PARALLEL_TMP will point to a tempfile that will be used as {}
+ $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
+ unget([Arg->new('$PARALLEL_TMP')]);
+ }
+ if($opt::fifo) {
+ # $PARALLEL_TMP will point to a tempfile that will be used as {}
+ $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->
+ unget([Arg->new('$PARALLEL_TMP')]);
+ }
+ while (not $self->{'arg_queue'}->empty()) {
+ $next_arg = $self->{'arg_queue'}->get();
+ if(not defined $next_arg) {
+ next;
+ }
+ $self->push($next_arg);
+ if($self->len() >= $max_len) {
+ # Command length is now > max_length
+ # If there are arguments: remove the last
+ # If there are no arguments: Error
+ # TODO stuff about -x opt_x
+ if($self->number_of_args() > 1) {
+ # There is something to work on
+ $self->{'arg_queue'}->unget($self->pop());
+ last;
+ } else {
+ my $args = join(" ", map { $_->orig() } @$next_arg);
+ ::error("Command line too long (",
+ $self->len(), " >= ",
+ $max_len,
+ ") at input ",
+ $self->{'arg_queue'}->arg_number(),
+ ": ".
+ ((length $args > 50) ?
+ (substr($args,0,50))."...\n" :
+ $args."\n"));
+ $self->{'arg_queue'}->unget($self->pop());
+ ::wait_and_exit(255);
+ }
+ }
+
+ if(defined $self->{'max_number_of_args'}) {
+ if($self->number_of_args() >= $self->{'max_number_of_args'}) {
+ last;
+ }
+ }
+ }
+ if(($opt::m or $opt::X) and not $CommandLine::already_spread
+ and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) {
+ # -m or -X and EOF => Spread the arguments over all jobslots
+ # (unless they are already spread)
+ $CommandLine::already_spread ||= 1;
+ if($self->number_of_args() > 1) {
+ $self->{'max_number_of_args'} =
+ ::ceil($self->number_of_args()/$Global::max_jobs_running);
+ $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} =
+ $self->{'max_number_of_args'};
+ $self->{'arg_queue'}->unget($self->pop_all());
+ while($self->number_of_args() < $self->{'max_number_of_args'}) {
+ $self->push($self->{'arg_queue'}->get());
+ }
+ }
+ }
+}
+
+sub push {
+ # Add one or more records as arguments
+ # Returns: N/A
+ my $self = shift;
+ my $record = shift;
+ push @{$self->{'arg_list'}}, $record;
+
+ my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
+ my $rep;
+ for my $arg (@$record) {
+ if(defined $arg) {
+ for my $perlexpr (keys %{$self->{'replacecount'}}) {
+ # 50% faster than below
+ $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self);
+ # $rep = $arg->replace($perlexpr,$quote_arg,$self);
+ # $self->{'len'}{$perlexpr} += length $rep;
+ # ::debug("length", "Length: ", length $rep,
+ # "(", $perlexpr, "=>", $rep, ")\n");
+ }
+ }
+ }
+}
+
+sub pop {
+ # Remove last argument
+ # Returns:
+ # the last record
+ my $self = shift;
+ my $record = pop @{$self->{'arg_list'}};
+ my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
+ for my $arg (@$record) {
+ if(defined $arg) {
+ for my $perlexpr (keys %{$self->{'replacecount'}}) {
+ $self->{'len'}{$perlexpr} -=
+ length $arg->replace($perlexpr,$quote_arg,$self);
+ }
+ }
+ }
+ return $record;
+}
+
+sub pop_all {
+ # Remove all arguments and zeros the length of replacement strings
+ # Returns:
+ # all records
+ my $self = shift;
+ my @popped = @{$self->{'arg_list'}};
+ for my $replacement_string (keys %{$self->{'replacecount'}}) {
+ $self->{'len'}{$replacement_string} = 0;
+ }
+ $self->{'arg_list'} = [];
+ return @popped;
+}
+
+sub number_of_args {
+ # The number of records
+ # Returns:
+ # number of records
+ my $self = shift;
+ # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd
+ # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az
+ # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq
+ # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue.
+ #
+ # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG
+ # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq
+ # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq
+ # eagdoq oapq.
+ #
+ # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz
+ # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq
+ # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq
+ # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq
+ # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq
+ # eagdoq oapq U daf13'qp ftq eagdoq oapq
+ # tffb://qz.iuwubqpum.ads/iuwu/DAF13
+ #
+ # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk
+ # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq
+ # tmp fa nq daf13'qp.
+ #
+ # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita
+ # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq
+ # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz.
+ #
+ # This is really the number of records
+ return $#{$self->{'arg_list'}}+1;
+}
+
+sub number_of_recargs {
+ # The number of args in records
+ # Returns:
+ # number of args records
+ my $self = shift;
+ my $sum = 0;
+ my $nrec = scalar @{$self->{'arg_list'}};
+ if($nrec) {
+ $sum = $nrec * (scalar @{$self->{'arg_list'}[0]});
+ }
+ return $sum;
+}
+
+sub args_as_string {
+ # Returns:
+ # all unmodified arguments joined with ' ' (similar to {})
+ my $self = shift;
+ return (join " ", map { $_->orig() }
+ map { @$_ } @{$self->{'arg_list'}});
+}
+
+sub args_as_dirname {
+ # Returns:
+ # all unmodified arguments joined with '/' (similar to {})
+ # \t \0 \\ and / are quoted as: \t \0 \\ \_
+ # If $Global::max_file_length: Keep subdirs < $Global::max_file_length
+ my $self = shift;
+ my @res = ();
+
+ for my $rec_ref (@{$self->{'arg_list'}}) {
+ # If headers are used, sort by them.
+ # Otherwise keep the order from the command line.
+ my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1);
+ for my $n (@header_indexes_sorted) {
+ CORE::push(@res,
+ $Global::input_source_header{$n},
+ map { my $s = $_;
+ # \t \0 \\ and / are quoted as: \t \0 \\ \_
+ $s =~ s/\\/\\\\/g;
+ $s =~ s/\t/\\t/g;
+ $s =~ s/\0/\\0/g;
+ $s =~ s:/:\\_:g;
+ if($Global::max_file_length) {
+ # Keep each subdir shorter than the longest
+ # allowed file name
+ $s = substr($s,0,$Global::max_file_length);
+ }
+ $s; }
+ $rec_ref->[$n-1]->orig());
+ }
+ }
+ return join "/", @res;
+}
+
+sub header_indexes_sorted {
+ # Sort headers first by number then by name.
+ # E.g.: 1a 1b 11a 11b
+ # Returns:
+ # Indexes of %Global::input_source_header sorted
+ my $max_col = shift;
+
+ no warnings 'numeric';
+ for my $col (1 .. $max_col) {
+ # Make sure the header is defined. If it is not: use column number
+ if(not defined $Global::input_source_header{$col}) {
+ $Global::input_source_header{$col} = $col;
+ }
+ }
+ my @header_indexes_sorted = sort {
+ # Sort headers numerically then asciibetically
+ $Global::input_source_header{$a} <=> $Global::input_source_header{$b}
+ or
+ $Global::input_source_header{$a} cmp $Global::input_source_header{$b}
+ } 1 .. $max_col;
+ return @header_indexes_sorted;
+}
+
+sub len {
+ # Uses:
+ # $opt::shellquote
+ # The length of the command line with args substituted
+ my $self = shift;
+ my $len = 0;
+ # Add length of the original command with no args
+ # Length of command w/ all replacement args removed
+ $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1;
+ ::debug("length", "noncontext + command: $len\n");
+ my $recargs = $self->number_of_recargs();
+ if($self->{'context_replace'}) {
+ # Context is duplicated for each arg
+ $len += $recargs * $self->{'len'}{'context'};
+ for my $replstring (keys %{$self->{'replacecount'}}) {
+ # If the replacements string is more than once: mulitply its length
+ $len += $self->{'len'}{$replstring} *
+ $self->{'replacecount'}{$replstring};
+ ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*",
+ $self->{'replacecount'}{$replstring}, "\n");
+ }
+ # echo 11 22 33 44 55 66 77 88 99 1010
+ # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10
+ # 5 + ctxgrp*arg
+ ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'},
+ " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n");
+ # Add space between context groups
+ $len += ($recargs-1) * ($self->{'len'}{'contextgroups'});
+ } else {
+ # Each replacement string may occur several times
+ # Add the length for each time
+ $len += 1*$self->{'len'}{'context'};
+ ::debug("length", "context+noncontext + command: $len\n");
+ for my $replstring (keys %{$self->{'replacecount'}}) {
+ # (space between regargs + length of replacement)
+ # * number this replacement is used
+ $len += ($recargs -1 + $self->{'len'}{$replstring}) *
+ $self->{'replacecount'}{$replstring};
+ }
+ }
+ if($opt::nice) {
+ # Pessimistic length if --nice is set
+ # Worse than worst case: every char needs to be quoted with \
+ $len *= 2;
+ }
+ if($Global::quoting) {
+ # Pessimistic length if -q is set
+ # Worse than worst case: every char needs to be quoted with \
+ $len *= 2;
+ }
+ if($opt::shellquote) {
+ # Pessimistic length if --shellquote is set
+ # Worse than worst case: every char needs to be quoted with \ twice
+ $len *= 4;
+ }
+ # If we are using --env, add the prefix for that, too.
+ $len += $Global::envvarlen;
+
+ return $len;
+}
+
+sub replaced {
+ # Uses:
+ # $Global::noquote
+ # $Global::quoting
+ # Returns:
+ # $replaced = command with place holders replaced and prepended
+ my $self = shift;
+ if(not defined $self->{'replaced'}) {
+ # Don't quote arguments if the input is the full command line
+ my $quote_arg = $Global::noquote ? 0 : not $Global::quoting;
+ # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP
+ $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg;
+ $self->{'replaced'} = $self->
+ replace_placeholders($self->{'command'},$Global::quoting,
+ $quote_arg);
+ my $len = length $self->{'replaced'};
+ if ($len != $self->len()) {
+ ::debug("length", $len, " != ", $self->len(),
+ " ", $self->{'replaced'}, "\n");
+ } else {
+ ::debug("length", $len, " == ", $self->len(),
+ " ", $self->{'replaced'}, "\n");
+ }
+ }
+ return $self->{'replaced'};
+}
+
+{
+ my @target;
+ my $context_replace;
+ my @arg;
+ my $perl_expressions_as_re;
+
+ sub fish_out_words_containing_replacement_strings {
+ my %word;
+ for (@target) {
+ my $tt = $_;
+ ::debug("replace", "Target: $tt");
+ # Command line template:
+ # a{1}b{}c{}d
+ # becomes:
+ # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d
+ # becomes:
+ # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d
+ # Input A B C (no context) becomes:
+ # A B C => aAbA B CcA B Cd
+ # Input A B C (context -X) becomes:
+ # A B C => aAbAcAd aAbBcBd aAbCcCd
+ if($context_replace) {
+ while($tt =~ s/([^\s\257]* # before {=
+ (?:
+ \257< # {=
+ [^\257]*? # The perl expression
+ \257> # =}
+ [^\s\257]* # after =}
+ )+)/ /x) {
+ # $1 = pre \257 perlexpr \257 post
+ $word{"$1"} ||= 1;
+ }
+ } else {
+ while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) {
+ # $f = \257 perlexpr \257
+ $word{$1} ||= 1;
+ }
+ }
+ }
+ return keys %word;
+ }
+
+ sub flatten_arg_list {
+ my $arglist_ref = shift;
+ @arg = ();
+ for my $record (@$arglist_ref) {
+ # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ]
+ # Merge arg-objects from records into @arg for easy access
+ CORE::push @arg, @$record;
+ }
+ # Add one arg if empty to allow {#} and {%} to be computed only once
+ if(not @arg) { @arg = (Arg->new("")); }
+ }
+
+ sub replace_placeholders {
+ # Replace foo{}bar with fooargbar
+ # Input:
+ # $targetref = command as shell words
+ # $quote = should everything be quoted?
+ # $quote_arg = should replaced arguments be quoted?
+ # Returns:
+ # @target with placeholders replaced
+ my $self = shift;
+ my $targetref = shift;
+ my $quote = shift;
+ my $quote_arg = shift;
+ my %replace;
+ $context_replace = $self->{'context_replace'};
+ @target = @$targetref;
+ ::debug("replace", "Replace @target\n");
+ # -X = context replace
+ # maybe multiple input sources
+ # maybe --xapply
+ if(not @target) {
+ # @target is empty: Return empty array
+ return @target;
+ }
+ # Fish out the words that have replacement strings in them
+ my @word = fish_out_words_containing_replacement_strings();
+ flatten_arg_list($self->{'arg_list'});
+
+ # Number of arguments - used for positional arguments
+ my $n = $#arg+1;
+
+ # This is actually a CommandLine-object,
+ # but it looks nice to be able to say {= $job->slot() =}
+ my $job = $self;
+ for my $word (@word) {
+ # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF
+ ::debug("replace", "Replacing in $word\n");
+ my $normal_replace;
+
+ # for each arg:
+ # replace replacement strings with replacement in the word value
+ # push to replace word value
+ $perl_expressions_as_re ||=
+ join("|", map {s/^-?\d+//; "\Q$_\E"} keys %{$self->{'replacecount'}});
+ for my $arg (@arg) {
+ my $val = $word;
+ # Replace {= perl expr =} with value for each arg
+ $val =~ s{\257<(-?\d+)?($perl_expressions_as_re)\257>}
+ {
+ if($1) {
+ # Positional replace
+ # Find the relevant arg and replace it
+ ($arg[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace
+ $arg[$1 > 0 ? $1-1 : $n+$1]->
+ replace($2,$quote_arg,$self)
+ : "");
+ } else {
+ # Normal replace
+ $normal_replace ||= 1;
+ ($arg ? $arg->replace($2,$quote_arg,$self) : "");
+ }
+ }goxe;
+ if($quote) {
+ CORE::push(@{$replace{::shell_quote_scalar($word)}},
+ ::shell_quote_scalar($val));
+ } else {
+ CORE::push(@{$replace{$word}}, $val);
+ }
+ # No normal replacements => only run once
+ $normal_replace or last;
+ }
+ }
+
+ if($quote) {
+ @target = ::shell_quote(@target);
+ }
+ # ::debug("replace", "%replace=",::my_dump(%replace),"\n");
+ if(%replace) {
+ # Substitute the replace strings with the replacement values
+ # Must be sorted by length if a short word is a substring of a long word
+ my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s }
+ sort { length $b <=> length $a } keys %replace);
+ for(@target) {
+ s/($regexp)/join(" ",@{$replace{$1}})/ge;
+ }
+ }
+ ::debug("replace", "Return @target\n");
+ return wantarray ? @target : "@target";
+ }
+}
+
+
+package CommandLineQueue;
+
+sub new {
+ my $class = shift;
+ my $commandref = shift;
+ my $read_from = shift;
+ my $context_replace = shift;
+ my $max_number_of_args = shift;
+ my $return_files = shift;
+ my @unget = ();
+ my ($count,$posrpl,$perlexpr);
+ my ($replacecount_ref, $len_ref);
+ my @command = @$commandref;
+ my $dummy = '';
+ # If the first command start with '-' it is probably an option
+ if($command[0] =~ /^\s*(-\S+)/) {
+ # Is this really a command in $PATH starting with '-'?
+ my $cmd = $1;
+ if(not ::which($cmd)) {
+ ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n");
+ ::wait_and_exit(255);
+ }
+ }
+ # Replace replacement strings with {= perl expr =}
+ @command = merge_rpl_parts(@command);
+
+ # Protect matching inside {= perl expr =}
+ # by replacing {= and =} with \257< and \257>
+ # in @command, --return and --tagstring (if used)
+ for(@command,@$return_files,
+ (defined $opt::tagstring ? $opt::tagstring : $dummy)) {
+ # Disallow \257 to avoid nested {= {= =} =}
+ if(/\257/) {
+ ::error("Command cannot contain the character \257. Use a function for that.\n");
+ ::wait_and_exit(255);
+ }
+ # Needs to match rightmost left parens (Perl defaults to leftmost)
+ # to deal with: {={==}
+ while(s{([^\257]*) \Q$Global::parensleft\E ([^\257]*?) \Q$Global::parensright\E }
+ {$1\257<$2\257>}gx) {}
+ for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) {
+ # Replace long --rpl's before short ones, as a short may be a
+ # substring of a long:
+ # --rpl '% s/a/b/' --rpl '%% s/b/a/'
+ # Replace the short hand string (--rpl)
+ # with the {= perl expr =}
+ # Avoid replacing inside existing {= perl expr =}
+ while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257>
+ \Q$rpl\E}
+ {$1\257<$Global::rpl{$rpl}\257>}xg) {
+ }
+ # Do the same for the positional replacement strings
+ # A bit harder as we have to put in the position number
+ $posrpl = $rpl;
+ if($posrpl =~ s/^\{//) {
+ # Only do this if the shorthand start with {
+ s{\{(-?\d+)\Q$posrpl\E}
+ {\257<$1 $Global::rpl{$rpl}\257>}g;
+ }
+ }
+ }
+ # Add {} if no replacement strings in @command
+ ($replacecount_ref, $len_ref, @command) =
+ replacement_counts_and_lengths($return_files,@command);
+ if("@command" =~ /^[^ \t\n=]*\257) {
+ # Replacement string is (part of) the command (and not just
+ # argument or variable definition V1={})
+ # E.g. parallel {}, parallel my_{= s/_//=}, parallel {2}
+ # Do no quote (Otherwise it will fail if the input contains spaces)
+ $Global::noquote = 1;
+ }
+
+ return bless {
+ 'unget' => \@unget,
+ 'command' => \@command,
+ 'replacecount' => $replacecount_ref,
+ 'arg_queue' => RecordQueue->new($read_from,$opt::colsep),
+ 'context_replace' => $context_replace,
+ 'len' => $len_ref,
+ 'max_number_of_args' => $max_number_of_args,
+ 'size' => undef,
+ 'return_files' => $return_files,
+ 'seq' => 1,
+ }, ref($class) || $class;
+}
+
+sub merge_rpl_parts {
+ # '{=' 'perlexpr' '=}' => '{= perlexpr =}'
+ # Input:
+ # @in = the @command as given by the user
+ # Uses:
+ # $Global::parensleft
+ # $Global::parensright
+ # Returns:
+ # @command with parts merged to keep {= and =} as one
+ my @in = @_;
+ my @out;
+ my $l = quotemeta($Global::parensleft);
+ my $r = quotemeta($Global::parensright);
+
+ while(@in) {
+ my $s = shift @in;
+ $_ = $s;
+ # Remove matching (right most) parens
+ while(s/(.*)$l.*?$r/$1/o) {}
+ if(/$l/o) {
+ # Missing right parens
+ while(@in) {
+ $s .= " ".shift @in;
+ $_ = $s;
+ while(s/(.*)$l.*?$r/$1/o) {}
+ if(not /$l/o) {
+ last;
+ }
+ }
+ }
+ push @out, $s;
+ }
+ return @out;
+}
+
+sub replacement_counts_and_lengths {
+ # Count the number of different replacement strings.
+ # Find the lengths of context for context groups and non-context
+ # groups.
+ # If no {} found in @command: add it to @command
+ #
+ # Input:
+ # \@return_files = array of filenames to return
+ # @command = command template
+ # Output:
+ # \%replacecount, \%len, @command
+ my $return_files = shift;
+ my @command = @_;
+ my (%replacecount,%len);
+ my $sum = 0;
+ while($sum == 0) {
+ # Count how many times each replacement string is used
+ my @cmd = @command;
+ my $contextlen = 0;
+ my $noncontextlen = 0;
+ my $contextgroups = 0;
+ for my $c (@cmd) {
+ while($c =~ s/ \257<([^\257]*?)\257> /\000/x) {
+ # %replacecount = { "perlexpr" => number of times seen }
+ # e.g { "s/a/b/" => 2 }
+ $replacecount{$1}++;
+ $sum++;
+ }
+ # Measure the length of the context around the {= perl expr =}
+ # Use that {=...=} has been replaced with \000 above
+ # So there is no need to deal with \257<
+ while($c =~ s/ (\S*\000\S*) //x) {
+ my $w = $1;
+ $w =~ tr/\000//d; # Remove all \000's
+ $contextlen += length($w);
+ $contextgroups++;
+ }
+ # All {= perl expr =} have been removed: The rest is non-context
+ $noncontextlen += length $c;
+ }
+ for(@$return_files) {
+ my $t = $_;
+ while($t =~ s/ \257<([^\257]*)\257> //x) {
+ # %replacecount = { "perlexpr" => number of times seen }
+ # e.g { "$_++" => 2 }
+ # But for tagstring we just need to mark it as seen
+ $replacecount{$1} ||= 1;
+ }
+ }
+ if($opt::tagstring) {
+ my $t = $opt::tagstring;
+ while($t =~ s/ \257<([^\257]*)\257> //x) {
+ # %replacecount = { "perlexpr" => number of times seen }
+ # e.g { "$_++" => 2 }
+ # But for tagstring we just need to mark it as seen
+ $replacecount{$1} ||= 1;
+ }
+ }
+ if($opt::bar) {
+ # If the command does not contain {} force it to be computed
+ # as it is being used by --bar
+ $replacecount{""} ||= 1;
+ }
+
+ $len{'context'} = 0+$contextlen;
+ $len{'noncontext'} = $noncontextlen;
+ $len{'contextgroups'} = $contextgroups;
+ $len{'noncontextgroups'} = @cmd-$contextgroups;
+ ::debug("length", "@command Context: ", $len{'context'},
+ " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'},
+ " NonCtxGrp: ", $len{'noncontextgroups'}, "\n");
+ if($sum == 0) {
+ if(not @command) {
+ # Default command = {}
+ @command = ("\257<\257>");
+ } elsif(($opt::pipe or $opt::pipepart)
+ and not $opt::fifo and not $opt::cat) {
+ # With --pipe / --pipe-part you can have no replacement
+ last;
+ } else {
+ # Append {} to the command if there are no {...}'s and no {=...=}
+ push @command, ("\257<\257>");
+ }
+ }
+ }
+ return(\%replacecount,\%len,@command);
+}
+
+sub get {
+ my $self = shift;
+ if(@{$self->{'unget'}}) {
+ my $cmd_line = shift @{$self->{'unget'}};
+ return ($cmd_line);
+ } else {
+ my $cmd_line = CommandLine->new($self->seq(),
+ $self->{'command'},
+ $self->{'arg_queue'},
+ $self->{'context_replace'},
+ $self->{'max_number_of_args'},
+ $self->{'return_files'},
+ $self->{'replacecount'},
+ $self->{'len'},
+ );
+ $cmd_line->populate();
+ ::debug("init","cmd_line->number_of_args ",
+ $cmd_line->number_of_args(), "\n");
+ if($opt::pipe or $opt::pipepart) {
+ if($cmd_line->replaced() eq "") {
+ # Empty command - pipe requires a command
+ ::error("--pipe must have a command to pipe into (e.g. 'cat').\n");
+ ::wait_and_exit(255);
+ }
+ } else {
+ if($cmd_line->number_of_args() == 0) {
+ # We did not get more args - maybe at EOF string?
+ return undef;
+ } elsif($cmd_line->replaced() eq "") {
+ # Empty command - get the next instead
+ return $self->get();
+ }
+ }
+ $self->set_seq($self->seq()+1);
+ return $cmd_line;
+ }
+}
+
+sub unget {
+ my $self = shift;
+ unshift @{$self->{'unget'}}, @_;
+}
+
+sub empty {
+ my $self = shift;
+ my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty();
+ ::debug("run", "CommandLineQueue->empty $empty");
+ return $empty;
+}
+
+sub seq {
+ my $self = shift;
+ return $self->{'seq'};
+}
+
+sub set_seq {
+ my $self = shift;
+ $self->{'seq'} = shift;
+}
+
+sub quote_args {
+ my $self = shift;
+ # If there is not command emulate |bash
+ return $self->{'command'};
+}
+
+
+package Limits::Command;
+
+# Maximal command line length (for -m and -X)
+sub max_length {
+ # Find the max_length of a command line and cache it
+ # Returns:
+ # number of chars on the longest command line allowed
+ if(not $Limits::Command::line_max_len) {
+ # Disk cache of max command line length
+ my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname();
+ my $cached_limit;
+ if(-e $len_cache) {
+ open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache");
+ $cached_limit = <$fh>;
+ close $fh;
+ } else {
+ $cached_limit = real_max_length();
+ # If $HOME is write protected: Do not fail
+ mkdir($ENV{'HOME'} . "/.parallel");
+ mkdir($ENV{'HOME'} . "/.parallel/tmp");
+ open(my $fh, ">", $len_cache);
+ print $fh $cached_limit;
+ close $fh;
+ }
+ $Limits::Command::line_max_len = tmux_length($cached_limit);
+ if($opt::max_chars) {
+ if($opt::max_chars <= $cached_limit) {
+ $Limits::Command::line_max_len = $opt::max_chars;
+ } else {
+ ::warning("Value for -s option ",
+ "should be < $cached_limit.\n");
+ }
+ }
+ }
+ return $Limits::Command::line_max_len;
+}
+
+sub real_max_length {
+ # Find the max_length of a command line
+ # Returns:
+ # The maximal command line length
+ # Use an upper bound of 8 MB if the shell allows for for infinite long lengths
+ my $upper = 8_000_000;
+ my $len = 8;
+ do {
+ if($len > $upper) { return $len };
+ $len *= 16;
+ } while (is_acceptable_command_line_length($len));
+ # Then search for the actual max length between 0 and upper bound
+ return binary_find_max_length(int($len/16),$len);
+}
+
+sub binary_find_max_length {
+ # Given a lower and upper bound find the max_length of a command line
+ # Returns:
+ # number of chars on the longest command line allowed
+ my ($lower, $upper) = (@_);
+ if($lower == $upper or $lower == $upper-1) { return $lower; }
+ my $middle = int (($upper-$lower)/2 + $lower);
+ ::debug("init", "Maxlen: $lower,$upper,$middle : ");
+ if (is_acceptable_command_line_length($middle)) {
+ return binary_find_max_length($middle,$upper);
+ } else {
+ return binary_find_max_length($lower,$middle);
+ }
+}
+
+sub is_acceptable_command_line_length {
+ # Test if a command line of this length can run
+ # Returns:
+ # 0 if the command line length is too long
+ # 1 otherwise
+ my $len = shift;
+
+ local *STDERR;
+ open (STDERR, ">", "/dev/null");
+ system "true "."x"x$len;
+ close STDERR;
+ ::debug("init", "$len=$? ");
+ return not $?;
+}
+
+sub tmux_length {
+ # If $opt::tmux set, find the limit for tmux
+ # tmux 1.8 has a 2kB limit
+ # tmux 1.9 has a 16kB limit
+ # Input:
+ # $len = maximal command line length
+ # Returns:
+ # $tmux_len = maximal length runable in tmux
+ my $len = shift;
+ if($opt::tmux) {
+ $ENV{'TMUX'} ||= "tmux";
+ if(not ::which($ENV{'TMUX'})) {
+ ::error($ENV{'TMUX'}." not found in \$PATH.\n");
+ ::wait_and_exit(255);
+ }
+ my @out;
+ for my $l (1, 2020, 16320, 100000, $len) {
+ my $tmpfile = ::tmpname("tms");
+ my $tmuxcmd = "sh -c '".$ENV{'TMUX'}." -S $tmpfile new-session -d -n echo $l".
+ ("x"x$l). " 2>/dev/null' && echo $l; rm -f $tmpfile";
+ push @out, qx{ $tmuxcmd };
+ unlink $tmpfile;
+ }
+ ::debug("tmux","tmux-length ",@out);
+ chomp @out;
+ # The arguments is given 3 times on the command line
+ # and the wrapping is around 30 chars
+ # (29 for tmux1.9, 33 for tmux1.8)
+ my $tmux_len = (::max(@out));
+ $len = ::min($len,int($tmux_len/4-33));
+ ::debug("tmux","tmux-length ",$len);
+ }
+ return $len;
+}
+
+
+package RecordQueue;
+
+sub new {
+ my $class = shift;
+ my $fhs = shift;
+ my $colsep = shift;
+ my @unget = ();
+ my $arg_sub_queue;
+ if($colsep) {
+ # Open one file with colsep
+ $arg_sub_queue = RecordColQueue->new($fhs);
+ } else {
+ # Open one or more files if multiple -a
+ $arg_sub_queue = MultifileQueue->new($fhs);
+ }
+ return bless {
+ 'unget' => \@unget,
+ 'arg_number' => 0,
+ 'arg_sub_queue' => $arg_sub_queue,
+ }, ref($class) || $class;
+}
+
+sub get {
+ # Returns:
+ # reference to array of Arg-objects
+ my $self = shift;
+ if(@{$self->{'unget'}}) {
+ $self->{'arg_number'}++;
+ return shift @{$self->{'unget'}};
+ }
+ my $ret = $self->{'arg_sub_queue'}->get();
+ if(defined $Global::max_number_of_args
+ and $Global::max_number_of_args == 0) {
+ ::debug("run", "Read 1 but return 0 args\n");
+ return [Arg->new("")];
+ } else {
+ return $ret;
+ }
+}
+
+sub unget {
+ my $self = shift;
+ ::debug("run", "RecordQueue-unget '@_'\n");
+ $self->{'arg_number'} -= @_;
+ unshift @{$self->{'unget'}}, @_;
+}
+
+sub empty {
+ my $self = shift;
+ my $empty = not @{$self->{'unget'}};
+ $empty &&= $self->{'arg_sub_queue'}->empty();
+ ::debug("run", "RecordQueue->empty $empty");
+ return $empty;
+}
+
+sub arg_number {
+ my $self = shift;
+ return $self->{'arg_number'};
+}
+
+
+package RecordColQueue;
+
+sub new {
+ my $class = shift;
+ my $fhs = shift;
+ my @unget = ();
+ my $arg_sub_queue = MultifileQueue->new($fhs);
+ return bless {
+ 'unget' => \@unget,
+ 'arg_sub_queue' => $arg_sub_queue,
+ }, ref($class) || $class;
+}
+
+sub get {
+ # Returns:
+ # reference to array of Arg-objects
+ my $self = shift;
+ if(@{$self->{'unget'}}) {
+ return shift @{$self->{'unget'}};
+ }
+ my $unget_ref=$self->{'unget'};
+ if($self->{'arg_sub_queue'}->empty()) {
+ return undef;
+ }
+ my $in_record = $self->{'arg_sub_queue'}->get();
+ if(defined $in_record) {
+ my @out_record = ();
+ for my $arg (@$in_record) {
+ ::debug("run", "RecordColQueue::arg $arg\n");
+ my $line = $arg->orig();
+ ::debug("run", "line='$line'\n");
+ if($line ne "") {
+ for my $s (split /$opt::colsep/o, $line, -1) {
+ push @out_record, Arg->new($s);
+ }
+ } else {
+ push @out_record, Arg->new("");
+ }
+ }
+ return \@out_record;
+ } else {
+ return undef;
+ }
+}
+
+sub unget {
+ my $self = shift;
+ ::debug("run", "RecordColQueue-unget '@_'\n");
+ unshift @{$self->{'unget'}}, @_;
+}
+
+sub empty {
+ my $self = shift;
+ my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty());
+ ::debug("run", "RecordColQueue->empty $empty");
+ return $empty;
+}
+
+
+package MultifileQueue;
+
+@Global::unget_argv=();
+
+sub new {
+ my $class = shift;
+ my $fhs = shift;
+ for my $fh (@$fhs) {
+ if(-t $fh) {
+ ::warning("Input is read from the terminal.\n");
+ ::warning("Only experts do this on purpose. ".
+ "Press CTRL-D to exit.\n");
+ }
+ }
+ return bless {
+ 'unget' => \@Global::unget_argv,
+ 'fhs' => $fhs,
+ 'arg_matrix' => undef,
+ }, ref($class) || $class;
+}
+
+sub get {
+ my $self = shift;
+ if($opt::xapply) {
+ return $self->xapply_get();
+ } else {
+ return $self->nest_get();
+ }
+}
+
+sub unget {
+ my $self = shift;
+ ::debug("run", "MultifileQueue-unget '@_'\n");
+ unshift @{$self->{'unget'}}, @_;
+}
+
+sub empty {
+ my $self = shift;
+ my $empty = (not @Global::unget_argv
+ and not @{$self->{'unget'}});
+ for my $fh (@{$self->{'fhs'}}) {
+ $empty &&= eof($fh);
+ }
+ ::debug("run", "MultifileQueue->empty $empty ");
+ return $empty;
+}
+
+sub xapply_get {
+ my $self = shift;
+ if(@{$self->{'unget'}}) {
+ return shift @{$self->{'unget'}};
+ }
+ my @record = ();
+ my $prepend = undef;
+ my $empty = 1;
+ for my $fh (@{$self->{'fhs'}}) {
+ my $arg = read_arg_from_fh($fh);
+ if(defined $arg) {
+ # Record $arg for recycling at end of file
+ push @{$self->{'arg_matrix'}{$fh}}, $arg;
+ push @record, $arg;
+ $empty = 0;
+ } else {
+ ::debug("run", "EOA ");
+ # End of file: Recycle arguments
+ push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}};
+ # return last @{$args->{'args'}{$fh}};
+ push @record, @{$self->{'arg_matrix'}{$fh}}[-1];
+ }
+ }
+ if($empty) {
+ return undef;
+ } else {
+ return \@record;
+ }
+}
+
+sub nest_get {
+ my $self = shift;
+ if(@{$self->{'unget'}}) {
+ return shift @{$self->{'unget'}};
+ }
+ my @record = ();
+ my $prepend = undef;
+ my $empty = 1;
+ my $no_of_inputsources = $#{$self->{'fhs'}} + 1;
+ if(not $self->{'arg_matrix'}) {
+ # Initialize @arg_matrix with one arg from each file
+ # read one line from each file
+ my @first_arg_set;
+ my $all_empty = 1;
+ for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) {
+ my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
+ if(defined $arg) {
+ $all_empty = 0;
+ }
+ $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new("");
+ push @first_arg_set, $self->{'arg_matrix'}[$fhno][0];
+ }
+ if($all_empty) {
+ # All filehandles were at eof or eof-string
+ return undef;
+ }
+ return [@first_arg_set];
+ }
+
+ # Treat the case with one input source special. For multiple
+ # input sources we need to remember all previously read values to
+ # generate all combinations. But for one input source we can
+ # forget the value after first use.
+ if($no_of_inputsources == 1) {
+ my $arg = read_arg_from_fh($self->{'fhs'}[0]);
+ if(defined($arg)) {
+ return [$arg];
+ }
+ return undef;
+ }
+ for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) {
+ if(eof($self->{'fhs'}[$fhno])) {
+ next;
+ } else {
+ # read one
+ my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]);
+ defined($arg) || next; # If we just read an EOF string: Treat this as EOF
+ my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1;
+ $self->{'arg_matrix'}[$fhno][$len] = $arg;
+ # make all new combinations
+ my @combarg = ();
+ for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) {
+ push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}];
+ }
+ $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry
+ # map combinations
+ # [ 1, 3, 7 ], [ 2, 4, 1 ]
+ # =>
+ # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ]
+ my @mapped;
+ for my $c (expand_combinations(@combarg)) {
+ my @a;
+ for my $n (0 .. $no_of_inputsources - 1 ) {
+ push @a, $self->{'arg_matrix'}[$n][$$c[$n]];
+ }
+ push @mapped, \@a;
+ }
+ # append the mapped to the ungotten arguments
+ push @{$self->{'unget'}}, @mapped;
+ # get the first
+ return shift @{$self->{'unget'}};
+ }
+ }
+ # all are eof or at EOF string; return from the unget queue
+ return shift @{$self->{'unget'}};
+}
+
+sub read_arg_from_fh {
+ # Read one Arg from filehandle
+ # Returns:
+ # Arg-object with one read line
+ # undef if end of file
+ my $fh = shift;
+ my $prepend = undef;
+ my $arg;
+ do {{
+ # This makes 10% faster
+ if(not ($arg = <$fh>)) {
+ if(defined $prepend) {
+ return Arg->new($prepend);
+ } else {
+ return undef;
+ }
+ }
+# ::debug("run", "read $arg\n");
+ # Remove delimiter
+ $arg =~ s:$/$::;
+ if($Global::end_of_file_string and
+ $arg eq $Global::end_of_file_string) {
+ # Ignore the rest of input file
+ close $fh;
+ ::debug("run", "EOF-string ($arg) met\n");
+ if(defined $prepend) {
+ return Arg->new($prepend);
+ } else {
+ return undef;
+ }
+ }
+ if(defined $prepend) {
+ $arg = $prepend.$arg; # For line continuation
+ $prepend = undef; #undef;
+ }
+ if($Global::ignore_empty) {
+ if($arg =~ /^\s*$/) {
+ redo; # Try the next line
+ }
+ }
+ if($Global::max_lines) {
+ if($arg =~ /\s$/) {
+ # Trailing space => continued on next line
+ $prepend = $arg;
+ redo;
+ }
+ }
+ }} while (1 == 0); # Dummy loop {{}} for redo
+ if(defined $arg) {
+ return Arg->new($arg);
+ } else {
+ ::die_bug("multiread arg undefined");
+ }
+}
+
+sub expand_combinations {
+ # Input:
+ # ([xmin,xmax], [ymin,ymax], ...)
+ # Returns: ([x,y,...],[x,y,...])
+ # where xmin <= x <= xmax and ymin <= y <= ymax
+ my $minmax_ref = shift;
+ my $xmin = $$minmax_ref[0];
+ my $xmax = $$minmax_ref[1];
+ my @p;
+ if(@_) {
+ # If there are more columns: Compute those recursively
+ my @rest = expand_combinations(@_);
+ for(my $x = $xmin; $x <= $xmax; $x++) {
+ push @p, map { [$x, @$_] } @rest;
+ }
+ } else {
+ for(my $x = $xmin; $x <= $xmax; $x++) {
+ push @p, [$x];
+ }
+ }
+ return @p;
+}
+
+
+package Arg;
+
+sub new {
+ my $class = shift;
+ my $orig = shift;
+ my @hostgroups;
+ if($opt::hostgroups) {
+ if($orig =~ s:@(.+)::) {
+ # We found hostgroups on the arg
+ @hostgroups = split(/\+/, $1);
+ if(not grep { defined $Global::hostgroups{$_} } @hostgroups) {
+ ::warning("No such hostgroup (@hostgroups)\n");
+ @hostgroups = (keys %Global::hostgroups);
+ }
+ } else {
+ @hostgroups = (keys %Global::hostgroups);
+ }
+ }
+ return bless {
+ 'orig' => $orig,
+ 'hostgroups' => \@hostgroups,
+ }, ref($class) || $class;
+}
+
+sub replace {
+ # Calculates the corresponding value for a given perl expression
+ # Returns:
+ # The calculated string (quoted if asked for)
+ my $self = shift;
+ my $perlexpr = shift; # E.g. $_=$_ or s/.gz//
+ my $quote = (shift) ? 1 : 0; # should the string be quoted?
+ # This is actually a CommandLine-object,
+ # but it looks nice to be able to say {= $job->slot() =}
+ my $job = shift;
+ $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace
+ if(not defined $self->{"rpl",0,$perlexpr}) {
+ local $_;
+ if($Global::trim eq "n") {
+ $_ = $self->{'orig'};
+ } else {
+ $_ = trim_of($self->{'orig'});
+ }
+ ::debug("replace", "eval ", $perlexpr, " ", $_, "\n");
+ if(not $Global::perleval{$perlexpr}) {
+ # Make an anonymous function of the $perlexpr
+ # And more importantly: Compile it only once
+ if($Global::perleval{$perlexpr} =
+ eval('sub { no strict; no warnings; my $job = shift; '.
+ $perlexpr.' }')) {
+ # All is good
+ } else {
+ # The eval failed. Maybe $perlexpr is invalid perl?
+ ::error("Cannot use $perlexpr: $@\n");
+ ::wait_and_exit(255);
+ }
+ }
+ # Execute the function
+ $Global::perleval{$perlexpr}->($job);
+ $self->{"rpl",0,$perlexpr} = $_;
+ }
+ if(not defined $self->{"rpl",$quote,$perlexpr}) {
+ $self->{"rpl",1,$perlexpr} =
+ ::shell_quote_scalar($self->{"rpl",0,$perlexpr});
+ }
+ return $self->{"rpl",$quote,$perlexpr};
+}
+
+sub orig {
+ my $self = shift;
+ return $self->{'orig'};
+}
+
+sub trim_of {
+ # Removes white space as specifed by --trim:
+ # n = nothing
+ # l = start
+ # r = end
+ # lr|rl = both
+ # Returns:
+ # string with white space removed as needed
+ my @strings = map { defined $_ ? $_ : "" } (@_);
+ my $arg;
+ if($Global::trim eq "n") {
+ # skip
+ } elsif($Global::trim eq "l") {
+ for my $arg (@strings) { $arg =~ s/^\s+//; }
+ } elsif($Global::trim eq "r") {
+ for my $arg (@strings) { $arg =~ s/\s+$//; }
+ } elsif($Global::trim eq "rl" or $Global::trim eq "lr") {
+ for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; }
+ } else {
+ ::error("--trim must be one of: r l rl lr.\n");
+ ::wait_and_exit(255);
+ }
+ return wantarray ? @strings : "@strings";
+}
+
+
+package TimeoutQueue;
+
+sub new {
+ my $class = shift;
+ my $delta_time = shift;
+ my ($pct);
+ if($delta_time =~ /(\d+(\.\d+)?)%/) {
+ # Timeout in percent
+ $pct = $1/100;
+ $delta_time = 1_000_000;
+ }
+ return bless {
+ 'queue' => [],
+ 'delta_time' => $delta_time,
+ 'pct' => $pct,
+ 'remedian_idx' => 0,
+ 'remedian_arr' => [],
+ 'remedian' => undef,
+ }, ref($class) || $class;
+}
+
+sub delta_time {
+ my $self = shift;
+ return $self->{'delta_time'};
+}
+
+sub set_delta_time {
+ my $self = shift;
+ $self->{'delta_time'} = shift;
+}
+
+sub remedian {
+ my $self = shift;
+ return $self->{'remedian'};
+}
+
+sub set_remedian {
+ # Set median of the last 999^3 (=997002999) values using Remedian
+ #
+ # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A
+ # robust averaging method for large data sets." Journal of the
+ # American Statistical Association 85.409 (1990): 97-104.
+ my $self = shift;
+ my $val = shift;
+ my $i = $self->{'remedian_idx'}++;
+ my $rref = $self->{'remedian_arr'};
+ $rref->[0][$i%999] = $val;
+ $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2];
+ $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2];
+ $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2];
+}
+
+sub update_median_runtime {
+ # Update delta_time based on runtime of finished job if timeout is
+ # a percentage
+ my $self = shift;
+ my $runtime = shift;
+ if($self->{'pct'}) {
+ $self->set_remedian($runtime);
+ $self->{'delta_time'} = $self->{'pct'} * $self->remedian();
+ ::debug("run", "Timeout: $self->{'delta_time'}s ");
+ }
+}
+
+sub process_timeouts {
+ # Check if there was a timeout
+ my $self = shift;
+ # $self->{'queue'} is sorted by start time
+ while (@{$self->{'queue'}}) {
+ my $job = $self->{'queue'}[0];
+ if($job->endtime()) {
+ # Job already finished. No need to timeout the job
+ # This could be because of --keep-order
+ shift @{$self->{'queue'}};
+ } elsif($job->timedout($self->{'delta_time'})) {
+ # Need to shift off queue before kill
+ # because kill calls usleep that calls process_timeouts
+ shift @{$self->{'queue'}};
+ $job->kill();
+ } else {
+ # Because they are sorted by start time the rest are later
+ last;
+ }
+ }
+}
+
+sub insert {
+ my $self = shift;
+ my $in = shift;
+ push @{$self->{'queue'}}, $in;
+}
+
+
+package Semaphore;
+
+# This package provides a counting semaphore
+#
+# If a process dies without releasing the semaphore the next process
+# that needs that entry will clean up dead semaphores
+#
+# The semaphores are stored in ~/.parallel/semaphores/id- Each
+# file in ~/.parallel/semaphores/id-/ is the process ID of the
+# process holding the entry. If the process dies, the entry can be
+# taken by another process.
+
+sub new {
+ my $class = shift;
+ my $id = shift;
+ my $count = shift;
+ $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex
+ $id = "id-".$id; # To distinguish it from a process id
+ my $parallel_dir = $ENV{'HOME'}."/.parallel";
+ -d $parallel_dir or mkdir_or_die($parallel_dir);
+ my $parallel_locks = $parallel_dir."/semaphores";
+ -d $parallel_locks or mkdir_or_die($parallel_locks);
+ my $lockdir = "$parallel_locks/$id";
+ my $lockfile = $lockdir.".lock";
+ if($count < 1) { ::die_bug("semaphore-count: $count"); }
+ return bless {
+ 'lockfile' => $lockfile,
+ 'lockfh' => Symbol::gensym(),
+ 'lockdir' => $lockdir,
+ 'id' => $id,
+ 'idfile' => $lockdir."/".$id,
+ 'pid' => $$,
+ 'pidfile' => $lockdir."/".$$.'@'.::hostname(),
+ 'count' => $count + 1 # nlinks returns a link for the 'id-' as well
+ }, ref($class) || $class;
+}
+
+sub remove_dead_locks {
+ my $self = shift;
+ my $lockdir = $self->{'lockdir'};
+
+ for my $d (glob "$lockdir/*") {
+ $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next;
+ my ($pid, $host) = ($1, $2);
+ if($host eq ::hostname()) {
+ if(not kill 0, $pid) {
+ ::debug("sem", "Dead: $d\n");
+ unlink $d;
+ } else {
+ ::debug("sem", "Alive: $d\n");
+ }
+ }
+ }
+}
+
+sub acquire {
+ my $self = shift;
+ my $sleep = 1; # 1 ms
+ my $start_time = time;
+ while(1) {
+ # Can we get a lock?
+ $self->atomic_link_if_count_less_than() and last;
+ $self->remove_dead_locks();
+ # Retry slower and slower up to 1 second
+ $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
+ # Random to avoid every sleeping job waking up at the same time
+ ::usleep(rand()*$sleep);
+ if($opt::semaphoretimeout) {
+ if($opt::semaphoretimeout > 0
+ and
+ time - $start_time > $opt::semaphoretimeout) {
+ # Timeout: Take the semaphore anyway
+ ::warning("Semaphore timed out. Stealing the semaphore.\n");
+ if(not -e $self->{'idfile'}) {
+ open (my $fh, ">", $self->{'idfile'}) or
+ ::die_bug("timeout_write_idfile: $self->{'idfile'}");
+ close $fh;
+ }
+ link $self->{'idfile'}, $self->{'pidfile'};
+ last;
+ }
+ if($opt::semaphoretimeout < 0
+ and
+ time - $start_time > -$opt::semaphoretimeout) {
+ # Timeout: Exit
+ ::warning("Semaphore timed out. Exiting.\n");
+ exit(1);
+ last;
+ }
+ }
+ }
+ ::debug("sem", "acquired $self->{'pid'}\n");
+}
+
+sub release {
+ my $self = shift;
+ unlink $self->{'pidfile'};
+ if($self->nlinks() == 1) {
+ # This is the last link, so atomic cleanup
+ $self->lock();
+ if($self->nlinks() == 1) {
+ unlink $self->{'idfile'};
+ rmdir $self->{'lockdir'};
+ }
+ $self->unlock();
+ }
+ ::debug("run", "released $self->{'pid'}\n");
+}
+
+sub pid_change {
+ # This should do what release()+acquire() would do without having
+ # to re-acquire the semaphore
+ my $self = shift;
+
+ my $old_pidfile = $self->{'pidfile'};
+ $self->{'pid'} = $$;
+ $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname();
+ my $retval = link $self->{'idfile'}, $self->{'pidfile'};
+ ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
+ unlink $old_pidfile;
+}
+
+sub atomic_link_if_count_less_than {
+ # Link $file1 to $file2 if nlinks to $file1 < $count
+ my $self = shift;
+ my $retval = 0;
+ $self->lock();
+ my $nlinks = $self->nlinks();
+ ::debug("sem","$nlinks<$self->{'count'} ");
+ if($nlinks < $self->{'count'}) {
+ -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'});
+ if(not -e $self->{'idfile'}) {
+ open (my $fh, ">", $self->{'idfile'}) or
+ ::die_bug("write_idfile: $self->{'idfile'}");
+ close $fh;
+ }
+ $retval = link $self->{'idfile'}, $self->{'pidfile'};
+ ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n");
+ }
+ $self->unlock();
+ ::debug("sem", "atomic $retval");
+ return $retval;
+}
+
+sub nlinks {
+ my $self = shift;
+ if(-e $self->{'idfile'}) {
+ return (stat(_))[3];
+ } else {
+ return 0;
+ }
+}
+
+sub lock {
+ my $self = shift;
+ my $sleep = 100; # 100 ms
+ my $total_sleep = 0;
+ $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;";
+ my $locked = 0;
+ while(not $locked) {
+ if(tell($self->{'lockfh'}) == -1) {
+ # File not open
+ open($self->{'lockfh'}, ">", $self->{'lockfile'})
+ or ::debug("run", "Cannot open $self->{'lockfile'}");
+ }
+ if($self->{'lockfh'}) {
+ # File is open
+ chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw
+ if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) {
+ # The file is locked: No need to retry
+ $locked = 1;
+ last;
+ } else {
+ if ($! =~ m/Function not implemented/) {
+ ::warning("flock: $!");
+ ::warning("Will wait for a random while\n");
+ ::usleep(rand(5000));
+ # File cannot be locked: No need to retry
+ $locked = 2;
+ last;
+ }
+ }
+ }
+ # Locking failed in first round
+ # Sleep and try again
+ $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep);
+ # Random to avoid every sleeping job waking up at the same time
+ ::usleep(rand()*$sleep);
+ $total_sleep += $sleep;
+ if($opt::semaphoretimeout) {
+ if($opt::semaphoretimeout > 0
+ and
+ $total_sleep/1000 > $opt::semaphoretimeout) {
+ # Timeout: Take the semaphore anyway
+ ::warning("Semaphore timed out. Taking the semaphore.");
+ $locked = 3;
+ last;
+ }
+ if($opt::semaphoretimeout < 0
+ and
+ $total_sleep/1000 > -$opt::semaphoretimeout) {
+ # Timeout: Exit
+ ::warning("Semaphore timed out. Exiting.");
+ $locked = 4;
+ last;
+ }
+ } else {
+ if($total_sleep/1000 > 30) {
+ ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout.");
+ }
+ }
+ }
+ ::debug("run", "locked $self->{'lockfile'}");
+}
+
+sub unlock {
+ my $self = shift;
+ unlink $self->{'lockfile'};
+ close $self->{'lockfh'};
+ ::debug("run", "unlocked\n");
+}
+
+sub mkdir_or_die {
+ # If dir is not writable: die
+ my $dir = shift;
+ my @dir_parts = split(m:/:,$dir);
+ my ($ddir,$part);
+ while(defined ($part = shift @dir_parts)) {
+ $part eq "" and next;
+ $ddir .= "/".$part;
+ -d $ddir and next;
+ mkdir $ddir;
+ }
+ if(not -w $dir) {
+ ::error("Cannot write to $dir: $!\n");
+ ::wait_and_exit(255);
+ }
+}
+
+# Keep perl -w happy
+$opt::ctrlc = $opt::x = $Semaphore::timeout = $Semaphore::wait =
+$opt::ignored_option = $Job::file_descriptor_warning_printed =
+$Global::envdef = 0;
diff --git a/sql b/sql
new file mode 100755
index 0000000..0b4634d
--- /dev/null
+++ b/sql
@@ -0,0 +1,1109 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+sql - execute a command on a database determined by a dburl
+
+=head1 SYNOPSIS
+
+B [options] I [I]
+
+B [options] I < commandfile
+
+B<#!/usr/bin/sql> B<--shebang> [options] I
+
+=head1 DESCRIPTION
+
+GNU B aims to give a simple, unified interface for accessing
+databases through all the different databases' command line
+clients. So far the focus has been on giving a common way to specify
+login information (protocol, username, password, hostname, and port
+number), size (database and table size), and running queries.
+
+The database is addressed using a DBURL. If I are left out
+you will get that database's interactive shell.
+
+GNU B is often used in combination with GNU B.
+
+=over 9
+
+=item I
+
+A DBURL has the following syntax:
+[sql:]vendor://
+[[user][:password]@][host][:port]/[database][?sqlquery]
+
+See the section DBURL below.
+
+=item I
+
+The SQL commands to run. Each argument will have a newline
+appended.
+
+Example: "SELECT * FROM foo;" "SELECT * FROM bar;"
+
+If the arguments contain '\n' or '\x0a' this will be replaced with a
+newline:
+
+Example: "SELECT * FROM foo;\n SELECT * FROM bar;"
+
+If no commands are given SQL is read from the keyboard or STDIN.
+
+Example: echo 'SELECT * FROM foo;' | sql mysql:///
+
+
+=item B<--db-size>
+
+=item B<--dbsize>
+
+Size of database. Show the size of the database on disk. For Oracle
+this requires access to read the table I - the user
+I has that.
+
+
+=item B<--help>
+
+=item B<-h>
+
+Print a summary of the options to GNU B and exit.
+
+
+=item B<--html>
+
+HTML output. Turn on HTML tabular output.
+
+
+=item B<--show-processlist>
+
+=item B<--proclist>
+
+=item B<--listproc>
+
+Show the list of running queries.
+
+
+=item B<--show-databases>
+
+=item B<--showdbs>
+
+=item B<--list-databases>
+
+=item B<--listdbs>
+
+List the databases (table spaces) in the database.
+
+
+=item B<--show-tables>
+
+=item B<--list-tables>
+
+=item B<--table-list>
+
+List the tables in the database.
+
+
+=item B<--noheaders>
+
+=item B<--no-headers>
+
+=item B<-n>
+
+Remove headers and footers and print only tuples. Bug in Oracle: it
+still prints number of rows found.
+
+
+=item B<-p> I
+
+The string following -p will be given to the database connection
+program as arguments. Multiple -p's will be joined with
+space. Example: pass '-U' and the user name to the program:
+
+I<-p "-U scott"> can also be written I<-p -U -p scott>.
+
+
+=item B<-r>
+
+Try 3 times. Short version of I<--retries 3>.
+
+
+=item B<--retries> I
+
+Try I times. If the client program returns with an error,
+retry the command. Default is I<--retries 1>.
+
+
+=item B<--sep> I
+
+=item B<-s> I
+
+Field separator. Use I as separator between columns.
+
+
+=item B<--skip-first-line>
+
+Do not use the first line of input (used by GNU B itself
+when called with B<--shebang>).
+
+
+=item B<--table-size>
+
+=item B<--tablesize>
+
+Size of tables. Show the size of the tables in the database.
+
+
+=item B<--verbose>
+
+=item B<-v>
+
+Print which command is sent.
+
+
+=item B<--version>
+
+=item B<-V>
+
+Print the version GNU B and exit.
+
+
+=item B<--shebang>
+
+=item B<-Y>
+
+GNU B can be called as a shebang (#!) command as the first line of a script. Like this:
+
+ #!/usr/bin/sql -Y mysql:///
+
+ SELECT * FROM foo;
+
+For this to work B<--shebang> or B<-Y> must be set as the first option.
+
+=back
+
+=head1 DBURL
+
+A DBURL has the following syntax:
+[sql:]vendor://
+[[user][:password]@][host][:port]/[database][?sqlquery]
+
+To quote special characters use %-encoding specified in
+http://tools.ietf.org/html/rfc3986#section-2.1 (E.g. a password
+containing '/' would contain '%2F').
+
+Examples:
+ mysql://scott:tiger@my.example.com/mydb
+ sql:oracle://scott:tiger@ora.example.com/xe
+ postgresql://scott:tiger@pg.example.com/pgdb
+ pg:///
+ postgresqlssl://scott@pg.example.com:3333/pgdb
+ sql:sqlite2:////tmp/db.sqlite?SELECT * FROM foo;
+ sqlite3:///../db.sqlite3?SELECT%20*%20FROM%20foo;
+
+Currently supported vendors: MySQL (mysql), MySQL with SSL (mysqls,
+mysqlssl), Oracle (oracle, ora), PostgreSQL (postgresql, pg, pgsql,
+postgres), PostgreSQL with SSL (postgresqlssl, pgs, pgsqlssl,
+postgresssl, pgssl, postgresqls, pgsqls, postgress), SQLite2 (sqlite,
+sqlite2), SQLite3 (sqlite3).
+
+Aliases must start with ':' and are read from
+/etc/sql/aliases and ~/.sql/aliases. The user's own
+~/.sql/aliases should only be readable by the user.
+
+Example of aliases:
+
+ :myalias1 pg://scott:tiger@pg.example.com/pgdb
+ :myalias2 ora://scott:tiger@ora.example.com/xe
+ # Short form of mysql://`whoami`:nopassword@localhost:3306/`whoami`
+ :myalias3 mysql:///
+ # Short form of mysql://`whoami`:nopassword@localhost:33333/mydb
+ :myalias4 mysql://:33333/mydb
+ # Alias for an alias
+ :m :myalias4
+ # the sortest alias possible
+ : sqlite2:////tmp/db.sqlite
+ # Including an SQL query
+ :query sqlite:////tmp/db.sqlite?SELECT * FROM foo;
+
+=head1 EXAMPLES
+
+=head2 Get an interactive prompt
+
+The most basic use of GNU B is to get an interactive prompt:
+
+B
+
+If you have setup an alias you can do:
+
+B
+
+
+=head2 Run a query
+
+To run a query directly from the command line:
+
+B
+
+Oracle requires newlines after each statement. This can be done like
+this:
+
+B
+
+Or this:
+
+B
+
+
+=head2 Copy a PostgreSQL database
+
+To copy a PostgreSQL database use pg_dump to generate the dump and GNU
+B to import it:
+
+B
+
+
+=head2 Empty all tables in a MySQL database
+
+Using GNU B it is easy to empty all tables without dropping them:
+
+B
+
+
+=head2 Drop all tables in a PostgreSQL database
+
+To drop all tables in a PostgreSQL database do:
+
+B
+
+
+=head2 Run as a script
+
+Instead of doing:
+
+B
+
+you can combine the sqlfile with the DBURL to make a
+UNIX-script. Create a script called I:
+
+B<#!/usr/bin/sql -Y mysql:///>
+
+B