From 569c4a7715146d44cbde289637f1d150a8ba1260 Mon Sep 17 00:00:00 2001 From: Abhay Rana Date: Fri, 19 Jun 2015 14:39:53 +0530 Subject: [PATCH] Installed parallel and other utils from the package --- niceload | 882 ++++++ parallel | 8698 ++++++++++++++++++++++++++++++++++++-------------- sem | 9227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ sql | 1109 +++++++ 4 files changed, 17609 insertions(+), 2307 deletions(-) create mode 100755 niceload create mode 100755 sem create mode 100755 sql diff --git a/niceload b/niceload new file mode 100755 index 0000000..ba7af53 --- /dev/null +++ b/niceload @@ -0,0 +1,882 @@ +#!/usr/bin/perl -w + +# Copyright (C) 2004,2005,2006,2006,2008,2009,2010 Ole Tange, +# http://ole.tange.dk +# +# Copyright (C) 2010,2011,2012,2013,2014,2015 Ole Tange, +# http://ole.tange.dk and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA + +use strict; +use Getopt::Long; +$Global::progname="niceload"; +$Global::version = 20150522; +Getopt::Long::Configure("bundling","require_order"); +get_options_from_array(\@ARGV) || die_usage(); +if($opt::version) { + version(); + exit 0; +} +if($opt::help) { + help(); + exit 0; +} +if($opt::factor and $opt::suspend) { + # You cannot have --suspend and --factor + help(); + exit; +} + +if(not (defined $opt::start_io or defined $opt::run_io + or defined $opt::start_load or defined $opt::run_load + or defined $opt::start_mem or defined $opt::run_mem + or defined $opt::start_noswap or defined $opt::run_noswap + or defined $opt::io or defined $opt::load + or defined $opt::mem or defined $opt::noswap)) { + # Default is --runload=1 + $opt::run_load = 1; +} + +if(not defined $opt::start_io) { $opt::start_io = $opt::io; } +if(not defined $opt::run_io) { $opt::run_io = $opt::io; } +if(not defined $opt::start_load) { $opt::start_load = $opt::load; } +if(not defined $opt::run_load) { $opt::run_load = $opt::load; } +if(not defined $opt::start_mem) { $opt::start_mem = $opt::mem; } +if(not defined $opt::run_mem) { $opt::run_mem = $opt::mem; } +if(not defined $opt::start_noswap) { $opt::start_noswap = $opt::noswap; } +if(not defined $opt::run_noswap) { $opt::run_noswap = $opt::noswap; } + +if(defined $opt::load) { multiply_binary_prefix($opt::load); } + +my $limit = Limit->new(); +my $process = Process->new($opt::nice,@ARGV); +$::exitstatus = 0; +if(@opt::prg) { + # Find all pids of prg + my $out = `pidof -x @opt::prg`; + $process->set_pid(split /\s+/,$out); + $::resume_process = $process; + $SIG{TERM} = $SIG{INT} = \&resume; +} elsif(@opt::pid) { + $process->set_pid(@opt::pid); + $::resume_process = $process; + $SIG{TERM} = $SIG{INT} = \&resume; +} elsif (@ARGV) { + # Wait until limit is below start_limit and run_limit + while($limit->over_start_limit() + or + ($limit->hard() and $limit->over_run_limit())) { + $limit->sleep_for_recheck(); + } + 
$process->start(); +} + +while($process->is_alive()) { + if($limit->over_run_limit()) { + $process->suspend(); + $limit->sleep_for_recheck(); + if(not $limit->hard()) { + $process->resume(); + $limit->sleep_while_running(); + } + } else { + $process->resume(); + $limit->sleep_while_running(); + } +} + +exit($::exitstatus); + +sub resume { + $::resume_process->resume(); + exit(0); +} + +sub uniq { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub multiply_binary_prefix { + # Evalualte numbers with binary prefix + # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80 + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # 13G = 13*1024*1024*1024 = 13958643712 + my $s = shift; + $s =~ s/k/*1000/g; + $s =~ s/M/*1000*1000/g; + $s =~ s/G/*1000*1000*1000/g; + $s =~ s/T/*1000*1000*1000*1000/g; + $s =~ s/P/*1000*1000*1000*1000*1000/g; + $s =~ s/E/*1000*1000*1000*1000*1000*1000/g; + $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $s =~ s/Ki?/*1024/gi; + $s =~ s/Mi?/*1024*1024/gi; + $s =~ s/Gi?/*1024*1024*1024/gi; + $s =~ s/Ti?/*1024*1024*1024*1024/gi; + $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi; + $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s = eval $s; + return $s; +} + +sub get_options_from_array { + # Run GetOptions on @array + # Returns: + # true if parsing worked + # false if parsing failed + # @array is changed + my $array_ref = shift; + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + my @retval = GetOptions + ("debug|D" => \$opt::debug, + "factor|f=s" => \$opt::factor, + "hard|H" => \$opt::hard, + "soft|S" => \$opt::soft, + "sensor=s" => \$opt::sensor, + + "si|sio|startio|start-io=s" => \$opt::start_io, + "ri|rio|runio|run-io=s" => \$opt::run_io, + "io|I=s" => \$opt::io, + + "sl|startload|start-load=s" => \$opt::start_load, + "rl|runload|run-load=s" => \$opt::run_load, + "load|L|l=s" => \$opt::load, + + "sm|startmem|start-mem=s" => \$opt::start_mem, + "rm|runmem|run-mem=s" => \$opt::run_mem, + "mem|M=s" => \$opt::mem, + + "sn|startnoswap|start-noswap|start-no-swap" => \$opt::start_noswap, + "rn|runnoswap|run-noswap|run-no-swap" => \$opt::run_noswap, + "noswap|N" => \$opt::noswap, + + # niceload -l -1 --sensor 'cat /sys/class/power_supply/BAT0/status /proc/acpi/battery/BAT0/state 2>/dev/null |grep -i -q discharging; echo $?' 
+ "battery|B" => \$opt::battery, + + "nice|n=i" => \$opt::nice, + "program|prg=s" => \@opt::prg, + "process|pid|p=s" => \@opt::pid, + "suspend|s=s" => \$opt::suspend, + "recheck|t=s" => \$opt::recheck, + "quote|q" => \$opt::quote, + "help|h" => \$opt::help, + "verbose|v" => \$opt::verbose, + "version|V" => \$opt::version, + ); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return @retval; +} + + +sub die_usage { + help(); + exit 1; +} + + +sub help { + print q{ +Usage: + niceload [-v] [-n niceness] [-L loadavg] [-I io] [-N] [-M mem] + [-s suspend_sec|-f factor] [-H] [-S] + command or -p pid +}; +} + + +sub die_bug { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. You have found a bug.\n", + "Please contact and include:\n", + "* The version number: $Global::version\n", + "* The bugid: $bugid\n", + "* The command line being run\n", + "* The files being read (put the files on a webserver if they are big)\n", + "\n", + "If you get the error on smaller/fewer files, please include those instead.\n"); + exit(255); +} + + +sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + ::debug("Sleeping ",$secs," millisecs\n"); + select(undef, undef, undef, $secs/1000); +} + + +sub debug { + if($opt::debug) { + print STDERR @_; + } +} + + +sub my_dump { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". + "Not dumping output\n"; + print STDERR $err; + return $err; + } else { + return Dumper(@dump_this); + } + } else { + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + + +sub version { + # Returns: N/A + print join("\n", + "GNU $Global::progname $Global::version", + "Copyright (C) 2004,2005,2006,2007,2008,2009 Ole Tange", + "Copyright (C) 2010,2011 Ole Tange and Free Software Foundation, Inc.", + "License GPLv3+: GNU GPL version 3 or later ", + "This is free software: you are free to change and redistribute it.", + "GNU $Global::progname comes with no warranty.", + "", + "Web site: http://www.gnu.org/software/parallel\n" + ); +} + + +sub max { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub min { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + + +package Process; + +sub new { + my $class = shift; + my $nice = shift; + my @ARGV = @_; + if($nice) { + unshift(@ARGV, "nice", "-n", $nice); + } + return bless { + 'running' => 0, # Is the process running now? 
+ 'command' => [@ARGV], + }, ref($class) || $class; +} + +sub pgrp { + my $self = shift; + my @pgrp; + if(not $self->{'pgrp'}) { + for(@{$self->{'pids'}}) { + push @pgrp,-getpgrp($_); + } + @pgrp = ::uniq(@pgrp); + @{$self->{'pgrp'}} = @pgrp; + } + return @{$self->{'pgrp'}}; +} + +sub set_pid { + my $self = shift; + push(@{$self->{'pids'}},@_); + $self->{'running'} = 1; + $::exitstatus = 0; +} + +sub start { + # Start the program + my $self = shift; + ::debug("Starting @{$self->{'command'}}\n"); + $self->{'running'} = 1; + if($self->{'pid'} = fork) { + # set signal handler to kill children if parent is killed + push @{$self->{'pids'}}, $self->{'pid'}; + $Global::process = $self; + $SIG{CHLD} = \&REAPER; + $SIG{INT}=\&kill_child_INT; + $SIG{TSTP}=\&kill_child_TSTP; + $SIG{CONT}=\&kill_child_CONT; + sleep 1; # Give child time to setpgrp(0,0); + } else { + setpgrp(0,0); + ::debug("Child pid: $$, pgrp: ",getpgrp $$,"\n"); + ::debug("@{$self->{'command'}}\n"); + if($opt::quote) { + system(@{$self->{'command'}}); + } else { + system("@{$self->{'command'}}"); + } + $::exitstatus = $? >> 8; + $::exitsignal = $? & 127; + ::debug("Child exit $::exitstatus\n"); + exit($::exitstatus); + } +} + +use POSIX ":sys_wait_h"; +use POSIX qw(:sys_wait_h); + +sub REAPER { + my $stiff; + while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + # do something with $stiff if you want + $::exitstatus = $? >> 8; + $::exitsignal = $? & 127; + } + $SIG{CHLD} = \&REAPER; # install *after* calling waitpid +} + + +sub kill_child_CONT { + my $self = $Global::process; + ::debug("SIGCONT received. Killing @{$self->{'pgrp'}}\n"); + kill CONT => $self->pgrp(); +} + + +sub kill_child_TSTP { + my $self = $Global::process; + ::debug("SIGTSTP received. Killing $self->{'pid'} and self ($$)\n"); + kill TSTP => $self->pgrp(); + kill STOP => -$$; + kill STOP => $$; +} + + +sub kill_child_INT { + my $self = $Global::process; + ::debug("SIGINT received.\n"); + if(not @opt::pid) { + ::debug("Killing $self->{'pid'} Exit\n"); + kill INT => $self->pgrp(); + } else { + ::debug("Continue pids $self->{'pid'} Exit\n"); + kill CONT => $self->pgrp(); + } + exit; +} + + +sub resume { + my $self = shift; + ::debug("Resume @{$self->{'pids'}}\n"); + if(not $self->{'running'}) { + # - = PID group + map { kill "CONT", -$_ } @{$self->{'pids'}}; + # If using -p it is not in a group + map { kill "CONT", $_ } @{$self->{'pids'}}; + $self->{'running'} = 1; + } +} + + +sub suspend { + my $self = shift; + ::debug("Suspend @{$self->{'pids'}}\n"); + if($self->{'running'}) { + # - = PID group + map { kill "STOP", -$_ } @{$self->{'pids'}}; + # If using -p it is not in a group + map { kill "STOP", $_ } @{$self->{'pids'}}; + $self->{'running'} = 0; + } +} + + +sub is_alive { + # The process is dead if none of the pids exist + my $self = shift; + my ($exists) = 0; + for my $pid (@{$self->{'pids'}}) { + if(kill 0 => $pid) { $exists++ } + } + ::debug("is_alive: $exists\n"); + return $exists; +} + + +package Limit; + +sub new { + my $class = shift; + my %limits = @_; + my $hard = $opt::soft ? 0 : $opt::hard; + my $runio = $opt::run_io ? ::multiply_binary_prefix($opt::run_io) : 0; + my $startio = $opt::start_io ? ::multiply_binary_prefix($opt::start_io) : 0; + my $runload = $opt::run_load ? ::multiply_binary_prefix($opt::run_load) : 0; + my $startload = $opt::start_load ? ::multiply_binary_prefix($opt::start_load) : 0; + my $runmem = $opt::run_mem ? ::multiply_binary_prefix($opt::run_mem) : 0; + my $startmem = $opt::start_mem ? 
::multiply_binary_prefix($opt::start_mem) : 0; + my $runnoswap = $opt::run_noswap ? ::multiply_binary_prefix($opt::run_noswap) : 0; + my $startnoswap = $opt::start_noswap ? ::multiply_binary_prefix($opt::start_noswap) : 0; + my $recheck = $opt::recheck ? ::multiply_binary_prefix($opt::recheck) : 1; # Default + my $runtime = $opt::suspend ? ::multiply_binary_prefix($opt::suspend) : 1; # Default + + return bless { + 'hard' => $hard, + 'recheck' => $recheck, + 'runio' => $runio, + 'startio' => $startio, + 'runload' => $runload, + 'startload' => $startload, + 'runmem' => $runmem, + 'startmem' => $startmem, + 'runnoswap' => $runnoswap, + 'startnoswap' => $startnoswap, + 'factor' => $opt::factor || 1, + 'recheck' => $recheck, + 'runtime' => $runtime, + 'over_run_limit' => 1, + 'over_start_limit' => 1, + 'verbose' => $opt::verbose, + }, ref($class) || $class; +} + + +sub over_run_limit { + my $self = shift; + my $status = 0; + if($self->{'runmem'}) { + # mem should be between 0-10ish + # 100% available => 0 (1-1) + # 50% available => 1 (2-1) + # 10% available => 9 (10-1) + my $mem = $self->mem_status(); + ::debug("Run memory: $self->{'runmem'}/$mem\n"); + $status += (::max(1,$self->{'runmem'}/$mem)-1); + } + if($self->{'runload'}) { + # If used with other limits load should be between 0-10ish + no warnings 'numeric'; + my $load = $self->load_status(); + if($self->{'runload'} > 0) { + # Stop if the load is above the limit + $status += ::max(0,$load - $self->{'runload'}); + } else { + # Stop if the load is below the limit (for sensor) + $status += ::max(0,-$load - $self->{'runload'}); + } + } + if($self->{'runnoswap'}) { + # swap should be between 0-10ish + # swap in or swap out or no swap = 0 + # else log(swapin*swapout) + my $swap = $self->swap_status(); + $status += log(::max(1, $swap - $self->{'runnoswap'})); + } + if($self->{'runio'}) { + my $io = $self->io_status(); + $status += ::max(0,$io - $self->{'runio'}); + } + $self->{'over_run_limit'} = $status; + if(not $opt::recheck) { + $self->{'recheck'} = $self->{'factor'} * $self->{'over_run_limit'}; + } + ::debug("over_run_limit: $status\n"); + return $self->{'over_run_limit'}; +} + +sub over_start_limit { + my $self = shift; + my $status = 0; + if($self->{'startmem'}) { + # mem should be between 0-10ish + # 100% available => 0 (1-1) + # 50% available => 1 (2-1) + # 10% available => 9 (10-1) + my $mem = $self->mem_status(); + ::debug("Start memory: $self->{'startmem'}/$mem\n"); + $status += (::max(1,$self->{'startmem'}/$mem)-1); + } + if($self->{'startload'}) { + # load should be between 0-10ish + # 0 load => 0 + no warnings 'numeric'; + my $load = $self->load_status(); + if($self->{'startload'} > 0) { + # Stop if the load is above the limit + $status += ::max(0,$load - $self->{'startload'}); + } else { + # Stop if the load is below the limit (for sensor) + $status += ::max(0,-$load - $self->{'startload'}); + } + } + if($self->{'startnoswap'}) { + # swap should be between 0-10ish + # swap in or swap out or no swap = 0 + # else log(swapin*swapout) + my $swap = $self->swap_status(); + $status += log(::max(1, $swap - $self->{'startnoswap'})); + } + if($self->{'startio'}) { + my $io = $self->io_status(); + $status += ::max(0,$io - $self->{'startio'}); + } + $self->{'over_start_limit'} = $status; + if(not $opt::recheck) { + $self->{'recheck'} = $self->{'factor'} * $self->{'over_start_limit'}; + } + ::debug("over_start_limit: $status\n"); + return $self->{'over_start_limit'}; +} + + +sub hard { + my $self = shift; + return $self->{'hard'}; +} + + 
+sub verbose { + my $self = shift; + return $self->{'verbose'}; +} + + +sub sleep_for_recheck { + my $self = shift; + if($self->{'recheck'} < 0.5) { + # Never sleep less than 0.5 sec + $self->{'recheck'} = 0.5; + } + if($self->verbose()) { + $self->{'recheck'} = int($self->{'recheck'}*100)/100; + print STDERR "Sleeping $self->{'recheck'}s\n"; + } + ::debug("recheck in $self->{'recheck'}s\n"); + ::usleep(1000*$self->{'recheck'}); +} + + +sub sleep_while_running { + my $self = shift; + ::debug("check in $self->{'runtime'}s\n"); + if($self->verbose()) { + $self->{'runtime'} = int($self->{'runtime'}*100)/100; + print STDERR "Running $self->{'runtime'}s\n"; + } + ::usleep(1000*$self->{'runtime'}); +} + + +sub nonblockGetLines { + # An non-blocking filehandle read that returns an array of lines read + # Returns: ($eof,@lines) + # Example: --sensor 'vmstat 1 | perl -ane '\''$|=1; 4..0 and print $F[8],"\n"'\' + my ($fh,$timeout) = @_; + + $timeout = 0 unless defined $timeout; + my $rfd = ''; + $::nonblockGetLines_last{$fh} = '' + unless defined $::nonblockGetLines_last{$fh}; + + vec($rfd,fileno($fh),1) = 1; + return unless select($rfd, undef, undef, $timeout)>=0; + # I'm not sure the following is necessary? + return unless vec($rfd,fileno($fh),1); + my $buf = ''; + my $n = sysread($fh,$buf,1024*1024); + + my $eof = eof($fh); + # If we're done, make sure to send the last unfinished line + return ($eof,$::nonblockGetLines_last{$fh}) unless $n; + # Prepend the last unfinished line + $buf = $::nonblockGetLines_last{$fh}.$buf; + # And save any newly unfinished lines + $::nonblockGetLines_last{$fh} = + (substr($buf,-1) !~ /[\r\n]/ && $buf =~ s/([^\r\n]*)$//) + ? $1 : ''; + $buf ? ($eof,split(/\n/,$buf)) : ($eof); +} + +sub read_sensor { + my $self = shift; + ::debug("read_sensor"); + my $fh = $self->{'sensor_fh'}; + if(not $fh) { + # Start the sensor + open($fh, "-|", $opt::sensor) || ::die_bug("Cannot open: $opt::sensor"); + $self->{'sensor_fh'} = $fh; + } + # Read as much as we can (non_block) + my ($eof,@lines) = nonblockGetLines($fh); + + # new load = last full line + foreach my $line (@lines) { + if(defined $line) { + ::debug("Pipe saw: $eof [$line]\n"); + $Global::last_sensor_reading = $line; + } + } + if($eof) { + # End of file => Restart the sensor + close $fh; + open($fh, "-|", $opt::sensor) || ::die_bug("Cannot open: $opt::sensor"); + $self->{'sensor_fh'} = $fh; + } + + return $Global::last_sensor_reading; +} + +sub load_status { + # Returns: + # loadavg or sensor measurement + my $self = shift; + + if($opt::sensor) { + if(not defined $self->{'load_status'} or + $self->{'load_status_cache_time'} + $self->{'recheck'} < time) { + $self->{'load_status'} = $self->read_sensor(); + while (not defined $self->{'load_status'}) { + sleep 1; + $self->{'load_status'} = $self->read_sensor(); + } + $self->{'load_status_cache_time'} = time - 0.001; + } + } else { + # Normal load avg + # Cache for some seconds + if(not defined $self->{'load_status'} or + $self->{'load_status_cache_time'} + $self->{'recheck'} < time) { + $self->{'load_status'} = load_status_linux() if $^O ne 'darwin'; + $self->{'load_status'} = load_status_darwin() if $^O eq 'darwin'; + $self->{'load_status_cache_time'} = time; + } + } + ::debug("load_status: ".$self->{'load_status'}."\n"); + return $self->{'load_status'}; +} + +sub undef_as_zero { + my $a = shift; + return $a ? 
$a : 0; +} + + +sub load_status_linux { + my ($loadavg); + if(open(IN,"/proc/loadavg")) { + # Linux specific (but fast) + my $upString = <IN>; + if($upString =~ m/^(\d+\.\d+)/) { + $loadavg = $1; + } else { + ::die_bug("proc_loadavg"); + } + close IN; + } elsif (open(IN,"uptime|")) { + my $upString = <IN>; + if($upString =~ m/averages?.\s*(\d+\.\d+)/) { + $loadavg = $1; + } else { + ::die_bug("uptime"); + } + close IN; + } + return $loadavg; +} + +sub load_status_darwin { + my $loadavg = `sysctl vm.loadavg`; + if($loadavg =~ /vm\.loadavg: { ([0-9.]+) ([0-9.]+) ([0-9.]+) }/) { + $loadavg = $1; + } elsif (open(IN,"LANG=C uptime|")) { + my $upString = <IN>; + if($upString =~ m/averages?.\s*(\d+\.\d+)/) { + $loadavg = $1; + } else { + ::die_bug("uptime"); + } + close IN; + } + return $loadavg; +} + + +sub swap_status { + # Returns: + # (swap in)*(swap out) kb + my $self = shift; + my $status; + # Cache for some seconds + if(not defined $self->{'swap_status'} or + $self->{'swap_status_cache_time'}+$self->{'recheck'} < time) { + $status = swap_status_linux() if $^O ne 'darwin'; + $status = swap_status_darwin() if $^O eq 'darwin'; + $self->{'swap_status'} = ::max($status,0); + $self->{'swap_status_cache_time'} = time; + } + ::debug("swap_status: $self->{'swap_status'}\n"); + return $self->{'swap_status'}; +} + + +sub swap_status_linux { + my $swap_activity; + $swap_activity = "vmstat 1 2 | tail -n1 | awk '{print \$7*\$8}'"; + # Run swap_activity measuring. + return qx{ $swap_activity }; +} + +sub swap_status_darwin { + # Mach Virtual Memory Statistics: (page size of 4096 bytes, cache hits 0%) + # free active spec inactive wire faults copy 0fill reactive pageins pageout + # 298987 251463 162637 69437 265724 29730558 299022 2308237 1 110058 0 + # 298991 251479 162637 69437 265726 43 4 16 0 0 0 + my ($pagesize, $pageins, $pageouts); + my @vm_stat = `vm_stat 1 | head -n4`; + $pagesize = $1 if $vm_stat[0] =~ m/page size of (\d+) bytes/; + $pageins = (split(/\s+/,$vm_stat[3]))[9]; + $pageouts = (split(/\s+/,$vm_stat[3]))[10]; + return ($pageins*$pageouts*$pagesize)/1024; +} + + +sub mem_status { + # Returns: + # number of bytes (free+cache) + my $self = shift; + # Cache for one second + if(not defined $self->{'mem_status'} or + $self->{'mem_status_cache_time'}+$self->{'recheck'} < time) { + $self->{'mem_status'} = mem_status_linux() if $^O ne 'darwin'; + $self->{'mem_status'} = mem_status_darwin() if $^O eq 'darwin'; + $self->{'mem_status_cache_time'} = time; + } + ::debug("mem_status: $self->{'mem_status'}\n"); + return $self->{'mem_status'}; +} + + +sub mem_status_linux { + # total used free shared buffers cached + # Mem: 3366496 2901664 464832 0 179228 1850692 + # -/+ buffers/cache: 871744 2494752 + # Swap: 6445476 1396860 5048616 + my @free = `free`; + my $free = (split(/\s+/,$free[2]))[3]; + return $free*1024; +} + +sub mem_status_darwin { + # Mach Virtual Memory Statistics: (page size of 4096 bytes, cache hits 0%) + # free active spec inactive wire faults copy 0fill reactive pageins pageout + # 298987 251463 162637 69437 265724 29730558 299022 2308237 1 110058 0 + # 298991 251479 162637 69437 265726 43 4 16 0 0 0 + my ($pagesize, $pages_free, $pages_speculative); + my @vm_stat = `vm_stat 1 | head -n4`; + $pagesize = $1 if $vm_stat[0] =~ m/page size of (\d+) bytes/; + $pages_free = (split(/\s+/,$vm_stat[3]))[0]; + $pages_speculative = (split(/\s+/,$vm_stat[3]))[2]; + return ($pages_free+$pages_speculative)*$pagesize; +} + + +sub io_status { + # Returns: + # max percent for all devices + my $self = shift; + #
Cache for one second + if(not defined $self->{'io_status'} or + $self->{'io_status_cache_time'}+$self->{'recheck'} < time) { + $self->{'io_status'} = io_status_linux() if $^O ne 'darwin'; + $self->{'io_status'} = io_status_darwin() if $^O eq 'darwin'; + $self->{'io_status_cache_time'} = time; + } + ::debug("io_status: $self->{'io_status'}\n"); + return $self->{'io_status'}; +} + + +sub io_status_linux { + # Device: rrqm/s wrqm/s r/s w/s rkB/s wkB/s avgrq-sz avgqu-sz await r_await w_await svctm %util + # sda 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 + my @iostat_out = `LANG=C iostat -x 1 2`; + # throw away all execpt the last Device:-section + my @iostat; + for(reverse @iostat_out) { + /Device:/ and last; + push @iostat, (split(/\s+/,$_))[13]; + } + my $io = ::max(@iostat); + return undef_as_zero($io)/10; +} + +sub io_status_darwin { + # disk0 disk1 disk2 + # KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s + # 14.95 15 0.22 11.18 35 0.38 2.00 0 0.00 + # 0.00 0 0.00 0.00 0 0.00 0.00 0 0.00 + my @iostat_out = `LANG=C iostat -d -w 1 -c 2`; + # return the MB/s of the last second (not the %util) + my @iostat = split(/\s+/, $iostat_out[3]); + my $io = $iostat[3] + $iostat[6] + $iostat[9]; + return ::min($io, 10); +} + +$::exitsignal = $::exitstatus = $opt::battery = 0; # Dummy diff --git a/parallel b/parallel index c207889..9a92e45 100755 --- a/parallel +++ b/parallel @@ -1,7 +1,7 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl -# Copyright (C) 2007,2008,2009,2010,2011,2012 Ole Tange and Free Software -# Foundation, Inc. +# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange +# and Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,124 +26,102 @@ use POSIX qw(:sys_wait_h setsid ceil :errno_h); use Symbol qw(gensym); # tempfile used in Job::start use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; # GetOptions used in get_options_from_array use Getopt::Long; # Used to ensure code quality use strict; +use File::Basename; -$::oodebug=0; -$SIG{TERM} ||= sub { exit 0; }; # $SIG{TERM} is not set on Mac OS X -if(not $ENV{SHELL}) { - # $ENV{SHELL} is sometimes not set on Mac OS X - print STDERR ("parallel: Warning: \$SHELL not set. 
Using /bin/sh\n"); - $ENV{SHELL} = "/bin/sh"; -} -%Global::original_sig = %SIG; -$SIG{TERM} = sub {}; # Dummy until jobs really start -open $Global::original_stderr, ">&STDERR" or ::die_bug("Can't dup STDERR: $!"); - +save_stdin_stdout_stderr(); +save_original_signal_handler(); parse_options(); +::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); my $number_of_args; if($Global::max_number_of_args) { $number_of_args=$Global::max_number_of_args; -} elsif ($::opt_X or $::opt_m) { +} elsif ($opt::X or $opt::m or $opt::xargs) { $number_of_args = undef; } else { $number_of_args = 1; } -my $command = ""; -if(@ARGV) { - if($Global::quoting) { - $command = shell_quote(@ARGV); - } else { - $command = join(" ", @ARGV); +my @command = @ARGV; + +my @input_source_fh; +if($opt::pipepart) { + @input_source_fh = map { open_or_exit($_) } "/dev/null"; +} else { + @input_source_fh = map { open_or_exit($_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); } } -my @fhlist; -@fhlist = map { open_or_exit($_) } @::opt_a; -if(not @fhlist) { - @fhlist = (*STDIN); -} -if($::opt_skip_first_line) { +if($opt::skip_first_line) { # Skip the first line for the first file handle - my $fh = $fhlist[0]; + my $fh = $input_source_fh[0]; <$fh>; } -if($::opt_header and not $::opt_pipe) { - my $fh = $fhlist[0]; +if($opt::header and not $opt::pipe) { # split with colsep or \t - # TODO should $header force $colsep = \t if undef? - my $delimiter = $::opt_colsep; + # $header force $colsep = \t if undef? + my $delimiter = $opt::colsep; + $delimiter ||= "\t"; my $id = 1; - for my $fh (@fhlist) { + for my $fh (@input_source_fh) { my $line = <$fh>; chomp($line); - ::debug("Delimiter: '$delimiter'"); + ::debug("init", "Delimiter: '$delimiter'"); for my $s (split /$delimiter/o, $line) { - ::debug("Colname: '$s'"); - $command =~ s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + # TODO accept configurable short hands + # TODO how to deal with headers in {=...=} + for(@command) { + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + } + $Global::input_source_header{$id} = $s; $id++; } } +} else { + my $id = 1; + for my $fh (@input_source_fh) { + $Global::input_source_header{$id} = $id; + $id++; + } } -if($::opt_nonall or $::opt_onall) { - # Copy all @fhlist into tempfiles - my @argfiles = (); - for my $fh (@fhlist) { - my ($outfh,$name) = ::tempfile(SUFFIX => ".all"); - print $outfh (<$fh>); - close $outfh; - push @argfiles, $name; - } - if(@::opt_basefile) { setup_basefile(); } - # for each sshlogin do: - # parallel -S $sshlogin $command :::: @argfiles - # - # Pass some of the options to the sub-parallels, not all of them as - # -P should only go to the first, and -S should not be copied at all. - my $options = - join(" ", - ((defined $::opt_P) ? "-P $::opt_P" : ""), - ((defined $::opt_u) ? "-u" : ""), - ((defined $::opt_group) ? "-g" : ""), - ((defined $::opt_D) ? "-D" : ""), - ); - my $suboptions = - join(" ", - ((defined $::opt_u) ? "-u" : ""), - ((defined $::opt_group) ? "-g" : ""), - ((defined $::opt_colsep) ? "--colsep ".shell_quote($::opt_colsep) : ""), - ((defined @::opt_v) ? "-vv" : ""), - ((defined $::opt_D) ? "-D" : ""), - ((defined $::opt_timeout) ? "--timeout ".$::opt_timeout : ""), - ); - ::debug("| parallel"); - open(PARALLEL,"| $0 $options") || - ::die_bug("This does not run GNU Parallel: $0 $options"); - for my $sshlogin (values %Global::host) { - print PARALLEL "$0 $suboptions -j1 ". - ((defined $::opt_tag) ? 
- "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""). - " -S ". shell_quote_scalar($sshlogin->string())." ". - shell_quote_scalar($command)." :::: @argfiles\n"; - } - close PARALLEL; - $Global::exitstatus = $? >> 8; - debug("--onall exitvalue ",$?); - if(@::opt_basefile) { cleanup_basefile(); } - unlink(@argfiles); +if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. Remove hosts that are down + filter_hosts(); +} + +if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); } +# TODO --transfer foo/./bar --cleanup +# multiple --transfer and --basefile with different /./ + $Global::JobQueue = JobQueue->new( - $command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); -if($::opt_eta) { - # Count the number of jobs before starting any + \@command,\@input_source_fh,$Global::ContextReplace,$number_of_args,\@Global::ret_files); + +if($opt::eta or $opt::bar or $opt::shuf) { + # Count the number of jobs or shuffle all jobs + # before starting any $Global::JobQueue->total_jobs(); } +if($opt::pipepart) { + @Global::cat_partials = map { pipe_part_files($_) } @opt::a; + # Unget the command as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}->unget( + map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials + ); +} for my $sshlogin (values %Global::host) { $sshlogin->max_jobs_running(); } @@ -155,244 +133,545 @@ if($Global::semaphore) { } $SIG{TERM} = \&start_no_new_jobs; start_more_jobs(); -if($::opt_pipe) { - spreadstdin(@fhlist); +if(not $opt::pipepart) { + if($opt::pipe) { + spreadstdin(); + } } -::debug("Start draining\n"); +::debug("init", "Start draining\n"); drain_job_queue(); -::debug("Done draining\n"); +::debug("init", "Done draining\n"); reaper(); +::debug("init", "Done reaping\n"); +if($opt::pipe and @opt::a) { + for my $job (@Global::tee_jobs) { + unlink $job->fh(2,"name"); + $job->set_fh(2,"name",""); + $job->print(); + unlink $job->fh(1,"name"); + } +} +::debug("init", "Cleaning\n"); cleanup(); if($Global::semaphore) { $sem->release(); } -if($::opt_halt_on_error) { - wait_and_exit($Global::halt_on_error_exitstatus); +for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; +} +::debug("init", "Halt\n"); +if($opt::halt) { + wait_and_exit($Global::halt_exitstatus); } else { wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); } sub __PIPE_MODE__ {} -sub spreadstdin { - # read a record - # Spawn a job and print the record to it. 
- my @fhlist = @_; # Filehandles to read from (Defaults to STDIN) - my $record; +sub pipe_part_files { + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; my $buf = ""; + my $header = find_header(\$buf,open_or_exit($file)); + # find positions + my @pos = find_split_positions($file,$opt::blocksize,length $header); + # Make @cat_partials + my @cat_partials = (); + for(my $i=0; $i<$#pos; $i++) { + push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); + } + # Remote exec should look like: + # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) + # ssh -tt not allowed. Remote will die due to broken pipe anyway. + return @cat_partials; +} + +sub find_header { + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $opt::blocksize + # Returns: + # $header string + my ($buf_ref, $fh) = @_; my $header = ""; - if($::opt_header) { - my $non_greedy_regexp = $::opt_header; - # ? , * , + , {} => ?? , *? , +? , {}? - $non_greedy_regexp =~ s/(\?|\*|\+|\})/$1\?/g; - while(read(STDIN,substr($buf,length $buf,0),$::opt_blocksize)) { - if($buf=~s/^(.*?$non_greedy_regexp)//) { - $header = $1; last; + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { + if($$buf_ref=~s/^($opt::header)//) { + $header = $1; + last; } } } - my ($recstart,$recend,$recerror); - if(defined($::opt_recstart) and defined($::opt_recend)) { + return $header; +} + +sub find_split_positions { + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $headerlen = length of header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $headerlen) = @_; + my $size = -s $file; + $block = int $block; + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$headerlen); + for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { + my $buf; + seek($fh, $pos, 0) || die; + while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ /^(.*$recend)$recstart/os) { + # Start looking for next record _after_ this match + $pos += length($1); + push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recendrecstart); + push(@pos,$pos); + last; + } + } + } + } + push(@pos,$size); + close $fh; + return @pos; +} + +sub cat_partial { + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end 
byte + # Returns: + # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert start_end to start_len + my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; + return "<". shell_quote_scalar($file) . + q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . + " @start_len"; +} + +sub spreadstdin { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $opt::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = $opt::blocksize; + my $in = *STDIN; + my $header = find_header(\$buf,$in); + while(1) { + my $anything_written = 0; + if(not read($in,substr($buf,length $buf,0),$blocksize)) { + # End-of-file + $chunk_number != 1 and last; + # Force the while-loop once if everything was read by header reading + $one_time_through++ and last; + } + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + next; + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Read n-line records + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } elsif($opt::regexp) { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } else { + # Find the last recend-recstart in $buf + if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } + } else { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } else { + # Find the last recend+recstart in $buf + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + } + if(not $anything_written and not eof($in)) { + # Nothing was written - maybe the block size < record size? 
+ # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . + "Increasing to --blocksize $blocksize\n"); + } + } + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); + + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + my %incomplete_jobs = %Global::running; + my $sleep = 1; + while(keys %incomplete_jobs) { + my $something_written = 0; + for my $pid (keys %incomplete_jobs) { + my $job = $incomplete_jobs{$pid}; + if($job->stdin_buffer_length()) { + $something_written += $job->non_block_write(); + } else { + delete $incomplete_jobs{$pid} + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { # If both --recstart and --recend is given then both must match - $recstart = $::opt_recstart; - $recend = $::opt_recend; - $recerror = "parallel: Warning: --recend and --recstart unmatched. Is --blocksize too small?"; - } elsif(defined($::opt_recstart)) { + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { # If --recstart is given it must match start of record - $recstart = $::opt_recstart; + $recstart = $opt::recstart; $recend = ""; - $recerror = "parallel: Warning: --recstart unmatched. Is --blocksize too small?"; - } elsif(defined($::opt_recend)) { + } elsif(defined($opt::recend)) { # If --recend is given then it must match end of record $recstart = ""; - $recend = $::opt_recend; - $recerror = "parallel: Warning: --recend unmatched. 
Is --blocksize too small?"; + $recend = $opt::recend; } - if($::opt_regexp) { + if($opt::regexp) { # If $recstart/$recend contains '|' this should only apply to the regexp $recstart = "(?:".$recstart.")"; $recend = "(?:".$recend.")"; } else { # $recstart/$recend = printf strings (\n) - $recstart =~ s/\\([rnt'"\\])/"qq|\\$1|"/gee; - $recend =~ s/\\([rnt'"\\])/"qq|\\$1|"/gee; + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; } - my $recendrecstart = $recend.$recstart; - # Force the while-loop once if everything was read by header reading - my $force_one_time_through = 0; - for my $in (@fhlist) { - while(!$force_one_time_through++ or read($in,substr($buf,length $buf,0),$::opt_blocksize)) { - # substr above = append to $buf - if($::opt_r) { - # Remove empty lines - $buf=~s/^\s*\n//gm; - if(length $buf == 0) { - next; - } - } - if($::opt_regexp) { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - while($buf =~ s/((?:$recstart.*?$recend){$Global::max_number_of_args})($recstart.*)$/$2/os) { - $record = $header.$1; - ::debug("Read record -N: ".length($record)."\n"); - write_record_to_pipe(\$record,$recstart,$recend); - } - } else { - # Find the last recend-recstart in $buf - if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { - $record = $header.$1; - ::debug("Matched record: ".length($record)."/".length($buf)."\n"); - write_record_to_pipe(\$record,$recstart,$recend); - } - } - } else { - if($Global::max_number_of_args) { - # -N => (start..*?end){n} - my $i = 0; - while(($i = nindex(\$buf,$recendrecstart,$Global::max_number_of_args)) != -1) { - $i += length $recend; # find the actual splitting location - my $record = $header.substr($buf,0,$i); - substr($buf,0,$i) = ""; - ::debug("Read record: ".length($record)."\n"); - write_record_to_pipe(\$record,$recstart,$recend); - } - } else { - # Find the last recend-recstart in $buf - my $i = rindex($buf,$recendrecstart); - if($i != -1) { - $i += length $recend; # find the actual splitting location - my $record = $header.substr($buf,0,$i); - substr($buf,0,$i) = ""; - # ::debug("Read record: ".length($record)."\n"); - write_record_to_pipe(\$record,$recstart,$recend); - } - } - } - } -} - - # If there is anything left in the buffer write it - substr($buf,0,0) = $header; - write_record_to_pipe(\$buf,$recstart,$recend); - - ::debug("Done reading input\n"); - flush_and_close_pipes(); - ::debug("Done flushing to children\n"); - $Global::start_no_new_jobs = 1; + return ($recstart,$recend); } sub nindex { # See if string is in buffer N times # Returns: # the position where the Nth copy is found - my $buf_ref = shift; - my $str = shift; - my $n = shift; + my ($buf_ref, $str, $n) = @_; my $i = 0; + my $two_gb = 2**31-1; for(1..$n) { - $i = index($$buf_ref,$str,$i+1); + $i = index64($buf_ref,$str,$i+1); if($i == -1) { last } } return $i; } -sub flush_and_close_pipes { - # Flush that that is cached to the open pipes - # and close them. 
- my $flush_done; - my $sleep = 0.05; - do { - $flush_done = 1; - # Make sure everything is written to the jobs - for my $job (values %Global::running) { - if($job->remaining()) { - if($job->complete_write()) { - # Some data was written - reset sleep timer - $sleep = 0.05; - } - $flush_done = 0; +{ + my @robin_queue; + + sub round_robin_write { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; + my $something_written = 0; + my $block_passed = 0; + my $sleep = 1; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + # Make a queue to spread the blocks evenly + if(not @robin_queue) { + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); } + while(my $job = shift @robin_queue) { + if($job->stdin_buffer_length() > 0) { + $something_written += $job->non_block_write(); + } else { + $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); + $block_passed = 1; + $job->set_virgin(0); + $something_written += $job->non_block_write(); + last; + } + } + $sleep = ::reap_usleep($sleep); } - $sleep = ::reap_usleep($sleep); - } while (not $flush_done); - for my $job (values %Global::running) { - my $fh = $job->stdin(); - close $fh; + return $something_written; } } -sub write_record_to_pipe { - my $record_ref = shift; - my $recstart = shift; - my $recend = shift; - if(length $$record_ref == 0) { return; } - if($::opt_remove_rec_sep) { - # Remove record separator - $$record_ref =~ s/$recend$recstart//gos; - $$record_ref =~ s/^$recstart//os; - $$record_ref =~ s/$recend$//os; +sub index64 { + # Do index on strings > 2GB. + # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return index($$ref, $match, $pos); } - # Keep the pipes hot, but if nothing happens sleep should back off - my $sleep = 0.00001; # 0.00001 ms - better performance on highend - write_record: while(1) { - # Sorting according to sequence is necessary for -k to work - for my $job (sort { $a->seq() <=> $b->seq() } values %Global::running) { - ::debug("Looking at ",$job->seq(),"-",$job->remaining(),"-",$job->datawritten(),"\n"); - if($job->remaining()) { - # Part of the job's last record has not finished being written - if($job->complete_write()) { - # Something got written - reset sleep timer - $sleep = 0.00001; - } - } else { - if($job->datawritten() > 0) { - # There is no data remaining and we have written data before: - # So this means we have completed writing a block. 
- # close stdin - # This will cause the job to finish and when it dies we will spawn another job - my $fh = $job->stdin(); - close $fh; - } else { - $job->write($record_ref); - # Something got written - reset sleep timer - $sleep = 0.00001; - last write_record; - } - } - } - # Maybe this should be in an if statement: if sleep > 0.001: start more - start_more_jobs(); # These jobs may not be started because of loadavg - $sleep = ::reap_usleep($sleep); - } - return; + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $block_size), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($block_size - $matchlen - 1); + } + return -1; +} + +sub rindex64 { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe { + # Fork then + # Write record from pos 0 .. $endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $record_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $record_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + # Job is no longer virgin + $job->set_virgin(0); + # We ignore the removed rec_sep which is technically wrong. 
+ $job->add_transfersize($endpos + length $$header_ref); + if(fork()) { + # Skip + } else { + # Chop of at $endpos as we do not know how many rec_sep will + # be removed. + substr($$record_ref,$endpos,length $$record_ref) = ""; + # Remove rec_sep + if($opt::remove_rec_sep) { + Job::remove_rec_sep($record_ref,$recstart,$recend); + } + $job->write($header_ref); + $job->write($record_ref); + close $job->fh(0,"w"); + exit(0); + } + close $job->fh(0,"w"); + return 1; } sub __SEM_MODE__ {} sub acquire_semaphore { # Acquires semaphore. If needed: spawns to the background + # Uses: + # @Global::host # Returns: # The semaphore to be released when jobs is complete $Global::host{':'} = SSHLogin->new(":"); my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); $sem->acquire(); - debug("run"); if($Semaphore::fg) { # skip } else { - # If run in the background, the PID will change - # therefore release and re-acquire the semaphore - $sem->release(); if(fork()) { exit(0); } else { - # child - # Get a semaphore for this pid - ::die_bug("Can't start a new session: $!") if setsid() == -1; - $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); - $sem->acquire(); + # If run in the background, the PID will change + $sem->pid_change(); } } return $sem; @@ -401,123 +680,164 @@ sub acquire_semaphore { sub __PARSE_OPTIONS__ {} sub options_hash { - # Returns a hash of the GetOptions config + # Returns: + # %hash = the GetOptions config return - ("debug|D" => \$::opt_D, - "xargs" => \$::opt_xargs, - "m" => \$::opt_m, - "X" => \$::opt_X, - "v" => \@::opt_v, - "joblog=s" => \$::opt_joblog, - "resume" => \$::opt_resume, - "silent" => \$::opt_silent, - #"silent-error|silenterror" => \$::opt_silent_error, - "keep-order|keeporder|k" => \$::opt_k, - "group" => \$::opt_group, - "g" => \$::opt_retired, - "ungroup|u" => \$::opt_u, - "null|0" => \$::opt_0, - "quote|q" => \$::opt_q, - "I=s" => \$::opt_I, - "extensionreplace|er=s" => \$::opt_U, - "U=s" => \$::opt_retired, - "basenamereplace|bnr=s" => \$::opt_basenamereplace, - "dirnamereplace|dnr=s" => \$::opt_dirnamereplace, - "basenameextensionreplace|bner=s" => \$::opt_basenameextensionreplace, - "seqreplace=s" => \$::opt_seqreplace, - "jobs|j=s" => \$::opt_P, - "load=s" => \$::opt_load, - "noswap" => \$::opt_noswap, - "max-line-length-allowed" => \$::opt_max_line_length_allowed, - "number-of-cpus" => \$::opt_number_of_cpus, - "number-of-cores" => \$::opt_number_of_cores, - "use-cpus-instead-of-cores" => \$::opt_use_cpus_instead_of_cores, - "shellquote|shell_quote|shell-quote" => \$::opt_shellquote, - "nice=i" => \$::opt_nice, - "timeout=i" => \$::opt_timeout, - "tag" => \$::opt_tag, - "tagstring=s" => \$::opt_tagstring, - "onall" => \$::opt_onall, - "nonall" => \$::opt_nonall, - "sshlogin|S=s" => \@::opt_sshlogin, - "sshloginfile|slf=s" => \@::opt_sshloginfile, - "controlmaster|M" => \$::opt_controlmaster, - "return=s" => \@::opt_return, - "trc=s" => \@::opt_trc, - "transfer" => \$::opt_transfer, - "cleanup" => \$::opt_cleanup, - "basefile|bf=s" => \@::opt_basefile, - "B=s" => \$::opt_retired, - "workdir|wd=s" => \$::opt_workdir, - "W=s" => \$::opt_retired, - "tmpdir=s" => \$::opt_tmpdir, - "tempdir=s" => \$::opt_tmpdir, - "tty" => \$::opt_tty, - "T" => \$::opt_retired, - "halt-on-error|halt=i" => \$::opt_halt_on_error, - "H=i" => \$::opt_retired, - "retries=i" => \$::opt_retries, - "dry-run|dryrun" => \$::opt_dryrun, - "progress" => \$::opt_progress, - "eta" => \$::opt_eta, - "arg-sep|argsep=s" => \$::opt_arg_sep, - 
"arg-file-sep|argfilesep=s" => \$::opt_arg_file_sep, - "trim=s" => \$::opt_trim, - "profile|J=s" => \@::opt_profile, - "pipe|spreadstdin" => \$::opt_pipe, - "recstart=s" => \$::opt_recstart, - "recend=s" => \$::opt_recend, - "regexp|regex" => \$::opt_regexp, - "remove-rec-sep|removerecsep|rrs" => \$::opt_remove_rec_sep, - "files|output-as-files|outputasfiles" => \$::opt_files, - "block|block-size|blocksize=s" => \$::opt_blocksize, - "tollef" => \$::opt_tollef, - "gnu" => \$::opt_gnu, - "xapply" => \$::opt_xapply, - "bibtex" => \$::opt_bibtex, + ("debug|D=s" => \$opt::D, + "xargs" => \$opt::xargs, + "m" => \$opt::m, + "X" => \$opt::X, + "v" => \@opt::v, + "joblog=s" => \$opt::joblog, + "results|result|res=s" => \$opt::results, + "resume" => \$opt::resume, + "resume-failed|resumefailed" => \$opt::resume_failed, + "silent" => \$opt::silent, + "keep-order|keeporder|k" => \$opt::keeporder, + "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder, + "group" => \$opt::group, + "g" => \$opt::retired, + "ungroup|u" => \$opt::ungroup, + "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, + "tmux" => \$opt::tmux, + "null|0" => \$opt::0, + "quote|q" => \$opt::q, + # Replacement strings + "parens=s" => \$opt::parens, + "rpl=s" => \@opt::rpl, + "plus" => \$opt::plus, + "I=s" => \$opt::I, + "extensionreplace|er=s" => \$opt::U, + "U=s" => \$opt::retired, + "basenamereplace|bnr=s" => \$opt::basenamereplace, + "dirnamereplace|dnr=s" => \$opt::dirnamereplace, + "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, + "seqreplace=s" => \$opt::seqreplace, + "slotreplace=s" => \$opt::slotreplace, + "jobs|j=s" => \$opt::jobs, + "delay=f" => \$opt::delay, + "sshdelay=f" => \$opt::sshdelay, + "load=s" => \$opt::load, + "noswap" => \$opt::noswap, + "max-line-length-allowed" => \$opt::max_line_length_allowed, + "number-of-cpus" => \$opt::number_of_cpus, + "number-of-cores" => \$opt::number_of_cores, + "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, + "shellquote|shell_quote|shell-quote" => \$opt::shellquote, + "nice=i" => \$opt::nice, + "tag" => \$opt::tag, + "tagstring|tag-string=s" => \$opt::tagstring, + "onall" => \$opt::onall, + "nonall" => \$opt::nonall, + "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, + "sshlogin|S=s" => \@opt::sshlogin, + "sshloginfile|slf=s" => \@opt::sshloginfile, + "controlmaster|M" => \$opt::controlmaster, + "return=s" => \@opt::return, + "trc=s" => \@opt::trc, + "transfer" => \$opt::transfer, + "cleanup" => \$opt::cleanup, + "basefile|bf=s" => \@opt::basefile, + "B=s" => \$opt::retired, + "ctrlc|ctrl-c" => \$opt::ctrlc, + "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, + "workdir|work-dir|wd=s" => \$opt::workdir, + "W=s" => \$opt::retired, + "tmpdir=s" => \$opt::tmpdir, + "tempdir=s" => \$opt::tmpdir, + "use-compress-program|compress-program=s" => \$opt::compress_program, + "use-decompress-program|decompress-program=s" => \$opt::decompress_program, + "compress" => \$opt::compress, + "tty" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + "dry-run|dryrun" => \$opt::dryrun, + "progress" => \$opt::progress, + "eta" => \$opt::eta, + "bar" => \$opt::bar, + "shuf" => \$opt::shuf, + "arg-sep|argsep=s" => \$opt::arg_sep, + "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, + "trim=s" => \$opt::trim, + "env=s" => \@opt::env, + "recordenv|record-env" => \$opt::record_env, + "plain" => \$opt::plain, + "profile|J=s" => \@opt::profile, + "pipe|spreadstdin" => \$opt::pipe, + "robin|round-robin|roundrobin" => 
\$opt::roundrobin, + "recstart=s" => \$opt::recstart, + "recend=s" => \$opt::recend, + "regexp|regex" => \$opt::regexp, + "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, + "files|output-as-files|outputasfiles" => \$opt::files, + "block|block-size|blocksize=s" => \$opt::blocksize, + "tollef" => \$opt::retired, + "gnu" => \$opt::ignored_option, + "xapply" => \$opt::xapply, + "bibtex" => \$opt::bibtex, + "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + "halt-on-error|halt=s" => \$opt::halt, + "memfree=s" => \$opt::memfree, + "retries=i" => \$opt::retries, + "timeout=s" => \$opt::timeout, # xargs-compatibility - implemented, man, testsuite - "max-procs|P=s" => \$::opt_P, - "delimiter|d=s" => \$::opt_d, - "max-chars|s=i" => \$::opt_s, - "arg-file|a=s" => \@::opt_a, - "no-run-if-empty|r" => \$::opt_r, - "replace|i:s" => \$::opt_i, - "E=s" => \$::opt_E, - "eof|e:s" => \$::opt_E, - "max-args|n=i" => \$::opt_n, - "max-replace-args|N=i" => \$::opt_N, - "colsep|col-sep|C=s" => \$::opt_colsep, - "help|h" => \$::opt_help, - "L=f" => \$::opt_L, - "max-lines|l:f" => \$::opt_l, - "interactive|p" => \$::opt_p, - "verbose|t" => \$::opt_verbose, - "version|V" => \$::opt_version, - "minversion|min-version=i" => \$::opt_minversion, - "show-limits|showlimits" => \$::opt_show_limits, - "exit|x" => \$::opt_x, + "max-procs|P=s" => \$opt::jobs, + "delimiter|d=s" => \$opt::d, + "max-chars|s=i" => \$opt::max_chars, + "arg-file|a=s" => \@opt::a, + "no-run-if-empty|r" => \$opt::r, + "replace|i:s" => \$opt::i, + "E=s" => \$opt::eof, + "eof|e:s" => \$opt::eof, + "max-args|n=i" => \$opt::max_args, + "max-replace-args|N=i" => \$opt::max_replace_args, + "colsep|col-sep|C=s" => \$opt::colsep, + "help|h" => \$opt::help, + "L=f" => \$opt::L, + "max-lines|l:f" => \$opt::max_lines, + "interactive|p" => \$opt::p, + "verbose|t" => \$opt::verbose, + "version|V" => \$opt::version, + "minversion|min-version=i" => \$opt::minversion, + "show-limits|showlimits" => \$opt::show_limits, + "exit|x" => \$opt::x, # Semaphore - "semaphore" => \$::opt_semaphore, - "semaphoretimeout=i" => \$::opt_semaphoretimeout, - "semaphorename|id=s" => \$::opt_semaphorename, - "fg" => \$::opt_fg, - "bg" => \$::opt_bg, - "wait" => \$::opt_wait, + "semaphore" => \$opt::semaphore, + "semaphoretimeout|st=i" => \$opt::semaphoretimeout, + "semaphorename|id=s" => \$opt::semaphorename, + "fg" => \$opt::fg, + "bg" => \$opt::bg, + "wait" => \$opt::wait, # Shebang #!/usr/bin/parallel --shebang - "shebang|hashbang" => \$::opt_shebang, - "Y" => \$::opt_retired, - "skip-first-line" => \$::opt_skip_first_line, - "header=s" => \$::opt_header, + "shebang|hashbang" => \$opt::shebang, + "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, + "Y" => \$opt::retired, + "skip-first-line" => \$opt::skip_first_line, + "header=s" => \$opt::header, + "cat" => \$opt::cat, + "fifo" => \$opt::fifo, + "pipepart|pipe-part" => \$opt::pipepart, + "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, ); } sub get_options_from_array { # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV # Returns: # true if parsing worked # false if parsing failed - # @array is changed - my $array_ref = shift; + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not # supported everywhere my 
@save_argv; @@ -526,342 +846,598 @@ sub get_options_from_array { @save_argv = @::ARGV; @::ARGV = @{$array_ref}; } - my @retval = GetOptions(options_hash()); - if(not $this_is_ARGV) { + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { @{$array_ref} = @::ARGV; @::ARGV = @save_argv; } - return @retval; + return $retval; } sub parse_options { # Returns: N/A - # Defaults: - $Global::version = 20120503; - $Global::progname = 'parallel'; - $Global::infinity = 2**31; - $Global::debug = 0; - $Global::verbose = 0; - $Global::grouped = 1; - $Global::keeporder = 0; - $Global::quoting = 0; - $Global::replace{'{}'} = '{}'; - $Global::replace{'{.}'} = '{.}'; - $Global::replace{'{/}'} = '{/}'; - $Global::replace{'{//}'} = '{//}'; - $Global::replace{'{/.}'} = '{/.}'; - $Global::replace{'{#}'} = '{#}'; - $/="\n"; - $Global::ignore_empty = 0; - $Global::interactive = 0; - $Global::stderr_verbose = 0; - $Global::default_simultaneous_sshlogins = 9; - $Global::exitstatus = 0; - $Global::halt_on_error_exitstatus = 0; - $Global::arg_sep = ":::"; - $Global::arg_file_sep = "::::"; - $Global::trim = 'n'; - $Global::max_jobs_running = 0; - $Global::job_already_run = ''; - + init_globals(); @ARGV=read_options(); - if(defined $::opt_retired) { - print STDERR "$Global::progname: -g has been retired. Use --group.\n"; - print STDERR "$Global::progname: -B has been retired. Use --bf.\n"; - print STDERR "$Global::progname: -T has been retired. Use --tty.\n"; - print STDERR "$Global::progname: -U has been retired. Use --er.\n"; - print STDERR "$Global::progname: -W has been retired. Use --wd.\n"; - print STDERR "$Global::progname: -Y has been retired. Use --shebang.\n"; - print STDERR "$Global::progname: -H has been retired. Use --halt.\n"; - ::wait_and_exit(255); - } - if(defined @::opt_v) { $Global::verbose = $#::opt_v+1; } # Convert -v -v to v=2 - $Global::debug = (defined $::opt_D); - if(defined $::opt_X) { $Global::ContextReplace = 1; } - if(defined $::opt_silent) { $Global::verbose = 0; } - if(defined $::opt_k) { $Global::keeporder = 1; } - if(defined $::opt_group) { $Global::grouped = 1; } - if(defined $::opt_u) { $Global::grouped = 0; } - if(defined $::opt_0) { $/ = "\0"; } - if(defined $::opt_d) { my $e="sprintf \"$::opt_d\""; $/ = eval $e; } - if(defined $::opt_p) { $Global::interactive = $::opt_p; } - if(defined $::opt_q) { $Global::quoting = 1; } - if(defined $::opt_r) { $Global::ignore_empty = 1; } - if(defined $::opt_verbose) { $Global::stderr_verbose = 1; } - if(defined $::opt_I) { $Global::replace{'{}'} = $::opt_I; } - if(defined $::opt_U) { $Global::replace{'{.}'} = $::opt_U; } - if(defined $::opt_i) { - $Global::replace{'{}'} = $::opt_i eq "" ? 
"{}" : $::opt_i; - } - if(defined $::opt_basenamereplace) { $Global::replace{'{/}'} = $::opt_basenamereplace; } - if(defined $::opt_dirnamereplace) { $Global::replace{'{//}'} = $::opt_dirnamereplace; } - if(defined $::opt_basenameextensionreplace) { - $Global::replace{'{/.}'} = $::opt_basenameextensionreplace; - } - if(defined $::opt_seqreplace) { - $Global::replace{'{#}'} = $::opt_seqreplace; - } - if(defined $::opt_E) { $Global::end_of_file_string = $::opt_E; } - if(defined $::opt_n) { $Global::max_number_of_args = $::opt_n; } - if(defined $::opt_timeout) { $Global::timeoutq = TimeoutQueue->new($::opt_timeout); } - if(defined $::opt_tmpdir) { $ENV{'TMPDIR'} = $::opt_tmpdir; } - if(defined $::opt_help) { die_usage(); } - if(defined $::opt_colsep) { $Global::trim = 'lr'; } - if(defined $::opt_header) { $::opt_colsep = defined $::opt_colsep ? $::opt_colsep : "\t"; } - if(defined $::opt_trim) { $Global::trim = $::opt_trim; } - if(defined $::opt_arg_sep) { $Global::arg_sep = $::opt_arg_sep; } - if(defined $::opt_arg_file_sep) { $Global::arg_file_sep = $::opt_arg_file_sep; } - if(defined $::opt_number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } - if(defined $::opt_number_of_cores) { + # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + if($opt::noctrlc) { $opt::ctrlc = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::0) { $/ = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + if(defined $opt::tagstring) { $opt::tagstring = unquote_printf($opt::tagstring); } + if(defined $opt::p) { $Global::interactive = $opt::p; } + if(defined $opt::q) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + parse_replacement_string_options(); + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } + if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + if(defined $opt::help) { die_usage(); } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? 
$opt::colsep : "\t"; } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } + if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } + if(defined $opt::number_of_cores) { print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); } - if(defined $::opt_max_line_length_allowed) { + if(defined $opt::max_line_length_allowed) { print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); } - if(defined $::opt_version) { version(); wait_and_exit(0); } - if(defined $::opt_bibtex) { bibtex(); wait_and_exit(0); } - if(defined $::opt_show_limits) { show_limits(); } - if(defined @::opt_sshlogin) { @Global::sshlogin = @::opt_sshlogin; } - if(defined @::opt_sshloginfile) { read_sshloginfiles(@::opt_sshloginfile); } - if(defined @::opt_return) { push @Global::ret_files, @::opt_return; } - if(not defined $::opt_recstart and - not defined $::opt_recend) { $::opt_recend = "\n"; } - if(not defined $::opt_blocksize) { $::opt_blocksize = "1M"; } - $::opt_blocksize = multiply_binary_prefix($::opt_blocksize); - if(defined $::opt_semaphore) { $Global::semaphore = 1; } - if(defined $::opt_semaphoretimeout) { $Global::semaphore = 1; } - if(defined $::opt_semaphorename) { $Global::semaphore = 1; } - if(defined $::opt_fg) { $Global::semaphore = 1; } - if(defined $::opt_bg) { $Global::semaphore = 1; } - if(defined $::opt_wait) { $Global::semaphore = 1; } - if(defined $::opt_minversion) { + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(defined $opt::show_limits) { show_limits(); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } + $opt::blocksize = multiply_binary_prefix($opt::blocksize); + if($opt::blocksize > 2**31-1) { + warning("--blocksize >= 2G causes problems. Using 2G-1\n"); + $opt::blocksize = 2**31-1; + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + if(defined $opt::controlmaster) { $opt::noctrlc = 1; } + if(defined $opt::halt and + $opt::halt =~ /%/) { $opt::halt /= 100; } + if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { + ::error("--timeout must be seconds or percentage\n"); + wait_and_exit(255); + } + if(defined $opt::minversion) { print $Global::version,"\n"; - if($Global::version < $::opt_minversion) { + if($Global::version < $opt::minversion) { wait_and_exit(255); } else { wait_and_exit(0); } } - if(defined $::opt_nonall) { - # Append a dummy empty argument - push @ARGV, ":::", ""; + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; } - if(defined $::opt_tty) { + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." 
-dc"; + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + } + if(defined $opt::nonall) { + # Append a dummy empty argument + push @ARGV, $Global::arg_sep, ""; + } + if(defined $opt::tty) { # Defaults for --tty: -j1 -u # Can be overridden with -jXXX -g - if(not defined $::opt_P) { - $::opt_P = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; } - if(not defined $::opt_group) { - $Global::grouped = 0; + if(not defined $opt::group) { + $opt::ungroup = 1; } } - if(defined @::opt_trc) { - push @Global::ret_files, @::opt_trc; - $::opt_transfer = 1; - $::opt_cleanup = 1; + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + $opt::transfer = 1; + $opt::cleanup = 1; } - if($::opt_tollef and not $::opt_gnu) { - # Behave like tollef parallel (from moreutils) - if(defined $::opt_l) { - $::opt_load = $::opt_l; - $::opt_l = undef; - } - if(not defined $::opt_arg_sep) { - $Global::arg_sep = "--"; - } - } - if(defined $::opt_l) { - if($::opt_l eq "-0") { + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { # -l -0 (swallowed -0) - $::opt_l = 1; - $::opt_0 = 1; + $opt::max_lines = 1; + $opt::0 = 1; $/ = "\0"; - } elsif ($::opt_l == 0) { + } elsif ($opt::max_lines == 0) { # If not given (or if 0 is given) => 1 - $::opt_l = 1; + $opt::max_lines = 1; + } + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; } - $Global::max_lines = $::opt_l; - $Global::max_number_of_args ||= $Global::max_lines; } # Read more than one arg at a time (-L, -N) - if(defined $::opt_L) { - $Global::max_lines = $::opt_L; - $Global::max_number_of_args ||= $Global::max_lines; + if(defined $opt::L) { + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } } - if(defined $::opt_N) { - $Global::max_number_of_args = $::opt_N; + if(defined $opt::max_replace_args) { + $Global::max_number_of_args = $opt::max_replace_args; $Global::ContextReplace = 1; } - if((defined $::opt_L or defined $::opt_N) + if((defined $opt::L or defined $opt::max_replace_args) and - not ($::opt_xargs or $::opt_m)) { + not ($opt::xargs or $opt::m)) { $Global::ContextReplace = 1; } - - for (keys %Global::replace) { - $Global::replace{$_} = ::maybe_quote($Global::replace{$_}); + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; } - %Global::replace_rev = reverse %Global::replace; - if(defined $::opt_tag and not defined $::opt_tagstring) { - $::opt_tagstring = $Global::replace{'{}'}; + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ", + "--max-lines, and -L.\n"); + wait_and_exit(255); } - if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { # Deal with ::: and :::: @ARGV=read_args_from_command_line(); } + parse_semaphore(); - # Semaphore defaults - # Must be done before computing number of processes and max_line_length - # because when running as a semaphore GNU Parallel does not read args - $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' - if($Global::semaphore) { - # A semaphore does not take input from neither stdin nor file - @::opt_a = ("/dev/null"); - 
push(@Global::unget_argv, [Arg->new("")]); - $Semaphore::timeout = $::opt_semaphoretimeout || 0; - if(defined $::opt_semaphorename) { - $Semaphore::name = $::opt_semaphorename; - } else { - $Semaphore::name = `tty`; - chomp $Semaphore::name; - } - $Semaphore::fg = $::opt_fg; - $Semaphore::wait = $::opt_wait; - $Global::default_simultaneous_sshlogins = 1; - if(not defined $::opt_P) { - $::opt_P = 1; - } - if($Global::interactive and $::opt_bg) { - print STDERR "$Global::progname: Jobs running in the ". - "background cannot be interactive.\n"; + if(defined $opt::eta) { + $opt::progress = $opt::eta; + } + if(defined $opt::bar) { + $opt::progress = $opt::bar; + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.\n"); + ::error("-B has been retired. Use --bf.\n"); + ::error("-T has been retired. Use --tty.\n"); + ::error("-U has been retired. Use --er.\n"); + ::error("-W has been retired. Use --wd.\n"); + ::error("-Y has been retired. Use --shebang.\n"); + ::error("-H has been retired. Use --halt.\n"); + ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n"); ::wait_and_exit(255); - } - } - if(defined $::opt_eta) { - $::opt_progress = $::opt_eta; } + citation_notice(); parse_sshlogin(); + parse_env_var(); - if(remote_hosts() and ($::opt_X or $::opt_m or $::opt_xargs)) { + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { # As we do not know the max line length on the remote machine # long commands generated by xargs may fail - # If opt_N is set, it is probably safe - print STDERR ("$Global::progname: Warning: using -X or -m ", - "with --sshlogin may fail\n"); + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail.\n"); } - if(not defined $::opt_P) { - $::opt_P = "100%"; + if(not defined $opt::jobs) { + $opt::jobs = "100%"; } open_joblog(); } -sub open_joblog { - my $append = 0; - if($::opt_resume and not $::opt_joblog) { - print STDERR ("$Global::progname: --resume requires --joblog\n"); - ::wait_and_exit(255); +sub init_globals { + # Defaults: + $Global::version = 20150522; + $Global::progname = 'parallel'; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + $Global::quoting = 0; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', + '{/.}' => 's:.*/::; s:\.[^/.]+$::;', + '{.}' => 's:\.[^/.]+$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$::', + '{+.}' => 's:.*\.::', + '{+..}' => 's:.*\.([^.]*\.):$1:', + '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', + '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', + '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::halt_exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + 
$ENV{'TMPDIR'} ||= "/tmp"; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. Using /tmp\n"); + $ENV{HOME} = "/tmp"; } - if($::opt_joblog) { - if($::opt_resume) { - if(open(JOBLOG, $::opt_joblog)) { - # Read the joblog - $append = ; # If there is a header: Open as append later - while() { - if(/^(\d+)/) { - # This is 30% faster than set_job_already_run($1); - vec($Global::job_already_run,$1,1) = 1; - } else { - print STDERR ("$Global::progname: Format of '$::opt_joblog' is wrong\n"); - ::wait_and_exit(255); - } - } - close JOBLOG; - } +} + +sub parse_replacement_string_options { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; } - if($append) { - # Append to joblog - if(not open($Global::joblog,">>$::opt_joblog")) { - print STDERR ("$Global::progname: Cannot append to ", - "--joblog $::opt_joblog\n"); - ::wait_and_exit(255); - } - } else { - # Overwrite the joblog - if(not open($Global::joblog,">$::opt_joblog")) { - print STDERR ("$Global::progname: Cannot write to ", - "--joblog $::opt_joblog\n"); - ::wait_and_exit(255); - } else { - print $Global::joblog - join("\t", "Seq", "Host", "Starttime", "Runtime", - "Send", "Receive", "Exitval", "Signal", "Command" - ). "\n"; - } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/ /,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + # $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + 
if(defined $opt::fg) { $Global::semaphore = 1; } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait) { $Global::semaphore = 1; } + if($Global::semaphore) { + # A semaphore does not take input from neither stdin nor file + @opt::a = ("/dev/null"); + push(@Global::unget_argv, [Arg->new("")]); + $Semaphore::timeout = $opt::semaphoretimeout || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive.\n"); + ::wait_and_exit(255); } } } -sub read_options { - # Read options from command line, profile and $PARALLEL - # Returns: - # @ARGV without --options - # This must be done first as this may exec myself - if(defined $ARGV[0] and ($ARGV[0]=~/^--shebang / or - $ARGV[0]=~/^--hashbang /)) { - # Program is called from #! line in script - $ARGV[0]=~s/^--shebang *//; # remove --shebang if it is set - $ARGV[0]=~s/^--hashbang *//; # remove --hashbang if it is set - my $argfile = pop @ARGV; - # exec myself to split $ARGV[0] into separate fields - exec "$0 --skip-first-line -a $argfile @ARGV"; +sub record_env { + # Record current %ENV-keys in ~/.parallel/ignored_vars + # Returns: N/A + my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename.\n"); + ::wait_and_exit(255); } +} - Getopt::Long::Configure("bundling","pass_through"); - # Check if there is a --profile to set @::opt_profile - GetOptions("profile|J=s" => \@::opt_profile) || die_usage(); - # Add options from .parallel/config and other profiles - my @ARGV_profile = (); - my @ARGV_env = (); - my @config_profiles = ( - "/etc/parallel/config", - $ENV{'HOME'}."/.parallel/config", - $ENV{'HOME'}."/.parallelrc"); - my @profiles = @config_profiles; - if(@::opt_profile) { - # --profile overrides default profiles - @profiles = (); - for my $profile (@::opt_profile) { - push @profiles, $ENV{'HOME'}."/.parallel/".$profile; +sub parse_env_var { + # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen + # + # Bash functions must be parsed to export them remotely + # Pre-shellshock style bash function: + # myfunc=() {... + # Post-shellshock style bash function (v1): + # BASH_FUNC_myfunc()=() {... + # Post-shellshock style bash function (v2): + # BASH_FUNC_myfunc%%=() {... 
+ # + # Uses: + # $Global::envvar = eval string that will set variables in both bash and csh + # $Global::envwarn = If functions are used: Give warning in csh + # $Global::envvarlen = length of $Global::envvar + # @opt::env + # $Global::shell + # %ENV + # Returns: N/A + $Global::envvar = ""; + + $Global::envvarlen = length $Global::envvar; +} + +sub open_joblog { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fd + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results.\n"); + ::wait_and_exit(255); + } + if($opt::joblog) { + if($opt::resume || $opt::resume_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + $append = <$joblog_fh>; # If there is a header: Open as append later + my $joblog_regexp; + if($opt::resume_failed) { + # Make a regexp that only matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + close $joblog_fh; + } + } + if($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fd{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). "\n"; } } - for my $profile (@profiles) { - if(-r $profile) { - open (IN, "<", $profile) || ::die_bug("read-profile: $profile"); - while() { - /^\s*\#/ and next; - chomp; - push @ARGV_profile, shell_unquote(split/(?'/dev/null , 1 2 3 , {1..3} , lz4 lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2 + # sort -nk4 jl + my @prg = qw(lz4 pigz lzop plzip pbzip2 pxz gzip lzma xz bzip2 lzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + + +sub read_options { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! 
line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = shell_quote_scalar(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does. + my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = shell_quote_scalar(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; + } + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from .parallel/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + $ENV{'HOME'}."/.parallel/config", + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if(-r $profile) { + push @profiles, $profile; + } else { + push @profiles, $ENV{'HOME'}."/.parallel/".$profile; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shellwords($_); + } + close $in_fh; + } else { + if(grep /^$profile$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable.\n"); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + @ARGV_env = shellwords($ENV{'PARALLEL'}); + } } Getopt::Long::Configure("bundling","require_order"); get_options_from_array(\@ARGV_profile) || die_usage(); - # Add options from shell variable $PARALLEL - $ENV{'PARALLEL'} and @ARGV_env = shell_unquote(split/(? ".arg"); + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); unlink($name); # Put args into argfile print $outfh map { $_,$/ } @group; seek $outfh, 0, 0; + exit_if_disk_full(); # Append filehandle to -a - push @::opt_a, $outfh; - } elsif($group eq $Global::arg_file_sep) { - # Group of file names on the command line. 
- # Append args into -a - push @::opt_a, @group; + push @opt::a, $outfh; } else { ::die_bug("Unknown command line group: $group"); } @@ -933,103 +1520,175 @@ sub read_args_from_command_line { sub cleanup { # Returns: N/A - if(@::opt_basefile) { cleanup_basefile(); } + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile) { cleanup_basefile(); } } sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} sub shell_quote { + # Input: + # @strings = strings to be quoted + # Output: + # @shell_quoted_strings = string quoted with \ as needed by the shell my @strings = (@_); for my $a (@strings) { - $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'])/\\$1/g; + $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' } return wantarray ? @strings : "@strings"; } +sub shell_quote_empty { + # Inputs: + # @strings = strings to be quoted + # Returns: + # @quoted_strings = empty strings quoted as ''. + my @strings = shell_quote(@_); + for my $a (@strings) { + if($a eq "") { + $a = "''"; + } + } + return wantarray ? @strings : "@strings"; +} + sub shell_quote_scalar { # Quote the string so shell will not expand any special chars + # Inputs: + # $string = string to be quoted # Returns: - # string quoted with \ as needed by the shell - my $a = shift; - $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'])/\\$1/g; - $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' + # $shell_quoted = string quoted with \ as needed by the shell + my $a = $_[0]; + if(defined $a) { + # Solaris sh wants ^ quoted. + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go; + $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \' + } return $a; } -sub maybe_quote { - # If $Global::quoting then quote the string so shell will not expand any special chars - # Else do not quote +sub shell_quote_file { + # Quote the string so shell will not expand any special chars and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted # Returns: - # if $Global::quoting string quoted with \ as needed by the shell - # else string unaltered - if($Global::quoting) { - return shell_quote_scalar(@_); - } else { - return "@_"; + # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed + my $a = shell_quote_scalar(shift); + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } } + return $a; } -sub maybe_unquote { - # If $Global::quoting then unquote the string as shell would - # Else do not unquote +sub shellwords { + # Input: + # $string = shell line # Returns: - # if $Global::quoting string unquoted as done by the shell - # else string unaltered - if($Global::quoting) { - return shell_unquote(@_); - } else { - return "@_"; - } + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); } -sub shell_unquote { - # Unquote strings from shell_quote +sub perl_quote_scalar { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted # Returns: - # string with shell quoting removed - my @strings = (@_); - 
my $arg; - for $arg (@strings) { - if(not defined $arg) { - $arg = ""; - } - $arg =~ s/'\n'/\n/g; # filenames with '\n' is quoted using \' - $arg =~ s/\\([\002-\011\013-\032])/$1/g; - $arg =~ s/\\([\#\?\`\(\)\{\}\*\>\<\~\|\; \"\!\$\&\'])/$1/g; - $arg =~ s/\\\\/\\/g; + # $shell_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; } - return wantarray ? @strings : "@strings"; + return $a; +} + +sub unquote_printf { + # Convert \t \n \r \000 \0 + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; } sub __FILEHANDLES__ {} + +sub save_stdin_stdout_stderr { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fd + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # Find file descriptors that are already opened (by the shell) + for my $fdno (1..61) { + # /dev/fd/62 and above are used by bash for <(cmd) + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fd{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + sub enough_file_handles { - # check that we have enough filehandles available for starting + # Check that we have enough filehandles available for starting # another job + # Uses: + # $opt::ungroup + # %Global::fd # Returns: # 1 if ungrouped (thus not needing extra filehandles) # 0 if too few filehandles # 1 if enough filehandles - if($Global::grouped) { + if(not $opt::ungroup) { my %fh; my $enough_filehandles = 1; - # We need a filehandle for STDOUT and STDERR - # perl uses 7 filehandles for something? + # perl uses 7 filehandles for something? 
# open3 uses 2 extra filehandles temporarily - for my $i (1..8) { - $enough_filehandles &&= open($fh{$i},"&STDOUT" or - ::die_bug("Can't dup STDOUT: $!"); - open $Global::original_stderr, ">&STDERR" or - ::die_bug("Can't dup STDERR: $!"); - open $Global::original_stdin, "<&STDIN" or - ::die_bug("Can't dup STDIN: $!"); $Global::total_running = 0; $Global::total_started = 0; $Global::tty_taken = 0; $SIG{USR1} = \&list_running_jobs; $SIG{USR2} = \&toggle_progress; - if(@::opt_basefile) { setup_basefile(); } + if(@opt::basefile) { setup_basefile(); } } -sub start_more_jobs { - # Returns: - # number of jobs started - my $jobs_started = 0; - if(not $Global::start_no_new_jobs) { - if($Global::max_procs_file) { - my $mtime = (stat($Global::max_procs_file))[9]; - if($mtime > $Global::max_procs_file_last_mod) { - $Global::max_procs_file_last_mod = $mtime; - for my $sshlogin (values %Global::host) { - $sshlogin->set_max_jobs_running(undef); - } - } - } - if($Global::max_load_file) { - my $mtime = (stat($Global::max_load_file))[9]; - if($mtime > $Global::max_load_file_last_mod) { - $Global::max_load_file_last_mod = $mtime; - for my $sshlogin (values %Global::host) { - $sshlogin->set_max_loadavg(undef); - } - } - } +{ + my $last_time; + my %last_mtime; + my $max_procs_file_last_mod; - for my $sshlogin (values %Global::host) { - debug("Running jobs before on ".$sshlogin->string().": ".$sshlogin->jobs_running()."\n"); - if($::opt_load and $sshlogin->loadavg_too_high()) { - # The load is too high or unknown - next; - } - if($::opt_noswap and $sshlogin->swapping()) { - # The server is swapping - next; - } - while ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { - if($Global::JobQueue->empty() and not $::opt_pipe) { - last; - } - debug($sshlogin->string()." has ".$sshlogin->jobs_running() - . " out of " . $sshlogin->max_jobs_running() - . " jobs running. Start another.\n"); - if(start_another_job($sshlogin) == 0) { - # No more jobs to start on this $sshlogin - debug("No jobs started on ".$sshlogin->string()."\n"); - last; - } - debug("Job started on ".$sshlogin->string()."\n"); - $sshlogin->inc_jobs_running(); - $jobs_started++; - } - debug("Running jobs after on ".$sshlogin->string().": ".$sshlogin->jobs_running() - ." of ".$sshlogin->max_jobs_running() ."\n"); - } - } - return $jobs_started; -} - -sub start_another_job { - # Grab a job from Global::JobQueue, start it at sshlogin - # and remember the pid, the STDOUT and the STDERR handles - # Returns: - # 1 if another jobs was started - # 0 otherwise - my $sshlogin = shift; - # Do we have enough file handles to start another job? 
- if(enough_file_handles()) { - if($Global::JobQueue->empty() and not $::opt_pipe) { - # No more commands to run - debug("Not starting: JobQueue empty\n"); - return 0; - } else { - my $job; - do { - $job = get_job_with_sshlogin($sshlogin); - if(not defined $job) { - # No command available for that sshlogin - debug("Not starting: no jobs available for ".$sshlogin->string()."\n"); - return 0; - } - } while ($job->is_already_in_joblog()); - debug("Command to run on '".$job->sshlogin()."': '".$job->replaced()."'\n"); - if($job->start()) { - $Global::running{$job->pid()} = $job; - debug("Started as seq ",$job->seq()," pid:",$job->pid(),"\n"); - return 1; - } else { - # If interactive says: Dont run the job, then skip it and run the next - return start_another_job($sshlogin); - } - } - } else { - # No more file handles - debug("Not starting: no more file handles\n"); - return 0; - } -} - -sub drain_job_queue { - # Returns: N/A - $Private::first_completed ||= time; - if($::opt_progress) { - print $Global::original_stderr init_progress(); - } - my $last_header=""; - my $sleep = 0.2; - do { - while($Global::total_running > 0) { - debug("jobs running: ", $Global::total_running, "==", scalar - keys %Global::running," slots: ", $Global::max_jobs_running, - " Memory usage:".my_memory_usage()." "); - if($::opt_pipe) { - # When using --pipe sometimes file handles are not closed properly - for my $job (values %Global::running) { - my $fh = $job->stdin(); - close $fh; + sub changed_procs_file { + # If --jobs is a file and it is modfied: + # Force recomputing of max_jobs_running for each $sshlogin + # Uses: + # $Global::max_procs_file + # %Global::host + # Returns: N/A + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + $max_procs_file_last_mod ||= 0; + if($mtime > $max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); } } - if($::opt_progress) { - my %progress = progress(); - if($last_header ne $progress{'header'}) { - print $Global::original_stderr "\n",$progress{'header'},"\n"; - $last_header = $progress{'header'}; - } - print $Global::original_stderr "\r",$progress{'status'}; - } - # Sometimes SIGCHLD is not registered, so force reaper - $sleep = ::reap_usleep($sleep); - } - if(not $Global::JobQueue->empty()) { - start_more_jobs(); # These jobs may not be started because of loadavg - $sleep = ::reap_usleep($sleep); - } - } while ($Global::total_running > 0 - or - not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); + } + } - if($::opt_progress) { - print $Global::original_stderr "\n"; + sub changed_sshloginfile { + # If --slf is changed: + # reload --slf + # filter_hosts + # setup_basefile + # Uses: + # @opt::sshloginfile + # @Global::sshlogin + # %Global::host + # $opt::filter_hosts + # Returns: N/A + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run","--sshloginfile $actual_file changed. 
reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + + sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # %Global::host + # $Global::start_no_new_jobs + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + my $jobs_started_this_round = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + changed_procs_file(); + changed_sshloginfile(); + } + do { + $jobs_started_this_round = 0; + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { + # It has been too short since last start + next; + } + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($opt::memfree and $sshlogin->memfree() < $opt::memfree) { + # The server has not enough mem free + ::debug("mem", "Not starting job: not enough mem\n"); + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since + next; + } + debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. 
Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + $jobs_started_this_round++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + } while($jobs_started_this_round); + + return $jobs_started; } } -sub toggle_progress { - # Turn on/off progress view - # Returns: N/A - $::opt_progress = not $::opt_progress; - if($::opt_progress) { - print $Global::original_stderr init_progress(); +{ + my $no_more_file_handles_warned; + + sub start_another_job { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? + if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume and $job->is_already_in_results())); + debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + push(@Global::virgin_jobs,$job); + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + ::error("No more processes: cannot run a single job. Something is wrong.\n"); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ", + "Decreasing number of running jobs to $max. ", + "Raising ulimit -u or /etc/security/limits.conf may help.\n"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. 
", + "Raising ulimit -n or /etc/security/limits.conf may help.\n"); + return 0; + } } } sub init_progress { + # Uses: + # $opt::bar # Returns: # list of computers for progress output $|=1; + if($opt::bar) { + return("",""); + } my %progress = progress(); return ("\nComputers / CPU cores / Max jobs to run\n", $progress{'workerlist'}); } +sub drain_job_queue { + # Uses: + # $opt::progress + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + if($opt::progress) { + ::status(init_progress()); + } + my $last_header = ""; + my $sleep = 0.2; + do { + while($Global::total_running > 0) { + debug($Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + ::status("\n", $progress{'header'}, "\n"); + $last_header = $progress{'header'}; + } + ::status("\r",$progress{'status'}); + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. + if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Exponential back-off sleeping + $sleep = ::reap_usleep($sleep); + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on.\n"); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + start_more_jobs(); + $sleep = ::reap_usleep($sleep); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs.\n"); + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); + if($opt::progress) { + my %progress = progress(); + ::status("\r", $progress{'status'}, "\n"); + } +} + +sub toggle_progress { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status(init_progress()); + } +} + sub progress { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started # Returns: - # list of workers - # header that will fit on the screen - # status message that will fit on the screen + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } my $termcols = terminal_columns(); - my ($status, $header) = ("x"x($termcols+1),""); my @workers = sort keys %Global::host; my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; my $workerno = 1; @@ -1252,34 +2101,8 @@ sub progress { ($Global::host{$w}->ncpus() || "-")." / ". 
$Global::host{$w}->max_jobs_running()."\n"; } - my $eta = ""; - if($::opt_eta) { - my $completed = 0; - for(@workers) { $completed += $Global::host{$_}->jobs_completed() } - if($completed) { - my $total = $Global::JobQueue->total_jobs(); - my $left = $total - $completed; - my $pctcomplete = $completed / $total; - my $timepassed = (time - $Private::first_completed); - my $avgtime = $timepassed / $completed; - $Private::smoothed_avg_time ||= $avgtime; - # Smooth the eta so it does not jump wildly - $Private::smoothed_avg_time = (1 - $pctcomplete) * - $Private::smoothed_avg_time + $pctcomplete * $avgtime; - my $this_eta; - $Private::last_time ||= $timepassed; - if($timepassed != $Private::last_time - or not defined $Private::last_eta) { - $Private::last_time = $timepassed; - $this_eta = $left * $Private::smoothed_avg_time; - $Private::last_eta = $this_eta; - } else { - $this_eta = $Private::last_eta; - } - $eta = sprintf("ETA: %ds %dleft %.2favg ", $this_eta, $left, $avgtime); - } - } - + $status = "x"x($termcols+1); + # Select an output format that will fit on a single line if(length $status > $termcols) { # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; @@ -1393,41 +2216,141 @@ sub progress { return ("workerlist" => $workerlist, "header" => $header, "status" => $status); } -sub terminal_columns { - # Get the number of columns of the display - # Returns: - # number of columns of the screen - if(not $Private::columns) { - $Private::columns = $ENV{'COLUMNS'}; - if(not $Private::columns) { - my $resize = qx{ resize 2>/dev/null }; - $resize =~ /COLUMNS=(\d+);/ and do { $Private::columns = $1; }; - } - $Private::columns ||= 80; +{ + my ($total, $first_completed, $smoothed_avg_time); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + $total ||= $Global::JobQueue->total_jobs(); + my $completed = 0; + for(values %Global::host) { $completed += $_->jobs_completed() } + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = $completed / $total; + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; + # These chars mess up display in the terminal + $arg =~ tr/[\011-\016\033\302-\365]//d; + my $bar_text = + sprintf("%d%% %d:%d=%ds %s", + $pctcomplete*100, $completed, $left, $eta, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." 
"x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($columns,$last_column_time); + + sub terminal_columns { + # Get the number of columns of the terminal. + # Only update once per second. + # Returns: + # number of columns of the screen + if(not $columns or $last_column_time < time) { + $last_column_time = time; + $columns = $ENV{'COLUMNS'}; + if(not $columns) { + my $stty = qx{ stty -a /dev/null' }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; + } + $columns ||= 80; + } + return $columns; } - return $Private::columns; } sub get_job_with_sshlogin { + # Input: + # $sshlogin = which host should the job be run on? + # Uses: + # $opt::hostgroups + # $Global::JobQueue # Returns: - # next command to run with ssh command wrapping if remote - # next command to run with no wrapping (clean_command) + # $job = next job object for $sshlogin if any available my $sshlogin = shift; + my $job; - if($::oodebug and $Global::JobQueue->empty()) { - Carp::confess("get_job_with_sshlogin should never be called if empty"); + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job to be run on a hostgroup of this + # $sshlogin + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } } - my $job = $Global::JobQueue->get(); - if(not defined $job) { - # No more jobs - ::debug("No more jobs: JobQueue empty\n"); - return undef; - } - - if($::oodebug and not defined $job->{'commandline'}) { - Carp::confess("get_job_with_sshlogin job->commandline should never be empty"); - } my $clean_command = $job->replaced(); if($clean_command =~ /^\s*$/) { # Do not run empty lines @@ -1438,13 +2361,13 @@ sub get_job_with_sshlogin { } } $job->set_sshlogin($sshlogin); - if($::opt_retries and $clean_command and + if($opt::retries and $clean_command and $job->failed_here()) { # This command with these args failed for this sshlogin my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); - #::my_dump(($no_of_failed_sshlogins,$min_failures)); - if($no_of_failed_sshlogins == keys %Global::host and - $job->failed_here() == $min_failures) { + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { # It failed the same or more times on another host: # run it on this host } else { @@ -1467,80 +2390,154 @@ sub get_job_with_sshlogin { sub __REMOTE_SSH__ {} sub read_sshloginfiles { + # Read a list of --slf's + # Input: + # @files = files or symbolic file names to read # Returns: N/A - for (@_) { - read_sshloginfile($_); + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); } } +sub expand_slf_shorthand { + # Expand --slf shorthand into a read file name + # Input: + # $file = file or symbolic file name to read + # Returns: + # $file = actual 
file name to read + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $ENV{'HOME'}."/.parallel/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + if(not -r $ENV{'HOME'}."/.parallel/".$file) { + # Try prepending ~/.parallel + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } else { + $file = $ENV{'HOME'}."/.parallel/".$file; + } + } + return $file; +} + sub read_sshloginfile { + # Read sshloginfile into @Global::sshlogin + # Input: + # $file = file to read + # Uses: + # @Global::sshlogin # Returns: N/A my $file = shift; my $close = 1; - if($file eq "..") { - $file = $ENV{'HOME'}."/.parallel/sshloginfile"; - } - if($file eq ".") { - $file = "/etc/parallel/sshloginfile"; - } + my $in_fh; + ::debug("init","--slf ",$file); if($file eq "-") { - *IN = *STDIN; + $in_fh = *STDIN; $close = 0; } else { - if(not open(IN, $file)) { - print $Global::original_stderr "Cannot open $file\n"; - exit(255); + if(not open($in_fh, "<", $file)) { + # Try the filename + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); } } - while() { + while(<$in_fh>) { chomp; /^\s*#/ and next; /^\s*$/ and next; push @Global::sshlogin, $_; } if($close) { - close IN; + close $in_fh; } } sub parse_sshlogin { + # Parse @Global::sshlogin into %Global::host. + # Keep only hosts that are in one of the given ssh hostgroups. + # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc # Returns: N/A my @login; if(not @Global::sshlogin) { @Global::sshlogin = (":"); } for my $sshlogin (@Global::sshlogin) { # Split up -S sshlogin,sshlogin - for my $s (split /,/, $sshlogin) { + for my $s (split /,|\n/, $sshlogin) { if ($s eq ".." 
or $s eq "-") { - read_sshloginfile($s); + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); } else { + $s =~ s/\s*$//; push (@login, $s); } } } - for my $sshlogin_string (@login) { - my $sshlogin = SSHLogin->new($sshlogin_string); - $sshlogin->set_maxlength(Limits::Command::max_length()); - $Global::host{$sshlogin->string()} = $sshlogin; + $Global::minimal_command_line_length = 8_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + if($sshlogin_string eq ":") { + $sshlogin->set_maxlength(Limits::Command::max_length()); + } else { + # If all chars needs to be quoted, every other character will be \ + $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); + } + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; } - #debug("sshlogin: ", my_dump(%Global::host),"\n"); - if($::opt_transfer or @::opt_return or $::opt_cleanup or @::opt_basefile) { + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { if(not remote_hosts()) { # There are no remote hosts - if(defined @::opt_trc) { - print $Global::original_stderr - "parallel: Warning: --trc ignored as there are no remote --sshlogin\n"; - } elsif (defined $::opt_transfer) { - print $Global::original_stderr - "parallel: Warning: --transfer ignored as there are no remote --sshlogin\n"; - } elsif (defined @::opt_return) { - print $Global::original_stderr - "parallel: Warning: --return ignored as there are no remote --sshlogin\n"; - } elsif (defined $::opt_cleanup) { - print $Global::original_stderr - "parallel: Warning: --cleanup ignored as there are no remote --sshlogin\n"; - } elsif (defined @::opt_basefile) { - print $Global::original_stderr - "parallel: Warning: --basefile ignored as there are no remote --sshlogin\n"; + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::cleanup) { + ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there are no remote --sshlogin.\n"); } } } @@ -1548,6 +2545,8 @@ sub parse_sshlogin { sub remote_hosts { # Return sshlogins that are not ':' + # Uses: + # %Global::host # Returns: # list of 
sshlogins with ':' removed return grep !/^:$/, keys %Global::host; @@ -1556,74 +2555,431 @@ sub remote_hosts { sub setup_basefile { # Transfer basefiles to each $sshlogin # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile # Returns: N/A my $cmd = ""; + my $rsync_destdir; + my $workdir; for my $sshlogin (values %Global::host) { - if($sshlogin->string() eq ":") { next } - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my $rsync_opt = "-rlDzR -e".shell_quote_scalar($sshcmd); - for my $file (@::opt_basefile) { - my $f = $file; - my $relpath = ($f !~ m:^/:); # Is the path relative? - # Use different subdirs depending on abs or rel path - my $rsync_destdir = ($relpath ? "./" : "/"); - $f =~ s:/\./:/:g; # Rsync treats /./ special. We dont want that - $f = shell_quote_scalar($f); - $cmd .= "rsync $rsync_opt $f $serverlogin:$rsync_destdir &"; - } + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles.\n"); + ::wait_and_exit(255); + } + $workdir ||= Job->new("")->workdir(); + $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . "&"; + } } $cmd .= "wait;"; - debug("basesetup: $cmd\n"); + debug("init", "basesetup: $cmd\n"); print `$cmd`; } sub cleanup_basefile { # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile # Returns: N/A - my $cmd=""; + my $cmd = ""; + my $workdir = Job->new("")->workdir(); for my $sshlogin (values %Global::host) { if($sshlogin->string() eq ":") { next } - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - for my $file (@::opt_basefile) { - $cmd .= "$sshcmd $serverlogin rm -f ".shell_quote_scalar(shell_quote_scalar($file))."&"; + for my $file (@opt::basefile) { + $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; } } $cmd .= "wait;"; - debug("basecleanup: $cmd\n"); + debug("init", "basecleanup: $cmd\n"); print `$cmd`; } +sub filter_hosts { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. 
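+    # Illustrative example (made-up values, not from a real run): for
+    # '-S server1' the parallelized probe below might return
+    #   ncores: server1  8
+    #   ncpus:  server1  4
+    #   maxlen: server1  131071
+    # plus a --joblog line whose runtime column is taken as the
+    # time-to-login; a host answering with exit 255 or signal 15 is
+    # treated as down and removed from %Global::host.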
+ # Uses: + # %Global::host + # $Global::minimal_command_line_length + # $opt::use_cpus_instead_of_cores + # Returns: N/A + + my ($ncores_ref, $ncpus_ref, $time_to_login_ref, $maxlen_ref, + $echo_ref, $down_hosts_ref) = + parse_host_filtering(parallelized_host_filtering()); + + delete @Global::host{@$down_hosts_ref}; + @$down_hosts_ref and ::warning("Removed @$down_hosts_ref\n"); + + $Global::minimal_command_line_length = 8_000_000; + while (my ($sshlogin, $obj) = each %Global::host) { + if($sshlogin eq ":") { next } + $ncpus_ref->{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); + $ncores_ref->{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); + $time_to_login_ref->{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); + $maxlen_ref->{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); + if($opt::use_cpus_instead_of_cores) { + $obj->set_ncpus($ncpus_ref->{$sshlogin}); + } else { + $obj->set_ncpus($ncores_ref->{$sshlogin}); + } + $obj->set_time_to_login($time_to_login_ref->{$sshlogin}); + $obj->set_maxlength($maxlen_ref->{$sshlogin}); + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, + int($maxlen_ref->{$sshlogin}/2)); + ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus_ref->{$sshlogin}, + " ncores:", $ncores_ref->{$sshlogin}, + " time_to_login:", $time_to_login_ref->{$sshlogin}, + " maxlen:", $maxlen_ref->{$sshlogin}, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub parse_host_filtering { + # Input: + # @lines = output from parallelized_host_filtering() + # Returns: + # \%ncores = number of cores of {host} + # \%ncpus = number of cpus of {host} + # \%time_to_login = time_to_login on {host} + # \%maxlen = max command len on {host} + # \%echo = echo received from {host} + # \@down_hosts = list of hosts with no answer + my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); + + for (@_) { + chomp; + my @col = split /\t/, $_; + if(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[7] eq "15") { + # exit == 255 or signal == 15: ssh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set ncpus and ncores = 1 + ::warning("Could not figure out ", + "number of cpus on $host. 
Using 1.\n"); + $ncores{$host} = 1; + $ncpus{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(not $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not $ncpus{$col[0]}) { + $ncpus{$col[0]} = $col[1]; + } elsif(not $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { + # Skip these: + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + @down_hosts = uniq(@down_hosts); + return(\%ncores, \%ncpus, \%time_to_login, \%maxlen, \%echo, \@down_hosts); +} + +sub parallelized_host_filtering { + # Uses: + # $Global::envvar + # %Global::host + # Returns: + # text entries with: + # * joblog line + # * hostname \t number of cores + # * hostname \t number of cpus + # * hostname \t max-line-length-allowed + # * hostname \t empty + my(@cores, @cpus, @maxline, @echo); + my $envvar = ::shell_quote_scalar($Global::envvar); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + my $sshcmd = "true $host; exec " . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); + push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); + push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); + push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); + # 'echo' is used to get the best possible value for an ssh login time + push(@echo, $host."\t".$sshcmd." echo\n\0"); + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); + print $fh @cores, @cpus, @maxline, @echo; + close $fh; + # --timeout 5: Setting up an SSH connection and running a simple + # command should never take > 5 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon it overloaded, try 3 times + # -s 16000: Half of the max line on UnixWare + # TODO sh -c wrapper to work in csh + my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); + for(<$host_fh>) { + if(/\'$/) { + # if last char = ' then append next line + # This may be due to quoting of $Global::envvar + $prepend .= $_; + next; + } + $_ = $prepend . 
$_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + $Global::debug or unlink $tmpfile; + return @out; +} + +sub onall { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. + # Uses: + # $Global::quoting + # @opt::basefile + # $opt::jobs + # $opt::linebuffer + # $opt::ungroup + # $opt::group + # $opt::keeporder + # $opt::D + # $opt::plain + # $opt::max_chars + # $opt::linebuffer + # $opt::files + # $opt::colsep + # $opt::timeout + # $opt::plain + # $opt::retries + # $opt::max_chars + # $opt::arg_sep + # $opt::arg_file_sep + # @opt::v + # @opt::env + # %Global::host + # $Global::exitstatus + # $Global::debug + # $Global::joblog + # $opt::tag + # $opt::joblog + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote_empty(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((@opt::v) ? "-vv" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + (@opt::env ? 
map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + ::debug("init", "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + sub __SIGNAL_HANDLING__ {} -sub list_running_jobs { +sub save_original_signal_handler { + # Remember the original signal handler + # Uses: + # %Global::original_sig # Returns: N/A - for my $v (values %Global::running) { - print $Global::original_stderr "$Global::progname: ",$v->replaced(),"\n"; + $SIG{INT} = sub { + if($opt::tmux) { qx { tmux kill-session -t p$$ }; } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { qx { tmux kill-session -t p$$ }; } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; +} + +sub list_running_jobs { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ",$job->replaced(),"\n"); } } sub start_no_new_jobs { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs # Returns: N/A $SIG{TERM} = $Global::original_sig{TERM}; - print $Global::original_stderr + unlink keys %Global::unlink; + ::status ("$Global::progname: SIGTERM received. No new jobs will be started.\n", "$Global::progname: Waiting for these ", scalar(keys %Global::running), " jobs to finish. Send SIGTERM again to stop now.\n"); list_running_jobs(); - $Global::start_no_new_jobs++; + $Global::start_no_new_jobs ||= 1; } sub reaper { # A job finished. # Print the output. # Start another job - # Returns: N/A + # Uses: + # %Global::sshmaster + # %Global::running + # $Global::tty_taken + # @Global::slots + # $opt::timeout + # $Global::timeoutq + # $opt::halt + # $opt::keeporder + # $Global::total_running + # Returns: + # $children_reaped = number of children finished my $stiff; my $children_reaped = 0; - debug("Reaper called "); + debug("run", "Reaper "); while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + # $stiff = pid of dead process $children_reaped++; if($Global::sshmaster{$stiff}) { # This is one of the ssh -M: ignore @@ -1634,91 +2990,152 @@ sub reaper { $job or next; $job->set_exitstatus($? >> 8); $job->set_exitsignal($? 
& 127); - debug("died (".$job->exitstatus()."): ".$job->seq()); - $job->set_endtime(); + debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")"); + $job->set_endtime(::now()); if($stiff == $Global::tty_taken) { # The process that died had the tty => release it $Global::tty_taken = 0; } if(not $job->should_be_retried()) { - # Force printing now if the job failed and we are going to exit - my $print_now = ($job->exitstatus() and - $::opt_halt_on_error and $::opt_halt_on_error == 2); - if($Global::keeporder and not $print_now) { - $Private::print_later{$job->seq()} = $job; - $Private::job_end_sequence ||= 1; - debug("Looking for: $Private::job_end_sequence ". - "Current: ".$job->seq()."\n"); - while($Private::print_later{$Private::job_end_sequence}) { - debug("Found job end $Private::job_end_sequence"); - $Private::print_later{$Private::job_end_sequence}->print(); - delete $Private::print_later{$Private::job_end_sequence}; - $Private::job_end_sequence++; - } - } else { + # The job is done + # Free the jobslot + push @Global::slots, $job->slot(); + if($opt::timeout) { + # Update average runtime for timeout + $Global::timeoutq->update_median_runtime($job->runtime()); + } + # Force printing now if --halt forces us to exit + my $print_now = + ($opt::halt and + (($opt::halt == 2 and $job->exitstatus()) + or + ($opt::halt == -2 and not $job->exitstatus()))); + if($opt::keeporder and not $print_now) { + $job->print_earlier_jobs(); + } else { $job->print(); } - if($job->exitstatus()) { - # The jobs had a exit status <> 0, so error - $Global::exitstatus++; - if($::opt_halt_on_error) { - if($::opt_halt_on_error == 1) { - # If halt on error == 1 we should gracefully exit - print $Global::original_stderr - ("$Global::progname: Starting no more jobs. ", - "Waiting for ", scalar(keys %Global::running), - " jobs to finish. This job failed:\n", - $job->replaced(),"\n"); - $Global::start_no_new_jobs++; - $Global::halt_on_error_exitstatus = $job->exitstatus(); - } elsif($::opt_halt_on_error == 2) { - # If halt on error == 2 we should exit immediately - print $Global::original_stderr - ("$Global::progname: This job failed:\n", - $job->replaced(),"\n"); - exit ($job->exitstatus()); - } - } - } + $job->should_we_halt(); } my $sshlogin = $job->sshlogin(); $sshlogin->dec_jobs_running(); $sshlogin->inc_jobs_completed(); $Global::total_running--; delete $Global::running{$stiff}; - start_more_jobs(); + start_more_jobs(); + if($opt::progress) { + my %progress = progress(); + ::status("\r",$progress{'status'}); + } } - debug("Reaper exit\n"); + debug("run", "done "); return $children_reaped; } -sub timeout { - # SIGALRM was received. 
Check if there was a timeout - # @Global::timeout is sorted by timeout - while (@Global::timeouts) { - my $t = $Global::timeouts[0]; - if($t->timed_out()) { - $t->kill(); - shift @Global::timeouts; - } else { - # Because they are sorted by timeout - last; - } - } -} sub __USAGE__ {} +sub killall { + # Kill all jobs + # Send all jobs TERM + # Wait + # Send all jobs TERM + # Wait + # Send all jobs KILL + # Send all (grand*)children KILL + + $Global::start_no_new_jobs ||= 1; + # pids of the all children and (grand*)children + # before we start the blood bath + my @family_pids = family_pids(keys %Global::running); + # Send jobs TERM + ::debug("kill","TERM ", join(' ',keys %Global::running),"\n"); + kill "TERM", keys %Global::running; + # Wait up to 200 ms + my $sleepsum = 0; + my $sleep = 0.001; + for (; kill(0, keys %Global::running) and $sleepsum < 200; + $sleepsum += $sleep) { + # This can change %Global::running + ::debug("kill","Slept $sleepsum\n"); + if((my $stiff = waitpid(-1, &WNOHANG)) > 0) { + ::debug("kill","reaped $stiff\n"); + + $Global::total_running--; + delete $Global::running{$stiff}; + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + } + # Send jobs TERM (again) + ::debug("kill","TERM ", join(' ',keys %Global::running),"\n"); + kill "TERM", keys %Global::running; + # Wait up to 200 ms + $sleepsum = 0; + $sleep = 0.001; + for (; kill(0, keys %Global::running) and $sleepsum < 200; + $sleepsum += $sleep) { + # This can change %Global::running + if((my $stiff = waitpid(-1, &WNOHANG)) > 0) { + $Global::total_running--; + delete $Global::running{$stiff}; + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + } + # Send jobs KILL + ::debug("kill","KILL ", join(' ',keys %Global::running),"\n"); + kill "KILL", keys %Global::running; + # Send all (grand*)children KILL (if there are any left) + ::debug("kill","KILL @family_pids\n"); + kill "KILL", @family_pids; +} + +sub family_pids { + # Find the pids with this->pid as (grand)*parent + # Input: + # @parents = pids of parents + # Returns: + # @pids = pids of (grand)*children + my @parents = @_; + my @pids; + + my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); + + my @more = @parents; + # While more (grand)*children + while(@more) { + my @m; + push @pids, @more; + for my $parent (@more) { + if($children_of_ref->{$parent}) { + # add the children of this parent + push @m, @{$children_of_ref->{$parent}}; + } + } + @more = @m; + } + return (@pids); +} + sub wait_and_exit { # If we do not wait, we sometimes get segfault # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all without printing + killall(); + } for (keys %Global::unkilled_children) { kill 9, $_; waitpid($_,0); delete $Global::unkilled_children{$_}; } wait(); - exit(shift); + exit($error); } sub die_usage { @@ -1732,36 +3149,96 @@ sub usage { print join ("\n", "Usage:", + "", "$Global::progname [options] [command [arguments]] < list_of_arguments", "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", "cat ... 
| $Global::progname --pipe [options] [command [arguments]]", "", - "-j n Run n jobs in parallel", - "-k Keep same order", - "-X Multiple arguments with context replace", - "--colsep regexp Split input on regexp for positional replacements", - "{} {.} {/} {/.} {#} Replacement strings", - "{3} {3.} {3/} {3/.} Positional replacement strings", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", "", - "-S sshlogin Example: foo\@server.example.com", - "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", - "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", - "--onall Run the given command with argument on all sshlogins", - "--nonall Run the given command with no arguments on all sshlogins", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", "", - "--pipe Split stdin (standard input) to multiple jobs.", - "--recend str Record end separator for --pipe.", - "--recstart str Record start separator for --pipe.", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", "", "See 'man $Global::progname' for details", "", - "When using GNU Parallel for a publication please cite:", + "Academic tradition requires you to cite works you base your article on.", + "When using programs that use GNU Parallel to process data for publication", + "please cite:", "", - "O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", - ";login: The USENIX Magazine, February 2011:42-47.", + " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", + " ;login: The USENIX Magazine, February 2011:42-47.", + "", + "This helps funding further development; and it won't cost you a cent.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n", ""); } + +sub citation_notice { + # if --will-cite or --plain: do nothing + # if stderr redirected: do nothing + # if ~/.parallel/will-cite: do nothing + # else: print citation notice to stderr + if($opt::willcite + or + $opt::plain + or + not -t $Global::original_stderr + or + -e $ENV{'HOME'}."/.parallel/will-cite") { + # skip + } else { + ::status + ("Academic tradition requires you to cite works you base your article on.\n", + "When using programs that use GNU Parallel to process data for publication\n", + "please cite:\n", + "\n", + " O. 
Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", + " ;login: The USENIX Magazine, February 2011:42-47.\n", + "\n", + "This helps funding further development; and it won't cost you a cent.\n", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n", + "\n", + "To silence the citation notice: run 'parallel --bibtex'.\n\n", + ); + } +} + +sub status { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + my $prog = $Global::progname || "parallel"; + print $fh $prog, ": Warning: ", @w; +} + +sub error { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + my $prog = $Global::progname || "parallel"; + print $fh $prog, ": Error: ", @w; +} + sub die_bug { my $bugid = shift; print STDERR @@ -1778,43 +3255,71 @@ sub die_bug { sub version { # Returns: N/A - if($::opt_tollef and not $::opt_gnu) { - print "WARNING: YOU ARE USING --tollef. USE --gnu FOR GNU PARALLEL\n\n"; - } print join("\n", "GNU $Global::progname $Global::version", - "Copyright (C) 2007,2008,2009,2010,2011,2012 Ole Tange and Free Software Foundation, Inc.", + "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange", + "and Free Software Foundation, Inc.", "License GPLv3+: GNU GPL version 3 or later ", "This is free software: you are free to change and redistribute it.", "GNU $Global::progname comes with no warranty.", "", "Web site: http://www.gnu.org/software/${Global::progname}\n", - "When using GNU Parallel for a publication please cite:\n", - "O. Tange (2011): GNU Parallel - The Command-Line Power Tool, ", - ";login: The USENIX Magazine, February 2011:42-47.\n", + "When using programs that use GNU Parallel to process data for publication", + "please cite as described in 'parallel --bibtex'.\n", ); } sub bibtex { # Returns: N/A - if($::opt_tollef and not $::opt_gnu) { - print "WARNING: YOU ARE USING --tollef. USE --gnu FOR GNU PARALLEL\n\n"; - } print join("\n", + "Academic tradition requires you to cite works you base your article on.", + "When using programs that use GNU Parallel to process data for publication", + "please cite:", + "", "\@article{Tange2011a,", - " title = {GNU Parallel - The Command-Line Power Tool},", - " author = {O. Tange},", - " address = {Frederiksberg, Denmark},", - " journal = {;login: The USENIX Magazine},", - " month = {Feb},", - " number = {1},", - " volume = {36},", - " url = {http://www.gnu.org/s/parallel},", - " year = {2011},", - " pages = {42-47}", + " title = {GNU Parallel - The Command-Line Power Tool},", + " author = {O. 
Tange},", + " address = {Frederiksberg, Denmark},", + " journal = {;login: The USENIX Magazine},", + " month = {Feb},", + " number = {1},", + " volume = {36},", + " url = {http://www.gnu.org/s/parallel},", + " year = {2011},", + " pages = {42-47}", + " doi = {10.5281/zenodo.16303}", "}", "", + "(Feel free to use \\nocite{Tange2011a})", + "", + "This helps funding further development; and it won't cost you a cent.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.", + "", + "If you send a copy of your published article to tange\@gnu.org, it will be", + "mentioned in the release notes of next version of GNU Parallel.\n\n", ); + while(not -e $ENV{'HOME'}."/.parallel/will-cite") { + print "\nType: 'will cite' and press enter.\n> "; + my $input = ; + if($input =~ /will cite/i) { + mkdir $ENV{'HOME'}."/.parallel"; + if(open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")) { + close $fh; + print "\nThank you for your support. It is much appreciated. The citation\n", + "notice is now silenced. You may also use '--will-cite'.\n", + "If you use '--will-cite' in scripts you are expected to pay\n", + "the 10000 EUR, because you are making it harder to see the\n", + "citation notice.\n\n"; + } else { + print "\nThank you for your support. It is much appreciated. The citation\n", + "cannot permanently be silenced. Use '--will-cite' instead.\n", + "If you use '--will-cite' in scripts you are expected to pay\n", + "the 10000 EUR, because you are making it harder to see the\n", + "citation notice.\n\n"; + last; + } + } + } } sub show_limits { @@ -1829,6 +3334,40 @@ sub show_limits { sub __GENERIC_COMMON_FUNCTION__ {} +sub tmpfile { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filehandle = opened file handle + # $filename = file name created + return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); +} + +sub tmpname { + # Select a name that does not exist + # Do not create the file as that may cause problems + # if you ssh to localhost (or a shared file system) under a different name + my $name = shift; + my($tmpname); + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while($Global::unlink{$tmpname}++ or -e $tmpname); + return $tmpname; +} + +sub tmpfifo { + # Securely make a fifo by securely making a dir with a fifo in it + use POSIX qw(mkfifo); + my $tmpfifo = tmpname("fif",@_); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub uniq { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + sub min { # Returns: # Minimum value of array @@ -1877,82 +3416,375 @@ sub undef_as_empty { return $a ? 
$a : ""; } -sub hostname { - if(not $Private::hostname) { - my $hostname = `hostname`; - chomp($hostname); - $Private::hostname = $hostname || "nohostname"; +sub multiply_binary_prefix { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my $s = shift; + if(not $s) { + return $s; } - return $Private::hostname; + $s =~ s/ki/*1024/gi; + $s =~ s/mi/*1024*1024/gi; + $s =~ s/gi/*1024*1024*1024/gi; + $s =~ s/ti/*1024*1024*1024*1024/gi; + $s =~ s/pi/*1024*1024*1024*1024*1024/gi; + $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + $s =~ s/K/*1024/g; + $s =~ s/M/*1024*1024/g; + $s =~ s/G/*1024*1024*1024/g; + $s =~ s/T/*1024*1024*1024*1024/g; + $s =~ s/P/*1024*1024*1024*1024*1024/g; + $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + $s =~ s/k/*1000/g; + $s =~ s/m/*1000*1000/g; + $s =~ s/g/*1000*1000*1000/g; + $s =~ s/t/*1000*1000*1000*1000/g; + $s =~ s/p/*1000*1000*1000*1000*1000/g; + $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; + $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $s = eval $s; + ::debug($s); + return $s; } -sub reap_usleep { - # Reap dead children. - # If no children: Sleep specified amount with exponential backoff - # Returns: - # 0.00001 if children reaped (0.00001 ms works best on highend) - # $ms*1.1 if no children reaped - my $ms = shift; - if(reaper()) { - return 0.00001; - } else { - usleep($ms); - return (($ms < 1000) ? ($ms * 1.1) : ($ms)); # exponential back off +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. + # Returns: + # N/A + if(not $disk_full_fh) { + my $name; + ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); + # Separate unlink due to NFS dealing badly with File::Temp + unlink $name; + $b8193 = "x"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. 
Is the disk full?\n"); + ::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; } } +sub spacefree { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? + # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if($spaces) { + $s =~ s/\s+/ /mg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname { + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + } + return @which; +} + +{ + my ($regexp,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # posh rbash rush rzsh sash sh static-sh tcsh yash zsh + my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh + ksh93 mksh pdksh posh rbash rush rzsh + sash sh static-sh tcsh yash zsh -sh -csh), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh + my $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + "-sh" => ["csh", "tcsh"], + "-csh" => ["tcsh", "csh"], + ); + } + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); + if($name_of_ref->{$testpid} =~ /$regexp/o) { + ::debug("init", "which ".($3||$6)." => "); + $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Crazy msys: ' is not accepted on the cmd line, but " are treated as ' + my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). 
+ q(s/^.{$s}//; print qq{@F[1,2] $_}" ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'msys' => $msys, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + $^O eq "darwin" and /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + sub usleep { # Sleep this many milliseconds. - my $secs = shift; - ::debug("Sleeping ",$secs," millisecs\n"); - select(undef, undef, undef, $secs/1000); - if($::opt_timeout) { - ::debug(my_dump($Global::timeoutq)); - $Global::timeoutq->process_timeouts(); + # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug(int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub reap_usleep { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reaper()) { + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem(); + } + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + usleep($ms); + # --compress needs $SIG{CHLD} undefined + delete $SIG{CHLD}; + exit_if_disk_full(); + if($opt::linebuffer) { + for my $job (values %Global::running) { + $job->print(); + } + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? 
($ms * 1.1) : ($ms)); } } -sub multiply_binary_prefix { - # Evalualte numbers with binary prefix - # k=10^3, m=10^6, g=10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 - # K=2^10, M=2^20, G=2^30, T=2^40, P=2^50, E=2^70, Z=2^80, Y=2^80 - # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 - # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 - # 13G = 13*1024*1024*1024 = 13958643712 - my $s = shift; - $s =~ s/k/*1000/g; - $s =~ s/M/*1000*1000/g; - $s =~ s/G/*1000*1000*1000/g; - $s =~ s/T/*1000*1000*1000*1000/g; - $s =~ s/P/*1000*1000*1000*1000*1000/g; - $s =~ s/E/*1000*1000*1000*1000*1000*1000/g; - $s =~ s/Z/*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/Y/*1000*1000*1000*1000*1000*1000*1000*1000/g; - $s =~ s/X/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; +sub kill_youngster_if_not_enough_mem { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my %jobs_of; + my @sshlogins; - $s =~ s/Ki?/*1024/gi; - $s =~ s/Mi?/*1024*1024/gi; - $s =~ s/Gi?/*1024*1024*1024/gi; - $s =~ s/Ti?/*1024*1024*1024*1024/gi; - $s =~ s/Pi?/*1024*1024*1024*1024*1024/gi; - $s =~ s/Ei?/*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/Zi?/*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/Yi?/*1024*1024*1024*1024*1024*1024*1024*1024/gi; - $s =~ s/Xi?/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; - $s = eval $s; - return $s; + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) { + if($sshlogin->memfree() < $opt::memfree * 0.5) { + ::debug("mem","\n",map { $_->seq()." " } (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}})); + ::debug("mem","\n", $job->seq(), "killed ", + $sshlogin->memfree()," < ",$opt::memfree * 0.5); + $job->kill(); + $sshlogin->memfree_recompute(); + } else { + last; + } + } + ::debug("mem","Free mem OK ", $sshlogin->memfree()," > ",$opt::memfree * 0.5); + } } sub __DEBUGGING__ {} sub debug { + # Uses: + # $Global::debug + # %Global::fd # Returns: N/A $Global::debug or return; @_ = grep { defined $_ ? $_ : "" } @_; - if($Global::original_stdout) { - print $Global::original_stdout @_; - } else { - print @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fd{1}) { + # Original stdout was saved + my $stdout = $Global::fd{1}; + print $stdout @_[1..$#_]; + } else { + print @_[1..$#_]; + } } } @@ -1981,7 +3813,7 @@ sub my_memory_usage { sub my_size { # Returns: - # size of object if Devel::Size is installed + # $size = size of object if Devel::Size is installed # -1 otherwise my @size_this = (@_); eval "use Devel::Size qw(size total_size)"; @@ -2004,7 +3836,7 @@ sub my_dump { if ($@) { my $err = "Neither Data::Dump nor Data::Dumper is installed\n". 
"Not dumping output\n"; - print $Global::original_stderr $err; + ::status($err); return $err; } else { return Dumper(@dump_this); @@ -2018,8 +3850,19 @@ sub my_dump { } } -sub __OBJECT_ORIENTED_PARTS__ {} +sub my_croak { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} +sub my_carp { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + +sub __OBJECT_ORIENTED_PARTS__ {} package SSHLogin; @@ -2027,29 +3870,50 @@ sub new { my $class = shift; my $sshlogin_string = shift; my $ncpus; - if($sshlogin_string =~ s:^(\d*)/:: and $1) { - # Override default autodetected ncpus unless zero or missing + my %hostgroups; + # SSHLogins can have these formats: + # @grp+grp/ncpu//usr/bin/ssh user@server + # ncpu//usr/bin/ssh user@server + # /usr/bin/ssh user@server + # user@server + # ncpu/user@server + # @grp+grp/user@server + if($sshlogin_string =~ s:^\@([^/]+)/?::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+/, $1); + } + if ($sshlogin_string =~ s:^(\d+)/::) { + # Override default autodetected ncpus unless missing $ncpus = $1; } my $string = $sshlogin_string; + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; my @unget = (); + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; return bless { 'string' => $string, 'jobs_running' => 0, 'jobs_completed' => 0, 'maxlength' => undef, 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, 'ncpus' => $ncpus, + 'hostgroups' => \%hostgroups, 'sshcommand' => undef, 'serverlogin' => undef, 'control_path_dir' => undef, 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" . - $$."-".$string, + $no_slash_string, 'loadavg' => undef, 'last_loadavg_update' => 0, 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . 
- $$."-".$string, + $no_slash_string, 'swap_activity' => undef, }, ref($class) || $class; } @@ -2068,7 +3932,6 @@ sub string { sub jobs_running { my $self = shift; - return ($self->{'jobs_running'} || "0"); } @@ -2082,11 +3945,6 @@ sub dec_jobs_running { $self->{'jobs_running'}--; } -#sub set_jobs_running { -# my $self = shift; -# $self->{'jobs_running'} = shift; -#} - sub set_maxlength { my $self = shift; $self->{'maxlength'} = shift; @@ -2102,6 +3960,21 @@ sub jobs_completed { return $self->{'jobs_completed'}; } +sub in_hostgroups { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + sub inc_jobs_completed { my $self = shift; $self->{'jobs_completed'}++; @@ -2117,6 +3990,78 @@ sub set_max_jobs_running { # max_jobs_running could be resat if -j is a changed file $Global::max_jobs_running += $self->{'max_jobs_running'}; } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + $self->{'memfree'} = qx{ $script }; + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # $ free + # total used free shared buffers cached + # Mem: 8075152 4922780 3152372 338856 233356 1658604 + # -/+ buffers/cache: 3030820 5044332 + # Swap: 8286204 116924 8169280 + "linux" => q{ print (1024*((grep /buffers.cache/, `free`)[0] =~ /buffers.cache:\s+\S+\s+(\S+)/)[0]) }, + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => q{ print (((reverse `vmstat 1 1`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) }, + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The last free is really free + "solaris" => q{ print (((reverse `vmstat 1 2`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) }, + "freebsd" => q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + ($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }, + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $perlscript =~ s/[\t\n ]+/ /g; + $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); + $script = $Global::envvar. " " .$perlscript; + } + return $script + } } sub swapping { @@ -2134,35 +4079,31 @@ sub swap_activity { # Should we update the swap_activity file? 
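+    # Example of the cached value (made-up numbers): the background probe
+    # multiplies swap-ins by swap-outs, so a cache file containing "6"
+    # (e.g. si=2, so=3) means the host is both swapping in and out, while
+    # "0" means no simultaneous swap activity. The cached value is only
+    # refreshed if it is more than 10 seconds old.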
     my $update_swap_activity_file = 0;
     if(-r $self->{'swap_activity_file'}) {
-        open(SWAP,"<".$self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
-        my $swap_out = <SWAP>;
-        close SWAP;
+        open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r");
+        my $swap_out = <$swap_fh>;
+        close $swap_fh;
         if($swap_out =~ /^(\d+)$/) {
             $self->{'swap_activity'} = $1;
-            ::debug("New swap_activity: ".$self->{'swap_activity'});
+            ::debug("swap", "New swap_activity: ", $self->{'swap_activity'});
         }
-        ::debug("Last update: ".$self->{'last_swap_activity_update'});
+        ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'});
         if(time - $self->{'last_swap_activity_update'} > 10) {
             # last swap activity update was started 10 seconds ago
-            ::debug("Older than 10 sec: ".$self->{'swap_activity_file'});
+            ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'});
             $update_swap_activity_file = 1;
         }
     } else {
-        ::debug("No swap_activity file: ".$self->{'swap_activity_file'});
+        ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'});
         $self->{'swap_activity'} = undef;
         $update_swap_activity_file = 1;
     }
     if($update_swap_activity_file) {
-        ::debug("Updating swap_activity file".$self->{'swap_activity_file'});
+        ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'});
         $self->{'last_swap_activity_update'} = time;
         -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel";
         -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp";
         my $swap_activity;
-        # If the (remote) machine is Mac we should use vm_stat
-        # swap_in and swap_out on GNU/Linux is $7 and $8
-        # swap_in and swap_out on Mac is $10 and $11
-        $swap_activity = q[ { vmstat 1 2> /dev/null || vm_stat 1; } | ].
-            q[ awk 'NR!=4{next} NF==16{print $7*$8} NF==11{print $10*$11} {exit}' ];
+        $swap_activity = swapactivityscript();
         if($self->{'string'} ne ":") {
             $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . 
::shell_quote_scalar($swap_activity); @@ -2171,12 +4112,171 @@ sub swap_activity { # As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file my $file = $self->{'swap_activity_file'}; - my $tmpfile = $self->{'swap_activity_file'}.$$; - qx{ ($swap_activity > $tmpfile; mv $tmpfile $file) & }; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; } return $self->{'swap_activity'}; } +{ + my $script; + + sub swapactivityscript { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 
0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); + $script = $Global::envvar. " " .$perlscript; + } + return $script; + } +} + +sub too_fast_remote_login { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? 
$too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at { + my $self = shift; + $self->{'last_login_at'} = shift; +} + sub loadavg_too_high { my $self = shift; my $loadavg = $self->loadavg(); @@ -2184,65 +4284,156 @@ sub loadavg_too_high { $loadavg > $self->max_loadavg()); } +{ + my $cmd; + sub loadavg_cmd { + if(not $cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + 'aix' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + ); + print `$ps{$^O}`; + }; + $ps =~ s/[ \t\n]+/ /g; + $cmd = "perl -e ".::shell_quote_scalar($ps); + } + return $cmd; + } +} + + sub loadavg { # If the currently know loadavg is too old: # Recompute a new one in the background + # The load average is computed as the number of processes waiting for disk + # or CPU right now. So it is the server load this instant and not averaged over + # several minutes. This is needed so GNU Parallel will at most start one job + # that will push the load over the limit. + # # Returns: - # last load average computed + # $last_loadavg = last load average computed (undef if none) my $self = shift; # Should we update the loadavg file? 
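# Editor's note: a minimal, self-contained sketch (not part of this patch)
# of the "instant load" idea explained above.  Assuming a BSD-style
# 'ps ax -o state,command', it counts the processes that are running or
# waiting for disk right now: lines whose state starts with D, O or R,
# skipping kernel threads (command starts with '[') and subtracting one
# for the counting process itself.
sub instant_load_sketch {
    my $ps_out = qx{ ps ax -o state,command };
    my $load =()= $ps_out =~ /^\s?[DOR]\S*\s+(?=[^\[])\S/gm;
    return $load > 0 ? $load - 1 : 0;
}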
my $update_loadavg_file = 0; - if(-r $self->{'loadavg_file'}) { - open(UPTIME,"<".$self->{'loadavg_file'}) || ::die_bug("loadavg_file-r"); + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { local $/ = undef; - my $uptime_out = ; - close UPTIME; - # load average: 0.76, 1.53, 1.45 - if($uptime_out =~ /load average: (\d+.\d+)/) { - $self->{'loadavg'} = $1; - ::debug("New loadavg: ".$self->{'loadavg'}); - } else { - ::die_bug("loadavg_invalid_content: $uptime_out"); + my $load_out = <$load_fh>; + close $load_fh; + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . "\n$load_out"); } - ::debug("Last update: ".$self->{'last_loadavg_update'}); - if(time - $self->{'last_loadavg_update'} > 10) { - # last loadavg was started 10 seconds ago - ::debug("Older than 10 sec: ".$self->{'loadavg_file'}); + # Because of instant load average, it should not be delayed 10 secs + # The instant load does not give 2 R if there is only 1 cpu. +# ::debug("load", "Last update: ", $self->{'last_loadavg_update'}); +# if(time - $self->{'last_loadavg_update'} > 10) { +# # last loadavg was started 10 seconds ago +# ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ", +# $self->{'loadavg_file'}); $update_loadavg_file = 1; - } +# } } else { - ::debug("No loadavg file: ".$self->{'loadavg_file'}); + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); $self->{'loadavg'} = undef; $update_loadavg_file = 1; } if($update_loadavg_file) { - ::debug("Updating loadavg file".$self->{'loadavg_file'}."\n"); + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); $self->{'last_loadavg_update'} = time; -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; - my $uptime; - if($self->{'string'} eq ":") { - $uptime = "LANG=C uptime"; - } else { - $uptime = $self->sshcommand() . " " . $self->serverlogin() . " LANG=C uptime"; - } - # Run uptime. + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " . 
+ ::shell_quote_scalar(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } # As the command can take long to run if run remote # save it to a tmp file before moving it to the correct file + ::debug("load", "Cmd: ", $cmd); my $file = $self->{'loadavg_file'}; - my $tmpfile = $self->{'loadavg_file'}.$$; - qx{ ($uptime > $tmpfile && mv $tmpfile $file) & }; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); + qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; } return $self->{'loadavg'}; } sub max_loadavg { my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } if(not defined $self->{'max_loadavg'}) { $self->{'max_loadavg'} = - $self->compute_max_loadavg($::opt_load); + $self->compute_max_loadavg($opt::load); } - ::debug("max_loadavg: ".$self->string()." ".$self->{'max_loadavg'}); + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); return $self->{'max_loadavg'}; } @@ -2278,16 +4469,16 @@ sub compute_max_loadavg { } elsif (-f $loadspec) { $Global::max_load_file = $loadspec; $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; - if(open(IN, $Global::max_load_file)) { - my $opt_load_file = join("",); - close IN; + if(open(my $in_fh, "<", $Global::max_load_file)) { + my $opt_load_file = join("",<$in_fh>); + close $in_fh; $load = $self->compute_max_loadavg($opt_load_file); } else { - print $Global::original_stderr "Cannot open $loadspec\n"; - exit(255); + ::error("Cannot open $loadspec.\n"); + ::wait_and_exit(255); } } else { - print $Global::original_stderr "Parsing of --load failed\n"; + ::error("Parsing of --load failed.\n"); ::die_usage(); } if($load < 0.01) { @@ -2297,14 +4488,30 @@ sub compute_max_loadavg { return $load; } +sub time_to_login { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login { + my $self = shift; + $self->{'time_to_login'} = shift; +} + sub max_jobs_running { my $self = shift; if(not defined $self->{'max_jobs_running'}) { - $self->set_max_jobs_running($self->compute_number_of_processes($::opt_P)); + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); } return $self->{'max_jobs_running'}; } +sub orig_max_jobs_running { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + sub compute_number_of_processes { # Number of processes wanted and limited by system resources # Returns: @@ -2315,150 +4522,203 @@ sub compute_number_of_processes { if(not defined $wanted_processes) { $wanted_processes = $Global::default_simultaneous_sshlogins; } - ::debug("Wanted procs: $wanted_processes\n"); + ::debug("load", "Wanted procs: $wanted_processes\n"); my $system_limit = $self->processes_available_by_system_limit($wanted_processes); - $system_limit < 1 and ::die_bug('$system_limit < 1'); - ::debug("Limited to procs: $system_limit\n"); + ::debug("load", "Limited to procs: $system_limit\n"); return $system_limit; } -sub processes_available_by_system_limit { - # If the wanted number of processes is bigger than the system limits: - # Limit them to the system limits - # Limits are: File handles, number of input lines, processes, - # and taking > 1 second to spawn 10 extra processes - # Returns: - # Number of processes - my $self = shift; - my $wanted_processes = shift; - - 
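# Editor's note: an illustrative, hypothetical sketch of the mtime check
# used by max_loadavg() above when --load names a file: remember the
# file's modification time and only re-read it when the file has changed.
# The cache variable and sub name are made up for this example.
my %mtime_cache;   # file => [ mtime, contents ]
sub read_if_changed_sketch {
    my $file  = shift;
    my $mtime = (stat($file))[9];
    if(not $mtime_cache{$file} or $mtime > $mtime_cache{$file}[0]) {
        open(my $fh, "<", $file) or return undef;
        local $/;                                  # slurp the whole file
        $mtime_cache{$file} = [ $mtime, scalar <$fh> ];
        close $fh;
    }
    return $mtime_cache{$file}[1];
}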
my $system_limit = 0; - my @jobs = (); - my $job; - my @args = (); - my $arg; - my $more_filehandles = 1; - my $max_system_proc_reached = 0; - my $slow_spawining_warning_printed = 0; - my $time = time; - my %fh; +{ my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; - # Reserve filehandles - # perl uses 7 filehandles for something? - # parallel uses 1 for memory_usage - for my $i (1..8) { - open($fh{"init-$i"},"next_seq(); - my $wait_time_for_getting_args = 0; - my $start_time = time; - while(1) { - $system_limit >= $wanted_processes and last; - not $more_filehandles and last; - $max_system_proc_reached and last; - my $before_getting_arg = time; - if($Global::semaphore) { - # Skip - } elsif(defined $::opt_retries and $count_jobs_already_read) { - # For retries we may need to run all jobs on this sshlogin - # so include the already read jobs for this sshlogin - $count_jobs_already_read--; - } else { - if($::opt_X or $::opt_m) { - # The arguments may have to be re-spread over several jobslots - # So pessimistically only read one arg per jobslot - # instead of a full commandline - if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { - if($Global::JobQueue->empty()) { - last; - } else { - ($job) = $Global::JobQueue->get(); - push(@jobs, $job); - } - } else { - ($arg) = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); - push(@args, $arg); - } - } else { - # If there are no more command lines, then we have a process - # per command line, so no need to go further - $Global::JobQueue->empty() and last; - ($job) = $Global::JobQueue->get(); - push(@jobs, $job); - } - } - $wait_time_for_getting_args += time - $before_getting_arg; - $system_limit++; - # Every simultaneous process uses 2 filehandles when grouping - $more_filehandles = open($fh{$system_limit*10}," 10 and - $forktime > 1 and - $forktime > $system_limit * 0.01 - and not $slow_spawining_warning_printed) { - # It took more than 0.01 second to fork a processes on avg. - # Give the user a warning. He can press Ctrl-C if this - # sucks. - print $Global::original_stderr - ("parallel: Warning: Starting $system_limit processes took > $forktime sec.\n", - "Consider adjusting -j. Press CTRL-C to stop.\n"); - $slow_spawining_warning_printed = 1; - } + } } - if($system_limit < $wanted_processes and not $more_filehandles) { - print $Global::original_stderr - ("parallel: Warning: Only enough filehandles to run ", - $system_limit, " jobs in parallel. 
", - "Raising ulimit -n may help.\n"); + + sub get_args_or_jobs { + # Get an arg or a job (depending on mode) + if($Global::semaphore or $opt::pipe) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } + } } - if($system_limit < $wanted_processes and $max_system_proc_reached) { - print $Global::original_stderr - ("parallel: Warning: Only enough available processes to run ", - $system_limit, " jobs in parallel.\n"); + + sub cleanup { + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); + $Global::JobQueue->unget(@jobs); + @jobs = undef; } - if($Global::JobQueue->empty()) { - $system_limit ||= 1; + + sub processes_available_by_system_limit { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawining_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? 
+ reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + get_args_or_jobs() or last; + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", + $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01 + and not $slow_spawining_warning_printed) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning("Starting $system_limit processes took > $forktime sec.\n", + "Consider adjusting -j. Press CTRL-C to stop.\n"); + $slow_spawining_warning_printed = 1; + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n", + "or /proc/sys/kernel/pid_max may help.\n"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n", + "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ", + "raising ulimit -n or /etc/security/limits.conf may help.\n"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ", $system_limit, + " jobs in parallel. 
Raising ulimit -u or /etc/security/limits.conf\n", + "or /proc/sys/kernel/pid_max may help.\n"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; } - # Cleanup: Close the files - for (values %fh) { close $_ } - # Cleanup: Kill the children - for my $pid (@children) { - kill 9, $pid; - waitpid($pid,0); - delete $Global::unkilled_children{$pid}; - } - #wait(); - # Cleanup: Unget the command_lines or the @args - $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); - $Global::JobQueue->unget(@jobs); - if($self->string() ne ":" and - $system_limit > $Global::default_simultaneous_sshlogins) { - $system_limit = - $self->simultaneous_sshlogin_limit($system_limit); - } - return $system_limit; } sub simultaneous_sshlogin_limit { @@ -2467,20 +4727,23 @@ sub simultaneous_sshlogin_limit { # min($wanted_processes,$working_simultaneous_ssh_logins-1) my $self = shift; my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + # Try twice because it guesses wrong sometimes # Choose the minimal my $ssh_limit = ::min($self->simultaneous_sshlogin($wanted_processes), - $self->simultaneous_sshlogin($wanted_processes)); + $self->simultaneous_sshlogin($wanted_processes)); if($ssh_limit < $wanted_processes) { my $serverlogin = $self->serverlogin(); - print $Global::original_stderr - ("parallel: Warning: ssh to $serverlogin only allows ", - "for $ssh_limit simultaneous logins.\n", - "You may raise this by changing ", - "/etc/ssh/sshd_config:MaxStartup on $serverlogin\n", - "Using only ",$ssh_limit-1," connections ", - "to avoid race conditions\n"); + ::warning("ssh to $serverlogin only allows ", + "for $ssh_limit simultaneous logins.\n", + "You may raise this by changing ", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n", + "Using only ",$ssh_limit-1," connections ", + "to avoid race conditions.\n"); } # Race condition can cause problem if using all sshs. if($ssh_limit > 1) { $ssh_limit -= 1; } @@ -2497,12 +4760,14 @@ sub simultaneous_sshlogin { my $wanted_processes = shift; my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); - my $cmd = "$sshcmd $serverlogin echo simultaneouslogin 2>&1 &"x$wanted_processes; - ::debug("Trying $wanted_processes logins at $serverlogin"); - open (SIMUL, "($cmd)|grep simultaneouslogin | wc -l|") or + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes; + ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or ::die_bug("simultaneouslogin"); - my $ssh_limit = ; - close SIMUL; + my $ssh_limit = <$simul_fh>; + close $simul_fh; chomp $ssh_limit; return $ssh_limit; } @@ -2530,7 +4795,8 @@ sub user_requested_processes { my $j = $1; $processes = $self->ncpus() - $j; - } elsif ($opt_P =~ /^(\d+)\%$/) { + } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { + # E.g. 
-P 10.5% my $j = $1; $processes = $self->ncpus() * $j / 100; @@ -2542,22 +4808,19 @@ sub user_requested_processes { } } elsif (-f $opt_P) { $Global::max_procs_file = $opt_P; - $Global::max_procs_file_last_mod = (stat($Global::max_procs_file))[9]; - if(open(IN, $Global::max_procs_file)) { - my $opt_P_file = join("",); - close IN; + if(open(my $in_fh, "<", $Global::max_procs_file)) { + my $opt_P_file = join("",<$in_fh>); + close $in_fh; $processes = $self->user_requested_processes($opt_P_file); } else { - print $Global::original_stderr "Cannot open $opt_P\n"; - exit(255); + ::error("Cannot open $opt_P.\n"); + ::wait_and_exit(255); } } else { - print $Global::original_stderr "Parsing of --jobs/-j/--max-procs/-P failed\n"; + ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n"); ::die_usage(); } - if($processes < 1) { - $processes = 1; - } + $processes = ::ceil($processes); } return $processes; } @@ -2568,25 +4831,26 @@ sub ncpus { my $sshcmd = $self->sshcommand(); my $serverlogin = $self->serverlogin(); if($serverlogin eq ":") { - if($::opt_use_cpus_instead_of_cores) { + if($opt::use_cpus_instead_of_cores) { $self->{'ncpus'} = no_of_cpus(); } else { $self->{'ncpus'} = no_of_cores(); } } else { my $ncpu; - if($::opt_use_cpus_instead_of_cores) { - $ncpu = qx(echo|$sshcmd $serverlogin parallel --number-of-cpus); + my $sqe = ::shell_quote_scalar($Global::envvar); + if($opt::use_cpus_instead_of_cores) { + $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus); } else { - $ncpu = qx(echo|$sshcmd $serverlogin parallel --number-of-cores); + ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n)); + $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores); } chomp $ncpu; if($ncpu =~ /^\s*[0-9]+\s*$/s) { $self->{'ncpus'} = $ncpu; } else { - print $Global::original_stderr - ("parallel: Warning: Could not figure out ", - "number of cpus on $serverlogin ($ncpu). Using 1\n"); + ::warning("Could not figure out ", + "number of cpus on $serverlogin ($ncpu). 
Using 1.\n"); $self->{'ncpus'} = 1; } } @@ -2597,31 +4861,57 @@ sub ncpus { sub no_of_cpus { # Returns: # Number of physical CPUs - local $/="\n"; # If delimiter is set, then $/ will be wrong + local $/ = "\n"; # If delimiter is set, then $/ will be wrong my $no_of_cpus; if ($^O eq 'linux') { - $no_of_cpus = no_of_cpus_gnu_linux(); + $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); } elsif ($^O eq 'freebsd') { $no_of_cpus = no_of_cpus_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cpus = no_of_cpus_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cpus = no_of_cpus_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cpus = no_of_cpus_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cpus = no_of_cpus_darwin(); } elsif ($^O eq 'solaris') { $no_of_cpus = no_of_cpus_solaris(); } elsif ($^O eq 'aix') { $no_of_cpus = no_of_cpus_aix(); - } elsif ($^O eq 'darwin') { - $no_of_cpus = no_of_cpus_darwin(); + } elsif ($^O eq 'hpux') { + $no_of_cpus = no_of_cpus_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cpus = no_of_cpus_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cpus = no_of_cpus_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cpus = no_of_cpus_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cpus = no_of_cpus_tru64(); } else { - $no_of_cpus = (no_of_cpus_freebsd() + $no_of_cpus = (no_of_cpus_gnu_linux() + || no_of_cpus_freebsd() + || no_of_cpus_netbsd() + || no_of_cpus_openbsd() + || no_of_cpus_hurd() || no_of_cpus_darwin() || no_of_cpus_solaris() || no_of_cpus_aix() - || no_of_cpus_gnu_linux() + || no_of_cpus_hpux() + || no_of_cpus_qnx() + || no_of_cpus_openserver() + || no_of_cpus_irix() + || no_of_cpus_tru64() + # Number of cores is better than no guess for #CPUs + || nproc() ); } if($no_of_cpus) { chomp $no_of_cpus; return $no_of_cpus; } else { - warn("parallel: Cannot figure out number of cpus. Using 1"); + ::warning("Cannot figure out number of cpus. 
Using 1.\n"); return 1; } } @@ -2629,52 +4919,102 @@ sub no_of_cpus { sub no_of_cores { # Returns: # Number of CPU cores - local $/="\n"; # If delimiter is set, then $/ will be wrong + local $/ = "\n"; # If delimiter is set, then $/ will be wrong my $no_of_cores; if ($^O eq 'linux') { $no_of_cores = no_of_cores_gnu_linux(); } elsif ($^O eq 'freebsd') { $no_of_cores = no_of_cores_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cores = no_of_cores_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cores = no_of_cores_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cores = no_of_cores_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cores = no_of_cores_darwin(); } elsif ($^O eq 'solaris') { $no_of_cores = no_of_cores_solaris(); } elsif ($^O eq 'aix') { $no_of_cores = no_of_cores_aix(); - } elsif ($^O eq 'darwin') { - $no_of_cores = no_of_cores_darwin(); + } elsif ($^O eq 'hpux') { + $no_of_cores = no_of_cores_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cores = no_of_cores_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cores = no_of_cores_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cores = no_of_cores_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cores = no_of_cores_tru64(); } else { - $no_of_cores = (no_of_cores_freebsd() + $no_of_cores = (no_of_cores_gnu_linux() + || no_of_cores_freebsd() + || no_of_cores_netbsd() + || no_of_cores_openbsd() + || no_of_cores_hurd() || no_of_cores_darwin() || no_of_cores_solaris() || no_of_cores_aix() - || no_of_cores_gnu_linux() + || no_of_cores_hpux() + || no_of_cores_qnx() + || no_of_cores_openserver() + || no_of_cores_irix() + || no_of_cores_tru64() + || nproc() ); } if($no_of_cores) { chomp $no_of_cores; return $no_of_cores; } else { - warn("parallel: Cannot figure out number of CPU cores. Using 1"); + ::warning("Cannot figure out number of CPU cores. 
Using 1.\n"); return 1; } } +sub nproc { + # Returns: + # Number of cores using `nproc` + my $no_of_cores = qx{ sh -c 'nproc 2>/dev/null' }; + return $no_of_cores; +} + sub no_of_cpus_gnu_linux { # Returns: # Number of physical CPUs on GNU/Linux # undef if not GNU/Linux my $no_of_cpus; + my $no_of_cores; + my $no_of_active_cores; if(-e "/proc/cpuinfo") { $no_of_cpus = 0; + $no_of_cores = 0; my %seen; - open(IN,"cat /proc/cpuinfo|") || return undef; - while() { - if(/^physical id.*[:](.*)/ and not $seen{$1}++) { - $no_of_cpus++; - } - } - close IN; + if(open(my $in_fh, "<", "/proc/cpuinfo")) { + while(<$in_fh>) { + if(/^physical id.*[:](.*)/ and not $seen{$1}++) { + $no_of_cpus++; + } + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } } - return $no_of_cpus; + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores)); } sub no_of_cores_gnu_linux { @@ -2682,37 +5022,29 @@ sub no_of_cores_gnu_linux { # Number of CPU cores on GNU/Linux # undef if not GNU/Linux my $no_of_cores; + my $no_of_active_cores; if(-e "/proc/cpuinfo") { $no_of_cores = 0; - open(IN,"cat /proc/cpuinfo|") || return undef; - while() { - /^processor.*[:]/ and $no_of_cores++; + open(my $in_fh, "<", "/proc/cpuinfo") || return undef; + while(<$in_fh>) { + /^processor.*[:]/i and $no_of_cores++; } - close IN; + close $in_fh; } - return $no_of_cores; -} - -sub no_of_cpus_darwin { - # Returns: - # Number of physical CPUs on Mac Darwin - # undef if not Mac Darwin - my $no_of_cpus = - (`sysctl -n hw.physicalcpu 2>/dev/null` - or - `sysctl -a hw 2>/dev/null | grep -w physicalcpu | awk '{ print \$2 }'`); - return $no_of_cpus; -} - -sub no_of_cores_darwin { - # Returns: - # Number of CPU cores on Mac Darwin - # undef if not Mac Darwin - my $no_of_cores = - (`sysctl -n hw.logicalcpu 2>/dev/null` - or - `sysctl -a hw 2>/dev/null | grep -w logicalcpu | awk '{ print \$2 }'`); - return $no_of_cores; + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cores,$no_of_active_cores)); } sub no_of_cpus_freebsd { @@ -2720,9 +5052,9 @@ sub no_of_cpus_freebsd { # Number of physical CPUs on FreeBSD # undef if not FreeBSD my $no_of_cpus = - (`sysctl -a dev.cpu 2>/dev/null | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }'` + (qx{ sh -c 'sysctl -a dev.cpu 2>/dev/null' | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' } or - `sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'`); + qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' }); chomp $no_of_cpus; return $no_of_cpus; } @@ -2732,13 +5064,89 @@ sub no_of_cores_freebsd { # Number of CPU cores on FreeBSD # undef if not FreeBSD my $no_of_cores = - (`sysctl hw.ncpu 2>/dev/null | awk '{ print \$2 }'` + (qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' } or - `sysctl -a hw 2>/dev/null | grep -w logicalcpu | awk '{ print \$2 }'`); + qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }); chomp $no_of_cores; return 
$no_of_cores; } +sub no_of_cpus_netbsd { + # Returns: + # Number of physical CPUs on NetBSD + # undef if not NetBSD + my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_netbsd { + # Returns: + # Number of CPU cores on NetBSD + # undef if not NetBSD + my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_openbsd { + # Returns: + # Number of physical CPUs on OpenBSD + # undef if not OpenBSD + my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_openbsd { + # Returns: + # Number of CPU cores on OpenBSD + # undef if not OpenBSD + my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cpus = qx{ nproc }; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cores = `nproc`; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_darwin { + # Returns: + # Number of physical CPUs on Mac Darwin + # undef if not Mac Darwin + my $no_of_cpus = + (qx{ sh -c 'sysctl -n hw.physicalcpu 2>/dev/null' } + or + qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }); + return $no_of_cpus; +} + +sub no_of_cores_darwin { + # Returns: + # Number of CPU cores on Mac Darwin + # undef if not Mac Darwin + my $no_of_cores = + (qx{ sh -c 'sysctl -n hw.logicalcpu 2>/dev/null' } + or + qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }); + return $no_of_cores; +} + sub no_of_cpus_solaris { # Returns: # Number of physical CPUs on Solaris @@ -2750,7 +5158,7 @@ sub no_of_cpus_solaris { } } if(-x "/usr/sbin/prtconf") { - my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; + my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance }; if($#prtconf >= 0) { return $#prtconf +1; } @@ -2769,7 +5177,7 @@ sub no_of_cores_solaris { } } if(-x "/usr/sbin/prtconf") { - my @prtconf = `/usr/sbin/prtconf | grep cpu..instance`; + my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance }; if($#prtconf >= 0) { return $#prtconf +1; } @@ -2783,11 +5191,11 @@ sub no_of_cpus_aix { # undef if not AIX my $no_of_cpus = 0; if(-x "/usr/sbin/lscfg") { - open(IN,"/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' ' |") + open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") || return undef; - $no_of_cpus = ; + $no_of_cpus = <$in_fh>; chomp ($no_of_cpus); - close IN; + close $in_fh; } return $no_of_cpus; } @@ -2798,15 +5206,111 @@ sub no_of_cores_aix { # undef if not AIX my $no_of_cores; if(-x "/usr/bin/vmstat") { - open(IN,"/usr/bin/vmstat 1 1|") || return undef; - while() { + open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return undef; + while(<$in_fh>) { /lcpu=([0-9]*) / and $no_of_cores = $1; } - close IN; + close $in_fh; } return $no_of_cores; } +sub no_of_cpus_hpux { + # Returns: + # Number of physical CPUs on HP-UX + # undef if not HP-UX + my $no_of_cpus = + qx{ sh -c '/usr/bin/mpsched -s 2>&1' | grep 'Locality Domain Count' | awk '{ print \$4 }'}; + return $no_of_cpus; +} + +sub no_of_cores_hpux { + # Returns: + # Number of CPU cores on HP-UX + # undef if not HP-UX + my $no_of_cores = + qx{ sh -c '/usr/bin/mpsched -s 2>&1' | perl -ne '/Processor 
Count\\D+(\\d+)/ and print "\$1\n"'}; + return $no_of_cores; +} + +sub no_of_cpus_qnx { + # Returns: + # Number of physical CPUs on QNX + # undef if not QNX + # BUG: It is not known how to calculate this. + my $no_of_cpus = 0; + return $no_of_cpus; +} + +sub no_of_cores_qnx { + # Returns: + # Number of CPU cores on QNX + # undef if not QNX + # BUG: It is not known how to calculate this. + my $no_of_cores = 0; + return $no_of_cores; +} + +sub no_of_cpus_openserver { + # Returns: + # Number of physical CPUs on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cpus = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cpus; +} + +sub no_of_cores_openserver { + # Returns: + # Number of CPU cores on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cores = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cores; +} + +sub no_of_cpus_irix { + # Returns: + # Number of physical CPUs on IRIX + # undef if not IRIX + my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; + return $no_of_cpus; +} + +sub no_of_cores_irix { + # Returns: + # Number of CPU cores on IRIX + # undef if not IRIX + my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; + return $no_of_cores; +} + +sub no_of_cpus_tru64 { + # Returns: + # Number of physical CPUs on Tru64 + # undef if not Tru64 + my $no_of_cpus = `sizer -pr`; + return $no_of_cpus; +} + +sub no_of_cores_tru64 { + # Returns: + # Number of CPU cores on Tru64 + # undef if not Tru64 + my $no_of_cores = `sizer -pr`; + return $no_of_cores; +} + sub sshcommand { my $self = shift; if (not defined $self->{'sshcommand'}) { @@ -2839,22 +5343,29 @@ sub sshcommand_of_sshlogin { $sshcmd = $1; $serverlogin = $2; } else { # Normal ssh - if($::opt_controlmaster) { + if($opt::controlmaster) { # Use control_path to make ssh faster my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; $sshcmd = "ssh -S ".$control_path; $serverlogin = $self->{'string'}; - my $master = "ssh -MTS $control_path $serverlogin sleep 1"; if(not $self->{'control_path'}{$control_path}++) { # Master is not running for this control_path # Start it my $pid = fork(); if($pid) { - $Global::sshmaster{$pid}++; + $Global::sshmaster{$pid} ||= 1; } else { - ::debug($master,"\n"); - `$master`; - ::wait_and_exit(0); + $SIG{'TERM'} = undef; + # Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument" + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + # Run a sleep that outputs data, so it will discover if the ssh connection closes. 
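# Editor's note (context, not part of the patch): the child started here
# only exists to keep the multiplexed ControlMaster connection on
# $control_path open.  Every later command built above as
#     ssh -S <control_path> <serverlogin> <command>
# then reuses the already-authenticated connection instead of performing
# a new login, which is what makes --controlmaster faster.  With stock
# OpenSSH the master can be inspected or shut down by hand (the host and
# path shown are illustrative only):
#     ssh -S ~/.parallel/tmp/ssh-user@host:22 -O check user@host
#     ssh -S ~/.parallel/tmp/ssh-user@host:22 -O exit  user@host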
+ my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}'); + my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep); + exec(@master); } } } else { @@ -2880,18 +5391,110 @@ sub control_path_dir { return $self->{'control_path_dir'}; } +sub rsync_transfer_cmd { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file, " is not readable and will not be transferred.\n"); + return "true"; + } + my $rsync_destdir; + if($file =~ m:^/:) { + # rsync /foo/bar / + $rsync_destdir = "/"; + } else { + $rsync_destdir = ::shell_quote_file($workdir); + } + $file = ::shell_quote_file($file); + my $sshcmd = $self->sshcommand(); + my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); + my $serverlogin = $self->serverlogin(); + # Make dir if it does not exist + return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" . + rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; +} + +sub cleanup_cmd { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "sh -c 'rmdir @rmdir 2>/dev/null';" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; + } + + $f = ::shell_quote_file($f); + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)"); +} + +{ + my $rsync; + + sub rsync { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. 
+ # If the version >= 3.1.0: downgrade to protocol 30 + if(not $rsync) { + my @out = `rsync --version`; + for (@out) { + if(/version (\d+.\d+)(.\d+)?/) { + if($1 >= 3.1) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync = "rsync --protocol 30"; + } else { + $rsync = "rsync"; + } + } + } + $rsync or ::die_bug("Cannot figure out version of rsync: @out"); + } + return $rsync; + } +} + package JobQueue; sub new { my $class = shift; - my $command = shift; + my $commandref = shift; my $read_from = shift; my $context_replace = shift; my $max_number_of_args = shift; my $return_files = shift; - my $commandlinequeue = CommandLineQueue->new( - $command,$read_from,$context_replace,$max_number_of_args,$return_files); + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $return_files); my @unget = (); return bless { 'unget' => \@unget, @@ -2926,7 +5529,7 @@ sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}}) && $self->{'commandlinequeue'}->empty(); - ::debug("JobQueue->empty $empty\n"); + ::debug("run", "JobQueue->empty $empty "); return $empty; } @@ -2935,10 +5538,32 @@ sub total_jobs { if(not defined $self->{'total_jobs'}) { my $job; my @queue; + my $start = time; + while($job = $self->get()) { + if(time - $start > 10) { + ::warning("Reading ".scalar(@queue)." arguments took longer than 10 seconds.\n"); + $opt::eta && ::warning("Consider removing --eta.\n"); + $opt::bar && ::warning("Consider removing --bar.\n"); + $opt::shuf && ::warning("Consider removing --shuf.\n"); + last; + } + push @queue, $job; + } while($job = $self->get()) { push @queue, $job; } - $self->unget(@queue); + if($opt::shuf) { + my $i = @queue; + while (--$i) { + my $j = int rand($i+1); + @queue[$i,$j] = @queue[$j,$i]; + } + my $seq = 1; + for my $job (@queue) { + $job->{'commandline'}->set_seq($seq++); + } + } + $self->unget(@queue); $self->{'total_jobs'} = $#queue+1; } return $self->{'total_jobs'}; @@ -2960,17 +5585,14 @@ package Job; sub new { my $class = shift; - my $commandline = shift; + my $commandlineref = shift; return bless { - 'commandline' => $commandline, # The commandline with no args + 'commandline' => $commandlineref, # CommandLine object 'workdir' => undef, # --workdir - 'stdin' => undef, # filehandle for stdin (used for --pipe) - 'stdout' => undef, # filehandle for stdout (used for --group) + # filehandle for stdin (used for --pipe) # filename for writing stdout to (used for --files) - 'stdoutfilename' => undef, - 'stderr' => undef, # filehandle for stderr (used for --group) - 'remaining' => "", # remaining data not sent to stdin (used for --pipe) - 'datawritten' => 0, # amount of data sent via stdin (used for --pipe) + # remaining data not sent to stdin (used for --pipe) + # amount of data sent via stdin (used for --pipe) 'transfersize' => 0, # size of files using --transfer 'returnsize' => 0, # size of files using --return 'pid' => undef, @@ -2983,12 +5605,13 @@ sub new { 'exitsignal' => undef, # Timestamp for timeout if any 'timeout' => undef, + 'virgin' => 1, }, ref($class) || $class; } sub replaced { my $self = shift; - $self->{'commandline'} or Carp::croak("cmdline empty"); + $self->{'commandline'} or ::die_bug("commandline empty"); return $self->{'commandline'}->replaced(); } @@ -2997,87 +5620,385 @@ sub seq { return $self->{'commandline'}->seq(); } -sub set_stdout { +sub set_seq { my $self = shift; - $self->{'stdout'} = shift; + return $self->{'commandline'}->set_seq(shift); } -sub stdout { +sub slot { my $self = 
shift; - return $self->{'stdout'}; + return $self->{'commandline'}->slot(); } -sub set_stdoutfilename { - my $self = shift; - $self->{'stdoutfilename'} = shift; +{ + my($cattail); + + sub cattail { + # Returns: + # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,32768); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be more to read => exit + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } } -sub stdoutfilename { +sub openoutputfiles { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh my $self = shift; - return $self->{'stdoutfilename'}; + my ($outfhw, $errfhw, $outname, $errname); + if($opt::results) { + my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # Output in: prefix/name1/val1/name2/val2/stdout + my $dir = $opt::results."/".$args_as_dirname; + if(eval{ File::Path::mkpath($dir); }) { + # OK + } else { + # mkpath failed: Argument probably too long. 
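# Editor's note: a small, hypothetical illustration (not GNU Parallel's
# code) of the --results layout described at the top of this sub,
# prefix/name1/val1/name2/val2/stdout.  For an unnamed input source the
# "name" level is its number, so 'parallel --results out echo ::: A B'
# would typically produce out/1/A/stdout, out/1/A/stderr, out/1/B/stdout
# and out/1/B/stderr.
use File::Path qw(make_path);
sub results_paths_sketch {
    my ($prefix, @name_val_pairs) = @_;   # e.g. ("out", 1, "A")
    my $dir = join "/", $prefix, @name_val_pairs;
    make_path($dir);                      # like File::Path::mkpath above
    return ("$dir/stdout", "$dir/stderr");
}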
+ # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + $dir = $opt::results."/".$args_as_dirname; + File::Path::mkpath($dir); + } + # prefix/name1/val1/name2/val2/stdout + $outname = "$dir/stdout"; + if(not open($outfhw, "+>", $outname)) { + ::error("Cannot write to `$outname'.\n"); + ::wait_and_exit(255); + } + # prefix/name1/val1/name2/val2/stderr + $errname = "$dir/stderr"; + if(not open($errfhw, "+>", $errname)) { + ::error("Cannot write to `$errname'.\n"); + ::wait_and_exit(255); + } + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if(@Global::tee_jobs) { + # files must be removed when the tee is done + } elsif($opt::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fd{1}) || die; + open($errfhw,">&",$Global::fd{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($opt::linebuffer) { + $self->set_non_blocking(); + } } -sub stderr { +sub grouped { my $self = shift; - return $self->{'stderr'}; + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed seperately + # and fdr can be seeked seperately (for --line-buffer) + open(my $fdr,"<", $self->fh($fdno,'name')) || + ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if required + $Global::debug or unlink $self->fh($fdno,"unlink"); + } } -sub set_stderr { - my $self = shift; - $self->{'stderr'} = shift; +sub empty_input_wrapper { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + my $script = '$c="'.::perl_quote_scalar($command).'";'. + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", $c) || die; + syswrite($fh, $buf); + while($read = sysread(STDIN, $buf, 32768)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 
128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::shell_quote_scalar($script)."\n"); + return 'perl -e '.::shell_quote_scalar($script); } -sub stdin { +sub filter_through_compress { my $self = shift; - return $self->{'stdin'}; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file. + my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, it is known that output file is opened, + # thus output file can then be removed by the decompressor. + my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".empty_input_wrapper($opt::compress_program).") >". + $self->fh($fdno,'name')) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } } -sub set_stdin { +sub set_non_blocking { my $self = shift; - my $stdin = shift; - # set non-blocking - fcntl($stdin, ::F_SETFL, ::O_NONBLOCK) or - ::die_bug("Couldn't set flags for HANDLE: $!"); - $self->{'stdin'} = $stdin; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + for my $fdno (1,2) { + my $fdr = $self->fh($fdno,'r'); + my $flags; + fcntl($fdr, &::F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &::O_NONBLOCK; # Add non-blocking to the flags + fcntl($fdr, &::F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } +} + +sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + my $testdir = shift; + + my $upper = 8_000_000; + my $len = 8; + my $dir = "x"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "x"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "x"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; +} + +sub set_fh { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; } sub write { my $self = shift; my $remaining_ref = shift; - if(length($$remaining_ref)) { - $self->{'remaining'} .= $$remaining_ref; - $self->complete_write(); + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. 
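# Editor's note: a hedged, standalone restatement of the partial-write
# point made above -- syswrite() may write fewer bytes than requested and
# returns undef on error, so a robust writer keeps looping until the
# buffer is empty (sketch only; the loop below is the code that actually
# runs):
sub write_all_sketch {
    my ($fh, $buf) = @_;
    while(length $buf) {
        my $written = syswrite($fh, $buf);
        return 0 if not defined $written;   # real error, not just a short write
        substr($buf, 0, $written) = "";     # drop what was written
    }
    return 1;
}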
+ while($len) { + my $written = syswrite($stdin_fh,$$remaining_ref); + substr($$remaining_ref,0,$written) = ""; + $len -= $written; } } -sub complete_write { +sub set_stdin_buffer { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $block_ref = ref to $block to pass on + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp # Returns: - # number of bytes written (see syswrite) + # N/A my $self = shift; - my $in = $self->{'stdin'}; - my $len = syswrite($in,$self->{'remaining'}); - if (!defined($len) && $! == &::EAGAIN) { - # write would block; - } else { - # Remove the part that was written - substr($self->{'remaining'},0,$len) = ""; - $self->{'datawritten'} += $len; + my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_; + $self->{'stdin_buffer'} = ($self->virgin() ? $$header_ref : "").substr($$block_ref,0,$endpos); + if($opt::remove_rec_sep) { + remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend); } - return $len; + $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'}; + $self->{'stdin_buffer_pos'} = 0; + $self->add_transfersize($self->{'stdin_buffer_length'}); } -sub remaining { +sub stdin_buffer_length { my $self = shift; - if(defined $self->{'remaining'}) { - return length $self->{'remaining'}; - } else { - return undef; - } + return $self->{'stdin_buffer_length'}; } -sub datawritten { +sub remove_rec_sep { + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + $$block_ref =~ s/$recend$recstart//gos; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; +} + +sub non_block_write { my $self = shift; - return $self->{'datawritten'}; + my $something_written = 0; + use POSIX qw(:errno_h); + # for loop used to avoid copying substr: $buf will be an alias for the substr + for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) { + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, $buf); + if (!defined($rv) && $! 
== EAGAIN) { + # would block + $something_written = 0; + } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) { + # incomplete write + # Remove the written part + $self->{'stdin_buffer_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + my $a = ""; + $self->set_stdin_buffer(\$a,\$a,"",""); + $something_written = $rv; + } + } + + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin { + my $self = shift; + $self->{'virgin'} = shift; } sub pid { @@ -3091,92 +6012,122 @@ sub set_pid { } sub starttime { + # Returns: + # UNIX-timestamp this job started my $self = shift; - return $self->{'starttime'}; + return sprintf("%.3f",$self->{'starttime'}); } sub set_starttime { my $self = shift; - my $starttime = shift || time; + my $starttime = shift || ::now(); $self->{'starttime'} = $starttime; } sub runtime { + # Returns: + # Run time in seconds my $self = shift; - return $self->{'endtime'}-$self->{'starttime'}; + return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); } sub endtime { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet my $self = shift; - return $self->{'endtime'}; + return ($self->{'endtime'} || 0); } sub set_endtime { my $self = shift; - my $endtime = shift || time; + my $endtime = shift; $self->{'endtime'} = $endtime; } - -sub set_timeout { +sub timedout { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false my $self = shift; my $delta_time = shift; - $self->{'timeout'} = time + $delta_time; -} - -sub timeout { - my $self = shift; - return $self->{'timeout'}; -} - -sub timedout { - my $self = shift; - return time > $self->{'timeout'}; + return time > $self->{'starttime'} + $delta_time; } sub kill { - # kill the jobs + # Kill the job. + # Send the signals to (grand)*children and pid. + # If no signals: TERM TERM KILL + # Wait 200 ms after each TERM. + # Input: + # @signals = signals to send my $self = shift; + my @signals = @_; my @family_pids = $self->family_pids(); + # Record this jobs as failed - $self->set_exitstatus(1); + $self->set_exitstatus(-1); # Send two TERMs to give time to clean up - for my $signal ("TERM", "TERM", "KILL") { + ::debug("run", "Kill seq ", $self->seq(), " signal '@signals'\n"); + my @send_signals = @signals || ("TERM", "TERM", "KILL"); + for my $signal (@send_signals) { my $alive = 0; for my $pid (@family_pids) { if(kill 0, $pid) { # The job still running kill $signal, $pid; $alive = 1; + ::debug("run","$pid is alive\n"); + } + } + # If a signal was given as input, do not do the sleep below + @signals and next; + + if($signal eq "TERM" and $alive) { + # Wait up to 200 ms between TERMs - but only if any pids are alive + my $sleep = 1; + for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200; + $sleepsum += $sleep) { + $sleep = ::reap_usleep($sleep); } } - # Wait 200 ms between TERMs - but only if any pids are alive - if($signal eq "TERM" and $alive) { ::usleep(200); } } } sub family_pids { # Find the pids with this->pid as (grand)*parent - # TODO test this on different OS as 'ps' is known to be different + # Returns: + # @pids = pids of (grand)*children my $self = shift; my $pid = $self->pid(); - my $script = q{ - family_pids() { - for CHILDPID in `ps --ppid "$@" -o pid --no-headers`; do - family_pids $CHILDPID & - done - echo "$@" - } - } . 
- "family_pids $pid; wait"; - my @pids = qx{$script}; - chomp(@pids); - return ($pid,@pids); -} + my @pids; + my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); + + my @more = ($pid); + # While more (grand)*children + while(@more) { + my @m; + push @pids, @more; + for my $parent (@more) { + if($children_of_ref->{$parent}) { + # add the children of this parent + push @m, @{$children_of_ref->{$parent}}; + } + } + @more = @m; + } + return (@pids); +} sub failed { # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin my $self = shift; my $sshlogin = shift; return $self->{'failed'}{$sshlogin}; @@ -3184,6 +6135,8 @@ sub failed { sub failed_here { # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin my $self = shift; return $self->{'failed'}{$self->sshlogin()}; } @@ -3220,21 +6173,147 @@ sub min_failed { # the minimal number of times this command has failed my $self = shift; my $min_failures = - ::min(map { $self->{'failed'}{$_} } - keys %{$self->{'failed'}}); + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; return ($number_of_sshlogins_failed_on,$min_failures); } sub total_failed { # Returns: - # the number of times this command has failed + # $total_failures = the number of times this command has failed my $self = shift; my $total_failures = 0; for (values %{$self->{'failed'}}) { $total_failures += $_; } - return ($total_failures); + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/h//) { + exit $bash; + } + exit $csh; + }). + "' ".'"$?h" "$status" '; + } + return $script + } +} + +sub wrapped { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_partials) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # $Global::envvar + # $opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_partials + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + if($opt::shellquote) { + # Prepend echo + # and quote twice + $command = "echo " . + ::shell_quote_scalar(::shell_quote_scalar($command)); + } + if($opt::nice) { + # Prepend \nice -n19 $SHELL -c + # and quote. + # The '\' before nice is needed to avoid tcsh's built-in + my $sshlogin = $self->sshlogin(); + my $serverlogin = $sshlogin->serverlogin(); + if($serverlogin eq ":") { + # Local use $Global::shell + $command = '\nice'. " -n". $opt::nice. " ". + $Global::shell. " -c ". + ::shell_quote_scalar($command); + } else { + # Remote systems use $SHELL + $command = '\nice'. " -n". $opt::nice. + ' $SHELL -c '. + ::shell_quote_scalar($command); + } + } + if($opt::cat) { +# Append 'unlink {} without affecting $?' + $command = + $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0). + $command.";". 
postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } elsif($opt::fifo) { + # Prepend 'mkfifo {}; (' + # Append ') & _PID=$!; cat > {}; wait $_PID; ' + # (This makes it fail in csh, but give the correct exit code in bash) + # Append 'unlink {} without affecting $?' + # Set $ENV{PARALLEL_TMP} when starting a job + # Set $ENV{PARALLEL_TMP} in the remote wrapper + # mkfifo $PARALLEL_TMP; + # {} = $PARALLEL_TMP; + # (...) & + # cat > $PARALLEL_TMP; wait \$_PID; cleanup $PARALLEL_TMP + # perl -e 'open($fifo,">",shift); while(read){print FIFO};unlink $fifo;waitpid($pid,0);exit $?' $! $PARALLEL_FIFO + $command = + "mkfifo \$PARALLEL_TMP\n (". + $command.";". + ') & _PID=$!; cat > $PARALLEL_TMP; wait $_PID; '. + postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_partials) { + # Prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + $command = (shift @Global::cat_partials). " | ($command)"; + } elsif($opt::pipe) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; } sub set_sshlogin { @@ -3242,6 +6321,7 @@ sub set_sshlogin { my $sshlogin = shift; $self->{'sshlogin'} = $sshlogin; delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; } sub sshlogin { @@ -3249,18 +6329,258 @@ sub sshlogin { return $self->{'sshlogin'}; } +sub string_zip_base64 { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. + # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + + $SIG{CHLD}="IGNORE"; + # Search for bzip2. 
Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + sub sshlogin_wrap { # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'. + q{ + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + setpgrp; + exec $shell, "-c", ($bashfunc."@ARGV"); + die "exec: $!\n"; + } + do { + # Parent is not init (ppid=1), so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } until ($done || getppid == 1); + # Kill HUP the process group if job not done + kill(SIGHUP, -${pid}) unless $done; + wait; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + my $fh = ::open_or_exit($_); + $Global::envdef = join("",<$fh>); + close $fh; + } + } + if(grep { /^_$/ } @vars) { + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' in a clean environment first.\n"); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should also look for BASH_FUNC_myfunc() + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' 
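# A simplified, hypothetical sketch of what env_as_eval() below builds: the
# names of the exported variables are turned into one string of
# $ENV{...}=... assignments that a remote perl can later eval. The inline
# backslash quoting and the sketch_* names are stand-ins for
# ::perl_quote_scalar and the real data, for illustration only.
my @sketch_names  = grep { defined $ENV{$_} } qw(HOME LANG);
my $sketch_setter = join "",
    map { my $v = $ENV{$_};
          $v =~ s/(["\\\$\@])/\\$1/g;
          '$ENV{"'.$_.'"}="'.$v.'";' } @sketch_names;
# On the receiving side the string is simply eval'ed:  eval $sketch_setter;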
+ my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; s/BASH_FUNC_(.*)(\(\)|%%)/$1/; "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell\n"); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". + ::spacefree(1,q{ + if($ENV{"SHELL"}=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{"parallel_bash_environment"}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh if(not defined $self->{'sshlogin_wrap'}) { my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); my $serverlogin = $sshlogin->serverlogin(); - my $next_command_line = $self->replaced(); - my ($pre,$post,$cleanup)=("","",""); + my $quoted_remote_command; + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{'PARALLEL_PID'} = $$; if($serverlogin eq ":") { - $self->{'sshlogin_wrap'} = $next_command_line; + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $env_command = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. + "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $env_command > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $env_command = "perl -e '".base64_zip_eval()."' ". + join" ",string_zip_base64($env_command); + $self->{'sshlogin_wrap'} = $env_command; + } else { + $self->{'sshlogin_wrap'} = "perl -e ".::shell_quote_scalar($env_command); + } + } else { + $self->{'sshlogin_wrap'} = $command; + } } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = $self->workdir(); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $remote_command = $pwd.$envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. monitor_parent_sshd_script(); + $quoted_remote_command = "perl -e ".::shell_quote_scalar($remote_command); + if(length $quoted_remote_command > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $quoted_remote_command = "perl -e \\''".base64_zip_eval()."'\\' ". 
+ join" ",string_zip_base64($remote_command); + } else { + $quoted_remote_command = ::shell_quote_scalar($quoted_remote_command); + } + + my $sshcmd = $sshlogin->sshcommand(); + my ($pre,$post,$cleanup)=("","",""); # --transfer $pre .= $self->sshtransfer(); # --return @@ -3271,24 +6591,12 @@ sub sshlogin_wrap { # We need to save the exit status of the job $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; } - # If the remote login shell is (t)csh then use 'setenv' - # otherwise use 'export' - my $parallel_env = - q{'eval `echo $SHELL | grep -E "/(t)?csh" > /dev/null} - . q{ && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\;} - . q{ setenv PARALLEL_PID '$PARALLEL_PID'} - . q{ || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\;} - . q{PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;'}; - if($::opt_workdir) { - $self->{'sshlogin_wrap'} = - ($pre . "$sshcmd $serverlogin $parallel_env " - . ::shell_quote_scalar("cd ".$self->workdir()." && ") - . ::shell_quote_scalar($next_command_line).";".$post); - } else { - $self->{'sshlogin_wrap'} = - ($pre . "$sshcmd $serverlogin $parallel_env " - . ::shell_quote_scalar($next_command_line).";".$post); - } + $self->{'sshlogin_wrap'} = + ($pre + . "$sshcmd $serverlogin exec " + . $quoted_remote_command + . ";" + . $post); } } return $self->{'sshlogin_wrap'}; @@ -3296,10 +6604,12 @@ sub sshlogin_wrap { sub transfer { # Files to transfer + # Returns: + # @transfer - File names of files to transfer my $self = shift; my @transfer = (); $self->{'transfersize'} = 0; - if($::opt_transfer) { + if($opt::transfer) { for my $record (@{$self->{'commandline'}{'arg_list'}}) { # Merge arguments from records into args for my $arg (@$record) { @@ -3319,55 +6629,39 @@ sub transfersize { return $self->{'transfersize'}; } -sub sshtransfer { +sub add_transfersize { my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; +} + +sub sshtransfer { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; my $sshlogin = $self->sshlogin(); - my $sshcmd = $sshlogin->sshcommand(); - my $serverlogin = $sshlogin->serverlogin(); - my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); - my $pre = ""; + my $workdir = $self->workdir(); for my $file ($self->transfer()) { - $file =~ s:/\./:/:g; # Rsync treats /./ special. We dont want that - $file =~ s:^\./::g; # Remove ./ if any - my $relpath = ($file !~ m:^/:); # Is the path relative? - # Use different subdirs depending on abs or rel path - # Abs path: rsync -rlDzR /home/tange/dir/subdir/file.gz server:/ - # Rel path: rsync -rlDzR ./subdir/file.gz server:.parallel/tmp/tempid/ - # Rel path: rsync -rlDzR ./subdir/file.gz server:$workdir/ - my $remote_workdir = $self->workdir($file); - my $rsync_destdir = ($relpath ? $remote_workdir : "/"); - if($relpath) { - $file = "./".$file; - } - if(-r $file) { - my $mkremote_workdir = - $remote_workdir eq "." ? "true" : - "ssh $serverlogin mkdir -p $rsync_destdir"; - $pre .= "$mkremote_workdir; rsync $rsync_opt " - . ::shell_quote_scalar($file)." $serverlogin:$rsync_destdir;"; - } else { - print $Global::original_stderr - "parallel: Warning: " - . $file . 
" is not readable and will not be transferred\n"; - } + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; } - return $pre; + return join("",@pre); } sub return { # Files to return - # Quoted and with {...} substituted + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames my $self = shift; - my @return = (); - for my $return (@{$self->{'commandline'}{'return_files'}}) { - CORE::push @return, - $self->{'commandline'}->replace_placeholders($return,1); - } - return @return; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); } sub returnsize { # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return my $self = shift; for my $file ($self->return()) { if(-e $file) { @@ -3377,7 +6671,15 @@ sub returnsize { return $self->{'returnsize'}; } +sub add_returnsize { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; +} + sub sshreturn { + # Returns for each return-file: + # rsync remote:$workdir/$file . my $self = shift; my $sshlogin = $self->sshlogin(); my $sshcmd = $sshlogin->sshcommand(); @@ -3385,24 +6687,32 @@ sub sshreturn { my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); my $pre = ""; for my $file ($self->return()) { - $file =~ s:/\./:/:g; # Rsync treats /./ special. We dont want that $file =~ s:^\./::g; # Remove ./ if any my $relpath = ($file !~ m:^/:); # Is the path relative? - # Use different subdirs depending on abs or rel path - - # Return or cleanup - my @cmd = (); - my $rsync_destdir = ($relpath ? "./" : "/"); - my $ret_file = $file; - my $remove = $::opt_cleanup ? "--remove-source-files" : ""; - # If relative path: prepend workdir/./ to avoid problems - # if the dir contains ':' and to get the right relative return path - my $replaced = ($relpath ? $self->workdir()."/./" : "") . $file; + my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); + my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file))); # --return - # Abs path: rsync -rlDzR server:/home/tange/dir/subdir/file.gz / - # Rel path: rsync -rlDzR server:./subsir/file.gz ./ - $pre .= "rsync $rsync_opt $remove $serverlogin:". - ::shell_quote_scalar($replaced) . " ".$rsync_destdir.";"; + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". + $basename . 
" ".$basedir.$cd.";"; } return $pre; } @@ -3416,21 +6726,14 @@ sub sshcleanup { my $sshcmd = $sshlogin->sshcommand(); my $serverlogin = $sshlogin->serverlogin(); my $workdir = $self->workdir(); - my $removeworkdir = ""; my $cleancmd = ""; for my $file ($self->cleanup()) { my @subworkdirs = parentdirs_of($file); - $file = ::shell_quote_scalar($file); - if(@subworkdirs) { - $removeworkdir = "; rmdir 2>/dev/null ". - join(" ",map { ::shell_quote_scalar($workdir."/".$_) } - @subworkdirs); - } - my $relpath = ($file !~ m:^/:); # Is the path relative? - my $cleandir = ($relpath ? $workdir."/" : ""); - $cleancmd .= "$sshcmd $serverlogin rm -f " - . ::shell_quote_scalar($cleandir.$file.$removeworkdir).";"; + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';'; } return $cleancmd; } @@ -3439,9 +6742,10 @@ sub cleanup { # Returns: # Files to remove at cleanup my $self = shift; - if($::opt_cleanup) { + if($opt::cleanup) { my @transfer = $self->transfer(); - return @transfer; + my @return = $self->return(); + return (@transfer,@return); } else { return (); } @@ -3453,13 +6757,13 @@ sub workdir { my $self = shift; if(not defined $self->{'workdir'}) { my $workdir; - if(defined $::opt_workdir) { - if($::opt_workdir eq ".") { + if(defined $opt::workdir) { + if($opt::workdir eq ".") { # . means current dir my $home = $ENV{'HOME'}; eval 'use Cwd'; my $cwd = cwd(); - $::opt_workdir = $cwd; + $workdir = $cwd; if($home) { # If homedir exists: remove the homedir from # workdir if cwd starts with homedir @@ -3475,16 +6779,19 @@ sub workdir { my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; if($parent_dev == $home_dev and $parent_ino == $home_ino) { # dev and ino is the same: We found the homedir. - $::opt_workdir = join("/",@dir_parts); + $workdir = join("/",@dir_parts); last; } } } - } elsif($::opt_workdir eq "...") { + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ . "-" . $self->seq(); } else { - $workdir = $::opt_workdir; + $workdir = $opt::workdir; # Rsync treats /./ special. We dont want that $workdir =~ s:/\./:/:g; # Remove /./ $workdir =~ s:/+$::; # Remove ending / if any @@ -3493,7 +6800,7 @@ sub workdir { } else { $workdir = "."; } - $self->{'workdir'} = $workdir; + $self->{'workdir'} = ::shell_quote_scalar($workdir); } return $self->{'workdir'}; } @@ -3516,93 +6823,218 @@ sub start { # Returns: # job-object or undef if job not to run my $job = shift; - my $command = $job->sshlogin_wrap(); + # Get the shell command to be executed (possibly with ssh infront). 
+ my $command = $job->wrapped(); if($Global::interactive or $Global::stderr_verbose) { - if($Global::interactive) { - print $Global::original_stderr "$command ?..."; - open(TTY,"/dev/tty") || ::die_bug("interactive-tty"); - my $answer = ; - close TTY; - my $run_yes = ($answer =~ /^\s*y/i); - if (not $run_yes) { - $command = "true"; # Run the command 'true' - } - } else { - print $Global::original_stderr "$command\n"; - } + $command = interactive_start($command); } - - local (*IN,*OUT,*ERR); my $pid; - if($Global::grouped) { - my ($outfh,$errfh,$name); - # To group we create temporary files for STDOUT and STDERR - # To avoid the cleanup unlink the files immediately (but keep them open) - ($outfh,$name) = ::tempfile(SUFFIX => ".par"); - $job->set_stdoutfilename($name); - $::opt_files or unlink $name; - ($errfh,$name) = ::tempfile(SUFFIX => ".par"); - unlink $name; + $job->openoutputfiles(); + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!"); - open OUT, '>&', $outfh or ::die_bug("Can't redirect STDOUT: $!"); - open ERR, '>&', $errfh or ::die_bug("Can't dup STDOUT: $!"); - $job->set_stdout($outfh); - $job->set_stderr($errfh); - } else { - (*OUT,*ERR)=(*STDOUT,*STDERR); + if($opt::ungroup) { + print_dryrun_and_verbose($stdout_fh,$job,$command); } - - if(($::opt_dryrun or $Global::verbose) and not $Global::grouped) { - if($Global::verbose <= 1) { - print OUT $job->replaced(),"\n"; - } else { - # Verbose level > 1: Print the rsync and stuff - print OUT $command,"\n"; - } - } - if($::opt_dryrun) { + if($opt::dryrun) { $command = "true"; } - $Global::total_running++; - $Global::total_started++; $ENV{'PARALLEL_SEQ'} = $job->seq(); $ENV{'PARALLEL_PID'} = $$; - ::debug("$Global::total_running processes. Starting (" - . $job->seq() . "): $command\n"); - if($::opt_pipe) { - my ($in); - $pid = ::open3($in, ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) || - ::die_bug("open3-pipe"); - $job->set_stdin($in); - } elsif(@::opt_a and not $Global::stdin_in_opt_a and $job->seq() == 1) { - # Give STDIN to the first job if using -a + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + ::debug("run", $Global::total_running, " processes . 
Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-pipe"); + 1; + }; + $job->set_fh(0,"w",$stdin_fh); + } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 + and $job->sshlogin()->string() eq ":") { + # Give STDIN to the first job if using -a (but only if running + # locally - otherwise CTRL-C does not work for other jobs Bug#36585) *IN = *STDIN; - $pid = ::open3("<&IN", ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) || - ::die_bug("open3-a"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-a"); + 1; + }; # Re-open to avoid complaining - open STDIN, "<&", $Global::original_stdin + open(STDIN, "<&", $Global::original_stdin) or ::die_bug("dup-\$Global::original_stdin: $!"); - } elsif ($::opt_tty and not $Global::tty_taken and -c "/dev/tty" and - open(DEVTTY, "/dev/tty")) { + } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { # Give /dev/tty to the command if no one else is using it - *IN = *DEVTTY; - $pid = ::open3("<&IN", ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) || - ::die_bug("open3-/dev/tty"); - $Global::tty_taken = $pid; - close DEVTTY; + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-/dev/tty"); + $Global::tty_taken = $pid; + close $devtty_fh; + 1; + }; } else { - $pid = ::open3(::gensym, ">&OUT", ">&ERR", $ENV{SHELL}, "-c", $command) || - ::die_bug("open3-gensym"); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-gensym"); + 1; + }; } - $job->set_pid($pid); - $job->set_starttime(); - if($::opt_timeout) { - # Timeout must be set before inserting into queue - $job->set_timeout($::opt_timeout); - $Global::timeoutq->insert($job); + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; } - return $job; +} + +sub interactive_start { + my $command = shift; + if($Global::interactive) { + ::status("$command ?..."); + open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); + my $answer = <$tty_fh>; + close $tty_fh; + my $run_yes = ($answer =~ /^\s*y/i); + if (not $run_yes) { + $command = "true"; # Run the command 'true' + } + } else { + print $Global::original_stderr "$command\n"; + } + return $command; +} + +sub print_dryrun_and_verbose { + # For $opt::ungroup we print these ASAP + # For $opt::group they are part of print() + my $stdout_fh = shift; + my $job = shift; + my $command = shift; + if($opt::dryrun or $Global::verbose) { + if($Global::verbose <= 1) { + print $stdout_fh $job->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $stdout_fh $command,"\n"; + } + } +} + +{ + my $tmuxsocket; + + sub tmux_wrap { + # Wrap command with tmux for session pPID + # Input: + # 
$actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo=::tmpname("tmx"); + + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100\n"); + ::wait_and_exit(255); + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. tmux is confused by NUL.\n"); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]//d; + $title = ::shell_quote_scalar($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title = $title.('\ 'x75); + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + ::status("See output with: $ENV{'TMUX'} -S $tmuxsocket attach\n"); + } + # TODO sh -c wrapper for >& + $tmux = $ENV{'TMUX'}." -S $tmuxsocket new-session -s p$$ -d 'sleep .2' >&/dev/null;" . + $ENV{'TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + + return "mkfifo $tmpfifo && $tmux ". + # Run in tmux + ::shell_quote_scalar + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&". + "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo; + } +} + +sub is_already_in_results { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + my $dir = $opt::results."/".$args_as_dirname; + ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); + return -e "$dir/stdout"; } sub is_already_in_joblog { @@ -3621,7 +7053,7 @@ sub should_be_retried { # 0 - do not retry # 1 - job queued for retry my $self = shift; - if (not $::opt_retries) { + if (not $opt::retries) { return 0; } if(not $self->exitstatus()) { @@ -3631,15 +7063,41 @@ sub should_be_retried { } else { # The job failed. Should it be retried? 
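# A simplified, hypothetical sketch of the retry bookkeeping implemented
# below: a failed job is put back on the queue until it has failed
# --retries times (%sketch_failed and the subroutine name are made up for
# illustration).
my %sketch_failed;
sub sketch_should_retry {
    my ($seq, $exitstatus, $max_retries) = @_;
    return 0 if not $exitstatus;                          # succeeded: done
    return 0 if ++$sketch_failed{$seq} >= $max_retries;   # retried enough
    return 1;                                             # re-queue the job
}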
$self->add_failed_here(); - if($self->total_failed() == $::opt_retries) { + if($self->total_failed() == $opt::retries) { # This has been retried enough return 0; } else { # This command should be retried + $self->set_endtime(undef); + $self->reset_exitstatus(); $Global::JobQueue->unget($self); - ::debug("Retry ".$self->seq()."\n"); + ::debug("run", "Retry ", $self->seq(), "\n"); return 1; } +} +} + +{ + my (%print_later,$job_end_sequence); + + sub print_earlier_jobs { + # Print jobs completed earlier + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_end_sequence ||= 1; + ::debug("run", "Looking for: $job_end_sequence ", + "Current: ", $job->seq(), "\n"); + for(my $j = $print_later{$job_end_sequence}; + $j or vec($Global::job_already_run,$job_end_sequence,1); + $job_end_sequence++, + $j = $print_later{$job_end_sequence}) { + ::debug("run", "Found job end $job_end_sequence"); + if($j) { + $j->print(); + delete $print_later{$job_end_sequence}; + } + } } } @@ -3648,95 +7106,303 @@ sub print { # Returns: N/A my $self = shift; - ::debug(">>joboutput ".$self->replaced()."\n"); - # Only relevant for grouping - $Global::grouped or return; - my $out = $self->stdout(); - my $err = $self->stderr(); - my $command = $self->sshlogin_wrap(); - - if($Global::joblog) { - my $cmd; - if($Global::verbose <= 1) { - $cmd = $self->replaced(); - } else { - # Verbose level > 1: Print the rsync and stuff - $cmd = $command; - } - printf $Global::joblog - join("\t", $self->seq(), $self->sshlogin()->string(), - $self->starttime(), $self->runtime(), - $self->transfersize(), $self->returnsize(), - $self->exitstatus(), $self->exitsignal(), $cmd - ). "\n"; - flush $Global::joblog; - $self->set_job_in_joblog(); + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + unlink $self->fh(1,"name"); } - - if(($::opt_dryrun or $Global::verbose) and $Global::grouped) { - if($Global::verbose <= 1) { - print STDOUT $self->replaced(),"\n"; - } else { - # Verbose level > 1: Print the rsync and stuff - print STDOUT $command,"\n"; - } - # If STDOUT and STDERR are merged, - # we want the command to be printed first - # so flush to avoid STDOUT being buffered - flush STDOUT; - } - seek $err, 0, 0; - if($Global::debug) { - print STDERR "ERR:\n"; - } - if($::opt_tag or defined $::opt_tagstring) { - my $tag = $self->tag(); - while(<$err>) { - print STDERR $tag,$_; - } + if($opt::pipe and $self->virgin()) { + # Skip --joblog, --dryrun, --verbose } else { - my $buf; - while(sysread($err,$buf,1000_000)) { - print STDERR $buf; + if($opt::ungroup and $Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + # Printing is only relevant for grouped/--line-buffer output. 
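# A simplified, hypothetical sketch of the in-order printing done by
# print_earlier_jobs() above: finished jobs are parked in a hash keyed by
# sequence number and flushed only when their turn comes up (sketch_* names
# are made up for illustration).
my (%sketch_done, $sketch_next_seq);
sub sketch_print_in_order {
    my ($seq, $output) = @_;
    $sketch_done{$seq} = $output;
    $sketch_next_seq ||= 1;
    while(defined $sketch_done{$sketch_next_seq}) {
        print delete $sketch_done{$sketch_next_seq};
        $sketch_next_seq++;
    }
}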
+ $opt::ungroup and return; + } + + # Check for disk full + ::exit_if_disk_full(); + + if(($opt::dryrun or $Global::verbose) + and + not $self->{'verbose_printed'}) { + $self->{'verbose_printed'}++; + if($Global::verbose <= 1) { + print STDOUT $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print STDOUT $self->wrapped(),"\n"; + } + # If STDOUT and STDERR are merged, + # we want the command to be printed first + # so flush to avoid STDOUT being buffered + flush STDOUT; } } - flush STDERR; - - if($::opt_files) { - print STDOUT $self->{'stdoutfilename'},"\n"; - } else { - my $buf; - seek $out, 0, 0; - if($Global::debug) { - print STDOUT "OUT:\n"; + for my $fdno (sort { $a <=> $b } keys %Global::fd) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fd = $Global::fd{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; } - if($::opt_tag or defined $::opt_tagstring) { + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); + if($opt::files) { + $self->files_print($fdno,$in_fh,$out_fd); + } elsif($opt::linebuffer) { + # Line buffered print out + $self->linebuffer_print($fdno,$in_fh,$out_fd); + } elsif($opt::tag or defined $opt::tagstring) { + $self->tag_print($fdno,$in_fh,$out_fd); + } else { + $self->normal_print($fdno,$in_fh,$out_fd); + } + flush $out_fd; + } + ::debug("print", "<{'exitstatus'} + and not ($self->virgin() and $opt::pipe)) { + # Add to joblog when finished + $self->print_joblog(); + } +} + +sub files_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed.\n"); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files if --files was set + for my $fdno (1,2) { + unlink $self->fh($fdno,"name"); + unlink $self->fh($fdno,"unlink"); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n"; + $self->add_returnsize(-s $self->fh($fdno,"name")); + } +} + +sub linebuffer_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $partial = \$self->{'partial_line',$fdno}; + + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed.\n"); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Blocked reading in final round + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + for my $fdno (1,2) { + my $fdr = $self->fh($fdno,'r'); + my $flags; + fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags + fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + } + } + # This seek will clear EOF + seek $in_fh, tell($in_fh), 0; + # The read is non-blocking: The $in_fh is set to non-blocking. 
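# A simplified, hypothetical sketch of the line buffering done below:
# append whatever was read to a partial buffer, print up to and including
# the last newline, and keep the incomplete tail for the next round
# (rindex stands in for ::rindex64; the sketch_* name is illustrative).
sub sketch_linebuffer_flush {
    my ($partial_ref, $chunk) = @_;
    $$partial_ref .= $chunk;
    my $i = rindex($$partial_ref, "\n");
    if($i != -1) {
        print substr($$partial_ref, 0, $i + 1);
        substr($$partial_ref, 0, $i + 1) = "";
    }
}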
+ # 32768 --tag = 5.1s + # 327680 --tag = 4.4s + # 1024000 --tag = 4.4s + # 3276800 --tag = 4.3s + # 10240000 --tag = 4.3s + # 32768000 --tag = 4.7s + my $outputlength = 0; + while(read($in_fh,substr($$partial,length $$partial),3276800)) { + # Append to $$partial + # Find the last \n + my $i = ::rindex64($partial,"\n"); + if($i != -1) { + # One or more complete lines were found + if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; + # Length of partial line has changed: Find the last \n again + $i = ::rindex64($partial,"\n"); + } + $outputlength += $i+1; + if($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + my $tag = $self->tag(); + substr($$partial,0,$i+1) =~ s/^/$tag/gm; + # Length of partial line has changed: Find the last \n again + $i = ::rindex64($partial,"\n"); + } + # Print up to and including the last \n + print $out_fd substr($$partial,0,$i+1); + # Remove the printed part + substr($$partial,0,$i+1) = ""; + } + } + $self->add_returnsize($outputlength); + if(defined $self->{'exitstatus'}) { + # If the job is dead: print the remaining partial line + # read remaining + $self->add_returnsize(length $$partial); + if($$partial and ($opt::tag or defined $opt::tagstring)) { my $tag = $self->tag(); - while(<$out>) { - print STDOUT $tag,$_; - } + $$partial =~ s/^/$tag/gm; + } + print $out_fd $$partial; + # Release the memory + $$partial = undef; + if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running } else { - my $buf; - while(sysread($out,$buf,1000_000)) { - print STDOUT $buf; + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed.\n"); + $self->set_exitstatus(255); } } - flush STDOUT; - ::debug("<fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed.\n"); + $self->set_exitstatus(255); + } + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $tag = $self->tag(); + if($fdno == 2) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + while(<$in_fh>) { + if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { + # Skip + } else { + $self->add_returnsize(length $_); + print $out_fd $tag,$_; + } + # At most run the loop once + last; + } + } + my $outputlength = 0; + while(<$in_fh>) { + print $out_fd $tag,$_; + $outputlength += length $_; + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed.\n"); + $self->set_exitstatus(255); + } +} + +sub normal_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed.\n"); + $self->set_exitstatus(255); + } + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + if($fdno == 2) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. 
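# A simplified, hypothetical sketch of how --tag output is produced in
# tag_print()/linebuffer_print() above: the tag is prepended to every line
# with one multi-line substitution (the sketch_* data is made up).
my $sketch_tag  = "server1\t";
my $sketch_text = "line 1\nline 2\n";
$sketch_text =~ s/^/$sketch_tag/gm;
print $sketch_text;   # -> server1<TAB>line 1 / server1<TAB>line 2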
+ sysread($in_fh,$buf,1_000); + $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; + print $out_fd $buf; + $self->add_returnsize(length $buf); + } + my $outputlength = 0; + while(sysread($in_fh,$buf,32768)) { + print $out_fd $buf; + $outputlength += length $buf; + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed.\n"); + $self->set_exitstatus(255); + } +} + +sub print_joblog { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = "@command"; + } + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); } sub tag { my $self = shift; - if(not defined $self->{'tag'}) { - $self->{'tag'} = $self->{'commandline'}-> - replace_placeholders($::opt_tagstring,0)."\t"; + if($opt::tag or defined $opt::tagstring) { + if(not defined $self->{'tag'}) { + $self->{'tag'} = $self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)."\t"; + } + } else { + return ""; } return $self->{'tag'}; } +sub hostgroups { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + sub exitstatus { my $self = shift; return $self->{'exitstatus'}; @@ -3755,6 +7421,11 @@ sub set_exitstatus { } } +sub reset_exitstatus { + my $self = shift; + $self->{'exitstatus'} = undef; +} + sub exitsignal { my $self = shift; return $self->{'exitsignal'}; @@ -3767,70 +7438,96 @@ sub set_exitsignal { } +{ + my $status_printed; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? + # Returns: N/A + my $job = shift; + if($job->exitstatus() or $job->exitsignal()) { + $Global::exitstatus++; + $Global::total_failed++; + if($opt::halt) { + if($opt::halt == 1 + or + ($opt::halt > 0 and $opt::halt < 1 and $Global::total_failed > 3 + and + $Global::total_failed / $Global::total_started > $opt::halt)) { + # If halt on error == 1 or --halt 10% + # we should gracefully exit + ::status + ("$Global::progname: Starting no more jobs. ", + "Waiting for ", scalar(keys %Global::running), + " jobs to finish. This job failed:\n", + $job->replaced(),"\n"); + $Global::start_no_new_jobs ||= 1; + $Global::halt_exitstatus = $job->exitstatus(); + } elsif($opt::halt == 2) { + # If halt on error == 2 we should exit immediately + if(not $status_printed++) { + ::status + ("$Global::progname: This job failed:\n", + $job->replaced(),"\n"); + } + ::killall(); + ::wait_and_exit($job->exitstatus()); + } + } + } else { + if($opt::halt) { + if($opt::halt == -1) { + # If halt on error == -1 + # we should gracefully exit + ::status + ("$Global::progname: Starting no more jobs. ", + "Waiting for ", scalar(keys %Global::running), + " jobs to finish. 
This job succeeded:\n", + $job->replaced(),"\n"); + $Global::start_no_new_jobs ||= 1; + $Global::halt_exitstatus = $job->exitstatus(); + } elsif($opt::halt == -2) { + # If halt on error == -2 we should exit immediately + ::status + ("$Global::progname: This job succeeded:\n", + $job->replaced(),"\n"); + ::killall(); + ::wait_and_exit($job->exitstatus()); + } + } + } + } +} + package CommandLine; sub new { my $class = shift; my $seq = shift; - my $command = ::undef_as_empty(shift); + my $commandref = shift; + $commandref || die; my $arg_queue = shift; my $context_replace = shift; - my $max_number_of_args = shift; # for -N and normal (-N1) + my $max_number_of_args = shift; # for -N and normal (-n1) my $return_files = shift; - my $len = { - '{}' => 0, # Total length of all {} replaced with all args - '{/}' => 0, # Total length of all {/} replaced with all args - '{//}' => 0, # Total length of all {//} replaced with all args - '{.}' => 0, # Total length of all {.} replaced with all args - '{/.}' => 0, # Total length of all {/.} replaced with all args - 'no_args' => undef, # Length of command w/ all replacement args removed - 'context' => undef, # Length of context of an additional arg - }; - my($sum,%replacecount); - ($sum,$len->{'no_args'},$len->{'context'},$len->{'contextgroups'}, - %replacecount) = number_of_replacements($command,$context_replace); - if($sum == 0) { - if($command eq "") { - $command = $Global::replace{'{}'}; - } else { - # Add {} to the command if there are no {...}'s - $command .=" ".$Global::replace{'{}'}; - } - ($sum,$len->{'no_args'},$len->{'context'},$len->{'contextgroups'}, - %replacecount) = number_of_replacements($command,$context_replace); - } - if(defined $::opt_tagstring) { - my ($dummy1,$dummy2,$dummy3,$dummy4,%repcount) = - number_of_replacements($::opt_tagstring,$context_replace); - # Merge %repcount with %replacecount to get the keys - # for replacing replacement strings in $::opt_tagstring - # The number, however, does not matter. 
- for (keys %repcount) { - $replacecount{$_} ||= 0; - } - } - - my %positional_replace; - my %multi_replace; - for my $used (keys %replacecount) { - if($used =~ /^{(\d+)(\D*)}$/) { - $positional_replace{$1} = '\{'.$2.'\}'; - } else { - $multi_replace{$used} = $used; - } + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; } return bless { - 'command' => $command, + 'command' => $commandref, 'seq' => $seq, - 'len' => $len, + 'len' => \%len, 'arg_list' => [], 'arg_queue' => $arg_queue, 'max_number_of_args' => $max_number_of_args, 'replacecount' => \%replacecount, 'context_replace' => $context_replace, 'return_files' => $return_files, - 'positional_replace' => \%positional_replace, - 'multi_replace' => \%multi_replace, 'replaced' => undef, }, ref($class) || $class; } @@ -3840,23 +7537,69 @@ sub seq { return $self->{'seq'}; } +sub set_seq { + my $self = shift; + $self->{'seq'} = shift; +} + +{ + my $max_slot_number; + + sub slot { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $Global::max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; + } +} + sub populate { # Add arguments from arg_queue until the number of arguments or # max line length is reached + # Uses: + # $Global::minimal_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $CommandLine::already_spread + # $Global::max_jobs_running + # Returns: N/A my $self = shift; - if($::opt_pipe) { - # Do no read any args - $self->push([Arg->new("")]); - return; - } my $next_arg; + my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length(); + + if($opt::cat) { + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } + if($opt::fifo) { + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } while (not $self->{'arg_queue'}->empty()) { $next_arg = $self->{'arg_queue'}->get(); if(not defined $next_arg) { next; } $self->push($next_arg); - if($self->len() >= Limits::Command::max_length()) { + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error # TODO stuff about -x opt_x if($self->number_of_args() > 1) { # There is something to work on @@ -3864,13 +7607,15 @@ sub populate { last; } else { my $args = join(" ", map { $_->orig() } @$next_arg); - print STDERR ("$Global::progname: Command line too ", - "long (", $self->len(), " >= ", - Limits::Command::max_length(), - ") at number ", - $self->{'arg_queue'}->arg_number(), - ": ". - (substr($args,0,50))."...\n"); + ::error("Command line too long (", + $self->len(), " >= ", + $max_len, + ") at input ", + $self->{'arg_queue'}->arg_number(), + ": ". + ((length $args > 50) ? 
+ (substr($args,0,50))."...\n" : + $args."\n")); $self->{'arg_queue'}->unget($self->pop()); ::wait_and_exit(255); } @@ -3882,11 +7627,11 @@ sub populate { } } } - if(($::opt_m or $::opt_X) and not $CommandLine::already_spread + if(($opt::m or $opt::X) and not $CommandLine::already_spread and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { # -m or -X and EOF => Spread the arguments over all jobslots # (unless they are already spread) - $CommandLine::already_spread++; + $CommandLine::already_spread ||= 1; if($self->number_of_args() > 1) { $self->{'max_number_of_args'} = ::ceil($self->number_of_args()/$Global::max_jobs_running); @@ -3902,29 +7647,22 @@ sub populate { sub push { # Add one or more records as arguments + # Returns: N/A my $self = shift; my $record = shift; push @{$self->{'arg_list'}}, $record; - #::my_dump($record); - my $arg_no = ($self->number_of_args()-1) * ($#$record+1); + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + my $rep; for my $arg (@$record) { - $arg_no++; if(defined $arg) { - if($self->{'positional_replace'}{$arg_no}) { - # TODO probably bug here if both {1.} and {1} are used - for my $used (keys %{$self->{'replacecount'}}) { - # {} {/} {//} {.} or {/.} - my $replacementfunction = - $self->{'positional_replace'}{$arg_no}; - # Find the single replacements - $self->{'len'}{$used} += - length $arg->replace($replacementfunction); - } - } - for my $used (keys %{$self->{'multi_replace'}}) { - # Add to the multireplacement - $self->{'len'}{$used} += length $arg->replace($used); + for my $perlexpr (keys %{$self->{'replacecount'}}) { + # 50% faster than below + $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self); + # $rep = $arg->replace($perlexpr,$quote_arg,$self); + # $self->{'len'}{$perlexpr} += length $rep; + # ::debug("length", "Length: ", length $rep, + # "(", $perlexpr, "=>", $rep, ")\n"); } } } @@ -3932,13 +7670,16 @@ sub push { sub pop { # Remove last argument + # Returns: + # the last record my $self = shift; my $record = pop @{$self->{'arg_list'}}; + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; for my $arg (@$record) { if(defined $arg) { - for my $replacement_string (keys %{$self->{'replacecount'}}) { - $self->{'len'}{$replacement_string} -= - length $arg->replace($replacement_string); + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); } } } @@ -3946,7 +7687,9 @@ sub pop { } sub pop_all { - # Remove all arguments + # Remove all arguments and zeros the length of replacement strings + # Returns: + # all records my $self = shift; my @popped = @{$self->{'arg_list'}}; for my $replacement_string (keys %{$self->{'replacecount'}}) { @@ -3957,11 +7700,53 @@ sub pop_all { } sub number_of_args { + # The number of records + # Returns: + # number of records my $self = shift; - # This is really number of records + # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd + # Mdzrvadp Nvmdymeaz az 2011-04-10. Ftue oaawuq dqxqmeqp az + # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq + # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue. + # + # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG + # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq + # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq + # eagdoq oapq. + # + # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz + # ftqk dqmp ftue. 
Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq + # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq + # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq + # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq + # eagdoq oapq U daf13'qp ftq eagdoq oapq + # tffb://qz.iuwubqpum.ads/iuwu/DAF13 + # + # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk + # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq + # tmp fa nq daf13'qp. + # + # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita + # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq + # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz. + # + # This is really the number of records return $#{$self->{'arg_list'}}+1; } +sub number_of_recargs { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + sub args_as_string { # Returns: # all unmodified arguments joined with ' ' (similar to {}) @@ -3970,356 +7755,292 @@ sub args_as_string { map { @$_ } @{$self->{'arg_list'}}); } +sub args_as_dirname { + # Returns: + # all unmodified arguments joined with '/' (similar to {}) + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my @res = (); + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + map { my $s = $_; + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + $s =~ s/\\/\\\\/g; + $s =~ s/\t/\\t/g; + $s =~ s/\0/\\0/g; + $s =~ s:/:\\_:g; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig()); + } + } + return join "/", @res; +} + +sub header_indexes_sorted { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. 
$max_col; + return @header_indexes_sorted; +} + sub len { + # Uses: + # $opt::shellquote # The length of the command line with args substituted my $self = shift; my $len = 0; # Add length of the original command with no args - $len += $self->{'len'}{'no_args'}; + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + my $recargs = $self->number_of_recargs(); if($self->{'context_replace'}) { - $len += $self->number_of_args()*$self->{'len'}{'context'}; + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; for my $replstring (keys %{$self->{'replacecount'}}) { - if(defined $self->{'len'}{$replstring}) { - $len += $self->{'len'}{$replstring} * - $self->{'replacecount'}{$replstring}; - } + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); } - $len += ($self->number_of_args()-1) * $self->{'len'}{'contextgroups'}; + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); } else { # Each replacement string may occur several times # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); for my $replstring (keys %{$self->{'replacecount'}}) { - if(defined $self->{'len'}{$replstring}) { - $len += $self->{'len'}{$replstring} * - $self->{'replacecount'}{$replstring}; - } - if($Global::replace{$replstring}) { - # This is a multi replacestring ({} {/} {//} {.} {/.}) - # Add each space between two arguments - my $number_of_args = ($#{$self->{'arg_list'}[0]}+1) * - $self->number_of_args(); - $len += ($number_of_args-1) * - $self->{'replacecount'}{$replstring}; - } + # (space between regargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; } } - if($::opt_nice) { + if($opt::nice) { # Pessimistic length if --nice is set # Worse than worst case: every char needs to be quoted with \ $len *= 2; } - if($::opt_shellquote) { + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: every char needs to be quoted with \ + $len *= 2; + } + if($opt::shellquote) { # Pessimistic length if --shellquote is set # Worse than worst case: every char needs to be quoted with \ twice $len *= 4; } + # If we are using --env, add the prefix for that, too. + $len += $Global::envvarlen; + return $len; } -sub multi_regexp { - if(not $CommandLine::multi_regexp) { - $CommandLine::multi_regexp = - "(?:". 
- join("|",map {my $a=$_; $a =~ s/(\W)/\\$1/g; $a} - ($Global::replace{"{}"}, - $Global::replace{"{.}"}, - $Global::replace{"{/}"}, - $Global::replace{"{//}"}, - $Global::replace{"{/.}"}) - ).")"; - } - return $CommandLine::multi_regexp; -} - -sub number_of_replacements { - # Returns: - # sum_of_count, length_of_command_with_no_args, - # length_of_context { 'replacementstring' => count } - my $command = shift; - my $context_replace = shift; - my %count = (); - my $sum = 0; - my $cmd = $command; - my $multi_regexp = multi_regexp(); - my $replacement_regexp = - "(?:". ::maybe_quote('\{') . - '\d+(?:|\.|/\.|/|//)?' . # {n} {n.} {n/.} {n/} {n//} - ::maybe_quote('\}') . - '|'. - join("|",map {$a=$_;$a=~s/(\W)/\\$1/g; $a} values %Global::replace). - ")"; - my %c = (); - $cmd =~ s/($replacement_regexp)/$c{$1}++;"\0"/ogex; - for my $k (keys %c) { - if(defined $Global::replace_rev{$k}) { - $count{$Global::replace_rev{$k}} = $c{$k}; - } else { - $count{::maybe_unquote($k)} = $c{$k}; - } - $sum += $c{$k}; - } - my $number_of_context_groups = 0; - my $no_args; - my $context; - if($context_replace) { - $cmd = $command; - while($cmd =~ s/\S*$multi_regexp\S*//o) { - $number_of_context_groups++; - } - $no_args = length $cmd; - $context = length($command) - $no_args; - } else { - $cmd = $command; - $cmd =~ s/$multi_regexp//go; - $cmd =~ s/$replacement_regexp//go; - $no_args = length($cmd); - $context = length($command) - $no_args; - } - for my $k (keys %count) { - if(defined $Global::replace{$k}) { - # {} {/} {//} {.} {/.} {#} - $context -= (length $Global::replace{$k}) * $count{$k}; - } else { - # {n} - $context -= (length $k) * $count{$k}; - } - } - return ($sum,$no_args,$context,$number_of_context_groups,%count); -} - sub replaced { + # Uses: + # $Global::noquote + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended my $self = shift; if(not defined $self->{'replaced'}) { - $self->{'replaced'} = $self->replace_placeholders($self->{'command'},0); - if($self->{'replaced'} =~ /^\s*(-\S+)/) { - # Is this really a command in $PATH starting with '-'? - my $cmd = $1; - if(not grep { -e $_."/".$cmd } split(":",$ENV{'PATH'})) { - print STDERR "parallel: Error:" - . " Command ($cmd) starts with '-'." - . " Is this a wrong option?\n"; - ::wait_and_exit(255); - } + # Don't quote arguments if the input is the full command line + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); } - if($::opt_nice) { - # Prepend nice -n19 $SHELL -c - # and quote - $self->{'replaced'} = nice() ." -n" . $::opt_nice . " " - . $ENV{SHELL}." -c " - . ::shell_quote_scalar($self->{'replaced'}); - } - if($::opt_shellquote) { - # Prepend echo - # and quote twice - $self->{'replaced'} = "echo " . - ::shell_quote_scalar(::shell_quote_scalar($self->{'replaced'})); - } - } - if($::oodebug and length($self->{'replaced'}) != ($self->len())) { - ::my_dump($self); - Carp::cluck("replaced len=" . length($self->{'replaced'}) - . " computed=" . 
($self->len())); } return $self->{'replaced'}; } -sub nice { - # Returns: - # path to nice - # Needed because tcsh's built-in nice does not support 'nice -n19' - if(not $Global::path_to_nice) { - $Global::path_to_nice = "nice"; - for my $n ((split/:/, $ENV{'PATH'}), "/bin", "/usr/bin") { - if(-x $n."/nice") { - $Global::path_to_nice = $n."/nice"; - last; - } - } - } - return $Global::path_to_nice; -} +{ + my @target; + my $context_replace; + my @arg; + my $perl_expressions_as_re; -sub replace_placeholders { - my $self = shift; - my $target = shift; - my $quoteall = shift; - my $context_replace = $self->{'context_replace'}; - my $replaced; - - if($self->{'context_replace'}) { - $replaced = $self->context_replace_placeholders($target,$quoteall); - } else { - $replaced = $self->simple_replace_placeholders($target,$quoteall); - } - return $replaced; -} - -sub context_replace_placeholders { - my $self = shift; - my $target = shift; - my $quoteall = shift; - # -X = context replace - # maybe multiple input sources - # maybe --xapply - # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] - - my @args=(); - my @used_multi; - my %replace; - - for my $record (@{$self->{'arg_list'}}) { - # Merge arguments from records into args for easy access - CORE::push @args, @$record; - } - - # Replacement functions - my @rep = qw({} {/} {//} {.} {/.}); - # Inner part of replacement functions - my @rep_inner = ('', '/', '//', '.', '/.'); - # Regexp for replacement functions - my $rep_regexp = "(?:". join('|', map { $_=~s/(\W)/\\$1/g; $_} @rep) . ")"; - # Regexp for inner replacement functions - my $rep_inner_regexp = "(?:". join('|', map { $_=~s/(\W)/\\$1/g; $_} @rep_inner) . ")"; - # Seq replace string: {#} - my $rep_seq_regexp = '(?:'.::maybe_quote('\{\#\}').")"; - # Normal replace strings - my $rep_str_regexp = multi_regexp(); - # Positional replace strings - my $rep_str_pos_regexp = ::maybe_quote('\{').'\d+'.$rep_inner_regexp.::maybe_quote('\}'); - - # Fish out the words that have replacement strings in them - my $tt = $target; - my %word; - while($tt =~ s/(\S*(?:$rep_str_regexp|$rep_str_pos_regexp|$rep_seq_regexp)\S*)/\0/o) { - $word{$1}++; - } - # For each word: Generate the replacement string for that word. - for my $origword (keys %word) { - my @pos_replacements=(); - my @replacements=(); - my $w; - my $word = $origword; # Make a local modifyable copy - - # replace {#} if it exists - $word =~ s/$rep_seq_regexp/$self->seq()/geo; - if($word =~ /$rep_str_pos_regexp/o) { - # There are positional replacement strings - my @argset; - if($#{$self->{'arg_list'}->[0]} == 0) { - # Only one input source: Treat it as a set - @argset = [ @args ]; + sub fish_out_words_containing_replacement_strings { + my %word; + for (@target) { + my $tt = $_; + ::debug("replace", "Target: $tt"); + # Command line template: + # a{1}b{}c{}d + # becomes: + # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d + # becomes: + # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d + # Input A B C (no context) becomes: + # A B C => aAbA B CcA B Cd + # Input A B C (context -X) becomes: + # A B C => aAbAcAd aAbBcBd aAbCcCd + if($context_replace) { + while($tt =~ s/([^\s\257]* # before {= + (?: + \257< # {= + [^\257]*? # The perl expression + \257> # =} + [^\s\257]* # after =} + )+)/ /x) { + # $1 = pre \257 perlexpr \257 post + $word{"$1"} ||= 1; + } } else { - @argset = @{$self->{'arg_list'}}; - } - # Match 1..n where n = max args in a argset - my $pos_regexp = "(?:".join("|", 1 .. 
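# A toy restatement of the example in the comment above, for the template
# a{1}b{}c{}d and the arguments A B C. This is not the real code path, only
# the visible effect of context replace (-X) versus normal replace:
sub _expand_example_sketch {
    my ($context_replace, @args) = @_;
    if($context_replace) {
        # -X: the context around each replacement is repeated per argument;
        # {1} always refers to the first argument
        return join " ", map { "a$args[0]b${_}c${_}d" } @args;
    } else {
        # no -X: {} expands to all arguments joined by a space
        my $all = join " ", @args;
        return "a$args[0]b${all}c${all}d";
    }
}
# _expand_example_sketch(1, "A", "B", "C") eq "aAbAcAd aAbBcBd aAbCcCd"
# _expand_example_sketch(0, "A", "B", "C") eq "aAbA B CcA B Cd"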
$#{$argset[0]}+1).")"; - my $pos_inner_regexp = ::maybe_quote('\{') . - "($pos_regexp)($rep_inner_regexp)" . - ::maybe_quote('\}'); - for my $argset (@argset) { - # Replace all positional arguments - e.g. {7/.} - # with the replacement function - e.g. {/.} - # of that argument - if(defined $self->{'max_number_of_args'}) { - # Fill up if we have a half completed line, so {n} will be empty - while($#$argset < $self->{'max_number_of_args'}) { - CORE::push @$argset, Arg->new(""); - } - } - $w = $word; - $w =~ s/$pos_inner_regexp/$argset->[$1-1]->replace('{'.$2.'}')/geo; - CORE::push @pos_replacements, $w; - } - } - if(not @pos_replacements) { - @pos_replacements = ($word); - } - - if($word =~ m:$rep_str_regexp:) { - # There are normal replacement strings - for my $w (@pos_replacements) { - for my $arg (@args) { - my $wmulti = $w; - $wmulti =~ s/($rep_str_regexp)/$arg->replace($Global::replace_rev{$1})/geo; - CORE::push @replacements, $wmulti; + while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { + # $f = \257 perlexpr \257 + $word{$1} ||= 1; } } } - if(@replacements) { - CORE::push @{$replace{$origword}}, @replacements; - } else { - CORE::push @{$replace{$origword}}, @pos_replacements; + return keys %word; + } + + sub flatten_arg_list { + my $arglist_ref = shift; + @arg = (); + for my $record (@$arglist_ref) { + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # Merge arg-objects from records into @arg for easy access + CORE::push @arg, @$record; } + # Add one arg if empty to allow {#} and {%} to be computed only once + if(not @arg) { @arg = (Arg->new("")); } } - # Substitute the replace strings with the replacement values - # Must be sorted by length if a short word is a substring of a long word - my $regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_} - sort { length $b <=> length $a } keys %word); - $target =~ s/($regexp)/join(" ",@{$replace{$1}})/ge; - return $target; -} -sub simple_replace_placeholders { - # no context (no -X) - # maybe multiple input sources - # maybe --xapply - my $self = shift; - my $target = shift; - my $quoteall = shift; - my @args=(); - my @used_multi; - my %replace; + sub replace_placeholders { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? + # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + $context_replace = $self->{'context_replace'}; + @target = @$targetref; + ::debug("replace", "Replace @target\n"); + # -X = context replace + # maybe multiple input sources + # maybe --xapply + if(not @target) { + # @target is empty: Return empty array + return @target; + } + # Fish out the words that have replacement strings in them + my @word = fish_out_words_containing_replacement_strings(); + flatten_arg_list($self->{'arg_list'}); - for my $record (@{$self->{'arg_list'}}) { - # Merge arguments from records into args for easy access - CORE::push @args, @$record; - } - # Which replace strings are used? - # {#} {} {/} {//} {.} {/.} {n} {n/} {n//} {n.} {n/.} - for my $used (keys %{$self->{'replacecount'}}) { - # What are the replacement values for the replace strings? 
- if(grep { $used eq $_ } qw({} {/} {//} {.} {/.})) { - # {} {/} {//} {.} {/.} - $replace{$Global::replace{$used}} = - join(" ", map { $_->replace($used) } @args); - } elsif($used =~ /^\{(\d+)(|\/|\/\/|\.|\/\.)\}$/) { - # {n} {n/} {n//} {n.} {n/.} - my $positional = $1; # number if any - my $replacementfunction = "{".::undef_as_empty($2)."}"; # {} {/} {//} {.} or {/.} - # If -q then the replacementstrings will be quoted, too - # {1.} -> \{1.\} - $Global::replace{$used} ||= ::maybe_quote($used); - if(defined $args[$positional-1]) { - # we have a matching argument for {n} - $replace{$Global::replace{$used}} = - $args[$positional-1]->replace($replacementfunction); - } else { - if($positional <= $self->{'max_number_of_args'}) { - # Fill up if we have a half completed line - $replace{$Global::replace{$used}} = ""; + # Number of arguments - used for positional arguments + my $n = $#arg+1; + + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + for my $word (@word) { + # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF + ::debug("replace", "Replacing in $word\n"); + my $normal_replace; + + # for each arg: + # replace replacement strings with replacement in the word value + # push to replace word value + $perl_expressions_as_re ||= + join("|", map {s/^-?\d+//; "\Q$_\E"} keys %{$self->{'replacecount'}}); + for my $arg (@arg) { + my $val = $word; + # Replace {= perl expr =} with value for each arg + $val =~ s{\257<(-?\d+)?($perl_expressions_as_re)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($arg[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $arg[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? $arg->replace($2,$quote_arg,$self) : ""); + } + }goxe; + if($quote) { + CORE::push(@{$replace{::shell_quote_scalar($word)}}, + ::shell_quote_scalar($val)); + } else { + CORE::push(@{$replace{$word}}, $val); } + # No normal replacements => only run once + $normal_replace or last; } - } elsif($used eq "{#}") { - # {#} - $replace{$Global::replace{$used}} = $self->seq(); - } else { - ::die_bug('simple_replace_placeholders_20110530'); } - } - # Substitute the replace strings with the replacement values - my $regexp = join('|', map { $_=~s/(\W)/\\$1/g; $_} keys %replace); - if($regexp) { - if($quoteall) { - # This is for --return: The whole expression must be - # quoted - not just the replacements - %replace = map { $_ => ::shell_unquote($replace{$_}) } keys %replace; - $target =~ s/($regexp)/$replace{$1}/g; - $target = ::shell_quote_scalar($target); - } else { - $target =~ s/($regexp)/$replace{$1}/g; + + if($quote) { + @target = ::shell_quote(@target); } + # ::debug("replace", "%replace=",::my_dump(%replace),"\n"); + if(%replace) { + # Substitute the replace strings with the replacement values + # Must be sorted by length if a short word is a substring of a long word + my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } + sort { length $b <=> length $a } keys %replace); + for(@target) { + s/($regexp)/join(" ",@{$replace{$1}})/ge; + } + } + ::debug("replace", "Return @target\n"); + return wantarray ? 
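# "Must be sorted by length" above matters because a regexp alternation is
# tried left to right: if a short placeholder came before a longer one that
# contains it, the long one would never match. A small demonstration with
# made-up placeholder strings:
my %demo_replace = ("{}" => "FILE", "{}x" => "FILEX");
my $demo_re = join '|', map { quotemeta }
              sort { length $b <=> length $a } keys %demo_replace;
my $demo = "run {}x and {}";
$demo =~ s/($demo_re)/$demo_replace{$1}/g;
# $demo is now "run FILEX and FILE"; without the length sort "{}" could
# match inside "{}x" and leave a stray "x" behind.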
@target : "@target"; } - return $target; } @@ -4327,17 +8048,81 @@ package CommandLineQueue; sub new { my $class = shift; - my $command = shift; + my $commandref = shift; my $read_from = shift; my $context_replace = shift; my $max_number_of_args = shift; my $return_files = shift; my @unget = (); + my ($count,$posrpl,$perlexpr); + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $dummy = ''; + # If the first command start with '-' it is probably an option + if($command[0] =~ /^\s*(-\S+)/) { + # Is this really a command in $PATH starting with '-'? + my $cmd = $1; + if(not ::which($cmd)) { + ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n"); + ::wait_and_exit(255); + } + } + # Replace replacement strings with {= perl expr =} + @command = merge_rpl_parts(@command); + + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \257< and \257> + # in @command, --return and --tagstring (if used) + for(@command,@$return_files, + (defined $opt::tagstring ? $opt::tagstring : $dummy)) { + # Disallow \257 to avoid nested {= {= =} =} + if(/\257/) { + ::error("Command cannot contain the character \257. Use a function for that.\n"); + ::wait_and_exit(255); + } + # Needs to match rightmost left parens (Perl defaults to leftmost) + # to deal with: {={==} + while(s{([^\257]*) \Q$Global::parensleft\E ([^\257]*?) \Q$Global::parensright\E } + {$1\257<$2\257>}gx) {} + for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { + # Replace long --rpl's before short ones, as a short may be a + # substring of a long: + # --rpl '% s/a/b/' --rpl '%% s/b/a/' + # Replace the short hand string (--rpl) + # with the {= perl expr =} + # Avoid replacing inside existing {= perl expr =} + while(s{((^|\257>)[^\257]*?) # Don't replace after \257 unless \257> + \Q$rpl\E} + {$1\257<$Global::rpl{$rpl}\257>}xg) { + } + # Do the same for the positional replacement strings + # A bit harder as we have to put in the position number + $posrpl = $rpl; + if($posrpl =~ s/^\{//) { + # Only do this if the shorthand start with { + s{\{(-?\d+)\Q$posrpl\E} + {\257<$1 $Global::rpl{$rpl}\257>}g; + } + } + } + # Add {} if no replacement strings in @command + ($replacecount_ref, $len_ref, @command) = + replacement_counts_and_lengths($return_files,@command); + if("@command" =~ /^[^ \t\n=]*\257 \@unget, - 'command' => $command, - 'arg_queue' => RecordQueue->new($read_from,$::opt_colsep), + 'command' => \@command, + 'replacecount' => $replacecount_ref, + 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), 'context_replace' => $context_replace, + 'len' => $len_ref, 'max_number_of_args' => $max_number_of_args, 'size' => undef, 'return_files' => $return_files, @@ -4345,27 +8130,151 @@ sub new { }, ref($class) || $class; } +sub merge_rpl_parts { + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + # Input: + # @in = the @command as given by the user + # Uses: + # $Global::parensleft + # $Global::parensright + # Returns: + # @command with parts merged to keep {= and =} as one + my @in = @_; + my @out; + my $l = quotemeta($Global::parensleft); + my $r = quotemeta($Global::parensright); + + while(@in) { + my $s = shift @in; + $_ = $s; + # Remove matching (right most) parens + while(s/(.*)$l.*?$r/$1/o) {} + if(/$l/o) { + # Missing right parens + while(@in) { + $s .= " ".shift @in; + $_ = $s; + while(s/(.*)$l.*?$r/$1/o) {} + if(not /$l/o) { + last; + } + } + } + push @out, $s; + } + return @out; +} + +sub replacement_counts_and_lengths { + # Count the number of different replacement strings. 
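# merge_rpl_parts() above exists because the shell hands '{=', 'perlexpr'
# and '=}' over as separate words; they are glued back together so the
# {= ... =} block survives as one unit. A toy version with the default
# parens hard-coded (the real code honours --parens); the sub name is made up:
sub _merge_parens_sketch {
    my @in = @_;
    my @out;
    while(@in) {
        my $word = shift @in;
        while(@in) {
            # keep appending words while '{=' is still unmatched by '=}'
            my $l = () = $word =~ /\Q{=\E/g;
            my $r = () = $word =~ /\Q=}\E/g;
            last if $l <= $r;
            $word .= " " . shift @in;
        }
        push @out, $word;
    }
    return @out;
}
# _merge_parens_sketch('echo', '{=', 's/a/b/', '=}') => ('echo', '{= s/a/b/ =}')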
+ # Find the lengths of context for context groups and non-context + # groups. + # If no {} found in @command: add it to @command + # + # Input: + # \@return_files = array of filenames to return + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $return_files = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \257< + while($c =~ s/ (\S*\000\S*) //x) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$return_files) { + my $t = $_; + while($t =~ s/ \257<([^\257]*)\257> //x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::tagstring) { + my $t = $opt::tagstring; + while($t =~ s/ \257<([^\257]*)\257> //x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = $noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\257<\257>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\257<\257>"); + } + } + } + return(\%replacecount,\%len,@command); +} + sub get { my $self = shift; if(@{$self->{'unget'}}) { my $cmd_line = shift @{$self->{'unget'}}; return ($cmd_line); } else { - my $cmd_line; - $cmd_line = CommandLine->new($self->seq(), - $self->{'command'}, - $self->{'arg_queue'}, - $self->{'context_replace'}, - $self->{'max_number_of_args'}, - $self->{'return_files'}, + my $cmd_line = CommandLine->new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'return_files'}, + $self->{'replacecount'}, + $self->{'len'}, ); $cmd_line->populate(); - ::debug("cmd_line->number_of_args ".$cmd_line->number_of_args()."\n"); - if($::opt_pipe) { + ::debug("init","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if($opt::pipe or $opt::pipepart) { if($cmd_line->replaced() eq "") { # Empty command - pipe requires a command - print STDERR "$Global::progname: --pipe must have a ". - "command to pipe into (e.g. 
'cat')\n"; + ::error("--pipe must have a command to pipe into (e.g. 'cat').\n"); ::wait_and_exit(255); } } else { @@ -4390,7 +8299,7 @@ sub unget { sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); - ::debug("CommandLineQueue->empty $empty\n"); + ::debug("run", "CommandLineQueue->empty $empty"); return $empty; } @@ -4410,22 +8319,6 @@ sub quote_args { return $self->{'command'}; } -sub size { - my $self = shift; - if(not $self->{'size'}) { - my @all_lines = (); - while(not $self->{'arg_queue'}->empty()) { - push @all_lines, CommandLine->new($self->{'command'}, - $self->{'arg_queue'}, - $self->{'context_replace'}, - $self->{'max_number_of_args'}); - } - $self->{'size'} = @all_lines; - $self->unget(@all_lines); - } - return $self->{'size'}; -} - package Limits::Command; @@ -4435,21 +8328,30 @@ sub max_length { # Returns: # number of chars on the longest command line allowed if(not $Limits::Command::line_max_len) { - if($::opt_s) { - if(is_acceptable_command_line_length($::opt_s)) { - $Limits::Command::line_max_len = $::opt_s; - } else { - # -s is too long: Find the correct - $Limits::Command::line_max_len = binary_find_max_length(0,$::opt_s); - } - if($::opt_s <= $Limits::Command::line_max_len) { - $Limits::Command::line_max_len = $::opt_s; - } else { - print STDERR "$Global::progname: value for -s option ", - "should be < $Limits::Command::line_max_len\n"; - } + # Disk cache of max command line length + my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); + my $cached_limit; + if(-e $len_cache) { + open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); + $cached_limit = <$fh>; + close $fh; } else { - $Limits::Command::line_max_len = real_max_length(); + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + mkdir($ENV{'HOME'} . "/.parallel"); + mkdir($ENV{'HOME'} . "/.parallel/tmp"); + open(my $fh, ">", $len_cache); + print $fh $cached_limit; + close $fh; + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + if($opt::max_chars) { + if($opt::max_chars <= $cached_limit) { + $Limits::Command::line_max_len = $opt::max_chars; + } else { + ::warning("Value for -s option ", + "should be < $cached_limit.\n"); + } } } return $Limits::Command::line_max_len; @@ -4477,7 +8379,7 @@ sub binary_find_max_length { my ($lower, $upper) = (@_); if($lower == $upper or $lower == $upper-1) { return $lower; } my $middle = int (($upper-$lower)/2 + $lower); - ::debug("Maxlen: $lower,$upper,$middle\n"); + ::debug("init", "Maxlen: $lower,$upper,$middle : "); if (is_acceptable_command_line_length($middle)) { return binary_find_max_length($middle,$upper); } else { @@ -4492,16 +8394,49 @@ sub is_acceptable_command_line_length { # 1 otherwise my $len = shift; - $CommandMaxLength::is_acceptable_command_line_length++; - ::debug("$CommandMaxLength::is_acceptable_command_line_length $len\n"); local *STDERR; - open (STDERR,">/dev/null"); + open (STDERR, ">", "/dev/null"); system "true "."x"x$len; close STDERR; - ::debug("$len $?\n"); + ::debug("init", "$len=$? "); return not $?; } +sub tmux_length { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + my $len = shift; + if($opt::tmux) { + $ENV{'TMUX'} ||= "tmux"; + if(not ::which($ENV{'TMUX'})) { + ::error($ENV{'TMUX'}." 
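# The probe above measures the usable command line length empirically
# (quoting and the exported environment eat into the raw kernel limit).
# As a rough cross-check, on most POSIX systems the raw limit can be read
# with getconf; the binary search will normally settle somewhat below it:
chomp(my $arg_max = `getconf ARG_MAX 2>/dev/null`);
# e.g. compare $arg_max with the cached value in ~/.parallel/tmp/linelen-*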
not found in \$PATH.\n"); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 100000, $len) { + my $tmpfile = ::tmpname("tms"); + my $tmuxcmd = "sh -c '".$ENV{'TMUX'}." -S $tmpfile new-session -d -n echo $l". + ("x"x$l). " 2>/dev/null' && echo $l; rm -f $tmpfile"; + push @out, qx{ $tmuxcmd }; + unlink $tmpfile; + } + ::debug("tmux","tmux-length ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = (::max(@out)); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + package RecordQueue; @@ -4530,13 +8465,13 @@ sub get { # reference to array of Arg-objects my $self = shift; if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; return shift @{$self->{'unget'}}; } - $self->{'arg_number'}++; my $ret = $self->{'arg_sub_queue'}->get(); if(defined $Global::max_number_of_args and $Global::max_number_of_args == 0) { - ::debug("Read 1 but return 0 args\n"); + ::debug("run", "Read 1 but return 0 args\n"); return [Arg->new("")]; } else { return $ret; @@ -4545,8 +8480,8 @@ sub get { sub unget { my $self = shift; - ::debug("RecordQueue-unget '@_'\n"); - $self->{'arg_number'}--; + ::debug("run", "RecordQueue-unget '@_'\n"); + $self->{'arg_number'} -= @_; unshift @{$self->{'unget'}}, @_; } @@ -4554,7 +8489,7 @@ sub empty { my $self = shift; my $empty = not @{$self->{'unget'}}; $empty &&= $self->{'arg_sub_queue'}->empty(); - ::debug("RecordQueue->empty $empty\n"); + ::debug("run", "RecordQueue->empty $empty"); return $empty; } @@ -4592,11 +8527,11 @@ sub get { if(defined $in_record) { my @out_record = (); for my $arg (@$in_record) { - ::debug("RecordColQueue::arg $arg\n"); + ::debug("run", "RecordColQueue::arg $arg\n"); my $line = $arg->orig(); - ::debug("line='$line'\n"); + ::debug("run", "line='$line'\n"); if($line ne "") { - for my $s (split /$::opt_colsep/o, $line, -1) { + for my $s (split /$opt::colsep/o, $line, -1) { push @out_record, Arg->new($s); } } else { @@ -4611,14 +8546,14 @@ sub get { sub unget { my $self = shift; - ::debug("RecordColQueue-unget '@_'\n"); + ::debug("run", "RecordColQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } sub empty { my $self = shift; my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); - ::debug("RecordColQueue->empty $empty"); + ::debug("run", "RecordColQueue->empty $empty"); return $empty; } @@ -4632,9 +8567,9 @@ sub new { my $fhs = shift; for my $fh (@$fhs) { if(-t $fh) { - print STDERR "$Global::progname: Input is read from the terminal. ". - "Only experts do this on purpose. ". - "Press CTRL-D to exit.\n"; + ::warning("Input is read from the terminal.\n"); + ::warning("Only experts do this on purpose. ". 
+ "Press CTRL-D to exit.\n"); } } return bless { @@ -4646,7 +8581,7 @@ sub new { sub get { my $self = shift; - if($::opt_xapply) { + if($opt::xapply) { return $self->xapply_get(); } else { return $self->nest_get(); @@ -4655,7 +8590,7 @@ sub get { sub unget { my $self = shift; - ::debug("MultifileQueue-unget '@_'\n"); + ::debug("run", "MultifileQueue-unget '@_'\n"); unshift @{$self->{'unget'}}, @_; } @@ -4666,7 +8601,7 @@ sub empty { for my $fh (@{$self->{'fhs'}}) { $empty &&= eof($fh); } - ::debug("MultifileQueue->empty $empty\n"); + ::debug("run", "MultifileQueue->empty $empty "); return $empty; } @@ -4681,10 +8616,16 @@ sub xapply_get { for my $fh (@{$self->{'fhs'}}) { my $arg = read_arg_from_fh($fh); if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}{$fh}}, $arg; push @record, $arg; $empty = 0; } else { - push @record, Arg->new(""); + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; } } if($empty) { @@ -4780,22 +8721,22 @@ sub read_arg_from_fh { my $prepend = undef; my $arg; do {{ - if(eof($fh)) { + # This makes 10% faster + if(not ($arg = <$fh>)) { if(defined $prepend) { return Arg->new($prepend); } else { return undef; } } - $arg = <$fh>; - ::debug("read $arg\n"); +# ::debug("run", "read $arg\n"); # Remove delimiter $arg =~ s:$/$::; if($Global::end_of_file_string and $arg eq $Global::end_of_file_string) { # Ignore the rest of input file - while (<$fh>) {} - ::debug("EOF-string $arg\n"); + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); if(defined $prepend) { return Arg->new($prepend); } else { @@ -4818,7 +8759,7 @@ sub read_arg_from_fh { redo; } } - }} while (1 == 0); # Dummy loop for redo + }} while (1 == 0); # Dummy loop {{}} for redo if(defined $arg) { return Arg->new($arg); } else { @@ -4829,7 +8770,7 @@ sub read_arg_from_fh { sub expand_combinations { # Input: # ([xmin,xmax], [ymin,ymax], ...) - # Returns ([x,y,...],[x,y,...]) + # Returns: ([x,y,...],[x,y,...]) # where xmin <= x <= xmax and ymin <= y <= ymax my $minmax_ref = shift; my $xmin = $$minmax_ref[0]; @@ -4855,44 +8796,66 @@ package Arg; sub new { my $class = shift; my $orig = shift; - if($::oodebug and not defined $orig) { - Carp::cluck($orig); + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + ::warning("No such hostgroup (@hostgroups)\n"); + @hostgroups = (keys %Global::hostgroups); + } + } else { + @hostgroups = (keys %Global::hostgroups); + } } return bless { 'orig' => $orig, + 'hostgroups' => \@hostgroups, }, ref($class) || $class; } sub replace { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) my $self = shift; - my $replacement_string = shift; # {} {/} {//} {.} {/.} - if(not defined $self->{$replacement_string}) { - my $s; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = (shift) ? 1 : 0; # should the string be quoted? 
+ # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = shift; + $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace + if(not defined $self->{"rpl",0,$perlexpr}) { + local $_; if($Global::trim eq "n") { - $s = $self->{'orig'}; + $_ = $self->{'orig'}; } else { - $s = trim_of($self->{'orig'}); + $_ = trim_of($self->{'orig'}); } - if($replacement_string eq "{}") { - # skip - } elsif($replacement_string eq "{.}") { - $s =~ s:\.[^/\.]*$::; # Remove .ext from argument - } elsif($replacement_string eq "{/}") { - $s =~ s:^.*/([^/]+)/?$:$1:; # Remove dir from argument. If ending in /, remove final / - } elsif($replacement_string eq "{//}") { - # Only load File::Basename if actually needed - $Global::use{"File::Basename"} ||= eval "use File::Basename;"; - $s = dirname($s); # Keep dir from argument. - } elsif($replacement_string eq "{/.}") { - $s =~ s:^.*/([^/]+)/?$:$1:; # Remove dir from argument. If ending in /, remove final / - $s =~ s:\.[^/\.]*$::; # Remove .ext from argument + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $Global::perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($Global::perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@\n"); + ::wait_and_exit(255); + } } - if($Global::JobQueue->quote_args()) { - $s = ::shell_quote_scalar($s); - } - $self->{$replacement_string} = $s; + # Execute the function + $Global::perleval{$perlexpr}->($job); + $self->{"rpl",0,$perlexpr} = $_; } - return $self->{$replacement_string}; + if(not defined $self->{"rpl",$quote,$perlexpr}) { + $self->{"rpl",1,$perlexpr} = + ::shell_quote_scalar($self->{"rpl",0,$perlexpr}); + } + return $self->{"rpl",$quote,$perlexpr}; } sub orig { @@ -4913,14 +8876,13 @@ sub trim_of { if($Global::trim eq "n") { # skip } elsif($Global::trim eq "l") { - for $arg (@strings) { $arg =~ s/^\s+//; } + for my $arg (@strings) { $arg =~ s/^\s+//; } } elsif($Global::trim eq "r") { - for $arg (@strings) { $arg =~ s/\s+$//; } + for my $arg (@strings) { $arg =~ s/\s+$//; } } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { - for $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } } else { - print STDERR "$Global::progname: --trim must be one of: r l ". - "rl lr\n"; + ::error("--trim must be one of: r l rl lr.\n"); ::wait_and_exit(255); } return wantarray ? @strings : "@strings"; @@ -4932,26 +8894,82 @@ package TimeoutQueue; sub new { my $class = shift; my $delta_time = shift; - + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } return bless { 'queue' => [], 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, }, ref($class) || $class; } +sub delta_time { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." 
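# The %Global::perleval cache above compiles each replacement expression
# into an anonymous sub exactly once instead of eval'ing the string for
# every single argument. A stripped-down sketch of the same compile-once
# pattern (the sub and hash names are made up):
my %compiled_expr;
sub _apply_expr_sketch {
    my ($expr, $value) = @_;
    $compiled_expr{$expr} ||=
        eval "sub { local \$_ = shift; $expr; return \$_ }"
        or die "Cannot compile $expr: $@";
    return $compiled_expr{$expr}->($value);
}
# _apply_expr_sketch('s/\.gz$//', "log.gz") returns "log"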
Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + sub process_timeouts { # Check if there was a timeout my $self = shift; - # @Global::timeout is sorted by timeout + # $self->{'queue'} is sorted by start time while (@{$self->{'queue'}}) { my $job = $self->{'queue'}[0]; - if($job->timedout()) { + if($job->endtime()) { + # Job already finished. No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->timedout($self->{'delta_time'})) { # Need to shift off queue before kill - # because kill calls usleep -> process_timeouts + # because kill calls usleep that calls process_timeouts shift @{$self->{'queue'}}; $job->kill(); } else { - # Because they are sorted by timeout + # Because they are sorted by start time the rest are later last; } } @@ -4960,24 +8978,7 @@ sub process_timeouts { sub insert { my $self = shift; my $in = shift; - my $lower = 0; - my $upper = $#{$self->{'queue'}}; - my $looking = int(($lower + $upper)/2); - my $in_time = $in->timeout(); - - # Find the position between $lower and $upper - while($lower < $upper) { - if($self->{'queue'}[$looking]->timeout() < $in_time) { - # Upper half - $lower = $looking+1; - } else { - # Lower half - $upper = $looking; - } - $looking = int(($lower + $upper)/2); - } - # splice at position $looking - splice @{$self->{'queue'}}, $looking, 0, $in; + push @{$self->{'queue'}}, $in; } @@ -4997,12 +8998,12 @@ sub new { my $class = shift; my $id = shift; my $count = shift; - $id=~s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex - $id="id-".$id; # To distinguish it from a process id + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id my $parallel_dir = $ENV{'HOME'}."/.parallel"; - -d $parallel_dir or mkdir $parallel_dir; + -d $parallel_dir or mkdir_or_die($parallel_dir); my $parallel_locks = $parallel_dir."/semaphores"; - -d $parallel_locks or mkdir $parallel_locks; + -d $parallel_locks or mkdir_or_die($parallel_locks); my $lockdir = "$parallel_locks/$id"; my $lockfile = $lockdir.".lock"; if($count < 1) { ::die_bug("semaphore-count: $count"); } @@ -5018,46 +9019,61 @@ sub new { }, ref($class) || $class; } +sub remove_dead_locks { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(not kill 0, $pid) { + ::debug("sem", "Dead: $d\n"); + unlink $d; + } else { + ::debug("sem", "Alive: $d\n"); + } + } + } +} + sub acquire { my $self = shift; my $sleep = 1; # 1 ms my $start_time = time; while(1) { + # Can we get a lock? 
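# set_remedian() above keeps an approximate running median in constant
# memory: three fixed buffers of 999 values hold medians of medians, and a
# --timeout given as a percentage is applied to that estimate of the median
# job runtime. A scaled-down sketch with buffers of 5 and only two levels,
# so the idea is visible with a handful of values (names are made up):
{
    my (@lvl0, @lvl1, $seen);
    sub _remedian_sketch {
        my $val = shift;
        my $n = $seen++;
        $lvl0[$n % 5] = $val;                 # ring buffer of raw values
        my @m0 = sort { $a <=> $b } grep { defined } @lvl0;
        $lvl1[int($n/5) % 5] = $m0[$#m0/2];   # median of that buffer
        my @m1 = sort { $a <=> $b } grep { defined } @lvl1;
        return $m1[$#m1/2];                   # median of the medians
    }
}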
$self->atomic_link_if_count_less_than() and last; - ::debug("Remove dead locks"); - my $lockdir = $self->{'lockdir'}; - for my $d (<$lockdir/*>) { - ::debug("Lock $d $lockdir\n"); - $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; - my ($pid, $host) = ($1,$2); - if($host eq ::hostname()) { - if(not kill 0, $1) { - ::debug("Dead: $d"); - unlink $d; - } else { - ::debug("Alive: $d"); - } - } - } - # try again - $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); # Retry slower and slower up to 1 second $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); # Random to avoid every sleeping job waking up at the same time ::usleep(rand()*$sleep); - if(defined($::opt_timeout) and - $start_time + $::opt_timeout > time) { - # Acquire the lock anyway - if(not -e $self->{'idfile'}) { - open (A, ">", $self->{'idfile'}) or - ::die_bug("write_idfile: $self->{'idfile'}"); - close A; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore.\n"); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting.\n"); + exit(1); + last; } - link $self->{'idfile'}, $self->{'pidfile'}; - last; } } - ::debug("acquired $self->{'pid'}\n"); + ::debug("sem", "acquired $self->{'pid'}\n"); } sub release { @@ -5072,7 +9088,20 @@ sub release { } $self->unlock(); } - ::debug("released $self->{'pid'}\n"); + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + unlink $old_pidfile; } sub atomic_link_if_count_less_than { @@ -5080,25 +9109,26 @@ sub atomic_link_if_count_less_than { my $self = shift; my $retval = 0; $self->lock(); - ::debug($self->nlinks()."<".$self->{'count'}); - if($self->nlinks() < $self->{'count'}) { - -d $self->{'lockdir'} || mkdir $self->{'lockdir'}; + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); if(not -e $self->{'idfile'}) { - open (A, ">", $self->{'idfile'}) or + open (my $fh, ">", $self->{'idfile'}) or ::die_bug("write_idfile: $self->{'idfile'}"); - close A; + close $fh; } $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); } $self->unlock(); - ::debug("atomic $retval"); + ::debug("sem", "atomic $retval"); return $retval; } sub nlinks { my $self = shift; if(-e $self->{'idfile'}) { - ::debug("nlinks".((stat(_))[3])."\n"); return (stat(_))[3]; } else { return 0; @@ -5108,36 +9138,90 @@ sub nlinks { sub lock { my $self = shift; my $sleep = 100; # 100 ms - open $self->{'lockfh'}, ">", $self->{'lockfile'} - or ::die_bug("Can't open semaphore file $self->{'lockfile'}: $!"); - chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw - 
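# The counting above is done by the filesystem: every holder hard-links the
# shared 'idfile' to a file named after its own pid, so the idfile's link
# count ((stat)[3]) is "number of holders plus one". A tiny stand-alone
# sketch of that trick (the directory and file names are made up):
use File::Temp qw(tempdir);
my $demo_dir = tempdir(CLEANUP => 1);
my $demo_id  = "$demo_dir/id";
open my $demo_fh, ">", $demo_id or die $!; close $demo_fh;
link $demo_id, "$demo_dir/holder-$$";     # acquire one slot
my $holders = (stat($demo_id))[3] - 1;    # link count minus the idfile itself
unlink "$demo_dir/holder-$$";             # release the slot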
$Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock);"; - while(not flock $self->{'lockfh'}, LOCK_EX()|LOCK_NB()) { - if ($! =~ m/Function not implemented/) { - print $Global::original_stderr - ("parallel: Warning: flock: $!"); - print "parallel: Will wait for a random while\n"; - ::usleep(rand(5000)); - last; + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); } - - ::debug("Cannot lock $self->{'lockfile'}"); - # TODO if timeout: last + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!"); + ::warning("Will wait for a random while\n"); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); # Random to avoid every sleeping job waking up at the same time ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. Consider using --semaphoretimeout."); + } + } } - ::debug("locked $self->{'lockfile'}"); + ::debug("run", "locked $self->{'lockfile'}"); } sub unlock { my $self = shift; unlink $self->{'lockfile'}; close $self->{'lockfh'}; - ::debug("unlocked\n"); + ::debug("run", "unlocked\n"); +} + +sub mkdir_or_die { + # If dir is not writable: die + my $dir = shift; + my @dir_parts = split(m:/:,$dir); + my ($ddir,$part); + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $ddir .= "/".$part; + -d $ddir and next; + mkdir $ddir; + } + if(not -w $dir) { + ::error("Cannot write to $dir: $!\n"); + ::wait_and_exit(255); + } } # Keep perl -w happy -$::opt_x = $Semaphore::timeout = $Semaphore::wait = $::opt_shebang = -0; - +$opt::ctrlc = $opt::x = $Semaphore::timeout = $Semaphore::wait = +$opt::ignored_option = $Job::file_descriptor_warning_printed = +$Global::envdef = 0; diff --git a/sem b/sem new file mode 100755 index 0000000..9a92e45 --- /dev/null +++ b/sem @@ -0,0 +1,9227 @@ +#!/usr/bin/env perl + +# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange +# and Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see +# or write to the Free Software Foundation, Inc., 51 Franklin St, +# Fifth Floor, Boston, MA 02110-1301 USA + +# open3 used in Job::start +use IPC::Open3; +# &WNOHANG used in reaper +use POSIX qw(:sys_wait_h setsid ceil :errno_h); +# gensym used in Job::start +use Symbol qw(gensym); +# tempfile used in Job::start +use File::Temp qw(tempfile tempdir); +# mkpath used in openresultsfile +use File::Path; +# GetOptions used in get_options_from_array +use Getopt::Long; +# Used to ensure code quality +use strict; +use File::Basename; + +save_stdin_stdout_stderr(); +save_original_signal_handler(); +parse_options(); +::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); +my $number_of_args; +if($Global::max_number_of_args) { + $number_of_args=$Global::max_number_of_args; +} elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; +} else { + $number_of_args = 1; +} + +my @command = @ARGV; + +my @input_source_fh; +if($opt::pipepart) { + @input_source_fh = map { open_or_exit($_) } "/dev/null"; +} else { + @input_source_fh = map { open_or_exit($_) } @opt::a; + if(not @input_source_fh and not $opt::pipe) { + @input_source_fh = (*STDIN); + } +} + +if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $input_source_fh[0]; + <$fh>; +} +if($opt::header and not $opt::pipe) { + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = $opt::colsep; + $delimiter ||= "\t"; + my $id = 1; + for my $fh (@input_source_fh) { + my $line = <$fh>; + chomp($line); + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + # TODO accept configurable short hands + # TODO how to deal with headers in {=...=} + for(@command) { + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } +} else { + my $id = 1; + for my $fh (@input_source_fh) { + $Global::input_source_header{$id} = $id; + $id++; + } +} + +if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. 
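# With --header the column names read from the first input line are
# rewritten into their positional form before any argument handling, so a
# template such as {name/.} becomes {2/.}. A toy illustration of the
# substitution above (the column names are made up):
my @demo_cols = ("id", "name");
my $demo_tmpl = "convert {name} -o {name/.}.png";
my $demo_col  = 1;
for my $c (@demo_cols) {
    $demo_tmpl =~ s/\{\Q$c\E(|\/|\/\/|\.|\/\.)\}/{$demo_col$1}/g;
    $demo_col++;
}
# $demo_tmpl is now "convert {2} -o {2/.}.png"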
Remove hosts that are down + filter_hosts(); +} + +if($opt::nonall or $opt::onall) { + onall(\@input_source_fh,@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +# TODO --transfer foo/./bar --cleanup +# multiple --transfer and --basefile with different /./ + +$Global::JobQueue = JobQueue->new( + \@command,\@input_source_fh,$Global::ContextReplace,$number_of_args,\@Global::ret_files); + +if($opt::eta or $opt::bar or $opt::shuf) { + # Count the number of jobs or shuffle all jobs + # before starting any + $Global::JobQueue->total_jobs(); +} +if($opt::pipepart) { + @Global::cat_partials = map { pipe_part_files($_) } @opt::a; + # Unget the command as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}->unget( + map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials + ); +} +for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); +} + +init_run_jobs(); +my $sem; +if($Global::semaphore) { + $sem = acquire_semaphore(); +} +$SIG{TERM} = \&start_no_new_jobs; +start_more_jobs(); +if(not $opt::pipepart) { + if($opt::pipe) { + spreadstdin(); + } +} +::debug("init", "Start draining\n"); +drain_job_queue(); +::debug("init", "Done draining\n"); +reaper(); +::debug("init", "Done reaping\n"); +if($opt::pipe and @opt::a) { + for my $job (@Global::tee_jobs) { + unlink $job->fh(2,"name"); + $job->set_fh(2,"name",""); + $job->print(); + unlink $job->fh(1,"name"); + } +} +::debug("init", "Cleaning\n"); +cleanup(); +if($Global::semaphore) { + $sem->release(); +} +for(keys %Global::sshmaster) { + # If 'ssh -M's are running: kill them + kill "TERM", $_; +} +::debug("init", "Halt\n"); +if($opt::halt) { + wait_and_exit($Global::halt_exitstatus); +} else { + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +sub __PIPE_MODE__ {} + +sub pipe_part_files { + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + my $header = find_header(\$buf,open_or_exit($file)); + # find positions + my @pos = find_split_positions($file,$opt::blocksize,length $header); + # Make @cat_partials + my @cat_partials = (); + for(my $i=0; $i<$#pos; $i++) { + push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); + } + # Remote exec should look like: + # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) + # ssh -tt not allowed. Remote will die due to broken pipe anyway. 
+ return @cat_partials; +} + +sub find_header { + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $opt::blocksize + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { + if($$buf_ref=~s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions { + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $headerlen = length of header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $headerlen) = @_; + my $size = -s $file; + $block = int $block; + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$headerlen); + for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { + my $buf; + seek($fh, $pos, 0) || die; + while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ /^(.*$recend)$recstart/os) { + # Start looking for next record _after_ this match + $pos += length($1); + push(@pos,$pos); + last; + } + } else { + # If match $recend$recstart => Record position + my $i = index64(\$buf,$recendrecstart); + if($i != -1) { + # Start looking for next record _after_ this match + $pos += $i + length($recendrecstart); + push(@pos,$pos); + last; + } + } + } + } + push(@pos,$size); + close $fh; + return @pos; +} + +sub cat_partial { + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert start_end to start_len + my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; + return "<". shell_quote_scalar($file) . + q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . + " @start_len"; +} + +sub spreadstdin { + # read a record + # Spawn a job and print the record to it. 
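# cat_partial() above builds a perl one-liner that copies raw byte ranges of
# the input file straight to the job with sysseek/sysread/syswrite, so
# --pipepart never writes temporary split files. The same idea as a plain
# sub (the name is made up; copies one start/length range to stdout):
sub _copy_range_sketch {
    my ($path, $start, $len) = @_;
    open my $in, "<", $path or die "open $path: $!";
    sysseek($in, $start, 0) or die "seek: $!";
    while($len > 0) {
        my $read = sysread($in, my $buf, $len > 32768 ? 32768 : $len) or last;
        $len -= $read;
        syswrite(STDOUT, $buf);
    }
    close $in;
}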
+ # Uses: + # $opt::blocksize + # STDIN + # $opt::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + # Returns: N/A + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $two_gb = 2**31-1; + my $blocksize = $opt::blocksize; + my $in = *STDIN; + my $header = find_header(\$buf,$in); + while(1) { + my $anything_written = 0; + if(not read($in,substr($buf,length $buf,0),$blocksize)) { + # End-of-file + $chunk_number != 1 and last; + # Force the while-loop once if everything was read by header reading + $one_time_through++ and last; + } + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + next; + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Read n-line records + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex64(\$buf,"\n"); + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex64(\$buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + shorten(\$buf,$last_newline_pos+1); + } elsif($opt::regexp) { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } else { + # Find the last recend-recstart in $buf + if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } + } else { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } else { + # Find the last recend+recstart in $buf + my $i = rindex64(\$buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + shorten(\$buf,$i); + } + } + } + if(not $anything_written and not eof($in)) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially up to 2GB-1 (2GB causes problems) + if($blocksize < $two_gb) { + my $old_blocksize = $blocksize; + $blocksize = ::min(ceil($blocksize * 1.3 + 1), $two_gb); + ::warning("A record was longer than $old_blocksize. " . 
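# The reading loop above always cuts the buffer at the last complete record
# (the last spot where a record end is immediately followed by a record
# start) and carries the incomplete tail into the next read; if no boundary
# fits in one block the block size is grown. A toy version of the cut for
# plain "\n"-terminated records (the sub name is made up):
sub _cut_complete_records_sketch {
    my ($buf_ref) = @_;
    my $i = rindex($$buf_ref, "\n");
    return "" if $i == -1;                # no complete record yet
    my $complete = substr($$buf_ref, 0, $i + 1);
    substr($$buf_ref, 0, $i + 1) = "";    # keep only the unfinished tail
    return $complete;
}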
+ "Increasing to --blocksize $blocksize\n"); + } + } + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); + + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + my %incomplete_jobs = %Global::running; + my $sleep = 1; + while(keys %incomplete_jobs) { + my $something_written = 0; + for my $pid (keys %incomplete_jobs) { + my $job = $incomplete_jobs{$pid}; + if($job->stdin_buffer_length()) { + $something_written += $job->non_block_write(); + } else { + delete $incomplete_jobs{$pid} + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + } + + if($opt::regexp) { + # If $recstart/$recend contains '|' this should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + my $two_gb = 2**31-1; + for(1..$n) { + $i = index64($buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + + sub round_robin_write { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + # Returns: + # $something_written = amount of bytes written + my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; + my $something_written = 0; + my $block_passed = 0; + my $sleep = 1; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + # Make a queue to spread the blocks evenly + if(not @robin_queue) { + push @robin_queue, (sort { $a->seq() <=> $b->seq() } + values %Global::running); + } + while(my $job = shift @robin_queue) { + if($job->stdin_buffer_length() > 0) { + $something_written += $job->non_block_write(); + } else { + $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); + $block_passed = 1; + $job->set_virgin(0); + $something_written += $job->non_block_write(); + last; + } + } + $sleep = ::reap_usleep($sleep); + } + return $something_written; + } +} + +sub index64 { + # Do index on strings > 2GB. 
+ # index in Perl < v5.22 does not work for > 2GB + # Input: + # as index except STR which must be passed as a reference + # Output: + # as index + my $ref = shift; + my $match = shift; + my $pos = shift || 0; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # No point in doing extra work if we don't need to. + if($strlen < $block_size or $] > 5.022) { + return index($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos; + while($offset < $strlen) { + $ret = index( + substr($$ref, $offset, $block_size), + $match, $pos-$offset); + if($ret != -1) { + return $ret + $offset; + } + $offset += ($block_size - $matchlen - 1); + } + return -1; +} + +sub rindex64 { + # Do rindex on strings > 2GB. + # rindex in Perl < v5.22 does not work for > 2GB + # Input: + # as rindex except STR which must be passed as a reference + # Output: + # as rindex + my $ref = shift; + my $match = shift; + my $pos = shift; + my $block_size = 2**31-1; + my $strlen = length($$ref); + # Default: search from end + $pos = defined $pos ? $pos : $strlen; + # No point in doing extra work if we don't need to. + if($strlen < $block_size) { + return rindex($$ref, $match, $pos); + } + + my $matchlen = length($match); + my $ret; + my $offset = $pos - $block_size + $matchlen; + if($offset < 0) { + # The offset is less than a $block_size + # Set the $offset to 0 and + # Adjust block_size accordingly + $block_size = $block_size + $offset; + $offset = 0; + } + while($offset >= 0) { + $ret = rindex( + substr($$ref, $offset, $block_size), + $match); + if($ret != -1) { + return $ret + $offset; + } + $offset -= ($block_size - $matchlen - 1); + } + return -1; +} + +sub shorten { + # Do: substr($buf,0,$i) = ""; + # Some Perl versions do not support $i > 2GB, so do this in 2GB chunks + # Input: + # $buf_ref = \$buf + # $i = position to shorten to + # Returns: N/A + my ($buf_ref, $i) = @_; + my $two_gb = 2**31-1; + while($i > $two_gb) { + substr($$buf_ref,0,$two_gb) = ""; + $i -= $two_gb; + } + substr($$buf_ref,0,$i) = ""; +} + +sub write_record_to_pipe { + # Fork then + # Write record from pos 0 .. $endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $record_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $record_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + # Job is no longer virgin + $job->set_virgin(0); + # We ignore the removed rec_sep which is technically wrong. + $job->add_transfersize($endpos + length $$header_ref); + if(fork()) { + # Skip + } else { + # Chop of at $endpos as we do not know how many rec_sep will + # be removed. 
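+ # (This branch is the forked child, so truncating the copy below does
+ # not disturb the parent's buffer.)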
+ substr($$record_ref,$endpos,length $$record_ref) = ""; + # Remove rec_sep + if($opt::remove_rec_sep) { + Job::remove_rec_sep($record_ref,$recstart,$recend); + } + $job->write($header_ref); + $job->write($record_ref); + close $job->fh(0,"w"); + exit(0); + } + close $job->fh(0,"w"); + return 1; +} + +sub __SEM_MODE__ {} + +sub acquire_semaphore { + # Acquires semaphore. If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + if(fork()) { + exit(0); + } else { + # If run in the background, the PID will change + $sem->pid_change(); + } + } + return $sem; +} + +sub __PARSE_OPTIONS__ {} + +sub options_hash { + # Returns: + # %hash = the GetOptions config + return + ("debug|D=s" => \$opt::D, + "xargs" => \$opt::xargs, + "m" => \$opt::m, + "X" => \$opt::X, + "v" => \@opt::v, + "joblog=s" => \$opt::joblog, + "results|result|res=s" => \$opt::results, + "resume" => \$opt::resume, + "resume-failed|resumefailed" => \$opt::resume_failed, + "silent" => \$opt::silent, + "keep-order|keeporder|k" => \$opt::keeporder, + "no-keep-order|nokeeporder|nok|no-k" => \$opt::nokeeporder, + "group" => \$opt::group, + "g" => \$opt::retired, + "ungroup|u" => \$opt::ungroup, + "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, + "tmux" => \$opt::tmux, + "null|0" => \$opt::0, + "quote|q" => \$opt::q, + # Replacement strings + "parens=s" => \$opt::parens, + "rpl=s" => \@opt::rpl, + "plus" => \$opt::plus, + "I=s" => \$opt::I, + "extensionreplace|er=s" => \$opt::U, + "U=s" => \$opt::retired, + "basenamereplace|bnr=s" => \$opt::basenamereplace, + "dirnamereplace|dnr=s" => \$opt::dirnamereplace, + "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, + "seqreplace=s" => \$opt::seqreplace, + "slotreplace=s" => \$opt::slotreplace, + "jobs|j=s" => \$opt::jobs, + "delay=f" => \$opt::delay, + "sshdelay=f" => \$opt::sshdelay, + "load=s" => \$opt::load, + "noswap" => \$opt::noswap, + "max-line-length-allowed" => \$opt::max_line_length_allowed, + "number-of-cpus" => \$opt::number_of_cpus, + "number-of-cores" => \$opt::number_of_cores, + "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, + "shellquote|shell_quote|shell-quote" => \$opt::shellquote, + "nice=i" => \$opt::nice, + "tag" => \$opt::tag, + "tagstring|tag-string=s" => \$opt::tagstring, + "onall" => \$opt::onall, + "nonall" => \$opt::nonall, + "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, + "sshlogin|S=s" => \@opt::sshlogin, + "sshloginfile|slf=s" => \@opt::sshloginfile, + "controlmaster|M" => \$opt::controlmaster, + "return=s" => \@opt::return, + "trc=s" => \@opt::trc, + "transfer" => \$opt::transfer, + "cleanup" => \$opt::cleanup, + "basefile|bf=s" => \@opt::basefile, + "B=s" => \$opt::retired, + "ctrlc|ctrl-c" => \$opt::ctrlc, + "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, + "workdir|work-dir|wd=s" => \$opt::workdir, + "W=s" => \$opt::retired, + "tmpdir=s" => \$opt::tmpdir, + "tempdir=s" => \$opt::tmpdir, + "use-compress-program|compress-program=s" => \$opt::compress_program, + "use-decompress-program|decompress-program=s" => \$opt::decompress_program, + "compress" => \$opt::compress, + "tty" => \$opt::tty, + "T" => \$opt::retired, + "H=i" => \$opt::retired, + "dry-run|dryrun" => \$opt::dryrun, + "progress" => \$opt::progress, + "eta" => 
\$opt::eta, + "bar" => \$opt::bar, + "shuf" => \$opt::shuf, + "arg-sep|argsep=s" => \$opt::arg_sep, + "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, + "trim=s" => \$opt::trim, + "env=s" => \@opt::env, + "recordenv|record-env" => \$opt::record_env, + "plain" => \$opt::plain, + "profile|J=s" => \@opt::profile, + "pipe|spreadstdin" => \$opt::pipe, + "robin|round-robin|roundrobin" => \$opt::roundrobin, + "recstart=s" => \$opt::recstart, + "recend=s" => \$opt::recend, + "regexp|regex" => \$opt::regexp, + "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, + "files|output-as-files|outputasfiles" => \$opt::files, + "block|block-size|blocksize=s" => \$opt::blocksize, + "tollef" => \$opt::retired, + "gnu" => \$opt::ignored_option, + "xapply" => \$opt::xapply, + "bibtex" => \$opt::bibtex, + "wc|willcite|will-cite|nn|nonotice|no-notice" => \$opt::willcite, + # Termination and retries + "halt-on-error|halt=s" => \$opt::halt, + "memfree=s" => \$opt::memfree, + "retries=i" => \$opt::retries, + "timeout=s" => \$opt::timeout, + # xargs-compatibility - implemented, man, testsuite + "max-procs|P=s" => \$opt::jobs, + "delimiter|d=s" => \$opt::d, + "max-chars|s=i" => \$opt::max_chars, + "arg-file|a=s" => \@opt::a, + "no-run-if-empty|r" => \$opt::r, + "replace|i:s" => \$opt::i, + "E=s" => \$opt::eof, + "eof|e:s" => \$opt::eof, + "max-args|n=i" => \$opt::max_args, + "max-replace-args|N=i" => \$opt::max_replace_args, + "colsep|col-sep|C=s" => \$opt::colsep, + "help|h" => \$opt::help, + "L=f" => \$opt::L, + "max-lines|l:f" => \$opt::max_lines, + "interactive|p" => \$opt::p, + "verbose|t" => \$opt::verbose, + "version|V" => \$opt::version, + "minversion|min-version=i" => \$opt::minversion, + "show-limits|showlimits" => \$opt::show_limits, + "exit|x" => \$opt::x, + # Semaphore + "semaphore" => \$opt::semaphore, + "semaphoretimeout|st=i" => \$opt::semaphoretimeout, + "semaphorename|id=s" => \$opt::semaphorename, + "fg" => \$opt::fg, + "bg" => \$opt::bg, + "wait" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + "shebang|hashbang" => \$opt::shebang, + "internal-pipe-means-argfiles" => \$opt::internal_pipe_means_argfiles, + "Y" => \$opt::retired, + "skip-first-line" => \$opt::skip_first_line, + "header=s" => \$opt::header, + "cat" => \$opt::cat, + "fifo" => \$opt::fifo, + "pipepart|pipe-part" => \$opt::pipepart, + "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, + ); +} + +sub get_options_from_array { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_options { + # Returns: N/A + init_globals(); + @ARGV=read_options(); + 
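+ # The rest of parse_options() folds the parsed $opt::* values into
+ # $Global::* state and resolves options that override or imply each other.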
+ # no-* overrides * + if($opt::nokeeporder) { $opt::keeporder = undef; } + if($opt::noctrlc) { $opt::ctrlc = undef; } + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::0) { $/ = "\0"; } + if(defined $opt::d) { $/ = unquote_printf($opt::d) } + if(defined $opt::tagstring) { $opt::tagstring = unquote_printf($opt::tagstring); } + if(defined $opt::p) { $Global::interactive = $opt::p; } + if(defined $opt::q) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { $Global::stderr_verbose = 1; } + parse_replacement_string_options(); + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } + if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + if(defined $opt::help) { die_usage(); } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } + if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(defined $opt::show_limits) { show_limits(); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } + $opt::blocksize = multiply_binary_prefix($opt::blocksize); + if($opt::blocksize > 2**31-1) { + warning("--blocksize >= 2G causes problems. Using 2G-1\n"); + $opt::blocksize = 2**31-1; + } + $opt::memfree = multiply_binary_prefix($opt::memfree); + if(defined $opt::controlmaster) { $opt::noctrlc = 1; } + if(defined $opt::halt and + $opt::halt =~ /%/) { $opt::halt /= 100; } + if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { + ::error("--timeout must be seconds or percentage\n"); + wait_and_exit(255); + } + if(defined $opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." 
-dc"; + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + } + if(defined $opt::nonall) { + # Append a dummy empty argument + push @ARGV, $Global::arg_sep, ""; + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 1; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + $opt::transfer = 1; + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::0 = 1; + $/ = "\0"; + } elsif ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + if(defined $opt::tag and not defined $opt::tagstring) { + # Default = {} + $opt::tagstring = $Global::parensleft.$Global::parensright; + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ", + "--max-lines, and -L.\n"); + wait_and_exit(255); + } + if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { + # Deal with ::: and :::: + @ARGV=read_args_from_command_line(); + } + parse_semaphore(); + + if(defined $opt::eta) { + $opt::progress = $opt::eta; + } + if(defined $opt::bar) { + $opt::progress = $opt::bar; + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.\n"); + ::error("-B has been retired. Use --bf.\n"); + ::error("-T has been retired. Use --tty.\n"); + ::error("-U has been retired. Use --er.\n"); + ::error("-W has been retired. Use --wd.\n"); + ::error("-Y has been retired. Use --shebang.\n"); + ::error("-H has been retired. Use --halt.\n"); + ::error("--tollef has been retired. 
Use -u -q --arg-sep -- and --load for -l.\n"); + ::wait_and_exit(255); + } + citation_notice(); + + parse_sshlogin(); + parse_env_var(); + + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If $opt::max_replace_args is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail.\n"); + } + + if(not defined $opt::jobs) { + $opt::jobs = "100%"; + } + open_joblog(); +} + +sub init_globals { + # Defaults: + $Global::version = 20150522; + $Global::progname = 'parallel'; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + $Global::quoting = 0; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', + '{/.}' => 's:.*/::; s:\.[^/.]+$::;', + '{.}' => 's:\.[^/.]+$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$::', + '{+.}' => 's:.*\.::', + '{+..}' => 's:.*\.([^.]*\.):$1:', + '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', + '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', + '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $/ = "\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::halt_exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + if(not $ENV{HOME}) { + # $ENV{HOME} is sometimes not set if called from PHP + ::warning("\$HOME not set. 
Using /tmp\n"); + $ENV{HOME} = "/tmp"; + } +} + +sub parse_replacement_string_options { + # Deal with --rpl + # Uses: + # %Global::rpl + # $Global::parensleft + # $Global::parensright + # $opt::parens + # $Global::parensleft + # $Global::parensright + # $opt::plus + # %Global::plus + # $opt::I + # $opt::U + # $opt::i + # $opt::basenamereplace + # $opt::dirnamereplace + # $opt::seqreplace + # $opt::slotreplace + # $opt::basenameextensionreplace + + sub rpl { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + my $parens = "{==}"; + if(defined $opt::parens) { $parens = $opt::parens; } + my $parenslen = 0.5*length $parens; + $Global::parensleft = substr($parens,0,$parenslen); + $Global::parensright = substr($parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/ /,$_,2; + $Global::rpl{$shorthand} = $long; + } +} + +sub parse_semaphore { + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + # Uses: + # $opt::semaphore + # $Global::semaphore + # $opt::semaphoretimeout + # $Semaphore::timeout + # $opt::semaphorename + # $Semaphore::name + # $opt::fg + # $Semaphore::fg + # $opt::wait + # $Semaphore::wait + # $opt::bg + # @opt::a + # @Global::unget_argv + # $Global::default_simultaneous_sshlogins + # $opt::jobs + # $Global::interactive + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg) { $Global::semaphore = 1; } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait) { $Global::semaphore = 1; } + if($Global::semaphore) { + # A semaphore does not take input from neither stdin nor file + @opt::a = ("/dev/null"); + push(@Global::unget_argv, [Arg->new("")]); + $Semaphore::timeout = $opt::semaphoretimeout || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive.\n"); + ::wait_and_exit(255); + } + } +} + +sub record_env { + # Record current %ENV-keys in ~/.parallel/ignored_vars + # Returns: N/A + my $ignore_filename = $ENV{'HOME'} . 
"/.parallel/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename.\n"); + ::wait_and_exit(255); + } +} + +sub parse_env_var { + # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen + # + # Bash functions must be parsed to export them remotely + # Pre-shellshock style bash function: + # myfunc=() {... + # Post-shellshock style bash function (v1): + # BASH_FUNC_myfunc()=() {... + # Post-shellshock style bash function (v2): + # BASH_FUNC_myfunc%%=() {... + # + # Uses: + # $Global::envvar = eval string that will set variables in both bash and csh + # $Global::envwarn = If functions are used: Give warning in csh + # $Global::envvarlen = length of $Global::envvar + # @opt::env + # $Global::shell + # %ENV + # Returns: N/A + $Global::envvar = ""; + + $Global::envvarlen = length $Global::envvar; +} + +sub open_joblog { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fd + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results.\n"); + ::wait_and_exit(255); + } + if($opt::joblog) { + if($opt::resume || $opt::resume_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + $append = <$joblog_fh>; # If there is a header: Open as append later + my $joblog_regexp; + if($opt::resume_failed) { + # Make a regexp that only matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + } elsif(not /\d+\s+[^\s]+\s+([-0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + close $joblog_fh; + } + } + if($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fd{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). "\n"; + } + } +} + +sub find_compression_program { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. 
Sorted by speed on 16 core + # parallel -j1 --joblog jl --arg-sep , parallel --compress-program \'{3}" "-{2}\' cat ::: gz '>'/dev/null , 1 2 3 , {1..3} , lz4 lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2 + # sort -nk4 jl + my @prg = qw(lz4 pigz lzop plzip pbzip2 pxz gzip lzma xz bzip2 lzip); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + + +sub read_options { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = shell_quote_scalar(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does. + my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = shell_quote_scalar(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; + } + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from .parallel/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + $ENV{'HOME'}."/.parallel/config", + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if(-r $profile) { + push @profiles, $profile; + } else { + push @profiles, $ENV{'HOME'}."/.parallel/".$profile; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shellwords($_); + } + close $in_fh; + } else { + if(grep /^$profile$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable.\n"); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + @ARGV_env = shellwords($ENV{'PARALLEL'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + + # Prepend non-options to @ARGV (such as commands like 
'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub read_args_from_command_line { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::internal_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my @new_argv = (); + for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_file_sep) { + my $group = $arg; # This group of arguments is args or argfiles + my @group; + while(defined ($arg = shift @ARGV)) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_file_sep) { + # exit while loop if finding new separator + last; + } else { + # If not hitting ::: or :::: + # Append it to the group + push @group, $arg; + } + } + + if($group eq $Global::arg_file_sep + or ($opt::internal_pipe_means_argfiles and $opt::pipe) + ) { + # Group of file names on the command line. + # Append args into -a + push @opt::a, @group; + } elsif($group eq $Global::arg_sep) { + # Group of arguments on the command line. + # Put them into a file. + # Create argfile + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + exit_if_disk_full(); + # Append filehandle to -a + push @opt::a, $outfh; + } else { + ::die_bug("Unknown command line group: $group"); + } + if(defined($arg)) { + # $arg is ::: or :::: + redo; + } else { + # $arg is undef -> @ARGV empty + last; + } + } + push @new_argv, $arg; + } + # Output: @ARGV = command to run with options + return @new_argv; +} + +sub cleanup { + # Returns: N/A + unlink keys %Global::unlink; + map { rmdir $_ } keys %Global::unlink; + if(@opt::basefile) { cleanup_basefile(); } +} + +sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} + +sub shell_quote { + # Input: + # @strings = strings to be quoted + # Output: + # @shell_quoted_strings = string quoted with \ as needed by the shell + my @strings = (@_); + for my $a (@strings) { + $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' + } + return wantarray ? @strings : "@strings"; +} + +sub shell_quote_empty { + # Inputs: + # @strings = strings to be quoted + # Returns: + # @quoted_strings = empty strings quoted as ''. + my @strings = shell_quote(@_); + for my $a (@strings) { + if($a eq "") { + $a = "''"; + } + } + return wantarray ? @strings : "@strings"; +} + +sub shell_quote_scalar { + # Quote the string so shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted with \ as needed by the shell + my $a = $_[0]; + if(defined $a) { + # Solaris sh wants ^ quoted. 
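+ # A backslash before a literal newline is eaten by the shell as a line
+ # continuation, so newlines are handled separately below.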
+ # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\^\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go; + $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \' + } + return $a; +} + +sub shell_quote_file { + # Quote the string so shell will not expand any special chars and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed + my $a = shell_quote_scalar(shift); + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return $a; +} + +sub shellwords { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + +sub perl_quote_scalar { + # Quote the string so perl's eval will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted with \ as needed by perl's eval + my $a = $_[0]; + if(defined $a) { + $a =~ s/[\\\"\$\@]/\\$&/go; + } + return $a; +} + +sub unquote_printf { + # Convert \t \n \r \000 \0 + $_ = shift; + s/\\t/\t/g; + s/\\n/\n/g; + s/\\r/\r/g; + s/\\(\d\d\d)/eval 'sprintf "\\'.$1.'"'/ge; + s/\\(\d)/eval 'sprintf "\\'.$1.'"'/ge; + return $_; +} + +sub __FILEHANDLES__ {} + + +sub save_stdin_stdout_stderr { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 3>/tmp/foo) + # Uses: + # %Global::fd + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # Find file descriptors that are already opened (by the shell) + for my $fdno (1..61) { + # /dev/fd/62 and above are used by bash for <(cmd) + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fd{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::status_fd, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fd + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? 
+ # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fd)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Uses: + # $Global::stdin_in_opt_a + # $Global::original_stdin + # Returns: + # $fh = file handle to read-opened file + my $file = shift; + if($file eq "-") { + $Global::stdin_in_opt_a = 1; + return ($Global::original_stdin || *STDIN); + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, "<", $file)) { + ::error("Cannot open input file `$file': No such file or directory.\n"); + wait_and_exit(255); + } + return $fh; +} + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {} + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started +# $Global::tty_taken = is the tty in use by a running job? +# $Global::max_procs_file = filename if --jobs is given a filename +# $Global::JobQueue = JobQueue object for the queue of jobs +# $Global::timeoutq = queue of times where jobs timeout +# $Global::newest_job = Job object of the most recent job started +# $Global::newest_starttime = timestamp of $Global::newest_job +# @Global::sshlogin +# $Global::minimal_command_line_length = minimum length supported by all sshlogins +# $Global::start_no_new_jobs = should more jobs be started? +# $Global::original_stderr = file handle for STDERR when the program started +# $Global::total_started = total number of jobs started +# $Global::envvar = string to set the shell environment variables +# $Global::joblog = filehandle of joblog +# $Global::debug = Is debugging on? 
+# $Global::exitstatus = status code of GNU Parallel +# $Global::quoting = quote the command to run + +sub init_run_jobs { + # Set Global variables and progress signal handlers + # Do the copying of basefiles + # Returns: N/A + $Global::total_running = 0; + $Global::total_started = 0; + $Global::tty_taken = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + my $max_procs_file_last_mod; + + sub changed_procs_file { + # If --jobs is a file and it is modfied: + # Force recomputing of max_jobs_running for each $sshlogin + # Uses: + # $Global::max_procs_file + # %Global::host + # Returns: N/A + if($Global::max_procs_file) { + # --jobs filename + my $mtime = (stat($Global::max_procs_file))[9]; + $max_procs_file_last_mod ||= 0; + if($mtime > $max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + } + + sub changed_sshloginfile { + # If --slf is changed: + # reload --slf + # filter_hosts + # setup_basefile + # Uses: + # @opt::sshloginfile + # @Global::sshlogin + # %Global::host + # $opt::filter_hosts + # Returns: N/A + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run","--sshloginfile $actual_file changed. reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + + sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # %Global::host + # $Global::start_no_new_jobs + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + my $jobs_started_this_round = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + changed_procs_file(); + changed_sshloginfile(); + } + do { + $jobs_started_this_round = 0; + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { + # It has been too short since last start + next; + } + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The 
server is swapping + next; + } + if($opt::memfree and $sshlogin->memfree() < $opt::memfree) { + # The server has not enough mem free + ::debug("mem", "Not starting job: not enough mem\n"); + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since + next; + } + debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + $jobs_started_this_round++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + } while($jobs_started_this_round); + + return $jobs_started; + } +} + +{ + my $no_more_file_handles_warned; + + sub start_another_job { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? + if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume and $job->is_already_in_results())); + debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + push(@Global::virgin_jobs,$job); + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + ::error("No more processes: cannot run a single job. Something is wrong.\n"); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ", + "Decreasing number of running jobs to $max. ", + "Raising ulimit -u or /etc/security/limits.conf may help.\n"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. 
", + "Raising ulimit -n or /etc/security/limits.conf may help.\n"); + return 0; + } + } +} + +sub init_progress { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my %progress = progress(); + return ("\nComputers / CPU cores / Max jobs to run\n", + $progress{'workerlist'}); +} + +sub drain_job_queue { + # Uses: + # $opt::progress + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + if($opt::progress) { + ::status(init_progress()); + } + my $last_header = ""; + my $sleep = 0.2; + do { + while($Global::total_running > 0) { + debug($Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + ::status("\n", $progress{'header'}, "\n"); + $last_header = $progress{'header'}; + } + ::status("\r",$progress{'status'}); + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. + if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Exponential back-off sleeping + $sleep = ::reap_usleep($sleep); + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on.\n"); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + start_more_jobs(); + $sleep = ::reap_usleep($sleep); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs.\n"); + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); + if($opt::progress) { + my %progress = progress(); + ::status("\r", $progress{'status'}, "\n"); + } +} + +sub toggle_progress { + # Turn on/off progress view + # Uses: + # $opt::progress + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + ::status(init_progress()); + } +} + +sub progress { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $workerno{$w}.":".$sshlogin{$w} ." / ". + ($Global::host{$w}->ncpus() || "-")." / ". 
+ $Global::host{$w}->max_jobs_running()."\n"; + } + $status = "x"x($termcols+1); + # Select an output format that will fit on a single line + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $sshlogin{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $workerno{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . 
+ join(" ",map + { sprintf("%s:%d", + $sshlogin{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d", + $workerno{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, "status" => $status); +} + +{ + my ($total, $first_completed, $smoothed_avg_time); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + $total ||= $Global::JobQueue->total_jobs(); + my $completed = 0; + for(values %Global::host) { $completed += $_->jobs_completed() } + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = $completed / $total; + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; + # These chars mess up display in the terminal + $arg =~ tr/[\011-\016\033\302-\365]//d; + my $bar_text = + sprintf("%d%% %d:%d=%ds %s", + $pctcomplete*100, $completed, $left, $eta, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header + "\r" . $rev . $s . $reset; + return $s; + } +} + +{ + my ($columns,$last_column_time); + + sub terminal_columns { + # Get the number of columns of the terminal. + # Only update once per second. + # Returns: + # number of columns of the screen + if(not $columns or $last_column_time < time) { + $last_column_time = time; + $columns = $ENV{'COLUMNS'}; + if(not $columns) { + my $stty = qx{ stty -a /dev/null' }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; + } + $columns ||= 80; + } + return $columns; + } +} + +sub get_job_with_sshlogin { + # Input: + # $sshlogin = which host should the job be run on? 
+ # Uses: + # $opt::hostgroups + # $Global::JobQueue + # Returns: + # $job = next job object for $sshlogin if any available + my $sshlogin = shift; + my $job; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job to be run on a hostgroup of this + # $sshlogin + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + + my $clean_command = $job->replaced(); + if($clean_command =~ /^\s*$/) { + # Do not run empty lines + if(not $Global::JobQueue->empty()) { + return get_job_with_sshlogin($sshlogin); + } else { + return undef; + } + } + $job->set_sshlogin($sshlogin); + if($opt::retries and $clean_command and + $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + +sub __REMOTE_SSH__ {} + +sub read_sshloginfiles { + # Read a list of --slf's + # Input: + # @files = files or symbolic file names to read + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand { + # Expand --slf shorthand into a read file name + # Input: + # $file = file or symbolic file name to read + # Returns: + # $file = actual file name to read + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $ENV{'HOME'}."/.parallel/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + if(not -r $ENV{'HOME'}."/.parallel/".$file) { + # Try prepending ~/.parallel + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } else { + $file = $ENV{'HOME'}."/.parallel/".$file; + } + } + return $file; +} + +sub read_sshloginfile { + # Read sshloginfile into @Global::sshlogin + # Input: + # $file = file to read + # Uses: + # @Global::sshlogin + # Returns: N/A + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + if(not open($in_fh, "<", $file)) { + # Try the filename + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + /^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub parse_sshlogin { + # Parse @Global::sshlogin into %Global::host. + # Keep only hosts that are in one of the given ssh hostgroups. 
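+ # An entry may carry an ncpu prefix (e.g. '8/host'); a plain '@group'
+ # entry only restricts the run to hosts in that hostgroup.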
+ # Uses: + # @Global::sshlogin + # $Global::minimal_command_line_length + # %Global::host + # $opt::transfer + # @opt::return + # $opt::cleanup + # @opt::basefile + # @opt::trc + # Returns: N/A + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S sshlogin,sshlogin + for my $s (split /,|\n/, $sshlogin) { + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + $s =~ s/\s*$//; + push (@login, $s); + } + } + } + $Global::minimal_command_line_length = 8_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + if($sshlogin_string eq ":") { + $sshlogin->set_maxlength(Limits::Command::max_length()); + } else { + # If all chars needs to be quoted, every other character will be \ + $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); + } + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::cleanup) { + ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there are no remote --sshlogin.\n"); + } + } + } +} + +sub remote_hosts { + # Return sshlogins that are not ':' + # Uses: + # %Global::host + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my $cmd = ""; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles.\n"); + ::wait_and_exit(255); + } + $workdir ||= Job->new("")->workdir(); + $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . 
"&"; + } + } + $cmd .= "wait;"; + debug("init", "basesetup: $cmd\n"); + print `$cmd`; +} + +sub cleanup_basefile { + # Remove the basefiles transferred + # Uses: + # %Global::host + # @opt::basefile + # Returns: N/A + my $cmd = ""; + my $workdir = Job->new("")->workdir(); + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; + } + } + $cmd .= "wait;"; + debug("init", "basecleanup: $cmd\n"); + print `$cmd`; +} + +sub filter_hosts { + # Remove down --sshlogins from active duty. + # Find ncpus, ncores, maxlen, time-to-login for each host. + # Uses: + # %Global::host + # $Global::minimal_command_line_length + # $opt::use_cpus_instead_of_cores + # Returns: N/A + + my ($ncores_ref, $ncpus_ref, $time_to_login_ref, $maxlen_ref, + $echo_ref, $down_hosts_ref) = + parse_host_filtering(parallelized_host_filtering()); + + delete @Global::host{@$down_hosts_ref}; + @$down_hosts_ref and ::warning("Removed @$down_hosts_ref\n"); + + $Global::minimal_command_line_length = 8_000_000; + while (my ($sshlogin, $obj) = each %Global::host) { + if($sshlogin eq ":") { next } + $ncpus_ref->{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); + $ncores_ref->{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); + $time_to_login_ref->{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); + $maxlen_ref->{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); + if($opt::use_cpus_instead_of_cores) { + $obj->set_ncpus($ncpus_ref->{$sshlogin}); + } else { + $obj->set_ncpus($ncores_ref->{$sshlogin}); + } + $obj->set_time_to_login($time_to_login_ref->{$sshlogin}); + $obj->set_maxlength($maxlen_ref->{$sshlogin}); + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, + int($maxlen_ref->{$sshlogin}/2)); + ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus_ref->{$sshlogin}, + " ncores:", $ncores_ref->{$sshlogin}, + " time_to_login:", $time_to_login_ref->{$sshlogin}, + " maxlen:", $maxlen_ref->{$sshlogin}, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub parse_host_filtering { + # Input: + # @lines = output from parallelized_host_filtering() + # Returns: + # \%ncores = number of cores of {host} + # \%ncpus = number of cpus of {host} + # \%time_to_login = time_to_login on {host} + # \%maxlen = max command len on {host} + # \%echo = echo received from {host} + # \@down_hosts = list of hosts with no answer + my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); + + for (@_) { + chomp; + my @col = split /\t/, $_; + if(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[7] eq "15") { + # exit == 255 or signal == 15: ssh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set ncpus and ncores = 1 + ::warning("Could not figure out ", + "number of cpus on $host. 
Using 1.\n"); + $ncores{$host} = 1; + $ncpus{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(not $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not $ncpus{$col[0]}) { + $ncpus{$col[0]} = $col[1]; + } elsif(not $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { + # Skip these: + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + @down_hosts = uniq(@down_hosts); + return(\%ncores, \%ncpus, \%time_to_login, \%maxlen, \%echo, \@down_hosts); +} + +sub parallelized_host_filtering { + # Uses: + # $Global::envvar + # %Global::host + # Returns: + # text entries with: + # * joblog line + # * hostname \t number of cores + # * hostname \t number of cpus + # * hostname \t max-line-length-allowed + # * hostname \t empty + my(@cores, @cpus, @maxline, @echo); + my $envvar = ::shell_quote_scalar($Global::envvar); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + my $sshcmd = "true $host; exec " . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); + push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); + push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); + push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); + # 'echo' is used to get the best possible value for an ssh login time + push(@echo, $host."\t".$sshcmd." echo\n\0"); + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); + print $fh @cores, @cpus, @maxline, @echo; + close $fh; + # --timeout 5: Setting up an SSH connection and running a simple + # command should never take > 5 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon it overloaded, try 3 times + # -s 16000: Half of the max line on UnixWare + # TODO sh -c wrapper to work in csh + my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; + ::debug("init", $cmd, "\n"); + my @out; + my $prepend = ""; + open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); + for(<$host_fh>) { + if(/\'$/) { + # if last char = ' then append next line + # This may be due to quoting of $Global::envvar + $prepend .= $_; + next; + } + $_ = $prepend . 
$_; + $prepend = ""; + push @out, $_; + } + close $host_fh; + $Global::debug or unlink $tmpfile; + return @out; +} + +sub onall { + # Runs @command on all hosts. + # Uses parallel to run @command on each host. + # --jobs = number of hosts to run on simultaneously. + # For each host a parallel command with the args will be running. + # Uses: + # $Global::quoting + # @opt::basefile + # $opt::jobs + # $opt::linebuffer + # $opt::ungroup + # $opt::group + # $opt::keeporder + # $opt::D + # $opt::plain + # $opt::max_chars + # $opt::linebuffer + # $opt::files + # $opt::colsep + # $opt::timeout + # $opt::plain + # $opt::retries + # $opt::max_chars + # $opt::arg_sep + # $opt::arg_file_sep + # @opt::v + # @opt::env + # %Global::host + # $Global::exitstatus + # $Global::debug + # $Global::joblog + # $opt::tag + # $opt::joblog + # Input: + # @command = command to run on all hosts + # Returns: N/A + sub tmp_joblog { + # Input: + # $joblog = filename of joblog - undef if none + # Returns: + # $tmpfile = temp file for joblog - undef if none + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my ($input_source_fh_ref,@command) = @_; + if($Global::quoting) { + @command = shell_quote_empty(@command); + } + + # Copy all @input_source_fh (-a and :::) into tempfiles + my @argfiles = (); + for my $fh (@$input_source_fh_ref) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((@opt::v) ? "-vv" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + (@opt::env ? 
map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 --will-cite -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + ::debug("init", "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"; + } + close $parallel_fh; + $Global::exitstatus = $? >> 8; + debug("init", "--onall exitvalue ", $?); + if(@opt::basefile) { cleanup_basefile(); } + $Global::debug or unlink(@argfiles); + my %seen; + for my $joblog (@joblogs) { + # Append to $joblog + open(my $fh, "<", $joblog) || ::die_bug("Cannot open tmp joblog $joblog"); + # Skip first line (header); + <$fh>; + print $Global::joblog (<$fh>); + close $fh; + unlink($joblog); + } +} + +sub __SIGNAL_HANDLING__ {} + +sub save_original_signal_handler { + # Remember the original signal handler + # Uses: + # %Global::original_sig + # Returns: N/A + $SIG{INT} = sub { + if($opt::tmux) { qx { tmux kill-session -t p$$ }; } + wait_and_exit(255); + }; + $SIG{TERM} = sub { + if($opt::tmux) { qx { tmux kill-session -t p$$ }; } + wait_and_exit(255); + }; + %Global::original_sig = %SIG; + $SIG{TERM} = sub {}; # Dummy until jobs really start + $SIG{ALRM} = 'IGNORE'; +} + +sub list_running_jobs { + # Print running jobs on tty + # Uses: + # %Global::running + # Returns: N/A + for my $job (values %Global::running) { + ::status("$Global::progname: ",$job->replaced(),"\n"); + } +} + +sub start_no_new_jobs { + # Start no more jobs + # Uses: + # %Global::original_sig + # %Global::unlink + # $Global::start_no_new_jobs + # Returns: N/A + $SIG{TERM} = $Global::original_sig{TERM}; + unlink keys %Global::unlink; + ::status + ("$Global::progname: SIGTERM received. No new jobs will be started.\n", + "$Global::progname: Waiting for these ", scalar(keys %Global::running), + " jobs to finish. Send SIGTERM again to stop now.\n"); + list_running_jobs(); + $Global::start_no_new_jobs ||= 1; +} + +sub reaper { + # A job finished. + # Print the output. + # Start another job + # Uses: + # %Global::sshmaster + # %Global::running + # $Global::tty_taken + # @Global::slots + # $opt::timeout + # $Global::timeoutq + # $opt::halt + # $opt::keeporder + # $Global::total_running + # Returns: + # $children_reaped = number of children finished + my $stiff; + my $children_reaped = 0; + debug("run", "Reaper "); + while (($stiff = waitpid(-1, &WNOHANG)) > 0) { + # $stiff = pid of dead process + $children_reaped++; + if($Global::sshmaster{$stiff}) { + # This is one of the ssh -M: ignore + next; + } + my $job = $Global::running{$stiff}; + # '-a <(seq 10)' will give us a pid not in %Global::running + $job or next; + $job->set_exitstatus($? >> 8); + $job->set_exitsignal($? 
& 127); + debug("run", "seq ",$job->seq()," died (", $job->exitstatus(), ")"); + $job->set_endtime(::now()); + if($stiff == $Global::tty_taken) { + # The process that died had the tty => release it + $Global::tty_taken = 0; + } + + if(not $job->should_be_retried()) { + # The job is done + # Free the jobslot + push @Global::slots, $job->slot(); + if($opt::timeout) { + # Update average runtime for timeout + $Global::timeoutq->update_median_runtime($job->runtime()); + } + # Force printing now if --halt forces us to exit + my $print_now = + ($opt::halt and + (($opt::halt == 2 and $job->exitstatus()) + or + ($opt::halt == -2 and not $job->exitstatus()))); + if($opt::keeporder and not $print_now) { + $job->print_earlier_jobs(); + } else { + $job->print(); + } + $job->should_we_halt(); + } + my $sshlogin = $job->sshlogin(); + $sshlogin->dec_jobs_running(); + $sshlogin->inc_jobs_completed(); + $Global::total_running--; + delete $Global::running{$stiff}; + start_more_jobs(); + if($opt::progress) { + my %progress = progress(); + ::status("\r",$progress{'status'}); + } + } + debug("run", "done "); + return $children_reaped; +} + +sub __USAGE__ {} + +sub killall { + # Kill all jobs + # Send all jobs TERM + # Wait + # Send all jobs TERM + # Wait + # Send all jobs KILL + # Send all (grand*)children KILL + + $Global::start_no_new_jobs ||= 1; + # pids of the all children and (grand*)children + # before we start the blood bath + my @family_pids = family_pids(keys %Global::running); + # Send jobs TERM + ::debug("kill","TERM ", join(' ',keys %Global::running),"\n"); + kill "TERM", keys %Global::running; + # Wait up to 200 ms + my $sleepsum = 0; + my $sleep = 0.001; + for (; kill(0, keys %Global::running) and $sleepsum < 200; + $sleepsum += $sleep) { + # This can change %Global::running + ::debug("kill","Slept $sleepsum\n"); + if((my $stiff = waitpid(-1, &WNOHANG)) > 0) { + ::debug("kill","reaped $stiff\n"); + + $Global::total_running--; + delete $Global::running{$stiff}; + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + } + # Send jobs TERM (again) + ::debug("kill","TERM ", join(' ',keys %Global::running),"\n"); + kill "TERM", keys %Global::running; + # Wait up to 200 ms + $sleepsum = 0; + $sleep = 0.001; + for (; kill(0, keys %Global::running) and $sleepsum < 200; + $sleepsum += $sleep) { + # This can change %Global::running + if((my $stiff = waitpid(-1, &WNOHANG)) > 0) { + $Global::total_running--; + delete $Global::running{$stiff}; + $sleep = $sleep/2+0.001; + } + $sleep *= 1.1; + ::usleep($sleep); + } + # Send jobs KILL + ::debug("kill","KILL ", join(' ',keys %Global::running),"\n"); + kill "KILL", keys %Global::running; + # Send all (grand*)children KILL (if there are any left) + ::debug("kill","KILL @family_pids\n"); + kill "KILL", @family_pids; +} + +sub family_pids { + # Find the pids with this->pid as (grand)*parent + # Input: + # @parents = pids of parents + # Returns: + # @pids = pids of (grand)*children + my @parents = @_; + my @pids; + + my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); + + my @more = @parents; + # While more (grand)*children + while(@more) { + my @m; + push @pids, @more; + for my $parent (@more) { + if($children_of_ref->{$parent}) { + # add the children of this parent + push @m, @{$children_of_ref->{$parent}}; + } + } + @more = @m; + } + return (@pids); +} + +sub wait_and_exit { + # If we do not wait, we sometimes get segfault + # Returns: N/A + my $error = shift; + unlink keys %Global::unlink; + if($error) { + # Kill all without printing 
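+ # Summary of the escalation performed by killall() above (descriptive only):
+ #   kill "TERM", keys %Global::running;   # ask the jobs to quit
+ #   ... reap with waitpid(-1, WNOHANG) for up to ~200 ms ...
+ #   kill "TERM", keys %Global::running;   # ask again
+ #   ... reap for up to ~200 ms more ...
+ #   kill "KILL", keys %Global::running;   # force the jobs
+ #   kill "KILL", @family_pids;            # and their (grand*)children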
+ killall(); + } + for (keys %Global::unkilled_children) { + kill 9, $_; + waitpid($_,0); + delete $Global::unkilled_children{$_}; + } + wait(); + exit($error); +} + +sub die_usage { + # Returns: N/A + usage(); + wait_and_exit(255); +} + +sub usage { + # Returns: N/A + print join + ("\n", + "Usage:", + "", + "$Global::progname [options] [command [arguments]] < list_of_arguments", + "$Global::progname [options] [command [arguments]] (::: arguments|:::: argfile(s))...", + "cat ... | $Global::progname --pipe [options] [command [arguments]]", + "", + "-j n Run n jobs in parallel", + "-k Keep same order", + "-X Multiple arguments with context replace", + "--colsep regexp Split input on regexp for positional replacements", + "{} {.} {/} {/.} {#} {%} {= perl code =} Replacement strings", + "{3} {3.} {3/} {3/.} {=3 perl code =} Positional replacement strings", + "With --plus: {} = {+/}/{/} = {.}.{+.} = {+/}/{/.}.{+.} = {..}.{+..} =", + " {+/}/{/..}.{+..} = {...}.{+...} = {+/}/{/...}.{+...}", + "", + "-S sshlogin Example: foo\@server.example.com", + "--slf .. Use ~/.parallel/sshloginfile as the list of sshlogins", + "--trc {}.bar Shorthand for --transfer --return {}.bar --cleanup", + "--onall Run the given command with argument on all sshlogins", + "--nonall Run the given command with no arguments on all sshlogins", + "", + "--pipe Split stdin (standard input) to multiple jobs.", + "--recend str Record end separator for --pipe.", + "--recstart str Record start separator for --pipe.", + "", + "See 'man $Global::progname' for details", + "", + "Academic tradition requires you to cite works you base your article on.", + "When using programs that use GNU Parallel to process data for publication", + "please cite:", + "", + " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,", + " ;login: The USENIX Magazine, February 2011:42-47.", + "", + "This helps funding further development; and it won't cost you a cent.", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n", + ""); +} + + +sub citation_notice { + # if --will-cite or --plain: do nothing + # if stderr redirected: do nothing + # if ~/.parallel/will-cite: do nothing + # else: print citation notice to stderr + if($opt::willcite + or + $opt::plain + or + not -t $Global::original_stderr + or + -e $ENV{'HOME'}."/.parallel/will-cite") { + # skip + } else { + ::status + ("Academic tradition requires you to cite works you base your article on.\n", + "When using programs that use GNU Parallel to process data for publication\n", + "please cite:\n", + "\n", + " O. Tange (2011): GNU Parallel - The Command-Line Power Tool,\n", + " ;login: The USENIX Magazine, February 2011:42-47.\n", + "\n", + "This helps funding further development; and it won't cost you a cent.\n", + "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.\n", + "\n", + "To silence the citation notice: run 'parallel --bibtex'.\n\n", + ); + } +} + +sub status { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + print $fh @w; + flush $fh; +} + +sub warning { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + my $prog = $Global::progname || "parallel"; + print $fh $prog, ": Warning: ", @w; +} + +sub error { + my @w = @_; + my $fh = $Global::status_fd || *STDERR; + my $prog = $Global::progname || "parallel"; + print $fh $prog, ": Error: ", @w; +} + +sub die_bug { + my $bugid = shift; + print STDERR + ("$Global::progname: This should not happen. 
You have found a bug.\n",
+ "Please contact and include:\n",
+ "* The version number: $Global::version\n",
+ "* The bugid: $bugid\n",
+ "* The command line being run\n",
+ "* The files being read (put the files on a webserver if they are big)\n",
+ "\n",
+ "If you get the error on smaller/fewer files, please include those instead.\n");
+ ::wait_and_exit(255);
+}
+
+sub version {
+ # Returns: N/A
+ print join("\n",
+ "GNU $Global::progname $Global::version",
+ "Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014,2015 Ole Tange",
+ "and Free Software Foundation, Inc.",
+ "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>",
+ "This is free software: you are free to change and redistribute it.",
+ "GNU $Global::progname comes with no warranty.",
+ "",
+ "Web site: http://www.gnu.org/software/${Global::progname}\n",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite as described in 'parallel --bibtex'.\n",
+ );
+}
+
+sub bibtex {
+ # Returns: N/A
+ print join("\n",
+ "Academic tradition requires you to cite works you base your article on.",
+ "When using programs that use GNU Parallel to process data for publication",
+ "please cite:",
+ "",
+ "\@article{Tange2011a,",
+ " title = {GNU Parallel - The Command-Line Power Tool},",
+ " author = {O. Tange},",
+ " address = {Frederiksberg, Denmark},",
+ " journal = {;login: The USENIX Magazine},",
+ " month = {Feb},",
+ " number = {1},",
+ " volume = {36},",
+ " url = {http://www.gnu.org/s/parallel},",
+ " year = {2011},",
+ " pages = {42-47}",
+ " doi = {10.5281/zenodo.16303}",
+ "}",
+ "",
+ "(Feel free to use \\nocite{Tange2011a})",
+ "",
+ "This helps funding further development; and it won't cost you a cent.",
+ "If you pay 10000 EUR you should feel free to use GNU Parallel without citing.",
+ "",
+ "If you send a copy of your published article to tange\@gnu.org, it will be",
+ "mentioned in the release notes of next version of GNU Parallel.\n\n",
+ );
+ while(not -e $ENV{'HOME'}."/.parallel/will-cite") {
+ print "\nType: 'will cite' and press enter.\n> ";
+ my $input = <STDIN>;
+ if($input =~ /will cite/i) {
+ mkdir $ENV{'HOME'}."/.parallel";
+ if(open (my $fh, ">", $ENV{'HOME'}."/.parallel/will-cite")) {
+ close $fh;
+ print "\nThank you for your support. It is much appreciated. The citation\n",
+ "notice is now silenced. You may also use '--will-cite'.\n",
+ "If you use '--will-cite' in scripts you are expected to pay\n",
+ "the 10000 EUR, because you are making it harder to see the\n",
+ "citation notice.\n\n";
+ } else {
+ print "\nThank you for your support. It is much appreciated. The citation\n",
+ "cannot permanently be silenced. 
Use '--will-cite' instead.\n", + "If you use '--will-cite' in scripts you are expected to pay\n", + "the 10000 EUR, because you are making it harder to see the\n", + "citation notice.\n\n"; + last; + } + } + } +} + +sub show_limits { + # Returns: N/A + print("Maximal size of command: ",Limits::Command::real_max_length(),"\n", + "Maximal used size of command: ",Limits::Command::max_length(),"\n", + "\n", + "Execution of will continue now, and it will try to read its input\n", + "and run commands; if this is not what you wanted to happen, please\n", + "press CTRL-D or CTRL-C\n"); +} + +sub __GENERIC_COMMON_FUNCTION__ {} + +sub tmpfile { + # Create tempfile as $TMPDIR/parXXXXX + # Returns: + # $filehandle = opened file handle + # $filename = file name created + return ::tempfile(DIR=>$ENV{'TMPDIR'}, TEMPLATE => 'parXXXXX', @_); +} + +sub tmpname { + # Select a name that does not exist + # Do not create the file as that may cause problems + # if you ssh to localhost (or a shared file system) under a different name + my $name = shift; + my($tmpname); + do { + $tmpname = $ENV{'TMPDIR'}."/".$name. + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while($Global::unlink{$tmpname}++ or -e $tmpname); + return $tmpname; +} + +sub tmpfifo { + # Securely make a fifo by securely making a dir with a fifo in it + use POSIX qw(mkfifo); + my $tmpfifo = tmpname("fif",@_); + mkfifo($tmpfifo,0600); + return $tmpfifo; +} + +sub uniq { + # Remove duplicates and return unique values + return keys %{{ map { $_ => 1 } @_ }}; +} + +sub min { + # Returns: + # Minimum value of array + my $min; + for (@_) { + # Skip undefs + defined $_ or next; + defined $min or do { $min = $_; next; }; # Set $_ to the first non-undef + $min = ($min < $_) ? $min : $_; + } + return $min; +} + +sub max { + # Returns: + # Maximum value of array + my $max; + for (@_) { + # Skip undefs + defined $_ or next; + defined $max or do { $max = $_; next; }; # Set $_ to the first non-undef + $max = ($max > $_) ? $max : $_; + } + return $max; +} + +sub sum { + # Returns: + # Sum of values of array + my @args = @_; + my $sum = 0; + for (@args) { + # Skip undefs + $_ and do { $sum += $_; } + } + return $sum; +} + +sub undef_as_zero { + my $a = shift; + return $a ? $a : 0; +} + +sub undef_as_empty { + my $a = shift; + return $a ? 
$a : ""; +} + +sub multiply_binary_prefix { + # Evalualte numbers with binary prefix + # Ki=2^10, Mi=2^20, Gi=2^30, Ti=2^40, Pi=2^50, Ei=2^70, Zi=2^80, Yi=2^80 + # ki=2^10, mi=2^20, gi=2^30, ti=2^40, pi=2^50, ei=2^70, zi=2^80, yi=2^80 + # K =2^10, M =2^20, G =2^30, T =2^40, P =2^50, E =2^70, Z =2^80, Y =2^80 + # k =10^3, m =10^6, g =10^9, t=10^12, p=10^15, e=10^18, z=10^21, y=10^24 + # 13G = 13*1024*1024*1024 = 13958643712 + # Input: + # $s = string with prefixes + # Returns: + # $value = int with prefixes multiplied + my $s = shift; + if(not $s) { + return $s; + } + $s =~ s/ki/*1024/gi; + $s =~ s/mi/*1024*1024/gi; + $s =~ s/gi/*1024*1024*1024/gi; + $s =~ s/ti/*1024*1024*1024*1024/gi; + $s =~ s/pi/*1024*1024*1024*1024*1024/gi; + $s =~ s/ei/*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/zi/*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/yi/*1024*1024*1024*1024*1024*1024*1024*1024/gi; + $s =~ s/xi/*1024*1024*1024*1024*1024*1024*1024*1024*1024/gi; + + $s =~ s/K/*1024/g; + $s =~ s/M/*1024*1024/g; + $s =~ s/G/*1024*1024*1024/g; + $s =~ s/T/*1024*1024*1024*1024/g; + $s =~ s/P/*1024*1024*1024*1024*1024/g; + $s =~ s/E/*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Z/*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/Y/*1024*1024*1024*1024*1024*1024*1024*1024/g; + $s =~ s/X/*1024*1024*1024*1024*1024*1024*1024*1024*1024/g; + + $s =~ s/k/*1000/g; + $s =~ s/m/*1000*1000/g; + $s =~ s/g/*1000*1000*1000/g; + $s =~ s/t/*1000*1000*1000*1000/g; + $s =~ s/p/*1000*1000*1000*1000*1000/g; + $s =~ s/e/*1000*1000*1000*1000*1000*1000/g; + $s =~ s/z/*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/y/*1000*1000*1000*1000*1000*1000*1000*1000/g; + $s =~ s/x/*1000*1000*1000*1000*1000*1000*1000*1000*1000/g; + + $s = eval $s; + ::debug($s); + return $s; +} + +{ + my ($disk_full_fh, $b8193, $error_printed); + sub exit_if_disk_full { + # Checks if $TMPDIR is full by writing 8kb to a tmpfile + # If the disk is full: Exit immediately. + # Returns: + # N/A + if(not $disk_full_fh) { + my $name; + ($disk_full_fh, $name) = ::tmpfile(SUFFIX => ".df"); + # Separate unlink due to NFS dealing badly with File::Temp + unlink $name; + $b8193 = "x"x8193; + } + # Linux does not discover if a disk is full if writing <= 8192 + # Tested on: + # bfs btrfs cramfs ext2 ext3 ext4 ext4dev jffs2 jfs minix msdos + # ntfs reiserfs tmpfs ubifs vfat xfs + # TODO this should be tested on different OS similar to this: + # + # doit() { + # sudo mount /dev/ram0 /mnt/loop; sudo chmod 1777 /mnt/loop + # seq 100000 | parallel --tmpdir /mnt/loop/ true & + # seq 6900000 > /mnt/loop/i && echo seq OK + # seq 6980868 > /mnt/loop/i + # seq 10000 > /mnt/loop/ii + # sleep 3 + # sudo umount /mnt/loop/ || sudo umount -l /mnt/loop/ + # echo >&2 + # } + print $disk_full_fh $b8193; + if(not $disk_full_fh + or + tell $disk_full_fh != 8193) { + # On raspbian the disk can be full except for 10 chars. + if(not $error_printed) { + ::error("Output is incomplete. Cannot append to buffer file in $ENV{'TMPDIR'}. Is the disk full?\n"); + ::error("Change \$TMPDIR with --tmpdir or use --compress.\n"); + $error_printed = 1; + } + ::wait_and_exit(255); + } + truncate $disk_full_fh, 0; + seek($disk_full_fh, 0, 0) || die; + } +} + +sub spacefree { + # Remove comments and spaces + # Inputs: + # $spaces = keep 1 space? 
+ # $s = string to remove spaces from + # Returns: + # $s = with spaces removed + my $spaces = shift; + my $s = shift; + $s =~ s/#.*//mg; + if($spaces) { + $s =~ s/\s+/ /mg; + } else { + $s =~ s/\s//mg; + } + return $s; +} + +{ + my $hostname; + sub hostname { + if(not $hostname) { + $hostname = `hostname`; + chomp($hostname); + $hostname ||= "nohostname"; + } + return $hostname; + } +} + +sub which { + # Input: + # @programs = programs to find the path to + # Returns: + # @full_path = full paths to @programs. Nothing if not found + my @which; + for my $prg (@_) { + push(@which, grep { not -d $_ and -x $_ } + map { $_."/".$prg } split(":",$ENV{'PATH'})); + } + return @which; +} + +{ + my ($regexp,%fakename); + + sub parent_shell { + # Input: + # $pid = pid to see if (grand)*parent is a shell + # Returns: + # $shellpath = path to shell - undef if no shell found + my $pid = shift; + if(not $regexp) { + # All shells known to mankind + # + # ash bash csh dash fdsh fish fizsh ksh ksh93 mksh pdksh + # posh rbash rush rzsh sash sh static-sh tcsh yash zsh + my @shells = (qw(ash bash csh dash fdsh fish fizsh ksh + ksh93 mksh pdksh posh rbash rush rzsh + sash sh static-sh tcsh yash zsh -sh -csh), + '-sh (sh)' # sh on FreeBSD + ); + # Can be formatted as: + # [sh] -sh sh busybox sh -sh (sh) + # /bin/sh /sbin/sh /opt/csw/sh + # NOT: foo.sh sshd crash flush pdflush scosh fsflush ssh + my $shell = "(?:".join("|",map { "\Q$_\E" } @shells).")"; + $regexp = '^((\[)('. $shell. ')(\])|(|\S+/|busybox )('. $shell. '))($| [^(])'; + %fakename = ( + # sh disguises itself as -sh (sh) on FreeBSD + "-sh (sh)" => ["sh"], + # csh and tcsh disguise themselves as -sh/-csh + "-sh" => ["csh", "tcsh"], + "-csh" => ["tcsh", "csh"], + ); + } + my ($children_of_ref, $parent_of_ref, $name_of_ref) = pid_table(); + my $shellpath; + my $testpid = $pid; + while($testpid) { + ::debug("init", "shell? ". $name_of_ref->{$testpid}."\n"); + if($name_of_ref->{$testpid} =~ /$regexp/o) { + ::debug("init", "which ".($3||$6)." => "); + $shellpath = (which($3 || $6,@{$fakename{$3 || $6}}))[0]; + ::debug("init", "shell path $shellpath\n"); + $shellpath and last; + } + if($testpid == $parent_of_ref->{$testpid}) { + # In Solaris zones, the PPID of the zsched process is itself + last; + } + $testpid = $parent_of_ref->{$testpid}; + } + return $shellpath; + } +} + +{ + my %pid_parentpid_cmd; + + sub pid_table { + # Returns: + # %children_of = { pid -> children of pid } + # %parent_of = { pid -> pid of parent } + # %name_of = { pid -> commandname } + + if(not %pid_parentpid_cmd) { + # Filter for SysV-style `ps` + my $sysv = q( ps -ef | perl -ane '1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). + q(s/^.{$s}//; print "@F[1,2] $_"' ); + # Crazy msys: ' is not accepted on the cmd line, but " are treated as ' + my $msys = q( ps -ef | perl -ane "1..1 and /^(.*)CO?MM?A?N?D/ and $s=length $1;). 
+ q(s/^.{$s}//; print qq{@F[1,2] $_}" ); + # BSD-style `ps` + my $bsd = q(ps -o pid,ppid,command -ax); + %pid_parentpid_cmd = + ( + 'aix' => $sysv, + 'cygwin' => $sysv, + 'darwin' => $bsd, + 'dec_osf' => $sysv, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $sysv, + 'hpux' => $sysv, + 'linux' => $sysv, + 'mirbsd' => $bsd, + 'msys' => $msys, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $sysv, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $sysv, + 'syllable' => "echo ps not supported", + ); + } + $pid_parentpid_cmd{$^O} or ::die_bug("pid_parentpid_cmd for $^O missing"); + + my (@pidtable,%parent_of,%children_of,%name_of); + # Table with pid -> children of pid + @pidtable = `$pid_parentpid_cmd{$^O}`; + my $p=$$; + for (@pidtable) { + # must match: 24436 21224 busybox ash + # must match: 24436 21224 <> + # or: perl -e 'while($0=" "){}' + if(/^\s*(\S+)\s+(\S+)\s+(\S+.*)/ + or + $^O eq "darwin" and /^\s*(\S+)\s+(\S+)\s+()$/) { + $parent_of{$1} = $2; + push @{$children_of{$2}}, $1; + $name_of{$1} = $3; + } else { + ::die_bug("pidtable format: $_"); + } + } + return(\%children_of, \%parent_of, \%name_of); + } +} + +sub now { + # Returns time since epoch as in seconds with 3 decimals + # Uses: + # @Global::use + # Returns: + # $time = time now with millisecond accuracy + if(not $Global::use{"Time::HiRes"}) { + if(eval "use Time::HiRes qw ( time );") { + eval "sub TimeHiRestime { return Time::HiRes::time };"; + } else { + eval "sub TimeHiRestime { return time() };"; + } + $Global::use{"Time::HiRes"} = 1; + } + + return (int(TimeHiRestime()*1000))/1000; +} + +sub usleep { + # Sleep this many milliseconds. + # Input: + # $ms = milliseconds to sleep + my $ms = shift; + ::debug(int($ms),"ms "); + select(undef, undef, undef, $ms/1000); +} + +sub reap_usleep { + # Reap dead children. + # If no dead children: Sleep specified amount with exponential backoff + # Input: + # $ms = milliseconds to sleep + # Returns: + # $ms/2+0.001 if children reaped + # $ms*1.1 if no children reaped + my $ms = shift; + if(reaper()) { + # Sleep exponentially shorter (1/2^n) if a job finished + return $ms/2+0.001; + } else { + if($opt::timeout) { + $Global::timeoutq->process_timeouts(); + } + if($opt::memfree) { + kill_youngster_if_not_enough_mem(); + } + # When a child dies, wake up from sleep (or select(,,,)) + $SIG{CHLD} = sub { kill "ALRM", $$ }; + usleep($ms); + # --compress needs $SIG{CHLD} undefined + delete $SIG{CHLD}; + exit_if_disk_full(); + if($opt::linebuffer) { + for my $job (values %Global::running) { + $job->print(); + } + } + # Sleep exponentially longer (1.1^n) if a job did not finish, + # though at most 1000 ms. + return (($ms < 1000) ? ($ms * 1.1) : ($ms)); + } +} + +sub kill_youngster_if_not_enough_mem { + # Check each $sshlogin if there is enough mem. + # If less than 50% enough free mem: kill off the youngest child + # Put the child back in the queue. + # Uses: + # %Global::running + my %jobs_of; + my @sshlogins; + + for my $job (values %Global::running) { + if(not $jobs_of{$job->sshlogin()}) { + push @sshlogins, $job->sshlogin(); + } + push @{$jobs_of{$job->sshlogin()}}, $job; + } + for my $sshlogin (@sshlogins) { + for my $job (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}}) { + if($sshlogin->memfree() < $opt::memfree * 0.5) { + ::debug("mem","\n",map { $_->seq()." 
" } (sort { $b->seq() <=> $a->seq() } @{$jobs_of{$sshlogin}})); + ::debug("mem","\n", $job->seq(), "killed ", + $sshlogin->memfree()," < ",$opt::memfree * 0.5); + $job->kill(); + $sshlogin->memfree_recompute(); + } else { + last; + } + } + ::debug("mem","Free mem OK ", $sshlogin->memfree()," > ",$opt::memfree * 0.5); + } +} + +sub __DEBUGGING__ {} + +sub debug { + # Uses: + # $Global::debug + # %Global::fd + # Returns: N/A + $Global::debug or return; + @_ = grep { defined $_ ? $_ : "" } @_; + if($Global::debug eq "all" or $Global::debug eq $_[0]) { + if($Global::fd{1}) { + # Original stdout was saved + my $stdout = $Global::fd{1}; + print $stdout @_[1..$#_]; + } else { + print @_[1..$#_]; + } + } +} + +sub my_memory_usage { + # Returns: + # memory usage if found + # 0 otherwise + use strict; + use FileHandle; + + my $pid = $$; + if(-e "/proc/$pid/stat") { + my $fh = FileHandle->new("; + chomp $data; + $fh->close; + + my @procinfo = split(/\s+/,$data); + + return undef_as_zero($procinfo[22]); + } else { + return 0; + } +} + +sub my_size { + # Returns: + # $size = size of object if Devel::Size is installed + # -1 otherwise + my @size_this = (@_); + eval "use Devel::Size qw(size total_size)"; + if ($@) { + return -1; + } else { + return total_size(@_); + } +} + +sub my_dump { + # Returns: + # ascii expression of object if Data::Dump(er) is installed + # error code otherwise + my @dump_this = (@_); + eval "use Data::Dump qw(dump);"; + if ($@) { + # Data::Dump not installed + eval "use Data::Dumper;"; + if ($@) { + my $err = "Neither Data::Dump nor Data::Dumper is installed\n". + "Not dumping output\n"; + ::status($err); + return $err; + } else { + return Dumper(@dump_this); + } + } else { + # Create a dummy Data::Dump:dump as Hans Schou sometimes has + # it undefined + eval "sub Data::Dump:dump {}"; + eval "use Data::Dump qw(dump);"; + return (Data::Dump::dump(@dump_this)); + } +} + +sub my_croak { + eval "use Carp; 1"; + $Carp::Verbose = 1; + croak(@_); +} + +sub my_carp { + eval "use Carp; 1"; + $Carp::Verbose = 1; + carp(@_); +} + +sub __OBJECT_ORIENTED_PARTS__ {} + +package SSHLogin; + +sub new { + my $class = shift; + my $sshlogin_string = shift; + my $ncpus; + my %hostgroups; + # SSHLogins can have these formats: + # @grp+grp/ncpu//usr/bin/ssh user@server + # ncpu//usr/bin/ssh user@server + # /usr/bin/ssh user@server + # user@server + # ncpu/user@server + # @grp+grp/user@server + if($sshlogin_string =~ s:^\@([^/]+)/?::) { + # Look for SSHLogin hostgroups + %hostgroups = map { $_ => 1 } split(/\+/, $1); + } + if ($sshlogin_string =~ s:^(\d+)/::) { + # Override default autodetected ncpus unless missing + $ncpus = $1; + } + my $string = $sshlogin_string; + # An SSHLogin is always in the hostgroup of its $string-name + $hostgroups{$string} = 1; + @Global::hostgroups{keys %hostgroups} = values %hostgroups; + my @unget = (); + my $no_slash_string = $string; + $no_slash_string =~ s/[^-a-z0-9:]/_/gi; + return bless { + 'string' => $string, + 'jobs_running' => 0, + 'jobs_completed' => 0, + 'maxlength' => undef, + 'max_jobs_running' => undef, + 'orig_max_jobs_running' => undef, + 'ncpus' => $ncpus, + 'hostgroups' => \%hostgroups, + 'sshcommand' => undef, + 'serverlogin' => undef, + 'control_path_dir' => undef, + 'control_path' => undef, + 'time_to_login' => undef, + 'last_login_at' => undef, + 'loadavg_file' => $ENV{'HOME'} . "/.parallel/tmp/loadavg-" . + $no_slash_string, + 'loadavg' => undef, + 'last_loadavg_update' => 0, + 'swap_activity_file' => $ENV{'HOME'} . "/.parallel/tmp/swap_activity-" . 
+ $no_slash_string, + 'swap_activity' => undef, + }, ref($class) || $class; +} + +sub DESTROY { + my $self = shift; + # Remove temporary files if they are created. + unlink $self->{'loadavg_file'}; + unlink $self->{'swap_activity_file'}; +} + +sub string { + my $self = shift; + return $self->{'string'}; +} + +sub jobs_running { + my $self = shift; + return ($self->{'jobs_running'} || "0"); +} + +sub inc_jobs_running { + my $self = shift; + $self->{'jobs_running'}++; +} + +sub dec_jobs_running { + my $self = shift; + $self->{'jobs_running'}--; +} + +sub set_maxlength { + my $self = shift; + $self->{'maxlength'} = shift; +} + +sub maxlength { + my $self = shift; + return $self->{'maxlength'}; +} + +sub jobs_completed { + my $self = shift; + return $self->{'jobs_completed'}; +} + +sub in_hostgroups { + # Input: + # @hostgroups = the hostgroups to look for + # Returns: + # true if intersection of @hostgroups and the hostgroups of this + # SSHLogin is non-empty + my $self = shift; + return grep { defined $self->{'hostgroups'}{$_} } @_; +} + +sub hostgroups { + my $self = shift; + return keys %{$self->{'hostgroups'}}; +} + +sub inc_jobs_completed { + my $self = shift; + $self->{'jobs_completed'}++; +} + +sub set_max_jobs_running { + my $self = shift; + if(defined $self->{'max_jobs_running'}) { + $Global::max_jobs_running -= $self->{'max_jobs_running'}; + } + $self->{'max_jobs_running'} = shift; + if(defined $self->{'max_jobs_running'}) { + # max_jobs_running could be resat if -j is a changed file + $Global::max_jobs_running += $self->{'max_jobs_running'}; + } + # Initialize orig to the first non-zero value that comes around + $self->{'orig_max_jobs_running'} ||= $self->{'max_jobs_running'}; +} + +sub memfree { + # Returns: + # $memfree in bytes + my $self = shift; + $self->memfree_recompute(); + return (not defined $self->{'memfree'} or $self->{'memfree'}) +} + +sub memfree_recompute { + my $self = shift; + my $script = memfreescript(); + + # TODO add sshlogin and backgrounding + $self->{'memfree'} = qx{ $script }; + #::debug("mem","New free:",$self->{'memfree'}," "); +} + +{ + my $script; + + sub memfreescript { + # Returns: + # shellscript for giving available memory in bytes + if(not $script) { + my %script_of = ( + # $ free + # total used free shared buffers cached + # Mem: 8075152 4922780 3152372 338856 233356 1658604 + # -/+ buffers/cache: 3030820 5044332 + # Swap: 8286204 116924 8169280 + "linux" => q{ print (1024*((grep /buffers.cache/, `free`)[0] =~ /buffers.cache:\s+\S+\s+(\S+)/)[0]) }, + # $ vmstat 1 1 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 242793 389737 5 1 0 0 0 0 0 107 978 60 1 1 99 + "hpux" => q{ print (((reverse `vmstat 1 1`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) }, + # $ vmstat 1 2 + # kthr memory page disk faults cpu + # r b w swap free re mf pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 6496720 5170320 68 260 8 2 1 0 0 -0 3 0 0 309 1371 255 1 2 97 + # 0 0 0 6434088 5072656 7 15 8 0 0 0 0 0 261 0 0 1889 1899 3222 0 8 92 + # + # The last free is really free + "solaris" => q{ print (((reverse `vmstat 1 2`)[0] =~ /(?:\d+\D+){4}(\d+)/)[0]*1024) }, + "freebsd" => q{ + for(qx{/sbin/sysctl -a}) { + if (/^([^:]+):\s+(.+)\s*$/s) { + $sysctl->{$1} = $2; + } + } + print $sysctl->{"hw.pagesize"} * + ($sysctl->{"vm.stats.vm.v_cache_count"} + + $sysctl->{"vm.stats.vm.v_inactive_count"} + + $sysctl->{"vm.stats.vm.v_free_count"}); + }, + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # 
the appropriate command + for my $os (keys %script_of) { + $perlscript .= 'if($^O eq "'.$os.'") { '.$script_of{$os}.'}'; + } + $perlscript =~ s/[\t\n ]+/ /g; + $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); + $script = $Global::envvar. " " .$perlscript; + } + return $script + } +} + +sub swapping { + my $self = shift; + my $swapping = $self->swap_activity(); + return (not defined $swapping or $swapping) +} + +sub swap_activity { + # If the currently known swap activity is too old: + # Recompute a new one in the background + # Returns: + # last swap activity computed + my $self = shift; + # Should we update the swap_activity file? + my $update_swap_activity_file = 0; + if(-r $self->{'swap_activity_file'}) { + open(my $swap_fh, "<", $self->{'swap_activity_file'}) || ::die_bug("swap_activity_file-r"); + my $swap_out = <$swap_fh>; + close $swap_fh; + if($swap_out =~ /^(\d+)$/) { + $self->{'swap_activity'} = $1; + ::debug("swap", "New swap_activity: ", $self->{'swap_activity'}); + } + ::debug("swap", "Last update: ", $self->{'last_swap_activity_update'}); + if(time - $self->{'last_swap_activity_update'} > 10) { + # last swap activity update was started 10 seconds ago + ::debug("swap", "Older than 10 sec: ", $self->{'swap_activity_file'}); + $update_swap_activity_file = 1; + } + } else { + ::debug("swap", "No swap_activity file: ", $self->{'swap_activity_file'}); + $self->{'swap_activity'} = undef; + $update_swap_activity_file = 1; + } + if($update_swap_activity_file) { + ::debug("swap", "Updating swap_activity file ", $self->{'swap_activity_file'}); + $self->{'last_swap_activity_update'} = time; + -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + my $swap_activity; + $swap_activity = swapactivityscript(); + if($self->{'string'} ne ":") { + $swap_activity = $self->sshcommand() . " " . $self->serverlogin() . " " . + ::shell_quote_scalar($swap_activity); + } + # Run swap_activity measuring. 
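+ # Hedged sketch of the background pipeline built below (paths are
+ # illustrative; the real names come from ::tmpfile and swap_activity_file):
+ #   ( perl -e '...$^O dispatch over the vmstat variants...' > /tmp/parXXXXX.swp \
+ #     && mv /tmp/parXXXXX.swp ~/.parallel/tmp/swap_activity-HOST \
+ #     || rm /tmp/parXXXXX.swp ) &
+ # On GNU/Linux the dispatched snippet is roughly:
+ #   vmstat 1 2 | tail -n1 | awk "{print \$7*\$8}"
+ # i.e. the si*so product of the second sample; non-zero output means the
+ # machine is actively swapping.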
+ # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + my $file = $self->{'swap_activity_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".swp"); + ::debug("swap", "\n", $swap_activity, "\n"); + qx{ ($swap_activity > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; + } + return $self->{'swap_activity'}; +} + +{ + my $script; + + sub swapactivityscript { + # Returns: + # shellscript for detecting swap activity + # + # arguments for vmstat are OS dependant + # swap_in and swap_out are in different columns depending on OS + # + if(not $script) { + my %vmstat = ( + # linux: $7*$8 + # $ vmstat 1 2 + # procs -----------memory---------- ---swap-- -----io---- -system-- ----cpu---- + # r b swpd free buff cache si so bi bo in cs us sy id wa + # 5 0 51208 1701096 198012 18857888 0 0 37 153 28 19 56 11 33 1 + # 3 0 51208 1701288 198012 18857972 0 0 0 0 3638 10412 15 3 82 0 + 'linux' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # solaris: $6*$7 + # $ vmstat -S 1 2 + # kthr memory page disk faults cpu + # r b w swap free si so pi po fr de sr s3 s4 -- -- in sy cs us sy id + # 0 0 0 4628952 3208408 0 0 3 1 1 0 0 -0 2 0 0 263 613 246 1 2 97 + # 0 0 0 4552504 3166360 0 0 0 0 0 0 0 0 0 0 0 246 213 240 1 1 98 + 'solaris' => ['vmstat -S 1 2 | tail -1', '$6*$7'], + + # darwin (macosx): $21*$22 + # $ vm_stat -c 2 1 + # Mach Virtual Memory Statistics: (page size of 4096 bytes) + # free active specul inactive throttle wired prgable faults copy 0fill reactive purged file-backed anonymous cmprssed cmprssor dcomprs comprs pageins pageout swapins swapouts + # 346306 829050 74871 606027 0 240231 90367 544858K 62343596 270837K 14178 415070 570102 939846 356 370 116 922 4019813 4 0 0 + # 345740 830383 74875 606031 0 239234 90369 2696 359 553 0 0 570110 941179 356 370 0 0 0 0 0 0 + 'darwin' => ['vm_stat -c 2 1 | tail -n1', '$21*$22'], + + # ultrix: $12*$13 + # $ vmstat -S 1 2 + # procs faults cpu memory page disk + # r b w in sy cs us sy id avm fre si so pi po fr de sr s0 + # 1 0 0 4 23 2 3 0 97 7743 217k 0 0 0 0 0 0 0 0 + # 1 0 0 6 40 8 0 1 99 7743 217k 0 0 3 0 0 0 0 0 + 'ultrix' => ['vmstat -S 1 2 | tail -1', '$12*$13'], + + # aix: $6*$7 + # $ vmstat 1 2 + # System configuration: lcpu=1 mem=2048MB + # + # kthr memory page faults cpu + # ----- ----------- ------------------------ ------------ ----------- + # r b avm fre re pi po fr sr cy in sy cs us sy id wa + # 0 0 333933 241803 0 0 0 0 0 0 10 143 90 0 0 99 0 + # 0 0 334125 241569 0 0 0 0 0 0 37 5368 184 0 9 86 5 + 'aix' => ['vmstat 1 2 | tail -n1', '$6*$7'], + + # freebsd: $8*$9 + # $ vmstat -H 1 2 + # procs memory page disks faults cpu + # r b w avm fre flt re pi po fr sr ad0 ad1 in sy cs us sy id + # 1 0 0 596716 19560 32 0 0 0 33 8 0 0 11 220 277 0 0 99 + # 0 0 0 596716 19560 2 0 0 0 0 0 0 0 11 144 263 0 1 99 + 'freebsd' => ['vmstat -H 1 2 | tail -n1', '$8*$9'], + + # mirbsd: $8*$9 + # $ vmstat 1 2 + # procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 cd0 int sys cs us sy id + # 0 0 0 25776 164968 34 0 0 0 0 0 0 0 230 259 38 4 0 96 + # 0 0 0 25776 164968 24 0 0 0 0 0 0 0 237 275 37 0 0 100 + 'mirbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # netbsd: $7*$8 + # $ vmstat 1 2 + # procs memory page disks faults cpu + # r b avm fre flt re pi po fr sr w0 w1 in sy cs us sy id + # 0 0 138452 6012 54 0 0 0 1 2 3 0 4 100 23 0 0 100 + # 0 0 138456 6008 1 0 0 0 0 0 0 0 7 26 19 0 0 100 + 'netbsd' => ['vmstat 1 2 | tail -n1', '$7*$8'], + + # openbsd: $8*$9 + # $ vmstat 1 2 + # 
procs memory page disks traps cpu + # r b w avm fre flt re pi po fr sr wd0 wd1 int sys cs us sy id + # 0 0 0 76596 109944 73 0 0 0 0 0 0 1 5 259 22 0 1 99 + # 0 0 0 76604 109936 24 0 0 0 0 0 0 0 7 114 20 0 1 99 + 'openbsd' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # hpux: $8*$9 + # $ vmstat 1 2 + # procs memory page faults cpu + # r b w avm free re at pi po fr de sr in sy cs us sy id + # 1 0 0 247211 216476 4 1 0 0 0 0 0 102 73005 54 6 11 83 + # 1 0 0 247211 216421 43 9 0 0 0 0 0 144 1675 96 25269512791222387000 25269512791222387000 105 + 'hpux' => ['vmstat 1 2 | tail -n1', '$8*$9'], + + # dec_osf (tru64): $11*$12 + # $ vmstat 1 2 + # Virtual Memory Statistics: (pagesize = 8192) + # procs memory pages intr cpu + # r w u act free wire fault cow zero react pin pout in sy cs us sy id + # 3 181 36 51K 1895 8696 348M 59M 122M 259 79M 0 5 218 302 4 1 94 + # 3 181 36 51K 1893 8696 3 15 21 0 28 0 4 81 321 1 1 98 + 'dec_osf' => ['vmstat 1 2 | tail -n1', '$11*$12'], + + # gnu (hurd): $7*$8 + # $ vmstat -k 1 2 + # (pagesize: 4, size: 512288, swap size: 894972) + # free actv inact wired zeroed react pgins pgouts pfaults cowpfs hrat caobj cache swfree + # 371940 30844 89228 20276 298348 0 48192 19016 756105 99808 98% 876 20628 894972 + # 371940 30844 89228 20276 +0 +0 +0 +0 +42 +2 98% 876 20628 894972 + 'gnu' => ['vmstat -k 1 2 | tail -n1', '$7*$8'], + + # -nto (qnx has no swap) + #-irix + #-svr5 (scosysv) + ); + my $perlscript = ""; + # Make a perl script that detects the OS ($^O) and runs + # the appropriate vmstat command + for my $os (keys %vmstat) { + $vmstat{$os}[1] =~ s/\$/\\\\\\\$/g; # $ => \\\$ + $perlscript .= 'if($^O eq "'.$os.'") { print `'.$vmstat{$os}[0].' | awk "{print ' . + $vmstat{$os}[1] . '}"` }'; + } + $perlscript = "perl -e " . ::shell_quote_scalar($perlscript); + $script = $Global::envvar. " " .$perlscript; + } + return $script; + } +} + +sub too_fast_remote_login { + my $self = shift; + if($self->{'last_login_at'} and $self->{'time_to_login'}) { + # sshd normally allows 10 simultaneous logins + # A login takes time_to_login + # So time_to_login/5 should be safe + # If now <= last_login + time_to_login/5: Then it is too soon. + my $too_fast = (::now() <= $self->{'last_login_at'} + + $self->{'time_to_login'}/5); + ::debug("run", "Too fast? 
$too_fast "); + return $too_fast; + } else { + # No logins so far (or time_to_login not computed): it is not too fast + return 0; + } +} + +sub last_login_at { + my $self = shift; + return $self->{'last_login_at'}; +} + +sub set_last_login_at { + my $self = shift; + $self->{'last_login_at'} = shift; +} + +sub loadavg_too_high { + my $self = shift; + my $loadavg = $self->loadavg(); + return (not defined $loadavg or + $loadavg > $self->max_loadavg()); +} + +{ + my $cmd; + sub loadavg_cmd { + if(not $cmd) { + # aix => "ps -ae -o state,command" # state wrong + # bsd => "ps ax -o state,command" + # sysv => "ps -ef -o s -o comm" + # cygwin => perl -ne 'close STDERR; /Name/ and print"\n"; \ + # /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + # awk '{print $2,$1}' + # dec_osf => bsd + # dragonfly => bsd + # freebsd => bsd + # gnu => bsd + # hpux => ps -el|awk '{print $2,$14,$15}' + # irix => ps -ef -o state -o comm + # linux => bsd + # minix => ps el|awk '{print \$1,\$11}' + # mirbsd => bsd + # netbsd => bsd + # openbsd => bsd + # solaris => sysv + # svr5 => sysv + # ultrix => ps -ax | awk '{print $3,$5}' + # unixware => ps -el|awk '{print $2,$14,$15}' + my $ps = q{ + $sysv="ps -ef -o s -o comm"; + $sysv2="ps -ef -o state -o comm"; + $bsd="ps ax -o state,command"; + $psel="ps -el|awk '{ print \$2,\$14,\$15 }'"; + $cygwin=q{ perl -ne 'close STDERR; /Name/ and print"\n"; + /(Name|Pid|Ppid|State):\s+(\S+)/ and print "$2\t";' /proc/*/status | + awk '{print $2,$1}' }; + $dummy="echo S COMMAND;echo R dummy"; + %ps=( + 'aix' => "uptime", + 'cygwin' => $cygwin, + 'darwin' => $bsd, + 'dec_osf' => $sysv2, + 'dragonfly' => $bsd, + 'freebsd' => $bsd, + 'gnu' => $bsd, + 'hpux' => $psel, + 'irix' => $sysv2, + 'linux' => $bsd, + 'minix' => "ps el|awk '{print \$1,\$11}'", + 'mirbsd' => $bsd, + 'msys' => $sysv, + 'MSWin32' => $sysv, + 'netbsd' => $bsd, + 'nto' => $dummy, + 'openbsd' => $bsd, + 'solaris' => $sysv, + 'svr5' => $psel, + 'ultrix' => "ps -ax | awk '{print \$3,\$5}'", + ); + print `$ps{$^O}`; + }; + $ps =~ s/[ \t\n]+/ /g; + $cmd = "perl -e ".::shell_quote_scalar($ps); + } + return $cmd; + } +} + + +sub loadavg { + # If the currently know loadavg is too old: + # Recompute a new one in the background + # The load average is computed as the number of processes waiting for disk + # or CPU right now. So it is the server load this instant and not averaged over + # several minutes. This is needed so GNU Parallel will at most start one job + # that will push the load over the limit. + # + # Returns: + # $last_loadavg = last load average computed (undef if none) + my $self = shift; + # Should we update the loadavg file? + my $update_loadavg_file = 0; + if(open(my $load_fh, "<", $self->{'loadavg_file'})) { + local $/ = undef; + my $load_out = <$load_fh>; + close $load_fh; + # Count lines starting with D,O,R but command does not start with [ + my $load =()= ($load_out=~/(^\s?[DOR]\S* +(?=[^\[])\S)/gm); + if($load > 0) { + # load is overestimated by 1 + $self->{'loadavg'} = $load - 1; + ::debug("load", "New loadavg: ", $self->{'loadavg'},"\n"); + } elsif ($load_out=~/average: (\d+.\d+)/) { + # AIX does not support instant load average + # 04:11AM up 21 days, 12:55, 1 user, load average: 1.85, 1.57, 1.55 + $self->{'loadavg'} = $1; + } else { + ::die_bug("loadavg_invalid_content: " . + $self->{'loadavg_file'} . "\n$load_out"); + } + # Because of instant load average, it should not be delayed 10 secs + # The instant load does not give 2 R if there is only 1 cpu. 
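+ # Worked example (illustrative) of the instant-load count used above:
+ #   state command                      (from "ps ax -o state,command")
+ #   S     bash
+ #   R     ps ax -o state,command
+ #   D     dd if=/dev/zero of=dummyfile
+ #   R     gzip dummyfile
+ # Three lines start with D, O or R and their command does not start with
+ # '[' (kernel threads are skipped); one of them is presumably the ps
+ # itself, hence the "- 1" correction: loadavg = 3 - 1 = 2.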
+# ::debug("load", "Last update: ", $self->{'last_loadavg_update'}); +# if(time - $self->{'last_loadavg_update'} > 10) { +# # last loadavg was started 10 seconds ago +# ::debug("load", time - $self->{'last_loadavg_update'}, " secs old: ", +# $self->{'loadavg_file'}); + $update_loadavg_file = 1; +# } + } else { + ::debug("load", "No loadavg file: ", $self->{'loadavg_file'}); + $self->{'loadavg'} = undef; + $update_loadavg_file = 1; + } + if($update_loadavg_file) { + ::debug("load", "Updating loadavg file", $self->{'loadavg_file'}, "\n"); + $self->{'last_loadavg_update'} = time; + -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + my $cmd = ""; + if($self->{'string'} ne ":") { + $cmd = $self->sshcommand() . " " . $self->serverlogin() . " " . + ::shell_quote_scalar(loadavg_cmd()); + } else { + $cmd .= loadavg_cmd(); + } + # As the command can take long to run if run remote + # save it to a tmp file before moving it to the correct file + ::debug("load", "Cmd: ", $cmd); + my $file = $self->{'loadavg_file'}; + my ($dummy_fh, $tmpfile) = ::tmpfile(SUFFIX => ".loa"); + qx{ ($cmd > $tmpfile && mv $tmpfile $file || rm $tmpfile) & }; + } + return $self->{'loadavg'}; +} + +sub max_loadavg { + my $self = shift; + # If --load is a file it might be changed + if($Global::max_load_file) { + my $mtime = (stat($Global::max_load_file))[9]; + if($mtime > $Global::max_load_file_last_mod) { + $Global::max_load_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_loadavg(undef); + } + } + } + if(not defined $self->{'max_loadavg'}) { + $self->{'max_loadavg'} = + $self->compute_max_loadavg($opt::load); + } + ::debug("load", "max_loadavg: ", $self->string(), " ", $self->{'max_loadavg'}); + return $self->{'max_loadavg'}; +} + +sub set_max_loadavg { + my $self = shift; + $self->{'max_loadavg'} = shift; +} + +sub compute_max_loadavg { + # Parse the max loadaverage that the user asked for using --load + # Returns: + # max loadaverage + my $self = shift; + my $loadspec = shift; + my $load; + if(defined $loadspec) { + if($loadspec =~ /^\+(\d+)$/) { + # E.g. --load +2 + my $j = $1; + $load = + $self->ncpus() + $j; + } elsif ($loadspec =~ /^-(\d+)$/) { + # E.g. 
--load -2 + my $j = $1; + $load = + $self->ncpus() - $j; + } elsif ($loadspec =~ /^(\d+)\%$/) { + my $j = $1; + $load = + $self->ncpus() * $j / 100; + } elsif ($loadspec =~ /^(\d+(\.\d+)?)$/) { + $load = $1; + } elsif (-f $loadspec) { + $Global::max_load_file = $loadspec; + $Global::max_load_file_last_mod = (stat($Global::max_load_file))[9]; + if(open(my $in_fh, "<", $Global::max_load_file)) { + my $opt_load_file = join("",<$in_fh>); + close $in_fh; + $load = $self->compute_max_loadavg($opt_load_file); + } else { + ::error("Cannot open $loadspec.\n"); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --load failed.\n"); + ::die_usage(); + } + if($load < 0.01) { + $load = 0.01; + } + } + return $load; +} + +sub time_to_login { + my $self = shift; + return $self->{'time_to_login'}; +} + +sub set_time_to_login { + my $self = shift; + $self->{'time_to_login'} = shift; +} + +sub max_jobs_running { + my $self = shift; + if(not defined $self->{'max_jobs_running'}) { + my $nproc = $self->compute_number_of_processes($opt::jobs); + $self->set_max_jobs_running($nproc); + } + return $self->{'max_jobs_running'}; +} + +sub orig_max_jobs_running { + my $self = shift; + return $self->{'orig_max_jobs_running'}; +} + +sub compute_number_of_processes { + # Number of processes wanted and limited by system resources + # Returns: + # Number of processes + my $self = shift; + my $opt_P = shift; + my $wanted_processes = $self->user_requested_processes($opt_P); + if(not defined $wanted_processes) { + $wanted_processes = $Global::default_simultaneous_sshlogins; + } + ::debug("load", "Wanted procs: $wanted_processes\n"); + my $system_limit = + $self->processes_available_by_system_limit($wanted_processes); + ::debug("load", "Limited to procs: $system_limit\n"); + return $system_limit; +} + +{ + my @children; + my $max_system_proc_reached; + my $more_filehandles; + my %fh; + my $tmpfhname; + my $count_jobs_already_read; + my @jobs; + my $job; + my @args; + my $arg; + + sub reserve_filehandles { + # Reserves filehandle + my $n = shift; + for (1..$n) { + $more_filehandles &&= open($fh{$tmpfhname++}, "<", "/dev/null"); + } + } + + sub reserve_process { + # Spawn a dummy process + my $child; + if($child = fork()) { + push @children, $child; + $Global::unkilled_children{$child} = 1; + } elsif(defined $child) { + # This is the child + # The child takes one process slot + # It will be killed later + $SIG{TERM} = $Global::original_sig{TERM}; + sleep 10000000; + exit(0); + } else { + # Failed to spawn + $max_system_proc_reached = 1; + } + } + + sub get_args_or_jobs { + # Get an arg or a job (depending on mode) + if($Global::semaphore or $opt::pipe) { + # Skip: No need to get args + return 1; + } elsif(defined $opt::retries and $count_jobs_already_read) { + # For retries we may need to run all jobs on this sshlogin + # so include the already read jobs for this sshlogin + $count_jobs_already_read--; + return 1; + } else { + if($opt::X or $opt::m) { + # The arguments may have to be re-spread over several jobslots + # So pessimistically only read one arg per jobslot + # instead of a full commandline + if($Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->empty()) { + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } else { + $arg = $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->get(); + push(@args, $arg); + return 1; + } + } else { + # If there are no more command lines, then we have a process + # per command line, so no need 
to go further + if($Global::JobQueue->empty()) { + return 0; + } else { + $job = $Global::JobQueue->get(); + push(@jobs, $job); + return 1; + } + } + } + } + + sub cleanup { + # Cleanup: Close the files + for (values %fh) { close $_ } + # Cleanup: Kill the children + for my $pid (@children) { + kill 9, $pid; + waitpid($pid,0); + delete $Global::unkilled_children{$pid}; + } + # Cleanup: Unget the command_lines or the @args + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}->unget(@args); + $Global::JobQueue->unget(@jobs); + @jobs = undef; + } + + sub processes_available_by_system_limit { + # If the wanted number of processes is bigger than the system limits: + # Limit them to the system limits + # Limits are: File handles, number of input lines, processes, + # and taking > 1 second to spawn 10 extra processes + # Returns: + # Number of processes + my $self = shift; + my $wanted_processes = shift; + my $system_limit = 0; + my $slow_spawining_warning_printed = 0; + my $time = time; + $more_filehandles = 1; + $tmpfhname = "TmpFhNamE"; + + # perl uses 7 filehandles for something? + # parallel uses 1 for memory_usage + # parallel uses 4 for ? + reserve_filehandles(12); + # Two processes for load avg and ? + reserve_process(); + reserve_process(); + + # For --retries count also jobs already run + $count_jobs_already_read = $Global::JobQueue->next_seq(); + my $wait_time_for_getting_args = 0; + my $start_time = time; + while(1) { + $system_limit >= $wanted_processes and last; + not $more_filehandles and last; + $max_system_proc_reached and last; + + my $before_getting_arg = time; + get_args_or_jobs() or last; + $wait_time_for_getting_args += time - $before_getting_arg; + $system_limit++; + + # Every simultaneous process uses 2 filehandles to write to + # and 2 filehandles to read from + reserve_filehandles(4); + + # System process limit + reserve_process(); + + my $forktime = time - $time - $wait_time_for_getting_args; + ::debug("run", "Time to fork $system_limit procs: $wait_time_for_getting_args ", + $forktime, + " (processes so far: ", $system_limit,")\n"); + if($system_limit > 10 and + $forktime > 1 and + $forktime > $system_limit * 0.01 + and not $slow_spawining_warning_printed) { + # It took more than 0.01 second to fork a processes on avg. + # Give the user a warning. He can press Ctrl-C if this + # sucks. + ::warning("Starting $system_limit processes took > $forktime sec.\n", + "Consider adjusting -j. Press CTRL-C to stop.\n"); + $slow_spawining_warning_printed = 1; + } + } + cleanup(); + + if($system_limit < $wanted_processes) { + # The system_limit is less than the wanted_processes + if($system_limit < 1 and not $Global::JobQueue->empty()) { + ::warning("Cannot spawn any jobs. Raising ulimit -u or /etc/security/limits.conf\n", + "or /proc/sys/kernel/pid_max may help.\n"); + ::wait_and_exit(255); + } + if(not $more_filehandles) { + ::warning("Only enough file handles to run ", $system_limit, " jobs in parallel.\n", + "Running 'parallel -j0 -N", $system_limit, " --pipe parallel -j0' or ", + "raising ulimit -n or /etc/security/limits.conf may help.\n"); + } + if($max_system_proc_reached) { + ::warning("Only enough available processes to run ", $system_limit, + " jobs in parallel. 
Raising ulimit -u or /etc/security/limits.conf\n", + "or /proc/sys/kernel/pid_max may help.\n"); + } + } + if($] == 5.008008 and $system_limit > 1000) { + # https://savannah.gnu.org/bugs/?36942 + $system_limit = 1000; + } + if($Global::JobQueue->empty()) { + $system_limit ||= 1; + } + if($self->string() ne ":" and + $system_limit > $Global::default_simultaneous_sshlogins) { + $system_limit = + $self->simultaneous_sshlogin_limit($system_limit); + } + return $system_limit; + } +} + +sub simultaneous_sshlogin_limit { + # Test by logging in wanted number of times simultaneously + # Returns: + # min($wanted_processes,$working_simultaneous_ssh_logins-1) + my $self = shift; + my $wanted_processes = shift; + if($self->{'time_to_login'}) { + return $wanted_processes; + } + + # Try twice because it guesses wrong sometimes + # Choose the minimal + my $ssh_limit = + ::min($self->simultaneous_sshlogin($wanted_processes), + $self->simultaneous_sshlogin($wanted_processes)); + if($ssh_limit < $wanted_processes) { + my $serverlogin = $self->serverlogin(); + ::warning("ssh to $serverlogin only allows ", + "for $ssh_limit simultaneous logins.\n", + "You may raise this by changing ", + "/etc/ssh/sshd_config:MaxStartups and MaxSessions on $serverlogin.\n", + "Using only ",$ssh_limit-1," connections ", + "to avoid race conditions.\n"); + } + # Race condition can cause problem if using all sshs. + if($ssh_limit > 1) { $ssh_limit -= 1; } + return $ssh_limit; +} + +sub simultaneous_sshlogin { + # Using $sshlogin try to see if we can do $wanted_processes + # simultaneous logins + # (ssh host echo simultaneouslogin & ssh host echo simultaneouslogin & ...)|grep simul|wc -l + # Returns: + # Number of succesful logins + my $self = shift; + my $wanted_processes = shift; + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + my $sshdelay = $opt::sshdelay ? "sleep $opt::sshdelay;" : ""; + # TODO sh -c wrapper to work for csh + my $cmd = "$sshdelay$sshcmd $serverlogin echo simultaneouslogin &1 &"x$wanted_processes; + ::debug("init", "Trying $wanted_processes logins at $serverlogin\n"); + open (my $simul_fh, "-|", "($cmd)|grep simultaneouslogin | wc -l") or + ::die_bug("simultaneouslogin"); + my $ssh_limit = <$simul_fh>; + close $simul_fh; + chomp $ssh_limit; + return $ssh_limit; +} + +sub set_ncpus { + my $self = shift; + $self->{'ncpus'} = shift; +} + +sub user_requested_processes { + # Parse the number of processes that the user asked for using -j + # Returns: + # the number of processes to run on this sshlogin + my $self = shift; + my $opt_P = shift; + my $processes; + if(defined $opt_P) { + if($opt_P =~ /^\+(\d+)$/) { + # E.g. -P +2 + my $j = $1; + $processes = + $self->ncpus() + $j; + } elsif ($opt_P =~ /^-(\d+)$/) { + # E.g. -P -2 + my $j = $1; + $processes = + $self->ncpus() - $j; + } elsif ($opt_P =~ /^(\d+(\.\d+)?)\%$/) { + # E.g. 
-P 10.5% + my $j = $1; + $processes = + $self->ncpus() * $j / 100; + } elsif ($opt_P =~ /^(\d+)$/) { + $processes = $1; + if($processes == 0) { + # -P 0 = infinity (or at least close) + $processes = $Global::infinity; + } + } elsif (-f $opt_P) { + $Global::max_procs_file = $opt_P; + if(open(my $in_fh, "<", $Global::max_procs_file)) { + my $opt_P_file = join("",<$in_fh>); + close $in_fh; + $processes = $self->user_requested_processes($opt_P_file); + } else { + ::error("Cannot open $opt_P.\n"); + ::wait_and_exit(255); + } + } else { + ::error("Parsing of --jobs/-j/--max-procs/-P failed.\n"); + ::die_usage(); + } + $processes = ::ceil($processes); + } + return $processes; +} + +sub ncpus { + my $self = shift; + if(not defined $self->{'ncpus'}) { + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + if($serverlogin eq ":") { + if($opt::use_cpus_instead_of_cores) { + $self->{'ncpus'} = no_of_cpus(); + } else { + $self->{'ncpus'} = no_of_cores(); + } + } else { + my $ncpu; + my $sqe = ::shell_quote_scalar($Global::envvar); + if($opt::use_cpus_instead_of_cores) { + $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cpus); + } else { + ::debug("init",qq(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores\n)); + $ncpu = qx(echo|$sshcmd $serverlogin $sqe parallel --number-of-cores); + } + chomp $ncpu; + if($ncpu =~ /^\s*[0-9]+\s*$/s) { + $self->{'ncpus'} = $ncpu; + } else { + ::warning("Could not figure out ", + "number of cpus on $serverlogin ($ncpu). Using 1.\n"); + $self->{'ncpus'} = 1; + } + } + } + return $self->{'ncpus'}; +} + +sub no_of_cpus { + # Returns: + # Number of physical CPUs + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my $no_of_cpus; + if ($^O eq 'linux') { + $no_of_cpus = no_of_cpus_gnu_linux() || no_of_cores_gnu_linux(); + } elsif ($^O eq 'freebsd') { + $no_of_cpus = no_of_cpus_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cpus = no_of_cpus_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cpus = no_of_cpus_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cpus = no_of_cpus_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cpus = no_of_cpus_darwin(); + } elsif ($^O eq 'solaris') { + $no_of_cpus = no_of_cpus_solaris(); + } elsif ($^O eq 'aix') { + $no_of_cpus = no_of_cpus_aix(); + } elsif ($^O eq 'hpux') { + $no_of_cpus = no_of_cpus_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cpus = no_of_cpus_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cpus = no_of_cpus_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cpus = no_of_cpus_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cpus = no_of_cpus_tru64(); + } else { + $no_of_cpus = (no_of_cpus_gnu_linux() + || no_of_cpus_freebsd() + || no_of_cpus_netbsd() + || no_of_cpus_openbsd() + || no_of_cpus_hurd() + || no_of_cpus_darwin() + || no_of_cpus_solaris() + || no_of_cpus_aix() + || no_of_cpus_hpux() + || no_of_cpus_qnx() + || no_of_cpus_openserver() + || no_of_cpus_irix() + || no_of_cpus_tru64() + # Number of cores is better than no guess for #CPUs + || nproc() + ); + } + if($no_of_cpus) { + chomp $no_of_cpus; + return $no_of_cpus; + } else { + ::warning("Cannot figure out number of cpus. 
Using 1.\n"); + return 1; + } +} + +sub no_of_cores { + # Returns: + # Number of CPU cores + local $/ = "\n"; # If delimiter is set, then $/ will be wrong + my $no_of_cores; + if ($^O eq 'linux') { + $no_of_cores = no_of_cores_gnu_linux(); + } elsif ($^O eq 'freebsd') { + $no_of_cores = no_of_cores_freebsd(); + } elsif ($^O eq 'netbsd') { + $no_of_cores = no_of_cores_netbsd(); + } elsif ($^O eq 'openbsd') { + $no_of_cores = no_of_cores_openbsd(); + } elsif ($^O eq 'gnu') { + $no_of_cores = no_of_cores_hurd(); + } elsif ($^O eq 'darwin') { + $no_of_cores = no_of_cores_darwin(); + } elsif ($^O eq 'solaris') { + $no_of_cores = no_of_cores_solaris(); + } elsif ($^O eq 'aix') { + $no_of_cores = no_of_cores_aix(); + } elsif ($^O eq 'hpux') { + $no_of_cores = no_of_cores_hpux(); + } elsif ($^O eq 'nto') { + $no_of_cores = no_of_cores_qnx(); + } elsif ($^O eq 'svr5') { + $no_of_cores = no_of_cores_openserver(); + } elsif ($^O eq 'irix') { + $no_of_cores = no_of_cores_irix(); + } elsif ($^O eq 'dec_osf') { + $no_of_cores = no_of_cores_tru64(); + } else { + $no_of_cores = (no_of_cores_gnu_linux() + || no_of_cores_freebsd() + || no_of_cores_netbsd() + || no_of_cores_openbsd() + || no_of_cores_hurd() + || no_of_cores_darwin() + || no_of_cores_solaris() + || no_of_cores_aix() + || no_of_cores_hpux() + || no_of_cores_qnx() + || no_of_cores_openserver() + || no_of_cores_irix() + || no_of_cores_tru64() + || nproc() + ); + } + if($no_of_cores) { + chomp $no_of_cores; + return $no_of_cores; + } else { + ::warning("Cannot figure out number of CPU cores. Using 1.\n"); + return 1; + } +} + +sub nproc { + # Returns: + # Number of cores using `nproc` + my $no_of_cores = qx{ sh -c 'nproc 2>/dev/null' }; + return $no_of_cores; +} + +sub no_of_cpus_gnu_linux { + # Returns: + # Number of physical CPUs on GNU/Linux + # undef if not GNU/Linux + my $no_of_cpus; + my $no_of_cores; + my $no_of_active_cores; + if(-e "/proc/cpuinfo") { + $no_of_cpus = 0; + $no_of_cores = 0; + my %seen; + if(open(my $in_fh, "<", "/proc/cpuinfo")) { + while(<$in_fh>) { + if(/^physical id.*[:](.*)/ and not $seen{$1}++) { + $no_of_cpus++; + } + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } + } + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cpus || $no_of_cores,$no_of_active_cores)); +} + +sub no_of_cores_gnu_linux { + # Returns: + # Number of CPU cores on GNU/Linux + # undef if not GNU/Linux + my $no_of_cores; + my $no_of_active_cores; + if(-e "/proc/cpuinfo") { + $no_of_cores = 0; + open(my $in_fh, "<", "/proc/cpuinfo") || return undef; + while(<$in_fh>) { + /^processor.*[:]/i and $no_of_cores++; + } + close $in_fh; + } + if(-e "/proc/self/status") { + # if 'taskset' is used to limit number of cores + if(open(my $in_fh, "<", "/proc/self/status")) { + while(<$in_fh>) { + if(/^Cpus_allowed:\s*(\S+)/) { + my $a = $1; + $a =~ tr/,//d; + $no_of_active_cores = unpack ("%32b*", pack ("H*",$a)); + } + } + close $in_fh; + } + } + return (::min($no_of_cores,$no_of_active_cores)); +} + +sub no_of_cpus_freebsd { + # Returns: + # Number of physical CPUs on FreeBSD + # undef if not FreeBSD + my $no_of_cpus = + (qx{ sh -c 'sysctl -a dev.cpu 2>/dev/null' | grep \%parent | awk '{ print \$2 }' | uniq | wc -l | awk '{ print \$1 }' } + or + qx{ sh -c 'sysctl hw.ncpu 
2>/dev/null' | awk '{ print \$2 }' }); + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_freebsd { + # Returns: + # Number of CPU cores on FreeBSD + # undef if not FreeBSD + my $no_of_cores = + (qx{ sh -c 'sysctl hw.ncpu 2>/dev/null' | awk '{ print \$2 }' } + or + qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }); + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_netbsd { + # Returns: + # Number of physical CPUs on NetBSD + # undef if not NetBSD + my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_netbsd { + # Returns: + # Number of CPU cores on NetBSD + # undef if not NetBSD + my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_openbsd { + # Returns: + # Number of physical CPUs on OpenBSD + # undef if not OpenBSD + my $no_of_cpus = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_openbsd { + # Returns: + # Number of CPU cores on OpenBSD + # undef if not OpenBSD + my $no_of_cores = qx{ sh -c 'sysctl -n hw.ncpu 2>/dev/null' }; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cpus = qx{ nproc }; + chomp $no_of_cpus; + return $no_of_cpus; +} + +sub no_of_cores_hurd { + # Returns: + # Number of physical CPUs on HURD + # undef if not HURD + my $no_of_cores = `nproc`; + chomp $no_of_cores; + return $no_of_cores; +} + +sub no_of_cpus_darwin { + # Returns: + # Number of physical CPUs on Mac Darwin + # undef if not Mac Darwin + my $no_of_cpus = + (qx{ sh -c 'sysctl -n hw.physicalcpu 2>/dev/null' } + or + qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]physicalcpu[^a-z] | awk '{ print \$2 }' }); + return $no_of_cpus; +} + +sub no_of_cores_darwin { + # Returns: + # Number of CPU cores on Mac Darwin + # undef if not Mac Darwin + my $no_of_cores = + (qx{ sh -c 'sysctl -n hw.logicalcpu 2>/dev/null' } + or + qx{ sh -c 'sysctl -a hw 2>/dev/null' | grep [^a-z]logicalcpu[^a-z] | awk '{ print \$2 }' }); + return $no_of_cores; +} + +sub no_of_cpus_solaris { + # Returns: + # Number of physical CPUs on Solaris + # undef if not Solaris + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance }; + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + return undef; +} + +sub no_of_cores_solaris { + # Returns: + # Number of CPU cores on Solaris + # undef if not Solaris + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + if(-x "/usr/sbin/prtconf") { + my @prtconf = qx{ /usr/sbin/prtconf | grep cpu..instance }; + if($#prtconf >= 0) { + return $#prtconf +1; + } + } + return undef; +} + +sub no_of_cpus_aix { + # Returns: + # Number of physical CPUs on AIX + # undef if not AIX + my $no_of_cpus = 0; + if(-x "/usr/sbin/lscfg") { + open(my $in_fh, "-|", "/usr/sbin/lscfg -vs |grep proc | wc -l|tr -d ' '") + || return undef; + $no_of_cpus = <$in_fh>; + chomp ($no_of_cpus); + close $in_fh; + } + return $no_of_cpus; +} + +sub no_of_cores_aix { + # Returns: + # Number of CPU cores on AIX + # undef if not AIX + my $no_of_cores; + if(-x "/usr/bin/vmstat") { + open(my $in_fh, "-|", "/usr/bin/vmstat 1 1") || return 
undef; + while(<$in_fh>) { + /lcpu=([0-9]*) / and $no_of_cores = $1; + } + close $in_fh; + } + return $no_of_cores; +} + +sub no_of_cpus_hpux { + # Returns: + # Number of physical CPUs on HP-UX + # undef if not HP-UX + my $no_of_cpus = + qx{ sh -c '/usr/bin/mpsched -s 2>&1' | grep 'Locality Domain Count' | awk '{ print \$4 }'}; + return $no_of_cpus; +} + +sub no_of_cores_hpux { + # Returns: + # Number of CPU cores on HP-UX + # undef if not HP-UX + my $no_of_cores = + qx{ sh -c '/usr/bin/mpsched -s 2>&1' | perl -ne '/Processor Count\\D+(\\d+)/ and print "\$1\n"'}; + return $no_of_cores; +} + +sub no_of_cpus_qnx { + # Returns: + # Number of physical CPUs on QNX + # undef if not QNX + # BUG: It is not known how to calculate this. + my $no_of_cpus = 0; + return $no_of_cpus; +} + +sub no_of_cores_qnx { + # Returns: + # Number of CPU cores on QNX + # undef if not QNX + # BUG: It is not known how to calculate this. + my $no_of_cores = 0; + return $no_of_cores; +} + +sub no_of_cpus_openserver { + # Returns: + # Number of physical CPUs on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cpus = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cpus; +} + +sub no_of_cores_openserver { + # Returns: + # Number of CPU cores on SCO OpenServer + # undef if not SCO OpenServer + my $no_of_cores = 0; + if(-x "/usr/sbin/psrinfo") { + my @psrinfo = `/usr/sbin/psrinfo`; + if($#psrinfo >= 0) { + return $#psrinfo +1; + } + } + return $no_of_cores; +} + +sub no_of_cpus_irix { + # Returns: + # Number of physical CPUs on IRIX + # undef if not IRIX + my $no_of_cpus = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; + return $no_of_cpus; +} + +sub no_of_cores_irix { + # Returns: + # Number of CPU cores on IRIX + # undef if not IRIX + my $no_of_cores = `hinv | grep HZ | grep Processor | awk '{print \$1}'`; + return $no_of_cores; +} + +sub no_of_cpus_tru64 { + # Returns: + # Number of physical CPUs on Tru64 + # undef if not Tru64 + my $no_of_cpus = `sizer -pr`; + return $no_of_cpus; +} + +sub no_of_cores_tru64 { + # Returns: + # Number of CPU cores on Tru64 + # undef if not Tru64 + my $no_of_cores = `sizer -pr`; + return $no_of_cores; +} + +sub sshcommand { + my $self = shift; + if (not defined $self->{'sshcommand'}) { + $self->sshcommand_of_sshlogin(); + } + return $self->{'sshcommand'}; +} + +sub serverlogin { + my $self = shift; + if (not defined $self->{'serverlogin'}) { + $self->sshcommand_of_sshlogin(); + } + return $self->{'serverlogin'}; +} + +sub sshcommand_of_sshlogin { + # 'server' -> ('ssh -S /tmp/parallel-ssh-RANDOM/host-','server') + # 'user@server' -> ('ssh','user@server') + # 'myssh user@server' -> ('myssh','user@server') + # 'myssh -l user server' -> ('myssh -l user','server') + # '/usr/bin/myssh -l user server' -> ('/usr/bin/myssh -l user','server') + # Returns: + # sshcommand - defaults to 'ssh' + # login@host + my $self = shift; + my ($sshcmd, $serverlogin); + if($self->{'string'} =~ /(.+) (\S+)$/) { + # Own ssh command + $sshcmd = $1; $serverlogin = $2; + } else { + # Normal ssh + if($opt::controlmaster) { + # Use control_path to make ssh faster + my $control_path = $self->control_path_dir()."/ssh-%r@%h:%p"; + $sshcmd = "ssh -S ".$control_path; + $serverlogin = $self->{'string'}; + if(not $self->{'control_path'}{$control_path}++) { + # Master is not running for this control_path + # Start it + my $pid = fork(); + if($pid) { + $Global::sshmaster{$pid} ||= 1; + } else { + $SIG{'TERM'} = undef; + 
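+ # Illustration only (hypothetical sshlogin 'server'): the master started
+ # by the exec below amounts to something like
+ #   ssh -tt -MTS $HOME/.parallel/tmp/control_path_dir-XXXX/ssh-%r@%h:%p \
+ #     server perl -e '$|=1;while(1){sleep 1;print "foo\n"}'
+ # so the control socket exists for the real jobs to reuse via 'ssh -S'.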
# Ignore the 'foo' being printed + open(STDOUT,">","/dev/null"); + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # STDERR >/dev/null to ignore "process_mux_new_session: tcgetattr: Invalid argument" + open(STDERR,">","/dev/null"); + open(STDIN,"<","/dev/null"); + # Run a sleep that outputs data, so it will discover if the ssh connection closes. + my $sleep = ::shell_quote_scalar('$|=1;while(1){sleep 1;print "foo\n"}'); + my @master = ("ssh", "-tt", "-MTS", $control_path, $serverlogin, "perl", "-e", $sleep); + exec(@master); + } + } + } else { + $sshcmd = "ssh"; $serverlogin = $self->{'string'}; + } + } + $self->{'sshcommand'} = $sshcmd; + $self->{'serverlogin'} = $serverlogin; +} + +sub control_path_dir { + # Returns: + # path to directory + my $self = shift; + if(not defined $self->{'control_path_dir'}) { + -e $ENV{'HOME'}."/.parallel" or mkdir $ENV{'HOME'}."/.parallel"; + -e $ENV{'HOME'}."/.parallel/tmp" or mkdir $ENV{'HOME'}."/.parallel/tmp"; + $self->{'control_path_dir'} = + File::Temp::tempdir($ENV{'HOME'} + . "/.parallel/tmp/control_path_dir-XXXX", + CLEANUP => 1); + } + return $self->{'control_path_dir'}; +} + +sub rsync_transfer_cmd { + # Command to run to transfer a file + # Input: + # $file = filename of file to transfer + # $workdir = destination dir + # Returns: + # $cmd = rsync command to run to transfer $file ("" if unreadable) + my $self = shift; + my $file = shift; + my $workdir = shift; + if(not -r $file) { + ::warning($file, " is not readable and will not be transferred.\n"); + return "true"; + } + my $rsync_destdir; + if($file =~ m:^/:) { + # rsync /foo/bar / + $rsync_destdir = "/"; + } else { + $rsync_destdir = ::shell_quote_file($workdir); + } + $file = ::shell_quote_file($file); + my $sshcmd = $self->sshcommand(); + my $rsync_opt = "-rlDzR -e" . ::shell_quote_scalar($sshcmd); + my $serverlogin = $self->serverlogin(); + # Make dir if it does not exist + return "( $sshcmd $serverlogin mkdir -p $rsync_destdir;" . + rsync()." $rsync_opt $file $serverlogin:$rsync_destdir )"; +} + +sub cleanup_cmd { + # Command to run to remove the remote file + # Input: + # $file = filename to remove + # $workdir = destination dir + # Returns: + # $cmd = ssh command to run to remove $file and empty parent dirs + my $self = shift; + my $file = shift; + my $workdir = shift; + my $f = $file; + if($f =~ m:/\./:) { + # foo/bar/./baz/quux => workdir/baz/quux + # /foo/bar/./baz/quux => workdir/baz/quux + $f =~ s:.*/\./:$workdir/:; + } elsif($f =~ m:^[^/]:) { + # foo/bar => workdir/foo/bar + $f = $workdir."/".$f; + } + my @subdirs = split m:/:, ::dirname($f); + my @rmdir; + my $dir = ""; + for(@subdirs) { + $dir .= $_."/"; + unshift @rmdir, ::shell_quote_file($dir); + } + my $rmdir = @rmdir ? "sh -c 'rmdir @rmdir 2>/dev/null';" : ""; + if(defined $opt::workdir and $opt::workdir eq "...") { + $rmdir .= "rm -rf " . ::shell_quote_file($workdir).';'; + } + + $f = ::shell_quote_file($f); + my $sshcmd = $self->sshcommand(); + my $serverlogin = $self->serverlogin(); + return "$sshcmd $serverlogin ".::shell_quote_scalar("(rm -f $f; $rmdir)"); +} + +{ + my $rsync; + + sub rsync { + # rsync 3.1.x uses protocol 31 which is unsupported by 2.5.7. 
+ # If the version >= 3.1.0: downgrade to protocol 30 + if(not $rsync) { + my @out = `rsync --version`; + for (@out) { + if(/version (\d+.\d+)(.\d+)?/) { + if($1 >= 3.1) { + # Version 3.1.0 or later: Downgrade to protocol 30 + $rsync = "rsync --protocol 30"; + } else { + $rsync = "rsync"; + } + } + } + $rsync or ::die_bug("Cannot figure out version of rsync: @out"); + } + return $rsync; + } +} + + +package JobQueue; + +sub new { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $return_files = shift; + my $commandlinequeue = CommandLineQueue->new + ($commandref, $read_from, $context_replace, $max_number_of_args, + $return_files); + my @unget = (); + return bless { + 'unget' => \@unget, + 'commandlinequeue' => $commandlinequeue, + 'total_jobs' => undef, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + + if(@{$self->{'unget'}}) { + my $job = shift @{$self->{'unget'}}; + return ($job); + } else { + my $commandline = $self->{'commandlinequeue'}->get(); + if(defined $commandline) { + my $job = Job->new($commandline); + return $job; + } else { + return undef; + } + } +} + +sub unget { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) + && $self->{'commandlinequeue'}->empty(); + ::debug("run", "JobQueue->empty $empty "); + return $empty; +} + +sub total_jobs { + my $self = shift; + if(not defined $self->{'total_jobs'}) { + my $job; + my @queue; + my $start = time; + while($job = $self->get()) { + if(time - $start > 10) { + ::warning("Reading ".scalar(@queue)." arguments took longer than 10 seconds.\n"); + $opt::eta && ::warning("Consider removing --eta.\n"); + $opt::bar && ::warning("Consider removing --bar.\n"); + $opt::shuf && ::warning("Consider removing --shuf.\n"); + last; + } + push @queue, $job; + } + while($job = $self->get()) { + push @queue, $job; + } + if($opt::shuf) { + my $i = @queue; + while (--$i) { + my $j = int rand($i+1); + @queue[$i,$j] = @queue[$j,$i]; + } + my $seq = 1; + for my $job (@queue) { + $job->{'commandline'}->set_seq($seq++); + } + } + $self->unget(@queue); + $self->{'total_jobs'} = $#queue+1; + } + return $self->{'total_jobs'}; +} + +sub next_seq { + my $self = shift; + + return $self->{'commandlinequeue'}->seq(); +} + +sub quote_args { + my $self = shift; + return $self->{'commandlinequeue'}->quote_args(); +} + + +package Job; + +sub new { + my $class = shift; + my $commandlineref = shift; + return bless { + 'commandline' => $commandlineref, # CommandLine object + 'workdir' => undef, # --workdir + # filehandle for stdin (used for --pipe) + # filename for writing stdout to (used for --files) + # remaining data not sent to stdin (used for --pipe) + # amount of data sent via stdin (used for --pipe) + 'transfersize' => 0, # size of files using --transfer + 'returnsize' => 0, # size of files using --return + 'pid' => undef, + # hash of { SSHLogins => number of times the command failed there } + 'failed' => undef, + 'sshlogin' => undef, + # The commandline wrapped with rsync and ssh + 'sshlogin_wrap' => undef, + 'exitstatus' => undef, + 'exitsignal' => undef, + # Timestamp for timeout if any + 'timeout' => undef, + 'virgin' => 1, + }, ref($class) || $class; +} + +sub replaced { + my $self = shift; + $self->{'commandline'} or ::die_bug("commandline empty"); + return $self->{'commandline'}->replaced(); +} + +sub seq { + my $self = shift; + return $self->{'commandline'}->seq(); +} + 
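+# Illustration only (commented sketch, not part of the program): the
+# Fisher-Yates shuffle that JobQueue::total_jobs above applies for --shuf can
+# be tried as a standalone one-liner, using a throwaway array @q:
+#   perl -e '@q=(1..5); my $i=@q; while(--$i){my $j=int rand($i+1); @q[$i,$j]=@q[$j,$i];} print "@q\n"'
+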
+sub set_seq { + my $self = shift; + return $self->{'commandline'}->set_seq(shift); +} + +sub slot { + my $self = shift; + return $self->{'commandline'}->slot(); +} + +{ + my($cattail); + + sub cattail { + # Returns: + # $cattail = perl program for: cattail "decompress program" writerpid [file_to_decompress or stdin] [file_to_unlink] + if(not $cattail) { + $cattail = q{ + # cat followed by tail (possibly with rm as soon at the file is opened) + # If $writerpid dead: finish after this round + use Fcntl; + $|=1; + + my ($comfile, $cmd, $writerpid, $read_file, $unlink_file) = @ARGV; + if($read_file) { + open(IN,"<",$read_file) || die("cattail: Cannot open $read_file"); + } else { + *IN = *STDIN; + } + while(! -s $comfile) { + # Writer has not opened the buffer file, so we cannot remove it yet + $sleep = ($sleep < 30) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + # The writer and we have both opened the file, so it is safe to unlink it + unlink $unlink_file; + unlink $comfile; + + my $first_round = 1; + my $flags; + fcntl(IN, F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= O_NONBLOCK; # Add non-blocking to the flags + fcntl(IN, F_SETFL, $flags) || die $!; # Set the flags on the filehandle + + while(1) { + # clear EOF + seek(IN,0,1); + my $writer_running = kill 0, $writerpid; + $read = sysread(IN,$buf,32768); + if($read) { + if($first_round) { + # Only start the command if there any input to process + $first_round = 0; + open(OUT,"|-",$cmd) || die("cattail: Cannot run $cmd"); + } + + # Blocking print + while($buf) { + my $bytes_written = syswrite(OUT,$buf); + # syswrite may be interrupted by SIGHUP + substr($buf,0,$bytes_written) = ""; + } + # Something printed: Wait less next time + $sleep /= 2; + } else { + if(eof(IN) and not $writer_running) { + # Writer dead: There will never be more to read => exit + exit; + } + # TODO This could probably be done more efficiently using select(2) + # Nothing read: Wait longer before next read + # Up to 100 milliseconds + $sleep = ($sleep < 100) ? ($sleep * 1.001 + 0.01) : ($sleep); + usleep($sleep); + } + } + + sub usleep { + # Sleep this many milliseconds. + my $secs = shift; + select(undef, undef, undef, $secs/1000); + } + }; + $cattail =~ s/#.*//mg; + $cattail =~ s/\s+/ /g; + } + return $cattail; + } +} + +sub openoutputfiles { + # Open files for STDOUT and STDERR + # Set file handles in $self->fh + my $self = shift; + my ($outfhw, $errfhw, $outname, $errname); + if($opt::results) { + my $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # Output in: prefix/name1/val1/name2/val2/stdout + my $dir = $opt::results."/".$args_as_dirname; + if(eval{ File::Path::mkpath($dir); }) { + # OK + } else { + # mkpath failed: Argument probably too long. 
+ # Set $Global::max_file_length, which will keep the individual + # dir names shorter than the max length + max_file_name_length($opt::results); + $args_as_dirname = $self->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + $dir = $opt::results."/".$args_as_dirname; + File::Path::mkpath($dir); + } + # prefix/name1/val1/name2/val2/stdout + $outname = "$dir/stdout"; + if(not open($outfhw, "+>", $outname)) { + ::error("Cannot write to `$outname'.\n"); + ::wait_and_exit(255); + } + # prefix/name1/val1/name2/val2/stderr + $errname = "$dir/stderr"; + if(not open($errfhw, "+>", $errname)) { + ::error("Cannot write to `$errname'.\n"); + ::wait_and_exit(255); + } + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",""); + } elsif(not $opt::ungroup) { + # To group we create temporary files for STDOUT and STDERR + # To avoid the cleanup unlink the files immediately (but keep them open) + if(@Global::tee_jobs) { + # files must be removed when the tee is done + } elsif($opt::files) { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + # --files => only remove stderr + $self->set_fh(1,"unlink",""); + $self->set_fh(2,"unlink",$errname); + } else { + ($outfhw, $outname) = ::tmpfile(SUFFIX => ".par"); + ($errfhw, $errname) = ::tmpfile(SUFFIX => ".par"); + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + } else { + # --ungroup + open($outfhw,">&",$Global::fd{1}) || die; + open($errfhw,">&",$Global::fd{2}) || die; + # File name must be empty as it will otherwise be printed + $outname = ""; + $errname = ""; + $self->set_fh(1,"unlink",$outname); + $self->set_fh(2,"unlink",$errname); + } + # Set writing FD + $self->set_fh(1,'w',$outfhw); + $self->set_fh(2,'w',$errfhw); + $self->set_fh(1,'name',$outname); + $self->set_fh(2,'name',$errname); + if($opt::compress) { + $self->filter_through_compress(); + } elsif(not $opt::ungroup) { + $self->grouped(); + } + if($opt::linebuffer) { + $self->set_non_blocking(); + } +} + +sub grouped { + my $self = shift; + # Set reading FD if using --group (--ungroup does not need) + for my $fdno (1,2) { + # Re-open the file for reading + # so fdw can be closed seperately + # and fdr can be seeked seperately (for --line-buffer) + open(my $fdr,"<", $self->fh($fdno,'name')) || + ::die_bug("fdr: Cannot open ".$self->fh($fdno,'name')); + $self->set_fh($fdno,'r',$fdr); + # Unlink if required + $Global::debug or unlink $self->fh($fdno,"unlink"); + } +} + +sub empty_input_wrapper { + # If no input: exit(0) + # If some input: Pass input as input to command on STDIN + # This avoids starting the command if there is no input. + # Input: + # $command = command to pipe data to + # Returns: + # $wrapped_command = the wrapped command + my $command = shift; + my $script = '$c="'.::perl_quote_scalar($command).'";'. + ::spacefree(0,q{ + if(sysread(STDIN, $buf, 1)) { + open($fh, "|-", $c) || die; + syswrite($fh, $buf); + while($read = sysread(STDIN, $buf, 32768)) { + syswrite($fh, $buf); + } + close $fh; + exit ($?&127 ? 128+($?&127) : 1+$?>>8) + } + }); + ::debug("run",'Empty wrap: perl -e '.::shell_quote_scalar($script)."\n"); + return 'perl -e '.::shell_quote_scalar($script); +} + +sub filter_through_compress { + my $self = shift; + # Send stdout to stdin for $opt::compress_program(1) + # Send stderr to stdin for $opt::compress_program(2) + # cattail get pid: $pid = $self->fh($fdno,'rpid'); + my $cattail = cattail(); + + for my $fdno (1,2) { + # Make a communication file. 
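+ # Illustration only, assuming --compress-program gzip: the writer side set
+ # up below is roughly "(echo > comfile; gzip) > tmpfile", and the reader
+ # side is the cattail helper, which feeds tmpfile into the matching
+ # decompress program (e.g. "gzip -dc") and unlinks tmpfile and comfile as
+ # soon as the writer has created comfile.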
+ my ($fh, $comfile) = ::tmpfile(SUFFIX => ".pac"); + close $fh; + # Compressor: (echo > $comfile; compress pipe) > output + # When the echo is written to $comfile, it is known that output file is opened, + # thus output file can then be removed by the decompressor. + my $wpid = open(my $fdw,"|-", "(echo > $comfile; ".empty_input_wrapper($opt::compress_program).") >". + $self->fh($fdno,'name')) || die $?; + $self->set_fh($fdno,'w',$fdw); + $self->set_fh($fdno,'wpid',$wpid); + # Decompressor: open output; -s $comfile > 0: rm $comfile output; decompress output > stdout + my $rpid = open(my $fdr, "-|", "perl", "-e", $cattail, $comfile, + $opt::decompress_program, $wpid, + $self->fh($fdno,'name'),$self->fh($fdno,'unlink')) || die $?; + $self->set_fh($fdno,'r',$fdr); + $self->set_fh($fdno,'rpid',$rpid); + } +} + +sub set_non_blocking { + my $self = shift; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + for my $fdno (1,2) { + my $fdr = $self->fh($fdno,'r'); + my $flags; + fcntl($fdr, &::F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags |= &::O_NONBLOCK; # Add non-blocking to the flags + fcntl($fdr, &::F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } +} + +sub max_file_name_length { + # Figure out the max length of a subdir + # TODO and the max total length + # Ext4 = 255,130816 + my $testdir = shift; + + my $upper = 8_000_000; + my $len = 8; + my $dir = "x"x$len; + do { + rmdir($testdir."/".$dir); + $len *= 16; + $dir = "x"x$len; + } while ($len < $upper and mkdir $testdir."/".$dir); + # Then search for the actual max length between $len/16 and $len + my $min = $len/16; + my $max = $len; + while($max-$min > 5) { + # If we are within 5 chars of the exact value: + # it is not worth the extra time to find the exact value + my $test = int(($min+$max)/2); + $dir = "x"x$test; + if(mkdir $testdir."/".$dir) { + rmdir($testdir."/".$dir); + $min = $test; + } else { + $max = $test; + } + } + $Global::max_file_length = $min; + return $min; +} + +sub set_fh { + # Set file handle + my ($self, $fd_no, $key, $fh) = @_; + $self->{'fd'}{$fd_no,$key} = $fh; +} + +sub fh { + # Get file handle + my ($self, $fd_no, $key) = @_; + return $self->{'fd'}{$fd_no,$key}; +} + +sub write { + my $self = shift; + my $remaining_ref = shift; + my $stdin_fh = $self->fh(0,"w"); + + my $len = length $$remaining_ref; + # syswrite may not write all in one go, + # so make sure everything is written. + while($len) { + my $written = syswrite($stdin_fh,$$remaining_ref); + substr($$remaining_ref,0,$written) = ""; + $len -= $written; + } +} + +sub set_stdin_buffer { + # Copy stdin buffer from $block_ref up to $endpos + # Prepend with $header_ref + # Remove $recstart and $recend if needed + # Input: + # $header_ref = ref to $header to prepend + # $block_ref = ref to $block to pass on + # $endpos = length of $block to pass on + # $recstart = --recstart regexp + # $recend = --recend regexp + # Returns: + # N/A + my $self = shift; + my ($header_ref,$block_ref,$endpos,$recstart,$recend) = @_; + $self->{'stdin_buffer'} = ($self->virgin() ? 
$$header_ref : "").substr($$block_ref,0,$endpos); + if($opt::remove_rec_sep) { + remove_rec_sep(\$self->{'stdin_buffer'},$recstart,$recend); + } + $self->{'stdin_buffer_length'} = length $self->{'stdin_buffer'}; + $self->{'stdin_buffer_pos'} = 0; + $self->add_transfersize($self->{'stdin_buffer_length'}); +} + +sub stdin_buffer_length { + my $self = shift; + return $self->{'stdin_buffer_length'}; +} + +sub remove_rec_sep { + my ($block_ref,$recstart,$recend) = @_; + # Remove record separator + $$block_ref =~ s/$recend$recstart//gos; + $$block_ref =~ s/^$recstart//os; + $$block_ref =~ s/$recend$//os; +} + +sub non_block_write { + my $self = shift; + my $something_written = 0; + use POSIX qw(:errno_h); + # for loop used to avoid copying substr: $buf will be an alias for the substr + for my $buf (substr($self->{'stdin_buffer'},$self->{'stdin_buffer_pos'})) { + my $in = $self->fh(0,"w"); + my $rv = syswrite($in, $buf); + if (!defined($rv) && $! == EAGAIN) { + # would block + $something_written = 0; + } elsif ($self->{'stdin_buffer_pos'}+$rv != $self->{'stdin_buffer_length'}) { + # incomplete write + # Remove the written part + $self->{'stdin_buffer_pos'} += $rv; + $something_written = $rv; + } else { + # successfully wrote everything + my $a = ""; + $self->set_stdin_buffer(\$a,\$a,"",""); + $something_written = $rv; + } + } + + ::debug("pipe", "Non-block: ", $something_written); + return $something_written; +} + + +sub virgin { + my $self = shift; + return $self->{'virgin'}; +} + +sub set_virgin { + my $self = shift; + $self->{'virgin'} = shift; +} + +sub pid { + my $self = shift; + return $self->{'pid'}; +} + +sub set_pid { + my $self = shift; + $self->{'pid'} = shift; +} + +sub starttime { + # Returns: + # UNIX-timestamp this job started + my $self = shift; + return sprintf("%.3f",$self->{'starttime'}); +} + +sub set_starttime { + my $self = shift; + my $starttime = shift || ::now(); + $self->{'starttime'} = $starttime; +} + +sub runtime { + # Returns: + # Run time in seconds + my $self = shift; + return sprintf("%.3f",int(($self->endtime() - $self->starttime())*1000)/1000); +} + +sub endtime { + # Returns: + # UNIX-timestamp this job ended + # 0 if not ended yet + my $self = shift; + return ($self->{'endtime'} || 0); +} + +sub set_endtime { + my $self = shift; + my $endtime = shift; + $self->{'endtime'} = $endtime; +} + +sub timedout { + # Is the job timedout? + # Input: + # $delta_time = time that the job may run + # Returns: + # True or false + my $self = shift; + my $delta_time = shift; + return time > $self->{'starttime'} + $delta_time; +} + +sub kill { + # Kill the job. + # Send the signals to (grand)*children and pid. + # If no signals: TERM TERM KILL + # Wait 200 ms after each TERM. 
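+ # Example (illustrative): $job->kill() escalates TERM, TERM, KILL with a
+ # short wait after each TERM, whereas $job->kill("INT") sends a single
+ # SIGINT to the whole (grand)*child process tree and returns.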
+ # Input: + # @signals = signals to send + my $self = shift; + my @signals = @_; + my @family_pids = $self->family_pids(); + + # Record this jobs as failed + $self->set_exitstatus(-1); + # Send two TERMs to give time to clean up + ::debug("run", "Kill seq ", $self->seq(), " signal '@signals'\n"); + my @send_signals = @signals || ("TERM", "TERM", "KILL"); + for my $signal (@send_signals) { + my $alive = 0; + for my $pid (@family_pids) { + if(kill 0, $pid) { + # The job still running + kill $signal, $pid; + $alive = 1; + ::debug("run","$pid is alive\n"); + } + } + # If a signal was given as input, do not do the sleep below + @signals and next; + + if($signal eq "TERM" and $alive) { + # Wait up to 200 ms between TERMs - but only if any pids are alive + my $sleep = 1; + for (my $sleepsum = 0; kill 0, $family_pids[0] and $sleepsum < 200; + $sleepsum += $sleep) { + $sleep = ::reap_usleep($sleep); + } + } + } +} + +sub family_pids { + # Find the pids with this->pid as (grand)*parent + # Returns: + # @pids = pids of (grand)*children + my $self = shift; + my $pid = $self->pid(); + my @pids; + + my ($children_of_ref, $parent_of_ref, $name_of_ref) = ::pid_table(); + + my @more = ($pid); + # While more (grand)*children + while(@more) { + my @m; + push @pids, @more; + for my $parent (@more) { + if($children_of_ref->{$parent}) { + # add the children of this parent + push @m, @{$children_of_ref->{$parent}}; + } + } + @more = @m; + } + return (@pids); +} + +sub failed { + # return number of times failed for this $sshlogin + # Input: + # $sshlogin + # Returns: + # Number of times failed for $sshlogin + my $self = shift; + my $sshlogin = shift; + return $self->{'failed'}{$sshlogin}; +} + +sub failed_here { + # return number of times failed for the current $sshlogin + # Returns: + # Number of times failed for this sshlogin + my $self = shift; + return $self->{'failed'}{$self->sshlogin()}; +} + +sub add_failed { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + $self->{'failed'}{$sshlogin}++; +} + +sub add_failed_here { + # increase the number of times failed for the current $sshlogin + my $self = shift; + $self->{'failed'}{$self->sshlogin()}++; +} + +sub reset_failed { + # increase the number of times failed for this $sshlogin + my $self = shift; + my $sshlogin = shift; + delete $self->{'failed'}{$sshlogin}; +} + +sub reset_failed_here { + # increase the number of times failed for this $sshlogin + my $self = shift; + delete $self->{'failed'}{$self->sshlogin()}; +} + +sub min_failed { + # Returns: + # the number of sshlogins this command has failed on + # the minimal number of times this command has failed + my $self = shift; + my $min_failures = + ::min(map { $self->{'failed'}{$_} } keys %{$self->{'failed'}}); + my $number_of_sshlogins_failed_on = scalar keys %{$self->{'failed'}}; + return ($number_of_sshlogins_failed_on,$min_failures); +} + +sub total_failed { + # Returns: + # $total_failures = the number of times this command has failed + my $self = shift; + my $total_failures = 0; + for (values %{$self->{'failed'}}) { + $total_failures += $_; + } + return $total_failures; +} + +{ + my $script; + + sub postpone_exit_and_cleanup { + # Command to remove files and dirs (given as args) without + # affecting the exit value in $?/$status. + if(not $script) { + $script = "perl -e '". + ::spacefree(0,q{ + $bash=shift; + $csh=shift; + for(@ARGV){ + unlink; + rmdir; + } + if($bash=~s/h//) { + exit $bash; + } + exit $csh; + }). 
+ "' ".'"$?h" "$status" '; + } + return $script + } +} + +sub wrapped { + # Wrap command with: + # * --shellquote + # * --nice + # * --cat + # * --fifo + # * --sshlogin + # * --pipepart (@Global::cat_partials) + # * --pipe + # * --tmux + # The ordering of the wrapping is important: + # * --nice/--cat/--fifo should be done on the remote machine + # * --pipepart/--pipe should be done on the local machine inside --tmux + # Uses: + # $Global::envvar + # $opt::shellquote + # $opt::nice + # $Global::shell + # $opt::cat + # $opt::fifo + # @Global::cat_partials + # $opt::pipe + # $opt::tmux + # Returns: + # $self->{'wrapped'} = the command wrapped with the above + my $self = shift; + if(not defined $self->{'wrapped'}) { + my $command = $self->replaced(); + if($opt::shellquote) { + # Prepend echo + # and quote twice + $command = "echo " . + ::shell_quote_scalar(::shell_quote_scalar($command)); + } + if($opt::nice) { + # Prepend \nice -n19 $SHELL -c + # and quote. + # The '\' before nice is needed to avoid tcsh's built-in + my $sshlogin = $self->sshlogin(); + my $serverlogin = $sshlogin->serverlogin(); + if($serverlogin eq ":") { + # Local use $Global::shell + $command = '\nice'. " -n". $opt::nice. " ". + $Global::shell. " -c ". + ::shell_quote_scalar($command); + } else { + # Remote systems use $SHELL + $command = '\nice'. " -n". $opt::nice. + ' $SHELL -c '. + ::shell_quote_scalar($command); + } + } + if($opt::cat) { +# Append 'unlink {} without affecting $?' + $command = + $self->{'commandline'}->replace_placeholders(["cat > \257<\257>; "], 0, 0). + $command.";". postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } elsif($opt::fifo) { + # Prepend 'mkfifo {}; (' + # Append ') & _PID=$!; cat > {}; wait $_PID; ' + # (This makes it fail in csh, but give the correct exit code in bash) + # Append 'unlink {} without affecting $?' + # Set $ENV{PARALLEL_TMP} when starting a job + # Set $ENV{PARALLEL_TMP} in the remote wrapper + # mkfifo $PARALLEL_TMP; + # {} = $PARALLEL_TMP; + # (...) & + # cat > $PARALLEL_TMP; wait \$_PID; cleanup $PARALLEL_TMP + # perl -e 'open($fifo,">",shift); while(read){print FIFO};unlink $fifo;waitpid($pid,0);exit $?' $! $PARALLEL_FIFO + $command = + "mkfifo \$PARALLEL_TMP\n (". + $command.";". + ') & _PID=$!; cat > $PARALLEL_TMP; wait $_PID; '. + postpone_exit_and_cleanup(). + '$PARALLEL_TMP'; + } + # Wrap with ssh + tranferring of files + $command = $self->sshlogin_wrap($command); + if(@Global::cat_partials) { + # Prepend: + # < /tmp/foo perl -e 'while(@ARGV) { + # sysseek(STDIN,shift,0) || die; $left = shift; + # while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ + # $left -= $read; syswrite(STDOUT,$buf); + # } + # }' 0 0 0 11 | + $command = (shift @Global::cat_partials). " | ($command)"; + } elsif($opt::pipe) { + # Wrap with EOF-detector to avoid starting $command if EOF. + $command = empty_input_wrapper($command); + } + if($opt::tmux) { + # Wrap command with 'tmux' + $command = $self->tmux_wrap($command); + } + $self->{'wrapped'} = $command; + } + return $self->{'wrapped'}; +} + +sub set_sshlogin { + my $self = shift; + my $sshlogin = shift; + $self->{'sshlogin'} = $sshlogin; + delete $self->{'sshlogin_wrap'}; # If sshlogin is changed the wrap is wrong + delete $self->{'wrapped'}; +} + +sub sshlogin { + my $self = shift; + return $self->{'sshlogin'}; +} + +sub string_zip_base64 { + # Pipe string through 'bzip2 -9' and base64 encode it into 1000 + # byte blocks. 
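+ # (The receiving side reverses this: base64_zip_eval() below builds a
+ # one-liner that joins the blocks given in @ARGV, base64-decodes them,
+ # pipes the result through 'bzip2 -dc' and evals the decompressed string.)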
+ # 1000 bytes is the largest word size csh supports + # Input: + # @strings = to be encoded + # Returns: + # @base64 = 1000 byte block + my($zipin_fh, $zipout_fh,@base64); + ::open3($zipin_fh,$zipout_fh,">&STDERR","bzip2 -9"); + if(fork) { + close $zipin_fh; + $Global::use{"MIME::Base64"} ||= eval "use MIME::Base64; 1;"; + # Split base64 encoded into 1000 byte blocks + @base64 = unpack("(A1000)*",encode_base64((join"",<$zipout_fh>),"")); + close $zipout_fh; + } else { + close $zipout_fh; + print $zipin_fh @_; + close $zipin_fh; + exit; + } + ::debug("base64","Orig:@_\nAs base64:@base64\n"); + return @base64; +} + +sub base64_zip_eval { + # Script that: + # * reads base64 strings from @ARGV + # * decodes them + # * pipes through 'bzip2 -dc' + # * evals the result + # Reverse of string_zip_base64 + eval + # Will be wrapped in ' so single quote is forbidden + # Returns: + # $script = 1-liner for perl -e + my $script = ::spacefree(0,q{ + @GNU_Parallel=("use","IPC::Open3;","use","MIME::Base64"); + eval "@GNU_Parallel"; + + $SIG{CHLD}="IGNORE"; + # Search for bzip2. Not found => use default path + my $zip = (grep { -x $_ } "/usr/local/bin/bzip2")[0] || "bzip2"; + # $in = stdin on $zip, $out = stdout from $zip + my($in, $out,$eval); + open3($in,$out,">&STDERR",$zip,"-dc"); + if(my $perlpid = fork) { + close $in; + $eval = join "", <$out>; + close $out; + } else { + close $out; + # Pipe decoded base64 into 'bzip2 -dc' + print $in (decode_base64(join"",@ARGV)); + close $in; + exit; + } + wait; + eval $eval; + }); + ::debug("base64",$script,"\n"); + return $script; +} + +sub sshlogin_wrap { + # Wrap the command with the commands needed to run remotely + # Input: + # $command = command to run + # Returns: + # $self->{'sshlogin_wrap'} = command wrapped with ssh+transfer commands + sub monitor_parent_sshd_script { + # This script is to solve the problem of + # * not mixing STDERR and STDOUT + # * terminating with ctrl-c + # If its parent is ssh: all good + # If its parent is init(1): ssh died, so kill children + my $monitor_parent_sshd_script; + + if(not $monitor_parent_sshd_script) { + $monitor_parent_sshd_script = + # This will be packed in ', so only use " + ::spacefree(0,'$shell = "'.($ENV{'PARALLEL_SHELL'} || '$ENV{SHELL}').'";'. + '$tmpdir = "'.::perl_quote_scalar($ENV{'TMPDIR'}).'";'. + q{ + # Set $PARALLEL_TMP to a non-existent file name in $TMPDIR + do { + $ENV{PARALLEL_TMP} = $tmpdir."/par". + join"", map { (0..9,"a".."z","A".."Z")[rand(62)] } (1..5); + } while(-e $ENV{PARALLEL_TMP}); + $SIG{CHLD} = sub { $done = 1; }; + $pid = fork; + unless($pid) { + # Make own process group to be able to kill HUP it later + setpgrp; + exec $shell, "-c", ($bashfunc."@ARGV"); + die "exec: $!\n"; + } + do { + # Parent is not init (ppid=1), so sshd is alive + # Exponential sleep up to 1 sec + $s = $s < 1 ? 0.001 + $s * 1.03 : $s; + select(undef, undef, undef, $s); + } until ($done || getppid == 1); + # Kill HUP the process group if job not done + kill(SIGHUP, -${pid}) unless $done; + wait; + exit ($?&127 ? 
128+($?&127) : 1+$?>>8) + }); + } + return $monitor_parent_sshd_script; + } + + sub vars_to_export { + # Uses: + # @opt::env + my @vars = ("parallel_bash_environment"); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + for (@vars) { + if(-r $_ and not -d) { + # Read as environment definition bug #44041 + # TODO parse this + my $fh = ::open_or_exit($_); + $Global::envdef = join("",<$fh>); + close $fh; + } + } + if(grep { /^_$/ } @vars) { + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' in a clean environment first.\n"); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions (v1+v2) + # So --env myfunc should also look for BASH_FUNC_myfunc() + push(@vars, "PARALLEL_PID", "PARALLEL_SEQ", + map { ("BASH_FUNC_$_()", "BASH_FUNC_$_%%") } @vars); + # Keep only defined variables + return grep { defined($ENV{$_}) } @vars; + } + + sub env_as_eval { + # Returns: + # $eval = '$ENV{"..."}=...; ...' + my @vars = vars_to_export(); + my $csh_friendly = not grep { /\n/ } @ENV{@vars}; + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; + # eval of @envset will set %ENV + my $envset = join"", map { + '$ENV{"'.::perl_quote_scalar($_).'"}="'. ::perl_quote_scalar($ENV{$_}).'";'; } @non_functions; + + # running @bashfunc on the command line, will set the functions + my @bashfunc = map { + my $v=$_; s/BASH_FUNC_(.*)(\(\)|%%)/$1/; "$_$ENV{$v};export -f $_ >/dev/null;" } @bash_functions; + # eval $bashfuncset will set $bashfunc + my $bashfuncset; + if(@bashfunc) { + # Functions are not supported for all shells + if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell\n"); + } + $bashfuncset = + '@bash_functions=qw('."@bash_functions".");". + ::spacefree(1,q{ + if($ENV{"SHELL"}=~/csh/) { + print STDERR "CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset @bash_functions\n"; + exec "false"; + } + }). + "\n".'$bashfunc = "'.::perl_quote_scalar("@bashfunc").'";'; + } else { + $bashfuncset = '$bashfunc = "";' + } + if($ENV{"parallel_bash_environment"}) { + $bashfuncset .= '$bashfunc .= "eval\ \"\$parallel_bash_environment\"\;";'; + } + ::debug("base64",$envset,$bashfuncset,"\n"); + return $csh_friendly,$envset,$bashfuncset; + } + + my $self = shift; + my $command = shift; + # TODO test that *sh -c 'parallel --env' use *sh + if(not defined $self->{'sshlogin_wrap'}) { + my $sshlogin = $self->sshlogin(); + my $serverlogin = $sshlogin->serverlogin(); + my $quoted_remote_command; + $ENV{'PARALLEL_SEQ'} = $self->seq(); + $ENV{'PARALLEL_PID'} = $$; + if($serverlogin eq ":") { + if(@opt::env) { + # Prepend with environment setter, which sets functions in zsh + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $env_command = $envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. 
+ "exec\"$Global::shell\",\"-c\",\(\$bashfunc.\"\@ARGV\"\)\;die\"exec:\$\!\\n\"\;"; + if(length $env_command > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $env_command = "perl -e '".base64_zip_eval()."' ". + join" ",string_zip_base64($env_command); + $self->{'sshlogin_wrap'} = $env_command; + } else { + $self->{'sshlogin_wrap'} = "perl -e ".::shell_quote_scalar($env_command); + } + } else { + $self->{'sshlogin_wrap'} = $command; + } + } else { + my $pwd = ""; + if($opt::workdir) { + # Create remote workdir if needed. Then cd to it. + my $wd = $self->workdir(); + $pwd = qq{system("mkdir","-p","--","$wd"); chdir "$wd" ||}. + qq{print(STDERR "parallel: Cannot chdir to $wd\\n") && exit 255;}; + } + my ($csh_friendly,$envset,$bashfuncset) = env_as_eval(); + my $remote_command = $pwd.$envset.$bashfuncset. + '@ARGV="'.::perl_quote_scalar($command).'";'. monitor_parent_sshd_script(); + $quoted_remote_command = "perl -e ".::shell_quote_scalar($remote_command); + if(length $quoted_remote_command > 999 + or + not $csh_friendly + or + $command =~ /\n/) { + # csh does not deal well with > 1000 chars in one word + # csh does not deal well with $ENV with \n + $quoted_remote_command = "perl -e \\''".base64_zip_eval()."'\\' ". + join" ",string_zip_base64($remote_command); + } else { + $quoted_remote_command = ::shell_quote_scalar($quoted_remote_command); + } + + my $sshcmd = $sshlogin->sshcommand(); + my ($pre,$post,$cleanup)=("","",""); + # --transfer + $pre .= $self->sshtransfer(); + # --return + $post .= $self->sshreturn(); + # --cleanup + $post .= $self->sshcleanup(); + if($post) { + # We need to save the exit status of the job + $post = '_EXIT_status=$?; ' . $post . ' exit $_EXIT_status;'; + } + $self->{'sshlogin_wrap'} = + ($pre + . "$sshcmd $serverlogin exec " + . $quoted_remote_command + . ";" + . 
$post); + } + } + return $self->{'sshlogin_wrap'}; +} + +sub transfer { + # Files to transfer + # Returns: + # @transfer - File names of files to transfer + my $self = shift; + my @transfer = (); + $self->{'transfersize'} = 0; + if($opt::transfer) { + for my $record (@{$self->{'commandline'}{'arg_list'}}) { + # Merge arguments from records into args + for my $arg (@$record) { + CORE::push @transfer, $arg->orig(); + # filesize + if(-e $arg->orig()) { + $self->{'transfersize'} += (stat($arg->orig()))[7]; + } + } + } + } + return @transfer; +} + +sub transfersize { + my $self = shift; + return $self->{'transfersize'}; +} + +sub add_transfersize { + my $self = shift; + my $transfersize = shift; + $self->{'transfersize'} += $transfersize; +} + +sub sshtransfer { + # Returns for each transfer file: + # rsync $file remote:$workdir + my $self = shift; + my @pre; + my $sshlogin = $self->sshlogin(); + my $workdir = $self->workdir(); + for my $file ($self->transfer()) { + push @pre, $sshlogin->rsync_transfer_cmd($file,$workdir).";"; + } + return join("",@pre); +} + +sub return { + # Files to return + # Non-quoted and with {...} substituted + # Returns: + # @non_quoted_filenames + my $self = shift; + return $self->{'commandline'}-> + replace_placeholders($self->{'commandline'}{'return_files'},0,0); +} + +sub returnsize { + # This is called after the job has finished + # Returns: + # $number_of_bytes transferred in return + my $self = shift; + for my $file ($self->return()) { + if(-e $file) { + $self->{'returnsize'} += (stat($file))[7]; + } + } + return $self->{'returnsize'}; +} + +sub add_returnsize { + my $self = shift; + my $returnsize = shift; + $self->{'returnsize'} += $returnsize; +} + +sub sshreturn { + # Returns for each return-file: + # rsync remote:$workdir/$file . + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my $rsync_opt = "-rlDzR -e".::shell_quote_scalar($sshcmd); + my $pre = ""; + for my $file ($self->return()) { + $file =~ s:^\./::g; # Remove ./ if any + my $relpath = ($file !~ m:^/:); # Is the path relative? + my $cd = ""; + my $wd = ""; + if($relpath) { + # rsync -avR /foo/./bar/baz.c remote:/tmp/ + # == (on old systems) + # rsync -avR --rsync-path="cd /foo; rsync" remote:bar/baz.c /tmp/ + $wd = ::shell_quote_file($self->workdir()."/"); + } + # Only load File::Basename if actually needed + $Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; + # dir/./file means relative to dir, so remove dir on remote + $file =~ m:(.*)/\./:; + my $basedir = $1 ? ::shell_quote_file($1."/") : ""; + my $nobasedir = $file; + $nobasedir =~ s:.*/\./::; + $cd = ::shell_quote_file(::dirname($nobasedir)); + my $rsync_cd = '--rsync-path='.::shell_quote_scalar("cd $wd$cd; rsync"); + my $basename = ::shell_quote_scalar(::shell_quote_file(basename($file))); + # --return + # mkdir -p /home/tange/dir/subdir/; + # rsync (--protocol 30) -rlDzR --rsync-path="cd /home/tange/dir/subdir/; rsync" + # server:file.gz /home/tange/dir/subdir/ + $pre .= "mkdir -p $basedir$cd; ".$sshlogin->rsync()." $rsync_cd $rsync_opt $serverlogin:". + $basename . 
" ".$basedir.$cd.";"; + } + return $pre; +} + +sub sshcleanup { + # Return the sshcommand needed to remove the file + # Returns: + # ssh command needed to remove files from sshlogin + my $self = shift; + my $sshlogin = $self->sshlogin(); + my $sshcmd = $sshlogin->sshcommand(); + my $serverlogin = $sshlogin->serverlogin(); + my $workdir = $self->workdir(); + my $cleancmd = ""; + + for my $file ($self->cleanup()) { + my @subworkdirs = parentdirs_of($file); + $cleancmd .= $sshlogin->cleanup_cmd($file,$workdir).";"; + } + if(defined $opt::workdir and $opt::workdir eq "...") { + $cleancmd .= "$sshcmd $serverlogin rm -rf " . ::shell_quote_scalar($workdir).';'; + } + return $cleancmd; +} + +sub cleanup { + # Returns: + # Files to remove at cleanup + my $self = shift; + if($opt::cleanup) { + my @transfer = $self->transfer(); + my @return = $self->return(); + return (@transfer,@return); + } else { + return (); + } +} + +sub workdir { + # Returns: + # the workdir on a remote machine + my $self = shift; + if(not defined $self->{'workdir'}) { + my $workdir; + if(defined $opt::workdir) { + if($opt::workdir eq ".") { + # . means current dir + my $home = $ENV{'HOME'}; + eval 'use Cwd'; + my $cwd = cwd(); + $workdir = $cwd; + if($home) { + # If homedir exists: remove the homedir from + # workdir if cwd starts with homedir + # E.g. /home/foo/my/dir => my/dir + # E.g. /tmp/my/dir => /tmp/my/dir + my ($home_dev, $home_ino) = (stat($home))[0,1]; + my $parent = ""; + my @dir_parts = split(m:/:,$cwd); + my $part; + while(defined ($part = shift @dir_parts)) { + $part eq "" and next; + $parent .= "/".$part; + my ($parent_dev, $parent_ino) = (stat($parent))[0,1]; + if($parent_dev == $home_dev and $parent_ino == $home_ino) { + # dev and ino is the same: We found the homedir. + $workdir = join("/",@dir_parts); + last; + } + } + } + if($workdir eq "") { + $workdir = "."; + } + } elsif($opt::workdir eq "...") { + $workdir = ".parallel/tmp/" . ::hostname() . "-" . $$ + . "-" . $self->seq(); + } else { + $workdir = $opt::workdir; + # Rsync treats /./ special. We dont want that + $workdir =~ s:/\./:/:g; # Remove /./ + $workdir =~ s:/+$::; # Remove ending / if any + $workdir =~ s:^\./::g; # Remove starting ./ if any + } + } else { + $workdir = "."; + } + $self->{'workdir'} = ::shell_quote_scalar($workdir); + } + return $self->{'workdir'}; +} + +sub parentdirs_of { + # Return: + # all parentdirs except . of this dir or file - sorted desc by length + my $d = shift; + my @parents = (); + while($d =~ s:/[^/]+$::) { + if($d ne ".") { + push @parents, $d; + } + } + return @parents; +} + +sub start { + # Setup STDOUT and STDERR for a job and start it. + # Returns: + # job-object or undef if job not to run + my $job = shift; + # Get the shell command to be executed (possibly with ssh infront). + my $command = $job->wrapped(); + + if($Global::interactive or $Global::stderr_verbose) { + $command = interactive_start($command); + } + my $pid; + $job->openoutputfiles(); + my($stdout_fh,$stderr_fh) = ($job->fh(1,"w"),$job->fh(2,"w")); + local (*IN,*OUT,*ERR); + open OUT, '>&', $stdout_fh or ::die_bug("Can't redirect STDOUT: $!"); + open ERR, '>&', $stderr_fh or ::die_bug("Can't dup STDOUT: $!"); + + if($opt::ungroup) { + print_dryrun_and_verbose($stdout_fh,$job,$command); + } + if($opt::dryrun) { + $command = "true"; + } + $ENV{'PARALLEL_SEQ'} = $job->seq(); + $ENV{'PARALLEL_PID'} = $$; + $ENV{'PARALLEL_TMP'} = ::tmpname("par"); + ::debug("run", $Global::total_running, " processes . 
Starting (", + $job->seq(), "): $command\n"); + if($opt::pipe) { + my ($stdin_fh); + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3($stdin_fh, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-pipe"); + 1; + }; + $job->set_fh(0,"w",$stdin_fh); + } elsif(@opt::a and not $Global::stdin_in_opt_a and $job->seq() == 1 + and $job->sshlogin()->string() eq ":") { + # Give STDIN to the first job if using -a (but only if running + # locally - otherwise CTRL-C does not work for other jobs Bug#36585) + *IN = *STDIN; + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-a"); + 1; + }; + # Re-open to avoid complaining + open(STDIN, "<&", $Global::original_stdin) + or ::die_bug("dup-\$Global::original_stdin: $!"); + } elsif ($opt::tty and not $Global::tty_taken and -c "/dev/tty" and + open(my $devtty_fh, "<", "/dev/tty")) { + # Give /dev/tty to the command if no one else is using it + *IN = $devtty_fh; + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3("<&IN", ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-/dev/tty"); + $Global::tty_taken = $pid; + close $devtty_fh; + 1; + }; + } else { + # The eval is needed to catch exception from open3 + eval { + $pid = ::open3(::gensym, ">&OUT", ">&ERR", $Global::shell, "-c", $command) || + ::die_bug("open3-gensym"); + 1; + }; + } + if($pid) { + # A job was started + $Global::total_running++; + $Global::total_started++; + $job->set_pid($pid); + $job->set_starttime(); + $Global::running{$job->pid()} = $job; + if($opt::timeout) { + $Global::timeoutq->insert($job); + } + $Global::newest_job = $job; + $Global::newest_starttime = ::now(); + return $job; + } else { + # No more processes + ::debug("run", "Cannot spawn more jobs.\n"); + return undef; + } +} + +sub interactive_start { + my $command = shift; + if($Global::interactive) { + ::status("$command ?..."); + open(my $tty_fh, "<", "/dev/tty") || ::die_bug("interactive-tty"); + my $answer = <$tty_fh>; + close $tty_fh; + my $run_yes = ($answer =~ /^\s*y/i); + if (not $run_yes) { + $command = "true"; # Run the command 'true' + } + } else { + print $Global::original_stderr "$command\n"; + } + return $command; +} + +sub print_dryrun_and_verbose { + # For $opt::ungroup we print these ASAP + # For $opt::group they are part of print() + my $stdout_fh = shift; + my $job = shift; + my $command = shift; + if($opt::dryrun or $Global::verbose) { + if($Global::verbose <= 1) { + print $stdout_fh $job->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print $stdout_fh $command,"\n"; + } + } +} + +{ + my $tmuxsocket; + + sub tmux_wrap { + # Wrap command with tmux for session pPID + # Input: + # $actual_command = the actual command being run (incl ssh wrap) + my $self = shift; + my $actual_command = shift; + # Temporary file name. Used for fifo to communicate exit val + my $tmpfifo=::tmpname("tmx"); + + if(length($tmpfifo) >=100) { + ::error("tmux does not support sockets with path > 100\n"); + ::wait_and_exit(255); + } + my $visual_command = $self->replaced(); + my $title = $visual_command; + if($visual_command =~ /\0/) { + ::error("Command line contains NUL. 
tmux is confused by NUL.\n"); + ::wait_and_exit(255); + } + # ; causes problems + # ascii 194-245 annoys tmux + $title =~ tr/[\011-\016;\302-\365]//d; + $title = ::shell_quote_scalar($title); + + my $l_act = length($actual_command); + my $l_tit = length($title); + my $l_fifo = length($tmpfifo); + # The line to run contains a 118 chars extra code + the title 2x + my $l_tot = 2 * $l_tit + $l_act + $l_fifo; + + while($l_tit < 1000 and + ( + (890 < $l_tot and $l_tot < 1350) + or + (9250 < $l_tot and $l_tot < 9800) + )) { + # tmux blocks for certain lengths: + # 900 < title + command < 1200 + # 9250 < title + command < 9800 + # but only if title < 1000, so expand the title with 75 spaces + # The measured lengths are: + # 996 < (title + whole command) < 1127 + # 9331 < (title + whole command) < 9636 + $title = $title.('\ 'x75); + $l_tit = length($title); + $l_tot = 2 * $l_tit + $l_act + $l_fifo; + } + + my $tmux; + $ENV{'TMUX'} ||= "tmux"; + if(not $tmuxsocket) { + $tmuxsocket = ::tmpname("tms"); + ::status("See output with: $ENV{'TMUX'} -S $tmuxsocket attach\n"); + } + # TODO sh -c wrapper for >& + $tmux = $ENV{'TMUX'}." -S $tmuxsocket new-session -s p$$ -d 'sleep .2' >&/dev/null;" . + $ENV{'TMUX'}." -S $tmuxsocket new-window -t p$$ -n $title"; + + ::debug("tmux", "title len:", $l_tit, " act ", $l_act, " max ", + $Limits::Command::line_max_len, " tot ", + $l_tot, "\n"); + + return "mkfifo $tmpfifo && $tmux ". + # Run in tmux + ::shell_quote_scalar + ( + "(".$actual_command.');'. + # The triple print is needed - otherwise the testsuite fails + q[ perl -e 'while($t++<3){ print $ARGV[0],"\n" }' $?h/$status >> ].$tmpfifo."&". + "echo $title; echo \007Job finished at: `date`;sleep 10" + ). + # Run outside tmux + # Read a / separated line: 0h/2 for csh, 2/0 for bash. + # If csh the first will be 0h, so use the second as exit value. + # Otherwise just use the first value as exit value. + q{; exec perl -e '$/="/";$_=<>;$c=<>;unlink $ARGV; /(\d+)h/ and exit($1);exit$c' }.$tmpfifo; + } +} + +sub is_already_in_results { + # Do we already have results for this job? + # Returns: + # $job_already_run = bool whether there is output for this or not + my $job = $_[0]; + my $args_as_dirname = $job->{'commandline'}->args_as_dirname(); + # prefix/name1/val1/name2/val2/ + my $dir = $opt::results."/".$args_as_dirname; + ::debug("run", "Test $dir/stdout", -e "$dir/stdout", "\n"); + return -e "$dir/stdout"; +} + +sub is_already_in_joblog { + my $job = shift; + return vec($Global::job_already_run,$job->seq(),1); +} + +sub set_job_in_joblog { + my $job = shift; + vec($Global::job_already_run,$job->seq(),1) = 1; +} + +sub should_be_retried { + # Should this job be retried? + # Returns + # 0 - do not retry + # 1 - job queued for retry + my $self = shift; + if (not $opt::retries) { + return 0; + } + if(not $self->exitstatus()) { + # Completed with success. If there is a recorded failure: forget it + $self->reset_failed_here(); + return 0 + } else { + # The job failed. Should it be retried? 
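+ # Record the failure on this sshlogin. If the job has now failed
+ # --retries times in total it is given up; otherwise its end time and
+ # exit status are cleared and it is put back on the job queue so it
+ # will be attempted again.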
+ $self->add_failed_here(); + if($self->total_failed() == $opt::retries) { + # This has been retried enough + return 0; + } else { + # This command should be retried + $self->set_endtime(undef); + $self->reset_exitstatus(); + $Global::JobQueue->unget($self); + ::debug("run", "Retry ", $self->seq(), "\n"); + return 1; + } +} +} + +{ + my (%print_later,$job_end_sequence); + + sub print_earlier_jobs { + # Print jobs completed earlier + # Returns: N/A + my $job = shift; + $print_later{$job->seq()} = $job; + $job_end_sequence ||= 1; + ::debug("run", "Looking for: $job_end_sequence ", + "Current: ", $job->seq(), "\n"); + for(my $j = $print_later{$job_end_sequence}; + $j or vec($Global::job_already_run,$job_end_sequence,1); + $job_end_sequence++, + $j = $print_later{$job_end_sequence}) { + ::debug("run", "Found job end $job_end_sequence"); + if($j) { + $j->print(); + delete $print_later{$job_end_sequence}; + } + } + } +} + +sub print { + # Print the output of the jobs + # Returns: N/A + + my $self = shift; + ::debug("print", ">>joboutput ", $self->replaced(), "\n"); + if($opt::dryrun) { + # Nothing was printed to this job: + # cleanup tmp files if --files was set + unlink $self->fh(1,"name"); + } + if($opt::pipe and $self->virgin()) { + # Skip --joblog, --dryrun, --verbose + } else { + if($opt::ungroup and $Global::joblog and defined $self->{'exitstatus'}) { + # Add to joblog when finished + $self->print_joblog(); + # Printing is only relevant for grouped/--line-buffer output. + $opt::ungroup and return; + } + + # Check for disk full + ::exit_if_disk_full(); + + if(($opt::dryrun or $Global::verbose) + and + not $self->{'verbose_printed'}) { + $self->{'verbose_printed'}++; + if($Global::verbose <= 1) { + print STDOUT $self->replaced(),"\n"; + } else { + # Verbose level > 1: Print the rsync and stuff + print STDOUT $self->wrapped(),"\n"; + } + # If STDOUT and STDERR are merged, + # we want the command to be printed first + # so flush to avoid STDOUT being buffered + flush STDOUT; + } + } + for my $fdno (sort { $a <=> $b } keys %Global::fd) { + # Sort by file descriptor numerically: 1,2,3,..,9,10,11 + $fdno == 0 and next; + my $out_fd = $Global::fd{$fdno}; + my $in_fh = $self->fh($fdno,"r"); + if(not $in_fh) { + if(not $Job::file_descriptor_warning_printed{$fdno}++) { + # ::warning("File descriptor $fdno not defined\n"); + } + next; + } + ::debug("print", "File descriptor $fdno (", $self->fh($fdno,"name"), "):\n"); + if($opt::files) { + $self->files_print($fdno,$in_fh,$out_fd); + } elsif($opt::linebuffer) { + # Line buffered print out + $self->linebuffer_print($fdno,$in_fh,$out_fd); + } elsif($opt::tag or defined $opt::tagstring) { + $self->tag_print($fdno,$in_fh,$out_fd); + } else { + $self->normal_print($fdno,$in_fh,$out_fd); + } + flush $out_fd; + } + ::debug("print", "<{'exitstatus'} + and not ($self->virgin() and $opt::pipe)) { + # Add to joblog when finished + $self->print_joblog(); + } +} + +sub files_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." 
failed.\n"); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Kill the decompressor which will not be needed + CORE::kill "TERM", $self->fh($fdno,"rpid"); + } + close $in_fh; + + if($opt::pipe and $self->virgin()) { + # Nothing was printed to this job: + # cleanup unused tmp files if --files was set + for my $fdno (1,2) { + unlink $self->fh($fdno,"name"); + unlink $self->fh($fdno,"unlink"); + } + } elsif($fdno == 1 and $self->fh($fdno,"name")) { + print $out_fd $self->tag(),$self->fh($fdno,"name"),"\n"; + $self->add_returnsize(-s $self->fh($fdno,"name")); + } +} + +sub linebuffer_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $partial = \$self->{'partial_line',$fdno}; + + if(defined $self->{'exitstatus'}) { + # If the job is dead: close printing fh. Needed for --compress + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed.\n"); + $self->set_exitstatus(255); + } + if($opt::compress) { + # Blocked reading in final round + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + for my $fdno (1,2) { + my $fdr = $self->fh($fdno,'r'); + my $flags; + fcntl($fdr, &F_GETFL, $flags) || die $!; # Get the current flags on the filehandle + $flags &= ~&O_NONBLOCK; # Remove non-blocking to the flags + fcntl($fdr, &F_SETFL, $flags) || die $!; # Set the flags on the filehandle + } + } + } + # This seek will clear EOF + seek $in_fh, tell($in_fh), 0; + # The read is non-blocking: The $in_fh is set to non-blocking. + # 32768 --tag = 5.1s + # 327680 --tag = 4.4s + # 1024000 --tag = 4.4s + # 3276800 --tag = 4.3s + # 10240000 --tag = 4.3s + # 32768000 --tag = 4.7s + my $outputlength = 0; + while(read($in_fh,substr($$partial,length $$partial),3276800)) { + # Append to $$partial + # Find the last \n + my $i = ::rindex64($partial,"\n"); + if($i != -1) { + # One or more complete lines were found + if($fdno == 2 and not $self->{'printed_first_line',$fdno}++) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + $$partial =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; + # Length of partial line has changed: Find the last \n again + $i = ::rindex64($partial,"\n"); + } + $outputlength += $i+1; + if($opt::tag or defined $opt::tagstring) { + # Replace ^ with $tag within the full line + my $tag = $self->tag(); + substr($$partial,0,$i+1) =~ s/^/$tag/gm; + # Length of partial line has changed: Find the last \n again + $i = ::rindex64($partial,"\n"); + } + # Print up to and including the last \n + print $out_fd substr($$partial,0,$i+1); + # Remove the printed part + substr($$partial,0,$i+1) = ""; + } + } + $self->add_returnsize($outputlength); + if(defined $self->{'exitstatus'}) { + # If the job is dead: print the remaining partial line + # read remaining + $self->add_returnsize(length $$partial); + if($$partial and ($opt::tag or defined $opt::tagstring)) { + my $tag = $self->tag(); + $$partial =~ s/^/$tag/gm; + } + print $out_fd $$partial; + # Release the memory + $$partial = undef; + if($self->fh($fdno,"rpid") and CORE::kill 0, $self->fh($fdno,"rpid")) { + # decompress still running + } else { + # decompress done: close fh + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed.\n"); + $self->set_exitstatus(255); + } + } + } +} + +sub tag_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." 
failed.\n"); + $self->set_exitstatus(255); + } + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + my $tag = $self->tag(); + if($fdno == 2) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + while(<$in_fh>) { + if(/^(client_process_control: )?tcgetattr: Invalid argument\n/) { + # Skip + } else { + $self->add_returnsize(length $_); + print $out_fd $tag,$_; + } + # At most run the loop once + last; + } + } + my $outputlength = 0; + while(<$in_fh>) { + print $out_fd $tag,$_; + $outputlength += length $_; + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed.\n"); + $self->set_exitstatus(255); + } +} + +sub normal_print { + my $self = shift; + my ($fdno,$in_fh,$out_fd) = @_; + my $buf; + close $self->fh($fdno,"w"); + if($? and $opt::compress) { + ::error($opt::compress_program." failed.\n"); + $self->set_exitstatus(255); + } + seek $in_fh, 0, 0; + # $in_fh is now ready for reading at position 0 + if($fdno == 2) { + # OpenSSH_3.6.1p2 gives 'tcgetattr: Invalid argument' with -tt + # This is a crappy way of ignoring it. + sysread($in_fh,$buf,1_000); + $buf =~ s/^(client_process_control: )?tcgetattr: Invalid argument\n//; + print $out_fd $buf; + $self->add_returnsize(length $buf); + } + my $outputlength = 0; + while(sysread($in_fh,$buf,32768)) { + print $out_fd $buf; + $outputlength += length $buf; + } + if($fdno == 1) { + $self->add_returnsize($outputlength); + } + close $in_fh; + if($? and $opt::compress) { + ::error($opt::decompress_program." failed.\n"); + $self->set_exitstatus(255); + } +} + +sub print_joblog { + my $self = shift; + my $cmd; + if($Global::verbose <= 1) { + $cmd = $self->replaced(); + } else { + # Verbose level > 1: Print the rsync and stuff + $cmd = "@command"; + } + print $Global::joblog + join("\t", $self->seq(), $self->sshlogin()->string(), + $self->starttime(), sprintf("%10.3f",$self->runtime()), + $self->transfersize(), $self->returnsize(), + $self->exitstatus(), $self->exitsignal(), $cmd + ). "\n"; + flush $Global::joblog; + $self->set_job_in_joblog(); +} + +sub tag { + my $self = shift; + if($opt::tag or defined $opt::tagstring) { + if(not defined $self->{'tag'}) { + $self->{'tag'} = $self->{'commandline'}-> + replace_placeholders([$opt::tagstring],0,0)."\t"; + } + } else { + return ""; + } + return $self->{'tag'}; +} + +sub hostgroups { + my $self = shift; + if(not defined $self->{'hostgroups'}) { + $self->{'hostgroups'} = $self->{'commandline'}->{'arg_list'}[0][0]->{'hostgroups'}; + } + return @{$self->{'hostgroups'}}; +} + +sub exitstatus { + my $self = shift; + return $self->{'exitstatus'}; +} + +sub set_exitstatus { + my $self = shift; + my $exitstatus = shift; + if($exitstatus) { + # Overwrite status if non-zero + $self->{'exitstatus'} = $exitstatus; + } else { + # Set status but do not overwrite + # Status may have been set by --timeout + $self->{'exitstatus'} ||= $exitstatus; + } +} + +sub reset_exitstatus { + my $self = shift; + $self->{'exitstatus'} = undef; +} + +sub exitsignal { + my $self = shift; + return $self->{'exitsignal'}; +} + +sub set_exitsignal { + my $self = shift; + my $exitsignal = shift; + $self->{'exitsignal'} = $exitsignal; +} + + +{ + my $status_printed; + + sub should_we_halt { + # Should we halt? Immediately? Gracefully? 
+ # Returns: N/A + my $job = shift; + if($job->exitstatus() or $job->exitsignal()) { + $Global::exitstatus++; + $Global::total_failed++; + if($opt::halt) { + if($opt::halt == 1 + or + ($opt::halt > 0 and $opt::halt < 1 and $Global::total_failed > 3 + and + $Global::total_failed / $Global::total_started > $opt::halt)) { + # If halt on error == 1 or --halt 10% + # we should gracefully exit + ::status + ("$Global::progname: Starting no more jobs. ", + "Waiting for ", scalar(keys %Global::running), + " jobs to finish. This job failed:\n", + $job->replaced(),"\n"); + $Global::start_no_new_jobs ||= 1; + $Global::halt_exitstatus = $job->exitstatus(); + } elsif($opt::halt == 2) { + # If halt on error == 2 we should exit immediately + if(not $status_printed++) { + ::status + ("$Global::progname: This job failed:\n", + $job->replaced(),"\n"); + } + ::killall(); + ::wait_and_exit($job->exitstatus()); + } + } + } else { + if($opt::halt) { + if($opt::halt == -1) { + # If halt on error == -1 + # we should gracefully exit + ::status + ("$Global::progname: Starting no more jobs. ", + "Waiting for ", scalar(keys %Global::running), + " jobs to finish. This job succeeded:\n", + $job->replaced(),"\n"); + $Global::start_no_new_jobs ||= 1; + $Global::halt_exitstatus = $job->exitstatus(); + } elsif($opt::halt == -2) { + # If halt on error == -2 we should exit immediately + ::status + ("$Global::progname: This job succeeded:\n", + $job->replaced(),"\n"); + ::killall(); + ::wait_and_exit($job->exitstatus()); + } + } + } + } +} + +package CommandLine; + +sub new { + my $class = shift; + my $seq = shift; + my $commandref = shift; + $commandref || die; + my $arg_queue = shift; + my $context_replace = shift; + my $max_number_of_args = shift; # for -N and normal (-n1) + my $return_files = shift; + my $replacecount_ref = shift; + my $len_ref = shift; + my %replacecount = %$replacecount_ref; + my %len = %$len_ref; + for (keys %$replacecount_ref) { + # Total length of this replacement string {} replaced with all args + $len{$_} = 0; + } + return bless { + 'command' => $commandref, + 'seq' => $seq, + 'len' => \%len, + 'arg_list' => [], + 'arg_queue' => $arg_queue, + 'max_number_of_args' => $max_number_of_args, + 'replacecount' => \%replacecount, + 'context_replace' => $context_replace, + 'return_files' => $return_files, + 'replaced' => undef, + }, ref($class) || $class; +} + +sub seq { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq { + my $self = shift; + $self->{'seq'} = shift; +} + +{ + my $max_slot_number; + + sub slot { + # Find the number of a free job slot and return it + # Uses: + # @Global::slots - list with free jobslots + # Returns: + # $jobslot = number of jobslot + my $self = shift; + if(not $self->{'slot'}) { + if(not @Global::slots) { + # $Global::max_slot_number will typically be $Global::max_jobs_running + push @Global::slots, ++$max_slot_number; + } + $self->{'slot'} = shift @Global::slots; + } + return $self->{'slot'}; + } +} + +sub populate { + # Add arguments from arg_queue until the number of arguments or + # max line length is reached + # Uses: + # $Global::minimal_command_line_length + # $opt::cat + # $opt::fifo + # $Global::JobQueue + # $opt::m + # $opt::X + # $CommandLine::already_spread + # $Global::max_jobs_running + # Returns: N/A + my $self = shift; + my $next_arg; + my $max_len = $Global::minimal_command_line_length || Limits::Command::max_length(); + + if($opt::cat) { + # $PARALLEL_TMP will point to a tempfile that will be used as {} + 
$Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } + if($opt::fifo) { + # $PARALLEL_TMP will point to a tempfile that will be used as {} + $Global::JobQueue->{'commandlinequeue'}->{'arg_queue'}-> + unget([Arg->new('$PARALLEL_TMP')]); + } + while (not $self->{'arg_queue'}->empty()) { + $next_arg = $self->{'arg_queue'}->get(); + if(not defined $next_arg) { + next; + } + $self->push($next_arg); + if($self->len() >= $max_len) { + # Command length is now > max_length + # If there are arguments: remove the last + # If there are no arguments: Error + # TODO stuff about -x opt_x + if($self->number_of_args() > 1) { + # There is something to work on + $self->{'arg_queue'}->unget($self->pop()); + last; + } else { + my $args = join(" ", map { $_->orig() } @$next_arg); + ::error("Command line too long (", + $self->len(), " >= ", + $max_len, + ") at input ", + $self->{'arg_queue'}->arg_number(), + ": ". + ((length $args > 50) ? + (substr($args,0,50))."...\n" : + $args."\n")); + $self->{'arg_queue'}->unget($self->pop()); + ::wait_and_exit(255); + } + } + + if(defined $self->{'max_number_of_args'}) { + if($self->number_of_args() >= $self->{'max_number_of_args'}) { + last; + } + } + } + if(($opt::m or $opt::X) and not $CommandLine::already_spread + and $self->{'arg_queue'}->empty() and $Global::max_jobs_running) { + # -m or -X and EOF => Spread the arguments over all jobslots + # (unless they are already spread) + $CommandLine::already_spread ||= 1; + if($self->number_of_args() > 1) { + $self->{'max_number_of_args'} = + ::ceil($self->number_of_args()/$Global::max_jobs_running); + $Global::JobQueue->{'commandlinequeue'}->{'max_number_of_args'} = + $self->{'max_number_of_args'}; + $self->{'arg_queue'}->unget($self->pop_all()); + while($self->number_of_args() < $self->{'max_number_of_args'}) { + $self->push($self->{'arg_queue'}->get()); + } + } + } +} + +sub push { + # Add one or more records as arguments + # Returns: N/A + my $self = shift; + my $record = shift; + push @{$self->{'arg_list'}}, $record; + + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + my $rep; + for my $arg (@$record) { + if(defined $arg) { + for my $perlexpr (keys %{$self->{'replacecount'}}) { + # 50% faster than below + $self->{'len'}{$perlexpr} += length $arg->replace($perlexpr,$quote_arg,$self); + # $rep = $arg->replace($perlexpr,$quote_arg,$self); + # $self->{'len'}{$perlexpr} += length $rep; + # ::debug("length", "Length: ", length $rep, + # "(", $perlexpr, "=>", $rep, ")\n"); + } + } + } +} + +sub pop { + # Remove last argument + # Returns: + # the last record + my $self = shift; + my $record = pop @{$self->{'arg_list'}}; + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + for my $arg (@$record) { + if(defined $arg) { + for my $perlexpr (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$perlexpr} -= + length $arg->replace($perlexpr,$quote_arg,$self); + } + } + } + return $record; +} + +sub pop_all { + # Remove all arguments and zeros the length of replacement strings + # Returns: + # all records + my $self = shift; + my @popped = @{$self->{'arg_list'}}; + for my $replacement_string (keys %{$self->{'replacecount'}}) { + $self->{'len'}{$replacement_string} = 0; + } + $self->{'arg_list'} = []; + return @popped; +} + +sub number_of_args { + # The number of records + # Returns: + # number of records + my $self = shift; + # Ftq rudef oaawuq ime dqxqmeqp az 2011-01-24 mzp ime iaz nk MQhmd + # Mdzrvadp Nvmdymeaz az 2011-04-10. 
Ftue oaawuq dqxqmeqp az + # 2013-08-18 ue m nuf tmdpqd me kag tmhq fa geq daf14. Bxqmeq + # qymux oaawuq@fmzsq.pw itqz kag dqmp ftue. + # + # U my ftq ymuzfmuzqd ar m buqoq ar rdqq earfimdq omxxqp SZG + # Bmdmxxqx. Rdqq earfimdq sgmdmzfqqe kag mooqee fa ftq eagdoq + # oapq, ngf U tmhq nqqz iazpqduzs tai ymzk mofgmxxk _dqmp_ ftq + # eagdoq oapq. + # + # Fa fqef ftue U bgf uz m oayyqzf fqxxuzs bqabxq fa qymux yq itqz + # ftqk dqmp ftue. Ftq oayyqzf ime bgf uz m eqofuaz ar ftq oapq + # ftmf za azq iagxp xaaw fa ruj ad uybdahq ftq earfimdq - ea ftq + # eagdoq oapq qcguhmxqzf fa m pgefk oadzqd. Fa ymwq egdq ftq + # oayyqzf iagxp zaf etai gb ur eayq azq vgef sdqbbqp ftdagst ftq + # eagdoq oapq U daf13'qp ftq eagdoq oapq + # tffb://qz.iuwubqpum.ads/iuwu/DAF13 + # + # 2.5 yazfte xmfqd U dqoquhqp mz qymux rday eayqazq ita zaf azxk + # ymzmsqp fa ruzp ftq oayyqzf, ngf mxea ymzmsqp fa sgqee ftq oapq + # tmp fa nq daf13'qp. + # + # Ftue nduzse yq fa ftq oazoxgeuaz ftmf ftqdq _mdq_ bqabxq, ita + # mdq zaf mrruxumfqp iuft ftq bdavqof, ftmf iuxx dqmp ftq eagdoq + # oapq - ftagst uf ymk zaf tmbbqz hqdk arfqz. + # + # This is really the number of records + return $#{$self->{'arg_list'}}+1; +} + +sub number_of_recargs { + # The number of args in records + # Returns: + # number of args records + my $self = shift; + my $sum = 0; + my $nrec = scalar @{$self->{'arg_list'}}; + if($nrec) { + $sum = $nrec * (scalar @{$self->{'arg_list'}[0]}); + } + return $sum; +} + +sub args_as_string { + # Returns: + # all unmodified arguments joined with ' ' (similar to {}) + my $self = shift; + return (join " ", map { $_->orig() } + map { @$_ } @{$self->{'arg_list'}}); +} + +sub args_as_dirname { + # Returns: + # all unmodified arguments joined with '/' (similar to {}) + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + # If $Global::max_file_length: Keep subdirs < $Global::max_file_length + my $self = shift; + my @res = (); + + for my $rec_ref (@{$self->{'arg_list'}}) { + # If headers are used, sort by them. + # Otherwise keep the order from the command line. + my @header_indexes_sorted = header_indexes_sorted($#$rec_ref+1); + for my $n (@header_indexes_sorted) { + CORE::push(@res, + $Global::input_source_header{$n}, + map { my $s = $_; + # \t \0 \\ and / are quoted as: \t \0 \\ \_ + $s =~ s/\\/\\\\/g; + $s =~ s/\t/\\t/g; + $s =~ s/\0/\\0/g; + $s =~ s:/:\\_:g; + if($Global::max_file_length) { + # Keep each subdir shorter than the longest + # allowed file name + $s = substr($s,0,$Global::max_file_length); + } + $s; } + $rec_ref->[$n-1]->orig()); + } + } + return join "/", @res; +} + +sub header_indexes_sorted { + # Sort headers first by number then by name. + # E.g.: 1a 1b 11a 11b + # Returns: + # Indexes of %Global::input_source_header sorted + my $max_col = shift; + + no warnings 'numeric'; + for my $col (1 .. $max_col) { + # Make sure the header is defined. If it is not: use column number + if(not defined $Global::input_source_header{$col}) { + $Global::input_source_header{$col} = $col; + } + } + my @header_indexes_sorted = sort { + # Sort headers numerically then asciibetically + $Global::input_source_header{$a} <=> $Global::input_source_header{$b} + or + $Global::input_source_header{$a} cmp $Global::input_source_header{$b} + } 1 .. 
$max_col; + return @header_indexes_sorted; +} + +sub len { + # Uses: + # $opt::shellquote + # The length of the command line with args substituted + my $self = shift; + my $len = 0; + # Add length of the original command with no args + # Length of command w/ all replacement args removed + $len += $self->{'len'}{'noncontext'} + @{$self->{'command'}} -1; + ::debug("length", "noncontext + command: $len\n"); + my $recargs = $self->number_of_recargs(); + if($self->{'context_replace'}) { + # Context is duplicated for each arg + $len += $recargs * $self->{'len'}{'context'}; + for my $replstring (keys %{$self->{'replacecount'}}) { + # If the replacements string is more than once: mulitply its length + $len += $self->{'len'}{$replstring} * + $self->{'replacecount'}{$replstring}; + ::debug("length", $replstring, " ", $self->{'len'}{$replstring}, "*", + $self->{'replacecount'}{$replstring}, "\n"); + } + # echo 11 22 33 44 55 66 77 88 99 1010 + # echo 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 10 + # 5 + ctxgrp*arg + ::debug("length", "Ctxgrp: ", $self->{'len'}{'contextgroups'}, + " Groups: ", $self->{'len'}{'noncontextgroups'}, "\n"); + # Add space between context groups + $len += ($recargs-1) * ($self->{'len'}{'contextgroups'}); + } else { + # Each replacement string may occur several times + # Add the length for each time + $len += 1*$self->{'len'}{'context'}; + ::debug("length", "context+noncontext + command: $len\n"); + for my $replstring (keys %{$self->{'replacecount'}}) { + # (space between regargs + length of replacement) + # * number this replacement is used + $len += ($recargs -1 + $self->{'len'}{$replstring}) * + $self->{'replacecount'}{$replstring}; + } + } + if($opt::nice) { + # Pessimistic length if --nice is set + # Worse than worst case: every char needs to be quoted with \ + $len *= 2; + } + if($Global::quoting) { + # Pessimistic length if -q is set + # Worse than worst case: every char needs to be quoted with \ + $len *= 2; + } + if($opt::shellquote) { + # Pessimistic length if --shellquote is set + # Worse than worst case: every char needs to be quoted with \ twice + $len *= 4; + } + # If we are using --env, add the prefix for that, too. + $len += $Global::envvarlen; + + return $len; +} + +sub replaced { + # Uses: + # $Global::noquote + # $Global::quoting + # Returns: + # $replaced = command with place holders replaced and prepended + my $self = shift; + if(not defined $self->{'replaced'}) { + # Don't quote arguments if the input is the full command line + my $quote_arg = $Global::noquote ? 0 : not $Global::quoting; + # or if ($opt::cat or $opt::pipe) as they use $PARALLEL_TMP + $quote_arg = ($opt::cat || $opt::fifo) ? 
0 : $quote_arg; + $self->{'replaced'} = $self-> + replace_placeholders($self->{'command'},$Global::quoting, + $quote_arg); + my $len = length $self->{'replaced'}; + if ($len != $self->len()) { + ::debug("length", $len, " != ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } else { + ::debug("length", $len, " == ", $self->len(), + " ", $self->{'replaced'}, "\n"); + } + } + return $self->{'replaced'}; +} + +{ + my @target; + my $context_replace; + my @arg; + my $perl_expressions_as_re; + + sub fish_out_words_containing_replacement_strings { + my %word; + for (@target) { + my $tt = $_; + ::debug("replace", "Target: $tt"); + # Command line template: + # a{1}b{}c{}d + # becomes: + # a{=1 $_=$_ =}b{= $_=$_ =}c{= $_=$_ =}d + # becomes: + # a\257<1 $_=$_ \257>b\257< $_=$_ \257>c\257< $_=$_ \257>d + # Input A B C (no context) becomes: + # A B C => aAbA B CcA B Cd + # Input A B C (context -X) becomes: + # A B C => aAbAcAd aAbBcBd aAbCcCd + if($context_replace) { + while($tt =~ s/([^\s\257]* # before {= + (?: + \257< # {= + [^\257]*? # The perl expression + \257> # =} + [^\s\257]* # after =} + )+)/ /x) { + # $1 = pre \257 perlexpr \257 post + $word{"$1"} ||= 1; + } + } else { + while($tt =~ s/( (?: \257<([^\257]*?)\257>) )//x) { + # $f = \257 perlexpr \257 + $word{$1} ||= 1; + } + } + } + return keys %word; + } + + sub flatten_arg_list { + my $arglist_ref = shift; + @arg = (); + for my $record (@$arglist_ref) { + # $self->{'arg_list'} = [ [Arg11, Arg12], [Arg21, Arg22], [Arg31, Arg32] ] + # Merge arg-objects from records into @arg for easy access + CORE::push @arg, @$record; + } + # Add one arg if empty to allow {#} and {%} to be computed only once + if(not @arg) { @arg = (Arg->new("")); } + } + + sub replace_placeholders { + # Replace foo{}bar with fooargbar + # Input: + # $targetref = command as shell words + # $quote = should everything be quoted? + # $quote_arg = should replaced arguments be quoted? + # Returns: + # @target with placeholders replaced + my $self = shift; + my $targetref = shift; + my $quote = shift; + my $quote_arg = shift; + my %replace; + $context_replace = $self->{'context_replace'}; + @target = @$targetref; + ::debug("replace", "Replace @target\n"); + # -X = context replace + # maybe multiple input sources + # maybe --xapply + if(not @target) { + # @target is empty: Return empty array + return @target; + } + # Fish out the words that have replacement strings in them + my @word = fish_out_words_containing_replacement_strings(); + flatten_arg_list($self->{'arg_list'}); + + # Number of arguments - used for positional arguments + my $n = $#arg+1; + + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = $self; + for my $word (@word) { + # word = AB \257< perlexpr \257> CD \257< perlexpr \257> EF + ::debug("replace", "Replacing in $word\n"); + my $normal_replace; + + # for each arg: + # replace replacement strings with replacement in the word value + # push to replace word value + $perl_expressions_as_re ||= + join("|", map {s/^-?\d+//; "\Q$_\E"} keys %{$self->{'replacecount'}}); + for my $arg (@arg) { + my $val = $word; + # Replace {= perl expr =} with value for each arg + $val =~ s{\257<(-?\d+)?($perl_expressions_as_re)\257>} + { + if($1) { + # Positional replace + # Find the relevant arg and replace it + ($arg[$1 > 0 ? $1-1 : $n+$1] ? # If defined: replace + $arg[$1 > 0 ? $1-1 : $n+$1]-> + replace($2,$quote_arg,$self) + : ""); + } else { + # Normal replace + $normal_replace ||= 1; + ($arg ? 
$arg->replace($2,$quote_arg,$self) : ""); + } + }goxe; + if($quote) { + CORE::push(@{$replace{::shell_quote_scalar($word)}}, + ::shell_quote_scalar($val)); + } else { + CORE::push(@{$replace{$word}}, $val); + } + # No normal replacements => only run once + $normal_replace or last; + } + } + + if($quote) { + @target = ::shell_quote(@target); + } + # ::debug("replace", "%replace=",::my_dump(%replace),"\n"); + if(%replace) { + # Substitute the replace strings with the replacement values + # Must be sorted by length if a short word is a substring of a long word + my $regexp = join('|', map { my $s = $_; $s =~ s/(\W)/\\$1/g; $s } + sort { length $b <=> length $a } keys %replace); + for(@target) { + s/($regexp)/join(" ",@{$replace{$1}})/ge; + } + } + ::debug("replace", "Return @target\n"); + return wantarray ? @target : "@target"; + } +} + + +package CommandLineQueue; + +sub new { + my $class = shift; + my $commandref = shift; + my $read_from = shift; + my $context_replace = shift; + my $max_number_of_args = shift; + my $return_files = shift; + my @unget = (); + my ($count,$posrpl,$perlexpr); + my ($replacecount_ref, $len_ref); + my @command = @$commandref; + my $dummy = ''; + # If the first command start with '-' it is probably an option + if($command[0] =~ /^\s*(-\S+)/) { + # Is this really a command in $PATH starting with '-'? + my $cmd = $1; + if(not ::which($cmd)) { + ::error("Command ($cmd) starts with '-'. Is this a wrong option?\n"); + ::wait_and_exit(255); + } + } + # Replace replacement strings with {= perl expr =} + @command = merge_rpl_parts(@command); + + # Protect matching inside {= perl expr =} + # by replacing {= and =} with \257< and \257> + # in @command, --return and --tagstring (if used) + for(@command,@$return_files, + (defined $opt::tagstring ? $opt::tagstring : $dummy)) { + # Disallow \257 to avoid nested {= {= =} =} + if(/\257/) { + ::error("Command cannot contain the character \257. Use a function for that.\n"); + ::wait_and_exit(255); + } + # Needs to match rightmost left parens (Perl defaults to leftmost) + # to deal with: {={==} + while(s{([^\257]*) \Q$Global::parensleft\E ([^\257]*?) \Q$Global::parensright\E } + {$1\257<$2\257>}gx) {} + for my $rpl (sort { length $b <=> length $a } keys %Global::rpl) { + # Replace long --rpl's before short ones, as a short may be a + # substring of a long: + # --rpl '% s/a/b/' --rpl '%% s/b/a/' + # Replace the short hand string (--rpl) + # with the {= perl expr =} + # Avoid replacing inside existing {= perl expr =} + while(s{((^|\257>)[^\257]*?) 
# Don't replace after \257 unless \257> + \Q$rpl\E} + {$1\257<$Global::rpl{$rpl}\257>}xg) { + } + # Do the same for the positional replacement strings + # A bit harder as we have to put in the position number + $posrpl = $rpl; + if($posrpl =~ s/^\{//) { + # Only do this if the shorthand start with { + s{\{(-?\d+)\Q$posrpl\E} + {\257<$1 $Global::rpl{$rpl}\257>}g; + } + } + } + # Add {} if no replacement strings in @command + ($replacecount_ref, $len_ref, @command) = + replacement_counts_and_lengths($return_files,@command); + if("@command" =~ /^[^ \t\n=]*\257 \@unget, + 'command' => \@command, + 'replacecount' => $replacecount_ref, + 'arg_queue' => RecordQueue->new($read_from,$opt::colsep), + 'context_replace' => $context_replace, + 'len' => $len_ref, + 'max_number_of_args' => $max_number_of_args, + 'size' => undef, + 'return_files' => $return_files, + 'seq' => 1, + }, ref($class) || $class; +} + +sub merge_rpl_parts { + # '{=' 'perlexpr' '=}' => '{= perlexpr =}' + # Input: + # @in = the @command as given by the user + # Uses: + # $Global::parensleft + # $Global::parensright + # Returns: + # @command with parts merged to keep {= and =} as one + my @in = @_; + my @out; + my $l = quotemeta($Global::parensleft); + my $r = quotemeta($Global::parensright); + + while(@in) { + my $s = shift @in; + $_ = $s; + # Remove matching (right most) parens + while(s/(.*)$l.*?$r/$1/o) {} + if(/$l/o) { + # Missing right parens + while(@in) { + $s .= " ".shift @in; + $_ = $s; + while(s/(.*)$l.*?$r/$1/o) {} + if(not /$l/o) { + last; + } + } + } + push @out, $s; + } + return @out; +} + +sub replacement_counts_and_lengths { + # Count the number of different replacement strings. + # Find the lengths of context for context groups and non-context + # groups. + # If no {} found in @command: add it to @command + # + # Input: + # \@return_files = array of filenames to return + # @command = command template + # Output: + # \%replacecount, \%len, @command + my $return_files = shift; + my @command = @_; + my (%replacecount,%len); + my $sum = 0; + while($sum == 0) { + # Count how many times each replacement string is used + my @cmd = @command; + my $contextlen = 0; + my $noncontextlen = 0; + my $contextgroups = 0; + for my $c (@cmd) { + while($c =~ s/ \257<([^\257]*?)\257> /\000/x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "s/a/b/" => 2 } + $replacecount{$1}++; + $sum++; + } + # Measure the length of the context around the {= perl expr =} + # Use that {=...=} has been replaced with \000 above + # So there is no need to deal with \257< + while($c =~ s/ (\S*\000\S*) //x) { + my $w = $1; + $w =~ tr/\000//d; # Remove all \000's + $contextlen += length($w); + $contextgroups++; + } + # All {= perl expr =} have been removed: The rest is non-context + $noncontextlen += length $c; + } + for(@$return_files) { + my $t = $_; + while($t =~ s/ \257<([^\257]*)\257> //x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::tagstring) { + my $t = $opt::tagstring; + while($t =~ s/ \257<([^\257]*)\257> //x) { + # %replacecount = { "perlexpr" => number of times seen } + # e.g { "$_++" => 2 } + # But for tagstring we just need to mark it as seen + $replacecount{$1} ||= 1; + } + } + if($opt::bar) { + # If the command does not contain {} force it to be computed + # as it is being used by --bar + $replacecount{""} ||= 1; + } + + $len{'context'} = 0+$contextlen; + $len{'noncontext'} = 
$noncontextlen; + $len{'contextgroups'} = $contextgroups; + $len{'noncontextgroups'} = @cmd-$contextgroups; + ::debug("length", "@command Context: ", $len{'context'}, + " Non: ", $len{'noncontext'}, " Ctxgrp: ", $len{'contextgroups'}, + " NonCtxGrp: ", $len{'noncontextgroups'}, "\n"); + if($sum == 0) { + if(not @command) { + # Default command = {} + @command = ("\257<\257>"); + } elsif(($opt::pipe or $opt::pipepart) + and not $opt::fifo and not $opt::cat) { + # With --pipe / --pipe-part you can have no replacement + last; + } else { + # Append {} to the command if there are no {...}'s and no {=...=} + push @command, ("\257<\257>"); + } + } + } + return(\%replacecount,\%len,@command); +} + +sub get { + my $self = shift; + if(@{$self->{'unget'}}) { + my $cmd_line = shift @{$self->{'unget'}}; + return ($cmd_line); + } else { + my $cmd_line = CommandLine->new($self->seq(), + $self->{'command'}, + $self->{'arg_queue'}, + $self->{'context_replace'}, + $self->{'max_number_of_args'}, + $self->{'return_files'}, + $self->{'replacecount'}, + $self->{'len'}, + ); + $cmd_line->populate(); + ::debug("init","cmd_line->number_of_args ", + $cmd_line->number_of_args(), "\n"); + if($opt::pipe or $opt::pipepart) { + if($cmd_line->replaced() eq "") { + # Empty command - pipe requires a command + ::error("--pipe must have a command to pipe into (e.g. 'cat').\n"); + ::wait_and_exit(255); + } + } else { + if($cmd_line->number_of_args() == 0) { + # We did not get more args - maybe at EOF string? + return undef; + } elsif($cmd_line->replaced() eq "") { + # Empty command - get the next instead + return $self->get(); + } + } + $self->set_seq($self->seq()+1); + return $cmd_line; + } +} + +sub unget { + my $self = shift; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}}) && $self->{'arg_queue'}->empty(); + ::debug("run", "CommandLineQueue->empty $empty"); + return $empty; +} + +sub seq { + my $self = shift; + return $self->{'seq'}; +} + +sub set_seq { + my $self = shift; + $self->{'seq'} = shift; +} + +sub quote_args { + my $self = shift; + # If there is not command emulate |bash + return $self->{'command'}; +} + + +package Limits::Command; + +# Maximal command line length (for -m and -X) +sub max_length { + # Find the max_length of a command line and cache it + # Returns: + # number of chars on the longest command line allowed + if(not $Limits::Command::line_max_len) { + # Disk cache of max command line length + my $len_cache = $ENV{'HOME'} . "/.parallel/tmp/linelen-" . ::hostname(); + my $cached_limit; + if(-e $len_cache) { + open(my $fh, "<", $len_cache) || ::die_bug("Cannot read $len_cache"); + $cached_limit = <$fh>; + close $fh; + } else { + $cached_limit = real_max_length(); + # If $HOME is write protected: Do not fail + mkdir($ENV{'HOME'} . "/.parallel"); + mkdir($ENV{'HOME'} . 
"/.parallel/tmp"); + open(my $fh, ">", $len_cache); + print $fh $cached_limit; + close $fh; + } + $Limits::Command::line_max_len = tmux_length($cached_limit); + if($opt::max_chars) { + if($opt::max_chars <= $cached_limit) { + $Limits::Command::line_max_len = $opt::max_chars; + } else { + ::warning("Value for -s option ", + "should be < $cached_limit.\n"); + } + } + } + return $Limits::Command::line_max_len; +} + +sub real_max_length { + # Find the max_length of a command line + # Returns: + # The maximal command line length + # Use an upper bound of 8 MB if the shell allows for for infinite long lengths + my $upper = 8_000_000; + my $len = 8; + do { + if($len > $upper) { return $len }; + $len *= 16; + } while (is_acceptable_command_line_length($len)); + # Then search for the actual max length between 0 and upper bound + return binary_find_max_length(int($len/16),$len); +} + +sub binary_find_max_length { + # Given a lower and upper bound find the max_length of a command line + # Returns: + # number of chars on the longest command line allowed + my ($lower, $upper) = (@_); + if($lower == $upper or $lower == $upper-1) { return $lower; } + my $middle = int (($upper-$lower)/2 + $lower); + ::debug("init", "Maxlen: $lower,$upper,$middle : "); + if (is_acceptable_command_line_length($middle)) { + return binary_find_max_length($middle,$upper); + } else { + return binary_find_max_length($lower,$middle); + } +} + +sub is_acceptable_command_line_length { + # Test if a command line of this length can run + # Returns: + # 0 if the command line length is too long + # 1 otherwise + my $len = shift; + + local *STDERR; + open (STDERR, ">", "/dev/null"); + system "true "."x"x$len; + close STDERR; + ::debug("init", "$len=$? "); + return not $?; +} + +sub tmux_length { + # If $opt::tmux set, find the limit for tmux + # tmux 1.8 has a 2kB limit + # tmux 1.9 has a 16kB limit + # Input: + # $len = maximal command line length + # Returns: + # $tmux_len = maximal length runable in tmux + my $len = shift; + if($opt::tmux) { + $ENV{'TMUX'} ||= "tmux"; + if(not ::which($ENV{'TMUX'})) { + ::error($ENV{'TMUX'}." not found in \$PATH.\n"); + ::wait_and_exit(255); + } + my @out; + for my $l (1, 2020, 16320, 100000, $len) { + my $tmpfile = ::tmpname("tms"); + my $tmuxcmd = "sh -c '".$ENV{'TMUX'}." -S $tmpfile new-session -d -n echo $l". + ("x"x$l). 
" 2>/dev/null' && echo $l; rm -f $tmpfile"; + push @out, qx{ $tmuxcmd }; + unlink $tmpfile; + } + ::debug("tmux","tmux-length ",@out); + chomp @out; + # The arguments is given 3 times on the command line + # and the wrapping is around 30 chars + # (29 for tmux1.9, 33 for tmux1.8) + my $tmux_len = (::max(@out)); + $len = ::min($len,int($tmux_len/4-33)); + ::debug("tmux","tmux-length ",$len); + } + return $len; +} + + +package RecordQueue; + +sub new { + my $class = shift; + my $fhs = shift; + my $colsep = shift; + my @unget = (); + my $arg_sub_queue; + if($colsep) { + # Open one file with colsep + $arg_sub_queue = RecordColQueue->new($fhs); + } else { + # Open one or more files if multiple -a + $arg_sub_queue = MultifileQueue->new($fhs); + } + return bless { + 'unget' => \@unget, + 'arg_number' => 0, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + $self->{'arg_number'}++; + return shift @{$self->{'unget'}}; + } + my $ret = $self->{'arg_sub_queue'}->get(); + if(defined $Global::max_number_of_args + and $Global::max_number_of_args == 0) { + ::debug("run", "Read 1 but return 0 args\n"); + return [Arg->new("")]; + } else { + return $ret; + } +} + +sub unget { + my $self = shift; + ::debug("run", "RecordQueue-unget '@_'\n"); + $self->{'arg_number'} -= @_; + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = not @{$self->{'unget'}}; + $empty &&= $self->{'arg_sub_queue'}->empty(); + ::debug("run", "RecordQueue->empty $empty"); + return $empty; +} + +sub arg_number { + my $self = shift; + return $self->{'arg_number'}; +} + + +package RecordColQueue; + +sub new { + my $class = shift; + my $fhs = shift; + my @unget = (); + my $arg_sub_queue = MultifileQueue->new($fhs); + return bless { + 'unget' => \@unget, + 'arg_sub_queue' => $arg_sub_queue, + }, ref($class) || $class; +} + +sub get { + # Returns: + # reference to array of Arg-objects + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my $unget_ref=$self->{'unget'}; + if($self->{'arg_sub_queue'}->empty()) { + return undef; + } + my $in_record = $self->{'arg_sub_queue'}->get(); + if(defined $in_record) { + my @out_record = (); + for my $arg (@$in_record) { + ::debug("run", "RecordColQueue::arg $arg\n"); + my $line = $arg->orig(); + ::debug("run", "line='$line'\n"); + if($line ne "") { + for my $s (split /$opt::colsep/o, $line, -1) { + push @out_record, Arg->new($s); + } + } else { + push @out_record, Arg->new(""); + } + } + return \@out_record; + } else { + return undef; + } +} + +sub unget { + my $self = shift; + ::debug("run", "RecordColQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @{$self->{'unget'}} and $self->{'arg_sub_queue'}->empty()); + ::debug("run", "RecordColQueue->empty $empty"); + return $empty; +} + + +package MultifileQueue; + +@Global::unget_argv=(); + +sub new { + my $class = shift; + my $fhs = shift; + for my $fh (@$fhs) { + if(-t $fh) { + ::warning("Input is read from the terminal.\n"); + ::warning("Only experts do this on purpose. ". 
+ "Press CTRL-D to exit.\n"); + } + } + return bless { + 'unget' => \@Global::unget_argv, + 'fhs' => $fhs, + 'arg_matrix' => undef, + }, ref($class) || $class; +} + +sub get { + my $self = shift; + if($opt::xapply) { + return $self->xapply_get(); + } else { + return $self->nest_get(); + } +} + +sub unget { + my $self = shift; + ::debug("run", "MultifileQueue-unget '@_'\n"); + unshift @{$self->{'unget'}}, @_; +} + +sub empty { + my $self = shift; + my $empty = (not @Global::unget_argv + and not @{$self->{'unget'}}); + for my $fh (@{$self->{'fhs'}}) { + $empty &&= eof($fh); + } + ::debug("run", "MultifileQueue->empty $empty "); + return $empty; +} + +sub xapply_get { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend = undef; + my $empty = 1; + for my $fh (@{$self->{'fhs'}}) { + my $arg = read_arg_from_fh($fh); + if(defined $arg) { + # Record $arg for recycling at end of file + push @{$self->{'arg_matrix'}{$fh}}, $arg; + push @record, $arg; + $empty = 0; + } else { + ::debug("run", "EOA "); + # End of file: Recycle arguments + push @{$self->{'arg_matrix'}{$fh}}, shift @{$self->{'arg_matrix'}{$fh}}; + # return last @{$args->{'args'}{$fh}}; + push @record, @{$self->{'arg_matrix'}{$fh}}[-1]; + } + } + if($empty) { + return undef; + } else { + return \@record; + } +} + +sub nest_get { + my $self = shift; + if(@{$self->{'unget'}}) { + return shift @{$self->{'unget'}}; + } + my @record = (); + my $prepend = undef; + my $empty = 1; + my $no_of_inputsources = $#{$self->{'fhs'}} + 1; + if(not $self->{'arg_matrix'}) { + # Initialize @arg_matrix with one arg from each file + # read one line from each file + my @first_arg_set; + my $all_empty = 1; + for (my $fhno = 0; $fhno < $no_of_inputsources ; $fhno++) { + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + if(defined $arg) { + $all_empty = 0; + } + $self->{'arg_matrix'}[$fhno][0] = $arg || Arg->new(""); + push @first_arg_set, $self->{'arg_matrix'}[$fhno][0]; + } + if($all_empty) { + # All filehandles were at eof or eof-string + return undef; + } + return [@first_arg_set]; + } + + # Treat the case with one input source special. For multiple + # input sources we need to remember all previously read values to + # generate all combinations. But for one input source we can + # forget the value after first use. + if($no_of_inputsources == 1) { + my $arg = read_arg_from_fh($self->{'fhs'}[0]); + if(defined($arg)) { + return [$arg]; + } + return undef; + } + for (my $fhno = $no_of_inputsources - 1; $fhno >= 0; $fhno--) { + if(eof($self->{'fhs'}[$fhno])) { + next; + } else { + # read one + my $arg = read_arg_from_fh($self->{'fhs'}[$fhno]); + defined($arg) || next; # If we just read an EOF string: Treat this as EOF + my $len = $#{$self->{'arg_matrix'}[$fhno]} + 1; + $self->{'arg_matrix'}[$fhno][$len] = $arg; + # make all new combinations + my @combarg = (); + for (my $fhn = 0; $fhn < $no_of_inputsources; $fhn++) { + push @combarg, [0, $#{$self->{'arg_matrix'}[$fhn]}]; + } + $combarg[$fhno] = [$len,$len]; # Find only combinations with this new entry + # map combinations + # [ 1, 3, 7 ], [ 2, 4, 1 ] + # => + # [ m[0][1], m[1][3], m[3][7] ], [ m[0][2], m[1][4], m[2][1] ] + my @mapped; + for my $c (expand_combinations(@combarg)) { + my @a; + for my $n (0 .. 
$no_of_inputsources - 1 ) { + push @a, $self->{'arg_matrix'}[$n][$$c[$n]]; + } + push @mapped, \@a; + } + # append the mapped to the ungotten arguments + push @{$self->{'unget'}}, @mapped; + # get the first + return shift @{$self->{'unget'}}; + } + } + # all are eof or at EOF string; return from the unget queue + return shift @{$self->{'unget'}}; +} + +sub read_arg_from_fh { + # Read one Arg from filehandle + # Returns: + # Arg-object with one read line + # undef if end of file + my $fh = shift; + my $prepend = undef; + my $arg; + do {{ + # This makes 10% faster + if(not ($arg = <$fh>)) { + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } +# ::debug("run", "read $arg\n"); + # Remove delimiter + $arg =~ s:$/$::; + if($Global::end_of_file_string and + $arg eq $Global::end_of_file_string) { + # Ignore the rest of input file + close $fh; + ::debug("run", "EOF-string ($arg) met\n"); + if(defined $prepend) { + return Arg->new($prepend); + } else { + return undef; + } + } + if(defined $prepend) { + $arg = $prepend.$arg; # For line continuation + $prepend = undef; #undef; + } + if($Global::ignore_empty) { + if($arg =~ /^\s*$/) { + redo; # Try the next line + } + } + if($Global::max_lines) { + if($arg =~ /\s$/) { + # Trailing space => continued on next line + $prepend = $arg; + redo; + } + } + }} while (1 == 0); # Dummy loop {{}} for redo + if(defined $arg) { + return Arg->new($arg); + } else { + ::die_bug("multiread arg undefined"); + } +} + +sub expand_combinations { + # Input: + # ([xmin,xmax], [ymin,ymax], ...) + # Returns: ([x,y,...],[x,y,...]) + # where xmin <= x <= xmax and ymin <= y <= ymax + my $minmax_ref = shift; + my $xmin = $$minmax_ref[0]; + my $xmax = $$minmax_ref[1]; + my @p; + if(@_) { + # If there are more columns: Compute those recursively + my @rest = expand_combinations(@_); + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, map { [$x, @$_] } @rest; + } + } else { + for(my $x = $xmin; $x <= $xmax; $x++) { + push @p, [$x]; + } + } + return @p; +} + + +package Arg; + +sub new { + my $class = shift; + my $orig = shift; + my @hostgroups; + if($opt::hostgroups) { + if($orig =~ s:@(.+)::) { + # We found hostgroups on the arg + @hostgroups = split(/\+/, $1); + if(not grep { defined $Global::hostgroups{$_} } @hostgroups) { + ::warning("No such hostgroup (@hostgroups)\n"); + @hostgroups = (keys %Global::hostgroups); + } + } else { + @hostgroups = (keys %Global::hostgroups); + } + } + return bless { + 'orig' => $orig, + 'hostgroups' => \@hostgroups, + }, ref($class) || $class; +} + +sub replace { + # Calculates the corresponding value for a given perl expression + # Returns: + # The calculated string (quoted if asked for) + my $self = shift; + my $perlexpr = shift; # E.g. $_=$_ or s/.gz// + my $quote = (shift) ? 1 : 0; # should the string be quoted? + # This is actually a CommandLine-object, + # but it looks nice to be able to say {= $job->slot() =} + my $job = shift; + $perlexpr =~ s/^-?\d+ //; # Positional replace treated as normal replace + if(not defined $self->{"rpl",0,$perlexpr}) { + local $_; + if($Global::trim eq "n") { + $_ = $self->{'orig'}; + } else { + $_ = trim_of($self->{'orig'}); + } + ::debug("replace", "eval ", $perlexpr, " ", $_, "\n"); + if(not $Global::perleval{$perlexpr}) { + # Make an anonymous function of the $perlexpr + # And more importantly: Compile it only once + if($Global::perleval{$perlexpr} = + eval('sub { no strict; no warnings; my $job = shift; '. + $perlexpr.' }')) { + # All is good + } else { + # The eval failed. 
Maybe $perlexpr is invalid perl? + ::error("Cannot use $perlexpr: $@\n"); + ::wait_and_exit(255); + } + } + # Execute the function + $Global::perleval{$perlexpr}->($job); + $self->{"rpl",0,$perlexpr} = $_; + } + if(not defined $self->{"rpl",$quote,$perlexpr}) { + $self->{"rpl",1,$perlexpr} = + ::shell_quote_scalar($self->{"rpl",0,$perlexpr}); + } + return $self->{"rpl",$quote,$perlexpr}; +} + +sub orig { + my $self = shift; + return $self->{'orig'}; +} + +sub trim_of { + # Removes white space as specifed by --trim: + # n = nothing + # l = start + # r = end + # lr|rl = both + # Returns: + # string with white space removed as needed + my @strings = map { defined $_ ? $_ : "" } (@_); + my $arg; + if($Global::trim eq "n") { + # skip + } elsif($Global::trim eq "l") { + for my $arg (@strings) { $arg =~ s/^\s+//; } + } elsif($Global::trim eq "r") { + for my $arg (@strings) { $arg =~ s/\s+$//; } + } elsif($Global::trim eq "rl" or $Global::trim eq "lr") { + for my $arg (@strings) { $arg =~ s/^\s+//; $arg =~ s/\s+$//; } + } else { + ::error("--trim must be one of: r l rl lr.\n"); + ::wait_and_exit(255); + } + return wantarray ? @strings : "@strings"; +} + + +package TimeoutQueue; + +sub new { + my $class = shift; + my $delta_time = shift; + my ($pct); + if($delta_time =~ /(\d+(\.\d+)?)%/) { + # Timeout in percent + $pct = $1/100; + $delta_time = 1_000_000; + } + return bless { + 'queue' => [], + 'delta_time' => $delta_time, + 'pct' => $pct, + 'remedian_idx' => 0, + 'remedian_arr' => [], + 'remedian' => undef, + }, ref($class) || $class; +} + +sub delta_time { + my $self = shift; + return $self->{'delta_time'}; +} + +sub set_delta_time { + my $self = shift; + $self->{'delta_time'} = shift; +} + +sub remedian { + my $self = shift; + return $self->{'remedian'}; +} + +sub set_remedian { + # Set median of the last 999^3 (=997002999) values using Remedian + # + # Rousseeuw, Peter J., and Gilbert W. Bassett Jr. "The remedian: A + # robust averaging method for large data sets." Journal of the + # American Statistical Association 85.409 (1990): 97-104. + my $self = shift; + my $val = shift; + my $i = $self->{'remedian_idx'}++; + my $rref = $self->{'remedian_arr'}; + $rref->[0][$i%999] = $val; + $rref->[1][$i/999%999] = (sort @{$rref->[0]})[$#{$rref->[0]}/2]; + $rref->[2][$i/999/999%999] = (sort @{$rref->[1]})[$#{$rref->[1]}/2]; + $self->{'remedian'} = (sort @{$rref->[2]})[$#{$rref->[2]}/2]; +} + +sub update_median_runtime { + # Update delta_time based on runtime of finished job if timeout is + # a percentage + my $self = shift; + my $runtime = shift; + if($self->{'pct'}) { + $self->set_remedian($runtime); + $self->{'delta_time'} = $self->{'pct'} * $self->remedian(); + ::debug("run", "Timeout: $self->{'delta_time'}s "); + } +} + +sub process_timeouts { + # Check if there was a timeout + my $self = shift; + # $self->{'queue'} is sorted by start time + while (@{$self->{'queue'}}) { + my $job = $self->{'queue'}[0]; + if($job->endtime()) { + # Job already finished. 
No need to timeout the job + # This could be because of --keep-order + shift @{$self->{'queue'}}; + } elsif($job->timedout($self->{'delta_time'})) { + # Need to shift off queue before kill + # because kill calls usleep that calls process_timeouts + shift @{$self->{'queue'}}; + $job->kill(); + } else { + # Because they are sorted by start time the rest are later + last; + } + } +} + +sub insert { + my $self = shift; + my $in = shift; + push @{$self->{'queue'}}, $in; +} + + +package Semaphore; + +# This package provides a counting semaphore +# +# If a process dies without releasing the semaphore the next process +# that needs that entry will clean up dead semaphores +# +# The semaphores are stored in ~/.parallel/semaphores/id- Each +# file in ~/.parallel/semaphores/id-/ is the process ID of the +# process holding the entry. If the process dies, the entry can be +# taken by another process. + +sub new { + my $class = shift; + my $id = shift; + my $count = shift; + $id =~ s/([^-_a-z0-9])/unpack("H*",$1)/ige; # Convert non-word chars to hex + $id = "id-".$id; # To distinguish it from a process id + my $parallel_dir = $ENV{'HOME'}."/.parallel"; + -d $parallel_dir or mkdir_or_die($parallel_dir); + my $parallel_locks = $parallel_dir."/semaphores"; + -d $parallel_locks or mkdir_or_die($parallel_locks); + my $lockdir = "$parallel_locks/$id"; + my $lockfile = $lockdir.".lock"; + if($count < 1) { ::die_bug("semaphore-count: $count"); } + return bless { + 'lockfile' => $lockfile, + 'lockfh' => Symbol::gensym(), + 'lockdir' => $lockdir, + 'id' => $id, + 'idfile' => $lockdir."/".$id, + 'pid' => $$, + 'pidfile' => $lockdir."/".$$.'@'.::hostname(), + 'count' => $count + 1 # nlinks returns a link for the 'id-' as well + }, ref($class) || $class; +} + +sub remove_dead_locks { + my $self = shift; + my $lockdir = $self->{'lockdir'}; + + for my $d (glob "$lockdir/*") { + $d =~ m:$lockdir/([0-9]+)\@([-\._a-z0-9]+)$:o or next; + my ($pid, $host) = ($1, $2); + if($host eq ::hostname()) { + if(not kill 0, $pid) { + ::debug("sem", "Dead: $d\n"); + unlink $d; + } else { + ::debug("sem", "Alive: $d\n"); + } + } + } +} + +sub acquire { + my $self = shift; + my $sleep = 1; # 1 ms + my $start_time = time; + while(1) { + # Can we get a lock? + $self->atomic_link_if_count_less_than() and last; + $self->remove_dead_locks(); + # Retry slower and slower up to 1 second + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + time - $start_time > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Stealing the semaphore.\n"); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("timeout_write_idfile: $self->{'idfile'}"); + close $fh; + } + link $self->{'idfile'}, $self->{'pidfile'}; + last; + } + if($opt::semaphoretimeout < 0 + and + time - $start_time > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. 
Exiting.\n"); + exit(1); + last; + } + } + } + ::debug("sem", "acquired $self->{'pid'}\n"); +} + +sub release { + my $self = shift; + unlink $self->{'pidfile'}; + if($self->nlinks() == 1) { + # This is the last link, so atomic cleanup + $self->lock(); + if($self->nlinks() == 1) { + unlink $self->{'idfile'}; + rmdir $self->{'lockdir'}; + } + $self->unlock(); + } + ::debug("run", "released $self->{'pid'}\n"); +} + +sub pid_change { + # This should do what release()+acquire() would do without having + # to re-acquire the semaphore + my $self = shift; + + my $old_pidfile = $self->{'pidfile'}; + $self->{'pid'} = $$; + $self->{'pidfile'} = $self->{'lockdir'}."/".$$.'@'.::hostname(); + my $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + unlink $old_pidfile; +} + +sub atomic_link_if_count_less_than { + # Link $file1 to $file2 if nlinks to $file1 < $count + my $self = shift; + my $retval = 0; + $self->lock(); + my $nlinks = $self->nlinks(); + ::debug("sem","$nlinks<$self->{'count'} "); + if($nlinks < $self->{'count'}) { + -d $self->{'lockdir'} or mkdir_or_die($self->{'lockdir'}); + if(not -e $self->{'idfile'}) { + open (my $fh, ">", $self->{'idfile'}) or + ::die_bug("write_idfile: $self->{'idfile'}"); + close $fh; + } + $retval = link $self->{'idfile'}, $self->{'pidfile'}; + ::debug("sem","link($self->{'idfile'},$self->{'pidfile'})=$retval\n"); + } + $self->unlock(); + ::debug("sem", "atomic $retval"); + return $retval; +} + +sub nlinks { + my $self = shift; + if(-e $self->{'idfile'}) { + return (stat(_))[3]; + } else { + return 0; + } +} + +sub lock { + my $self = shift; + my $sleep = 100; # 100 ms + my $total_sleep = 0; + $Global::use{"Fcntl"} ||= eval "use Fcntl qw(:DEFAULT :flock); 1;"; + my $locked = 0; + while(not $locked) { + if(tell($self->{'lockfh'}) == -1) { + # File not open + open($self->{'lockfh'}, ">", $self->{'lockfile'}) + or ::debug("run", "Cannot open $self->{'lockfile'}"); + } + if($self->{'lockfh'}) { + # File is open + chmod 0666, $self->{'lockfile'}; # assuming you want it a+rw + if(flock($self->{'lockfh'}, LOCK_EX()|LOCK_NB())) { + # The file is locked: No need to retry + $locked = 1; + last; + } else { + if ($! =~ m/Function not implemented/) { + ::warning("flock: $!"); + ::warning("Will wait for a random while\n"); + ::usleep(rand(5000)); + # File cannot be locked: No need to retry + $locked = 2; + last; + } + } + } + # Locking failed in first round + # Sleep and try again + $sleep = ($sleep < 1000) ? ($sleep * 1.1) : ($sleep); + # Random to avoid every sleeping job waking up at the same time + ::usleep(rand()*$sleep); + $total_sleep += $sleep; + if($opt::semaphoretimeout) { + if($opt::semaphoretimeout > 0 + and + $total_sleep/1000 > $opt::semaphoretimeout) { + # Timeout: Take the semaphore anyway + ::warning("Semaphore timed out. Taking the semaphore."); + $locked = 3; + last; + } + if($opt::semaphoretimeout < 0 + and + $total_sleep/1000 > -$opt::semaphoretimeout) { + # Timeout: Exit + ::warning("Semaphore timed out. Exiting."); + $locked = 4; + last; + } + } else { + if($total_sleep/1000 > 30) { + ::warning("Semaphore stuck for 30 seconds. 
Consider using --semaphoretimeout.");
+            }
+        }
+    }
+    ::debug("run", "locked $self->{'lockfile'}");
+}
+
+sub unlock {
+    my $self = shift;
+    unlink $self->{'lockfile'};
+    close $self->{'lockfh'};
+    ::debug("run", "unlocked\n");
+}
+
+sub mkdir_or_die {
+    # If dir is not writable: die
+    my $dir = shift;
+    my @dir_parts = split(m:/:,$dir);
+    my ($ddir,$part);
+    while(defined ($part = shift @dir_parts)) {
+        $part eq "" and next;
+        $ddir .= "/".$part;
+        -d $ddir and next;
+        mkdir $ddir;
+    }
+    if(not -w $dir) {
+        ::error("Cannot write to $dir: $!\n");
+        ::wait_and_exit(255);
+    }
+}
+
+# Keep perl -w happy
+$opt::ctrlc = $opt::x = $Semaphore::timeout = $Semaphore::wait =
+$opt::ignored_option = $Job::file_descriptor_warning_printed =
+$Global::envdef = 0;
diff --git a/sql b/sql
new file mode 100755
index 0000000..0b4634d
--- /dev/null
+++ b/sql
@@ -0,0 +1,1109 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+sql - execute a command on a database determined by a dburl
+
+=head1 SYNOPSIS
+
+B<sql> [options] I<DBURL> [I<commands>]
+
+B<sql> [options] I<DBURL> < commandfile
+
+B<#!/usr/bin/sql> B<--shebang> [options] I<DBURL>
+
+=head1 DESCRIPTION
+
+GNU B<sql> aims to give a simple, unified interface for accessing
+databases through all the different databases' command line
+clients. So far the focus has been on giving a common way to specify
+login information (protocol, username, password, hostname, and port
+number), size (database and table size), and running queries.
+
+The database is addressed using a DBURL. If I<commands> are left out
+you will get that database's interactive shell.
+
+GNU B<sql> is often used in combination with GNU B<parallel>.
+
+=over 9
+
+=item I<DBURL>
+
+A DBURL has the following syntax:
+[sql:]vendor://
+[[user][:password]@][host][:port]/[database][?sqlquery]
+
+See the section DBURL below.
+
+=item I<commands>
+
+The SQL commands to run. Each argument will have a newline
+appended.
+
+Example: "SELECT * FROM foo;" "SELECT * FROM bar;"
+
+If the arguments contain '\n' or '\x0a' this will be replaced with a
+newline:
+
+Example: "SELECT * FROM foo;\n SELECT * FROM bar;"
+
+If no commands are given SQL is read from the keyboard or STDIN.
+
+Example: echo 'SELECT * FROM foo;' | sql mysql:///
+
+
+=item B<--db-size>
+
+=item B<--dbsize>
+
+Size of database. Show the size of the database on disk. For Oracle
+this requires access to read the table I - the user
+I has that.
+
+
+=item B<--help>
+
+=item B<-h>
+
+Print a summary of the options to GNU B<sql> and exit.
+
+
+=item B<--html>
+
+HTML output. Turn on HTML tabular output.
+
+
+=item B<--show-processlist>
+
+=item B<--proclist>
+
+=item B<--listproc>
+
+Show the list of running queries.
+
+
+=item B<--show-databases>
+
+=item B<--showdbs>
+
+=item B<--list-databases>
+
+=item B<--listdbs>
+
+List the databases (table spaces) in the database.
+
+
+=item B<--show-tables>
+
+=item B<--list-tables>
+
+=item B<--table-list>
+
+List the tables in the database.
+
+
+=item B<--noheaders>
+
+=item B<--no-headers>
+
+=item B<-n>
+
+Remove headers and footers and print only tuples. Bug in Oracle: it
+still prints the number of rows found.
+
+
+=item B<-p> I
+
+The string following -p will be given to the database connection
+program as arguments. Multiple -p's will be joined with
+space. Example: pass '-U' and the user name to the program:
+
+I<-p "-U scott"> can also be written I<-p -U -p scott>.
+
+
+=item B<-r>
+
+Try 3 times. Short version of I<--retries 3>.
+
+
+=item B<--retries> I<number>
+
+Try I<number> times. If the client program returns with an error,
+retry the command. Default is I<--retries 1>.
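+
+A typical invocation might look like this (reusing the PostgreSQL
+example DBURL from the DBURL section below; the query is purely
+illustrative):
+
+ sql --retries 3 pg://scott:tiger@pg.example.com/pgdb "SELECT * FROM foo;"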
+
+
+=item B<--sep> I<string>
+
+=item B<-s> I<string>
+
+Field separator. Use I<string> as separator between columns.
+
+
+=item B<--skip-first-line>
+
+Do not use the first line of input (used by GNU B<sql> itself
+when called with B<--shebang>).
+
+
+=item B<--table-size>
+
+=item B<--tablesize>
+
+Size of tables. Show the size of the tables in the database.
+
+
+=item B<--verbose>
+
+=item B<-v>
+
+Print which command is sent.
+
+
+=item B<--version>
+
+=item B<-V>
+
+Print the version of GNU B<sql> and exit.
+
+
+=item B<--shebang>
+
+=item B<-Y>
+
+GNU B<sql> can be called as a shebang (#!) command as the first line of a script. Like this:
+
+ #!/usr/bin/sql -Y mysql:///
+
+ SELECT * FROM foo;
+
+For this to work B<--shebang> or B<-Y> must be set as the first option.
+
+=back
+
+=head1 DBURL
+
+A DBURL has the following syntax:
+[sql:]vendor://
+[[user][:password]@][host][:port]/[database][?sqlquery]
+
+To quote special characters use %-encoding specified in
+http://tools.ietf.org/html/rfc3986#section-2.1 (E.g. a password
+containing '/' would contain '%2F').
+
+Examples:
+ mysql://scott:tiger@my.example.com/mydb
+ sql:oracle://scott:tiger@ora.example.com/xe
+ postgresql://scott:tiger@pg.example.com/pgdb
+ pg:///
+ postgresqlssl://scott@pg.example.com:3333/pgdb
+ sql:sqlite2:////tmp/db.sqlite?SELECT * FROM foo;
+ sqlite3:///../db.sqlite3?SELECT%20*%20FROM%20foo;
+
+Currently supported vendors: MySQL (mysql), MySQL with SSL (mysqls,
+mysqlssl), Oracle (oracle, ora), PostgreSQL (postgresql, pg, pgsql,
+postgres), PostgreSQL with SSL (postgresqlssl, pgs, pgsqlssl,
+postgresssl, pgssl, postgresqls, pgsqls, postgress), SQLite2 (sqlite,
+sqlite2), SQLite3 (sqlite3).
+
+Aliases must start with ':' and are read from
+/etc/sql/aliases and ~/.sql/aliases. The user's own
+~/.sql/aliases should only be readable by the user.
+
+Example of aliases:
+
+ :myalias1 pg://scott:tiger@pg.example.com/pgdb
+ :myalias2 ora://scott:tiger@ora.example.com/xe
+ # Short form of mysql://`whoami`:nopassword@localhost:3306/`whoami`
+ :myalias3 mysql:///
+ # Short form of mysql://`whoami`:nopassword@localhost:33333/mydb
+ :myalias4 mysql://:33333/mydb
+ # Alias for an alias
+ :m :myalias4
+ # the shortest alias possible
+ : sqlite2:////tmp/db.sqlite
+ # Including an SQL query
+ :query sqlite:////tmp/db.sqlite?SELECT * FROM foo;
+
+=head1 EXAMPLES
+
+=head2 Get an interactive prompt
+
+The most basic use of GNU B<sql> is to get an interactive prompt:
+
+B
+
+If you have set up an alias you can do:
+
+B
+
+
+=head2 Run a query
+
+To run a query directly from the command line:
+
+B
+
+Oracle requires newlines after each statement. This can be done like
+this:
+
+B
+
+Or this:
+
+B
+
+
+=head2 Copy a PostgreSQL database
+
+To copy a PostgreSQL database use pg_dump to generate the dump and GNU
+B<sql> to import it:
+
+B
+
+
+=head2 Empty all tables in a MySQL database
+
+Using GNU B<sql> it is easy to empty all tables without dropping them:
+
+B
+
+
+=head2 Drop all tables in a PostgreSQL database
+
+To drop all tables in a PostgreSQL database do:
+
+B
+
+
+=head2 Run as a script
+
+Instead of doing:
+
+B
+
+you can combine the sqlfile with the DBURL to make a
+UNIX-script. Create a script called I:
+
+B<#!/usr/bin/sql -Y mysql:///>
+
+B