From: jeremydyer@apache.org
To: commits@nifi.apache.org
Reply-To: dev@nifi.apache.org
Date: Mon, 09 Oct 2017 16:25:12 -0000
Subject: [32/51] [partial] nifi-minifi-cpp git commit: MINIFI-372: Replace leveldb with RocksDB

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/48867732/thirdparty/rocksdb/build_tools/gnu_parallel
----------------------------------------------------------------------
diff --git a/thirdparty/rocksdb/build_tools/gnu_parallel b/thirdparty/rocksdb/build_tools/gnu_parallel
new file mode 100755
index 0000000..abbf8f1
--- /dev/null
+++ b/thirdparty/rocksdb/build_tools/gnu_parallel
@@ -0,0 +1,7936 @@
+#!/usr/bin/env perl
+
+# Copyright (C) 2007,2008,2009,2010,2011,2012,2013,2014 Ole Tange and
+# Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>
+# or write to the Free Software Foundation, Inc., 51 Franklin St,
+# Fifth Floor, Boston, MA 02110-1301 USA
+
+# open3 used in Job::start
+use IPC::Open3;
+# &WNOHANG used in reaper
+use POSIX qw(:sys_wait_h setsid ceil :errno_h);
+# gensym used in Job::start
+use Symbol qw(gensym);
+# tempfile used in Job::start
+use File::Temp qw(tempfile tempdir);
+# mkpath used in openresultsfile
+use File::Path;
+# GetOptions used in get_options_from_array
+use Getopt::Long;
+# Used to ensure code quality
+use strict;
+use File::Basename;
+
+if(not $ENV{HOME}) {
+    # $ENV{HOME} is sometimes not set if called from PHP
+    ::warning("\$HOME not set. 
Using /tmp\n"); + $ENV{HOME} = "/tmp"; +} + +save_stdin_stdout_stderr(); +save_original_signal_handler(); +parse_options(); +::debug("init", "Open file descriptors: ", join(" ",keys %Global::fd), "\n"); +my $number_of_args; +if($Global::max_number_of_args) { + $number_of_args=$Global::max_number_of_args; +} elsif ($opt::X or $opt::m or $opt::xargs) { + $number_of_args = undef; +} else { + $number_of_args = 1; +} + +my @command; +@command = @ARGV; + +my @fhlist; +if($opt::pipepart) { + @fhlist = map { open_or_exit($_) } "/dev/null"; +} else { + @fhlist = map { open_or_exit($_) } @opt::a; + if(not @fhlist and not $opt::pipe) { + @fhlist = (*STDIN); + } +} + +if($opt::skip_first_line) { + # Skip the first line for the first file handle + my $fh = $fhlist[0]; + <$fh>; +} +if($opt::header and not $opt::pipe) { + my $fh = $fhlist[0]; + # split with colsep or \t + # $header force $colsep = \t if undef? + my $delimiter = $opt::colsep; + $delimiter ||= "\$"; + my $id = 1; + for my $fh (@fhlist) { + my $line = <$fh>; + chomp($line); + ::debug("init", "Delimiter: '$delimiter'"); + for my $s (split /$delimiter/o, $line) { + ::debug("init", "Colname: '$s'"); + # Replace {colname} with {2} + # TODO accept configurable short hands + # TODO how to deal with headers in {=...=} + for(@command) { + s:\{$s(|/|//|\.|/\.)\}:\{$id$1\}:g; + } + $Global::input_source_header{$id} = $s; + $id++; + } + } +} else { + my $id = 1; + for my $fh (@fhlist) { + $Global::input_source_header{$id} = $id; + $id++; + } +} + +if($opt::filter_hosts and (@opt::sshlogin or @opt::sshloginfile)) { + # Parallel check all hosts are up. Remove hosts that are down + filter_hosts(); +} + +if($opt::nonall or $opt::onall) { + onall(@command); + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +# TODO --transfer foo/./bar --cleanup +# multiple --transfer and --basefile with different /./ + +$Global::JobQueue = JobQueue->new( + \@command,\@fhlist,$Global::ContextReplace,$number_of_args,\@Global::ret_files); + +if($opt::eta or $opt::bar) { + # Count the number of jobs before starting any + $Global::JobQueue->total_jobs(); +} +if($opt::pipepart) { + @Global::cat_partials = map { pipe_part_files($_) } @opt::a; + # Unget the command as many times as there are parts + $Global::JobQueue->{'commandlinequeue'}->unget( + map { $Global::JobQueue->{'commandlinequeue'}->get() } @Global::cat_partials + ); +} +for my $sshlogin (values %Global::host) { + $sshlogin->max_jobs_running(); +} + +init_run_jobs(); +my $sem; +if($Global::semaphore) { + $sem = acquire_semaphore(); +} +$SIG{TERM} = \&start_no_new_jobs; + +start_more_jobs(); +if(not $opt::pipepart) { + if($opt::pipe) { + spreadstdin(); + } +} +::debug("init", "Start draining\n"); +drain_job_queue(); +::debug("init", "Done draining\n"); +reaper(); +::debug("init", "Done reaping\n"); +if($opt::pipe and @opt::a) { + for my $job (@Global::tee_jobs) { + unlink $job->fh(2,"name"); + $job->set_fh(2,"name",""); + $job->print(); + unlink $job->fh(1,"name"); + } +} +::debug("init", "Cleaning\n"); +cleanup(); +if($Global::semaphore) { + $sem->release(); +} +for(keys %Global::sshmaster) { + kill "TERM", $_; +} +::debug("init", "Halt\n"); +if($opt::halt_on_error) { + wait_and_exit($Global::halt_on_error_exitstatus); +} else { + wait_and_exit(min(undef_as_zero($Global::exitstatus),254)); +} + +sub __PIPE_MODE__ {} + +sub pipe_part_files { + # Input: + # $file = the file to read + # Returns: + # @commands that will cat_partial each part + my ($file) = @_; + my $buf = ""; + my $header = 
find_header(\$buf,open_or_exit($file)); + # find positions + my @pos = find_split_positions($file,$opt::blocksize,length $header); + # Make @cat_partials + my @cat_partials = (); + for(my $i=0; $i<$#pos; $i++) { + push @cat_partials, cat_partial($file, 0, length($header), $pos[$i], $pos[$i+1]); + } + # Remote exec should look like: + # ssh -oLogLevel=quiet lo 'eval `echo $SHELL | grep "/t\{0,1\}csh" > /dev/null && echo setenv PARALLEL_SEQ '$PARALLEL_SEQ'\; setenv PARALLEL_PID '$PARALLEL_PID' || echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ FOO\ /tmp/foo\ \|\|\ export\ FOO=/tmp/foo\; \(wc\ -\ \$FOO\) + # ssh -tt not allowed. Remote will die due to broken pipe anyway. + # TODO test remote with --fifo / --cat + return @cat_partials; +} + +sub find_header { + # Input: + # $buf_ref = reference to read-in buffer + # $fh = filehandle to read from + # Uses: + # $opt::header + # $opt::blocksize + # Returns: + # $header string + my ($buf_ref, $fh) = @_; + my $header = ""; + if($opt::header) { + if($opt::header eq ":") { $opt::header = "(.*\n)"; } + # Number = number of lines + $opt::header =~ s/^(\d+)$/"(.*\n)"x$1/e; + while(read($fh,substr($$buf_ref,length $$buf_ref,0),$opt::blocksize)) { + if($$buf_ref=~s/^($opt::header)//) { + $header = $1; + last; + } + } + } + return $header; +} + +sub find_split_positions { + # Input: + # $file = the file to read + # $block = (minimal) --block-size of each chunk + # $headerlen = length of header to be skipped + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # @positions of block start/end + my($file, $block, $headerlen) = @_; + my $size = -s $file; + $block = int $block; + # The optimal dd blocksize for mint, redhat, solaris, openbsd = 2^17..2^20 + # The optimal dd blocksize for freebsd = 2^15..2^17 + my $dd_block_size = 131072; # 2^17 + my @pos; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $fh = ::open_or_exit($file); + push(@pos,$headerlen); + for(my $pos = $block+$headerlen; $pos < $size; $pos += $block) { + my $buf; + seek($fh, $pos, 0) || die; + while(read($fh,substr($buf,length $buf,0),$dd_block_size)) { + if($opt::regexp) { + # If match /$recend$recstart/ => Record position + if($buf =~ /(.*$recend)$recstart/os) { + my $i = length($1); + push(@pos,$pos+$i); + # Start looking for next record _after_ this match + $pos += $i; + last; + } + } else { + # If match $recend$recstart => Record position + my $i = index($buf,$recendrecstart); + if($i != -1) { + push(@pos,$pos+$i); + # Start looking for next record _after_ this match + $pos += $i; + last; + } + } + } + } + push(@pos,$size); + close $fh; + return @pos; +} + +sub cat_partial { + # Input: + # $file = the file to read + # ($start, $end, [$start2, $end2, ...]) = start byte, end byte + # Returns: + # Efficient perl command to copy $start..$end, $start2..$end2, ... to stdout + my($file, @start_end) = @_; + my($start, $i); + # Convert start_end to start_len + my @start_len = map { if(++$i % 2) { $start = $_; } else { $_-$start } } @start_end; + return "<". shell_quote_scalar($file) . + q{ perl -e 'while(@ARGV) { sysseek(STDIN,shift,0) || die; $left = shift; while($read = sysread(STDIN,$buf, ($left > 32768 ? 32768 : $left))){ $left -= $read; syswrite(STDOUT,$buf); } }' } . 
+ " @start_len"; +} + +sub spreadstdin { + # read a record + # Spawn a job and print the record to it. + # Uses: + # $opt::blocksize + # STDIN + # $opr::r + # $Global::max_lines + # $Global::max_number_of_args + # $opt::regexp + # $Global::start_no_new_jobs + # $opt::roundrobin + # %Global::running + + my $buf = ""; + my ($recstart,$recend) = recstartrecend(); + my $recendrecstart = $recend.$recstart; + my $chunk_number = 1; + my $one_time_through; + my $blocksize = $opt::blocksize; + my $in = *STDIN; + my $header = find_header(\$buf,$in); + while(1) { + my $anything_written = 0; + if(not read($in,substr($buf,length $buf,0),$blocksize)) { + # End-of-file + $chunk_number != 1 and last; + # Force the while-loop once if everything was read by header reading + $one_time_through++ and last; + } + if($opt::r) { + # Remove empty lines + $buf =~ s/^\s*\n//gm; + if(length $buf == 0) { + next; + } + } + if($Global::max_lines and not $Global::max_number_of_args) { + # Read n-line records + my $n_lines = $buf =~ tr/\n/\n/; + my $last_newline_pos = rindex($buf,"\n"); + while($n_lines % $Global::max_lines) { + $n_lines--; + $last_newline_pos = rindex($buf,"\n",$last_newline_pos-1); + } + # Chop at $last_newline_pos as that is where n-line record ends + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$last_newline_pos+1); + substr($buf,0,$last_newline_pos+1) = ""; + } elsif($opt::regexp) { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + # -L -N => (start..*?end){n*l} + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while($buf =~ s/((?:$recstart.*?$recend){$read_n_lines})($recstart.*)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } else { + # Find the last recend-recstart in $buf + if($buf =~ s/(.*$recend)($recstart.*?)$/$2/os) { + # Copy to modifiable variable + my $b = $1; + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$b, + $recstart,$recend,length $1); + } + } + } else { + if($Global::max_number_of_args) { + # -N => (start..*?end){n} + my $i = 0; + my $read_n_lines = $Global::max_number_of_args * ($Global::max_lines || 1); + while(($i = nindex(\$buf,$recendrecstart,$read_n_lines)) != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + substr($buf,0,$i) = ""; + } + } else { + # Find the last recend-recstart in $buf + my $i = rindex($buf,$recendrecstart); + if($i != -1) { + $i += length $recend; # find the actual splitting location + $anything_written += + write_record_to_pipe($chunk_number++,\$header,\$buf, + $recstart,$recend,$i); + substr($buf,0,$i) = ""; + } + } + } + if(not $anything_written and not eof($in)) { + # Nothing was written - maybe the block size < record size? + # Increase blocksize exponentially + my $old_blocksize = $blocksize; + $blocksize = ceil($blocksize * 1.3 + 1); + ::warning("A record was longer than $old_blocksize. " . 
+ "Increasing to --blocksize $blocksize\n"); + } + } + ::debug("init", "Done reading input\n"); + + # If there is anything left in the buffer write it + substr($buf,0,0) = ""; + write_record_to_pipe($chunk_number++,\$header,\$buf,$recstart,$recend,length $buf); + + $Global::start_no_new_jobs ||= 1; + if($opt::roundrobin) { + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + my %incomplete_jobs = %Global::running; + my $sleep = 1; + while(keys %incomplete_jobs) { + my $something_written = 0; + for my $pid (keys %incomplete_jobs) { + my $job = $incomplete_jobs{$pid}; + if($job->stdin_buffer_length()) { + $something_written += $job->non_block_write(); + } else { + delete $incomplete_jobs{$pid} + } + } + if($something_written) { + $sleep = $sleep/2+0.001; + } + $sleep = ::reap_usleep($sleep); + } + } +} + +sub recstartrecend { + # Uses: + # $opt::recstart + # $opt::recend + # Returns: + # $recstart,$recend with default values and regexp conversion + my($recstart,$recend); + if(defined($opt::recstart) and defined($opt::recend)) { + # If both --recstart and --recend is given then both must match + $recstart = $opt::recstart; + $recend = $opt::recend; + } elsif(defined($opt::recstart)) { + # If --recstart is given it must match start of record + $recstart = $opt::recstart; + $recend = ""; + } elsif(defined($opt::recend)) { + # If --recend is given then it must match end of record + $recstart = ""; + $recend = $opt::recend; + } + + if($opt::regexp) { + # If $recstart/$recend contains '|' this should only apply to the regexp + $recstart = "(?:".$recstart.")"; + $recend = "(?:".$recend.")"; + } else { + # $recstart/$recend = printf strings (\n) + $recstart =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + $recend =~ s/\\([0rnt\'\"\\])/"qq|\\$1|"/gee; + } + return ($recstart,$recend); +} + +sub nindex { + # See if string is in buffer N times + # Returns: + # the position where the Nth copy is found + my ($buf_ref, $str, $n) = @_; + my $i = 0; + for(1..$n) { + $i = index($$buf_ref,$str,$i+1); + if($i == -1) { last } + } + return $i; +} + +{ + my @robin_queue; + + sub round_robin_write { + # Input: + # $header_ref = ref to $header string + # $block_ref = ref to $block to be written + # $recstart = record start string + # $recend = record end string + # $endpos = end position of $block + # Uses: + # %Global::running + my ($header_ref,$block_ref,$recstart,$recend,$endpos) = @_; + my $something_written = 0; + my $block_passed = 0; + my $sleep = 1; + while(not $block_passed) { + # Continue flushing existing buffers + # until one is empty and a new block is passed + # Make a queue to spread the blocks evenly + if(not @robin_queue) { + push @robin_queue, values %Global::running; + } + while(my $job = shift @robin_queue) { + if($job->stdin_buffer_length() > 0) { + $something_written += $job->non_block_write(); + } else { + $job->set_stdin_buffer($header_ref,$block_ref,$endpos,$recstart,$recend); + $block_passed = 1; + $job->set_virgin(0); + $something_written += $job->non_block_write(); + last; + } + } + $sleep = ::reap_usleep($sleep); + } + return $something_written; + } +} + +sub write_record_to_pipe { + # Fork then + # Write record from pos 0 .. 
$endpos to pipe + # Input: + # $chunk_number = sequence number - to see if already run + # $header_ref = reference to header string to prepend + # $record_ref = reference to record to write + # $recstart = start string of record + # $recend = end string of record + # $endpos = position in $record_ref where record ends + # Uses: + # $Global::job_already_run + # $opt::roundrobin + # @Global::virgin_jobs + # Returns: + # Number of chunks written (0 or 1) + my ($chunk_number,$header_ref,$record_ref,$recstart,$recend,$endpos) = @_; + if($endpos == 0) { return 0; } + if(vec($Global::job_already_run,$chunk_number,1)) { return 1; } + if($opt::roundrobin) { + return round_robin_write($header_ref,$record_ref,$recstart,$recend,$endpos); + } + # If no virgin found, backoff + my $sleep = 0.0001; # 0.01 ms - better performance on highend + while(not @Global::virgin_jobs) { + ::debug("pipe", "No virgin jobs"); + $sleep = ::reap_usleep($sleep); + # Jobs may not be started because of loadavg + # or too little time between each ssh login. + start_more_jobs(); + } + my $job = shift @Global::virgin_jobs; + # Job is no longer virgin + $job->set_virgin(0); + if(fork()) { + # Skip + } else { + # Chop of at $endpos as we do not know how many rec_sep will + # be removed. + substr($$record_ref,$endpos,length $$record_ref) = ""; + # Remove rec_sep + if($opt::remove_rec_sep) { + Job::remove_rec_sep($record_ref,$recstart,$recend); + } + $job->write($header_ref); + $job->write($record_ref); + close $job->fh(0,"w"); + exit(0); + } + close $job->fh(0,"w"); + return 1; +} + +sub __SEM_MODE__ {} + +sub acquire_semaphore { + # Acquires semaphore. If needed: spawns to the background + # Uses: + # @Global::host + # Returns: + # The semaphore to be released when jobs is complete + $Global::host{':'} = SSHLogin->new(":"); + my $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); + $sem->acquire(); + if($Semaphore::fg) { + # skip + } else { + # If run in the background, the PID will change + # therefore release and re-acquire the semaphore + $sem->release(); + if(fork()) { + exit(0); + } else { + # child + # Get a semaphore for this pid + ::die_bug("Can't start a new session: $!") if setsid() == -1; + $sem = Semaphore->new($Semaphore::name,$Global::host{':'}->max_jobs_running()); + $sem->acquire(); + } + } + return $sem; +} + +sub __PARSE_OPTIONS__ {} + +sub options_hash { + # Returns: + # %hash = the GetOptions config + return + ("debug|D=s" => \$opt::D, + "xargs" => \$opt::xargs, + "m" => \$opt::m, + "X" => \$opt::X, + "v" => \@opt::v, + "joblog=s" => \$opt::joblog, + "results|result|res=s" => \$opt::results, + "resume" => \$opt::resume, + "resume-failed|resumefailed" => \$opt::resume_failed, + "silent" => \$opt::silent, + #"silent-error|silenterror" => \$opt::silent_error, + "keep-order|keeporder|k" => \$opt::keeporder, + "group" => \$opt::group, + "g" => \$opt::retired, + "ungroup|u" => \$opt::ungroup, + "linebuffer|linebuffered|line-buffer|line-buffered" => \$opt::linebuffer, + "tmux" => \$opt::tmux, + "null|0" => \$opt::0, + "quote|q" => \$opt::q, + # Replacement strings + "parens=s" => \$opt::parens, + "rpl=s" => \@opt::rpl, + "plus" => \$opt::plus, + "I=s" => \$opt::I, + "extensionreplace|er=s" => \$opt::U, + "U=s" => \$opt::retired, + "basenamereplace|bnr=s" => \$opt::basenamereplace, + "dirnamereplace|dnr=s" => \$opt::dirnamereplace, + "basenameextensionreplace|bner=s" => \$opt::basenameextensionreplace, + "seqreplace=s" => \$opt::seqreplace, + "slotreplace=s" => \$opt::slotreplace, + 
"jobs|j=s" => \$opt::jobs, + "delay=f" => \$opt::delay, + "sshdelay=f" => \$opt::sshdelay, + "load=s" => \$opt::load, + "noswap" => \$opt::noswap, + "max-line-length-allowed" => \$opt::max_line_length_allowed, + "number-of-cpus" => \$opt::number_of_cpus, + "number-of-cores" => \$opt::number_of_cores, + "use-cpus-instead-of-cores" => \$opt::use_cpus_instead_of_cores, + "shellquote|shell_quote|shell-quote" => \$opt::shellquote, + "nice=i" => \$opt::nice, + "timeout=s" => \$opt::timeout, + "tag" => \$opt::tag, + "tagstring|tag-string=s" => \$opt::tagstring, + "onall" => \$opt::onall, + "nonall" => \$opt::nonall, + "filter-hosts|filterhosts|filter-host" => \$opt::filter_hosts, + "sshlogin|S=s" => \@opt::sshlogin, + "sshloginfile|slf=s" => \@opt::sshloginfile, + "controlmaster|M" => \$opt::controlmaster, + "return=s" => \@opt::return, + "trc=s" => \@opt::trc, + "transfer" => \$opt::transfer, + "cleanup" => \$opt::cleanup, + "basefile|bf=s" => \@opt::basefile, + "B=s" => \$opt::retired, + "ctrlc|ctrl-c" => \$opt::ctrlc, + "noctrlc|no-ctrlc|no-ctrl-c" => \$opt::noctrlc, + "workdir|work-dir|wd=s" => \$opt::workdir, + "W=s" => \$opt::retired, + "tmpdir=s" => \$opt::tmpdir, + "tempdir=s" => \$opt::tmpdir, + "use-compress-program|compress-program=s" => \$opt::compress_program, + "use-decompress-program|decompress-program=s" => \$opt::decompress_program, + "compress" => \$opt::compress, + "tty" => \$opt::tty, + "T" => \$opt::retired, + "halt-on-error|halt=s" => \$opt::halt_on_error, + "H=i" => \$opt::retired, + "retries=i" => \$opt::retries, + "dry-run|dryrun" => \$opt::dryrun, + "progress" => \$opt::progress, + "eta" => \$opt::eta, + "bar" => \$opt::bar, + "arg-sep|argsep=s" => \$opt::arg_sep, + "arg-file-sep|argfilesep=s" => \$opt::arg_file_sep, + "trim=s" => \$opt::trim, + "env=s" => \@opt::env, + "recordenv|record-env" => \$opt::record_env, + "plain" => \$opt::plain, + "profile|J=s" => \@opt::profile, + "pipe|spreadstdin" => \$opt::pipe, + "robin|round-robin|roundrobin" => \$opt::roundrobin, + "recstart=s" => \$opt::recstart, + "recend=s" => \$opt::recend, + "regexp|regex" => \$opt::regexp, + "remove-rec-sep|removerecsep|rrs" => \$opt::remove_rec_sep, + "files|output-as-files|outputasfiles" => \$opt::files, + "block|block-size|blocksize=s" => \$opt::blocksize, + "tollef" => \$opt::retired, + "gnu" => \$opt::gnu, + "xapply" => \$opt::xapply, + "bibtex" => \$opt::bibtex, + "nn|nonotice|no-notice" => \$opt::no_notice, + # xargs-compatibility - implemented, man, testsuite + "max-procs|P=s" => \$opt::jobs, + "delimiter|d=s" => \$opt::d, + "max-chars|s=i" => \$opt::max_chars, + "arg-file|a=s" => \@opt::a, + "no-run-if-empty|r" => \$opt::r, + "replace|i:s" => \$opt::i, + "E=s" => \$opt::eof, + "eof|e:s" => \$opt::eof, + "max-args|n=i" => \$opt::max_args, + "max-replace-args|N=i" => \$opt::max_replace_args, + "colsep|col-sep|C=s" => \$opt::colsep, + "help|h" => \$opt::help, + "L=f" => \$opt::L, + "max-lines|l:f" => \$opt::max_lines, + "interactive|p" => \$opt::p, + "verbose|t" => \$opt::verbose, + "version|V" => \$opt::version, + "minversion|min-version=i" => \$opt::minversion, + "show-limits|showlimits" => \$opt::show_limits, + "exit|x" => \$opt::x, + # Semaphore + "semaphore" => \$opt::semaphore, + "semaphoretimeout=i" => \$opt::semaphoretimeout, + "semaphorename|id=s" => \$opt::semaphorename, + "fg" => \$opt::fg, + "bg" => \$opt::bg, + "wait" => \$opt::wait, + # Shebang #!/usr/bin/parallel --shebang + "shebang|hashbang" => \$opt::shebang, + "internal-pipe-means-argfiles" => 
\$opt::internal_pipe_means_argfiles, + "Y" => \$opt::retired, + "skip-first-line" => \$opt::skip_first_line, + "header=s" => \$opt::header, + "cat" => \$opt::cat, + "fifo" => \$opt::fifo, + "pipepart|pipe-part" => \$opt::pipepart, + "hgrp|hostgroup|hostgroups" => \$opt::hostgroups, + ); +} + +sub get_options_from_array { + # Run GetOptions on @array + # Input: + # $array_ref = ref to @ARGV to parse + # @keep_only = Keep only these options + # Uses: + # @ARGV + # Returns: + # true if parsing worked + # false if parsing failed + # @$array_ref is changed + my ($array_ref, @keep_only) = @_; + if(not @$array_ref) { + # Empty array: No need to look more at that + return 1; + } + # A bit of shuffling of @ARGV needed as GetOptionsFromArray is not + # supported everywhere + my @save_argv; + my $this_is_ARGV = (\@::ARGV == $array_ref); + if(not $this_is_ARGV) { + @save_argv = @::ARGV; + @::ARGV = @{$array_ref}; + } + # If @keep_only set: Ignore all values except @keep_only + my %options = options_hash(); + if(@keep_only) { + my (%keep,@dummy); + @keep{@keep_only} = @keep_only; + for my $k (grep { not $keep{$_} } keys %options) { + # Store the value of the option in @dummy + $options{$k} = \@dummy; + } + } + my $retval = GetOptions(%options); + if(not $this_is_ARGV) { + @{$array_ref} = @::ARGV; + @::ARGV = @save_argv; + } + return $retval; +} + +sub parse_options { + # Returns: N/A + # Defaults: + $Global::version = 20141122; + $Global::progname = 'parallel'; + $Global::infinity = 2**31; + $Global::debug = 0; + $Global::verbose = 0; + $Global::quoting = 0; + # Read only table with default --rpl values + %Global::replace = + ( + '{}' => '', + '{#}' => '1 $_=$job->seq()', + '{%}' => '1 $_=$job->slot()', + '{/}' => 's:.*/::', + '{//}' => '$Global::use{"File::Basename"} ||= eval "use File::Basename; 1;"; $_ = dirname($_);', + '{/.}' => 's:.*/::; s:\.[^/.]+$::;', + '{.}' => 's:\.[^/.]+$::', + ); + %Global::plus = + ( + # {} = {+/}/{/} + # = {.}.{+.} = {+/}/{/.}.{+.} + # = {..}.{+..} = {+/}/{/..}.{+..} + # = {...}.{+...} = {+/}/{/...}.{+...} + '{+/}' => 's:/[^/]*$::', + '{+.}' => 's:.*\.::', + '{+..}' => 's:.*\.([^.]*\.):$1:', + '{+...}' => 's:.*\.([^.]*\.[^.]*\.):$1:', + '{..}' => 's:\.[^/.]+$::; s:\.[^/.]+$::', + '{...}' => 's:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/..}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::', + '{/...}' => 's:.*/::; s:\.[^/.]+$::; s:\.[^/.]+$::; s:\.[^/.]+$::', + ); + # Modifiable copy of %Global::replace + %Global::rpl = %Global::replace; + $Global::parens = "{==}"; + $/="\n"; + $Global::ignore_empty = 0; + $Global::interactive = 0; + $Global::stderr_verbose = 0; + $Global::default_simultaneous_sshlogins = 9; + $Global::exitstatus = 0; + $Global::halt_on_error_exitstatus = 0; + $Global::arg_sep = ":::"; + $Global::arg_file_sep = "::::"; + $Global::trim = 'n'; + $Global::max_jobs_running = 0; + $Global::job_already_run = ''; + $ENV{'TMPDIR'} ||= "/tmp"; + + @ARGV=read_options(); + + if(@opt::v) { $Global::verbose = $#opt::v+1; } # Convert -v -v to v=2 + $Global::debug = $opt::D; + $Global::shell = $ENV{'PARALLEL_SHELL'} || parent_shell($$) || $ENV{'SHELL'} || "/bin/sh"; + if(defined $opt::X) { $Global::ContextReplace = 1; } + if(defined $opt::silent) { $Global::verbose = 0; } + if(defined $opt::0) { $/ = "\0"; } + if(defined $opt::d) { my $e="sprintf \"$opt::d\""; $/ = eval $e; } + if(defined $opt::p) { $Global::interactive = $opt::p; } + if(defined $opt::q) { $Global::quoting = 1; } + if(defined $opt::r) { $Global::ignore_empty = 1; } + if(defined $opt::verbose) { 
$Global::stderr_verbose = 1; } + # Deal with --rpl + sub rpl { + # Modify %Global::rpl + # Replace $old with $new + my ($old,$new) = @_; + if($old ne $new) { + $Global::rpl{$new} = $Global::rpl{$old}; + delete $Global::rpl{$old}; + } + } + if(defined $opt::parens) { $Global::parens = $opt::parens; } + my $parenslen = 0.5*length $Global::parens; + $Global::parensleft = substr($Global::parens,0,$parenslen); + $Global::parensright = substr($Global::parens,$parenslen); + if(defined $opt::plus) { %Global::rpl = (%Global::plus,%Global::rpl); } + if(defined $opt::I) { rpl('{}',$opt::I); } + if(defined $opt::U) { rpl('{.}',$opt::U); } + if(defined $opt::i and $opt::i) { rpl('{}',$opt::i); } + if(defined $opt::basenamereplace) { rpl('{/}',$opt::basenamereplace); } + if(defined $opt::dirnamereplace) { rpl('{//}',$opt::dirnamereplace); } + if(defined $opt::seqreplace) { rpl('{#}',$opt::seqreplace); } + if(defined $opt::slotreplace) { rpl('{%}',$opt::slotreplace); } + if(defined $opt::basenameextensionreplace) { + rpl('{/.}',$opt::basenameextensionreplace); + } + for(@opt::rpl) { + # Create $Global::rpl entries for --rpl options + # E.g: "{..} s:\.[^.]+$:;s:\.[^.]+$:;" + my ($shorthand,$long) = split/ /,$_,2; + $Global::rpl{$shorthand} = $long; + } + if(defined $opt::eof) { $Global::end_of_file_string = $opt::eof; } + if(defined $opt::max_args) { $Global::max_number_of_args = $opt::max_args; } + if(defined $opt::timeout) { $Global::timeoutq = TimeoutQueue->new($opt::timeout); } + if(defined $opt::tmpdir) { $ENV{'TMPDIR'} = $opt::tmpdir; } + if(defined $opt::help) { die_usage(); } + if(defined $opt::colsep) { $Global::trim = 'lr'; } + if(defined $opt::header) { $opt::colsep = defined $opt::colsep ? $opt::colsep : "\t"; } + if(defined $opt::trim) { $Global::trim = $opt::trim; } + if(defined $opt::arg_sep) { $Global::arg_sep = $opt::arg_sep; } + if(defined $opt::arg_file_sep) { $Global::arg_file_sep = $opt::arg_file_sep; } + if(defined $opt::number_of_cpus) { print SSHLogin::no_of_cpus(),"\n"; wait_and_exit(0); } + if(defined $opt::number_of_cores) { + print SSHLogin::no_of_cores(),"\n"; wait_and_exit(0); + } + if(defined $opt::max_line_length_allowed) { + print Limits::Command::real_max_length(),"\n"; wait_and_exit(0); + } + if(defined $opt::version) { version(); wait_and_exit(0); } + if(defined $opt::bibtex) { bibtex(); wait_and_exit(0); } + if(defined $opt::record_env) { record_env(); wait_and_exit(0); } + if(defined $opt::show_limits) { show_limits(); } + if(@opt::sshlogin) { @Global::sshlogin = @opt::sshlogin; } + if(@opt::sshloginfile) { read_sshloginfiles(@opt::sshloginfile); } + if(@opt::return) { push @Global::ret_files, @opt::return; } + if(not defined $opt::recstart and + not defined $opt::recend) { $opt::recend = "\n"; } + if(not defined $opt::blocksize) { $opt::blocksize = "1M"; } + $opt::blocksize = multiply_binary_prefix($opt::blocksize); + if(defined $opt::controlmaster) { $opt::noctrlc = 1; } + if(defined $opt::semaphore) { $Global::semaphore = 1; } + if(defined $opt::semaphoretimeout) { $Global::semaphore = 1; } + if(defined $opt::semaphorename) { $Global::semaphore = 1; } + if(defined $opt::fg) { $Global::semaphore = 1; } + if(defined $opt::bg) { $Global::semaphore = 1; } + if(defined $opt::wait) { $Global::semaphore = 1; } + if(defined $opt::halt_on_error and + $opt::halt_on_error=~/%/) { $opt::halt_on_error /= 100; } + if(defined $opt::timeout and $opt::timeout !~ /^\d+(\.\d+)?%?$/) { + ::error("--timeout must be seconds or percentage\n"); + wait_and_exit(255); + } + if(defined 
$opt::minversion) { + print $Global::version,"\n"; + if($Global::version < $opt::minversion) { + wait_and_exit(255); + } else { + wait_and_exit(0); + } + } + if(not defined $opt::delay) { + # Set --delay to --sshdelay if not set + $opt::delay = $opt::sshdelay; + } + if($opt::compress_program) { + $opt::compress = 1; + $opt::decompress_program ||= $opt::compress_program." -dc"; + } + if($opt::compress) { + my ($compress, $decompress) = find_compression_program(); + $opt::compress_program ||= $compress; + $opt::decompress_program ||= $decompress; + } + if(defined $opt::nonall) { + # Append a dummy empty argument + push @ARGV, $Global::arg_sep, ""; + } + if(defined $opt::tty) { + # Defaults for --tty: -j1 -u + # Can be overridden with -jXXX -g + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if(not defined $opt::group) { + $opt::ungroup = 0; + } + } + if(@opt::trc) { + push @Global::ret_files, @opt::trc; + $opt::transfer = 1; + $opt::cleanup = 1; + } + if(defined $opt::max_lines) { + if($opt::max_lines eq "-0") { + # -l -0 (swallowed -0) + $opt::max_lines = 1; + $opt::0 = 1; + $/ = "\0"; + } elsif ($opt::max_lines == 0) { + # If not given (or if 0 is given) => 1 + $opt::max_lines = 1; + } + $Global::max_lines = $opt::max_lines; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + + # Read more than one arg at a time (-L, -N) + if(defined $opt::L) { + $Global::max_lines = $opt::L; + if(not $opt::pipe) { + # --pipe -L means length of record - not max_number_of_args + $Global::max_number_of_args ||= $Global::max_lines; + } + } + if(defined $opt::max_replace_args) { + $Global::max_number_of_args = $opt::max_replace_args; + $Global::ContextReplace = 1; + } + if((defined $opt::L or defined $opt::max_replace_args) + and + not ($opt::xargs or $opt::m)) { + $Global::ContextReplace = 1; + } + if(defined $opt::tag and not defined $opt::tagstring) { + $opt::tagstring = "\257<\257>"; # Default = {} + } + if(defined $opt::pipepart and + (defined $opt::L or defined $opt::max_lines + or defined $opt::max_replace_args)) { + ::error("--pipepart is incompatible with --max-replace-args, ", + "--max-lines, and -L.\n"); + wait_and_exit(255); + } + if(grep /^$Global::arg_sep$|^$Global::arg_file_sep$/o, @ARGV) { + # Deal with ::: and :::: + @ARGV=read_args_from_command_line(); + } + + # Semaphore defaults + # Must be done before computing number of processes and max_line_length + # because when running as a semaphore GNU Parallel does not read args + $Global::semaphore ||= ($0 =~ m:(^|/)sem$:); # called as 'sem' + if($Global::semaphore) { + # A semaphore does not take input from neither stdin nor file + @opt::a = ("/dev/null"); + push(@Global::unget_argv, [Arg->new("")]); + $Semaphore::timeout = $opt::semaphoretimeout || 0; + if(defined $opt::semaphorename) { + $Semaphore::name = $opt::semaphorename; + } else { + $Semaphore::name = `tty`; + chomp $Semaphore::name; + } + $Semaphore::fg = $opt::fg; + $Semaphore::wait = $opt::wait; + $Global::default_simultaneous_sshlogins = 1; + if(not defined $opt::jobs) { + $opt::jobs = 1; + } + if($Global::interactive and $opt::bg) { + ::error("Jobs running in the ". + "background cannot be interactive.\n"); + ::wait_and_exit(255); + } + } + if(defined $opt::eta) { + $opt::progress = $opt::eta; + } + if(defined $opt::bar) { + $opt::progress = $opt::bar; + } + if(defined $opt::retired) { + ::error("-g has been retired. Use --group.\n"); + ::error("-B has been retired. 
Use --bf.\n"); + ::error("-T has been retired. Use --tty.\n"); + ::error("-U has been retired. Use --er.\n"); + ::error("-W has been retired. Use --wd.\n"); + ::error("-Y has been retired. Use --shebang.\n"); + ::error("-H has been retired. Use --halt.\n"); + ::error("--tollef has been retired. Use -u -q --arg-sep -- and --load for -l.\n"); + ::wait_and_exit(255); + } + citation_notice(); + + parse_sshlogin(); + parse_env_var(); + + if(remote_hosts() and ($opt::X or $opt::m or $opt::xargs)) { + # As we do not know the max line length on the remote machine + # long commands generated by xargs may fail + # If opt_N is set, it is probably safe + ::warning("Using -X or -m with --sshlogin may fail.\n"); + } + + if(not defined $opt::jobs) { + $opt::jobs = "100%"; + } + open_joblog(); +} + +sub env_quote { + # Input: + # $v = value to quote + # Returns: + # $v = value quoted as environment variable + my $v = $_[0]; + $v =~ s/([\\])/\\$1/g; + $v =~ s/([\[\] \#\'\&\<\>\(\)\;\{\}\t\"\$\`\*\174\!\?\~])/\\$1/g; + $v =~ s/\n/"\n"/g; + return $v; +} + +sub record_env { + # Record current %ENV-keys in ~/.parallel/ignored_vars + # Returns: N/A + my $ignore_filename = $ENV{'HOME'} . "/.parallel/ignored_vars"; + if(open(my $vars_fh, ">", $ignore_filename)) { + print $vars_fh map { $_,"\n" } keys %ENV; + } else { + ::error("Cannot write to $ignore_filename\n"); + ::wait_and_exit(255); + } +} + +sub parse_env_var { + # Parse --env and set $Global::envvar, $Global::envwarn and $Global::envvarlen + # + # Bash functions must be parsed to export them remotely + # Pre-shellshock style bash function: + # myfunc=() {... + # Post-shellshock style bash function: + # BASH_FUNC_myfunc()=() {... + # + # Uses: + # $Global::envvar = eval string that will set variables in both bash and csh + # $Global::envwarn = If functions are used: Give warning in csh + # $Global::envvarlen = length of $Global::envvar + # @opt::env + # $Global::shell + # %ENV + # Returns: N/A + $Global::envvar = ""; + $Global::envwarn = ""; + my @vars = ('parallel_bash_environment'); + for my $varstring (@opt::env) { + # Split up --env VAR1,VAR2 + push @vars, split /,/, $varstring; + } + if(grep { /^_$/ } @vars) { + # --env _ + # Include all vars that are not in a clean environment + if(open(my $vars_fh, "<", $ENV{'HOME'} . "/.parallel/ignored_vars")) { + my @ignore = <$vars_fh>; + chomp @ignore; + my %ignore; + @ignore{@ignore} = @ignore; + close $vars_fh; + push @vars, grep { not defined $ignore{$_} } keys %ENV; + @vars = grep { not /^_$/ } @vars; + } else { + ::error("Run '$Global::progname --record-env' in a clean environment first.\n"); + ::wait_and_exit(255); + } + } + # Duplicate vars as BASH functions to include post-shellshock functions. 
+ # So --env myfunc should also look for BASH_FUNC_myfunc() + @vars = map { $_, "BASH_FUNC_$_()" } @vars; + # Keep only defined variables + @vars = grep { defined($ENV{$_}) } @vars; + # Pre-shellshock style bash function: + # myfunc=() { echo myfunc + # } + # Post-shellshock style bash function: + # BASH_FUNC_myfunc()=() { echo myfunc + # } + my @bash_functions = grep { substr($ENV{$_},0,4) eq "() {" } @vars; + my @non_functions = grep { substr($ENV{$_},0,4) ne "() {" } @vars; + if(@bash_functions) { + # Functions are not supported for all shells + if($Global::shell !~ m:/(bash|rbash|zsh|rzsh|dash|ksh):) { + ::warning("Shell functions may not be supported in $Global::shell\n"); + } + } + + # Pre-shellschock names are without () + my @bash_pre_shellshock = grep { not /\(\)/ } @bash_functions; + # Post-shellschock names are with () + my @bash_post_shellshock = grep { /\(\)/ } @bash_functions; + + my @qcsh = (map { my $a=$_; "setenv $a " . env_quote($ENV{$a}) } + grep { not /^parallel_bash_environment$/ } @non_functions); + my @qbash = (map { my $a=$_; "export $a=" . env_quote($ENV{$a}) } + @non_functions, @bash_pre_shellshock); + + push @qbash, map { my $a=$_; "eval $a\"\$$a\"" } @bash_pre_shellshock; + push @qbash, map { /BASH_FUNC_(.*)\(\)/; "$1 $ENV{$_}" } @bash_post_shellshock; + + #ssh -tt -oLogLevel=quiet lo 'eval `echo PARALLEL_SEQ='$PARALLEL_SEQ'\;export PARALLEL_SEQ\; PARALLEL_PID='$PARALLEL_PID'\;export PARALLEL_PID` ;' tty\ \>/dev/null\ \&\&\ stty\ isig\ -onlcr\ -echo\;echo\ \$SHELL\ \|\ grep\ \"/t\\\{0,1\\\}csh\"\ \>\ /dev/null\ \&\&\ setenv\ BASH_FUNC_myfunc\ \\\(\\\)\\\ \\\{\\\ \\\ echo\\\ a\"' + #'\"\\\}\ \|\|\ myfunc\(\)\ \{\ \ echo\ a' + #'\}\ \;myfunc\ 1; + + # Check if any variables contain \n + if(my @v = map { s/BASH_FUNC_(.*)\(\)/$1/; $_ } grep { $ENV{$_}=~/\n/ } @vars) { + # \n is bad for csh and will cause it to fail. + $Global::envwarn = ::shell_quote_scalar(q{echo $SHELL | egrep "/t?csh" > /dev/null && echo CSH/TCSH DO NOT SUPPORT newlines IN VARIABLES/FUNCTIONS. Unset }."@v".q{ && exec false;}."\n\n") . $Global::envwarn; + } + + if(not @qcsh) { push @qcsh, "true"; } + if(not @qbash) { push @qbash, "true"; } + # Create lines like: + # echo $SHELL | grep "/t\\{0,1\\}csh" >/dev/null && setenv V1 val1 && setenv V2 val2 || export V1=val1 && export V2=val2 ; echo "$V1$V2" + if(@vars) { + $Global::envvar .= + join"", + (q{echo $SHELL | grep "/t\\{0,1\\}csh" > /dev/null && } + . join(" && ", @qcsh) + . q{ || } + . 
join(" && ", @qbash) + .q{;}); + if($ENV{'parallel_bash_environment'}) { + $Global::envvar .= 'eval "$parallel_bash_environment";'."\n"; + } + } + $Global::envvarlen = length $Global::envvar; +} + +sub open_joblog { + # Open joblog as specified by --joblog + # Uses: + # $opt::resume + # $opt::resume_failed + # $opt::joblog + # $opt::results + # $Global::job_already_run + # %Global::fd + my $append = 0; + if(($opt::resume or $opt::resume_failed) + and + not ($opt::joblog or $opt::results)) { + ::error("--resume and --resume-failed require --joblog or --results.\n"); + ::wait_and_exit(255); + } + if($opt::joblog) { + if($opt::resume || $opt::resume_failed) { + if(open(my $joblog_fh, "<", $opt::joblog)) { + # Read the joblog + $append = <$joblog_fh>; # If there is a header: Open as append later + my $joblog_regexp; + if($opt::resume_failed) { + # Make a regexp that only matches commands with exit+signal=0 + # 4 host 1360490623.067 3.445 1023 1222 0 0 command + $joblog_regexp='^(\d+)(?:\t[^\t]+){5}\t0\t0\t'; + } else { + # Just match the job number + $joblog_regexp='^(\d+)'; + } + while(<$joblog_fh>) { + if(/$joblog_regexp/o) { + # This is 30% faster than set_job_already_run($1); + vec($Global::job_already_run,($1||0),1) = 1; + } elsif(not /\d+\s+[^\s]+\s+([0-9.]+\s+){6}/) { + ::error("Format of '$opt::joblog' is wrong: $_"); + ::wait_and_exit(255); + } + } + close $joblog_fh; + } + } + if($append) { + # Append to joblog + if(not open($Global::joblog, ">>", $opt::joblog)) { + ::error("Cannot append to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + } else { + if($opt::joblog eq "-") { + # Use STDOUT as joblog + $Global::joblog = $Global::fd{1}; + } elsif(not open($Global::joblog, ">", $opt::joblog)) { + # Overwrite the joblog + ::error("Cannot write to --joblog $opt::joblog.\n"); + ::wait_and_exit(255); + } + print $Global::joblog + join("\t", "Seq", "Host", "Starttime", "JobRuntime", + "Send", "Receive", "Exitval", "Signal", "Command" + ). "\n"; + } + } +} + +sub find_compression_program { + # Find a fast compression program + # Returns: + # $compress_program = compress program with options + # $decompress_program = decompress program with options + + # Search for these. Sorted by speed + my @prg = qw(lzop pigz pxz gzip plzip pbzip2 lzma xz lzip bzip2); + for my $p (@prg) { + if(which($p)) { + return ("$p -c -1","$p -dc"); + } + } + # Fall back to cat + return ("cat","cat"); +} + + +sub read_options { + # Read options from command line, profile and $PARALLEL + # Uses: + # $opt::shebang_wrap + # $opt::shebang + # @ARGV + # $opt::plain + # @opt::profile + # $ENV{'HOME'} + # $ENV{'PARALLEL'} + # Returns: + # @ARGV_no_opt = @ARGV without --options + + # This must be done first as this may exec myself + if(defined $ARGV[0] and ($ARGV[0] =~ /^--shebang/ or + $ARGV[0] =~ /^--shebang-?wrap/ or + $ARGV[0] =~ /^--hashbang/)) { + # Program is called from #! line in script + # remove --shebang-wrap if it is set + $opt::shebang_wrap = ($ARGV[0] =~ s/^--shebang-?wrap *//); + # remove --shebang if it is set + $opt::shebang = ($ARGV[0] =~ s/^--shebang *//); + # remove --hashbang if it is set + $opt::shebang .= ($ARGV[0] =~ s/^--hashbang *//); + if($opt::shebang) { + my $argfile = shell_quote_scalar(pop @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --skip-first-line -a $argfile @ARGV"; + } + if($opt::shebang_wrap) { + my @options; + my @parser; + if ($^O eq 'freebsd') { + # FreeBSD's #! puts different values in @ARGV than Linux' does. 
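+                # Sketch of the idea (hypothetical example, not from the #! line
+                # itself): a script beginning
+                #     #!/usr/local/bin/parallel --shebang-wrap /usr/bin/perl
+                # may deliver those words as separate @ARGV elements here, so:
+                # parse a copy of @ARGV, see how many leading words GetOptions
+                # consumes as options, peel that many off the real @ARGV, and
+                # treat the rest (up to an optional ":::") as the wrapped
+                # interpreter command.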
+ my @nooptions = @ARGV; + get_options_from_array(\@nooptions); + while($#ARGV > $#nooptions) { + push @options, shift @ARGV; + } + while(@ARGV and $ARGV[0] ne ":::") { + push @parser, shift @ARGV; + } + if(@ARGV and $ARGV[0] eq ":::") { + shift @ARGV; + } + } else { + @options = shift @ARGV; + } + my $script = shell_quote_scalar(shift @ARGV); + # exec myself to split $ARGV[0] into separate fields + exec "$0 --internal-pipe-means-argfiles @options @parser $script ::: @ARGV"; + } + } + + Getopt::Long::Configure("bundling","require_order"); + my @ARGV_copy = @ARGV; + # Check if there is a --profile to set @opt::profile + get_options_from_array(\@ARGV_copy,"profile|J=s","plain") || die_usage(); + my @ARGV_profile = (); + my @ARGV_env = (); + if(not $opt::plain) { + # Add options from .parallel/config and other profiles + my @config_profiles = ( + "/etc/parallel/config", + $ENV{'HOME'}."/.parallel/config", + $ENV{'HOME'}."/.parallelrc"); + my @profiles = @config_profiles; + if(@opt::profile) { + # --profile overrides default profiles + @profiles = (); + for my $profile (@opt::profile) { + if(-r $profile) { + push @profiles, $profile; + } else { + push @profiles, $ENV{'HOME'}."/.parallel/".$profile; + } + } + } + for my $profile (@profiles) { + if(-r $profile) { + open (my $in_fh, "<", $profile) || ::die_bug("read-profile: $profile"); + while(<$in_fh>) { + /^\s*\#/ and next; + chomp; + push @ARGV_profile, shellwords($_); + } + close $in_fh; + } else { + if(grep /^$profile$/, @config_profiles) { + # config file is not required to exist + } else { + ::error("$profile not readable.\n"); + wait_and_exit(255); + } + } + } + # Add options from shell variable $PARALLEL + if($ENV{'PARALLEL'}) { + @ARGV_env = shellwords($ENV{'PARALLEL'}); + } + } + Getopt::Long::Configure("bundling","require_order"); + get_options_from_array(\@ARGV_profile) || die_usage(); + get_options_from_array(\@ARGV_env) || die_usage(); + get_options_from_array(\@ARGV) || die_usage(); + + # Prepend non-options to @ARGV (such as commands like 'nice') + unshift @ARGV, @ARGV_profile, @ARGV_env; + return @ARGV; +} + +sub read_args_from_command_line { + # Arguments given on the command line after: + # ::: ($Global::arg_sep) + # :::: ($Global::arg_file_sep) + # Removes the arguments from @ARGV and: + # - puts filenames into -a + # - puts arguments into files and add the files to -a + # Input: + # @::ARGV = command option ::: arg arg arg :::: argfiles + # Uses: + # $Global::arg_sep + # $Global::arg_file_sep + # $opt::internal_pipe_means_argfiles + # $opt::pipe + # @opt::a + # Returns: + # @argv_no_argsep = @::ARGV without ::: and :::: and following args + my @new_argv = (); + for(my $arg = shift @ARGV; @ARGV; $arg = shift @ARGV) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_file_sep) { + my $group = $arg; # This group of arguments is args or argfiles + my @group; + while(defined ($arg = shift @ARGV)) { + if($arg eq $Global::arg_sep + or + $arg eq $Global::arg_file_sep) { + # exit while loop if finding new separator + last; + } else { + # If not hitting ::: or :::: + # Append it to the group + push @group, $arg; + } + } + + if($group eq $Global::arg_file_sep + or ($opt::internal_pipe_means_argfiles and $opt::pipe) + ) { + # Group of file names on the command line. + # Append args into -a + push @opt::a, @group; + } elsif($group eq $Global::arg_sep) { + # Group of arguments on the command line. + # Put them into a file. 
+ # Create argfile + my ($outfh,$name) = ::tmpfile(SUFFIX => ".arg"); + unlink($name); + # Put args into argfile + print $outfh map { $_,$/ } @group; + seek $outfh, 0, 0; + # Append filehandle to -a + push @opt::a, $outfh; + } else { + ::die_bug("Unknown command line group: $group"); + } + if(defined($arg)) { + # $arg is ::: or :::: + redo; + } else { + # $arg is undef -> @ARGV empty + last; + } + } + push @new_argv, $arg; + } + # Output: @ARGV = command to run with options + return @new_argv; +} + +sub cleanup { + # Returns: N/A + if(@opt::basefile) { cleanup_basefile(); } +} + +sub __QUOTING_ARGUMENTS_FOR_SHELL__ {} + +sub shell_quote { + # Input: + # @strings = strings to be quoted + # Output: + # @shell_quoted_strings = string quoted with \ as needed by the shell + my @strings = (@_); + for my $a (@strings) { + $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + $a =~ s/[\n]/'\n'/g; # filenames with '\n' is quoted using \' + } + return wantarray ? @strings : "@strings"; +} + +sub shell_quote_empty { + # Inputs: + # @strings = strings to be quoted + # Returns: + # @quoted_strings = empty strings quoted as ''. + my @strings = shell_quote(@_); + for my $a (@strings) { + if($a eq "") { + $a = "''"; + } + } + return wantarray ? @strings : "@strings"; +} + +sub shell_quote_scalar { + # Quote the string so shell will not expand any special chars + # Inputs: + # $string = string to be quoted + # Returns: + # $shell_quoted = string quoted with \ as needed by the shell + my $a = $_[0]; + if(defined $a) { + # $a =~ s/([\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377])/\\$1/g; + # This is 1% faster than the above + $a =~ s/[\002-\011\013-\032\\\#\?\`\(\)\{\}\[\]\*\>\<\~\|\; \"\!\$\&\'\202-\377]/\\$&/go; + $a =~ s/[\n]/'\n'/go; # filenames with '\n' is quoted using \' + } + return $a; +} + +sub shell_quote_file { + # Quote the string so shell will not expand any special chars and prepend ./ if needed + # Input: + # $filename = filename to be shell quoted + # Returns: + # $quoted_filename = filename quoted with \ as needed by the shell and ./ if needed + my $a = shell_quote_scalar(shift); + if(defined $a) { + if($a =~ m:^/: or $a =~ m:^\./:) { + # /abs/path or ./rel/path => skip + } else { + # rel/path => ./rel/path + $a = "./".$a; + } + } + return $a; +} + +sub shellwords { + # Input: + # $string = shell line + # Returns: + # @shell_words = $string split into words as shell would do + $Global::use{"Text::ParseWords"} ||= eval "use Text::ParseWords; 1;"; + return Text::ParseWords::shellwords(@_); +} + + +sub __FILEHANDLES__ {} + + +sub save_stdin_stdout_stderr { + # Remember the original STDIN, STDOUT and STDERR + # and file descriptors opened by the shell (e.g. 
3>/tmp/foo) + # Uses: + # %Global::fd + # $Global::original_stderr + # $Global::original_stdin + # Returns: N/A + + # Find file descriptors that are already opened (by the shell) + for my $fdno (1..61) { + # /dev/fd/62 and above are used by bash for <(cmd) + my $fh; + # 2-argument-open is used to be compatible with old perl 5.8.0 + # bug #43570: Perl 5.8.0 creates 61 files + if(open($fh,">&=$fdno")) { + $Global::fd{$fdno}=$fh; + } + } + open $Global::original_stderr, ">&", "STDERR" or + ::die_bug("Can't dup STDERR: $!"); + open $Global::original_stdin, "<&", "STDIN" or + ::die_bug("Can't dup STDIN: $!"); +} + +sub enough_file_handles { + # Check that we have enough filehandles available for starting + # another job + # Uses: + # $opt::ungroup + # %Global::fd + # Returns: + # 1 if ungrouped (thus not needing extra filehandles) + # 0 if too few filehandles + # 1 if enough filehandles + if(not $opt::ungroup) { + my %fh; + my $enough_filehandles = 1; + # perl uses 7 filehandles for something? + # open3 uses 2 extra filehandles temporarily + # We need a filehandle for each redirected file descriptor + # (normally just STDOUT and STDERR) + for my $i (1..(7+2+keys %Global::fd)) { + $enough_filehandles &&= open($fh{$i}, "<", "/dev/null"); + } + for (values %fh) { close $_; } + return $enough_filehandles; + } else { + # Ungrouped does not need extra file handles + return 1; + } +} + +sub open_or_exit { + # Open a file name or exit if the file cannot be opened + # Inputs: + # $file = filehandle or filename to open + # Uses: + # $Global::stdin_in_opt_a + # $Global::original_stdin + # Returns: + # $fh = file handle to read-opened file + my $file = shift; + if($file eq "-") { + $Global::stdin_in_opt_a = 1; + return ($Global::original_stdin || *STDIN); + } + if(ref $file eq "GLOB") { + # This is an open filehandle + return $file; + } + my $fh = gensym; + if(not open($fh, "<", $file)) { + ::error("Cannot open input file `$file': No such file or directory.\n"); + wait_and_exit(255); + } + return $fh; +} + +sub __RUNNING_THE_JOBS_AND_PRINTING_PROGRESS__ {} + +# Variable structure: +# +# $Global::running{$pid} = Pointer to Job-object +# @Global::virgin_jobs = Pointer to Job-object that have received no input +# $Global::host{$sshlogin} = Pointer to SSHLogin-object +# $Global::total_running = total number of running jobs +# $Global::total_started = total jobs started + +sub init_run_jobs { + $Global::total_running = 0; + $Global::total_started = 0; + $Global::tty_taken = 0; + $SIG{USR1} = \&list_running_jobs; + $SIG{USR2} = \&toggle_progress; + if(@opt::basefile) { setup_basefile(); } +} + +{ + my $last_time; + my %last_mtime; + +sub start_more_jobs { + # Run start_another_job() but only if: + # * not $Global::start_no_new_jobs set + # * not JobQueue is empty + # * not load on server is too high + # * not server swapping + # * not too short time since last remote login + # Uses: + # $Global::max_procs_file + # $Global::max_procs_file_last_mod + # %Global::host + # @opt::sshloginfile + # $Global::start_no_new_jobs + # $opt::filter_hosts + # $Global::JobQueue + # $opt::pipe + # $opt::load + # $opt::noswap + # $opt::delay + # $Global::newest_starttime + # Returns: + # $jobs_started = number of jobs started + my $jobs_started = 0; + my $jobs_started_this_round = 0; + if($Global::start_no_new_jobs) { + return $jobs_started; + } + if(time - ($last_time||0) > 1) { + # At most do this every second + $last_time = time; + if($Global::max_procs_file) { + # --jobs filename + my $mtime = 
(stat($Global::max_procs_file))[9]; + if($mtime > $Global::max_procs_file_last_mod) { + # file changed: Force re-computing max_jobs_running + $Global::max_procs_file_last_mod = $mtime; + for my $sshlogin (values %Global::host) { + $sshlogin->set_max_jobs_running(undef); + } + } + } + if(@opt::sshloginfile) { + # Is --sshloginfile changed? + for my $slf (@opt::sshloginfile) { + my $actual_file = expand_slf_shorthand($slf); + my $mtime = (stat($actual_file))[9]; + $last_mtime{$actual_file} ||= $mtime; + if($mtime - $last_mtime{$actual_file} > 1) { + ::debug("run","--sshloginfile $actual_file changed. reload\n"); + $last_mtime{$actual_file} = $mtime; + # Reload $slf + # Empty sshlogins + @Global::sshlogin = (); + for (values %Global::host) { + # Don't start new jobs on any host + # except the ones added back later + $_->set_max_jobs_running(0); + } + # This will set max_jobs_running on the SSHlogins + read_sshloginfile($actual_file); + parse_sshlogin(); + $opt::filter_hosts and filter_hosts(); + setup_basefile(); + } + } + } + } + do { + $jobs_started_this_round = 0; + # This will start 1 job on each --sshlogin (if possible) + # thus distribute the jobs on the --sshlogins round robin + + for my $sshlogin (values %Global::host) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more jobs in the queue + last; + } + debug("run", "Running jobs before on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), "\n"); + if ($sshlogin->jobs_running() < $sshlogin->max_jobs_running()) { + if($opt::load and $sshlogin->loadavg_too_high()) { + # The load is too high or unknown + next; + } + if($opt::noswap and $sshlogin->swapping()) { + # The server is swapping + next; + } + if($sshlogin->too_fast_remote_login()) { + # It has been too short since + next; + } + if($opt::delay and $opt::delay > ::now() - $Global::newest_starttime) { + # It has been too short since last start + next; + } + debug("run", $sshlogin->string(), " has ", $sshlogin->jobs_running(), + " out of ", $sshlogin->max_jobs_running(), + " jobs running. Start another.\n"); + if(start_another_job($sshlogin) == 0) { + # No more jobs to start on this $sshlogin + debug("run","No jobs started on ", $sshlogin->string(), "\n"); + next; + } + $sshlogin->inc_jobs_running(); + $sshlogin->set_last_login_at(::now()); + $jobs_started++; + $jobs_started_this_round++; + } + debug("run","Running jobs after on ", $sshlogin->string(), ": ", + $sshlogin->jobs_running(), " of ", + $sshlogin->max_jobs_running(), "\n"); + } + } while($jobs_started_this_round); + + return $jobs_started; +} +} + +{ + my $no_more_file_handles_warned; + +sub start_another_job { + # If there are enough filehandles + # and JobQueue not empty + # and not $job is in joblog + # Then grab a job from Global::JobQueue, + # start it at sshlogin + # mark it as virgin_job + # Inputs: + # $sshlogin = the SSHLogin to start the job on + # Uses: + # $Global::JobQueue + # $opt::pipe + # $opt::results + # $opt::resume + # @Global::virgin_jobs + # Returns: + # 1 if another jobs was started + # 0 otherwise + my $sshlogin = shift; + # Do we have enough file handles to start another job? 
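+    # enough_file_handles() (defined above) probes this by test-opening
+    # roughly 7+2+(keys %Global::fd) handles on /dev/null when output is
+    # grouped, so descriptor exhaustion is caught here before forking.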
+ if(enough_file_handles()) { + if($Global::JobQueue->empty() and not $opt::pipe) { + # No more commands to run + debug("start", "Not starting: JobQueue empty\n"); + return 0; + } else { + my $job; + # Skip jobs already in job log + # Skip jobs already in results + do { + $job = get_job_with_sshlogin($sshlogin); + if(not defined $job) { + # No command available for that sshlogin + debug("start", "Not starting: no jobs available for ", + $sshlogin->string(), "\n"); + return 0; + } + } while ($job->is_already_in_joblog() + or + ($opt::results and $opt::resume and $job->is_already_in_results())); + debug("start", "Command to run on '", $job->sshlogin()->string(), "': '", + $job->replaced(),"'\n"); + if($job->start()) { + if($opt::pipe) { + push(@Global::virgin_jobs,$job); + } + debug("start", "Started as seq ", $job->seq(), + " pid:", $job->pid(), "\n"); + return 1; + } else { + # Not enough processes to run the job. + # Put it back on the queue. + $Global::JobQueue->unget($job); + # Count down the number of jobs to run for this SSHLogin. + my $max = $sshlogin->max_jobs_running(); + if($max > 1) { $max--; } else { + ::error("No more processes: cannot run a single job. Something is wrong.\n"); + ::wait_and_exit(255); + } + $sshlogin->set_max_jobs_running($max); + # Sleep up to 300 ms to give other processes time to die + ::usleep(rand()*300); + ::warning("No more processes: ", + "Decreasing number of running jobs to $max. ", + "Raising ulimit -u or /etc/security/limits.conf may help.\n"); + return 0; + } + } + } else { + # No more file handles + $no_more_file_handles_warned++ or + ::warning("No more file handles. ", + "Raising ulimit -n or /etc/security/limits.conf may help.\n"); + return 0; + } +} +} + +sub init_progress { + # Uses: + # $opt::bar + # Returns: + # list of computers for progress output + $|=1; + if($opt::bar) { + return("",""); + } + my %progress = progress(); + return ("\nComputers / CPU cores / Max jobs to run\n", + $progress{'workerlist'}); +} + +sub drain_job_queue { + # Uses: + # $opt::progress + # $Global::original_stderr + # $Global::total_running + # $Global::max_jobs_running + # %Global::running + # $Global::JobQueue + # %Global::host + # $Global::start_no_new_jobs + # Returns: N/A + if($opt::progress) { + print $Global::original_stderr init_progress(); + } + my $last_header=""; + my $sleep = 0.2; + do { + while($Global::total_running > 0) { + debug($Global::total_running, "==", scalar + keys %Global::running," slots: ", $Global::max_jobs_running); + if($opt::pipe) { + # When using --pipe sometimes file handles are not closed properly + for my $job (values %Global::running) { + close $job->fh(0,"w"); + } + } + if($opt::progress) { + my %progress = progress(); + if($last_header ne $progress{'header'}) { + print $Global::original_stderr "\n", $progress{'header'}, "\n"; + $last_header = $progress{'header'}; + } + print $Global::original_stderr "\r",$progress{'status'}; + flush $Global::original_stderr; + } + if($Global::total_running < $Global::max_jobs_running + and not $Global::JobQueue->empty()) { + # These jobs may not be started because of loadavg + # or too little time between each ssh login. 
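+            # The sleep below adapts: it is halved whenever jobs start, and
+            # ::reap_usleep() is expected to grow it again while the queue
+            # makes no progress, giving an exponential back-off.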
+ if(start_more_jobs() > 0) { + # Exponential back-on if jobs were started + $sleep = $sleep/2+0.001; + } + } + # Sometimes SIGCHLD is not registered, so force reaper + $sleep = ::reap_usleep($sleep); + } + if(not $Global::JobQueue->empty()) { + # These jobs may not be started: + # * because there the --filter-hosts has removed all + if(not %Global::host) { + ::error("There are no hosts left to run on.\n"); + ::wait_and_exit(255); + } + # * because of loadavg + # * because of too little time between each ssh login. + start_more_jobs(); + $sleep = ::reap_usleep($sleep); + if($Global::max_jobs_running == 0) { + ::warning("There are no job slots available. Increase --jobs.\n"); + } + } + } while ($Global::total_running > 0 + or + not $Global::start_no_new_jobs and not $Global::JobQueue->empty()); + if($opt::progress) { + my %progress = progress(); + print $Global::original_stderr "\r", $progress{'status'}, "\n"; + flush $Global::original_stderr; + } +} + +sub toggle_progress { + # Turn on/off progress view + # Uses: + # $opt::progress + # $Global::original_stderr + # Returns: N/A + $opt::progress = not $opt::progress; + if($opt::progress) { + print $Global::original_stderr init_progress(); + } +} + +sub progress { + # Uses: + # $opt::bar + # $opt::eta + # %Global::host + # $Global::total_started + # Returns: + # $workerlist = list of workers + # $header = that will fit on the screen + # $status = message that will fit on the screen + if($opt::bar) { + return ("workerlist" => "", "header" => "", "status" => bar()); + } + my $eta = ""; + my ($status,$header)=("",""); + if($opt::eta) { + my($total, $completed, $left, $pctcomplete, $avgtime, $this_eta) = + compute_eta(); + $eta = sprintf("ETA: %ds Left: %d AVG: %.2fs ", + $this_eta, $left, $avgtime); + } + my $termcols = terminal_columns(); + my @workers = sort keys %Global::host; + my %sshlogin = map { $_ eq ":" ? ($_=>"local") : ($_=>$_) } @workers; + my $workerno = 1; + my %workerno = map { ($_=>$workerno++) } @workers; + my $workerlist = ""; + for my $w (@workers) { + $workerlist .= + $workerno{$w}.":".$sshlogin{$w} ." / ". + ($Global::host{$w}->ncpus() || "-")." / ". + $Global::host{$w}->max_jobs_running()."\n"; + } + $status = "x"x($termcols+1); + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX%/XX.Xs sshlogin2:XX/XX/XX%/XX.Xs sshlogin3:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete"; + $status = $eta . + join(" ",map + { + if($Global::total_started) { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? (time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $sshlogin{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } + } @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX%/XX.Xs 2:XX/XX/XX%/XX.Xs 3:XX/XX/XX%/XX.Xs 4:XX/XX/XX%/XX.Xs + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { + my $completed = ($Global::host{$_}->jobs_completed()||0); + my $running = $Global::host{$_}->jobs_running(); + my $time = $completed ? 
(time-$^T)/($completed) : "0"; + sprintf("%s:%d/%d/%d%%/%.1fs ", + $workerno{$_}, $running, $completed, + ($running+$completed)*100 + / $Global::total_started, $time); + } @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d/%d%%", + $sshlogin{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX/XX% 2:XX/XX/XX% 3:XX/XX/XX% 4:XX/XX/XX% 5:XX/XX/XX% 6:XX/XX/XX% + $header = "Computer:jobs running/jobs completed/%of started jobs"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d/%d%%", + $workerno{$_}, + $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0), + ($Global::host{$_}->jobs_running()+ + ($Global::host{$_}->jobs_completed()||0))*100 + / $Global::total_started) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX/XX% sshlogin2:XX/XX/XX% sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX/XX sshlogin2:XX/XX sshlogin3:XX/XX sshlogin4:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $sshlogin{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX/XX 2:XX/XX 3:XX/XX 4:XX/XX 5:XX/XX 6:XX/XX + $header = "Computer:jobs running/jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d/%d", + $workerno{$_}, $Global::host{$_}->jobs_running(), + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # sshlogin1:XX sshlogin2:XX sshlogin3:XX sshlogin4:XX sshlogin5:XX + $header = "Computer:jobs completed"; + $status = $eta . + join(" ",map + { sprintf("%s:%d", + $sshlogin{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + if(length $status > $termcols) { + # 1:XX 2:XX 3:XX 4:XX 5:XX 6:XX + $header = "Computer:jobs completed"; + $status = $eta . 
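+      # Last and narrowest fallback in the cascade: progress() keeps
+      # rebuilding $status in shorter formats until it fits
+      # terminal_columns(): full sshlogin names with
+      # running/completed/%/avg-seconds first, then numeric worker ids,
+      # then the percentage and average dropped, and finally just
+      # "workerno:jobs completed" as built here.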
+ join(" ",map + { sprintf("%s:%d", + $workerno{$_}, + ($Global::host{$_}->jobs_completed()||0)) } + @workers); + } + return ("workerlist" => $workerlist, "header" => $header, "status" => $status); +} + +{ + my ($total, $first_completed, $smoothed_avg_time); + + sub compute_eta { + # Calculate important numbers for ETA + # Returns: + # $total = number of jobs in total + # $completed = number of jobs completed + # $left = number of jobs left + # $pctcomplete = percent of jobs completed + # $avgtime = averaged time + # $eta = smoothed eta + $total ||= $Global::JobQueue->total_jobs(); + my $completed = 0; + for(values %Global::host) { $completed += $_->jobs_completed() } + my $left = $total - $completed; + if(not $completed) { + return($total, $completed, $left, 0, 0, 0); + } + my $pctcomplete = $completed / $total; + $first_completed ||= time; + my $timepassed = (time - $first_completed); + my $avgtime = $timepassed / $completed; + $smoothed_avg_time ||= $avgtime; + # Smooth the eta so it does not jump wildly + $smoothed_avg_time = (1 - $pctcomplete) * $smoothed_avg_time + + $pctcomplete * $avgtime; + my $eta = int($left * $smoothed_avg_time); + return($total, $completed, $left, $pctcomplete, $avgtime, $eta); + } +} + +{ + my ($rev,$reset); + + sub bar { + # Return: + # $status = bar with eta, completed jobs, arg and pct + $rev ||= "\033[7m"; + $reset ||= "\033[0m"; + my($total, $completed, $left, $pctcomplete, $avgtime, $eta) = + compute_eta(); + my $arg = $Global::newest_job ? + $Global::newest_job->{'commandline'}->replace_placeholders(["\257<\257>"],0,0) : ""; + # These chars mess up display in the terminal + $arg =~ tr/[\011-\016\033\302-\365]//d; + my $bar_text = + sprintf("%d%% %d:%d=%ds %s", + $pctcomplete*100, $completed, $left, $eta, $arg); + my $terminal_width = terminal_columns(); + my $s = sprintf("%-${terminal_width}s", + substr($bar_text." "x$terminal_width, + 0,$terminal_width)); + my $width = int($terminal_width * $pctcomplete); + substr($s,$width,0) = $reset; + my $zenity = sprintf("%-${terminal_width}s", + substr("# $eta sec $arg", + 0,$terminal_width)); + $s = "\r" . $zenity . "\r" . $pctcomplete*100 . # Prefix with zenity header + "\r" . $rev . $s . 
$reset; + return $s; + } +} + +{ + my ($columns,$last_column_time); + + sub terminal_columns { + # Get the number of columns of the display + # Returns: + # number of columns of the screen + if(not $columns or $last_column_time < time) { + $last_column_time = time; + $columns = $ENV{'COLUMNS'}; + if(not $columns) { + my $resize = qx{ resize 2>/dev/null }; + $resize =~ /COLUMNS=(\d+);/ and do { $columns = $1; }; + } + $columns ||= 80; + } + return $columns; + } +} + +sub get_job_with_sshlogin { + # Returns: + # next job object for $sshlogin if any available + my $sshlogin = shift; + my $job = undef; + + if ($opt::hostgroups) { + my @other_hostgroup_jobs = (); + + while($job = $Global::JobQueue->get()) { + if($sshlogin->in_hostgroups($job->hostgroups())) { + # Found a job for this hostgroup + last; + } else { + # This job was not in the hostgroups of $sshlogin + push @other_hostgroup_jobs, $job; + } + } + $Global::JobQueue->unget(@other_hostgroup_jobs); + if(not defined $job) { + # No more jobs + return undef; + } + } else { + $job = $Global::JobQueue->get(); + if(not defined $job) { + # No more jobs + ::debug("start", "No more jobs: JobQueue empty\n"); + return undef; + } + } + + my $clean_command = $job->replaced(); + if($clean_command =~ /^\s*$/) { + # Do not run empty lines + if(not $Global::JobQueue->empty()) { + return get_job_with_sshlogin($sshlogin); + } else { + return undef; + } + } + $job->set_sshlogin($sshlogin); + if($opt::retries and $clean_command and + $job->failed_here()) { + # This command with these args failed for this sshlogin + my ($no_of_failed_sshlogins,$min_failures) = $job->min_failed(); + # Only look at the Global::host that have > 0 jobslots + if($no_of_failed_sshlogins == grep { $_->max_jobs_running() > 0 } values %Global::host + and $job->failed_here() == $min_failures) { + # It failed the same or more times on another host: + # run it on this host + } else { + # If it failed fewer times on another host: + # Find another job to run + my $nextjob; + if(not $Global::JobQueue->empty()) { + # This can potentially recurse for all args + no warnings 'recursion'; + $nextjob = get_job_with_sshlogin($sshlogin); + } + # Push the command back on the queue + $Global::JobQueue->unget($job); + return $nextjob; + } + } + return $job; +} + +sub __REMOTE_SSH__ {} + +sub read_sshloginfiles { + # Returns: N/A + for my $s (@_) { + read_sshloginfile(expand_slf_shorthand($s)); + } +} + +sub expand_slf_shorthand { + my $file = shift; + if($file eq "-") { + # skip: It is stdin + } elsif($file eq "..") { + $file = $ENV{'HOME'}."/.parallel/sshloginfile"; + } elsif($file eq ".") { + $file = "/etc/parallel/sshloginfile"; + } elsif(not -r $file) { + if(not -r $ENV{'HOME'}."/.parallel/".$file) { + # Try prepending ~/.parallel + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } else { + $file = $ENV{'HOME'}."/.parallel/".$file; + } + } + return $file; +} + +sub read_sshloginfile { + # Returns: N/A + my $file = shift; + my $close = 1; + my $in_fh; + ::debug("init","--slf ",$file); + if($file eq "-") { + $in_fh = *STDIN; + $close = 0; + } else { + if(not open($in_fh, "<", $file)) { + # Try the filename + ::error("Cannot open $file.\n"); + ::wait_and_exit(255); + } + } + while(<$in_fh>) { + chomp; + /^\s*#/ and next; + /^\s*$/ and next; + push @Global::sshlogin, $_; + } + if($close) { + close $in_fh; + } +} + +sub parse_sshlogin { + # Returns: N/A + my @login; + if(not @Global::sshlogin) { @Global::sshlogin = (":"); } + for my $sshlogin (@Global::sshlogin) { + # Split up -S 
sshlogin,sshlogin + for my $s (split /,/, $sshlogin) { + if ($s eq ".." or $s eq "-") { + # This may add to @Global::sshlogin - possibly bug + read_sshloginfile(expand_slf_shorthand($s)); + } else { + push (@login, $s); + } + } + } + $Global::minimal_command_line_length = 8_000_000; + my @allowed_hostgroups; + for my $ncpu_sshlogin_string (::uniq(@login)) { + my $sshlogin = SSHLogin->new($ncpu_sshlogin_string); + my $sshlogin_string = $sshlogin->string(); + if($sshlogin_string eq "") { + # This is an ssh group: -S @webservers + push @allowed_hostgroups, $sshlogin->hostgroups(); + next; + } + if($Global::host{$sshlogin_string}) { + # This sshlogin has already been added: + # It is probably a host that has come back + # Set the max_jobs_running back to the original + debug("run","Already seen $sshlogin_string\n"); + if($sshlogin->{'ncpus'}) { + # If ncpus set by '#/' of the sshlogin, overwrite it: + $Global::host{$sshlogin_string}->set_ncpus($sshlogin->ncpus()); + } + $Global::host{$sshlogin_string}->set_max_jobs_running(undef); + next; + } + if($sshlogin_string eq ":") { + $sshlogin->set_maxlength(Limits::Command::max_length()); + } else { + # If all chars needs to be quoted, every other character will be \ + $sshlogin->set_maxlength(int(Limits::Command::max_length()/2)); + } + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, $sshlogin->maxlength()); + $Global::host{$sshlogin_string} = $sshlogin; + } + if(@allowed_hostgroups) { + # Remove hosts that are not in these groups + while (my ($string, $sshlogin) = each %Global::host) { + if(not $sshlogin->in_hostgroups(@allowed_hostgroups)) { + delete $Global::host{$string}; + } + } + } + + # debug("start", "sshlogin: ", my_dump(%Global::host),"\n"); + if($opt::transfer or @opt::return or $opt::cleanup or @opt::basefile) { + if(not remote_hosts()) { + # There are no remote hosts + if(@opt::trc) { + ::warning("--trc ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::transfer) { + ::warning("--transfer ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::return) { + ::warning("--return ignored as there are no remote --sshlogin.\n"); + } elsif (defined $opt::cleanup) { + ::warning("--cleanup ignored as there are no remote --sshlogin.\n"); + } elsif (@opt::basefile) { + ::warning("--basefile ignored as there are no remote --sshlogin.\n"); + } + } + } +} + +sub remote_hosts { + # Return sshlogins that are not ':' + # Returns: + # list of sshlogins with ':' removed + return grep !/^:$/, keys %Global::host; +} + +sub setup_basefile { + # Transfer basefiles to each $sshlogin + # This needs to be done before first jobs on $sshlogin is run + # Returns: N/A + my $cmd = ""; + my $rsync_destdir; + my $workdir; + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + if($file !~ m:^/: and $opt::workdir eq "...") { + ::error("Work dir '...' will not work with relative basefiles\n"); + ::wait_and_exit(255); + } + $workdir ||= Job->new("")->workdir(); + $cmd .= $sshlogin->rsync_transfer_cmd($file,$workdir) . 
"&"; + } + } + $cmd .= "wait;"; + debug("init", "basesetup: $cmd\n"); + print `$cmd`; +} + +sub cleanup_basefile { + # Remove the basefiles transferred + # Returns: N/A + my $cmd=""; + my $workdir = Job->new("")->workdir(); + for my $sshlogin (values %Global::host) { + if($sshlogin->string() eq ":") { next } + for my $file (@opt::basefile) { + $cmd .= $sshlogin->cleanup_cmd($file,$workdir)."&"; + } + } + $cmd .= "wait;"; + debug("init", "basecleanup: $cmd\n"); + print `$cmd`; +} + +sub filter_hosts { + my(@cores, @cpus, @maxline, @echo); + my $envvar = ::shell_quote_scalar($Global::envvar); + while (my ($host, $sshlogin) = each %Global::host) { + if($host eq ":") { next } + # The 'true' is used to get the $host out later + my $sshcmd = "true $host;" . $sshlogin->sshcommand()." ".$sshlogin->serverlogin(); + push(@cores, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cores\n\0"); + push(@cpus, $host."\t".$sshcmd." ".$envvar." parallel --number-of-cpus\n\0"); + push(@maxline, $host."\t".$sshcmd." ".$envvar." parallel --max-line-length-allowed\n\0"); + # 'echo' is used to get the best possible value for an ssh login time + push(@echo, $host."\t".$sshcmd." echo\n\0"); + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".ssh"); + print $fh @cores, @cpus, @maxline, @echo; + close $fh; + # --timeout 5: Setting up an SSH connection and running a simple + # command should never take > 5 sec. + # --delay 0.1: If multiple sshlogins use the same proxy the delay + # will make it less likely to overload the ssh daemon. + # --retries 3: If the ssh daemon it overloaded, try 3 times + # -s 16000: Half of the max line on UnixWare + my $cmd = "cat $tmpfile | $0 -j0 --timeout 5 -s 16000 --joblog - --plain --delay 0.1 --retries 3 --tag --tagstring {1} -0 --colsep '\t' -k eval {2} 2>/dev/null"; + ::debug("init", $cmd, "\n"); + open(my $host_fh, "-|", $cmd) || ::die_bug("parallel host check: $cmd"); + my (%ncores, %ncpus, %time_to_login, %maxlen, %echo, @down_hosts); + my $prepend = ""; + while(<$host_fh>) { + if(/\'$/) { + # if last char = ' then append next line + # This may be due to quoting of $Global::envvar + $prepend .= $_; + next; + } + $_ = $prepend . $_; + $prepend = ""; + chomp; + my @col = split /\t/, $_; + if(defined $col[6]) { + # This is a line from --joblog + # seq host time spent sent received exit signal command + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ parallel\ --number-of-cores + if($col[0] eq "Seq" and $col[1] eq "Host" and + $col[2] eq "Starttime") { + # Header => skip + next; + } + # Get server from: eval true server\; + $col[8] =~ /eval true..([^;]+).;/ or ::die_bug("col8 does not contain host: $col[8]"); + my $host = $1; + $host =~ tr/\\//d; + $Global::host{$host} or next; + if($col[6] eq "255" or $col[7] eq "15") { + # exit == 255 or signal == 15: ssh failed + # Remove sshlogin + ::debug("init", "--filtered $host\n"); + push(@down_hosts, $host); + @down_hosts = uniq(@down_hosts); + } elsif($col[6] eq "127") { + # signal == 127: parallel not installed remote + # Set ncpus and ncores = 1 + ::warning("Could not figure out ", + "number of cpus on $host. 
Using 1.\n"); + $ncores{$host} = 1; + $ncpus{$host} = 1; + $maxlen{$host} = Limits::Command::max_length(); + } elsif($col[0] =~ /^\d+$/ and $Global::host{$host}) { + # Remember how log it took to log in + # 2 : 1372607672.654 0.675 0 0 0 0 eval true\ m\;ssh\ m\ echo + $time_to_login{$host} = ::min($time_to_login{$host},$col[3]); + } else { + ::die_bug("host check unmatched long jobline: $_"); + } + } elsif($Global::host{$col[0]}) { + # This output from --number-of-cores, --number-of-cpus, + # --max-line-length-allowed + # ncores: server 8 + # ncpus: server 2 + # maxlen: server 131071 + if(not $ncores{$col[0]}) { + $ncores{$col[0]} = $col[1]; + } elsif(not $ncpus{$col[0]}) { + $ncpus{$col[0]} = $col[1]; + } elsif(not $maxlen{$col[0]}) { + $maxlen{$col[0]} = $col[1]; + } elsif(not $echo{$col[0]}) { + $echo{$col[0]} = $col[1]; + } elsif(m/perl: warning:|LANGUAGE =|LC_ALL =|LANG =|are supported and installed/) { + # Skip these: + # perl: warning: Setting locale failed. + # perl: warning: Please check that your locale settings: + # LANGUAGE = (unset), + # LC_ALL = (unset), + # LANG = "en_US.UTF-8" + # are supported and installed on your system. + # perl: warning: Falling back to the standard locale ("C"). + } else { + ::die_bug("host check too many col0: $_"); + } + } else { + ::die_bug("host check unmatched short jobline ($col[0]): $_"); + } + } + close $host_fh; + $Global::debug or unlink $tmpfile; + delete @Global::host{@down_hosts}; + @down_hosts and ::warning("Removed @down_hosts\n"); + $Global::minimal_command_line_length = 8_000_000; + while (my ($sshlogin, $obj) = each %Global::host) { + if($sshlogin eq ":") { next } + $ncpus{$sshlogin} or ::die_bug("ncpus missing: ".$obj->serverlogin()); + $ncores{$sshlogin} or ::die_bug("ncores missing: ".$obj->serverlogin()); + $time_to_login{$sshlogin} or ::die_bug("time_to_login missing: ".$obj->serverlogin()); + $maxlen{$sshlogin} or ::die_bug("maxlen missing: ".$obj->serverlogin()); + if($opt::use_cpus_instead_of_cores) { + $obj->set_ncpus($ncpus{$sshlogin}); + } else { + $obj->set_ncpus($ncores{$sshlogin}); + } + $obj->set_time_to_login($time_to_login{$sshlogin}); + $obj->set_maxlength($maxlen{$sshlogin}); + $Global::minimal_command_line_length = + ::min($Global::minimal_command_line_length, + int($maxlen{$sshlogin}/2)); + ::debug("init", "Timing from -S:$sshlogin ncpus:",$ncpus{$sshlogin}, + " ncores:", $ncores{$sshlogin}, + " time_to_login:", $time_to_login{$sshlogin}, + " maxlen:", $maxlen{$sshlogin}, + " min_max_len:", $Global::minimal_command_line_length,"\n"); + } +} + +sub onall { + sub tmp_joblog { + my $joblog = shift; + if(not defined $joblog) { + return undef; + } + my ($fh, $tmpfile) = ::tmpfile(SUFFIX => ".log"); + close $fh; + return $tmpfile; + } + my @command = @_; + if($Global::quoting) { + @command = shell_quote_empty(@command); + } + + # Copy all @fhlist into tempfiles + my @argfiles = (); + for my $fh (@fhlist) { + my ($outfh, $name) = ::tmpfile(SUFFIX => ".all", UNLINK => 1); + print $outfh (<$fh>); + close $outfh; + push @argfiles, $name; + } + if(@opt::basefile) { setup_basefile(); } + # for each sshlogin do: + # parallel -S $sshlogin $command :::: @argfiles + # + # Pass some of the options to the sub-parallels, not all of them as + # -P should only go to the first, and -S should not be copied at all. + my $options = + join(" ", + ((defined $opt::jobs) ? "-P $opt::jobs" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::group) ? 
"-g" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ); + my $suboptions = + join(" ", + ((defined $opt::ungroup) ? "-u" : ""), + ((defined $opt::linebuffer) ? "--linebuffer" : ""), + ((defined $opt::group) ? "-g" : ""), + ((defined $opt::files) ? "--files" : ""), + ((defined $opt::keeporder) ? "--keeporder" : ""), + ((defined $opt::colsep) ? "--colsep ".shell_quote($opt::colsep) : ""), + ((@opt::v) ? "-vv" : ""), + ((defined $opt::D) ? "-D $opt::D" : ""), + ((defined $opt::timeout) ? "--timeout ".$opt::timeout : ""), + ((defined $opt::plain) ? "--plain" : ""), + ((defined $opt::retries) ? "--retries ".$opt::retries : ""), + ((defined $opt::max_chars) ? "--max-chars ".$opt::max_chars : ""), + ((defined $opt::arg_sep) ? "--arg-sep ".$opt::arg_sep : ""), + ((defined $opt::arg_file_sep) ? "--arg-file-sep ".$opt::arg_file_sep : ""), + (@opt::env ? map { "--env ".::shell_quote_scalar($_) } @opt::env : ""), + ); + ::debug("init", "| $0 $options\n"); + open(my $parallel_fh, "|-", "$0 --no-notice -j0 $options") || + ::die_bug("This does not run GNU Parallel: $0 $options"); + my @joblogs; + for my $host (sort keys %Global::host) { + my $sshlogin = $Global::host{$host}; + my $joblog = tmp_joblog($opt::joblog); + if($joblog) { + push @joblogs, $joblog; + $joblog = "--joblog $joblog"; + } + my $quad = $opt::arg_file_sep || "::::"; + ::debug("init", "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string())," ", + join(" ",shell_quote(@command))," $quad @argfiles\n"); + print $parallel_fh "$0 $suboptions -j1 $joblog ", + ((defined $opt::tag) ? + "--tagstring ".shell_quote_scalar($sshlogin->string()) : ""), + " -S ", shell_quote_scalar($sshlogin->string()),"