accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From els...@apache.org
Subject [1/6] git commit: ACCUMULO-1794 adds hdfs failover to continuous integration test.
Date Sat, 07 Dec 2013 04:14:29 GMT
Updated Branches:
  refs/heads/1.6.0-SNAPSHOT 352dc1b8b -> 7fb59e909


ACCUMULO-1794 adds hdfs failover to continuous integration test.

Signed-off-by: Josh Elser <elserj@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/872fd1df
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/872fd1df
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/872fd1df

Branch: refs/heads/1.6.0-SNAPSHOT
Commit: 872fd1dfb252e45560b5547aad43399fe433f1a1
Parents: 513f4d2
Author: Sean Busbey <busbey@clouderagovt.com>
Authored: Sat Nov 16 02:46:40 2013 -0600
Committer: Josh Elser <elserj@apache.org>
Committed: Fri Dec 6 20:35:28 2013 -0500

----------------------------------------------------------------------
 .../system/continuous/continuous-env.sh.example |   9 +
 test/system/continuous/hdfs-agitator.pl         | 217 +++++++++++++++++++
 test/system/continuous/start-agitator.sh        |   5 +
 test/system/continuous/stop-agitator.sh         |   4 +
 4 files changed, 235 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/continuous-env.sh.example
----------------------------------------------------------------------
diff --git a/test/system/continuous/continuous-env.sh.example b/test/system/continuous/continuous-env.sh.example
index 830ae86..1d39034 100644
--- a/test/system/continuous/continuous-env.sh.example
+++ b/test/system/continuous/continuous-env.sh.example
@@ -89,6 +89,15 @@ MAX_KILL=1
 MASTER_KILL_SLEEP_TIME=60
 MASTER_RESTART_SLEEP_TIME=2
 
+#Do we want to perturb HDFS? Only works on HDFS versions with HA, i.e. Hadoop 2
+# AGITATE_HDFS=true
+#Defaults to false for Accumulo versions that ship default against Hadoop 1
+AGITATE_HDFS=false
+AGITATE_HDFS_SLEEP_TIME=10 # minutes between failovers, handed to hdfs-agitator.pl as --sleep
+AGITATE_HDFS_SUPERUSER=hdfs # handed to hdfs-agitator.pl as --superuser
+AGITATE_HDFS_COMMAND="${HADOOP_PREFIX:-/usr/lib/hadoop}/share/hadoop/hdfs/bin/hdfs" # ':-' supplies the default prefix when HADOOP_PREFIX is unset or empty
+AGITATE_HDFS_SUDO=`which sudo` # handed to hdfs-agitator.pl as --sudo
+
 #settings for the verification map reduce job
 VERIFY_OUT=/tmp/continuous_verify
 VERIFY_MAX_MAPS=64

http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/hdfs-agitator.pl
----------------------------------------------------------------------
diff --git a/test/system/continuous/hdfs-agitator.pl b/test/system/continuous/hdfs-agitator.pl
new file mode 100755
index 0000000..85eab32
--- /dev/null
+++ b/test/system/continuous/hdfs-agitator.pl
@@ -0,0 +1,217 @@
+#! /usr/bin/env perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use strict;
+use warnings;
+use POSIX qw(strftime);
+use Getopt::Long;
+use Pod::Usage;
+
+my $help = 0;  # set by --help: brief usage, exit 0
+my $man = 0;  # set by --man: full POD, exit 0
+my $sleep = 10;  # minutes between failover rounds (--sleep)
+my $superuser = 'hdfs';  # user given to "-u" when a sudo command is in use (--superuser)
+my $hdfsCmd;  # path to the hdfs cli (--hdfs-cmd); undef unless HADOOP_PREFIX or the option supplies it
+if( defined $ENV{'HADOOP_PREFIX'} ){
+  $hdfsCmd = $ENV{'HADOOP_PREFIX'} . '/share/hadoop/hdfs/bin/hdfs';
+}
+my $sudo;  # undef: --sudo not given; "": --sudo given without a value; otherwise the command to use
+my $nameservice;  # when set (--nameservice), the only nameservice that is failed over
+
+GetOptions('help|?' => \$help, 'man' => \$man, 'sleep=i' => \$sleep, 'nameservice=s'
=> \$nameservice, 'superuser=s' => \$superuser, 'hdfs-cmd=s' => \$hdfsCmd, 'sudo:s'
=> \$sudo) or pod2usage(2);  # 'sudo:s' makes the option's value optional
+pod2usage(-exitval => 0, -verbose => 1) if $help;
+pod2usage(-exitval => 0, -verbose => 2) if $man;
+pod2usage(-exitval => 1, -verbose => 1, -message => '$HADOOP_PREFIX not defined
and no hdfs-cmd given. please use --hdfs-cmd to specify where your hdfs cli is.') if not defined
$hdfsCmd;
+pod2usage(-exitval => 1, -verbose => 1, -message => "Your specified hdfs cli '$hdfsCmd'
is not executable.") if not -x $hdfsCmd;
+if( defined $sudo and "" eq $sudo ){  # --sudo was given with no value: look sudo up on the PATH
+  $sudo = `which sudo`;
+  pod2usage(-exitval => 1, -verbose => 1, -message => "Error attempting to find
the sudo command, please specify it with --sudo /path/to/sudo") if 0 != $?;
+  chomp($sudo);
+}
+if( defined $sudo ){
+  pod2usage(-exitval => 1, -verbose => 1, -message => "Your specified sudo command
'$sudo' is not executable.") if not -x $sudo;
+}
+
+my $needsudo = defined $sudo;  # true only when --sudo appeared on the command line
+my $haadmin = "$hdfsCmd haadmin";  # command prefix for every haadmin call made later
+if($needsudo) {
+  $haadmin = "$sudo -u $superuser $haadmin";  # run haadmin as the superuser
+  print STDERR "Starting HDFS agitator, configured to fail over every $sleep minutes. will
run hdfs command '$hdfsCmd' as user '$superuser' via '$sudo'.\n";
+} else {
+  print STDERR "Starting HDFS agitator, configured to fail over every $sleep minutes. will
run hdfs command '$hdfsCmd' as the current user.\n";
+}
+while(1){  # one failover round per iteration; only the exit(1)/pod2usage calls below leave the loop
+  sleep($sleep * 60);  # $sleep is in minutes
+  my $t = strftime "%Y%m%d %H:%M:%S", localtime;  # one timestamp per round, reused in every log line
+  my @failServices;  # nameservices to fail over in this round
+  if( defined $nameservice ){
+    @failServices = ($nameservice);
+  } else {
+    my $nameservicesRaw = `$hdfsCmd getconf -confKey dfs.nameservices`;
+    if(0 != $?) {
+      print STDERR "$t HDFS CLI failed. please see --help to set it correctly\n";
+      exit(1);
+    }
+    $nameservicesRaw =~ s/^\s+|\s+$//g;  # drops the trailing newline and any padding around the value
+    my @nameservices = split(/\s*,\s*/, $nameservicesRaw);  # tolerate lists written as "ns1, ns2"
+    if(1 > scalar(@nameservices)) {
+      print STDERR "$t No HDFS NameServices found. Are you sure you're running in HA?\n";
+      exit(1);
+    }
+    if(rand(1) < .5){  # coin flip: fail over one random nameservice, or all of them
+      my $serviceToFail = $nameservices[int(rand(scalar(@nameservices)))];
+      print STDERR "$t Failing over nameservice $serviceToFail\n";
+      @failServices = ($serviceToFail);
+    } else {
+      print STDERR "$t Failing over all nameservices\n";
+      @failServices = @nameservices;
+    }
+  }
+  for my $toFail (@failServices){
+    my $namenodesRaw = `$hdfsCmd getconf -confKey dfs.ha.namenodes.$toFail`;
+    if(0 != $?) {
+      print STDERR "$t HDFS CLI failed to look up namenodes in service $toFail.\n";
+      exit(1);
+    }
+    $namenodesRaw =~ s/^\s+|\s+$//g;  # drops the trailing newline and any padding around the value
+    my @namenodes = split(/\s*,\s*/, $namenodesRaw);  # tolerate lists written as "nn1, nn2"
+    if(2 > scalar(@namenodes)) {
+      print STDERR "$t WARN NameService $toFail does not have at least 2 namenodes according
to the HDFS configuration, skipping.\n";
+      next;
+    }
+    my $active;  # stays undef when no namenode reports 'active'
+    for my $namenode (@namenodes){
+      my $status = `$haadmin -ns $toFail -getServiceState $namenode`;
+      if(0 != $?) {
+        if($needsudo) {
+          print STDERR "$t WARN Error while attempting to get the service state of $toFail
:: $namenode\n";
+          $status = 'error';
+        } else {
+          print STDERR "$t WARN Current user may not run the HDFS haadmin utility, attempting
to sudo to the $superuser user.\n";
+          $needsudo = 1;  # from here on every haadmin call goes through sudo
+          if(not defined $sudo) {
+            $sudo = `which sudo`;
+            pod2usage(-exitval => 1, -verbose => 1, -message => "Error attempting
to find the sudo command, please specify it with --sudo") if 0 != $?;
+            chomp($sudo);
+            pod2usage(-exitval => 1, -verbose => 1, -message => "The sudo command
'$sudo' is not executable. please specify sudo with --sudo") if not -x $sudo;
+          }
+          $haadmin = "$sudo -u $superuser $haadmin";
+          redo;  # query the same namenode again, this time with the sudo-prefixed command
+        }
+      }
+      chomp($status);
+      if( 'active' eq $status ){
+        $active = $namenode;
+        last;
+      }
+    }
+    if( defined $active ){
+      my @standby = grep { $_ ne $active } @namenodes;  # never empty: at least 2 namenodes were required above
+      my $newActive = $standby[int(rand(scalar(@standby)))];
+      print STDERR "$t Transitioning nameservice $toFail from $active to $newActive\n";
+      my $cmd = "$haadmin -ns $toFail -failover $active $newActive";
+      print "$t $cmd\n";
+      print STDERR "$t WARN '$cmd' failed (wait status $?)\n" if 0 != system($cmd);  # report a failed failover instead of dropping it
+    } else {
+      my $newActive = $namenodes[int(rand(scalar(@namenodes)))];
+      print STDERR "$t WARN nameservice $toFail did not have an active namenode. Transitioning
a random namenode to active. This will fail if HDFS is configured for automatic failover.\n";
+      my $cmd = "$haadmin -ns $toFail -transitionToActive $newActive";
+      print "$t $cmd\n";
+      print STDERR "$t WARN '$cmd' failed (wait status $?)\n" if 0 != system($cmd);  # report a failed transition instead of dropping it
+    }
+  }
+}
+__END__
+
+=head1 NAME
+
+hdfs-agitator - causes HDFS to failover
+
+=head1 DESCRIPTION
+
+Sleeps for a configurable amount of time, then causes a NameNode failover in one
+or more HDFS NameServices. If a given NameService does not have an Active
+NameNode when it comes time to failover, a random standby is promoted.
+
+Only works on HDFS versions that support HA configurations and the haadmin
+command. In order to function, the user running this script must be able to
+use the haadmin command. This requires access to an HDFS superuser. By default,
+it runs haadmin as the current user and falls back to sudo if that fails.
+
+=head1 SYNOPSIS
+
+hdfs-agitator [options]
+
+  Options:
+    --help         Brief help message
+    --man          Full documentation
+    --sleep        Time to sleep between failovers in minutes. Default 10
+    --superuser    HDFS superuser. Default 'hdfs'
+    --hdfs-cmd     hdfs command path. Default '$HADOOP_PREFIX/share/hadoop/hdfs/bin/hdfs'
+    --nameservice  Limit failovers to specified nameservice. Default all nameservices
+    --sudo         command to call to sudo to the HDFS superuser. Default 'sudo' if needed.
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--sleep>
+
+Sleep the given number of minutes between attempts to fail over nameservices.
+
+=item B<--nameservice>
+
+Limit failover attempts to the given nameservice. By default, we attempt to list
+all known nameservices and choose either one or all of them to failover in a
+given cycle.
+
+=item B<--superuser>
+
+An HDFS superuser capable of running the haadmin command. Defaults to "hdfs".
+
+=item B<--hdfs-cmd>
+
+Path to the HDFS cli. Will be used both for non-administrative commands (e.g.
+listing the nameservices and serviceids in a given nameservice) and admin-only
+actions such as checking status and failing over.
+
+Defaults to using $HADOOP_PREFIX.
+
+=item B<--sudo>
+
+Command to allow us to act as the given HDFS superuser. By default we assume the current
user
+can run HDFS administrative commands. When this argument is specified we will instead attempt
+to use the HDFS superuser. If given an argument, it will be called like
+sudo, i.e. "sudo -u $superuser $cmd". Defaults to "sudo" on the shell's path.
+
+=back
+
+=head1 SEE ALSO
+
+See the Apache Hadoop documentation on configuring HDFS HA
+
+=over 8
+
+=item B<HA with QJM>
+
+http://hadoop.apache.org/docs/r2.2.0/hadoop-yarn/hadoop-yarn-site/HDFSHighAvailabilityWithQJM.html#Administrative_commands
+
+=item B<HA with NFS>
+
+http://hadoop.apache.org/docs/r2.2.0/hadoop-yarn/hadoop-yarn-site/HDFSHighAvailabilityWithNFS.html#Administrative_commands
+
+=back

http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/start-agitator.sh
----------------------------------------------------------------------
diff --git a/test/system/continuous/start-agitator.sh b/test/system/continuous/start-agitator.sh
index 52e5a4e..c734943 100755
--- a/test/system/continuous/start-agitator.sh
+++ b/test/system/continuous/start-agitator.sh
@@ -21,3 +21,8 @@ CONTINUOUS_CONF_DIR=${CONTINUOUS_CONF_DIR:-$ACCUMULO_HOME/test/system/continuous
 nohup ./agitator.pl $KILL_SLEEP_TIME $TUP_SLEEP_TIME $MIN_KILL $MAX_KILL >$CONTINUOUS_LOG_DIR/`date
+%Y%m%d%H%M%S`_`hostname`_agitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_agitator.err
&
 
 nohup ./magitator.pl $MASTER_KILL_SLEEP_TIME $MASTER_RESTART_SLEEP_TIME >$CONTINUOUS_LOG_DIR/`date
+%Y%m%d%H%M%S`_`hostname`_magitator.out 2>$CONTINUOUS_LOG_DIR/`date +%Y%m%d%H%M%S`_`hostname`_magitator.err
&
+
+if ${AGITATE_HDFS:-false} ; then  # the value is executed as a command (true/false); unset or empty means false
+  AGITATOR_LOG=${CONTINUOUS_LOG_DIR}/`date +%Y%m%d%H%M%S`_`hostname`_hdfs-agitator  # shared prefix for the .out and .err files below
+  nohup ./hdfs-agitator.pl --sleep ${AGITATE_HDFS_SLEEP_TIME} --hdfs-cmd ${AGITATE_HDFS_COMMAND}
--superuser ${AGITATE_HDFS_SUPERUSER} --sudo ${AGITATE_HDFS_SUDO} >${AGITATOR_LOG}.out
2>${AGITATOR_LOG}.err &
+fi

http://git-wip-us.apache.org/repos/asf/accumulo/blob/872fd1df/test/system/continuous/stop-agitator.sh
----------------------------------------------------------------------
diff --git a/test/system/continuous/stop-agitator.sh b/test/system/continuous/stop-agitator.sh
index b853a55..f26e3b2 100755
--- a/test/system/continuous/stop-agitator.sh
+++ b/test/system/continuous/stop-agitator.sh
@@ -18,5 +18,9 @@
 CONTINUOUS_CONF_DIR=${CONTINUOUS_CONF_DIR:-$ACCUMULO_HOME/test/system/continuous/}
 . $CONTINUOUS_CONF_DIR/continuous-env.sh
 
+if ${AGITATE_HDFS:-false} ; then  # the value is executed as a command (true/false); unset or empty means false
+  pkill -f hdfs-agitator.pl  # -f matches the pattern against each process's full command line
+fi
+
 pkill -f agitator.pl
 


Mime
View raw message