Return-Path: X-Original-To: apmail-accumulo-commits-archive@www.apache.org Delivered-To: apmail-accumulo-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 40E9B10D2E for ; Sat, 5 Apr 2014 00:59:54 +0000 (UTC) Received: (qmail 63550 invoked by uid 500); 5 Apr 2014 00:59:39 -0000 Delivered-To: apmail-accumulo-commits-archive@accumulo.apache.org Received: (qmail 63455 invoked by uid 500); 5 Apr 2014 00:59:38 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 63363 invoked by uid 99); 5 Apr 2014 00:59:36 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 05 Apr 2014 00:59:36 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 17D59943986; Sat, 5 Apr 2014 00:59:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: busbey@apache.org To: commits@accumulo.apache.org Date: Sat, 05 Apr 2014 00:59:36 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [01/15] git commit: ACCUMULO-2519 Aborts upgrade if there are Fate transactions from an old version. Repository: accumulo Updated Branches: refs/heads/1.5.2-SNAPSHOT f67c38614 -> 5a504b311 refs/heads/1.6.0-SNAPSHOT 2cb526e5e -> f5a94f041 refs/heads/master bdc2a994c -> ebbd5e62b ACCUMULO-2519 Aborts upgrade if there are Fate transactions from an old version. Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/5a504b31 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/5a504b31 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/5a504b31 Branch: refs/heads/1.5.2-SNAPSHOT Commit: 5a504b311c0e5f59ff5b14221c6bf61f43b4d093 Parents: a904f69 Author: Sean Busbey Authored: Fri Mar 28 01:46:09 2014 -0500 Committer: Sean Busbey Committed: Fri Apr 4 17:27:05 2014 -0700 ---------------------------------------------------------------------- README | 14 +++ .../org/apache/accumulo/server/Accumulo.java | 31 ++++++ .../apache/accumulo/server/master/Master.java | 100 ++++++++++++------- .../server/tabletserver/TabletServer.java | 5 + .../accumulo/server/util/MetadataTable.java | 3 + 5 files changed, 116 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/README ---------------------------------------------------------------------- diff --git a/README b/README index 115a9b7..0bb1030 100644 --- a/README +++ b/README @@ -54,12 +54,26 @@ accumulo. This happens automatically the first time Accumulo 1.5 is started. + * Verify that there are no outstanding FATE operations + - Under 1.4 you can list what's in FATE by running + $ACCUMULO_HOME/bin/accumulo org.apache.accumulo.server.fate.Admin print + - Note that operations in any state will prevent an upgrade. It is safe + to delete operations with status SUCCESSFUL. For others, you should restart + your 1.4 cluster and allow them to finish. * Stop the 1.4 instance. * Configure 1.5 to use the hdfs directory, walog directories, and zookeepers that 1.4 was using. * Copy other 1.4 configuration options as needed. * Start Accumulo 1.5. + The upgrade process must make changes to Accumulo's internal state in both ZooKeeper and + the table metadata. This process may take some time as Tablet Servers move write-ahead + logs to HDFS and then do recovery. During this time, the Monitor will claim that the + Master is down and some services may send the Monitor log messages about failure to + communicate with each other. These messages are safe to ignore. If you need detail on + the upgrade's progress you should view the local logs on the Tablet Servers and active + Master. + ****************************************************************************** 4. Configuring http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/Accumulo.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/accumulo/server/Accumulo.java b/server/src/main/java/org/apache/accumulo/server/Accumulo.java index 99ec7e4..420b6cc 100644 --- a/server/src/main/java/org/apache/accumulo/server/Accumulo.java +++ b/server/src/main/java/org/apache/accumulo/server/Accumulo.java @@ -27,11 +27,16 @@ import java.util.Map.Entry; import java.util.TreeMap; import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.trace.DistributedTrace; import org.apache.accumulo.core.util.AddressUtil; import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.accumulo.core.util.Version; +import org.apache.accumulo.core.zookeeper.ZooUtil; +import org.apache.accumulo.fate.ReadOnlyTStore; +import org.apache.accumulo.fate.ReadOnlyStore; +import org.apache.accumulo.fate.ZooStore; import org.apache.accumulo.server.client.HdfsZooInstance; import org.apache.accumulo.server.conf.ServerConfiguration; import org.apache.accumulo.server.util.time.SimpleTimer; @@ -53,6 +58,7 @@ public class Accumulo { try { if (getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) { fs.create(new Path(ServerConstants.getDataVersionLocation() + "/" + Constants.DATA_VERSION)); + // TODO document failure mode & recovery if FS permissions cause above to work and below to fail ACCUMULO-2596 fs.delete(new Path(ServerConstants.getDataVersionLocation() + "/" + Constants.PREV_DATA_VERSION), false); } } catch (IOException e) { @@ -263,4 +269,29 @@ public class Accumulo { throw new RuntimeException("cannot find method setSafeMode", ex); } } + + /** + * Exit loudly if there are outstanding Fate operations. + * Since Fate serializes class names, we need to make sure there are no queued + * transactions from a previous version before continuing an upgrade. The status of the operations is + * irrelevant; those in SUCCESSFUL status cause the same problem as those just queued. + * + * Note that the Master should not allow write access to Fate until after all upgrade steps are complete. + * + * Should be called as a guard before performing any upgrade steps, after determining that an upgrade is needed. + * + * see ACCUMULO-2519 + */ + public static void abortIfFateTransactions() { + try { + final ReadOnlyTStore fate = new ReadOnlyStore(new ZooStore(ZooUtil.getRoot(HdfsZooInstance.getInstance()) + Constants.ZFATE, + ZooReaderWriter.getRetryingInstance())); + if (!(fate.list().isEmpty())) { + throw new AccumuloException("Aborting upgrade because there are outstanding FATE transactions from a previous Accumulo version. Please see the README document for instructions on what to do under your previous version."); + } + } catch (Exception exception) { + log.fatal("Problem verifying Fate readiness", exception); + System.exit(1); + } + } } http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/master/Master.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/accumulo/server/master/Master.java b/server/src/main/java/org/apache/accumulo/server/master/Master.java index 270eb18..a2ad2e6 100644 --- a/server/src/main/java/org/apache/accumulo/server/master/Master.java +++ b/server/src/main/java/org/apache/accumulo/server/master/Master.java @@ -34,6 +34,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -271,7 +272,9 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt upgradeMetadata(); } } - + + private boolean haveUpgradedZooKeeper = false; + private void upgradeZookeeper() { // 1.5.1 and 1.6.0 both do some state checking after obtaining the zoolock for the // monitor and before starting up. It's not tied to the data version at all (and would @@ -279,59 +282,79 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt // that the master is not the only thing that may alter zookeeper before starting. if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) { + // This Master hasn't started Fate yet, so any outstanding transactions must be from before the upgrade. + // Change to Guava's Verify once we use Guava 17. + if (null != fate) { + throw new IllegalStateException("Access to Fate should not have been initialized prior to the Master transitioning to active. Please save all logs and file a bug."); + } + Accumulo.abortIfFateTransactions(); try { log.info("Upgrading zookeeper"); - + IZooReaderWriter zoo = ZooReaderWriter.getInstance(); - + zoo.recursiveDelete(ZooUtil.getRoot(instance) + "/loggers", NodeMissingPolicy.SKIP); zoo.recursiveDelete(ZooUtil.getRoot(instance) + "/dead/loggers", NodeMissingPolicy.SKIP); zoo.putPersistentData(ZooUtil.getRoot(instance) + Constants.ZRECOVERY, new byte[] {'0'}, NodeExistsPolicy.SKIP); - + for (String id : Tables.getIdToNameMap(instance).keySet()) { - + zoo.putPersistentData(ZooUtil.getRoot(instance) + Constants.ZTABLES + "/" + id + Constants.ZTABLE_COMPACT_CANCEL_ID, "0".getBytes(Constants.UTF8), NodeExistsPolicy.SKIP); } + haveUpgradedZooKeeper = true; } catch (Exception ex) { log.fatal("Error performing upgrade", ex); System.exit(1); } } } - + private final AtomicBoolean upgradeMetadataRunning = new AtomicBoolean(false); - + private final CountDownLatch waitForMetadataUpgrade = new CountDownLatch(1); + private final ServerConfiguration serverConfig; private void upgradeMetadata() { - if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) { - if (upgradeMetadataRunning.compareAndSet(false, true)) { + // we make sure we're only doing the rest of this method once so that we can signal to other threads that an upgrade wasn't needed. + if (upgradeMetadataRunning.compareAndSet(false, true)) { + if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) { + // sanity check that we passed the Fate verification prior to ZooKeeper upgrade, and that Fate still hasn't been started. + // Change both to use Guava's Verify once we use Guava 17. + if (!haveUpgradedZooKeeper) { + throw new IllegalStateException("We should only attempt to upgrade Accumulo's !METADATA table if we've already upgraded ZooKeeper. Please save all logs and file a bug."); + } + if (null != fate) { + throw new IllegalStateException("Access to Fate should not have been initialized prior to the Master finishing upgrades. Please save all logs and file a bug."); + } Runnable upgradeTask = new Runnable() { @Override public void run() { try { + log.info("Starting to upgrade !METADATA table."); MetadataTable.moveMetaDeleteMarkers(instance, SecurityConstants.getSystemCredentials()); + log.info("Updating persistent data version."); Accumulo.updateAccumuloVersion(fs); - log.info("Upgrade complete"); - + waitForMetadataUpgrade.countDown(); } catch (Exception ex) { log.fatal("Error performing upgrade", ex); System.exit(1); } - + } }; - + // need to run this in a separate thread because a lock is held that prevents !METADATA tablets from being assigned and this task writes to the // !METADATA table new Thread(upgradeTask).start(); + } else { + waitForMetadataUpgrade.countDown(); } } } - + private int assignedOrHosted(Text tableId) { int result = 0; for (TabletGroupWatcher watcher : watchers) { @@ -2136,28 +2159,6 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt tserverSet.startListeningForTabletServerChanges(); - // TODO: add shutdown for fate object - ACCUMULO-1307 - try { - final AgeOffStore store = new AgeOffStore(new org.apache.accumulo.fate.ZooStore(ZooUtil.getRoot(instance) + Constants.ZFATE, - ZooReaderWriter.getRetryingInstance()), 1000 * 60 * 60 * 8); - - int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE); - - fate = new Fate(this, store, threads); - - SimpleTimer.getInstance().schedule(new Runnable() { - - @Override - public void run() { - store.ageOff(); - } - }, 63000, 63000); - } catch (KeeperException e) { - throw new IOException(e); - } catch (InterruptedException e) { - throw new IOException(e); - } - ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, new Watcher() { @Override public void process(WatchedEvent event) { @@ -2183,7 +2184,32 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt for (TabletGroupWatcher watcher : watchers) { watcher.start(); } - + + // Once we are sure tablet servers are no longer checking for an empty Fate transaction queue before doing WAL upgrades, we can safely start using Fate ourselves. + waitForMetadataUpgrade.await(); + + // TODO: add shutdown for fate object - ACCUMULO-1307 + try { + final AgeOffStore store = new AgeOffStore(new org.apache.accumulo.fate.ZooStore(ZooUtil.getRoot(instance) + Constants.ZFATE, + ZooReaderWriter.getRetryingInstance()), 1000 * 60 * 60 * 8); + + int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE); + + fate = new Fate(this, store, threads); + + SimpleTimer.getInstance().schedule(new Runnable() { + + @Override + public void run() { + store.ageOff(); + } + }, 63000, 63000); + } catch (KeeperException e) { + throw new IOException(e); + } catch (InterruptedException e) { + throw new IOException(e); + } + Processor processor = new Processor(TraceWrap.service(new MasterClientServiceHandler())); ServerPort serverPort = TServerUtils.startServer(getSystemConfiguration(), Property.MASTER_CLIENTPORT, processor, "Master", "Master Client Service Handler", null, Property.MASTER_MINTHREADS, Property.MASTER_THREADCHECK, Property.GENERAL_MAX_MESSAGE_SIZE); http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java index d76946d..ad3d615 100644 --- a/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java +++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java @@ -3322,6 +3322,11 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu * */ public static void recoverLocalWriteAheadLogs(FileSystem fs, ServerConfiguration serverConf) throws IOException { + if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) { + // If the Master has not yet signaled a finish to upgrading, we need to make sure we can rollback in the + // event of outstanding transactions in Fate from the previous version. + Accumulo.abortIfFateTransactions(); + } FileSystem localfs = FileSystem.getLocal(fs.getConf()).getRawFileSystem(); AccumuloConfiguration conf = serverConf.getConfiguration(); String localWalDirectories = conf.get(Property.LOGGER_DIR); http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java b/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java index 7328a55..d6e0a3c 100644 --- a/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java +++ b/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java @@ -1233,6 +1233,9 @@ public class MetadataTable extends org.apache.accumulo.core.util.MetadataTable { update(SecurityConstants.getSystemCredentials(), m); } + /** + * During an upgrade from Accumulo 1.4 -> 1.5, we need to move deletion requests for files under the !METADATA table to the root tablet. + */ public static void moveMetaDeleteMarkers(Instance instance, TCredentials creds) { // move delete markers from the normal delete keyspace to the root tablet delete keyspace if the files are for the !METADATA table Scanner scanner = new ScannerImpl(instance, creds, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS);