accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bus...@apache.org
Subject [08/15] git commit: Merge branch '1.5.2-SNAPSHOT' into 1.6.0-SNAPSHOT
Date Sat, 05 Apr 2014 00:59:43 GMT
Merge branch '1.5.2-SNAPSHOT' into 1.6.0-SNAPSHOT

  Conflicts:
      fate/src/main/java/org/apache/accumulo/fate/AdminUtil.java
      fate/src/main/java/org/apache/accumulo/fate/TStore.java
      server/base/src/main/java/org/apache/accumulo/server/Accumulo.java
      server/src/main/java/org/apache/accumulo/server/master/Master.java
      server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
      server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/957c9d1b
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/957c9d1b
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/957c9d1b

Branch: refs/heads/master
Commit: 957c9d1b34afb8a5b7347921ba2d5b0b22bbc828
Parents: 2cb526e 5a504b3
Author: Sean Busbey <busbey@cloudera.com>
Authored: Fri Apr 4 17:27:39 2014 -0700
Committer: Sean Busbey <busbey@cloudera.com>
Committed: Fri Apr 4 17:28:32 2014 -0700

----------------------------------------------------------------------
 README                                          |  14 +++
 fate/pom.xml                                    |   9 ++
 .../org/apache/accumulo/fate/AdminUtil.java     |  14 +--
 .../java/org/apache/accumulo/fate/Fate.java     |   2 +-
 .../org/apache/accumulo/fate/ReadOnlyRepo.java  |  32 +++++
 .../org/apache/accumulo/fate/ReadOnlyStore.java | 111 ++++++++++++++++
 .../apache/accumulo/fate/ReadOnlyTStore.java    | 125 +++++++++++++++++++
 .../java/org/apache/accumulo/fate/Repo.java     |   5 +-
 .../java/org/apache/accumulo/fate/TStore.java   |  62 +--------
 .../apache/accumulo/fate/AgeOffStoreTest.java   |   2 +-
 .../apache/accumulo/fate/ReadOnlyStoreTest.java |  72 +++++++++++
 .../org/apache/accumulo/fate/SimpleStore.java   |   2 +-
 .../org/apache/accumulo/server/Accumulo.java    |  33 ++++-
 .../accumulo/server/util/MetadataTableUtil.java |   3 +
 .../java/org/apache/accumulo/master/Master.java |  77 ++++++++----
 .../apache/accumulo/master/util/FateAdmin.java  |   3 +-
 16 files changed, 464 insertions(+), 102 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/957c9d1b/README
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/accumulo/blob/957c9d1b/fate/pom.xml
----------------------------------------------------------------------
diff --cc fate/pom.xml
index b339758,0868e4c..a3a7b42
--- a/fate/pom.xml
+++ b/fate/pom.xml
@@@ -24,11 -24,15 +24,15 @@@
    </parent>
    <artifactId>accumulo-fate</artifactId>
    <name>Fate</name>
 +  <description>A FAult-Tolerant Executor library used by Apache Accumulo.</description>
    <dependencies>
      <dependency>
+       <groupId>com.google.guava</groupId>
+       <artifactId>guava</artifactId>
+     </dependency>
+     <dependency>
        <groupId>commons-lang</groupId>
        <artifactId>commons-lang</artifactId>
 -      <scope>provided</scope>
      </dependency>
      <dependency>
        <groupId>log4j</groupId>

http://git-wip-us.apache.org/repos/asf/accumulo/blob/957c9d1b/fate/src/main/java/org/apache/accumulo/fate/AdminUtil.java
----------------------------------------------------------------------
diff --cc fate/src/main/java/org/apache/accumulo/fate/AdminUtil.java
index 8277c71,0238fde..1f8c32d
--- a/fate/src/main/java/org/apache/accumulo/fate/AdminUtil.java
+++ b/fate/src/main/java/org/apache/accumulo/fate/AdminUtil.java
@@@ -25,9 -23,8 +25,9 @@@ import java.util.HashMap
  import java.util.List;
  import java.util.Map;
  import java.util.Map.Entry;
 +import java.util.Set;
  
- import org.apache.accumulo.fate.TStore.TStatus;
+ import org.apache.accumulo.fate.ReadOnlyTStore.TStatus;
  import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
  import org.apache.accumulo.fate.zookeeper.ZooLock;
  import org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy;
@@@ -39,32 -36,7 +39,32 @@@ import org.apache.zookeeper.KeeperExcep
  public class AdminUtil<T> {
    private static final Charset UTF8 = Charset.forName("UTF-8");
    
 +  private boolean exitOnError = false;
 +  
 +  /**
 +   * Default constructor
 +   */
 +  public AdminUtil() {
 +    this(true);
 +  }
 +  
 +  /**
 +   * Constructor
 +   * 
 +   * @param exitOnError
 +   *          <code>System.exit(1)</code> on error if true
 +   */
 +  public AdminUtil(boolean exitOnError) {
 +    super();
 +    this.exitOnError = exitOnError;
 +  }
 +  
-   public void print(ZooStore<T> zs, IZooReaderWriter zk, String lockPath) throws KeeperException, InterruptedException {
+   public void print(ReadOnlyTStore<T> zs, IZooReaderWriter zk, String lockPath) throws KeeperException, InterruptedException {
 +    print(zs, zk, lockPath, new Formatter(System.out), null, null);
 +  }
 +  
-   public void print(ZooStore<T> zs, IZooReaderWriter zk, String lockPath, Formatter fmt, Set<Long> filterTxid, EnumSet<TStatus> filterStatus)
++  public void print(ReadOnlyTStore<T> zs, IZooReaderWriter zk, String lockPath, Formatter fmt, Set<Long> filterTxid, EnumSet<TStatus> filterStatus)
 +      throws KeeperException, InterruptedException {
      Map<Long,List<String>> heldLocks = new HashMap<Long,List<String>>();
      Map<Long,List<String>> waitingLocks = new HashMap<Long,List<String>>();
      
@@@ -169,79 -137,28 +169,79 @@@
        return false;
      }
      
 -    long txid = Long.parseLong(txidStr, 16);
 +    long txid;
 +    try {
 +      txid = Long.parseLong(txidStr, 16);
 +    } catch (NumberFormatException nfe) {
 +      System.out.printf("Invalid transaction ID format: %s%n", txidStr);
 +      return false;
 +    }
 +    boolean state = false;
      zs.reserve(txid);
 -    zs.delete(txid);
 -    zs.unreserve(txid, 0);
 +    TStatus ts = zs.getStatus(txid);
 +    switch (ts) {
 +      case UNKNOWN:
 +        System.out.printf("Invalid transaction ID: %016x%n", txid);
 +        break;
 +      
 +      case IN_PROGRESS:
 +      case NEW:
 +      case FAILED:
 +      case FAILED_IN_PROGRESS:
 +      case SUCCESSFUL:
 +        System.out.printf("Deleting transaction: %016x (%s)%n", txid, ts);
 +        zs.delete(txid);
 +        state = true;
 +        break;
 +    }
      
 -    return true;
 +    zs.unreserve(txid, 0);
 +    return state;
    }
    
-   public boolean prepFail(ZooStore<T> zs, IZooReaderWriter zk, String path, String txidStr) {
+   public boolean prepFail(TStore<T> zs, IZooReaderWriter zk, String path, String txidStr) {
      if (!checkGlobalLock(zk, path)) {
        return false;
      }
      
 -    long txid = Long.parseLong(txidStr, 16);
 +    long txid;
 +    try {
 +      txid = Long.parseLong(txidStr, 16);
 +    } catch (NumberFormatException nfe) {
 +      System.out.printf("Invalid transaction ID format: %s%n", txidStr);
 +      return false;
 +    }
 +    boolean state = false;
      zs.reserve(txid);
 -    zs.setStatus(txid, TStatus.FAILED_IN_PROGRESS);
 -    zs.unreserve(txid, 0);
 +    TStatus ts = zs.getStatus(txid);
 +    switch (ts) {
 +      case UNKNOWN:
 +        System.out.printf("Invalid transaction ID: %016x%n", txid);
 +        break;
 +      
 +      case IN_PROGRESS:
 +      case NEW:
 +        System.out.printf("Failing transaction: %016x (%s)%n", txid, ts);
 +        zs.setStatus(txid, TStatus.FAILED_IN_PROGRESS);
 +        state = true;
 +        break;
 +      
 +      case SUCCESSFUL:
 +        System.out.printf("Transaction already completed: %016x (%s)%n", txid, ts);
 +        break;
 +      
 +      case FAILED:
 +      case FAILED_IN_PROGRESS:
 +        System.out.printf("Transaction already failed: %016x (%s)%n", txid, ts);
 +        state = true;
 +        break;
 +    }
      
 -    return true;
 +    zs.unreserve(txid, 0);
 +    return state;
    }
    
-   public void deleteLocks(ZooStore<T> zs, IZooReaderWriter zk, String path, String txidStr) throws KeeperException, InterruptedException {
+   public void deleteLocks(TStore<T> zs, IZooReaderWriter zk, String path, String txidStr) throws KeeperException, InterruptedException {
      // delete any locks assoc w/ fate operation
      List<String> lockedIds = zk.getChildren(path);
      

http://git-wip-us.apache.org/repos/asf/accumulo/blob/957c9d1b/fate/src/main/java/org/apache/accumulo/fate/Fate.java
----------------------------------------------------------------------
diff --cc fate/src/main/java/org/apache/accumulo/fate/Fate.java
index 3561fc8,b2eb681..2b232ac
--- a/fate/src/main/java/org/apache/accumulo/fate/Fate.java
+++ b/fate/src/main/java/org/apache/accumulo/fate/Fate.java
@@@ -17,9 -17,8 +17,9 @@@
  package org.apache.accumulo.fate;
  
  import java.util.EnumSet;
 +import java.util.concurrent.atomic.AtomicBoolean;
  
- import org.apache.accumulo.fate.TStore.TStatus;
+ import org.apache.accumulo.fate.ReadOnlyTStore.TStatus;
  import org.apache.accumulo.fate.util.Daemon;
  import org.apache.accumulo.fate.util.LoggingRunnable;
  import org.apache.log4j.Logger;

http://git-wip-us.apache.org/repos/asf/accumulo/blob/957c9d1b/server/base/src/main/java/org/apache/accumulo/server/Accumulo.java
----------------------------------------------------------------------
diff --cc server/base/src/main/java/org/apache/accumulo/server/Accumulo.java
index a9e4b5f,0000000..4e1eb35
mode 100644,000000..100644
--- a/server/base/src/main/java/org/apache/accumulo/server/Accumulo.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/Accumulo.java
@@@ -1,242 -1,0 +1,271 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.server;
 +
 +import java.io.File;
 +import java.io.FileInputStream;
 +import java.io.IOException;
 +import java.io.InputStream;
 +import java.net.InetAddress;
 +import java.net.UnknownHostException;
 +import java.util.Map.Entry;
 +import java.util.TreeMap;
 +
 +import org.apache.accumulo.core.Constants;
++import org.apache.accumulo.core.client.AccumuloException;
 +import org.apache.accumulo.core.conf.Property;
 +import org.apache.accumulo.core.trace.DistributedTrace;
 +import org.apache.accumulo.core.util.AddressUtil;
 +import org.apache.accumulo.core.util.UtilWaitThread;
 +import org.apache.accumulo.core.util.Version;
 +import org.apache.accumulo.core.volume.Volume;
++import org.apache.accumulo.core.zookeeper.ZooUtil;
++import org.apache.accumulo.fate.ReadOnlyTStore;
++import org.apache.accumulo.fate.ReadOnlyStore;
++import org.apache.accumulo.fate.ZooStore;
 +import org.apache.accumulo.server.client.HdfsZooInstance;
 +import org.apache.accumulo.server.conf.ServerConfiguration;
 +import org.apache.accumulo.server.fs.VolumeManager;
 +import org.apache.accumulo.server.util.time.SimpleTimer;
 +import org.apache.accumulo.server.watcher.MonitorLog4jWatcher;
 +import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
 +import org.apache.hadoop.fs.FileStatus;
 +import org.apache.hadoop.fs.FileSystem;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.log4j.Logger;
 +import org.apache.log4j.helpers.LogLog;
 +import org.apache.log4j.xml.DOMConfigurator;
 +import org.apache.zookeeper.KeeperException;
 +
 +public class Accumulo {
 +  
 +  private static final Logger log = Logger.getLogger(Accumulo.class);
 +  
 +  public static synchronized void updateAccumuloVersion(VolumeManager fs) {
 +    for (Volume volume : fs.getVolumes()) {
 +      try {
 +        if (getAccumuloPersistentVersion(fs) == ServerConstants.PREV_DATA_VERSION) {
 +          log.debug("Attempting to upgrade " + volume);
 +          Path dataVersionLocation = ServerConstants.getDataVersionLocation(volume);
 +          fs.create(new Path(dataVersionLocation, Integer.toString(ServerConstants.DATA_VERSION))).close();
- 
++          // TODO document failure mode & recovery if FS permissions cause above to work and below to fail ACCUMULO-2596
 +          Path prevDataVersionLoc = new Path(dataVersionLocation, Integer.toString(ServerConstants.PREV_DATA_VERSION));
 +          if (!fs.delete(prevDataVersionLoc)) {
 +            throw new RuntimeException("Could not delete previous data version location (" + prevDataVersionLoc + ") for " + volume);
 +          }
 +        }
 +      } catch (IOException e) {
 +        throw new RuntimeException("Unable to set accumulo version: an error occurred.", e);
 +      }
 +    }
 +  }
 +  
 +  public static synchronized int getAccumuloPersistentVersion(FileSystem fs, Path path) {
 +    int dataVersion;
 +    try {
 +      FileStatus[] files = fs.listStatus(path);
 +      if (files == null || files.length == 0) {
 +        dataVersion = -1; // assume it is 0.5 or earlier
 +      } else {
 +        dataVersion = Integer.parseInt(files[0].getPath().getName());
 +      }
 +      return dataVersion;
 +    } catch (IOException e) {
 +      throw new RuntimeException("Unable to read accumulo version: an error occurred.", e);
 +    }
 +  }
 +  
 +  public static synchronized int getAccumuloPersistentVersion(VolumeManager fs) {
 +    // It doesn't matter which Volume is used as they should all have the data version stored
 +    Volume v = fs.getVolumes().iterator().next();
 +    Path path = ServerConstants.getDataVersionLocation(v);
 +    return getAccumuloPersistentVersion(v.getFileSystem(), path);
 +  }
 +
 +  public static synchronized Path getAccumuloInstanceIdPath(VolumeManager fs) {
 +    // It doesn't matter which Volume is used as they should all have the instance ID stored
 +    Volume v = fs.getVolumes().iterator().next();
 +    return ServerConstants.getInstanceIdLocation(v);
 +  }
 +
 +  public static void enableTracing(String address, String application) {
 +    try {
 +      DistributedTrace.enable(HdfsZooInstance.getInstance(), ZooReaderWriter.getInstance(), application, address);
 +    } catch (Exception ex) {
 +      log.error("creating remote sink for trace spans", ex);
 +    }
 +  }
 +  
 +  public static void init(VolumeManager fs, ServerConfiguration config, String application) throws UnknownHostException {
 +    
 +    System.setProperty("org.apache.accumulo.core.application", application);
 +    
 +    if (System.getenv("ACCUMULO_LOG_DIR") != null)
 +      System.setProperty("org.apache.accumulo.core.dir.log", System.getenv("ACCUMULO_LOG_DIR"));
 +    else
 +      System.setProperty("org.apache.accumulo.core.dir.log", System.getenv("ACCUMULO_HOME") + "/logs/");
 +    
 +    String localhost = InetAddress.getLocalHost().getHostName();
 +    System.setProperty("org.apache.accumulo.core.ip.localhost.hostname", localhost);
 +    
 +    int logPort = config.getConfiguration().getPort(Property.MONITOR_LOG4J_PORT);
 +    System.setProperty("org.apache.accumulo.core.host.log.port", Integer.toString(logPort));
 +    
 +    // Use a specific log config, if it exists
 +    String logConfig = String.format("%s/%s_logger.xml", System.getenv("ACCUMULO_CONF_DIR"), application);
 +    if (!new File(logConfig).exists()) {
 +      // otherwise, use the generic config
 +      logConfig = String.format("%s/generic_logger.xml", System.getenv("ACCUMULO_CONF_DIR"));
 +    }
 +    // Turn off messages about not being able to reach the remote logger... we protect against that.
 +    LogLog.setQuietMode(true);
 +
 +    // Read the auditing config
 +    String auditConfig = String.format("%s/auditLog.xml", System.getenv("ACCUMULO_CONF_DIR"));
 +
 +    DOMConfigurator.configureAndWatch(auditConfig, 5000);
 +
 +    // Configure logging using information advertised in zookeeper by the monitor
 +    new MonitorLog4jWatcher(config.getInstance().getInstanceID(), logConfig, 5000).start();
 +
 +    log.info(application + " starting");
 +    log.info("Instance " + config.getInstance().getInstanceID());
 +    int dataVersion = Accumulo.getAccumuloPersistentVersion(fs);
 +    log.info("Data Version " + dataVersion);
 +    Accumulo.waitForZookeeperAndHdfs(fs);
 +    
 +    Version codeVersion = new Version(Constants.VERSION);
 +    if (dataVersion != ServerConstants.DATA_VERSION && dataVersion != ServerConstants.PREV_DATA_VERSION) {
 +      throw new RuntimeException("This version of accumulo (" + codeVersion + ") is not compatible with files stored using data version " + dataVersion);
 +    }
 +    
 +    TreeMap<String,String> sortedProps = new TreeMap<String,String>();
 +    for (Entry<String,String> entry : config.getConfiguration())
 +      sortedProps.put(entry.getKey(), entry.getValue());
 +    
 +    for (Entry<String,String> entry : sortedProps.entrySet()) {
 +      String key = entry.getKey();
 +      log.info(key + " = " + (Property.isSensitive(key) ? "<hidden>" : entry.getValue()));
 +    }
 +    
 +    monitorSwappiness();
 +  }
 +  
 +  /**
 +   * 
 +   */
 +  public static void monitorSwappiness() {
 +    SimpleTimer.getInstance().schedule(new Runnable() {
 +      @Override
 +      public void run() {
 +        try {
 +          String procFile = "/proc/sys/vm/swappiness";
 +          File swappiness = new File(procFile);
 +          if (swappiness.exists() && swappiness.canRead()) {
 +            InputStream is = new FileInputStream(procFile);
 +            try {
 +              byte[] buffer = new byte[10];
 +              int bytes = is.read(buffer);
 +              String setting = new String(buffer, 0, bytes, Constants.UTF8);
 +              setting = setting.trim();
 +              if (bytes > 0 && Integer.parseInt(setting) > 10) {
 +                log.warn("System swappiness setting is greater than ten (" + setting + ") which can cause time-sensitive operations to be delayed. "
 +                    + " Accumulo is time sensitive because it needs to maintain distributed lock agreement.");
 +              }
 +            } finally {
 +              is.close();
 +            }
 +          }
 +        } catch (Throwable t) {
 +          log.error(t, t);
 +        }
 +      }
 +    }, 1000, 10 * 60 * 1000);
 +  }
 +  
 +  public static void waitForZookeeperAndHdfs(VolumeManager fs) {
 +    log.info("Attempting to talk to zookeeper");
 +    while (true) {
 +      try {
 +        ZooReaderWriter.getInstance().getChildren(Constants.ZROOT);
 +        break;
 +      } catch (InterruptedException e) {
 +        // ignored
 +      } catch (KeeperException ex) {
 +        log.info("Waiting for accumulo to be initialized");
 +        UtilWaitThread.sleep(1000);
 +      }
 +    }
 +    log.info("Zookeeper connected and initialized, attemping to talk to HDFS");
 +    long sleep = 1000;
 +    int unknownHostTries = 3;
 +    while (true) {
 +      try {
 +        if (fs.isReady())
 +          break;
 +        log.warn("Waiting for the NameNode to leave safemode");
 +      } catch (IOException ex) {
 +        log.warn("Unable to connect to HDFS", ex);
 +      } catch (IllegalArgumentException exception) {
 +        /* Unwrap the UnknownHostException so we can deal with it directly */
 +        if (exception.getCause() instanceof UnknownHostException) {
 +          if (unknownHostTries > 0) {
 +            log.warn("Unable to connect to HDFS, will retry. cause: " + exception.getCause());
 +            /* We need to make sure our sleep period is long enough to avoid getting a cached failure of the host lookup. */
 +            sleep = Math.max(sleep, (AddressUtil.getAddressCacheNegativeTtl((UnknownHostException)(exception.getCause()))+1)*1000);
 +          } else {
 +            log.error("Unable to connect to HDFS and have exceeded max number of retries.", exception);
 +            throw exception;
 +          }
 +          unknownHostTries--;
 +        } else {
 +          throw exception;
 +        }
 +      }
 +      log.info("Backing off due to failure; current sleep period is " + sleep / 1000. + " seconds");
 +      UtilWaitThread.sleep(sleep);
 +      /* Back off to give transient failures more time to clear. */
 +      sleep = Math.min(60 * 1000, sleep * 2);
 +    }
 +    log.info("Connected to HDFS");
 +  }
-   
++
++  /**
++   * Exit loudly if there are outstanding Fate operations.
++   * Since Fate serializes class names, we need to make sure there are no queued
++   * transactions from a previous version before continuing an upgrade. The status of the operations is
++   * irrelevant; those in SUCCESSFUL status cause the same problem as those just queued.
++   *
++   * Note that the Master should not allow write access to Fate until after all upgrade steps are complete.
++   *
++   * Should be called as a guard before performing any upgrade steps, after determining that an upgrade is needed.
++   *
++   * see ACCUMULO-2519
++   */
++  public static void abortIfFateTransactions() {
++    try {
++      final ReadOnlyTStore<Accumulo> fate = new ReadOnlyStore<Accumulo>(new ZooStore<Accumulo>(ZooUtil.getRoot(HdfsZooInstance.getInstance()) + Constants.ZFATE,
++          ZooReaderWriter.getRetryingInstance()));
++      if (!(fate.list().isEmpty())) {
++        throw new AccumuloException("Aborting upgrade because there are outstanding FATE transactions from a previous Accumulo version. Please see the README document for instructions on what to do under your previous version.");
++      }
++    } catch (Exception exception) {
++      log.fatal("Problem verifying Fate readiness", exception);
++      System.exit(1);
++    }
++  }
 +}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/957c9d1b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
----------------------------------------------------------------------
diff --cc server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
index 760d57f,0000000..374017d
mode 100644,000000..100644
--- a/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/MetadataTableUtil.java
@@@ -1,1018 -1,0 +1,1021 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +package org.apache.accumulo.server.util;
 +
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.Collections;
 +import java.util.Comparator;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Iterator;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +import java.util.Set;
 +import java.util.SortedMap;
 +import java.util.TreeMap;
 +import java.util.concurrent.TimeUnit;
 +
 +import org.apache.accumulo.core.Constants;
 +import org.apache.accumulo.core.client.AccumuloException;
 +import org.apache.accumulo.core.client.AccumuloSecurityException;
 +import org.apache.accumulo.core.client.BatchWriter;
 +import org.apache.accumulo.core.client.BatchWriterConfig;
 +import org.apache.accumulo.core.client.Connector;
 +import org.apache.accumulo.core.client.Instance;
 +import org.apache.accumulo.core.client.IsolatedScanner;
 +import org.apache.accumulo.core.client.MutationsRejectedException;
 +import org.apache.accumulo.core.client.Scanner;
 +import org.apache.accumulo.core.client.TableNotFoundException;
 +import org.apache.accumulo.core.client.impl.BatchWriterImpl;
 +import org.apache.accumulo.core.client.impl.ScannerImpl;
 +import org.apache.accumulo.core.client.impl.Writer;
 +import org.apache.accumulo.core.data.Key;
 +import org.apache.accumulo.core.data.KeyExtent;
 +import org.apache.accumulo.core.data.Mutation;
 +import org.apache.accumulo.core.data.PartialKey;
 +import org.apache.accumulo.core.data.Range;
 +import org.apache.accumulo.core.data.Value;
 +import org.apache.accumulo.core.metadata.MetadataTable;
 +import org.apache.accumulo.core.metadata.RootTable;
 +import org.apache.accumulo.core.metadata.schema.DataFileValue;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ChoppedColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ClonedColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.LogColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ScanFileColumnFamily;
 +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily;
 +import org.apache.accumulo.core.security.Authorizations;
 +import org.apache.accumulo.core.security.Credentials;
 +import org.apache.accumulo.core.tabletserver.log.LogEntry;
 +import org.apache.accumulo.core.tabletserver.thrift.ConstraintViolationException;
 +import org.apache.accumulo.core.util.ColumnFQ;
 +import org.apache.accumulo.core.util.FastFormat;
 +import org.apache.accumulo.core.util.Pair;
 +import org.apache.accumulo.core.util.UtilWaitThread;
 +import org.apache.accumulo.core.zookeeper.ZooUtil;
 +import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
 +import org.apache.accumulo.fate.zookeeper.ZooUtil.NodeExistsPolicy;
 +import org.apache.accumulo.fate.zookeeper.ZooUtil.NodeMissingPolicy;
 +import org.apache.accumulo.server.ServerConstants;
 +import org.apache.accumulo.server.client.HdfsZooInstance;
 +import org.apache.accumulo.server.fs.FileRef;
 +import org.apache.accumulo.server.fs.VolumeManager;
 +import org.apache.accumulo.server.fs.VolumeManagerImpl;
 +import org.apache.accumulo.server.security.SystemCredentials;
 +import org.apache.accumulo.server.zookeeper.ZooLock;
 +import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
 +import org.apache.hadoop.fs.FileStatus;
 +import org.apache.hadoop.fs.Path;
 +import org.apache.hadoop.io.Text;
 +import org.apache.log4j.Logger;
 +import org.apache.zookeeper.KeeperException;
 +
 +/**
 + * provides a reference to the metadata table for updates by tablet servers
 + */
 +public class MetadataTableUtil {
 +
 +  private static final Text EMPTY_TEXT = new Text();
 +  private static Map<Credentials,Writer> root_tables = new HashMap<Credentials,Writer>();
 +  private static Map<Credentials,Writer> metadata_tables = new HashMap<Credentials,Writer>();
 +  private static final Logger log = Logger.getLogger(MetadataTableUtil.class);
 +
 +  private MetadataTableUtil() {}
 +
 +  public synchronized static Writer getMetadataTable(Credentials credentials) {
 +    Writer metadataTable = metadata_tables.get(credentials);
 +    if (metadataTable == null) {
 +      metadataTable = new Writer(HdfsZooInstance.getInstance(), credentials, MetadataTable.ID);
 +      metadata_tables.put(credentials, metadataTable);
 +    }
 +    return metadataTable;
 +  }
 +
 +  private synchronized static Writer getRootTable(Credentials credentials) {
 +    Writer rootTable = root_tables.get(credentials);
 +    if (rootTable == null) {
 +      rootTable = new Writer(HdfsZooInstance.getInstance(), credentials, RootTable.ID);
 +      root_tables.put(credentials, rootTable);
 +    }
 +    return rootTable;
 +  }
 +
 +  private static void putLockID(ZooLock zooLock, Mutation m) {
 +    TabletsSection.ServerColumnFamily.LOCK_COLUMN.put(m, new Value(zooLock.getLockID().serialize(ZooUtil.getRoot(HdfsZooInstance.getInstance()) + "/")
 +        .getBytes(Constants.UTF8)));
 +  }
 +
 +  private static void update(Credentials credentials, Mutation m, KeyExtent extent) {
 +    update(credentials, null, m, extent);
 +  }
 +
 +  public static void update(Credentials credentials, ZooLock zooLock, Mutation m, KeyExtent extent) {
 +    Writer t = extent.isMeta() ? getRootTable(credentials) : getMetadataTable(credentials);
 +    if (zooLock != null)
 +      putLockID(zooLock, m);
 +    while (true) {
 +      try {
 +        t.update(m);
 +        return;
 +      } catch (AccumuloException e) {
 +        log.error(e, e);
 +      } catch (AccumuloSecurityException e) {
 +        log.error(e, e);
 +      } catch (ConstraintViolationException e) {
 +        log.error(e, e);
 +      } catch (TableNotFoundException e) {
 +        log.error(e, e);
 +      }
 +      UtilWaitThread.sleep(1000);
 +    }
 +
 +  }
 +
 +  public static void updateTabletFlushID(KeyExtent extent, long flushID, Credentials credentials, ZooLock zooLock) {
 +    if (!extent.isRootTablet()) {
 +      Mutation m = new Mutation(extent.getMetadataEntry());
 +      TabletsSection.ServerColumnFamily.FLUSH_COLUMN.put(m, new Value((flushID + "").getBytes(Constants.UTF8)));
 +      update(credentials, zooLock, m, extent);
 +    }
 +  }
 +
 +  public static void updateTabletCompactID(KeyExtent extent, long compactID, Credentials credentials, ZooLock zooLock) {
 +    if (!extent.isRootTablet()) {
 +      Mutation m = new Mutation(extent.getMetadataEntry());
 +      TabletsSection.ServerColumnFamily.COMPACT_COLUMN.put(m, new Value((compactID + "").getBytes(Constants.UTF8)));
 +      update(credentials, zooLock, m, extent);
 +    }
 +  }
 +
 +  public static void updateTabletDataFile(long tid, KeyExtent extent, Map<FileRef,DataFileValue> estSizes, String time, Credentials credentials, ZooLock zooLock) {
 +    Mutation m = new Mutation(extent.getMetadataEntry());
 +    byte[] tidBytes = Long.toString(tid).getBytes(Constants.UTF8);
 +
 +    for (Entry<FileRef,DataFileValue> entry : estSizes.entrySet()) {
 +      Text file = entry.getKey().meta();
 +      m.put(DataFileColumnFamily.NAME, file, new Value(entry.getValue().encode()));
 +      m.put(TabletsSection.BulkFileColumnFamily.NAME, file, new Value(tidBytes));
 +    }
 +    TabletsSection.ServerColumnFamily.TIME_COLUMN.put(m, new Value(time.getBytes(Constants.UTF8)));
 +    update(credentials, zooLock, m, extent);
 +  }
 +
 +  public static void updateTabletDir(KeyExtent extent, String newDir, Credentials creds, ZooLock lock) {
 +    Mutation m = new Mutation(extent.getMetadataEntry());
 +    TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value(newDir.getBytes(Constants.UTF8)));
 +    update(creds, lock, m, extent);
 +  }
 +
 +  /**
 +   * Writes the tablet's prev-row update mutation extended with its directory ({@code path}) and an initial time value made of {@code timeType} followed by
 +   * "0".
 +   */
 +  public static void addTablet(KeyExtent extent, String path, Credentials credentials, char timeType, ZooLock lock) {
 +    Mutation newTablet = extent.getPrevRowUpdateMutation();
 +
 +    Value directory = new Value(path.getBytes(Constants.UTF8));
 +    TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.put(newTablet, directory);
 +
 +    String initialTime = timeType + "0";
 +    TabletsSection.ServerColumnFamily.TIME_COLUMN.put(newTablet, new Value(initialTime.getBytes(Constants.UTF8)));
 +
 +    update(credentials, lock, newTablet, extent);
 +  }
 +
 +  /**
 +   * Submits the extent's prev-row update mutation (no lock is passed to {@code update}).
 +   */
 +  public static void updateTabletPrevEndRow(KeyExtent extent, Credentials credentials) {
 +    update(credentials, extent.getPrevRowUpdateMutation(), extent);
 +  }
 +
 +  /**
 +   * Applies the given write-ahead log and data file removals/additions to a tablet and optionally sets a new directory.
 +   * <p>
 +   * For the root tablet only log changes are accepted; a non-null {@code newDir} or any file change is rejected. For every other tablet all changes are
 +   * packed into a single mutation.
 +   *
 +   * @throws IllegalArgumentException
 +   *           if {@code extent} is the root tablet and {@code newDir} is non-null or either file collection is non-empty
 +   */
 +  public static void updateTabletVolumes(KeyExtent extent, List<LogEntry> logsToRemove, List<LogEntry> logsToAdd, List<FileRef> filesToRemove,
 +      SortedMap<FileRef,DataFileValue> filesToAdd, String newDir, ZooLock zooLock, Credentials credentials) {
 +
 +    if (!extent.isRootTablet()) {
 +      Mutation volumeUpdate = new Mutation(extent.getMetadataEntry());
 +
 +      for (LogEntry oldLog : logsToRemove) {
 +        volumeUpdate.putDelete(oldLog.getColumnFamily(), oldLog.getColumnQualifier());
 +      }
 +
 +      for (LogEntry newLog : logsToAdd) {
 +        volumeUpdate.put(newLog.getColumnFamily(), newLog.getColumnQualifier(), newLog.getValue());
 +      }
 +
 +      for (FileRef oldFile : filesToRemove) {
 +        volumeUpdate.putDelete(DataFileColumnFamily.NAME, oldFile.meta());
 +      }
 +
 +      for (Entry<FileRef,DataFileValue> newFile : filesToAdd.entrySet()) {
 +        volumeUpdate.put(DataFileColumnFamily.NAME, newFile.getKey().meta(), new Value(newFile.getValue().encode()));
 +      }
 +
 +      if (newDir != null) {
 +        ServerColumnFamily.DIRECTORY_COLUMN.put(volumeUpdate, new Value(newDir.getBytes(Constants.UTF8)));
 +      }
 +
 +      update(credentials, volumeUpdate, extent);
 +      return;
 +    }
 +
 +    if (newDir != null) {
 +      throw new IllegalArgumentException("newDir not expected for " + extent);
 +    }
 +
 +    if (!filesToRemove.isEmpty() || !filesToAdd.isEmpty()) {
 +      throw new IllegalArgumentException("files not expected for " + extent);
 +    }
 +
 +    // add before removing in case of process death
 +    for (LogEntry newLog : logsToAdd) {
 +      addLogEntry(credentials, newLog, zooLock);
 +    }
 +
 +    removeUnusedWALEntries(extent, logsToRemove, zooLock);
 +  }
 +
 +  /**
 +   * Scans the data file column family of the tablet's metadata row and returns each referenced file with its decoded {@link DataFileValue}.
 +   *
 +   * @return a sorted map from file reference to size information; empty when the row has no data file entries
 +   */
 +  public static SortedMap<FileRef,DataFileValue> getDataFileSizes(KeyExtent extent, Credentials credentials) throws IOException {
 +    TreeMap<FileRef,DataFileValue> fileSizes = new TreeMap<FileRef,DataFileValue>();
 +
 +    Scanner fileScanner = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, MetadataTable.ID, Authorizations.EMPTY);
 +    fileScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
 +    Text tabletRow = extent.getMetadataEntry();
 +    VolumeManager fs = VolumeManagerImpl.get();
 +
 +    // scan from the start of the row up to the key following the data file column family
 +    Key lastKey = new Key(tabletRow, DataFileColumnFamily.NAME, new Text("")).followingKey(PartialKey.ROW_COLFAM);
 +    fileScanner.setRange(new Range(new Key(tabletRow), lastKey));
 +
 +    for (Entry<Key,Value> cell : fileScanner) {
 +      Key cellKey = cell.getKey();
 +      if (!cellKey.getRow().equals(tabletRow)) {
 +        break;
 +      }
 +
 +      DataFileValue size = new DataFileValue(cell.getValue().get());
 +      fileSizes.put(new FileRef(fs, cellKey), size);
 +    }
 +
 +    return fileSizes;
 +  }
 +
 +  /**
 +   * Undoes an in-progress split: rewrites the prev-row entry using {@code oldPrevEndRow} and deletes the SPLIT_RATIO_COLUMN and OLD_PREV_ROW_COLUMN
 +   * entries from the same row.
 +   */
 +  public static void rollBackSplit(Text metadataEntry, Text oldPrevEndRow, Credentials credentials, ZooLock zooLock) {
 +    KeyExtent originalExtent = new KeyExtent(metadataEntry, oldPrevEndRow);
 +    Mutation rollback = originalExtent.getPrevRowUpdateMutation();
 +
 +    TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.putDelete(rollback);
 +    TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.putDelete(rollback);
 +
 +    KeyExtent target = new KeyExtent(metadataEntry, (Text) null);
 +    update(credentials, zooLock, rollback, target);
 +  }
 +
 +  /**
 +   * Records the start of a split: on top of the extent's prev-row update mutation it stores the split ratio and the encoded old prev end row, and deletes
 +   * the CHOPPED_COLUMN entry.
 +   */
 +  public static void splitTablet(KeyExtent extent, Text oldPrevEndRow, double splitRatio, Credentials credentials, ZooLock zooLock) {
 +    Mutation splitStart = extent.getPrevRowUpdateMutation();
 +
 +    Value ratio = new Value(Double.toString(splitRatio).getBytes(Constants.UTF8));
 +    TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.put(splitStart, ratio);
 +    TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.put(splitStart, KeyExtent.encodePrevEndRow(oldPrevEndRow));
 +    ChoppedColumnFamily.CHOPPED_COLUMN.putDelete(splitStart);
 +
 +    update(credentials, zooLock, splitStart, extent);
 +  }
 +
 +  /**
 +   * Completes a split for the row {@code metadataEntry}: deletes the split ratio, old prev row and chopped markers, writes the entries in
 +   * {@code datafileSizes} and deletes the data file entries listed in {@code highDatafilesToRemove}, all in one mutation.
 +   */
 +  public static void finishSplit(Text metadataEntry, Map<FileRef,DataFileValue> datafileSizes, List<FileRef> highDatafilesToRemove, Credentials credentials,
 +      ZooLock zooLock) {
 +    Mutation splitDone = new Mutation(metadataEntry);
 +
 +    // clear the markers left behind while the split was in progress
 +    TabletsSection.TabletColumnFamily.SPLIT_RATIO_COLUMN.putDelete(splitDone);
 +    TabletsSection.TabletColumnFamily.OLD_PREV_ROW_COLUMN.putDelete(splitDone);
 +    ChoppedColumnFamily.CHOPPED_COLUMN.putDelete(splitDone);
 +
 +    for (Entry<FileRef,DataFileValue> fileAndSize : datafileSizes.entrySet()) {
 +      Value encodedSize = new Value(fileAndSize.getValue().encode());
 +      splitDone.put(DataFileColumnFamily.NAME, fileAndSize.getKey().meta(), encodedSize);
 +    }
 +
 +    for (FileRef staleFile : highDatafilesToRemove) {
 +      splitDone.putDelete(DataFileColumnFamily.NAME, staleFile.meta());
 +    }
 +
 +    KeyExtent target = new KeyExtent(metadataEntry, (Text) null);
 +    update(credentials, zooLock, splitDone, target);
 +  }
 +
 +  public static void finishSplit(KeyExtent extent, Map<FileRef,DataFileValue> datafileSizes, List<FileRef> highDatafilesToRemove, Credentials credentials,
 +      ZooLock zooLock) {
 +    finishSplit(extent.getMetadataEntry(), datafileSizes, highDatafilesToRemove, credentials, zooLock);
 +  }
 +
 +  /**
 +   * Writes one delete-marker mutation per file in {@code datafilesToDelete}, each submitted individually through {@code update}.
 +   */
 +  public static void addDeleteEntries(KeyExtent extent, Set<FileRef> datafilesToDelete, Credentials credentials) throws IOException {
 +    String tableId = extent.getTableId().toString();
 +
 +    // TODO could use batch writer,would need to handle failure and retry like update does - ACCUMULO-1294
 +    for (FileRef doomedFile : datafilesToDelete) {
 +      Mutation deleteMarker = createDeleteMutation(tableId, doomedFile.path().toString());
 +      update(credentials, deleteMarker, extent);
 +    }
 +  }
 +
 +  public static void addDeleteEntry(String tableId, String path) throws IOException {
 +    update(SystemCredentials.get(), createDeleteMutation(tableId, path), new KeyExtent(new Text(tableId), null, null));
 +  }
 +
 +  /**
 +   * Creates the delete-marker mutation for a file: the row is the deletes-section row prefix followed by the file's full path, with an empty column
 +   * family, empty qualifier and empty value.
 +   *
 +   * @return the mutation; it is not submitted by this method
 +   */
 +  public static Mutation createDeleteMutation(String tableId, String pathToRemove) throws IOException {
 +    Path fullPath = VolumeManagerImpl.get().getFullPath(tableId, pathToRemove);
 +    String markerRow = MetadataSchema.DeletesSection.getRowPrefix() + fullPath.toString();
 +
 +    Mutation deleteMarker = new Mutation(new Text(markerRow));
 +    deleteMarker.put(EMPTY_TEXT, EMPTY_TEXT, new Value(new byte[0]));
 +    return deleteMarker;
 +  }
 +
 +  /**
 +   * Deletes the scan file entries for {@code scanFiles} from the tablet's metadata row in a single mutation.
 +   */
 +  public static void removeScanFiles(KeyExtent extent, Set<FileRef> scanFiles, Credentials credentials, ZooLock zooLock) {
 +    Mutation scanCleanup = new Mutation(extent.getMetadataEntry());
 +
 +    for (FileRef scanFile : scanFiles) {
 +      scanCleanup.putDelete(ScanFileColumnFamily.NAME, scanFile.meta());
 +    }
 +
 +    update(credentials, zooLock, scanCleanup, extent);
 +  }
 +
 +  /**
 +   * Distributes the files of a tablet being split at {@code midRow} between the low and high children.
 +   * <ul>
 +   * <li>A file whose known first row is after {@code midRow} goes only to {@code highDatafileSizes} with its full size.</li>
 +   * <li>A file whose known last row is at or before {@code midRow} goes only to {@code lowDatafileSizes} and is added to
 +   * {@code highDatafilesToRemove}.</li>
 +   * <li>Any other file (rows unknown, or spanning {@code midRow}) goes to both, with size and entry count apportioned by {@code splitRatio} (floor for low,
 +   * ceil for high).</li>
 +   * </ul>
 +   * The three output collections are filled in place. The {@code table} parameter is not used.
 +   */
 +  public static void splitDatafiles(Text table, Text midRow, double splitRatio, Map<FileRef,FileUtil.FileInfo> firstAndLastRows,
 +      SortedMap<FileRef,DataFileValue> datafiles, SortedMap<FileRef,DataFileValue> lowDatafileSizes, SortedMap<FileRef,DataFileValue> highDatafileSizes,
 +      List<FileRef> highDatafilesToRemove) {
 +
 +    for (Entry<FileRef,DataFileValue> fileAndSize : datafiles.entrySet()) {
 +      FileRef file = fileAndSize.getKey();
 +      DataFileValue whole = fileAndSize.getValue();
 +
 +      FileUtil.FileInfo rowInfo = firstAndLastRows.get(file);
 +      boolean rowsKnown = rowInfo != null;
 +      Text firstRow = rowsKnown ? rowInfo.getFirstRow() : null;
 +      Text lastRow = rowsKnown ? rowInfo.getLastRow() : null;
 +
 +      if (rowsKnown && firstRow.compareTo(midRow) > 0) {
 +        // only in high
 +        highDatafileSizes.put(file, new DataFileValue(whole.getSize(), whole.getNumEntries(), whole.getTime()));
 +        continue;
 +      }
 +
 +      if (rowsKnown && lastRow.compareTo(midRow) <= 0) {
 +        // only in low
 +        lowDatafileSizes.put(file, new DataFileValue(whole.getSize(), whole.getNumEntries(), whole.getTime()));
 +        highDatafilesToRemove.add(file);
 +        continue;
 +      }
 +
 +      // in both children: estimate each side's share from the split ratio
 +      long lowSize = (long) Math.floor((whole.getSize() * splitRatio));
 +      long lowEntries = (long) Math.floor((whole.getNumEntries() * splitRatio));
 +      lowDatafileSizes.put(file, new DataFileValue(lowSize, lowEntries, whole.getTime()));
 +
 +      long highSize = (long) Math.ceil((whole.getSize() * (1.0 - splitRatio)));
 +      long highEntries = (long) Math.ceil((whole.getNumEntries() * (1.0 - splitRatio)));
 +      highDatafileSizes.put(file, new DataFileValue(highSize, highEntries, whole.getTime()));
 +    }
 +  }
 +
 +  /**
 +   * Deletes every metadata entry in the metadata range of {@code tableId}.
 +   * <p>
 +   * When {@code insertDeletes} is true, a delete-marker mutation is first written (and flushed) for each data file entry and each directory entry found, and
 +   * only then are the metadata entries themselves deleted.
 +   *
 +   * @param tableId
 +   *          id of the table whose metadata entries are removed
 +   * @param insertDeletes
 +   *          whether to write delete markers for the table's files and directories before removing the entries
 +   * @param credentials
 +   *          credentials used for the scanner and the batch writer
 +   * @param lock
 +   *          if non-null, its lock id is added to each delete mutation via {@code putLockID}
 +   */
 +  public static void deleteTable(String tableId, boolean insertDeletes, Credentials credentials, ZooLock lock) throws AccumuloException, IOException {
 +    Scanner ms = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, MetadataTable.ID, Authorizations.EMPTY);
 +    Text tableIdText = new Text(tableId);
 +    BatchWriter bw = new BatchWriterImpl(HdfsZooInstance.getInstance(), credentials, MetadataTable.ID, new BatchWriterConfig().setMaxMemory(1000000)
 +        .setMaxLatency(120000l, TimeUnit.MILLISECONDS).setMaxWriteThreads(2));
 +
 +    // scan metadata for our table and delete everything we find
 +    Mutation m = null;
 +    ms.setRange(new KeyExtent(tableIdText, null, null).toMetadataRange());
 +
 +    // insert deletes before deleting data from metadata... this makes the code fault tolerant
 +    if (insertDeletes) {
 +
 +      // first pass only looks at data file and directory entries
 +      ms.fetchColumnFamily(DataFileColumnFamily.NAME);
 +      TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(ms);
 +
 +      for (Entry<Key,Value> cell : ms) {
 +        Key key = cell.getKey();
 +
 +        if (key.getColumnFamily().equals(DataFileColumnFamily.NAME)) {
 +          FileRef ref = new FileRef(VolumeManagerImpl.get(), key);
 +          bw.addMutation(createDeleteMutation(tableId, ref.meta().toString()));
 +        }
 +
 +        if (TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) {
 +          bw.addMutation(createDeleteMutation(tableId, cell.getValue().toString()));
 +        }
 +      }
 +
 +      // make sure the delete markers are written before any metadata entry is removed
 +      bw.flush();
 +
 +      // the second pass below must see every column again
 +      ms.clearColumns();
 +    }
 +
 +    // second pass: build one delete mutation per row, covering every column seen in that row
 +    for (Entry<Key,Value> cell : ms) {
 +      Key key = cell.getKey();
 +
 +      if (m == null) {
 +        m = new Mutation(key.getRow());
 +        if (lock != null)
 +          putLockID(lock, m);
 +      }
 +
 +      // row changed: queue the finished mutation and start a new one for the new row
 +      if (key.getRow().compareTo(m.getRow(), 0, m.getRow().length) != 0) {
 +        bw.addMutation(m);
 +        m = new Mutation(key.getRow());
 +        if (lock != null)
 +          putLockID(lock, m);
 +      }
 +      m.putDelete(key.getColumnFamily(), key.getColumnQualifier());
 +    }
 +
 +    // queue the mutation for the last row, if any row was seen
 +    if (m != null)
 +      bw.addMutation(m);
 +
 +    bw.close();
 +  }
 +
 +  /**
 +   * Returns the ZooKeeper path formed by the instance's ZooKeeper root followed by {@code RootTable.ZROOT_TABLET_WALOGS}.
 +   */
 +  static String getZookeeperLogLocation() {
 +    String instanceRoot = ZooUtil.getRoot(HdfsZooInstance.getInstance());
 +    return instanceRoot + RootTable.ZROOT_TABLET_WALOGS;
 +  }
 +
 +  /**
 +   * Records a write-ahead log entry for a tablet.
 +   * <p>
 +   * For the root tablet the entry is stored in ZooKeeper under {@link #getZookeeperLogLocation()}, in a node named after the last path component of the log
 +   * file name; the write is skipped if {@code zooLock} is not held. ZooKeeper, interruption and I/O errors are logged and the write is retried after a
 +   * pause, with no upper bound on the number of attempts. For any other tablet the entry is written as a mutation via {@code update}.
 +   */
 +  public static void addLogEntry(Credentials credentials, LogEntry entry, ZooLock zooLock) {
 +    if (entry.extent.isRootTablet()) {
 +      String root = getZookeeperLogLocation();
 +      // keep retrying until the attempt completes without an exception
 +      while (true) {
 +        try {
 +          IZooReaderWriter zoo = ZooReaderWriter.getInstance();
 +          // only write while the lock is held; otherwise fall through to the break without writing
 +          if (zoo.isLockHeld(zooLock.getLockID())) {
 +            String[] parts = entry.filename.split("/");
 +            // the last path component of the log file name is used as the node name
 +            String uniqueId = parts[parts.length - 1];
 +            zoo.putPersistentData(root + "/" + uniqueId, entry.toBytes(), NodeExistsPolicy.OVERWRITE);
 +          }
 +          break;
 +        } catch (KeeperException e) {
 +          log.error(e, e);
 +        } catch (InterruptedException e) {
 +          // NOTE(review): the interrupt status is not restored here, unlike setRootTabletDir/getRootTabletDir
 +          log.error(e, e);
 +        } catch (IOException e) {
 +          log.error(e, e);
 +        }
 +        // pause before the next attempt (presumably 1000 ms; confirm against UtilWaitThread)
 +        UtilWaitThread.sleep(1000);
 +      }
 +    } else {
 +      Mutation m = new Mutation(entry.getRow());
 +      m.put(entry.getColumnFamily(), entry.getColumnQualifier(), entry.getValue());
 +      update(credentials, zooLock, m, entry.extent);
 +    }
 +  }
 +
 +  public static void setRootTabletDir(String dir) throws IOException {
 +    IZooReaderWriter zoo = ZooReaderWriter.getInstance();
 +    String zpath = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + RootTable.ZROOT_TABLET_PATH;
 +    try {
 +      zoo.putPersistentData(zpath, dir.getBytes(Constants.UTF8), -1, NodeExistsPolicy.OVERWRITE);
 +    } catch (KeeperException e) {
 +      throw new IOException(e);
 +    } catch (InterruptedException e) {
 +      Thread.currentThread().interrupt();
 +      throw new IOException(e);
 +    }
 +  }
 +
 +  public static String getRootTabletDir() throws IOException {
 +    IZooReaderWriter zoo = ZooReaderWriter.getInstance();
 +    String zpath = ZooUtil.getRoot(HdfsZooInstance.getInstance()) + RootTable.ZROOT_TABLET_PATH;
 +    try {
 +      return new String(zoo.getData(zpath, null), Constants.UTF8);
 +    } catch (KeeperException e) {
 +      throw new IOException(e);
 +    } catch (InterruptedException e) {
 +      Thread.currentThread().interrupt();
 +      throw new IOException(e);
 +    }
 +  }
 +
 +  /**
 +   * Collects the write-ahead log entries and data files of a tablet.
 +   * <p>
 +   * For the root tablet, logs come from {@link #getRootLogEntries(ArrayList)} and files from listing the root tablet directory (paths ending in "_tmp" are
 +   * skipped; sizes are reported as 0/0). For any other tablet, both are read from the tablet's row in the root table (for metadata tablets) or the metadata
 +   * table.
 +   *
 +   * @return a pair of (log entries, files with size information)
 +   * @throws RuntimeException
 +   *           if the scan returns an unexpected row or column family
 +   */
 +  public static Pair<List<LogEntry>,SortedMap<FileRef,DataFileValue>> getFileAndLogEntries(Credentials credentials, KeyExtent extent) throws KeeperException,
 +      InterruptedException, IOException {
 +    ArrayList<LogEntry> logs = new ArrayList<LogEntry>();
 +    TreeMap<FileRef,DataFileValue> files = new TreeMap<FileRef,DataFileValue>();
 +
 +    VolumeManager fs = VolumeManagerImpl.get();
 +    if (extent.isRootTablet()) {
 +      getRootLogEntries(logs);
 +      Path rootTabletDir = new Path(getRootTabletDir());
 +      for (FileStatus status : fs.listStatus(rootTabletDir)) {
 +        String location = status.getPath().toString();
 +        if (!location.endsWith("_tmp")) {
 +          DataFileValue unknownSize = new DataFileValue(0, 0);
 +          files.put(new FileRef(status.getPath().toString(), status.getPath()), unknownSize);
 +        }
 +      }
 +
 +    } else {
 +      String tableToScan = extent.isMeta() ? RootTable.ID : MetadataTable.ID;
 +      Scanner tabletScanner = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, tableToScan, Authorizations.EMPTY);
 +      tabletScanner.fetchColumnFamily(LogColumnFamily.NAME);
 +      tabletScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
 +      tabletScanner.setRange(extent.toMetadataRange());
 +
 +      for (Entry<Key,Value> cell : tabletScanner) {
 +        Key cellKey = cell.getKey();
 +
 +        if (!cellKey.getRow().equals(extent.getMetadataEntry())) {
 +          throw new RuntimeException("Unexpected row " + cellKey.getRow() + " expected " + extent.getMetadataEntry());
 +        }
 +
 +        if (cellKey.getColumnFamily().equals(LogColumnFamily.NAME)) {
 +          logs.add(LogEntry.fromKeyValue(cellKey, cell.getValue()));
 +        } else if (cellKey.getColumnFamily().equals(DataFileColumnFamily.NAME)) {
 +          DataFileValue size = new DataFileValue(cell.getValue().get());
 +          files.put(new FileRef(fs, cellKey), size);
 +        } else {
 +          throw new RuntimeException("Unexpected col fam " + cellKey.getColumnFamily());
 +        }
 +      }
 +    }
 +
 +    return new Pair<List<LogEntry>,SortedMap<FileRef,DataFileValue>>(logs, files);
 +  }
 +
 +  /**
 +   * Returns the write-ahead log entries of a tablet, sorted by ascending timestamp.
 +   * <p>
 +   * Entries for the root tablet are read from ZooKeeper; entries for any other tablet are read with the scanner from
 +   * {@link #getTabletLogScanner(Credentials, KeyExtent)}, keeping only log-family cells whose row equals the tablet's metadata row.
 +   *
 +   * @return the sorted log entries; empty if none were found
 +   */
 +  public static List<LogEntry> getLogEntries(Credentials credentials, KeyExtent extent) throws IOException, KeeperException, InterruptedException {
 +    log.info("Scanning logging entries for " + extent);
 +    ArrayList<LogEntry> result = new ArrayList<LogEntry>();
 +    if (extent.equals(RootTable.EXTENT)) {
 +      log.info("Getting logs for root tablet from zookeeper");
 +      getRootLogEntries(result);
 +    } else {
 +      log.info("Scanning metadata for logs used for tablet " + extent);
 +      Scanner scanner = getTabletLogScanner(credentials, extent);
 +      Text pattern = extent.getMetadataEntry();
 +      for (Entry<Key,Value> entry : scanner) {
 +        Text row = entry.getKey().getRow();
 +        if (entry.getKey().getColumnFamily().equals(LogColumnFamily.NAME)) {
 +          if (row.equals(pattern)) {
 +            result.add(LogEntry.fromKeyValue(entry.getKey(), entry.getValue()));
 +          }
 +        }
 +      }
 +    }
 +
 +    Collections.sort(result, new Comparator<LogEntry>() {
 +      @Override
 +      public int compare(LogEntry o1, LogEntry o2) {
 +        // compare directly rather than subtracting: a difference of two longs can overflow and flip the sign
 +        if (o1.timestamp < o2.timestamp)
 +          return -1;
 +        if (o1.timestamp > o2.timestamp)
 +          return 1;
 +        return 0;
 +      }
 +    });
 +    log.info("Returning logs " + result + " for extent " + extent);
 +    return result;
 +  }
 +
 +  /**
 +   * Replaces the contents of {@code result} with the log entries stored as children of {@link #getZookeeperLogLocation()}. Each entry's extent is set to
 +   * {@code RootTable.EXTENT}.
 +   */
 +  static void getRootLogEntries(ArrayList<LogEntry> result) throws KeeperException, InterruptedException, IOException {
 +    IZooReaderWriter zk = ZooReaderWriter.getInstance();
 +    String logRoot = getZookeeperLogLocation();
 +
 +    result.clear();
 +    for (String child : zk.getChildren(logRoot)) {
 +      LogEntry logEntry = new LogEntry();
 +      try {
 +        logEntry.fromBytes(zk.getData(logRoot + "/" + child, null));
 +      } catch (KeeperException.NoNodeException ex) {
 +        // there's a little race between getting the children and fetching
 +        // the data. The log can be removed in between; such a child is skipped.
 +        continue;
 +      }
 +      // upgrade from !0;!0<< -> +r<<
 +      logEntry.extent = RootTable.EXTENT;
 +      result.add(logEntry);
 +    }
 +  }
 +
 +  /**
 +   * Creates a scanner over the log column family of the tablet's metadata row. Metadata tablets are looked up in the root table, all others in the metadata
 +   * table.
 +   */
 +  private static Scanner getTabletLogScanner(Credentials credentials, KeyExtent extent) {
 +    String tableToScan = extent.isMeta() ? RootTable.ID : MetadataTable.ID;
 +    Scanner logScanner = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, tableToScan, Authorizations.EMPTY);
 +    logScanner.fetchColumnFamily(LogColumnFamily.NAME);
 +
 +    // from the start of the tablet's row up to the key following its log column family
 +    Text tabletRow = extent.getMetadataEntry();
 +    Key lastKey = new Key(tabletRow, LogColumnFamily.NAME).followingKey(PartialKey.ROW_COLFAM);
 +    logScanner.setRange(new Range(new Key(tabletRow), lastKey));
 +    return logScanner;
 +  }
 +
 +  /**
 +   * Iterator that chains three sources of log entries, in this order: the entries returned for the root tablet extent, the entries returned for the
 +   * metadata table extent, and the log column family cells scanned from the tablets section of the metadata table. Removal is not supported.
 +   */
 +  private static class LogEntryIterator implements Iterator<LogEntry> {
 +
 +    Iterator<LogEntry> zookeeperEntries;
 +    Iterator<LogEntry> rootTableEntries;
 +    Iterator<Entry<Key,Value>> metadataEntries;
 +
 +    LogEntryIterator(Credentials creds) throws IOException, KeeperException, InterruptedException {
 +      List<LogEntry> rootTabletLogs = getLogEntries(creds, RootTable.EXTENT);
 +      zookeeperEntries = rootTabletLogs.iterator();
 +
 +      KeyExtent metadataTableExtent = new KeyExtent(new Text(MetadataTable.ID), null, null);
 +      rootTableEntries = getLogEntries(creds, metadataTableExtent).iterator();
 +
 +      try {
 +        Connector conn = HdfsZooInstance.getInstance().getConnector(creds.getPrincipal(), creds.getToken());
 +        Scanner tabletsScanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
 +        log.info("Setting range to " + MetadataSchema.TabletsSection.getRange());
 +        tabletsScanner.setRange(MetadataSchema.TabletsSection.getRange());
 +        tabletsScanner.fetchColumnFamily(LogColumnFamily.NAME);
 +        metadataEntries = tabletsScanner.iterator();
 +      } catch (Exception ex) {
 +        // any failure to set up the metadata scan is reported as an IOException
 +        throw new IOException(ex);
 +      }
 +    }
 +
 +    @Override
 +    public boolean hasNext() {
 +      if (zookeeperEntries.hasNext()) {
 +        return true;
 +      }
 +      if (rootTableEntries.hasNext()) {
 +        return true;
 +      }
 +      return metadataEntries.hasNext();
 +    }
 +
 +    @Override
 +    public LogEntry next() {
 +      // drain the sources strictly in order
 +      if (zookeeperEntries.hasNext()) {
 +        return zookeeperEntries.next();
 +      } else if (rootTableEntries.hasNext()) {
 +        return rootTableEntries.next();
 +      } else {
 +        Entry<Key,Value> cell = metadataEntries.next();
 +        return LogEntry.fromKeyValue(cell.getKey(), cell.getValue());
 +      }
 +    }
 +
 +    @Override
 +    public void remove() {
 +      throw new UnsupportedOperationException();
 +    }
 +  }
 +
 +  /**
 +   * Returns an iterator over all log entries reachable with {@code creds}, backed by a new {@code LogEntryIterator}.
 +   */
 +  public static Iterator<LogEntry> getLogEntries(Credentials creds) throws IOException, KeeperException, InterruptedException {
 +    Iterator<LogEntry> allLogs = new LogEntryIterator(creds);
 +    return allLogs;
 +  }
 +
 +  /**
 +   * Removes the given write-ahead log entries from a tablet.
 +   * <p>
 +   * For the root tablet each entry's ZooKeeper node (named after the last path component of the log file name) is deleted, provided {@code zooLock} is
 +   * held; any exception is logged and the delete is retried after a pause, with no upper bound on the number of attempts. For any other tablet the log
 +   * column entries are deleted in one mutation written with the system credentials.
 +   */
 +  public static void removeUnusedWALEntries(KeyExtent extent, List<LogEntry> logEntries, ZooLock zooLock) {
 +    if (extent.isRootTablet()) {
 +      for (LogEntry entry : logEntries) {
 +        String root = getZookeeperLogLocation();
 +        // keep retrying this entry until an attempt completes without an exception
 +        while (true) {
 +          try {
 +            IZooReaderWriter zoo = ZooReaderWriter.getInstance();
 +            // only delete while the lock is held; otherwise fall through to the break without deleting
 +            if (zoo.isLockHeld(zooLock.getLockID())) {
 +              String parts[] = entry.filename.split("/");
 +              zoo.recursiveDelete(root + "/" + parts[parts.length - 1], NodeMissingPolicy.SKIP);
 +            }
 +            break;
 +          } catch (Exception e) {
 +            log.error(e, e);
 +          }
 +          // pause before the next attempt (presumably 1000 ms; confirm against UtilWaitThread)
 +          UtilWaitThread.sleep(1000);
 +        }
 +      }
 +    } else {
 +      Mutation m = new Mutation(extent.getMetadataEntry());
 +      for (LogEntry entry : logEntries) {
 +        m.putDelete(LogColumnFamily.NAME, new Text(entry.getName()));
 +      }
 +      update(SystemCredentials.get(), zooLock, m, extent);
 +    }
 +  }
 +
 +  /**
 +   * Adds the qualifier of every data file entry in {@code tablet} to {@code files}. When {@code srcTableId} is non-null, a qualifier that neither starts
 +   * with "../" nor contains ":" is first prefixed with "../" and the source table id.
 +   */
 +  private static void getFiles(Set<String> files, Map<Key,Value> tablet, String srcTableId) {
 +    for (Key cellKey : tablet.keySet()) {
 +      if (!cellKey.getColumnFamily().equals(DataFileColumnFamily.NAME)) {
 +        continue;
 +      }
 +
 +      String file = cellKey.getColumnQualifier().toString();
 +      boolean alreadyQualified = file.startsWith("../") || file.contains(":");
 +      if (srcTableId != null && !alreadyQualified) {
 +        file = "../" + srcTableId + file;
 +      }
 +      files.add(file);
 +    }
 +  }
 +
 +  /**
 +   * Builds the mutation that copies one source tablet's entries into the corresponding row of the clone table {@code tableId}.
 +   * <ul>
 +   * <li>Data file qualifiers that neither start with "../" nor contain ":" are prefixed with "../" and the source table id.</li>
 +   * <li>Current location entries are written under the last location column family.</li>
 +   * <li>Existing last location entries are skipped.</li>
 +   * <li>Everything else is copied unchanged.</li>
 +   * </ul>
 +   */
 +  private static Mutation createCloneMutation(String srcTableId, String tableId, Map<Key,Value> tablet) {
 +
 +    Text srcRow = tablet.keySet().iterator().next().getRow();
 +    Text endRow = new KeyExtent(srcRow, (Text) null).getEndRow();
 +    Mutation cloneRow = new Mutation(KeyExtent.getMetadataEntry(new Text(tableId), endRow));
 +
 +    for (Entry<Key,Value> cell : tablet.entrySet()) {
 +      Key cellKey = cell.getKey();
 +      Text family = cellKey.getColumnFamily();
 +
 +      if (family.equals(DataFileColumnFamily.NAME)) {
 +        String file = cellKey.getColumnQualifier().toString();
 +        boolean alreadyQualified = file.startsWith("../") || file.contains(":");
 +        if (!alreadyQualified) {
 +          file = "../" + srcTableId + file;
 +        }
 +        cloneRow.put(family, new Text(file), cell.getValue());
 +      } else if (family.equals(TabletsSection.CurrentLocationColumnFamily.NAME)) {
 +        cloneRow.put(TabletsSection.LastLocationColumnFamily.NAME, cellKey.getColumnQualifier(), cell.getValue());
 +      } else if (!family.equals(TabletsSection.LastLocationColumnFamily.NAME)) {
 +        // last location entries of the source are intentionally not copied
 +        cloneRow.put(family, cellKey.getColumnQualifier(), cell.getValue());
 +      }
 +    }
 +    return cloneRow;
 +  }
 +
 +  /**
 +   * Creates an isolated scanner over the metadata range of {@code tableId}, fetching the columns the clone logic inspects: data files, current and last
 +   * location, the cloned marker, prev row and time. The metadata table's own tablets are read from the root table.
 +   */
 +  private static Scanner createCloneScanner(String tableId, Connector conn) throws TableNotFoundException {
 +    String tableToScan = tableId.equals(MetadataTable.ID) ? RootTable.NAME : MetadataTable.NAME;
 +
 +    Scanner cloneScanner = new IsolatedScanner(conn.createScanner(tableToScan, Authorizations.EMPTY));
 +    cloneScanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange());
 +
 +    cloneScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
 +    cloneScanner.fetchColumnFamily(TabletsSection.CurrentLocationColumnFamily.NAME);
 +    cloneScanner.fetchColumnFamily(TabletsSection.LastLocationColumnFamily.NAME);
 +    cloneScanner.fetchColumnFamily(ClonedColumnFamily.NAME);
 +    TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.fetch(cloneScanner);
 +    TabletsSection.ServerColumnFamily.TIME_COLUMN.fetch(cloneScanner);
 +
 +    return cloneScanner;
 +  }
 +
 +  /**
 +   * Writes an initial copy of every source tablet's metadata into the clone table's rows via {@code bw}, then flushes the writer.
 +   *
 +   * @throws RuntimeException
 +   *           if the source table has no tablets to iterate over
 +   */
 +  static void initializeClone(String srcTableId, String tableId, Connector conn, BatchWriter bw) throws TableNotFoundException, MutationsRejectedException {
 +    Scanner srcScanner = createCloneScanner(srcTableId, conn);
 +
 +    // the metadata table itself is iterated with an unbounded range
 +    Range srcRange;
 +    if (srcTableId.equals(MetadataTable.ID)) {
 +      srcRange = new Range();
 +    } else {
 +      srcRange = new KeyExtent(new Text(srcTableId), null, null).toMetadataRange();
 +    }
 +
 +    TabletIterator srcTablets = new TabletIterator(srcScanner, srcRange, true, true);
 +
 +    if (!srcTablets.hasNext()) {
 +      throw new RuntimeException(" table deleted during clone?  srcTableId = " + srcTableId);
 +    }
 +
 +    do {
 +      bw.addMutation(createCloneMutation(srcTableId, tableId, srcTablets.next()));
 +    } while (srcTablets.hasNext());
 +
 +    bw.flush();
 +  }
 +
 +  /**
 +   * Compares two end rows by wrapping each in a {@link KeyExtent} of the same dummy table ("0") with a null prev end row and comparing the extents.
 +   */
 +  private static int compareEndRows(Text endRow1, Text endRow2) {
 +    KeyExtent first = new KeyExtent(new Text("0"), endRow1, null);
 +    KeyExtent second = new KeyExtent(new Text("0"), endRow2, null);
 +    return first.compareTo(second);
 +  }
 +
 +  /**
 +   * Walks the clone table's tablets in step with the source table's tablets and verifies each clone tablet that is not yet marked as cloned.
 +   * <p>
 +   * A clone tablet whose files are all still referenced by the source tablets covering its range gets a cloned marker. Otherwise the clone tablet's entries
 +   * are deleted and re-copied from those source tablets, and the rewrite is counted. The writer is flushed before returning.
 +   *
 +   * @return the number of clone tablets that had to be rewritten
 +   * @throws RuntimeException
 +   *           if either table has no tablets to iterate over
 +   * @throws TabletIterator.TabletDeletedException
 +   *           if a source tablet's end row is beyond the clone tablet's end row
 +   */
 +  static int checkClone(String srcTableId, String tableId, Connector conn, BatchWriter bw) throws TableNotFoundException, MutationsRejectedException {
 +    TabletIterator srcIter = new TabletIterator(createCloneScanner(srcTableId, conn), new KeyExtent(new Text(srcTableId), null, null).toMetadataRange(), true,
 +        true);
 +    TabletIterator cloneIter = new TabletIterator(createCloneScanner(tableId, conn), new KeyExtent(new Text(tableId), null, null).toMetadataRange(), true, true);
 +
 +    if (!cloneIter.hasNext() || !srcIter.hasNext())
 +      throw new RuntimeException(" table deleted during clone?  srcTableId = " + srcTableId + " tableId=" + tableId);
 +
 +    int rewrites = 0;
 +
 +    while (cloneIter.hasNext()) {
 +      Map<Key,Value> cloneTablet = cloneIter.next();
 +      Text cloneEndRow = new KeyExtent(cloneTablet.keySet().iterator().next().getRow(), (Text) null).getEndRow();
 +      HashSet<String> cloneFiles = new HashSet<String>();
 +
 +      // a tablet that already carries the cloned marker needs no file comparison
 +      boolean cloneSuccessful = false;
 +      for (Entry<Key,Value> entry : cloneTablet.entrySet()) {
 +        if (entry.getKey().getColumnFamily().equals(ClonedColumnFamily.NAME)) {
 +          cloneSuccessful = true;
 +          break;
 +        }
 +      }
 +
 +      if (!cloneSuccessful)
 +        getFiles(cloneFiles, cloneTablet, null);
 +
 +      // collect every source tablet up to the one whose end row matches this clone tablet's end row
 +      List<Map<Key,Value>> srcTablets = new ArrayList<Map<Key,Value>>();
 +      Map<Key,Value> srcTablet = srcIter.next();
 +      srcTablets.add(srcTablet);
 +
 +      Text srcEndRow = new KeyExtent(srcTablet.keySet().iterator().next().getRow(), (Text) null).getEndRow();
 +
 +      int cmp = compareEndRows(cloneEndRow, srcEndRow);
 +      if (cmp < 0)
 +        throw new TabletIterator.TabletDeletedException("Tablets deleted from src during clone : " + cloneEndRow + " " + srcEndRow);
 +
 +      HashSet<String> srcFiles = new HashSet<String>();
 +      if (!cloneSuccessful)
 +        getFiles(srcFiles, srcTablet, srcTableId);
 +
 +      // the source iterator is advanced even for already-cloned tablets so both iterators stay aligned
 +      while (cmp > 0) {
 +        srcTablet = srcIter.next();
 +        srcTablets.add(srcTablet);
 +        srcEndRow = new KeyExtent(srcTablet.keySet().iterator().next().getRow(), (Text) null).getEndRow();
 +        cmp = compareEndRows(cloneEndRow, srcEndRow);
 +        if (cmp < 0)
 +          throw new TabletIterator.TabletDeletedException("Tablets deleted from src during clone : " + cloneEndRow + " " + srcEndRow);
 +
 +        if (!cloneSuccessful)
 +          getFiles(srcFiles, srcTablet, srcTableId);
 +      }
 +
 +      if (cloneSuccessful)
 +        continue;
 +
 +      // the clone references a file the source no longer lists: discard the clone tablet and copy again
 +      if (!srcFiles.containsAll(cloneFiles)) {
 +        // delete existing cloned tablet entry
 +        Mutation m = new Mutation(cloneTablet.keySet().iterator().next().getRow());
 +
 +        for (Entry<Key,Value> entry : cloneTablet.entrySet()) {
 +          Key k = entry.getKey();
 +          m.putDelete(k.getColumnFamily(), k.getColumnQualifier(), k.getTimestamp());
 +        }
 +
 +        bw.addMutation(m);
 +
 +        for (Map<Key,Value> st : srcTablets)
 +          bw.addMutation(createCloneMutation(srcTableId, tableId, st));
 +
 +        rewrites++;
 +      } else {
 +        // write out marker that this tablet was successfully cloned
 +        Mutation m = new Mutation(cloneTablet.keySet().iterator().next().getRow());
 +        m.put(ClonedColumnFamily.NAME, new Text(""), new Value("OK".getBytes(Constants.UTF8)));
 +        bw.addMutation(m);
 +      }
 +    }
 +
 +    bw.flush();
 +    return rewrites;
 +  }
 +
 +  /**
 +   * Clones the metadata of table {@code srcTableId} into table {@code tableId}.
 +   * <p>
 +   * The source tablets are copied once and then re-checked with {@code checkClone} until a pass needs no rewrites. If tablets disappear from the source
 +   * during this (signalled by {@link TabletIterator.TabletDeletedException}), everything cloned so far is deleted and the whole process starts over.
 +   * Finally each cloned marker is removed and every clone tablet gets its own directory entry of the form
 +   * {@code <chosen tables dir>/<tableId>/c-<zero padded counter>}.
 +   *
 +   * @param instance
 +   *          instance used to obtain a connector with the system credentials
 +   * @param srcTableId
 +   *          id of the table being cloned
 +   * @param tableId
 +   *          id of the new table
 +   * @param volumeManager
 +   *          used to choose the tables directory for each new tablet directory
 +   */
 +  public static void cloneTable(Instance instance, String srcTableId, String tableId, VolumeManager volumeManager) throws Exception {
 +
 +    Connector conn = instance.getConnector(SystemCredentials.get().getPrincipal(), SystemCredentials.get().getToken());
 +    BatchWriter bw = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
 +
 +    while (true) {
 +
 +      try {
 +        initializeClone(srcTableId, tableId, conn, bw);
 +
 +        // the following loop looks for changes in the files that occurred during the copy.. if files were dereferenced then they could have been GCed
 +
 +        while (true) {
 +          int rewrites = checkClone(srcTableId, tableId, conn, bw);
 +
 +          if (rewrites == 0)
 +            break;
 +        }
 +
 +        bw.flush();
 +        break;
 +
 +      } catch (TabletIterator.TabletDeletedException tde) {
 +        // tablets were merged in the src table
 +        bw.flush();
 +
 +        // delete what we have cloned and try again
 +        deleteTable(tableId, false, SystemCredentials.get(), null);
 +
 +        log.debug("Tablets merged in table " + srcTableId + " while attempting to clone, trying again");
 +
 +        UtilWaitThread.sleep(100);
 +      }
 +    }
 +
 +    // delete the clone markers and create directory entries
 +    Scanner mscanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
 +    mscanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange());
 +    mscanner.fetchColumnFamily(ClonedColumnFamily.NAME);
 +
 +    int dirCount = 0;
 +
 +    for (Entry<Key,Value> entry : mscanner) {
 +      Key k = entry.getKey();
 +      Mutation m = new Mutation(k.getRow());
 +      m.putDelete(k.getColumnFamily(), k.getColumnQualifier());
 +      // decode with an explicit charset, matching the UTF-8 encoding of the prefix, instead of the platform default
 +      String dir = volumeManager.choose(ServerConstants.getTablesDirs()) + "/" + tableId
 +          + new String(FastFormat.toZeroPaddedString(dirCount++, 8, 16, "/c-".getBytes(Constants.UTF8)), Constants.UTF8);
 +      TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value(dir.getBytes(Constants.UTF8)));
 +      bw.addMutation(m);
 +    }
 +
 +    bw.close();
 +
 +  }
 +
 +  /**
 +   * Sets the CHOPPED_COLUMN of the tablet's metadata entry to the value "chopped", writing with the system credentials.
 +   */
 +  public static void chopped(KeyExtent extent, ZooLock zooLock) {
 +    Mutation chopMarker = new Mutation(extent.getMetadataEntry());
 +    Value marker = new Value("chopped".getBytes(Constants.UTF8));
 +    ChoppedColumnFamily.CHOPPED_COLUMN.put(chopMarker, marker);
 +    update(SystemCredentials.get(), zooLock, chopMarker, extent);
 +  }
 +
 +  /**
 +   * Deletes every bulk file entry in the metadata range of {@code tableId} whose value parses to {@code tid}.
 +   * <p>
 +   * The batch writer is always closed, even when the scan or the parsing of an entry's value fails, so that it is not leaked.
 +   *
 +   * @param conn
 +   *          connector used to create the scanner and the batch writer on the metadata table
 +   * @param tableId
 +   *          id of the table whose bulk file entries are examined
 +   * @param tid
 +   *          transaction id whose entries are removed
 +   */
 +  public static void removeBulkLoadEntries(Connector conn, String tableId, long tid) throws Exception {
 +    Scanner mscanner = new IsolatedScanner(conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY));
 +    mscanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange());
 +    mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME);
 +    BatchWriter bw = conn.createBatchWriter(MetadataTable.NAME, new BatchWriterConfig());
 +    try {
 +      for (Entry<Key,Value> entry : mscanner) {
 +        log.debug("Looking at entry " + entry + " with tid " + tid);
 +        if (Long.parseLong(entry.getValue().toString()) == tid) {
 +          log.debug("deleting entry " + entry);
 +          Mutation m = new Mutation(entry.getKey().getRow());
 +          m.putDelete(entry.getKey().getColumnFamily(), entry.getKey().getColumnQualifier());
 +          bw.addMutation(m);
 +        }
 +      }
 +    } finally {
 +      // release the writer's resources on both the normal and the failure path
 +      bw.close();
 +    }
 +  }
 +
 +  /**
 +   * Returns the files recorded in the bulk file column family of the tablet's metadata range whose value parses to {@code tid}. Metadata tablets are looked
 +   * up in the root table, all others in the metadata table.
 +   *
 +   * @return the matching file references; empty if there are none
 +   * @throws RuntimeException
 +   *           if the scanned table does not exist; the original {@link TableNotFoundException} is attached as the cause
 +   */
 +  public static List<FileRef> getBulkFilesLoaded(Connector conn, KeyExtent extent, long tid) throws IOException {
 +    List<FileRef> result = new ArrayList<FileRef>();
 +    try {
 +      VolumeManager fs = VolumeManagerImpl.get();
 +      Scanner mscanner = new IsolatedScanner(conn.createScanner(extent.isMeta() ? RootTable.NAME : MetadataTable.NAME, Authorizations.EMPTY));
 +      mscanner.setRange(extent.toMetadataRange());
 +      mscanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME);
 +      for (Entry<Key,Value> entry : mscanner) {
 +        if (Long.parseLong(entry.getValue().toString()) == tid) {
 +          result.add(new FileRef(fs, entry.getKey()));
 +        }
 +      }
 +      return result;
 +    } catch (TableNotFoundException ex) {
 +      // unlikely; keep the cause so the failing table shows up in the stack trace
 +      throw new RuntimeException("Onos! teh metadata table has vanished!!", ex);
 +    }
 +  }
 +
 +  /**
 +   * Maps every bulk-loaded file recorded on a tablet's metadata row to the transaction id stored as its value.
 +   *
 +   * The row is read from the root table when {@code extent.isMeta()} is true, otherwise from the metadata table, and only the bulk-file column family is
 +   * fetched.
 +   *
 +   * @param credentials
 +   *          credentials the scanner is created with
 +   * @param extent
 +   *          tablet whose metadata row is read
 +   * @return file reference to transaction id, one entry per bulk-file column found
 +   * @throws IOException
 +   *           declared by the signature; presumably raised while obtaining the volume manager (confirm against VolumeManagerImpl)
 +   */
 +  public static Map<FileRef,Long> getBulkFilesLoaded(Credentials credentials, KeyExtent extent) throws IOException {
 +    Text tabletRow = extent.getMetadataEntry();
 +    Map<FileRef,Long> tidsByFile = new HashMap<FileRef,Long>();
 +
 +    VolumeManager volumes = VolumeManagerImpl.get();
 +    Scanner bulkScanner = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, extent.isMeta() ? RootTable.ID : MetadataTable.ID, Authorizations.EMPTY);
 +    bulkScanner.setRange(new Range(tabletRow));
 +    bulkScanner.fetchColumnFamily(TabletsSection.BulkFileColumnFamily.NAME);
 +
 +    for (Entry<Key,Value> bulkEntry : bulkScanner) {
 +      // parse the id before building the file reference, as the value is the less trusted part
 +      Long loadedBy = Long.valueOf(bulkEntry.getValue().toString());
 +      FileRef file = new FileRef(volumes, bulkEntry.getKey());
 +      tidsByFile.put(file, loadedBy);
 +    }
 +    return tidsByFile;
 +  }
 +
 +  /**
 +   * Writes a bulk-load-in-progress flag row for the given path.
 +   *
 +   * The row is the {@code BlipSection} row prefix followed by {@code path}, with an empty column family, qualifier and value.
 +   *
 +   * @param path
 +   *          path appended to the row prefix to form the flag row
 +   */
 +  public static void addBulkLoadInProgressFlag(String path) {
 +    String flagRow = MetadataSchema.BlipSection.getRowPrefix() + path;
 +    Mutation flag = new Mutation(flagRow);
 +    flag.put(EMPTY_TEXT, EMPTY_TEXT, new Value(new byte[0]));
 +
 +    // The extent passed here is a placeholder: any non-metadata table id makes update() target the
 +    // metadata table rather than the root table, since bulk loads into the metadata table aren't supported.
 +    update(SystemCredentials.get(), flag, new KeyExtent(new Text("anythingNotMetadata"), null, null));
 +  }
 +
 +  /**
 +   * Deletes the bulk-load-in-progress flag row for the given path.
 +   *
 +   * The row is the {@code BlipSection} row prefix followed by {@code path}; the entry with empty column family and qualifier is removed.
 +   *
 +   * @param path
 +   *          path appended to the row prefix to form the flag row
 +   */
 +  public static void removeBulkLoadInProgressFlag(String path) {
 +    String flagRow = MetadataSchema.BlipSection.getRowPrefix() + path;
 +    Mutation unflag = new Mutation(flagRow);
 +    unflag.putDelete(EMPTY_TEXT, EMPTY_TEXT);
 +
 +    // The extent passed here is a placeholder: any non-metadata table id makes update() target the
 +    // metadata table rather than the root table, since bulk loads into the metadata table aren't supported.
 +    update(SystemCredentials.get(), unflag, new KeyExtent(new Text("anythingNotMetadata"), null, null));
 +  }
 +
++  /**
++   * Upgrade step: relocates file deletion markers stored in the root table under the old "!!~del" row prefix to the row prefix defined by
++   * {@code MetadataSchema.DeletesSection}, so that all metadata tables share the same schema.
++   */
 +  public static void moveMetaDeleteMarkers(Instance instance, Credentials creds) {
 +    final byte[] empty = new byte[0];
 +    final String oldDeletesPrefix = "!!~del";
 +
 +    Scanner scanner = new ScannerImpl(instance, creds, RootTable.ID, Authorizations.EMPTY);
 +    scanner.setRange(new Range(oldDeletesPrefix, true, "!!~dem", false));
 +
 +    for (Entry<Key,Value> entry : scanner) {
 +      String oldRow = entry.getKey().getRow().toString();
 +      if (!oldRow.startsWith(oldDeletesPrefix)) {
 +        // left the old delete-marker section; nothing further to move
 +        break;
 +      }
 +
 +      String filename = oldRow.substring(oldDeletesPrefix.length());
 +      log.info("Moving " + filename + " marker in " + RootTable.NAME);
 +
 +      // write the marker at its new location before touching the old one
 +      Mutation relocated = new Mutation(MetadataSchema.DeletesSection.getRowPrefix() + filename);
 +      relocated.put(empty, empty, empty);
 +      update(creds, relocated, RootTable.EXTENT);
 +
 +      // then drop the marker from its old row
 +      Mutation obsolete = new Mutation(entry.getKey().getRow());
 +      obsolete.putDelete(empty, empty);
 +      update(creds, obsolete, RootTable.OLD_EXTENT);
 +    }
 +
 +  }
 +
 +  public static SortedMap<Text,SortedMap<ColumnFQ,Value>> getTabletEntries(SortedMap<Key,Value> tabletKeyValues, List<ColumnFQ> columns) {
 +    TreeMap<Text,SortedMap<ColumnFQ,Value>> tabletEntries = new TreeMap<Text,SortedMap<ColumnFQ,Value>>();
 +
 +    HashSet<ColumnFQ> colSet = null;
 +    if (columns != null) {
 +      colSet = new HashSet<ColumnFQ>(columns);
 +    }
 +
 +    for (Entry<Key,Value> entry : tabletKeyValues.entrySet()) {
 +
 +      if (columns != null && !colSet.contains(new ColumnFQ(entry.getKey()))) {
 +        continue;
 +      }
 +
 +      Text row = entry.getKey().getRow();
 +
 +      SortedMap<ColumnFQ,Value> colVals = tabletEntries.get(row);
 +      if (colVals == null) {
 +        colVals = new TreeMap<ColumnFQ,Value>();
 +        tabletEntries.put(row, colVals);
 +      }
 +
 +      colVals.put(new ColumnFQ(entry.getKey()), entry.getValue());
 +    }
 +
 +    return tabletEntries;
 +  }
 +}


Mime
View raw message