Return-Path: X-Original-To: apmail-accumulo-commits-archive@www.apache.org Delivered-To: apmail-accumulo-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B6C12F6EB for ; Tue, 9 Apr 2013 20:28:36 +0000 (UTC) Received: (qmail 53833 invoked by uid 500); 9 Apr 2013 20:28:36 -0000 Delivered-To: apmail-accumulo-commits-archive@accumulo.apache.org Received: (qmail 53809 invoked by uid 500); 9 Apr 2013 20:28:36 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 53800 invoked by uid 99); 9 Apr 2013 20:28:36 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 09 Apr 2013 20:28:36 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 09 Apr 2013 20:28:33 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 871E223888CD; Tue, 9 Apr 2013 20:28:13 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1466217 - in /accumulo/trunk: ./ assemble/ core/ core/src/main/java/org/apache/accumulo/core/util/ examples/ fate/src/main/java/org/apache/accumulo/fate/ fate/src/main/java/org/apache/accumulo/fate/zookeeper/ server/ server/src/main/java/o... Date: Tue, 09 Apr 2013 20:28:13 -0000 To: commits@accumulo.apache.org From: kturner@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130409202813.871E223888CD@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: kturner Date: Tue Apr 9 20:28:12 2013 New Revision: 1466217 URL: http://svn.apache.org/r1466217 Log: ACCUMULO-1243 Made tablet loading code only load one tablet when recovering a split. Made code more strict, it will only load an exact tablet. Made load code roll back splits that have started, but did not create a new tablet. Added some more test. Added: accumulo/trunk/server/src/test/java/org/apache/accumulo/server/tabletserver/CheckTabletMetadataTest.java - copied unchanged from r1466211, accumulo/branches/1.5/server/src/test/java/org/apache/accumulo/server/tabletserver/CheckTabletMetadataTest.java accumulo/trunk/test/src/test/java/org/apache/accumulo/test/TestAccumuloSplitRecovery.java - copied unchanged from r1466211, accumulo/branches/1.5/test/src/test/java/org/apache/accumulo/test/TestAccumuloSplitRecovery.java Removed: accumulo/trunk/test/src/test/java/org/apache/accumulo/test/TestAccumulo1235.java Modified: accumulo/trunk/ (props changed) accumulo/trunk/assemble/ (props changed) accumulo/trunk/core/ (props changed) accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/MetadataTable.java accumulo/trunk/examples/ (props changed) accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/ZooStore.java (props changed) accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooSession.java (props changed) accumulo/trunk/server/ (props changed) accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java accumulo/trunk/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java accumulo/trunk/src/ (props changed) accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java accumulo/trunk/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java Propchange: accumulo/trunk/ ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5:r1466211 Propchange: accumulo/trunk/assemble/ ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/assemble:r1466211 Propchange: accumulo/trunk/core/ ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/core:r1466211 Modified: accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/MetadataTable.java URL: http://svn.apache.org/viewvc/accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/MetadataTable.java?rev=1466217&r1=1466216&r2=1466217&view=diff ============================================================================== --- accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/MetadataTable.java (original) +++ accumulo/trunk/core/src/main/java/org/apache/accumulo/core/util/MetadataTable.java Tue Apr 9 20:28:12 2013 @@ -32,7 +32,6 @@ import org.apache.accumulo.core.client.A import org.apache.accumulo.core.client.Instance; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.impl.ScannerImpl; import org.apache.accumulo.core.client.impl.Tables; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.KeyExtent; @@ -173,13 +172,7 @@ public class MetadataTable { return new Pair,List>(results, locationless); } - - public static SortedMap> getTabletEntries(Instance instance, KeyExtent ke, List columns, TCredentials credentials) { - TreeMap tkv = new TreeMap(); - getTabletAndPrevTabletKeyValues(instance, tkv, ke, columns, credentials); - return getTabletEntries(tkv, columns); - } - + public static SortedMap> getTabletEntries(SortedMap tabletKeyValues, List columns) { TreeMap> tabletEntries = new TreeMap>(); @@ -207,40 +200,7 @@ public class MetadataTable { return tabletEntries; } - - public static void getTabletAndPrevTabletKeyValues(Instance instance, SortedMap tkv, KeyExtent ke, List columns, TCredentials credentials) { - Text startRow; - Text endRow = ke.getMetadataEntry(); - if (ke.getPrevEndRow() == null) { - startRow = new Text(KeyExtent.getMetadataEntry(ke.getTableId(), new Text())); - } else { - startRow = new Text(KeyExtent.getMetadataEntry(ke.getTableId(), ke.getPrevEndRow())); - } - - Scanner scanner = new ScannerImpl(instance, credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS); - - if (columns != null) { - for (ColumnFQ column : columns) - column.fetch(scanner); - } - - scanner.setRange(new Range(new Key(startRow), true, new Key(endRow).followingKey(PartialKey.ROW), false)); - - tkv.clear(); - boolean successful = false; - try { - for (Entry entry : scanner) { - tkv.put(entry.getKey(), entry.getValue()); - } - successful = true; - } finally { - if (!successful) { - tkv.clear(); - } - } - } - public static void getEntries(Instance instance, TCredentials credentials, String table, boolean isTid, Map locations, SortedSet tablets) throws AccumuloException, AccumuloSecurityException, TableNotFoundException { String tableId = isTid ? table : Tables.getNameToIdMap(instance).get(table); Propchange: accumulo/trunk/examples/ ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/examples:r1466211 Propchange: accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/ZooStore.java ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/fate/src/main/java/org/apache/accumulo/fate/ZooStore.java:r1466211 Propchange: accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooSession.java ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooSession.java:r1466211 Propchange: accumulo/trunk/server/ ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/server:r1466211 Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java?rev=1466217&r1=1466216&r2=1466217&view=diff ============================================================================== --- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java (original) +++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java Tue Apr 9 20:28:12 2013 @@ -68,6 +68,7 @@ import org.apache.accumulo.core.Constant import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.Instance; +import org.apache.accumulo.core.client.impl.ScannerImpl; import org.apache.accumulo.core.client.impl.TabletType; import org.apache.accumulo.core.client.impl.Translator; import org.apache.accumulo.core.client.impl.thrift.SecurityErrorCode; @@ -2442,10 +2443,10 @@ public class TabletServer extends Abstra log.debug("Loading extent: " + extent); // check Metadata table before accepting assignment - SortedMap tabletsInRange = null; + Text locationToOpen = null; SortedMap tabletsKeyValues = new TreeMap(); try { - tabletsInRange = verifyTabletInformation(extent, TabletServer.this.getTabletSession(), tabletsKeyValues, getClientAddressString(), getLock()); + locationToOpen = verifyTabletInformation(extent, TabletServer.this.getTabletSession(), tabletsKeyValues, getClientAddressString(), getLock()); } catch (Exception e) { synchronized (openingTablets) { openingTablets.remove(extent); @@ -2454,131 +2455,92 @@ public class TabletServer extends Abstra log.warn("Failed to verify tablet " + extent, e); throw new RuntimeException(e); } - - if (tabletsInRange == null) { - log.info("Reporting tablet " + extent + " assignment failure: unable to verify Tablet Information"); + + if (locationToOpen == null) { + log.debug("Reporting tablet " + extent + " assignment failure: unable to verify Tablet Information"); enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.LOAD_FAILURE, extent)); synchronized (openingTablets) { openingTablets.remove(extent); openingTablets.notifyAll(); } + return; } - // If extent given is not the one to be opened, update - if (tabletsInRange.size() != 1 || !tabletsInRange.containsKey(extent)) { - synchronized (openingTablets) { - openingTablets.remove(extent); - openingTablets.notifyAll(); - for (KeyExtent e : tabletsInRange.keySet()) - openingTablets.add(e); + + Tablet tablet = null; + boolean successful = false; + + try { + TabletResourceManager trm = resourceManager.createTabletResourceManager(); + + // this opens the tablet file and fills in the endKey in the + // extent + tablet = new Tablet(TabletServer.this, locationToOpen, extent, trm, tabletsKeyValues); + /* + * If a minor compaction starts after a tablet opens, this indicates a log recovery occurred. This recovered data must be minor compacted. + * + * There are three reasons to wait for this minor compaction to finish before placing the tablet in online tablets. + * + * 1) The log recovery code does not handle data written to the tablet on multiple tablet servers. 2) The log recovery code does not block if memory is + * full. Therefore recovering lots of tablets that use a lot of memory could run out of memory. 3) The minor compaction finish event did not make it to + * the logs (the file will be in !METADATA, preventing replay of compacted data)... but do not want a majc to wipe the file out from !METADATA and then + * have another process failure... this could cause duplicate data to replay + */ + if (tablet.getNumEntriesInMemory() > 0 && !tablet.minorCompactNow(MinorCompactionReason.SYSTEM)) { + throw new RuntimeException("Minor compaction after recovery fails for " + extent); } - } else { - // remove any metadata entries for the previous tablet - Iterator iter = tabletsKeyValues.keySet().iterator(); - Text row = extent.getMetadataEntry(); - while (iter.hasNext()) { - Key key = iter.next(); - if (!key.getRow().equals(row)) { - iter.remove(); + + Assignment assignment = new Assignment(extent, getTabletSession()); + TabletStateStore.setLocation(assignment); + + synchronized (openingTablets) { + synchronized (onlineTablets) { + openingTablets.remove(extent); + onlineTablets.put(extent, tablet); + openingTablets.notifyAll(); + recentlyUnloadedCache.remove(tablet); } } + tablet = null; // release this reference + successful = true; + } catch (Throwable e) { + log.warn("exception trying to assign tablet " + extent + " " + locationToOpen, e); + if (e.getMessage() != null) + log.warn(e.getMessage()); + String table = extent.getTableId().toString(); + ProblemReports.getInstance().report(new ProblemReport(table, TABLET_LOAD, extent.getUUID().toString(), getClientAddressString(), e)); } - if (tabletsInRange.size() > 1) { - log.debug("Master didn't know " + extent + " was split, letting it know about " + tabletsInRange.keySet()); - enqueueMasterMessage(new SplitReportMessage(extent, tabletsInRange)); - } - - // create the tablet object - for (Entry entry : tabletsInRange.entrySet()) { - Tablet tablet = null; - boolean successful = false; - - final KeyExtent extentToOpen = entry.getKey(); - Text locationToOpen = entry.getValue(); - - if (onlineTablets.containsKey(extentToOpen)) { - // know this was from fixing a split, because initial check - // would have caught original extent - log.warn("Something is screwy! Already serving tablet " + extentToOpen + " derived from fixing split. Original extent = " + extent); + + if (!successful) { + synchronized (unopenedTablets) { synchronized (openingTablets) { - openingTablets.remove(extentToOpen); + openingTablets.remove(extent); + unopenedTablets.add(extent); openingTablets.notifyAll(); } - continue; } - - try { - TabletResourceManager trm = resourceManager.createTabletResourceManager(); - - // this opens the tablet file and fills in the endKey in the - // extent - tablet = new Tablet(TabletServer.this, locationToOpen, extentToOpen, trm, tabletsKeyValues); - /* - * If a minor compaction starts after a tablet opens, this indicates a log recovery occurred. This recovered data must be minor compacted. - * - * There are three reasons to wait for this minor compaction to finish before placing the tablet in online tablets. - * - * 1) The log recovery code does not handle data written to the tablet on multiple tablet servers. 2) The log recovery code does not block if memory - * is full. Therefore recovering lots of tablets that use a lot of memory could run out of memory. 3) The minor compaction finish event did not make - * it to the logs (the file will be in !METADATA, preventing replay of compacted data)... but do not want a majc to wipe the file out from !METADATA - * and then have another process failure... this could cause duplicate data to replay - */ - if (tablet.getNumEntriesInMemory() > 0 && !tablet.minorCompactNow(MinorCompactionReason.SYSTEM)) { - throw new RuntimeException("Minor compaction after recovery fails for " + extentToOpen); - } - - Assignment assignment = new Assignment(extentToOpen, getTabletSession()); - TabletStateStore.setLocation(assignment); - - synchronized (openingTablets) { - synchronized (onlineTablets) { - openingTablets.remove(extentToOpen); - onlineTablets.put(extentToOpen, tablet); - openingTablets.notifyAll(); - recentlyUnloadedCache.remove(tablet); - } - } - tablet = null; // release this reference - successful = true; - } catch (Throwable e) { - log.warn("exception trying to assign tablet " + extentToOpen + " " + locationToOpen, e); - if (e.getMessage() != null) - log.warn(e.getMessage()); - String table = extent.getTableId().toString(); - ProblemReports.getInstance().report(new ProblemReport(table, TABLET_LOAD, extentToOpen.getUUID().toString(), getClientAddressString(), e)); - } - - if (!successful) { - synchronized (unopenedTablets) { - synchronized (openingTablets) { - openingTablets.remove(extentToOpen); - unopenedTablets.add(extentToOpen); - openingTablets.notifyAll(); - } - } - log.warn("failed to open tablet " + extentToOpen + " reporting failure to master"); - enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.LOAD_FAILURE, extentToOpen)); - long reschedule = Math.min((1l << Math.min(32, retryAttempt)) * 1000, 10 * 60 * 1000l); - log.warn(String.format("rescheduling tablet load in %.2f seconds", reschedule / 1000.)); - SimpleTimer.getInstance().schedule(new TimerTask() { - @Override - public void run() { - log.info("adding tablet " + extent + " back to the assignment pool (retry " + retryAttempt + ")"); - AssignmentHandler handler = new AssignmentHandler(extentToOpen, retryAttempt + 1); - if (extent.isMeta()) { - if (extent.isRootTablet()) { - new Daemon(new LoggingRunnable(log, handler), "Root tablet assignment retry").start(); - } else { - resourceManager.addMetaDataAssignment(handler); - } + log.warn("failed to open tablet " + extent + " reporting failure to master"); + enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.LOAD_FAILURE, extent)); + long reschedule = Math.min((1l << Math.min(32, retryAttempt)) * 1000, 10 * 60 * 1000l); + log.warn(String.format("rescheduling tablet load in %.2f seconds", reschedule / 1000.)); + SimpleTimer.getInstance().schedule(new TimerTask() { + @Override + public void run() { + log.info("adding tablet " + extent + " back to the assignment pool (retry " + retryAttempt + ")"); + AssignmentHandler handler = new AssignmentHandler(extent, retryAttempt + 1); + if (extent.isMeta()) { + if (extent.isRootTablet()) { + new Daemon(new LoggingRunnable(log, handler), "Root tablet assignment retry").start(); } else { - resourceManager.addAssignment(handler); + resourceManager.addMetaDataAssignment(handler); } + } else { + resourceManager.addAssignment(handler); } - }, reschedule); - } else { - enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.LOADED, extentToOpen)); - } + } + }, reschedule); + } else { + enqueueMasterMessage(new TabletStatusMessage(TabletLoadState.LOADED, extent)); } } } @@ -2890,148 +2852,141 @@ public class TabletServer extends Abstra private long totalMinorCompactions; - public static SortedMap verifyTabletInformation(KeyExtent extent, TServerInstance instance, SortedMap tabletsKeyValues, - String clientAddress, ZooLock lock) throws AccumuloSecurityException, DistributedStoreException { - for (int tries = 0; tries < 3; tries++) { - try { - log.debug("verifying extent " + extent); - if (extent.isRootTablet()) { - ZooTabletStateStore store = new ZooTabletStateStore(); - if (!store.iterator().hasNext()) { - log.warn("Illegal state: location is not set in zookeeper"); - return null; - } - TabletLocationState next = store.iterator().next(); - if (!instance.equals(next.future)) { - log.warn("Future location is not to this server for the root tablet"); - return null; - } - TreeMap set = new TreeMap(); - set.put(extent, new Text(Constants.ZROOT_TABLET)); - return set; - } - - List columnsToFetch = Arrays.asList(new ColumnFQ[] {Constants.METADATA_DIRECTORY_COLUMN, Constants.METADATA_PREV_ROW_COLUMN, - Constants.METADATA_SPLIT_RATIO_COLUMN, Constants.METADATA_OLD_PREV_ROW_COLUMN, Constants.METADATA_TIME_COLUMN}); - - if (tabletsKeyValues == null) { - tabletsKeyValues = new TreeMap(); - } - MetadataTable.getTabletAndPrevTabletKeyValues(tabletsKeyValues, extent, null, SecurityConstants.getSystemCredentials()); - - SortedMap> tabletEntries; - tabletEntries = MetadataTable.getTabletEntries(tabletsKeyValues, columnsToFetch); - - if (tabletEntries.size() == 0) { - log.warn("Failed to find any metadata entries for " + extent); - return null; - } - - // ensure last key in map is same as extent that was passed in - if (!tabletEntries.lastKey().equals(extent.getMetadataEntry())) { - log.warn("Failed to find metadata entry for " + extent + " found " + tabletEntries.lastKey()); - return null; - } - - TServerInstance future = null; - Text metadataEntry = extent.getMetadataEntry(); - for (Entry entry : tabletsKeyValues.entrySet()) { - Key key = entry.getKey(); - if (!metadataEntry.equals(key.getRow())) - continue; - Text cf = key.getColumnFamily(); - if (cf.equals(Constants.METADATA_FUTURE_LOCATION_COLUMN_FAMILY)) { - future = new TServerInstance(entry.getValue(), key.getColumnQualifier()); - } else if (cf.equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY)) { - log.error("Tablet seems to be already assigned to " + new TServerInstance(entry.getValue(), key.getColumnQualifier())); - return null; - } - } - if (future == null) { - log.warn("The master has not assigned " + extent + " to " + instance); - return null; - } - if (!instance.equals(future)) { - log.warn("Table " + extent + " has been assigned to " + future + " which is not " + instance); - return null; - } - - // look for incomplete splits - int splitsFixed = 0; - for (Entry> entry : tabletEntries.entrySet()) { - - if (extent.getPrevEndRow() != null) { - Text prevRowMetadataEntry = new Text(KeyExtent.getMetadataEntry(extent.getTableId(), extent.getPrevEndRow())); - if (entry.getKey().compareTo(prevRowMetadataEntry) <= 0) { - continue; - } - } - - if (entry.getValue().containsKey(Constants.METADATA_OLD_PREV_ROW_COLUMN)) { - KeyExtent fixedke = MetadataTable.fixSplit(entry.getKey(), entry.getValue(), instance, SecurityConstants.getSystemCredentials(), lock); - if (fixedke != null) { - if (fixedke.getPrevEndRow() == null || fixedke.getPrevEndRow().compareTo(extent.getPrevEndRow()) < 0) { - extent = new KeyExtent(extent); - extent.setPrevEndRow(fixedke.getPrevEndRow()); - } - splitsFixed++; - } - } - } - - if (splitsFixed > 0) { - // reread and reverify metadata entries now that metadata - // entries were fixed - tabletsKeyValues.clear(); - return verifyTabletInformation(extent, instance, tabletsKeyValues, clientAddress, lock); - } - - SortedMap children = new TreeMap(); - - for (Entry> entry : tabletEntries.entrySet()) { - if (extent.getPrevEndRow() != null) { - Text prevRowMetadataEntry = new Text(KeyExtent.getMetadataEntry(extent.getTableId(), extent.getPrevEndRow())); - - if (entry.getKey().compareTo(prevRowMetadataEntry) <= 0) { - continue; - } - } - - Value prevEndRowIBW = entry.getValue().get(Constants.METADATA_PREV_ROW_COLUMN); - if (prevEndRowIBW == null) { - log.warn("Metadata entry does not have prev row (" + entry.getKey() + ")"); - return null; - } - - Value dirIBW = entry.getValue().get(Constants.METADATA_DIRECTORY_COLUMN); - if (dirIBW == null) { - log.warn("Metadata entry does not have directory (" + entry.getKey() + ")"); - return null; - } - - Text dir = new Text(dirIBW.get()); - - KeyExtent child = new KeyExtent(entry.getKey(), prevEndRowIBW); - children.put(child, dir); - } - - if (!MetadataTable.isContiguousRange(extent, new TreeSet(children.keySet()))) { - log.warn("For extent " + extent + " metadata entries " + children + " do not form a contiguous range."); - return null; - } - return children; - } catch (AccumuloException e) { - log.error("error verifying metadata information. retrying ..."); - log.error(e.toString()); - UtilWaitThread.sleep(1000); - } catch (AccumuloSecurityException e) { - // if it's a security exception, retrying won't work either. - log.error(e.toString()); - throw e; + private static Text verifyRootTablet(KeyExtent extent, TServerInstance instance) throws DistributedStoreException, AccumuloException { + ZooTabletStateStore store = new ZooTabletStateStore(); + if (!store.iterator().hasNext()) { + throw new AccumuloException("Illegal state: location is not set in zookeeper"); + } + TabletLocationState next = store.iterator().next(); + if (!instance.equals(next.future)) { + throw new AccumuloException("Future location is not to this server for the root tablet"); + } + + if (next.current != null) { + throw new AccumuloException("Root tablet already has a location set"); + } + + return new Text(Constants.ZROOT_TABLET); + } + + public static Text verifyTabletInformation(KeyExtent extent, TServerInstance instance, SortedMap tabletsKeyValues, String clientAddress, + ZooLock lock) throws AccumuloSecurityException, DistributedStoreException, AccumuloException { + + log.debug("verifying extent " + extent); + if (extent.isRootTablet()) { + return verifyRootTablet(extent, instance); + } + + List columnsToFetch = Arrays.asList(new ColumnFQ[] {Constants.METADATA_DIRECTORY_COLUMN, Constants.METADATA_PREV_ROW_COLUMN, + Constants.METADATA_SPLIT_RATIO_COLUMN, Constants.METADATA_OLD_PREV_ROW_COLUMN, Constants.METADATA_TIME_COLUMN}); + + ScannerImpl scanner = new ScannerImpl(HdfsZooInstance.getInstance(), SecurityConstants.getSystemCredentials(), Constants.METADATA_TABLE_ID, + Constants.NO_AUTHS); + scanner.setRange(extent.toMetadataRange()); + + TreeMap tkv = new TreeMap(); + for (Entry entry : scanner) + tkv.put(entry.getKey(), entry.getValue()); + + // only populate map after success + if (tabletsKeyValues == null) { + tabletsKeyValues = tkv; + } else { + tabletsKeyValues.clear(); + tabletsKeyValues.putAll(tkv); + } + + Text metadataEntry = extent.getMetadataEntry(); + + Value dir = checkTabletMetadata(extent, instance, tabletsKeyValues, metadataEntry); + if (dir == null) + return null; + + Value oldPrevEndRow = null; + for (Entry entry : tabletsKeyValues.entrySet()) { + if (Constants.METADATA_OLD_PREV_ROW_COLUMN.hasColumns(entry.getKey())) { + oldPrevEndRow = entry.getValue(); } } - // default is to accept - return null; + + if (oldPrevEndRow != null) { + SortedMap> tabletEntries; + tabletEntries = MetadataTable.getTabletEntries(tabletsKeyValues, columnsToFetch); + + KeyExtent fke = MetadataTable.fixSplit(metadataEntry, tabletEntries.get(metadataEntry), instance, SecurityConstants.getSystemCredentials(), lock); + + if (!fke.equals(extent)) { + return null; + } + + // reread and reverify metadata entries now that metadata entries were fixed + tabletsKeyValues.clear(); + return verifyTabletInformation(fke, instance, tabletsKeyValues, clientAddress, lock); + } + + return new Text(dir.get()); + } + + static Value checkTabletMetadata(KeyExtent extent, TServerInstance instance, SortedMap tabletsKeyValues, Text metadataEntry) + throws AccumuloException { + + TServerInstance future = null; + Value prevEndRow = null; + Value dir = null; + Value time = null; + for (Entry entry : tabletsKeyValues.entrySet()) { + Key key = entry.getKey(); + if (!metadataEntry.equals(key.getRow())) { + log.info("Unexpected row in tablet metadata " + metadataEntry + " " + key.getRow()); + return null; + } + Text cf = key.getColumnFamily(); + if (cf.equals(Constants.METADATA_FUTURE_LOCATION_COLUMN_FAMILY)) { + if (future != null) { + throw new AccumuloException("Tablet has multiple future locations " + extent); + } + future = new TServerInstance(entry.getValue(), key.getColumnQualifier()); + } else if (cf.equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY)) { + log.info("Tablet seems to be already assigned to " + new TServerInstance(entry.getValue(), key.getColumnQualifier())); + return null; + } else if (Constants.METADATA_PREV_ROW_COLUMN.hasColumns(key)) { + prevEndRow = entry.getValue(); + } else if (Constants.METADATA_DIRECTORY_COLUMN.hasColumns(key)) { + dir = entry.getValue(); + } else if (Constants.METADATA_TIME_COLUMN.hasColumns(key)) { + time = entry.getValue(); + } + } + + if (prevEndRow == null) { + throw new AccumuloException("Metadata entry does not have prev row (" + metadataEntry + ")"); + } else { + KeyExtent ke2 = new KeyExtent(metadataEntry, prevEndRow); + if (!extent.equals(ke2)) { + log.info("Tablet prev end row mismatch " + extent + " " + ke2.getPrevEndRow()); + return null; + } + } + + if (dir == null) { + throw new AccumuloException("Metadata entry does not have directory (" + metadataEntry + ")"); + } + + if (time == null) { + throw new AccumuloException("Metadata entry does not have time (" + metadataEntry + ")"); + } + + if (future == null) { + log.info("The master has not assigned " + extent + " to " + instance); + return null; + } + + if (!instance.equals(future)) { + log.info("Table " + extent + " has been assigned to " + future + " which is not " + instance); + return null; + } + + return dir; } public String getClientAddressString() { Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java?rev=1466217&r1=1466216&r2=1466217&view=diff ============================================================================== --- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java (original) +++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java Tue Apr 9 20:28:12 2013 @@ -339,7 +339,6 @@ public class MetadataTable extends org.a TreeMap sizes = new TreeMap(); Scanner mdScanner = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS); - mdScanner.setRange(Constants.METADATA_KEYSPACE); mdScanner.fetchColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY); Text row = extent.getMetadataEntry(); @@ -386,6 +385,14 @@ public class MetadataTable extends org.a update(credentials, zooLock, m); } + public static void rollBackSplit(Text metadataEntry, Text oldPrevEndRow, TCredentials credentials, ZooLock zooLock) { + KeyExtent ke = new KeyExtent(metadataEntry, oldPrevEndRow); + Mutation m = ke.getPrevRowUpdateMutation(); + Constants.METADATA_SPLIT_RATIO_COLUMN.putDelete(m); + Constants.METADATA_OLD_PREV_ROW_COLUMN.putDelete(m); + update(credentials, zooLock, m); + } + public static void splitTablet(KeyExtent extent, Text oldPrevEndRow, double splitRatio, TCredentials credentials, ZooLock zooLock) { Mutation m = extent.getPrevRowUpdateMutation(); // @@ -490,14 +497,6 @@ public class MetadataTable extends org.a update(credentials, zooLock, m); } - public static void getTabletAndPrevTabletKeyValues(SortedMap tkv, KeyExtent ke, List columns, TCredentials credentials) { - getTabletAndPrevTabletKeyValues(HdfsZooInstance.getInstance(), tkv, ke, columns, credentials); - } - - public static SortedMap> getTabletEntries(KeyExtent ke, List columns, TCredentials credentials) { - return getTabletEntries(HdfsZooInstance.getInstance(), ke, columns, credentials); - } - private static KeyExtent fixSplit(Text table, Text metadataEntry, Text metadataPrevEndRow, Value oper, double splitRatio, TServerInstance tserver, TCredentials credentials, String time, long initFlushID, long initCompactID, ZooLock lock) throws AccumuloException { if (metadataPrevEndRow == null) @@ -505,51 +504,45 @@ public class MetadataTable extends org.a // prev end row.... throw new AccumuloException("Split tablet does not have prev end row, something is amiss, extent = " + metadataEntry); - KeyExtent low = null; - - List highDatafilesToRemove = new ArrayList(); - - String lowDirectory = TabletOperations.createTabletDirectory(ServerConstants.getTablesDir() + "/" + table, metadataPrevEndRow); - - Text prevPrevEndRow = KeyExtent.decodePrevEndRow(oper); - - low = new KeyExtent(table, metadataPrevEndRow, prevPrevEndRow); - - Scanner scanner3 = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS); - Key rowKey = new Key(metadataEntry); - - SortedMap origDatafileSizes = new TreeMap(); - SortedMap highDatafileSizes = new TreeMap(); - SortedMap lowDatafileSizes = new TreeMap(); - scanner3.fetchColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY); - scanner3.setRange(new Range(rowKey, rowKey.followingKey(PartialKey.ROW))); - - for (Entry entry : scanner3) { - if (entry.getKey().compareColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY) == 0) { - origDatafileSizes.put(entry.getKey().getColumnQualifier().toString(), new DataFileValue(entry.getValue().get())); - } - } - - splitDatafiles(table, metadataPrevEndRow, splitRatio, new HashMap(), origDatafileSizes, lowDatafileSizes, highDatafileSizes, - highDatafilesToRemove); - // check to see if prev tablet exist in metadata tablet Key prevRowKey = new Key(new Text(KeyExtent.getMetadataEntry(table, metadataPrevEndRow))); - + ScannerImpl scanner2 = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS); scanner2.setRange(new Range(prevRowKey, prevRowKey.followingKey(PartialKey.ROW))); if (!scanner2.iterator().hasNext()) { - log.debug("Prev tablet " + prevRowKey + " does not exist, need to create it " + metadataPrevEndRow + " " + prevPrevEndRow + " " + splitRatio); - Map bulkFiles = getBulkFilesLoaded(credentials, metadataEntry); - MetadataTable.addNewTablet(low, lowDirectory, tserver, lowDatafileSizes, bulkFiles, credentials, time, initFlushID, initCompactID, lock); + log.info("Rolling back incomplete split " + metadataEntry + " " + metadataPrevEndRow); + rollBackSplit(metadataEntry, KeyExtent.decodePrevEndRow(oper), credentials, lock); + return new KeyExtent(metadataEntry, KeyExtent.decodePrevEndRow(oper)); } else { - log.debug("Prev tablet " + prevRowKey + " exist, do not need to add it"); - } - - MetadataTable.finishSplit(metadataEntry, highDatafileSizes, highDatafilesToRemove, credentials, lock); + log.info("Finishing incomplete split " + metadataEntry + " " + metadataPrevEndRow); + + List highDatafilesToRemove = new ArrayList(); + + Scanner scanner3 = new ScannerImpl(HdfsZooInstance.getInstance(), credentials, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS); + Key rowKey = new Key(metadataEntry); + + SortedMap origDatafileSizes = new TreeMap(); + SortedMap highDatafileSizes = new TreeMap(); + SortedMap lowDatafileSizes = new TreeMap(); + scanner3.fetchColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY); + scanner3.setRange(new Range(rowKey, rowKey.followingKey(PartialKey.ROW))); + + for (Entry entry : scanner3) { + if (entry.getKey().compareColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY) == 0) { + origDatafileSizes.put(entry.getKey().getColumnQualifier().toString(), new DataFileValue(entry.getValue().get())); + } + } + + splitDatafiles(table, metadataPrevEndRow, splitRatio, new HashMap(), origDatafileSizes, lowDatafileSizes, highDatafileSizes, + highDatafilesToRemove); - return low; + MetadataTable.finishSplit(metadataEntry, highDatafileSizes, highDatafilesToRemove, credentials, lock); + + return new KeyExtent(metadataEntry, KeyExtent.encodePrevEndRow(metadataPrevEndRow)); + } + + } public static void splitDatafiles(Text table, Text midRow, double splitRatio, Map firstAndLastRows, @@ -597,13 +590,12 @@ public class MetadataTable extends org.a public static KeyExtent fixSplit(Text metadataEntry, SortedMap columns, TServerInstance tserver, TCredentials credentials, ZooLock lock) throws AccumuloException { - log.warn("Incomplete split " + metadataEntry + " attempting to fix"); + log.info("Incomplete split " + metadataEntry + " attempting to fix"); Value oper = columns.get(Constants.METADATA_OLD_PREV_ROW_COLUMN); if (columns.get(Constants.METADATA_SPLIT_RATIO_COLUMN) == null) { - log.warn("Metadata entry does not have split ratio (" + metadataEntry + ")"); - return null; + throw new IllegalArgumentException("Metadata entry does not have split ratio (" + metadataEntry + ")"); } double splitRatio = Double.parseDouble(new String(columns.get(Constants.METADATA_SPLIT_RATIO_COLUMN).get())); @@ -611,15 +603,13 @@ public class MetadataTable extends org.a Value prevEndRowIBW = columns.get(Constants.METADATA_PREV_ROW_COLUMN); if (prevEndRowIBW == null) { - log.warn("Metadata entry does not have prev row (" + metadataEntry + ")"); - return null; + throw new IllegalArgumentException("Metadata entry does not have prev row (" + metadataEntry + ")"); } Value time = columns.get(Constants.METADATA_TIME_COLUMN); if (time == null) { - log.warn("Metadata entry does not have time (" + metadataEntry + ")"); - return null; + throw new IllegalArgumentException("Metadata entry does not have time (" + metadataEntry + ")"); } Value flushID = columns.get(Constants.METADATA_FLUSH_COLUMN); Propchange: accumulo/trunk/src/ ------------------------------------------------------------------------------ Merged /accumulo/branches/1.5/src:r1466211 Modified: accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java URL: http://svn.apache.org/viewvc/accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java?rev=1466217&r1=1466216&r2=1466217&view=diff ============================================================================== --- accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java (original) +++ accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java Tue Apr 9 20:28:12 2013 @@ -28,6 +28,8 @@ import java.util.SortedMap; import java.util.TreeMap; import org.apache.accumulo.core.Constants; +import org.apache.accumulo.core.client.Scanner; +import org.apache.accumulo.core.client.impl.ScannerImpl; import org.apache.accumulo.core.client.impl.Writer; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.KeyExtent; @@ -176,34 +178,32 @@ public class SplitRecoveryTest extends F if (steps >= 2) MetadataTable.finishSplit(high, highDatafileSizes, highDatafilesToRemove, SecurityConstants.getSystemCredentials(), zl); - SortedMap vtiRet = TabletServer.verifyTabletInformation(extent, instance, null, "127.0.0.1:0", zl); - if (vtiRet.size() != 2) { - throw new Exception("verifyTabletInformation did not return two tablets, " + vtiRet.size()); - } - - if (!vtiRet.containsKey(high) || !vtiRet.containsKey(low)) { - throw new Exception("verifyTabletInformation did not return correct tablets, " + vtiRet.keySet()); - } - - ensureTabletHasNoUnexpectedMetadataEntries(low, lowDatafileSizes); - ensureTabletHasNoUnexpectedMetadataEntries(high, highDatafileSizes); + TabletServer.verifyTabletInformation(high, instance, null, "127.0.0.1:0", zl); + + if (steps >= 1) { + ensureTabletHasNoUnexpectedMetadataEntries(low, lowDatafileSizes); + ensureTabletHasNoUnexpectedMetadataEntries(high, highDatafileSizes); - Map lowBulkFiles = MetadataTable.getBulkFilesLoaded(SecurityConstants.getSystemCredentials(), low); - Map highBulkFiles = MetadataTable.getBulkFilesLoaded(SecurityConstants.getSystemCredentials(), high); + Map lowBulkFiles = MetadataTable.getBulkFilesLoaded(SecurityConstants.getSystemCredentials(), low); + Map highBulkFiles = MetadataTable.getBulkFilesLoaded(SecurityConstants.getSystemCredentials(), high); - if (!lowBulkFiles.equals(highBulkFiles)) { - throw new Exception(" " + lowBulkFiles + " != " + highBulkFiles + " " + low + " " + high); - } + if (!lowBulkFiles.equals(highBulkFiles)) { + throw new Exception(" " + lowBulkFiles + " != " + highBulkFiles + " " + low + " " + high); + } - if (lowBulkFiles.size() == 0) { - throw new Exception(" no bulk files " + low); + if (lowBulkFiles.size() == 0) { + throw new Exception(" no bulk files " + low); + } + } else { + ensureTabletHasNoUnexpectedMetadataEntries(extent, mapFiles); } } private void ensureTabletHasNoUnexpectedMetadataEntries(KeyExtent extent, SortedMap expectedMapFiles) throws Exception { - SortedMap tkv = new TreeMap(); - MetadataTable.getTabletAndPrevTabletKeyValues(tkv, extent, null, SecurityConstants.getSystemCredentials()); + Scanner scanner = new ScannerImpl(HdfsZooInstance.getInstance(), SecurityConstants.getSystemCredentials(), Constants.METADATA_TABLE_ID, + Constants.NO_AUTHS); + scanner.setRange(extent.toMetadataRange()); HashSet expectedColumns = new HashSet(); expectedColumns.add(Constants.METADATA_DIRECTORY_COLUMN); @@ -218,12 +218,12 @@ public class SplitRecoveryTest extends F expectedColumnFamilies.add(Constants.METADATA_LAST_LOCATION_COLUMN_FAMILY); expectedColumnFamilies.add(Constants.METADATA_BULKFILE_COLUMN_FAMILY); - Iterator iter = tkv.keySet().iterator(); + Iterator> iter = scanner.iterator(); while (iter.hasNext()) { - Key key = iter.next(); + Key key = iter.next().getKey(); if (!key.getRow().equals(extent.getMetadataEntry())) { - continue; + throw new Exception("Tablet " + extent + " contained unexpected " + Constants.METADATA_TABLE_NAME + " entry " + key); } if (expectedColumnFamilies.contains(key.getColumnFamily())) { Modified: accumulo/trunk/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java URL: http://svn.apache.org/viewvc/accumulo/trunk/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java?rev=1466217&r1=1466216&r2=1466217&view=diff ============================================================================== --- accumulo/trunk/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java (original) +++ accumulo/trunk/test/src/main/java/org/apache/accumulo/test/performance/scan/CollectTabletStats.java Tue Apr 9 20:28:12 2013 @@ -27,8 +27,6 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; -import java.util.Set; -import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; @@ -36,7 +34,6 @@ import java.util.concurrent.CountDownLat import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import org.apache.accumulo.core.Constants; import org.apache.accumulo.core.cli.ScannerOpts; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.Instance; @@ -67,12 +64,12 @@ import org.apache.accumulo.core.security import org.apache.accumulo.core.security.thrift.TCredentials; import org.apache.accumulo.core.util.AddressUtil; import org.apache.accumulo.core.util.CachedConfiguration; -import org.apache.accumulo.core.util.MetadataTable; import org.apache.accumulo.core.util.Stat; import org.apache.accumulo.server.ServerConstants; import org.apache.accumulo.server.cli.ClientOnRequiredTable; import org.apache.accumulo.server.conf.ServerConfiguration; import org.apache.accumulo.server.conf.TableConfiguration; +import org.apache.accumulo.server.util.MetadataTable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileStatus; @@ -378,16 +375,8 @@ public class CollectTabletStats { private static List getTabletFiles(TCredentials token, Instance zki, String tableId, KeyExtent ke) { List files = new ArrayList(); - SortedMap tkv = new TreeMap(); - MetadataTable.getTabletAndPrevTabletKeyValues(zki, tkv, ke, null, token); - - Set> es = tkv.entrySet(); - for (Entry entry : es) { - if (entry.getKey().compareRow(ke.getMetadataEntry()) == 0) { - if (entry.getKey().compareColumnFamily(Constants.METADATA_DATAFILE_COLUMN_FAMILY) == 0) { - files.add(ServerConstants.getTablesDir() + "/" + tableId + entry.getKey().getColumnQualifier()); - } - } + for (String cq : MetadataTable.getDataFileSizes(ke, token).keySet()) { + files.add(ServerConstants.getTablesDir() + "/" + tableId + cq); } return files; }