accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mmil...@apache.org
Subject [accumulo] branch master updated: Update Bulk Import Tests (#702)
Date Wed, 17 Oct 2018 22:00:41 GMT
This is an automated email from the ASF dual-hosted git repository.

mmiller pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/master by this push:
     new a6127bc  Update Bulk Import Tests (#702)
a6127bc is described below

commit a6127bcb2a24c52b29285bfb65e1f7e8752eca2b
Author: Mike Miller <mmiller@apache.org>
AuthorDate: Wed Oct 17 18:00:37 2018 -0400

    Update Bulk Import Tests (#702)
    
    * Renamed GetFileInfoBulkIT to CountNameNodeOpsBulkIT and updated to use new method
    * Suppress warnings in old bulk import code tests
    * Deleted FastBulkImportIT - prone to failure and no longer relevant
---
 .../client/impl/TableOperationsHelperTest.java     |   1 +
 .../accumulo/test/BulkImportMonitoringIT.java      |   2 +
 .../apache/accumulo/test/BulkImportVolumeIT.java   |   2 +
 ...InfoBulkIT.java => CountNameNodeOpsBulkIT.java} |  60 ++++++-----
 .../accumulo/test/functional/BulkFileIT.java       |  17 +++-
 .../performance/metadata/FastBulkImportIT.java     | 113 ---------------------
 6 files changed, 58 insertions(+), 137 deletions(-)

diff --git a/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java b/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
index 461ec63..0f55345 100644
--- a/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/client/impl/TableOperationsHelperTest.java
@@ -183,6 +183,7 @@ public class TableOperationsHelperTest {
     }
 
     @Override
+    @Deprecated
     public void importDirectory(String tableName, String dir, String failureDir, boolean setTime)
         throws TableNotFoundException, IOException, AccumuloException, AccumuloSecurityException {}
 
diff --git a/test/src/main/java/org/apache/accumulo/test/BulkImportMonitoringIT.java b/test/src/main/java/org/apache/accumulo/test/BulkImportMonitoringIT.java
index bfb0bb9..77677c5 100644
--- a/test/src/main/java/org/apache/accumulo/test/BulkImportMonitoringIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/BulkImportMonitoringIT.java
@@ -56,6 +56,8 @@ public class BulkImportMonitoringIT extends ConfigurableMacBase {
     cfg.useMiniDFS(true);
   }
 
+  // suppress importDirectory deprecated since this tests legacy monitoring
+  @SuppressWarnings("deprecation")
   @Test
   public void test() throws Exception {
     getCluster().getClusterControl().start(ServerType.MONITOR);
diff --git a/test/src/main/java/org/apache/accumulo/test/BulkImportVolumeIT.java b/test/src/main/java/org/apache/accumulo/test/BulkImportVolumeIT.java
index bceb39a..e1fec08 100644
--- a/test/src/main/java/org/apache/accumulo/test/BulkImportVolumeIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/BulkImportVolumeIT.java
@@ -61,6 +61,8 @@ public class BulkImportVolumeIT extends AccumuloClusterHarness {
     hadoopCoreSite.set("fs.file.impl", RawLocalFileSystem.class.getName());
   }
 
+  // suppress importDirectory deprecated since this tests legacy failure directory
+  @SuppressWarnings("deprecation")
   @Test
   public void testBulkImportFailure() throws Exception {
     String tableName = getUniqueNames(1)[0];
diff --git a/test/src/main/java/org/apache/accumulo/test/GetFileInfoBulkIT.java b/test/src/main/java/org/apache/accumulo/test/CountNameNodeOpsBulkIT.java
similarity index 77%
rename from test/src/main/java/org/apache/accumulo/test/GetFileInfoBulkIT.java
rename to test/src/main/java/org/apache/accumulo/test/CountNameNodeOpsBulkIT.java
index b3f592e..0f0f68b 100644
--- a/test/src/main/java/org/apache/accumulo/test/GetFileInfoBulkIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/CountNameNodeOpsBulkIT.java
@@ -18,10 +18,10 @@ package org.apache.accumulo.test;
 
 import static org.apache.accumulo.fate.util.UtilWaitThread.sleepUninterruptibly;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
 
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedSet;
@@ -41,7 +41,6 @@ import org.apache.accumulo.core.file.FileSKVWriter;
 import org.apache.accumulo.core.file.rfile.RFile;
 import org.apache.accumulo.core.master.thrift.MasterMonitorInfo;
 import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.minicluster.ServerType;
 import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
 import org.apache.accumulo.test.functional.ConfigurableMacBase;
@@ -56,8 +55,11 @@ import com.google.gson.Gson;
 
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 
-// ACCUMULO-3949, ACCUMULO-3953
-public class GetFileInfoBulkIT extends ConfigurableMacBase {
+/**
+ * Originally written for ACCUMULO-3949 and ACCUMULO-3953 to count the number of FileInfo calls to
+ * the NameNode. Updated in 2.0 to count the calls for new bulk import comparing it to the old.
+ */
+public class CountNameNodeOpsBulkIT extends ConfigurableMacBase {
 
   @Override
   protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
@@ -67,25 +69,29 @@ public class GetFileInfoBulkIT extends ConfigurableMacBase {
 
   @SuppressFBWarnings(value = {"PATH_TRAVERSAL_IN", "URLCONNECTION_SSRF_FD"},
       justification = "path provided by test; url provided by test")
-  private long getOpts() throws Exception {
+  private Map<?,?> getStats() throws Exception {
     String uri = getCluster().getMiniDfs().getHttpUri(0);
     URL url = new URL(uri + "/jmx");
     log.debug("Fetching web page " + url);
     String jsonString = FunctionalTestUtils.readAll(url.openStream());
     Gson gson = new Gson();
-    Map<?,?> jsonObject = (Map<?,?>) gson.fromJson(jsonString, Object.class);
+    Map<?,?> jsonObject = gson.fromJson(jsonString, Map.class);
     List<?> beans = (List<?>) jsonObject.get("beans");
     for (Object bean : beans) {
       Map<?,?> map = (Map<?,?>) bean;
       if (map.get("name").toString().equals("Hadoop:service=NameNode,name=NameNodeActivity")) {
-        return (long) Double.parseDouble(map.get("FileInfoOps").toString());
+        return map;
       }
     }
-    return 0;
+    return new HashMap<>(0);
+  }
+
+  private long getStat(Map<?,?> map, String stat) {
+    return (long) Double.parseDouble(map.get(stat).toString());
   }
 
   @Test
-  public void test() throws Exception {
+  public void compareOldNewBulkImportTest() throws Exception {
     final AccumuloClient c = getClient();
     getCluster().getClusterControl().kill(ServerType.GARBAGE_COLLECTOR, "localhost");
     final String tableName = getUniqueNames(1)[0];
@@ -113,13 +119,11 @@ public class GetFileInfoBulkIT extends ConfigurableMacBase {
     fs.mkdirs(base);
 
     ExecutorService es = Executors.newFixedThreadPool(5);
-    List<Future<Pair<String,String>>> futures = new ArrayList<>();
+    List<Future<String>> futures = new ArrayList<>();
     for (int i = 0; i < 10; i++) {
       final int which = i;
       futures.add(es.submit(() -> {
-        Path bulkFailures = new Path(base, "failures" + which);
         Path files = new Path(base, "files" + which);
-        fs.mkdirs(bulkFailures);
         fs.mkdirs(files);
         for (int i1 = 0; i1 < 100; i1++) {
           FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
@@ -131,22 +135,20 @@ public class GetFileInfoBulkIT extends ConfigurableMacBase {
           }
           writer.close();
         }
-        return new Pair<>(files.toString(), bulkFailures.toString());
+        return files.toString();
       }));
     }
-    List<Pair<String,String>> dirs = new ArrayList<>();
-    for (Future<Pair<String,String>> f : futures) {
+    List<String> dirs = new ArrayList<>();
+    for (Future<String> f : futures) {
       dirs.add(f.get());
     }
     log.info("Importing");
-    long startOps = getOpts();
+    long startOps = getStat(getStats(), "FileInfoOps");
     long now = System.currentTimeMillis();
     List<Future<Object>> errs = new ArrayList<>();
-    for (Pair<String,String> entry : dirs) {
-      final String dir = entry.getFirst();
-      final String err = entry.getSecond();
+    for (String dir : dirs) {
       errs.add(es.submit(() -> {
-        c.tableOperations().importDirectory(tableName, dir, err, false);
+        c.tableOperations().importDirectory(dir).to(tableName).load();
         return null;
       }));
     }
@@ -158,9 +160,21 @@ public class GetFileInfoBulkIT extends ConfigurableMacBase {
     log.info(
         String.format("Completed in %.2f seconds", (System.currentTimeMillis() - now) / 1000.));
     sleepUninterruptibly(30, TimeUnit.SECONDS);
-    long getFileInfoOpts = getOpts() - startOps;
-    log.info("# opts: {}", getFileInfoOpts);
-    assertTrue("unexpected number of getFileOps", getFileInfoOpts < 2100 && getFileInfoOpts > 1000);
+    Map<?,?> map = getStats();
+    map.forEach((k, v) -> {
+      try {
+        if (v != null && Double.parseDouble(v.toString()) > 0.0)
+          log.debug("{}:{}", k, v);
+      } catch (NumberFormatException e) {
+        // only looking for numbers
+      }
+    });
+    long getFileInfoOpts = getStat(map, "FileInfoOps") - startOps;
+    log.info("New bulk import used {} opts, vs old using 2060", getFileInfoOpts);
+    // counts for old bulk import:
+    // Expected number of FileInfoOps was between 1000 and 2100
+    // new bulk import is way better :)
+    assertEquals("unexpected number of FileInfoOps", 20, getFileInfoOpts);
   }
 
 }
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/BulkFileIT.java b/test/src/main/java/org/apache/accumulo/test/functional/BulkFileIT.java
index ef5ec28..69d0a8e 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/BulkFileIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/BulkFileIT.java
@@ -43,6 +43,9 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.junit.Test;
 
+/**
+ * Tests old bulk import technique. For new bulk import see {@link BulkLoadIT}
+ */
 public class BulkFileIT extends AccumuloClusterHarness {
 
   @Override
@@ -55,6 +58,8 @@ public class BulkFileIT extends AccumuloClusterHarness {
     return 4 * 60;
   }
 
+  // suppress importDirectory deprecated since this is the only test for legacy technique
+  @SuppressWarnings("deprecation")
   @Test
   public void testBulkFile() throws Exception {
     AccumuloClient c = getAccumuloClient();
@@ -78,7 +83,17 @@ public class BulkFileIT extends AccumuloClusterHarness {
     writeData(conf, aconf, fs, dir, "f2", 334, 999);
     writeData(conf, aconf, fs, dir, "f3", 1000, 1999);
 
-    c.tableOperations().importDirectory(dir).to(tableName).load();
+    String failDir = dir + "_failures";
+    Path failPath = new Path(failDir);
+    fs.delete(failPath, true);
+    fs.mkdirs(failPath);
+
+    // Ensure server can read/modify files
+    c.tableOperations().importDirectory(tableName, dir, failDir, false);
+
+    if (fs.listStatus(failPath).length > 0) {
+      throw new Exception("Some files failed to bulk import");
+    }
 
     FunctionalTestUtils.checkRFiles(c, tableName, 6, 6, 1, 1);
 
diff --git a/test/src/main/java/org/apache/accumulo/test/performance/metadata/FastBulkImportIT.java b/test/src/main/java/org/apache/accumulo/test/performance/metadata/FastBulkImportIT.java
deleted file mode 100644
index 04763d8..0000000
--- a/test/src/main/java/org/apache/accumulo/test/performance/metadata/FastBulkImportIT.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.test.performance.metadata;
-
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assume.assumeFalse;
-
-import java.util.SortedSet;
-import java.util.TreeSet;
-
-import org.apache.accumulo.core.client.AccumuloClient;
-import org.apache.accumulo.core.conf.DefaultConfiguration;
-import org.apache.accumulo.core.conf.Property;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.file.FileOperations;
-import org.apache.accumulo.core.file.FileSKVWriter;
-import org.apache.accumulo.core.file.rfile.RFile;
-import org.apache.accumulo.core.util.CachedConfiguration;
-import org.apache.accumulo.minicluster.impl.MiniAccumuloConfigImpl;
-import org.apache.accumulo.test.functional.ConfigurableMacBase;
-import org.apache.accumulo.test.mrit.IntegrationTestMapReduce;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-// ACCUMULO-3327
-public class FastBulkImportIT extends ConfigurableMacBase {
-
-  @BeforeClass
-  public static void checkMR() {
-    assumeFalse(IntegrationTestMapReduce.isMapReduce());
-  }
-
-  @Override
-  protected int defaultTimeoutSeconds() {
-    return 120;
-  }
-
-  @Override
-  protected void configure(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
-    cfg.setNumTservers(3);
-    cfg.setProperty(Property.TSERV_BULK_ASSIGNMENT_THREADS, "5");
-    cfg.setProperty(Property.TSERV_BULK_PROCESS_THREADS, "5");
-    cfg.setProperty(Property.TABLE_MAJC_RATIO, "9999");
-    cfg.setProperty(Property.TABLE_FILE_MAX, "9999");
-    cfg.setProperty(Property.TABLE_DURABILITY, "none");
-  }
-
-  @Test
-  public void test() throws Exception {
-    log.info("Creating table");
-    final String tableName = getUniqueNames(1)[0];
-    final AccumuloClient c = getClient();
-    c.tableOperations().create(tableName);
-    log.info("Adding splits");
-    SortedSet<Text> splits = new TreeSet<>();
-    for (int i = 1; i < 0xfff; i += 7) {
-      splits.add(new Text(Integer.toHexString(i)));
-    }
-    c.tableOperations().addSplits(tableName, splits);
-
-    log.info("Creating lots of bulk import files");
-    FileSystem fs = getCluster().getFileSystem();
-    Path basePath = getCluster().getTemporaryPath();
-    CachedConfiguration.setInstance(fs.getConf());
-
-    Path base = new Path(basePath, "testBulkFail_" + tableName);
-    fs.delete(base, true);
-    fs.mkdirs(base);
-    Path bulkFailures = new Path(base, "failures");
-    Path files = new Path(base, "files");
-    fs.mkdirs(bulkFailures);
-    fs.mkdirs(files);
-    for (int i = 0; i < 100; i++) {
-      FileSKVWriter writer = FileOperations.getInstance().newWriterBuilder()
-          .forFile(files + "/bulk_" + i + "." + RFile.EXTENSION, fs, fs.getConf())
-          .withTableConfiguration(DefaultConfiguration.getInstance()).build();
-      writer.startDefaultLocalityGroup();
-      for (int j = 0x100; j < 0xfff; j += 3) {
-        writer.append(new Key(Integer.toHexString(j)), new Value(new byte[0]));
-      }
-      writer.close();
-    }
-    log.info("Waiting for balance");
-    c.instanceOperations().waitForBalance();
-
-    log.info("Bulk importing files");
-    long now = System.currentTimeMillis();
-    c.tableOperations().importDirectory(tableName, files.toString(), bulkFailures.toString(), true);
-    double diffSeconds = (System.currentTimeMillis() - now) / 1000.;
-    log.info(String.format("Import took %.2f seconds", diffSeconds));
-    assertTrue(diffSeconds < 30);
-  }
-
-}


Mime
View raw message