accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mwa...@apache.org
Subject [accumulo-examples] branch master updated: Refactored shard example (#26)
Date Fri, 15 Jun 2018 19:03:00 GMT
This is an automated email from the ASF dual-hosted git repository.

mwalch pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo-examples.git


The following commit(s) were added to refs/heads/master by this push:
     new a6ccf92  Refactored shard example (#26)
a6ccf92 is described below

commit a6ccf92921607bd9922b31699b0ff882bfa2ec72
Author: Mike Walch <mwalch@apache.org>
AuthorDate: Fri Jun 15 15:02:51 2018 -0400

    Refactored shard example (#26)
    
    * Refactored shard example
    
    * Shard example uses new Connector builder
    * Renamed ConnectionInfo to ClientInfo due to changes in Accumulo
    * Updated accumulo-client.properties due to changes in Accumulo
---
 conf/accumulo-client.properties                    | 25 ++++-------
 docs/shard.md                                      | 15 ++-----
 .../examples/mapreduce/bulk/BulkIngestExample.java |  8 ++--
 .../accumulo/examples/shard/ContinuousQuery.java   | 50 +++++++++++-----------
 .../org/apache/accumulo/examples/shard/Index.java  | 24 +++++++----
 .../org/apache/accumulo/examples/shard/Query.java  | 41 ++++++++++--------
 .../apache/accumulo/examples/shard/Reverse.java    | 32 +++++++-------
 .../org/apache/accumulo/examples/ExamplesIT.java   |  6 +--
 8 files changed, 96 insertions(+), 105 deletions(-)

diff --git a/conf/accumulo-client.properties b/conf/accumulo-client.properties
index eb8757d..48094ec 100644
--- a/conf/accumulo-client.properties
+++ b/conf/accumulo-client.properties
@@ -28,29 +28,20 @@ instance.name=
 ## Zookeeper connection information for Accumulo instance
 instance.zookeepers=localhost:2181
 
-## Zookeeper session timeout (in seconds)
-#instance.zookeepers.timeout.sec=30
+## Zookeeper session timeout
+#instance.zookeepers.timeout=30s
 
 
 ## Authentication properties
 ## --------------
-## Authentication method (i.e password, kerberos, provider). Set more properties for chosen method below.
-auth.method=password
+## Authentication method (i.e password, kerberos, PasswordToken, KerberosToken, etc)
+auth.type=password
 
-## Accumulo username/principal for chosen authentication method
-auth.username=
+## Accumulo principal/username for chosen authentication method
+auth.principal=root
 
-## Path to Kerberos keytab
-#auth.kerberos.keytab.path=
-
-## Accumulo user password
-auth.password=
-
-## Alias used to extract Accumulo user password from CredentialProvider
-#auth.provider.name=
-
-## Comma separated list of URLs defining CredentialProvider(s)
-#auth.provider.urls=
+## Authentication token (ex. mypassword, /path/to/keytab)
+auth.token=secret
 
 
 ## Batch Writer properties
diff --git a/docs/shard.md b/docs/shard.md
index f63f712..dbae395 100644
--- a/docs/shard.md
+++ b/docs/shard.md
@@ -31,32 +31,25 @@ To run these example programs, create two tables like below.
 
 After creating the tables, index some files. The following command indexes all of the java files in the Accumulo source code.
 
-    $ cd /local/username/workspace/accumulo/
-    $ find core/src server/src -name "*.java" | xargs ./bin/runex shard.Index -c ./examples.conf -t shard --partitions 30
+    $ find /path/to/accumulo/core -name "*.java" | xargs ./bin/runex shard.Index -t shard --partitions 30
 
 The following command queries the index to find all files containing 'foo' and 'bar'.
 
-    $ ./bin/runex shard.Query -c ./examples.conf -t shard foo bar
+    $ ./bin/runex shard.Query -t shard foo bar
     /local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/security/ColumnVisibilityTest.java
     /local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/client/mock/MockConnectorTest.java
     /local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/security/VisibilityEvaluatorTest.java
-    /local/username/workspace/accumulo/src/server/src/main/java/accumulo/test/functional/RowDeleteTest.java
-    /local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/logger/TestLogWriter.java
-    /local/username/workspace/accumulo/src/server/src/main/java/accumulo/test/functional/DeleteEverythingTest.java
     /local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/data/KeyExtentTest.java
-    /local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/constraints/MetadataConstraintsTest.java
     /local/username/workspace/accumulo/src/core/src/test/java/accumulo/core/iterators/WholeRowIteratorTest.java
-    /local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/util/DefaultMapTest.java
-    /local/username/workspace/accumulo/src/server/src/test/java/accumulo/server/tabletserver/InMemoryMapTest.java
 
 In order to run ContinuousQuery, we need to run Reverse.java to populate doc2term.
 
-    $ ./bin/runex shard.Reverse -c ./examples.conf --shardTable shard --doc2Term doc2term
+    $ ./bin/runex shard.Reverse --shardTable shard --doc2Term doc2term
 
 Below ContinuousQuery is run using 5 terms. So it selects 5 random terms from each document, then it continually
 randomly selects one set of 5 terms and queries. It prints the number of matching documents and the time in seconds.
 
-    $ ./bin/runex shard.ContinuousQuery -c ./examples.conf --shardTable shard --doc2Term doc2term --terms 5
+    $ ./bin/runex shard.ContinuousQuery --shardTable shard --doc2Term doc2term --terms 5
     [public, core, class, binarycomparable, b] 2  0.081
     [wordtodelete, unindexdocument, doctablename, putdelete, insert] 1  0.041
     [import, columnvisibilityinterpreterfactory, illegalstateexception, cv, columnvisibility] 1  0.049
diff --git a/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java b/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
index f87a768..a4ffe10 100644
--- a/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
+++ b/src/main/java/org/apache/accumulo/examples/mapreduce/bulk/BulkIngestExample.java
@@ -22,7 +22,7 @@ import java.io.PrintStream;
 import java.util.Base64;
 import java.util.Collection;
 
-import org.apache.accumulo.core.client.ConnectionInfo;
+import org.apache.accumulo.core.client.ClientInfo;
 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat;
 import org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat;
@@ -120,9 +120,9 @@ public class BulkIngestExample extends Configured implements Tool {
       job.setReducerClass(ReduceClass.class);
       job.setOutputFormatClass(AccumuloFileOutputFormat.class);
 
-      Connector connector = Connector.builder().usingProperties("conf/accumulo-client.properties").build();
-      ConnectionInfo connectionInfo = Connector.builder().usingProperties("conf/accumulo-client.properties").info();
-      AccumuloInputFormat.setConnectionInfo(job, connectionInfo);
+      ClientInfo info = Connector.builder().usingProperties("conf/accumulo-client.properties").info();
+      Connector connector = Connector.builder().usingClientInfo(info).build();
+      AccumuloInputFormat.setClientInfo(job, info);
       AccumuloInputFormat.setInputTableName(job, SetupTable.tableName);
       AccumuloInputFormat.setScanAuthorizations(job, Authorizations.EMPTY);
       AccumuloOutputFormat.setCreateTables(job, true);
diff --git a/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java b/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
index cabce7a..1371964 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/ContinuousQuery.java
@@ -21,7 +21,6 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Map.Entry;
 import java.util.Random;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.Connector;
@@ -31,8 +30,8 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.user.IntersectingIterator;
-import org.apache.accumulo.examples.cli.BatchScannerOpts;
-import org.apache.accumulo.examples.cli.ClientOpts;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.hadoop.io.Text;
 
 import com.beust.jcommander.Parameter;
@@ -44,51 +43,50 @@ import com.google.common.collect.Iterators;
  */
 public class ContinuousQuery {
 
-  static class Opts extends ClientOpts {
+  static class Opts extends Help {
+
     @Parameter(names = "--shardTable", required = true, description = "name of the shard table")
     String tableName = null;
+
     @Parameter(names = "--doc2Term", required = true, description = "name of the doc2Term table")
     String doc2Term;
+
     @Parameter(names = "--terms", required = true, description = "the number of terms in the query")
     int numTerms;
+
     @Parameter(names = "--count", description = "the number of queries to run")
     long iterations = Long.MAX_VALUE;
   }
 
   public static void main(String[] args) throws Exception {
     Opts opts = new Opts();
-    BatchScannerOpts bsOpts = new BatchScannerOpts();
-    opts.parseArgs(ContinuousQuery.class.getName(), args, bsOpts);
+    opts.parseArgs(ContinuousQuery.class.getName(), args);
 
-    Connector conn = opts.getConnector();
+    Connector conn = Connector.builder().usingProperties("conf/accumulo-client.properties").build();
 
-    ArrayList<Text[]> randTerms = findRandomTerms(conn.createScanner(opts.doc2Term, opts.auths), opts.numTerms);
+    ArrayList<Text[]> randTerms = findRandomTerms(conn.createScanner(opts.doc2Term, Authorizations.EMPTY), opts.numTerms);
 
     Random rand = new Random();
 
-    BatchScanner bs = conn.createBatchScanner(opts.tableName, opts.auths, bsOpts.scanThreads);
-    bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
+    try (BatchScanner bs = conn.createBatchScanner(opts.tableName, Authorizations.EMPTY, 5)) {
+      for (long i = 0; i < opts.iterations; i += 1) {
+        Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
 
-    for (long i = 0; i < opts.iterations; i += 1) {
-      Text[] columns = randTerms.get(rand.nextInt(randTerms.size()));
+        bs.clearScanIterators();
+        bs.clearColumns();
 
-      bs.clearScanIterators();
-      bs.clearColumns();
+        IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
+        IntersectingIterator.setColumnFamilies(ii, columns);
+        bs.addScanIterator(ii);
+        bs.setRanges(Collections.singleton(new Range()));
 
-      IteratorSetting ii = new IteratorSetting(20, "ii", IntersectingIterator.class);
-      IntersectingIterator.setColumnFamilies(ii, columns);
-      bs.addScanIterator(ii);
-      bs.setRanges(Collections.singleton(new Range()));
+        long t1 = System.currentTimeMillis();
+        int count = Iterators.size(bs.iterator());
+        long t2 = System.currentTimeMillis();
 
-      long t1 = System.currentTimeMillis();
-      int count = Iterators.size(bs.iterator());
-      long t2 = System.currentTimeMillis();
-
-      System.out.printf("  %s %,d %6.3f%n", Arrays.asList(columns), count, (t2 - t1) / 1000.0);
+        System.out.printf("  %s %,d %6.3f%n", Arrays.asList(columns), count, (t2 - t1) / 1000.0);
+      }
     }
-
-    bs.close();
-
   }
 
   private static ArrayList<Text[]> findRandomTerms(Scanner scanner, int numTerms) {
diff --git a/src/main/java/org/apache/accumulo/examples/shard/Index.java b/src/main/java/org/apache/accumulo/examples/shard/Index.java
index 92c8a12..5b2d67f 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/Index.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/Index.java
@@ -23,10 +23,12 @@ import java.util.HashSet;
 import java.util.List;
 
 import org.apache.accumulo.core.client.BatchWriter;
+import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.examples.cli.BatchWriterOpts;
 import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.hadoop.io.Text;
 
 import com.beust.jcommander.Parameter;
@@ -91,25 +93,31 @@ public class Index {
 
   }
 
-  static class Opts extends ClientOnRequiredTable {
+  static class IndexOpts extends Help {
+
+    @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
+    private String tableName;
+
     @Parameter(names = "--partitions", required = true, description = "the number of shards to create")
     int partitions;
+
     @Parameter(required = true, description = "<file> { <file> ... }")
     List<String> files = new ArrayList<>();
   }
 
   public static void main(String[] args) throws Exception {
-    Opts opts = new Opts();
-    BatchWriterOpts bwOpts = new BatchWriterOpts();
-    opts.parseArgs(Index.class.getName(), args, bwOpts);
+    IndexOpts opts = new IndexOpts();
+    opts.parseArgs(Index.class.getName(), args);
 
     String splitRegex = "\\W+";
 
-    BatchWriter bw = opts.getConnector().createBatchWriter(opts.getTableName(), bwOpts.getBatchWriterConfig());
-    for (String filename : opts.files) {
-      index(opts.partitions, new File(filename), splitRegex, bw);
+    Connector connector = Connector.builder().usingProperties("conf/accumulo-client.properties").build();
+
+    try (BatchWriter bw = connector.createBatchWriter(opts.tableName)) {
+      for (String filename : opts.files) {
+        index(opts.partitions, new File(filename), splitRegex, bw);
+      }
     }
-    bw.close();
   }
 
 }
diff --git a/src/main/java/org/apache/accumulo/examples/shard/Query.java b/src/main/java/org/apache/accumulo/examples/shard/Query.java
index e7fe30a..16f1530 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/Query.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/Query.java
@@ -20,7 +20,6 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map.Entry;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.Connector;
@@ -30,8 +29,8 @@ import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.iterators.user.IntersectingIterator;
-import org.apache.accumulo.examples.cli.BatchScannerOpts;
-import org.apache.accumulo.examples.cli.ClientOnRequiredTable;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.hadoop.io.Text;
 
 import com.beust.jcommander.Parameter;
@@ -41,10 +40,14 @@ import com.beust.jcommander.Parameter;
  */
 public class Query {
 
-  static class Opts extends ClientOnRequiredTable {
+  static class QueryOpts extends Help {
+
     @Parameter(description = " term { <term> ... }")
     List<String> terms = new ArrayList<>();
 
+    @Parameter(names = {"-t", "--table"}, required = true, description = "table to use")
+    private String tableName;
+
     @Parameter(names = {"--sample"}, description = "Do queries against sample, useful when sample is built using column qualifier")
     private boolean useSample = false;
 
@@ -81,21 +84,21 @@ public class Query {
   }
 
   public static void main(String[] args) throws Exception {
-    Opts opts = new Opts();
-    BatchScannerOpts bsOpts = new BatchScannerOpts();
-    opts.parseArgs(Query.class.getName(), args, bsOpts);
-    Connector conn = opts.getConnector();
-    BatchScanner bs = conn.createBatchScanner(opts.getTableName(), opts.auths, bsOpts.scanThreads);
-    bs.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);
-    if (opts.useSample) {
-      SamplerConfiguration samplerConfig = conn.tableOperations().getSamplerConfiguration(opts.getTableName());
-      CutoffIntersectingIterator.validateSamplerConfig(conn.tableOperations().getSamplerConfiguration(opts.getTableName()));
-      bs.setSamplerConfiguration(samplerConfig);
-    }
-    for (String entry : query(bs, opts.terms, opts.sampleCutoff))
-      System.out.println("  " + entry);
+    QueryOpts opts = new QueryOpts();
+    opts.parseArgs(Query.class.getName(), args);
 
-    bs.close();
-  }
+    Connector conn = Connector.builder().usingProperties("conf/accumulo-client.properties")
+        .build();
 
+    try (BatchScanner bs = conn.createBatchScanner(opts.tableName, Authorizations.EMPTY, 10)) {
+      if (opts.useSample) {
+        SamplerConfiguration samplerConfig = conn.tableOperations().getSamplerConfiguration(opts.tableName);
+        CutoffIntersectingIterator.validateSamplerConfig(conn.tableOperations().getSamplerConfiguration(opts.tableName));
+        bs.setSamplerConfiguration(samplerConfig);
+      }
+      for (String entry : query(bs, opts.terms, opts.sampleCutoff)) {
+        System.out.println("  " + entry);
+      }
+    }
+  }
 }
diff --git a/src/main/java/org/apache/accumulo/examples/shard/Reverse.java b/src/main/java/org/apache/accumulo/examples/shard/Reverse.java
index 26e7f38..19ef6a4 100644
--- a/src/main/java/org/apache/accumulo/examples/shard/Reverse.java
+++ b/src/main/java/org/apache/accumulo/examples/shard/Reverse.java
@@ -24,8 +24,10 @@ import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.examples.cli.BatchWriterOpts;
 import org.apache.accumulo.examples.cli.ClientOpts;
+import org.apache.accumulo.examples.cli.Help;
 import org.apache.accumulo.examples.cli.ScannerOpts;
 import org.apache.hadoop.io.Text;
 
@@ -37,33 +39,29 @@ import com.beust.jcommander.Parameter;
  */
 public class Reverse {
 
-  static class Opts extends ClientOpts {
+  static class Opts extends Help {
+
     @Parameter(names = "--shardTable")
     String shardTable = "shard";
+
     @Parameter(names = "--doc2Term")
     String doc2TermTable = "doc2Term";
   }
 
   public static void main(String[] args) throws Exception {
     Opts opts = new Opts();
-    ScannerOpts scanOpts = new ScannerOpts();
-    BatchWriterOpts bwOpts = new BatchWriterOpts();
-    opts.parseArgs(Reverse.class.getName(), args, scanOpts, bwOpts);
-
-    Connector conn = opts.getConnector();
+    opts.parseArgs(Reverse.class.getName(), args);
 
-    Scanner scanner = conn.createScanner(opts.shardTable, opts.auths);
-    scanner.setBatchSize(scanOpts.scanBatchSize);
-    BatchWriter bw = conn.createBatchWriter(opts.doc2TermTable, bwOpts.getBatchWriterConfig());
+    Connector conn = Connector.builder().usingProperties("conf/accumulo-client.properties").build();
 
-    for (Entry<Key,Value> entry : scanner) {
-      Key key = entry.getKey();
-      Mutation m = new Mutation(key.getColumnQualifier());
-      m.put(key.getColumnFamily(), new Text(), new Value(new byte[0]));
-      bw.addMutation(m);
+    try (Scanner scanner = conn.createScanner(opts.shardTable, Authorizations.EMPTY);
+         BatchWriter bw = conn.createBatchWriter(opts.doc2TermTable)) {
+      for (Entry<Key, Value> entry : scanner) {
+        Key key = entry.getKey();
+        Mutation m = new Mutation(key.getColumnQualifier());
+        m.put(key.getColumnFamily(), new Text(), new Value(new byte[0]));
+        bw.addMutation(m);
+      }
     }
-
-    bw.close();
-
   }
 }
diff --git a/src/test/java/org/apache/accumulo/examples/ExamplesIT.java b/src/test/java/org/apache/accumulo/examples/ExamplesIT.java
index 0087eb4..022ed80 100644
--- a/src/test/java/org/apache/accumulo/examples/ExamplesIT.java
+++ b/src/test/java/org/apache/accumulo/examples/ExamplesIT.java
@@ -129,9 +129,9 @@ public class ExamplesIT extends AccumuloClusterHarness {
   @Before
   public void getClusterInfo() throws Exception {
     c = getConnector();
-    String user = getAdminPrincipal();
-    String instance = c.getInstance().getInstanceName();
-    String keepers = c.getInstance().getZooKeepers();
+    String user = c.info().getPrincipal();
+    String instance = c.info().getInstanceName();
+    String keepers = c.info().getZooKeepers();
     AuthenticationToken token = getAdminToken();
     if (token instanceof PasswordToken) {
       String passwd = new String(((PasswordToken) getAdminToken()).getPassword(), UTF_8);

-- 
To stop receiving notification emails like this one, please contact
mwalch@apache.org.

Mime
View raw message