accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vi...@apache.org
Subject git commit: ACCUMULO-1458 - Sean Hickey's patch for sample using Password token
Date Fri, 01 Nov 2013 22:56:13 GMT
Updated Branches:
  refs/heads/master b64149d78 -> c8999e693


ACCUMULO-1458 - Sean Hickey's patch for sample using Password token

Signed-off-by: John Vines <jvines@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/c8999e69
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/c8999e69
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/c8999e69

Branch: refs/heads/master
Commit: c8999e6937ad0af8de4ba928ebab33c55e3d0567
Parents: b64149d
Author: John Vines <jvines@gmail.com>
Authored: Thu Oct 31 17:39:16 2013 -0400
Committer: John Vines <jvines@gmail.com>
Committed: Fri Nov 1 18:28:31 2013 -0400

----------------------------------------------------------------------
 .../simple/mapreduce/TokenFileWordCount.java    | 97 ++++++++++++++++++++
 .../main/resources/docs/examples/README.mapred  | 59 +++++++++++-
 2 files changed, 155 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8999e69/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java
----------------------------------------------------------------------
diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java
new file mode 100644
index 0000000..15a7765
--- /dev/null
+++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.accumulo.examples.simple.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
+import org.apache.accumulo.core.data.Mutation;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.util.CachedConfiguration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A simple map reduce job that inserts word counts into accumulo. See the README for instructions on how to run this. This version does not use the ClientOpts
+ * class to parse arguments as an example of using AccumuloInputFormat and AccumuloOutputFormat directly. See README.mapred for more details.
+ * 
+ */
+public class TokenFileWordCount extends Configured implements Tool {
+  
+  public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> {
+    @Override
+    public void map(LongWritable key, Text value, Context output) throws IOException {
+      String[] words = value.toString().split("\\s+");
+      
+      for (String word : words) {
+        
+        Mutation mutation = new Mutation(new Text(word));
+        mutation.put(new Text("count"), new Text("20080906"), new Value("1".getBytes()));
+        
+        try {
+          output.write(null, mutation);
+        } catch (InterruptedException e) {
+          e.printStackTrace();
+        }
+      }
+    }
+  }
+  
+  public int run(String[] args) throws Exception {
+    
+    String instance = args[0];
+    String zookeepers = args[1];
+    String user = args[2];
+    String tokenFile = args[3];
+    String input = args[4];
+    String tableName = args[5];
+    
+    Job job = new Job(getConf(), TokenFileWordCount.class.getName());
+    job.setJarByClass(this.getClass());
+    
+    job.setInputFormatClass(TextInputFormat.class);
+    TextInputFormat.setInputPaths(job, input);
+    
+    job.setMapperClass(MapClass.class);
+    
+    job.setNumReduceTasks(0);
+    
+    job.setOutputFormatClass(AccumuloOutputFormat.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(Mutation.class);
+    
+    // AccumuloInputFormat not used here, but it uses the same functions.
+    AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers);
+    AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile);
+    AccumuloOutputFormat.setCreateTables(job, true);
+    AccumuloOutputFormat.setDefaultTableName(job, tableName);
+    
+    job.waitForCompletion(true);
+    return 0;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(CachedConfiguration.getInstance(), new TokenFileWordCount(), args);
+    System.exit(res);
+  }
+}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8999e69/server/monitor/src/main/resources/docs/examples/README.mapred
----------------------------------------------------------------------
diff --git a/server/monitor/src/main/resources/docs/examples/README.mapred b/server/monitor/src/main/resources/docs/examples/README.mapred
index 4acd306..b98140f 100644
--- a/server/monitor/src/main/resources/docs/examples/README.mapred
+++ b/server/monitor/src/main/resources/docs/examples/README.mapred
@@ -50,7 +50,7 @@ for the column family count.
 
 After creating the table, run the word count map reduce job.
 
-    $ bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.WordCount -i instance -z zookeepers  --input /user/username/wc wordCount -u username -p password
+    $ bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.WordCount -i instance -z zookeepers  --input /user/username/wc -t wordCount -u username -p password
     
     11/02/07 18:20:11 INFO input.FileInputFormat: Total input paths to process : 1
     11/02/07 18:20:12 INFO mapred.JobClient: Running job: job_201102071740_0003
@@ -95,3 +95,60 @@ org.apache.accumulo.examples.simple.mapreduce.UniqueColumns.  This example
 computes the unique set of columns in a table and shows how a map reduce job
 can directly read a tables files from HDFS. 
 
+One more example available is 
+org.apache.accumulo.examples.simple.mapreduce.TokenFileWordCount.
+The TokenFileWordCount example works exactly the same as the WordCount example
+explained above except that it uses a token file rather than giving the 
+password directly to the map-reduce job (this avoids having the password 
+displayed in the job's configuration which is world-readable).
+
+To create a token file, use the create-token utility
+
+  $ ./bin/accumulo create-token
+  
+It defaults to creating a PasswordToken, but you can specify the token class 
+with -tc (requires the fully qualified class name). Based on the token class, 
+it will prompt you for each property required to create the token.
+
+The last value it prompts for is a local filename to save to. If this file
+exists, it will append the new token to the end. Multiple tokens can exist in
+a file, but only the first one for each user will be recognized.
+
+Rather than waiting for the prompts, you can specify some options when calling
+create-token, for example
+
+  $ ./bin/accumulo create-token -u root -p secret -f root.pw
+  
+would create a token file containing a PasswordToken for 
+user 'root' with password 'secret' and saved to 'root.pw'
+
+This local file needs to be uploaded to hdfs to be used with the 
+map-reduce job. For example, if the file were 'root.pw' in the local directory:
+
+  $ hadoop fs -put root.pw root.pw
+  
+This would put 'root.pw' in the user's home directory in hdfs. 
+
+Because the basic WordCount example uses Opts to parse its arguments 
+(which extends ClientOnRequiredTable), you can use a token file with
+the basic WordCount example by calling the same command as explained above
+except replacing the password with the token file (rather than -p, use -tf).
+
+  $ ./bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.WordCount -i instance -z zookeepers  --input /user/username/wc -t wordCount -u username -tf tokenfile
+
+In the above examples, username was 'root' and tokenfile was 'root.pw'  
+
+However, if you don't want to use the Opts class to parse arguments,
+the TokenFileWordCount is an example of using the token file manually.
+
+  $ bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.TokenFileWordCount instance zookeepers username tokenfile /user/username/wc wordCount
+
+The results should be the same as the WordCount example except that the
+authentication token was not stored in the configuration. It was instead 
+stored in a file that the map-reduce job pulled into the distributed cache.
+(If you ran either of these on the same table right after the 
+WordCount example, then the resulting counts should just double.)
+
+
+
+


Mime
View raw message