hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r648422 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/mapred/
Date Tue, 15 Apr 2008 21:39:22 GMT
Author: stack
Date: Tue Apr 15 14:39:20 2008
New Revision: 648422

URL: http://svn.apache.org/viewvc?rev=648422&view=rev
Log:
HBASE-559 MR example job to count table rows

Added:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties
Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/build.xml
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=648422&r1=648421&r2=648422&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Apr 15 14:39:20 2008
@@ -7,6 +7,10 @@
    HBASE-12    When hbase regionserver restarts, it says "impossible state for
                createLease()"
 
+  IMPROVEMENTS
+   HBASE-559   MR example job to count table rows
+
+
 Release 0.1.1 - 04/11/2008
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hbase/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/build.xml?rev=648422&r1=648421&r2=648422&view=diff
==============================================================================
--- hadoop/hbase/trunk/build.xml (original)
+++ hadoop/hbase/trunk/build.xml Tue Apr 15 14:39:20 2008
@@ -148,12 +148,20 @@
    </javac>
   </target>
 	
-  <!-- Override jar target to specify main class -->
   <target name="jar" depends="compile">
+    <!--Copy over any properties under src-->
+    <copy todir="${build.classes}">
+      <fileset dir="${src.dir}">
+        <include name="**/*.properties" />
+      </fileset>
+    </copy>
     <jar jarfile="${build.dir}/${final.name}.jar"
         basedir="${build.classes}" >
       <fileset file="${basedir}/conf/hbase-default.xml"/>
       <zipfileset dir="${build.webapps}" prefix="webapps"/>
+   		<manifest>
+            <attribute name="Main-Class" value="org/apache/hadoop/hbase/mapred/Driver" />
+    	</manifest>
     </jar>
   </target>
 

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java?rev=648422&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java Tue Apr 15 14:39:20 2008
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import org.apache.hadoop.util.ProgramDriver;
+
+/**
+ * Driver for hbase mapreduce jobs. Select which to run by passing
+ * name of job to this main.
+ */
+public class Driver {
+  /**
+   * @param args
+   * @throws Throwable 
+   */
+  public static void main(String[] args) throws Throwable {
+    ProgramDriver pgd = new ProgramDriver();
+    pgd.addClass(RowCounter.NAME, RowCounter.class,
+      "Count rows in HBase table");
+    pgd.driver(args);
+  }
+}

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java?rev=648422&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java Tue Apr 15 14:39:20 2008
@@ -0,0 +1,126 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.mapred;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.Cell;
+import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * A job with a map to count rows.
+ * Map outputs table rows IF the input row has columns that have content.  
+ * Uses an {@link IdentityReducer}
+ */
+public class RowCounter extends TableMap<Text, RowResult> implements Tool {
+  /* Name of this 'program'
+   */
+  static final String NAME = "rowcounter";
+  
+  private Configuration conf;
+  private final RowResult EMPTY_RESULT_VALUE = new RowResult();
+  private static enum Counters {ROWS}
+  
+  @Override
+  public void map(Text row, RowResult value,
+    OutputCollector<Text, RowResult> output,
+    @SuppressWarnings("unused") Reporter reporter)
+  throws IOException {
+    boolean content = false;
+    for (Map.Entry<Text, Cell> e: value.entrySet()) {
+      Cell cell = e.getValue();
+      if (cell != null && cell.getValue().length > 0) {
+        content = true;
+        break;
+      }
+    }
+    if (!content) {
+      return;
+    }
+    // Give out same value every time.  We're only interested in the row/key
+    reporter.incrCounter(Counters.ROWS, 1);
+    output.collect(row, EMPTY_RESULT_VALUE);
+  }
+
+  @SuppressWarnings({ "unused", "deprecation" })
+  public JobConf createSubmittableJob(String[] args) throws IOException {
+    JobConf c = new JobConf(getConf(), RowCounter.class);
+    c.setJobName(NAME);
+    // Columns are space delimited
+    StringBuilder sb = new StringBuilder();
+    final int columnoffset = 2;
+    for (int i = columnoffset; i < args.length; i++) {
+      if (i > columnoffset) {
+        sb.append(" ");
+      }
+      sb.append(args[i]);
+    }
+    // Second argument is the table name.
+    TableMap.initJob(args[1], sb.toString(), this.getClass(), Text.class,
+      RowResult.class, c);
+    c.setReducerClass(IdentityReducer.class);
+    // First arg is the output directory.
+    c.setOutputPath(new Path(args[0]));
+    return c;
+  }
+  
+  static int printUsage() {
+    System.out.println(NAME +
+      " <outputdir> <tablename> <column1> [<column2>...]");
+    return -1;
+  }
+  
+  public int run(final String[] args) throws Exception {
+    // Make sure there are at least 3 parameters
+    if (args.length < 3) {
+      System.err.println("ERROR: Wrong number of parameters: " + args.length);
+      return printUsage();
+    }
+    JobClient.runJob(createSubmittableJob(args));
+    return 0;
+  }
+
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  public void setConf(final Configuration c) {
+    this.conf = c;
+  }
+
+  public static void main(String[] args) throws Exception {
+    HBaseConfiguration c = new HBaseConfiguration();
+    c.set("hbase.master", args[0]);
+    int errCode = ToolRunner.run(c, new RowCounter(), args);
+    System.exit(errCode);
+  }
+}
\ No newline at end of file

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties?rev=648422&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties Tue Apr 15 14:39:20 2008
@@ -0,0 +1,6 @@
+
+# ResourceBundle properties file for RowCounter MR job
+
+CounterGroupName=         RowCounter
+
+ROWS.name=                Rows

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java?rev=648422&r1=648421&r2=648422&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java Tue Apr 15 14:39:20 2008
@@ -75,7 +75,15 @@
 reducers so load is spread across the hbase cluster.
 </p>
 
-<h2> Sample MR Bulk Uploader </h2>
+<h2>Example Code</h2>
+<h3>Sample Row Counter</h3>
+<p>See {@link org.apache.hadoop.hbase.mapred.RowCounter}.  You should be able to run
+it by doing: <code>% ./bin/hadoop jar hbase-X.X.X.jar</code>.  This will invoke
+the hbase MapReduce Driver class.  Select 'rowcounter' from the choice of jobs
+offered.
+</p>
+
+<h3> Sample MR Bulk Uploader </h3>
 <p>Read the class comment below for specification of inputs, prerequisites, etc.
 </p>
 <blockquote><pre>package org.apache.hadoop.hbase.mapred;



Mime
View raw message