incubator-accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ktur...@apache.org
Subject svn commit: r1242064 - in /incubator/accumulo/trunk: ./ src/core/ src/core/src/main/java/org/apache/accumulo/core/client/ src/examples/wikisearch/ src/examples/wikisearch/ingest/ src/examples/wikisearch/ingest/bin/ src/examples/wikisearch/ingest/src/ma...
Date Wed, 08 Feb 2012 20:05:58 GMT
Author: kturner
Date: Wed Feb  8 20:05:58 2012
New Revision: 1242064

URL: http://svn.apache.org/viewvc?rev=1242064&view=rev
Log:
ACCUMULO-383 ACCUMULO-375 merged from 1.4

Modified:
    incubator/accumulo/trunk/   (props changed)
    incubator/accumulo/trunk/src/core/   (props changed)
    incubator/accumulo/trunk/src/core/src/main/java/org/apache/accumulo/core/client/TableDeletedException.java
    incubator/accumulo/trunk/src/examples/wikisearch/README
    incubator/accumulo/trunk/src/examples/wikisearch/README.parallel
    incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest.sh
    incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest_parallel.sh
    incubator/accumulo/trunk/src/examples/wikisearch/ingest/pom.xml
    incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java
    incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
    incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
    incubator/accumulo/trunk/src/server/   (props changed)

Propchange: incubator/accumulo/trunk/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Feb  8 20:05:58 2012
@@ -1,3 +1,3 @@
 /incubator/accumulo/branches/1.3:1190280,1190413,1190420,1190427,1190500,1195622,1195625,1195629,1195635,1196044,1196054,1196057,1196071-1196072,1196106,1197066,1198935,1199383,1203683,1204625,1205547,1205880,1206169,1208031,1209124,1209526,1209532,1209539,1209541,1209587,1209657,1210518,1210571,1210596,1210598,1213424,1214320,1225006,1227215,1227231,1227611,1228195,1230180,1230736,1231043,1236873
 /incubator/accumulo/branches/1.3.5rc:1209938
-/incubator/accumulo/branches/1.4:1201902-1241620
+/incubator/accumulo/branches/1.4:1201902-1242061

Propchange: incubator/accumulo/trunk/src/core/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Feb  8 20:05:58 2012
@@ -1,3 +1,3 @@
 /incubator/accumulo/branches/1.3.5rc/src/core:1209938
 /incubator/accumulo/branches/1.3/src/core:1190280,1190413,1190420,1190427,1190500,1195622,1195625,1195629,1195635,1196044,1196054,1196057,1196071-1196072,1196106,1197066,1198935,1199383,1203683,1204625,1205547,1205880,1206169,1208031,1209124,1209526,1209532,1209539,1209541,1209587,1209657,1210518,1210571,1210596,1210598,1213424,1214320,1225006,1227215
-/incubator/accumulo/branches/1.4/src/core:1201902-1241620
+/incubator/accumulo/branches/1.4/src/core:1201902-1242061

Modified: incubator/accumulo/trunk/src/core/src/main/java/org/apache/accumulo/core/client/TableDeletedException.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/core/src/main/java/org/apache/accumulo/core/client/TableDeletedException.java?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/core/src/main/java/org/apache/accumulo/core/client/TableDeletedException.java (original)
+++ incubator/accumulo/trunk/src/core/src/main/java/org/apache/accumulo/core/client/TableDeletedException.java Wed Feb  8 20:05:58 2012
@@ -29,6 +29,7 @@ public class TableDeletedException exten
   private String tableId;
   
   public TableDeletedException(String tableId) {
+    super("Table ID " + tableId + " was deleted");
     this.tableId = tableId;
   }
   

Modified: incubator/accumulo/trunk/src/examples/wikisearch/README
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/README?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/README (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/README Wed Feb  8 20:05:58 2012
@@ -8,8 +8,7 @@
  	Prerequisites
  	-------------
  	1. Accumulo, Hadoop, and ZooKeeper must be installed and running
- 	2. ACCUMULO_HOME and ZOOKEEPER_HOME must be defined in the environment
- 	3. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
+ 	2. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
 	   You will want to grab the files with the link name of pages-articles.xml.bz2
  
  

Modified: incubator/accumulo/trunk/src/examples/wikisearch/README.parallel
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/README.parallel?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/README.parallel (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/README.parallel Wed Feb  8 20:05:58 2012
@@ -8,8 +8,7 @@
  	Prerequisites
  	-------------
  	1. Accumulo, Hadoop, and ZooKeeper must be installed and running
- 	2. ACCUMULO_HOME and ZOOKEEPER_HOME must be defined in the environment
- 	3. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
+ 	2. One or more wikipedia dump files (http://dumps.wikimedia.org/backup-index.html) placed in an HDFS directory.
 	     You will want to grab the files with the link name of pages-articles.xml.bz2
  
  

Modified: incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest.sh
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest.sh?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest.sh (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest.sh Wed Feb  8 20:05:58 2012
@@ -22,34 +22,6 @@ SCRIPT_DIR="${THIS_SCRIPT%/*}"
 SCRIPT_DIR=`cd $SCRIPT_DIR ; pwd`
 echo $SCRIPT_DIR
 
-ACCUMULO_HOME=${ACCUMULO_HOME}
-ZOOKEEPER_HOME=${ZOOKEEPER_HOME}
-
-#
-# Check ZOOKEEPER_HOME
-#
-if [[ -z $ZOOKEEPER_HOME ]]; then
-	echo "You must set ZOOKEEPER_HOME environment variable"
-	exit -1;
-else
-	for f in $ZOOKEEPER_HOME/zookeeper-*.jar; do
-		CLASSPATH=$f
-		break
-	done	
-fi
-
-#
-# Check ACCUMULO_HOME
-#
-if [[ -z $ACCUMULO_HOME ]]; then
-	echo "You must set ACCUMULO_HOME environment variable"
-	exit -1;
-else
-	for f in $ACCUMULO_HOME/lib/*.jar; do
-		CLASSPATH=${CLASSPATH}:$f
-	done	
-fi
-
 #
 # Add our jars
 #
@@ -60,7 +32,7 @@ done
 #
 # Transform the classpath into a comma-separated list also
 #
-LIBJARS=`echo $CLASSPATH | sed 's/:/,/g'`
+LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 
 
 #

Modified: incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest_parallel.sh
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest_parallel.sh?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest_parallel.sh (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/ingest/bin/ingest_parallel.sh Wed Feb  8 20:05:58 2012
@@ -22,34 +22,6 @@ SCRIPT_DIR="${THIS_SCRIPT%/*}"
 SCRIPT_DIR=`cd $SCRIPT_DIR ; pwd`
 echo $SCRIPT_DIR
 
-ACCUMULO_HOME=${ACCUMULO_HOME}
-ZOOKEEPER_HOME=${ZOOKEEPER_HOME}
-
-#
-# Check ZOOKEEPER_HOME
-#
-if [[ -z $ZOOKEEPER_HOME ]]; then
-	echo "You must set ZOOKEEPER_HOME environment variable"
-	exit -1;
-else
-	for f in $ZOOKEEPER_HOME/zookeeper-*.jar; do
-		CLASSPATH=$f
-		break
-	done	
-fi
-
-#
-# Check ACCUMULO_HOME
-#
-if [[ -z $ACCUMULO_HOME ]]; then
-	echo "You must set ACCUMULO_HOME environment variable"
-	exit -1;
-else
-	for f in $ACCUMULO_HOME/lib/*.jar; do
-		CLASSPATH=${CLASSPATH}:$f
-	done	
-fi
-
 #
 # Add our jars
 #
@@ -60,7 +32,7 @@ done
 #
 # Transform the classpath into a comma-separated list also
 #
-LIBJARS=`echo $CLASSPATH | sed 's/:/,/g'`
+LIBJARS=`echo $CLASSPATH | sed 's/^://' | sed 's/:/,/g'`
 
 
 #

Modified: incubator/accumulo/trunk/src/examples/wikisearch/ingest/pom.xml
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/ingest/pom.xml?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/ingest/pom.xml (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/ingest/pom.xml Wed Feb  8 20:05:58 2012
@@ -76,6 +76,16 @@
     	<groupId>com.sun.jersey</groupId>
     	<artifactId>jersey-server</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.accumulo</groupId>
+      <artifactId>cloudtrace</artifactId>
+      <scope>runtime</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.thrift</groupId>
+      <artifactId>libthrift</artifactId>
+      <scope>runtime</scope>
+    </dependency>
   </dependencies>
 
   <build>
@@ -93,8 +103,8 @@
             <configuration>
               <outputDirectory>lib</outputDirectory>
               <!-- just grab the non-provided runtime dependencies -->
-              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java</includeArtifactIds>
-              <excludeTransitive>true</excludeTransitive>
+              <includeArtifactIds>commons-lang,google-collections,lucene-core,lucene-analyzers,lucene-wikipedia,protobuf-java,accumulo-core,hadoop-core,libthrift,cloudtrace,zookeeper</includeArtifactIds>
+              <excludeTransitive>false</excludeTransitive>
             </configuration>
           </execution>
         </executions>

Modified: incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/ArticleExtractor.java Wed Feb  8 20:05:58 2012
@@ -32,6 +32,7 @@ import javax.xml.stream.XMLStreamReader;
 
 import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer;
 import org.apache.accumulo.examples.wikisearch.normalizer.NumberNormalizer;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
 
@@ -100,28 +101,37 @@ public class ArticleExtractor {
     @Override
     public void readFields(DataInput in) throws IOException {
       id = in.readInt();
-      title = in.readUTF();
+      Text foo = new Text();
+      foo.readFields(in);
+      title = foo.toString();
       timestamp = in.readLong();
-      comments = in.readUTF();
-      text = in.readUTF();
+      foo.readFields(in);
+      comments = foo.toString();
+      foo.readFields(in);
+      text = foo.toString();
     }
 
     @Override
     public void write(DataOutput out) throws IOException {
       out.writeInt(id);
-      out.writeUTF(title);
+      (new Text(title)).write(out);
       out.writeLong(timestamp);
-      out.writeUTF(comments);
-      out.writeUTF(text);
+      (new Text(comments)).write(out);
+      (new Text(text)).write(out);
     }
     
   }
   
   public ArticleExtractor() {}
   
-  public Article extract(Reader reader) {
-    XMLInputFactory xmlif = XMLInputFactory.newInstance();
+  private static XMLInputFactory xmlif = XMLInputFactory.newInstance();
+
+  static
+  {
     xmlif.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
+  }
+  
+  public Article extract(Reader reader) {
     
     XMLStreamReader xmlr = null;
     

Modified: incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputFormat.java Wed Feb  8 20:05:58 2012
@@ -116,11 +116,11 @@ public class WikipediaInputFormat extend
     
     int numGroups = WikipediaConfiguration.getNumGroups(job.getConfiguration());
 
-    for(InputSplit split:superSplits)
+    for(int group = 0; group < numGroups; group++)
     {
-      FileSplit fileSplit = (FileSplit)split;
-      for(int group = 0; group < numGroups; group++)
+      for(InputSplit split:superSplits)
       {
+        FileSplit fileSplit = (FileSplit)split;
         splits.add(new WikipediaInputSplit(fileSplit,group));
       }
     }

Modified: incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java?rev=1242064&r1=1242063&r2=1242064&view=diff
==============================================================================
--- incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java (original)
+++ incubator/accumulo/trunk/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java Wed Feb  8 20:05:58 2012
@@ -50,6 +50,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
@@ -173,9 +174,13 @@ public class WikipediaPartitionedIngeste
     // setup output format
     partitionerJob.setMapOutputKeyClass(Text.class);
     partitionerJob.setMapOutputValueClass(Article.class);
+    partitionerJob.setOutputKeyClass(Text.class);
+    partitionerJob.setOutputValueClass(Article.class);
     partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
     Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
     SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
+    SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
+    SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
     
     return partitionerJob.waitForCompletion(true) ? 0 : 1;
   }
@@ -186,6 +191,8 @@ public class WikipediaPartitionedIngeste
     Configuration ingestConf = ingestJob.getConfiguration();
     ingestConf.set("mapred.map.tasks.speculative.execution", "false");
 
+    configureIngestJob(ingestJob);
+    
     String tablename = WikipediaConfiguration.getTableName(ingestConf);
     
     String zookeepers = WikipediaConfiguration.getZookeepers(ingestConf);
@@ -199,9 +206,13 @@ public class WikipediaPartitionedIngeste
     
     createTables(tops, tablename);
     
+    ingestJob.setMapperClass(WikipediaPartitionedMapper.class);
+    ingestJob.setNumReduceTasks(0);
+    
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
+    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28);
 
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);
@@ -227,6 +238,11 @@ public class WikipediaPartitionedIngeste
     conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
     conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
   }
+
+  protected void configureIngestJob(Job job) {
+    job.setJarByClass(WikipediaPartitionedIngester.class);
+    job.setInputFormatClass(WikipediaInputFormat.class);
+  }
   
   protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?");
   

Propchange: incubator/accumulo/trunk/src/server/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Feb  8 20:05:58 2012
@@ -1,3 +1,3 @@
 /incubator/accumulo/branches/1.3.5rc/src/server:1209938
 /incubator/accumulo/branches/1.3/src/server:1190280,1190413,1190420,1190427,1190500,1195622,1195625,1195629,1195635,1196044,1196054,1196057,1196071-1196072,1196106,1197066,1198935,1199383,1203683,1204625,1205547,1205880,1206169,1208031,1209124,1209526,1209532,1209539,1209541,1209587,1209657,1210518,1210571,1210596,1210598,1213424,1214320,1225006,1227215,1227231,1227611
-/incubator/accumulo/branches/1.4/src/server:1201902-1241620
+/incubator/accumulo/branches/1.4/src/server:1201902-1242061



Mime
View raw message