nutch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject svn commit: r1051505 - in /nutch/branches/branch-1.3: CHANGES.txt src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
Date Tue, 21 Dec 2010 14:14:03 GMT
Author: ab
Date: Tue Dec 21 14:14:03 2010
New Revision: 1051505

URL: http://svn.apache.org/viewvc?rev=1051505&view=rev
Log:
NUTCH-939 Added -dir command line option to SolrIndexer.

Modified:
    nutch/branches/branch-1.3/CHANGES.txt
    nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java

Modified: nutch/branches/branch-1.3/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.3/CHANGES.txt?rev=1051505&r1=1051504&r2=1051505&view=diff
==============================================================================
--- nutch/branches/branch-1.3/CHANGES.txt (original)
+++ nutch/branches/branch-1.3/CHANGES.txt Tue Dec 21 14:14:03 2010
@@ -44,6 +44,8 @@ Release 1.3 - Current Development
 
 * NUTCH-832 Website menu has lots of broken links - in particular the API docs (Alex McLintock via mattmann)
 
+* NUTCH-939 Added -dir command line option to SolrIndexer (Claudio Martella via ab)
+
 
 Release 1.1 - 2010-06-06
 

Modified: nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java?rev=1051505&r1=1051504&r2=1051505&view=diff
==============================================================================
--- nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java (original)
+++ nutch/branches/branch-1.3/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java Tue Dec 21 14:14:03 2010
@@ -20,6 +20,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.FileOutputFormat;
@@ -30,6 +31,7 @@ import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.indexer.IndexerMapReduce;
 import org.apache.nutch.indexer.NutchIndexWriterFactory;
+import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.util.TimingUtil;
@@ -92,7 +94,7 @@ public class SolrIndexer extends Configu
 
   public int run(String[] args) throws Exception {
     if (args.length < 4) {
-      System.err.println("Usage: SolrIndexer <solr url> <crawldb> <linkdb> <segment> ...");
+      System.err.println("Usage: SolrIndexer <solr url> <crawldb> <linkdb> (<segment> ... | -dir <segments>)");
       return -1;
     }
 
@@ -101,7 +103,18 @@ public class SolrIndexer extends Configu
 
     final List<Path> segments = new ArrayList<Path>();
     for (int i = 3; i < args.length; i++) {
-      segments.add(new Path(args[i]));
+      if (args[i].equals("-dir")) {
+        Path dir = new Path(args[++i]);
+        FileSystem fs = dir.getFileSystem(getConf());
+        FileStatus[] fstats = fs.listStatus(dir,
+                HadoopFSUtil.getPassDirectoriesFilter(fs));
+        Path[] files = HadoopFSUtil.getPaths(fstats);
+        for (Path p : files) {
+          segments.add(p);
+        }
+      } else {
+        segments.add(new Path(args[i]));
+      }
     }
 
     try {



Mime
View raw message