hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sur...@apache.org
Subject svn commit: r1397432 [2/5] - in /hadoop/common/trunk/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/ hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ h...
Date Fri, 12 Oct 2012 04:35:46 GMT
Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java Fri Oct 12 04:35:42 2012
@@ -1,56 +1,56 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert or delete based on document id hashing. Do
- * NOT use this distribution policy when the number of shards changes.
- */
-public class HashingDistributionPolicy implements IDistributionPolicy {
-
-  private int numShards;
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
-   */
-  public void init(Shard[] shards) {
-    numShards = shards.length;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
-   */
-  public int chooseShardForInsert(DocumentID key) {
-    int hashCode = key.hashCode();
-    return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
-   */
-  public int chooseShardForDelete(DocumentID key) {
-    int hashCode = key.hashCode();
-    return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert or delete based on document id hashing. Do
+ * NOT use this distribution policy when the number of shards changes.
+ */
+public class HashingDistributionPolicy implements IDistributionPolicy {
+
+  private int numShards;
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+   */
+  public void init(Shard[] shards) {
+    numShards = shards.length;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+   */
+  public int chooseShardForInsert(DocumentID key) {
+    int hashCode = key.hashCode();
+    return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+   */
+  public int chooseShardForDelete(DocumentID key) {
+    int hashCode = key.hashCode();
+    return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java Fri Oct 12 04:35:42 2012
@@ -1,57 +1,57 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Identity local analysis maps inputs directly into outputs.
- */
-public class IdentityLocalAnalysis implements
-    ILocalAnalysis<DocumentID, DocumentAndOp> {
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
-   */
-  public void map(DocumentID key, DocumentAndOp value,
-      OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
-      throws IOException {
-    output.collect(key, value);
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
-   */
-  public void configure(JobConf job) {
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.io.Closeable#close()
-   */
-  public void close() throws IOException {
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Identity local analysis maps inputs directly into outputs.
+ */
+public class IdentityLocalAnalysis implements
+    ILocalAnalysis<DocumentID, DocumentAndOp> {
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
+   */
+  public void map(DocumentID key, DocumentAndOp value,
+      OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
+      throws IOException {
+    output.collect(key, value);
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
+   */
+  public void configure(JobConf job) {
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.io.Closeable#close()
+   */
+  public void close() throws IOException {
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java Fri Oct 12 04:35:42 2012
@@ -1,46 +1,46 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * An InputFormat for LineDoc for plain text files where each line is a doc.
- */
-public class LineDocInputFormat extends
-    FileInputFormat<DocumentID, LineDocTextAndOp> {
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.FileInputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
-   */
-  public RecordReader<DocumentID, LineDocTextAndOp> getRecordReader(
-      InputSplit split, JobConf job, Reporter reporter) throws IOException {
-    reporter.setStatus(split.toString());
-    return new LineDocRecordReader(job, (FileSplit) split);
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * An InputFormat for LineDoc for plain text files where each line is a doc.
+ */
+public class LineDocInputFormat extends
+    FileInputFormat<DocumentID, LineDocTextAndOp> {
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.FileInputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
+   */
+  public RecordReader<DocumentID, LineDocTextAndOp> getRecordReader(
+      InputSplit split, JobConf job, Reporter reporter) throws IOException {
+    reporter.setStatus(split.toString());
+    return new LineDocRecordReader(job, (FileSplit) split);
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java Fri Oct 12 04:35:42 2012
@@ -1,80 +1,80 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.Term;
-
-/**
- * Convert LineDocTextAndOp to DocumentAndOp as required by ILocalAnalysis.
- */
-public class LineDocLocalAnalysis implements
-    ILocalAnalysis<DocumentID, LineDocTextAndOp> {
-
-  private static String docidFieldName = "id";
-  private static String contentFieldName = "content";
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
-   */
-  public void map(DocumentID key, LineDocTextAndOp value,
-      OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
-      throws IOException {
-
-    DocumentAndOp.Op op = value.getOp();
-    Document doc = null;
-    Term term = null;
-
-    if (op == DocumentAndOp.Op.INSERT || op == DocumentAndOp.Op.UPDATE) {
-      doc = new Document();
-      doc.add(new Field(docidFieldName, key.getText().toString(),
-          Field.Store.YES, Field.Index.UN_TOKENIZED));
-      doc.add(new Field(contentFieldName, value.getText().toString(),
-          Field.Store.NO, Field.Index.TOKENIZED));
-    }
-
-    if (op == DocumentAndOp.Op.DELETE || op == DocumentAndOp.Op.UPDATE) {
-      term = new Term(docidFieldName, key.getText().toString());
-    }
-
-    output.collect(key, new DocumentAndOp(op, doc, term));
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
-   */
-  public void configure(JobConf job) {
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.io.Closeable#close()
-   */
-  public void close() throws IOException {
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+
+/**
+ * Convert LineDocTextAndOp to DocumentAndOp as required by ILocalAnalysis.
+ */
+public class LineDocLocalAnalysis implements
+    ILocalAnalysis<DocumentID, LineDocTextAndOp> {
+
+  private static String docidFieldName = "id";
+  private static String contentFieldName = "content";
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
+   */
+  public void map(DocumentID key, LineDocTextAndOp value,
+      OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
+      throws IOException {
+
+    DocumentAndOp.Op op = value.getOp();
+    Document doc = null;
+    Term term = null;
+
+    if (op == DocumentAndOp.Op.INSERT || op == DocumentAndOp.Op.UPDATE) {
+      doc = new Document();
+      doc.add(new Field(docidFieldName, key.getText().toString(),
+          Field.Store.YES, Field.Index.UN_TOKENIZED));
+      doc.add(new Field(contentFieldName, value.getText().toString(),
+          Field.Store.NO, Field.Index.TOKENIZED));
+    }
+
+    if (op == DocumentAndOp.Op.DELETE || op == DocumentAndOp.Op.UPDATE) {
+      term = new Term(docidFieldName, key.getText().toString());
+    }
+
+    output.collect(key, new DocumentAndOp(op, doc, term));
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
+   */
+  public void configure(JobConf job) {
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.io.Closeable#close()
+   */
+  public void close() throws IOException {
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java Fri Oct 12 04:35:42 2012
@@ -1,231 +1,231 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.RecordReader;
-
-/**
- * A simple RecordReader for LineDoc for plain text files where each line is a
- * doc. Each line is as follows: documentID<SPACE>op<SPACE>content<EOF>,
- * where op can be "i", "ins" or "insert" for insert, "d", "del" or "delete"
- * for delete, or "u", "upd" or "update" for update.
- */
-public class LineDocRecordReader implements
-    RecordReader<DocumentID, LineDocTextAndOp> {
-  private static final char SPACE = ' ';
-  private static final char EOL = '\n';
-
-  private long start;
-  private long pos;
-  private long end;
-  private BufferedInputStream in;
-  private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
-
-  /**
-   * Provide a bridge to get the bytes from the ByteArrayOutputStream without
-   * creating a new byte array.
-   */
-  private static class TextStuffer extends OutputStream {
-    public Text target;
-
-    public void write(int b) {
-      throw new UnsupportedOperationException("write(byte) not supported");
-    }
-
-    public void write(byte[] data, int offset, int len) throws IOException {
-      target.set(data, offset, len);
-    }
-  }
-
-  private TextStuffer bridge = new TextStuffer();
-
-  /**
-   * Constructor
-   * @param job
-   * @param split  
-   * @throws IOException
-   */
-  public LineDocRecordReader(Configuration job, FileSplit split)
-      throws IOException {
-    long start = split.getStart();
-    long end = start + split.getLength();
-    final Path file = split.getPath();
-
-    // open the file and seek to the start of the split
-    FileSystem fs = file.getFileSystem(job);
-    FSDataInputStream fileIn = fs.open(split.getPath());
-    InputStream in = fileIn;
-    boolean skipFirstLine = false;
-    if (start != 0) {
-      skipFirstLine = true; // wait till BufferedInputStream to skip
-      --start;
-      fileIn.seek(start);
-    }
-
-    this.in = new BufferedInputStream(in);
-    if (skipFirstLine) { // skip first line and re-establish "start".
-      start += LineDocRecordReader.readData(this.in, null, EOL);
-    }
-    this.start = start;
-    this.pos = start;
-    this.end = end;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.RecordReader#close()
-   */
-  public void close() throws IOException {
-    in.close();
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.RecordReader#createKey()
-   */
-  public DocumentID createKey() {
-    return new DocumentID();
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.RecordReader#createValue()
-   */
-  public LineDocTextAndOp createValue() {
-    return new LineDocTextAndOp();
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.RecordReader#getPos()
-   */
-  public long getPos() throws IOException {
-    return pos;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.RecordReader#getProgress()
-   */
-  public float getProgress() throws IOException {
-    if (start == end) {
-      return 0.0f;
-    } else {
-      return Math.min(1.0f, (pos - start) / (float) (end - start));
-    }
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object, java.lang.Object)
-   */
-  public synchronized boolean next(DocumentID key, LineDocTextAndOp value)
-      throws IOException {
-    if (pos >= end) {
-      return false;
-    }
-
-    // key is document id, which are bytes until first space
-    if (!readInto(key.getText(), SPACE)) {
-      return false;
-    }
-
-    // read operation: i/d/u, or ins/del/upd, or insert/delete/update
-    Text opText = new Text();
-    if (!readInto(opText, SPACE)) {
-      return false;
-    }
-    String opStr = opText.toString();
-    DocumentAndOp.Op op;
-    if (opStr.equals("i") || opStr.equals("ins") || opStr.equals("insert")) {
-      op = DocumentAndOp.Op.INSERT;
-    } else if (opStr.equals("d") || opStr.equals("del")
-        || opStr.equals("delete")) {
-      op = DocumentAndOp.Op.DELETE;
-    } else if (opStr.equals("u") || opStr.equals("upd")
-        || opStr.equals("update")) {
-      op = DocumentAndOp.Op.UPDATE;
-    } else {
-      // default is insert
-      op = DocumentAndOp.Op.INSERT;
-    }
-    value.setOp(op);
-
-    if (op == DocumentAndOp.Op.DELETE) {
-      return true;
-    } else {
-      // read rest of the line
-      return readInto(value.getText(), EOL);
-    }
-  }
-
-  private boolean readInto(Text text, char delimiter) throws IOException {
-    buffer.reset();
-    long bytesRead = readData(in, buffer, delimiter);
-    if (bytesRead == 0) {
-      return false;
-    }
-    pos += bytesRead;
-    bridge.target = text;
-    buffer.writeTo(bridge);
-    return true;
-  }
-
-  private static long readData(InputStream in, OutputStream out, char delimiter)
-      throws IOException {
-    long bytes = 0;
-    while (true) {
-
-      int b = in.read();
-      if (b == -1) {
-        break;
-      }
-      bytes += 1;
-
-      byte c = (byte) b;
-      if (c == EOL || c == delimiter) {
-        break;
-      }
-
-      if (c == '\r') {
-        in.mark(1);
-        byte nextC = (byte) in.read();
-        if (nextC != EOL || c == delimiter) {
-          in.reset();
-        } else {
-          bytes += 1;
-        }
-        break;
-      }
-
-      if (out != null) {
-        out.write(c);
-      }
-    }
-    return bytes;
-  }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * A simple RecordReader for LineDoc for plain text files where each line is a
+ * doc. Each line is as follows: documentID<SPACE>op<SPACE>content<EOF>,
+ * where op can be "i", "ins" or "insert" for insert, "d", "del" or "delete"
+ * for delete, or "u", "upd" or "update" for update.
+ */
+public class LineDocRecordReader implements
+    RecordReader<DocumentID, LineDocTextAndOp> {
+  private static final char SPACE = ' ';
+  private static final char EOL = '\n';
+
+  private long start;
+  private long pos;
+  private long end;
+  private BufferedInputStream in;
+  private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
+
+  /**
+   * Provide a bridge to get the bytes from the ByteArrayOutputStream without
+   * creating a new byte array.
+   */
+  private static class TextStuffer extends OutputStream {
+    public Text target;
+
+    public void write(int b) {
+      throw new UnsupportedOperationException("write(byte) not supported");
+    }
+
+    public void write(byte[] data, int offset, int len) throws IOException {
+      target.set(data, offset, len);
+    }
+  }
+
+  private TextStuffer bridge = new TextStuffer();
+
+  /**
+   * Constructor
+   * @param job
+   * @param split  
+   * @throws IOException
+   */
+  public LineDocRecordReader(Configuration job, FileSplit split)
+      throws IOException {
+    long start = split.getStart();
+    long end = start + split.getLength();
+    final Path file = split.getPath();
+
+    // open the file and seek to the start of the split
+    FileSystem fs = file.getFileSystem(job);
+    FSDataInputStream fileIn = fs.open(split.getPath());
+    InputStream in = fileIn;
+    boolean skipFirstLine = false;
+    if (start != 0) {
+      skipFirstLine = true; // wait till BufferedInputStream to skip
+      --start;
+      fileIn.seek(start);
+    }
+
+    this.in = new BufferedInputStream(in);
+    if (skipFirstLine) { // skip first line and re-establish "start".
+      start += LineDocRecordReader.readData(this.in, null, EOL);
+    }
+    this.start = start;
+    this.pos = start;
+    this.end = end;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.RecordReader#close()
+   */
+  public void close() throws IOException {
+    in.close();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.RecordReader#createKey()
+   */
+  public DocumentID createKey() {
+    return new DocumentID();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.RecordReader#createValue()
+   */
+  public LineDocTextAndOp createValue() {
+    return new LineDocTextAndOp();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.RecordReader#getPos()
+   */
+  public long getPos() throws IOException {
+    return pos;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.RecordReader#getProgress()
+   */
+  public float getProgress() throws IOException {
+    if (start == end) {
+      return 0.0f;
+    } else {
+      return Math.min(1.0f, (pos - start) / (float) (end - start));
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object, java.lang.Object)
+   */
+  public synchronized boolean next(DocumentID key, LineDocTextAndOp value)
+      throws IOException {
+    if (pos >= end) {
+      return false;
+    }
+
+    // key is document id, which are bytes until first space
+    if (!readInto(key.getText(), SPACE)) {
+      return false;
+    }
+
+    // read operation: i/d/u, or ins/del/upd, or insert/delete/update
+    Text opText = new Text();
+    if (!readInto(opText, SPACE)) {
+      return false;
+    }
+    String opStr = opText.toString();
+    DocumentAndOp.Op op;
+    if (opStr.equals("i") || opStr.equals("ins") || opStr.equals("insert")) {
+      op = DocumentAndOp.Op.INSERT;
+    } else if (opStr.equals("d") || opStr.equals("del")
+        || opStr.equals("delete")) {
+      op = DocumentAndOp.Op.DELETE;
+    } else if (opStr.equals("u") || opStr.equals("upd")
+        || opStr.equals("update")) {
+      op = DocumentAndOp.Op.UPDATE;
+    } else {
+      // default is insert
+      op = DocumentAndOp.Op.INSERT;
+    }
+    value.setOp(op);
+
+    if (op == DocumentAndOp.Op.DELETE) {
+      return true;
+    } else {
+      // read rest of the line
+      return readInto(value.getText(), EOL);
+    }
+  }
+
+  private boolean readInto(Text text, char delimiter) throws IOException {
+    buffer.reset();
+    long bytesRead = readData(in, buffer, delimiter);
+    if (bytesRead == 0) {
+      return false;
+    }
+    pos += bytesRead;
+    bridge.target = text;
+    buffer.writeTo(bridge);
+    return true;
+  }
+
+  private static long readData(InputStream in, OutputStream out, char delimiter)
+      throws IOException {
+    long bytes = 0;
+    while (true) {
+
+      int b = in.read();
+      if (b == -1) {
+        break;
+      }
+      bytes += 1;
+
+      byte c = (byte) b;
+      if (c == EOL || c == delimiter) {
+        break;
+      }
+
+      if (c == '\r') {
+        in.mark(1);
+        byte nextC = (byte) in.read();
+        if (nextC != EOL || c == delimiter) {
+          in.reset();
+        } else {
+          bytes += 1;
+        }
+        break;
+      }
+
+      if (out != null) {
+        out.write(c);
+      }
+    }
+    return bytes;
+  }
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java Fri Oct 12 04:35:42 2012
@@ -1,92 +1,92 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-/**
- * This class represents an operation. The operation can be an insert, a delete
- * or an update. If the operation is an insert or an update, a (new) document,
- * which is in the form of text, is specified.
- */
-public class LineDocTextAndOp implements Writable {
-  private DocumentAndOp.Op op;
-  private Text doc;
-
-  /**
-   * Constructor
-   */
-  public LineDocTextAndOp() {
-    doc = new Text();
-  }
-
-  /**
-   * Set the type of the operation.
-   * @param op  the type of the operation
-   */
-  public void setOp(DocumentAndOp.Op op) {
-    this.op = op;
-  }
-
-  /**
-   * Get the type of the operation.
-   * @return the type of the operation
-   */
-  public DocumentAndOp.Op getOp() {
-    return op;
-  }
-
-  /**
-   * Get the text that represents a document.
-   * @return the text that represents a document
-   */
-  public Text getText() {
-    return doc;
-  }
-
-  /* (non-Javadoc)
-   * @see java.lang.Object#toString()
-   */
-  public String toString() {
-    return this.getClass().getName() + "[op=" + op + ", text=" + doc + "]";
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
-   */
-  public void write(DataOutput out) throws IOException {
-    throw new IOException(this.getClass().getName()
-        + ".write should never be called");
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
-   */
-  public void readFields(DataInput in) throws IOException {
-    throw new IOException(this.getClass().getName()
-        + ".readFields should never be called");
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents an operation. The operation can be an insert, a delete
+ * or an update. If the operation is an insert or an update, a (new) document,
+ * which is in the form of text, is specified.
+ */
+public class LineDocTextAndOp implements Writable {
+  private DocumentAndOp.Op op;
+  private Text doc;
+
+  /**
+   * Constructor
+   */
+  public LineDocTextAndOp() {
+    doc = new Text();
+  }
+
+  /**
+   * Set the type of the operation.
+   * @param op  the type of the operation
+   */
+  public void setOp(DocumentAndOp.Op op) {
+    this.op = op;
+  }
+
+  /**
+   * Get the type of the operation.
+   * @return the type of the operation
+   */
+  public DocumentAndOp.Op getOp() {
+    return op;
+  }
+
+  /**
+   * Get the text that represents a document.
+   * @return the text that represents a document
+   */
+  public Text getText() {
+    return doc;
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#toString()
+   */
+  public String toString() {
+    return this.getClass().getName() + "[op=" + op + ", text=" + doc + "]";
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
+   */
+  public void write(DataOutput out) throws IOException {
+    throw new IOException(this.getClass().getName()
+        + ".write should never be called");
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
+   */
+  public void readFields(DataInput in) throws IOException {
+    throw new IOException(this.getClass().getName()
+        + ".readFields should never be called");
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java Fri Oct 12 04:35:42 2012
@@ -1,58 +1,58 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert in a round-robin fashion. Choose all the
- * shards for each delete because we don't know where it is stored.
- */
-public class RoundRobinDistributionPolicy implements IDistributionPolicy {
-
-  private int numShards;
-  private int rr; // round-robin implementation
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
-   */
-  public void init(Shard[] shards) {
-    numShards = shards.length;
-    rr = 0;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
-   */
-  public int chooseShardForInsert(DocumentID key) {
-    int chosen = rr;
-    rr = (rr + 1) % numShards;
-    return chosen;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
-   */
-  public int chooseShardForDelete(DocumentID key) {
-    // -1 represents all the shards
-    return -1;
-  }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert in a round-robin fashion. Choose all the
+ * shards for each delete because we don't know where it is stored.
+ */
+public class RoundRobinDistributionPolicy implements IDistributionPolicy {
+
+  private int numShards;
+  private int rr; // round-robin implementation
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+   */
+  public void init(Shard[] shards) {
+    numShards = shards.length;
+    rr = 0;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+   */
+  public int chooseShardForInsert(DocumentID key) {
+    int chosen = rr;
+    rr = (rr + 1) % numShards;
+    return chosen;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+   */
+  public int chooseShardForDelete(DocumentID key) {
+    // -1 represents all the shards
+    return -1;
+  }
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java Fri Oct 12 04:35:42 2012
@@ -1,55 +1,55 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexFileNameFilter;
-
-/**
- * A wrapper class to convert an IndexFileNameFilter which implements
- * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
- */
-class LuceneIndexFileNameFilter implements PathFilter {
-
-  private static final LuceneIndexFileNameFilter singleton =
-      new LuceneIndexFileNameFilter();
-
-  /**
-   * Get a static instance.
-   * @return the static instance
-   */
-  public static LuceneIndexFileNameFilter getFilter() {
-    return singleton;
-  }
-
-  private final IndexFileNameFilter luceneFilter;
-
-  private LuceneIndexFileNameFilter() {
-    luceneFilter = IndexFileNameFilter.getFilter();
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
-   */
-  public boolean accept(Path path) {
-    return luceneFilter.accept(null, path.getName());
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.lucene.index.IndexFileNameFilter;
+
+/**
+ * A wrapper class to convert an IndexFileNameFilter which implements
+ * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
+ */
+class LuceneIndexFileNameFilter implements PathFilter {
+
+  private static final LuceneIndexFileNameFilter singleton =
+      new LuceneIndexFileNameFilter();
+
+  /**
+   * Get a static instance.
+   * @return the static instance
+   */
+  public static LuceneIndexFileNameFilter getFilter() {
+    return singleton;
+  }
+
+  private final IndexFileNameFilter luceneFilter;
+
+  private LuceneIndexFileNameFilter() {
+    luceneFilter = IndexFileNameFilter.getFilter();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
+   */
+  public boolean accept(Path path) {
+    return luceneFilter.accept(null, path.getName());
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java Fri Oct 12 04:35:42 2012
@@ -1,112 +1,112 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-
-/**
- * This class copies some methods from Lucene's SegmentInfos since that class
- * is not public.
- */
-public final class LuceneUtil {
-
-  static final class IndexFileNames {
-    /** Name of the index segment file */
-    static final String SEGMENTS = "segments";
-
-    /** Name of the generation reference file name */
-    static final String SEGMENTS_GEN = "segments.gen";
-  }
-
-  /**
-   * Check if the file is a segments_N file
-   * @param name
-   * @return true if the file is a segments_N file
-   */
-  public static boolean isSegmentsFile(String name) {
-    return name.startsWith(IndexFileNames.SEGMENTS)
-        && !name.equals(IndexFileNames.SEGMENTS_GEN);
-  }
-
-  /**
-   * Check if the file is the segments.gen file
-   * @param name
-   * @return true if the file is the segments.gen file
-   */
-  public static boolean isSegmentsGenFile(String name) {
-    return name.equals(IndexFileNames.SEGMENTS_GEN);
-  }
-
-  /**
-   * Get the generation (N) of the current segments_N file in the directory.
-   * 
-   * @param directory -- directory to search for the latest segments_N file
-   */
-  public static long getCurrentSegmentGeneration(Directory directory)
-      throws IOException {
-    String[] files = directory.list();
-    if (files == null)
-      throw new IOException("cannot read directory " + directory
-          + ": list() returned null");
-    return getCurrentSegmentGeneration(files);
-  }
-
-  /**
-   * Get the generation (N) of the current segments_N file from a list of
-   * files.
-   * 
-   * @param files -- array of file names to check
-   */
-  public static long getCurrentSegmentGeneration(String[] files) {
-    if (files == null) {
-      return -1;
-    }
-    long max = -1;
-    for (int i = 0; i < files.length; i++) {
-      String file = files[i];
-      if (file.startsWith(IndexFileNames.SEGMENTS)
-          && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
-        long gen = generationFromSegmentsFileName(file);
-        if (gen > max) {
-          max = gen;
-        }
-      }
-    }
-    return max;
-  }
-
-  /**
-   * Parse the generation off the segments file name and return it.
-   */
-  public static long generationFromSegmentsFileName(String fileName) {
-    if (fileName.equals(IndexFileNames.SEGMENTS)) {
-      return 0;
-    } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
-      return Long.parseLong(
-          fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
-          Character.MAX_RADIX);
-    } else {
-      throw new IllegalArgumentException("fileName \"" + fileName
-          + "\" is not a segments file");
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * This class copies some methods from Lucene's SegmentInfos since that class
+ * is not public.
+ */
+public final class LuceneUtil {
+
+  static final class IndexFileNames {
+    /** Name of the index segment file */
+    static final String SEGMENTS = "segments";
+
+    /** Name of the generation reference file name */
+    static final String SEGMENTS_GEN = "segments.gen";
+  }
+
+  /**
+   * Check if the file is a segments_N file
+   * @param name
+   * @return true if the file is a segments_N file
+   */
+  public static boolean isSegmentsFile(String name) {
+    return name.startsWith(IndexFileNames.SEGMENTS)
+        && !name.equals(IndexFileNames.SEGMENTS_GEN);
+  }
+
+  /**
+   * Check if the file is the segments.gen file
+   * @param name
+   * @return true if the file is the segments.gen file
+   */
+  public static boolean isSegmentsGenFile(String name) {
+    return name.equals(IndexFileNames.SEGMENTS_GEN);
+  }
+
+  /**
+   * Get the generation (N) of the current segments_N file in the directory.
+   * 
+   * @param directory -- directory to search for the latest segments_N file
+   */
+  public static long getCurrentSegmentGeneration(Directory directory)
+      throws IOException {
+    String[] files = directory.list();
+    if (files == null)
+      throw new IOException("cannot read directory " + directory
+          + ": list() returned null");
+    return getCurrentSegmentGeneration(files);
+  }
+
+  /**
+   * Get the generation (N) of the current segments_N file from a list of
+   * files.
+   * 
+   * @param files -- array of file names to check
+   */
+  public static long getCurrentSegmentGeneration(String[] files) {
+    if (files == null) {
+      return -1;
+    }
+    long max = -1;
+    for (int i = 0; i < files.length; i++) {
+      String file = files[i];
+      if (file.startsWith(IndexFileNames.SEGMENTS)
+          && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
+        long gen = generationFromSegmentsFileName(file);
+        if (gen > max) {
+          max = gen;
+        }
+      }
+    }
+    return max;
+  }
+
+  /**
+   * Parse the generation off the segments file name and return it.
+   */
+  public static long generationFromSegmentsFileName(String fileName) {
+    if (fileName.equals(IndexFileNames.SEGMENTS)) {
+      return 0;
+    } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+      return Long.parseLong(
+          fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
+          Character.MAX_RADIX);
+    } else {
+      throw new IllegalArgumentException("fileName \"" + fileName
+          + "\" is not a segments file");
+    }
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java Fri Oct 12 04:35:42 2012
@@ -1,49 +1,49 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.index.IndexCommitPoint;
-import org.apache.lucene.index.IndexDeletionPolicy;
-
-/**
- * For mixed directory. Use KeepAllDeletionPolicy for the read-only directory
- * (keep all from init) and use KeepOnlyLastCommitDeletionPolicy for the
- * writable directory (initially empty, keep latest after init).
- */
-class MixedDeletionPolicy implements IndexDeletionPolicy {
-
-  private int keepAllFromInit = 0;
-
-  public void onInit(List commits) throws IOException {
-    keepAllFromInit = commits.size();
-  }
-
-  public void onCommit(List commits) throws IOException {
-    int size = commits.size();
-    assert (size > keepAllFromInit);
-    // keep all from init and the latest, delete the rest
-    for (int i = keepAllFromInit; i < size - 1; i++) {
-      ((IndexCommitPoint) commits.get(i)).delete();
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.IndexCommitPoint;
+import org.apache.lucene.index.IndexDeletionPolicy;
+
+/**
+ * For mixed directory. Use KeepAllDeletionPolicy for the read-only directory
+ * (keep all from init) and use KeepOnlyLastCommitDeletionPolicy for the
+ * writable directory (initially empty, keep latest after init).
+ */
+class MixedDeletionPolicy implements IndexDeletionPolicy {
+
+  private int keepAllFromInit = 0;
+
+  public void onInit(List commits) throws IOException {
+    keepAllFromInit = commits.size();
+  }
+
+  public void onCommit(List commits) throws IOException {
+    int size = commits.size();
+    assert (size > keepAllFromInit);
+    // keep all from init and the latest, delete the rest
+    for (int i = keepAllFromInit; i < size - 1; i++) {
+      ((IndexCommitPoint) commits.get(i)).delete();
+    }
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java Fri Oct 12 04:35:42 2012
@@ -1,185 +1,185 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.NoLockFactory;
-
-/**
- * The initial version of an index is stored in a read-only FileSystem dir
- * (FileSystemDirectory). Index files created by newer versions are written to
- * a writable local FS dir (Lucene's FSDirectory). We should use the general
- * FileSystemDirectory for the writable dir as well. But have to use Lucene's
- * FSDirectory because currently Lucene does randome write and
- * FileSystemDirectory only supports sequential write.
- * 
- * Note: We may delete files from the read-only FileSystem dir because there
- * can be some segment files from an uncommitted checkpoint. For the same
- * reason, we may create files in the writable dir which already exist in the
- * read-only dir and logically they overwrite the ones in the read-only dir.
- */
-class MixedDirectory extends Directory {
-
-  private final Directory readDir; // FileSystemDirectory
-  private final Directory writeDir; // Lucene's FSDirectory
-
-  // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
-
-  public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
-      Path writePath, Configuration conf) throws IOException {
-
-    try {
-      readDir = new FileSystemDirectory(readFs, readPath, false, conf);
-      // check writeFS is a local FS?
-      writeDir = FSDirectory.getDirectory(writePath.toString());
-
-    } catch (IOException e) {
-      try {
-        close();
-      } catch (IOException e1) {
-        // ignore this one, throw the original one
-      }
-      throw e;
-    }
-
-    lockFactory = new NoLockFactory();
-  }
-
-  // for debugging
-  MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
-    this.readDir = readDir;
-    this.writeDir = writeDir;
-
-    lockFactory = new NoLockFactory();
-  }
-
-  @Override
-  public String[] list() throws IOException {
-    String[] readFiles = readDir.list();
-    String[] writeFiles = writeDir.list();
-
-    if (readFiles == null || readFiles.length == 0) {
-      return writeFiles;
-    } else if (writeFiles == null || writeFiles.length == 0) {
-      return readFiles;
-    } else {
-      String[] result = new String[readFiles.length + writeFiles.length];
-      System.arraycopy(readFiles, 0, result, 0, readFiles.length);
-      System.arraycopy(writeFiles, 0, result, readFiles.length,
-          writeFiles.length);
-      return result;
-    }
-  }
-
-  @Override
-  public void deleteFile(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      writeDir.deleteFile(name);
-    }
-    if (readDir.fileExists(name)) {
-      readDir.deleteFile(name);
-    }
-  }
-
-  @Override
-  public boolean fileExists(String name) throws IOException {
-    return writeDir.fileExists(name) || readDir.fileExists(name);
-  }
-
-  @Override
-  public long fileLength(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.fileLength(name);
-    } else {
-      return readDir.fileLength(name);
-    }
-  }
-
-  @Override
-  public long fileModified(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.fileModified(name);
-    } else {
-      return readDir.fileModified(name);
-    }
-  }
-
-  @Override
-  public void renameFile(String from, String to) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void touchFile(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      writeDir.touchFile(name);
-    } else {
-      readDir.touchFile(name);
-    }
-  }
-
-  @Override
-  public IndexOutput createOutput(String name) throws IOException {
-    return writeDir.createOutput(name);
-  }
-
-  @Override
-  public IndexInput openInput(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.openInput(name);
-    } else {
-      return readDir.openInput(name);
-    }
-  }
-
-  @Override
-  public IndexInput openInput(String name, int bufferSize) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.openInput(name, bufferSize);
-    } else {
-      return readDir.openInput(name, bufferSize);
-    }
-  }
-
-  @Override
-  public void close() throws IOException {
-    try {
-      if (readDir != null) {
-        readDir.close();
-      }
-    } finally {
-      if (writeDir != null) {
-        writeDir.close();
-      }
-    }
-  }
-
-  public String toString() {
-    return this.getClass().getName() + "@" + readDir + "&" + writeDir;
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.NoLockFactory;
+
+/**
+ * The initial version of an index is stored in a read-only FileSystem dir
+ * (FileSystemDirectory). Index files created by newer versions are written to
+ * a writable local FS dir (Lucene's FSDirectory). We should use the general
+ * FileSystemDirectory for the writable dir as well. But have to use Lucene's
+ * FSDirectory because currently Lucene does randome write and
+ * FileSystemDirectory only supports sequential write.
+ * 
+ * Note: We may delete files from the read-only FileSystem dir because there
+ * can be some segment files from an uncommitted checkpoint. For the same
+ * reason, we may create files in the writable dir which already exist in the
+ * read-only dir and logically they overwrite the ones in the read-only dir.
+ */
+class MixedDirectory extends Directory {
+
+  private final Directory readDir; // FileSystemDirectory
+  private final Directory writeDir; // Lucene's FSDirectory
+
+  // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
+
+  public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
+      Path writePath, Configuration conf) throws IOException {
+
+    try {
+      readDir = new FileSystemDirectory(readFs, readPath, false, conf);
+      // check writeFS is a local FS?
+      writeDir = FSDirectory.getDirectory(writePath.toString());
+
+    } catch (IOException e) {
+      try {
+        close();
+      } catch (IOException e1) {
+        // ignore this one, throw the original one
+      }
+      throw e;
+    }
+
+    lockFactory = new NoLockFactory();
+  }
+
+  // for debugging
+  MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
+    this.readDir = readDir;
+    this.writeDir = writeDir;
+
+    lockFactory = new NoLockFactory();
+  }
+
+  @Override
+  public String[] list() throws IOException {
+    String[] readFiles = readDir.list();
+    String[] writeFiles = writeDir.list();
+
+    if (readFiles == null || readFiles.length == 0) {
+      return writeFiles;
+    } else if (writeFiles == null || writeFiles.length == 0) {
+      return readFiles;
+    } else {
+      String[] result = new String[readFiles.length + writeFiles.length];
+      System.arraycopy(readFiles, 0, result, 0, readFiles.length);
+      System.arraycopy(writeFiles, 0, result, readFiles.length,
+          writeFiles.length);
+      return result;
+    }
+  }
+
+  @Override
+  public void deleteFile(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      writeDir.deleteFile(name);
+    }
+    if (readDir.fileExists(name)) {
+      readDir.deleteFile(name);
+    }
+  }
+
+  @Override
+  public boolean fileExists(String name) throws IOException {
+    return writeDir.fileExists(name) || readDir.fileExists(name);
+  }
+
+  @Override
+  public long fileLength(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.fileLength(name);
+    } else {
+      return readDir.fileLength(name);
+    }
+  }
+
+  @Override
+  public long fileModified(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.fileModified(name);
+    } else {
+      return readDir.fileModified(name);
+    }
+  }
+
+  @Override
+  public void renameFile(String from, String to) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void touchFile(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      writeDir.touchFile(name);
+    } else {
+      readDir.touchFile(name);
+    }
+  }
+
+  @Override
+  public IndexOutput createOutput(String name) throws IOException {
+    return writeDir.createOutput(name);
+  }
+
+  @Override
+  public IndexInput openInput(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.openInput(name);
+    } else {
+      return readDir.openInput(name);
+    }
+  }
+
+  @Override
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.openInput(name, bufferSize);
+    } else {
+      return readDir.openInput(name, bufferSize);
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      if (readDir != null) {
+        readDir.close();
+      }
+    } finally {
+      if (writeDir != null) {
+        writeDir.close();
+      }
+    }
+  }
+
+  public String toString() {
+    return this.getClass().getName() + "@" + readDir + "&" + writeDir;
+  }
+
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java?rev=1397432&r1=1397431&r2=1397432&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java Fri Oct 12 04:35:42 2012
@@ -1,119 +1,119 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMDirectory;
-
-/**
- * A utility class which writes an index in a ram dir into a DataOutput and
- * read from a DataInput an index into a ram dir.
- */
-public class RAMDirectoryUtil {
-  private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
-
-  /**
-   * Write a number of files from a ram directory to a data output.
-   * @param out  the data output
-   * @param dir  the ram directory
-   * @param names  the names of the files to write
-   * @throws IOException
-   */
-  public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
-      String[] names) throws IOException {
-    out.writeInt(names.length);
-
-    for (int i = 0; i < names.length; i++) {
-      Text.writeString(out, names[i]);
-      long length = dir.fileLength(names[i]);
-      out.writeLong(length);
-
-      if (length > 0) {
-        // can we avoid the extra copy?
-        IndexInput input = null;
-        try {
-          input = dir.openInput(names[i], BUFFER_SIZE);
-
-          int position = 0;
-          byte[] buffer = new byte[BUFFER_SIZE];
-
-          while (position < length) {
-            int len =
-                position + BUFFER_SIZE <= length ? BUFFER_SIZE
-                    : (int) (length - position);
-            input.readBytes(buffer, 0, len);
-            out.write(buffer, 0, len);
-            position += len;
-          }
-        } finally {
-          if (input != null) {
-            input.close();
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Read a number of files from a data input to a ram directory.
-   * @param in  the data input
-   * @param dir  the ram directory
-   * @throws IOException
-   */
-  public static void readRAMFiles(DataInput in, RAMDirectory dir)
-      throws IOException {
-    int numFiles = in.readInt();
-
-    for (int i = 0; i < numFiles; i++) {
-      String name = Text.readString(in);
-      long length = in.readLong();
-
-      if (length > 0) {
-        // can we avoid the extra copy?
-        IndexOutput output = null;
-        try {
-          output = dir.createOutput(name);
-
-          int position = 0;
-          byte[] buffer = new byte[BUFFER_SIZE];
-
-          while (position < length) {
-            int len =
-                position + BUFFER_SIZE <= length ? BUFFER_SIZE
-                    : (int) (length - position);
-            in.readFully(buffer, 0, len);
-            output.writeBytes(buffer, 0, len);
-            position += len;
-          }
-        } finally {
-          if (output != null) {
-            output.close();
-          }
-        }
-      }
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * A utility class which writes an index in a ram dir into a DataOutput and
+ * read from a DataInput an index into a ram dir.
+ */
+public class RAMDirectoryUtil {
+  private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
+
+  /**
+   * Write a number of files from a ram directory to a data output.
+   * @param out  the data output
+   * @param dir  the ram directory
+   * @param names  the names of the files to write
+   * @throws IOException
+   */
+  public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
+      String[] names) throws IOException {
+    out.writeInt(names.length);
+
+    for (int i = 0; i < names.length; i++) {
+      Text.writeString(out, names[i]);
+      long length = dir.fileLength(names[i]);
+      out.writeLong(length);
+
+      if (length > 0) {
+        // can we avoid the extra copy?
+        IndexInput input = null;
+        try {
+          input = dir.openInput(names[i], BUFFER_SIZE);
+
+          int position = 0;
+          byte[] buffer = new byte[BUFFER_SIZE];
+
+          while (position < length) {
+            int len =
+                position + BUFFER_SIZE <= length ? BUFFER_SIZE
+                    : (int) (length - position);
+            input.readBytes(buffer, 0, len);
+            out.write(buffer, 0, len);
+            position += len;
+          }
+        } finally {
+          if (input != null) {
+            input.close();
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Read a number of files from a data input to a ram directory.
+   * @param in  the data input
+   * @param dir  the ram directory
+   * @throws IOException
+   */
+  public static void readRAMFiles(DataInput in, RAMDirectory dir)
+      throws IOException {
+    int numFiles = in.readInt();
+
+    for (int i = 0; i < numFiles; i++) {
+      String name = Text.readString(in);
+      long length = in.readLong();
+
+      if (length > 0) {
+        // can we avoid the extra copy?
+        IndexOutput output = null;
+        try {
+          output = dir.createOutput(name);
+
+          int position = 0;
+          byte[] buffer = new byte[BUFFER_SIZE];
+
+          while (position < length) {
+            int len =
+                position + BUFFER_SIZE <= length ? BUFFER_SIZE
+                    : (int) (length - position);
+            in.readFully(buffer, 0, len);
+            output.writeBytes(buffer, 0, len);
+            position += len;
+          }
+        } finally {
+          if (output != null) {
+            output.close();
+          }
+        }
+      }
+    }
+  }
+
+}



Mime
View raw message