accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From e..@apache.org
Subject [15/37] ACCUMULO-600 removed wikisearch from trunk
Date Tue, 26 Nov 2013 15:18:10 GMT
http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java
deleted file mode 100644
index c469748..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java
+++ /dev/null
@@ -1,470 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Generated by the protocol buffer compiler.  DO NOT EDIT!
-// source: Uid.proto
-
-package org.apache.accumulo.examples.wikisearch.protobuf;
-
-public final class Uid {
-  private Uid() {}
-  
-  public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {}
-  
-  public static final class List extends com.google.protobuf.GeneratedMessage {
-    // Use List.newBuilder() to construct.
-    private List() {
-      initFields();
-    }
-    
-    private List(boolean noInit) {}
-    
-    private static final List defaultInstance;
-    
-    public static List getDefaultInstance() {
-      return defaultInstance;
-    }
-    
-    public List getDefaultInstanceForType() {
-      return defaultInstance;
-    }
-    
-    public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
-      return org.apache.accumulo.examples.wikisearch.protobuf.Uid.internal_static_protobuf_List_descriptor;
-    }
-    
-    protected com.google.protobuf.GeneratedMessage.FieldAccessorTable internalGetFieldAccessorTable() {
-      return org.apache.accumulo.examples.wikisearch.protobuf.Uid.internal_static_protobuf_List_fieldAccessorTable;
-    }
-    
-    // required bool IGNORE = 1;
-    public static final int IGNORE_FIELD_NUMBER = 1;
-    private boolean hasIGNORE;
-    private boolean iGNORE_ = false;
-    
-    public boolean hasIGNORE() {
-      return hasIGNORE;
-    }
-    
-    public boolean getIGNORE() {
-      return iGNORE_;
-    }
-    
-    // required uint64 COUNT = 2;
-    public static final int COUNT_FIELD_NUMBER = 2;
-    private boolean hasCOUNT;
-    private long cOUNT_ = 0L;
-    
-    public boolean hasCOUNT() {
-      return hasCOUNT;
-    }
-    
-    public long getCOUNT() {
-      return cOUNT_;
-    }
-    
-    // repeated string UID = 3;
-    public static final int UID_FIELD_NUMBER = 3;
-    private java.util.List<java.lang.String> uID_ = java.util.Collections.emptyList();
-    
-    public java.util.List<java.lang.String> getUIDList() {
-      return uID_;
-    }
-    
-    public int getUIDCount() {
-      return uID_.size();
-    }
-    
-    public java.lang.String getUID(int index) {
-      return uID_.get(index);
-    }
-    
-    private void initFields() {}
-    
-    public final boolean isInitialized() {
-      if (!hasIGNORE)
-        return false;
-      if (!hasCOUNT)
-        return false;
-      return true;
-    }
-    
-    public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException {
-      getSerializedSize();
-      if (hasIGNORE()) {
-        output.writeBool(1, getIGNORE());
-      }
-      if (hasCOUNT()) {
-        output.writeUInt64(2, getCOUNT());
-      }
-      for (java.lang.String element : getUIDList()) {
-        output.writeString(3, element);
-      }
-      getUnknownFields().writeTo(output);
-    }
-    
-    private int memoizedSerializedSize = -1;
-    
-    public int getSerializedSize() {
-      int size = memoizedSerializedSize;
-      if (size != -1)
-        return size;
-      
-      size = 0;
-      if (hasIGNORE()) {
-        size += com.google.protobuf.CodedOutputStream.computeBoolSize(1, getIGNORE());
-      }
-      if (hasCOUNT()) {
-        size += com.google.protobuf.CodedOutputStream.computeUInt64Size(2, getCOUNT());
-      }
-      {
-        int dataSize = 0;
-        for (java.lang.String element : getUIDList()) {
-          dataSize += com.google.protobuf.CodedOutputStream.computeStringSizeNoTag(element);
-        }
-        size += dataSize;
-        size += 1 * getUIDList().size();
-      }
-      size += getUnknownFields().getSerializedSize();
-      memoizedSerializedSize = size;
-      return size;
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException {
-      return newBuilder().mergeFrom(data).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return newBuilder().mergeFrom(data, extensionRegistry).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException {
-      return newBuilder().mergeFrom(data).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws com.google.protobuf.InvalidProtocolBufferException {
-      return newBuilder().mergeFrom(data, extensionRegistry).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(java.io.InputStream input) throws java.io.IOException {
-      return newBuilder().mergeFrom(input).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return newBuilder().mergeFrom(input, extensionRegistry).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException {
-      Builder builder = newBuilder();
-      if (builder.mergeDelimitedFrom(input)) {
-        return builder.buildParsed();
-      } else {
-        return null;
-      }
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      Builder builder = newBuilder();
-      if (builder.mergeDelimitedFrom(input, extensionRegistry)) {
-        return builder.buildParsed();
-      } else {
-        return null;
-      }
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input) throws java.io.IOException {
-      return newBuilder().mergeFrom(input).buildParsed();
-    }
-    
-    public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-        throws java.io.IOException {
-      return newBuilder().mergeFrom(input, extensionRegistry).buildParsed();
-    }
-    
-    public static Builder newBuilder() {
-      return Builder.create();
-    }
-    
-    public Builder newBuilderForType() {
-      return newBuilder();
-    }
-    
-    public static Builder newBuilder(org.apache.accumulo.examples.wikisearch.protobuf.Uid.List prototype) {
-      return newBuilder().mergeFrom(prototype);
-    }
-    
-    public Builder toBuilder() {
-      return newBuilder(this);
-    }
-    
-    public static final class Builder extends com.google.protobuf.GeneratedMessage.Builder<Builder> {
-      private org.apache.accumulo.examples.wikisearch.protobuf.Uid.List result;
-      
-      // Construct using protobuf.Uid.List.newBuilder()
-      private Builder() {}
-      
-      private static Builder create() {
-        Builder builder = new Builder();
-        builder.result = new org.apache.accumulo.examples.wikisearch.protobuf.Uid.List();
-        return builder;
-      }
-      
-      protected org.apache.accumulo.examples.wikisearch.protobuf.Uid.List internalGetResult() {
-        return result;
-      }
-      
-      public Builder clear() {
-        if (result == null) {
-          throw new IllegalStateException("Cannot call clear() after build().");
-        }
-        result = new org.apache.accumulo.examples.wikisearch.protobuf.Uid.List();
-        return this;
-      }
-      
-      public Builder clone() {
-        return create().mergeFrom(result);
-      }
-      
-      public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() {
-        return org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDescriptor();
-      }
-      
-      public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List getDefaultInstanceForType() {
-        return org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDefaultInstance();
-      }
-      
-      public boolean isInitialized() {
-        return result.isInitialized();
-      }
-      
-      public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List build() {
-        if (result != null && !isInitialized()) {
-          throw newUninitializedMessageException(result);
-        }
-        return buildPartial();
-      }
-      
-      private org.apache.accumulo.examples.wikisearch.protobuf.Uid.List buildParsed() throws com.google.protobuf.InvalidProtocolBufferException {
-        if (!isInitialized()) {
-          throw newUninitializedMessageException(result).asInvalidProtocolBufferException();
-        }
-        return buildPartial();
-      }
-      
-      public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List buildPartial() {
-        if (result == null) {
-          throw new IllegalStateException("build() has already been called on this Builder.");
-        }
-        if (result.uID_ != java.util.Collections.EMPTY_LIST) {
-          result.uID_ = java.util.Collections.unmodifiableList(result.uID_);
-        }
-        org.apache.accumulo.examples.wikisearch.protobuf.Uid.List returnMe = result;
-        result = null;
-        return returnMe;
-      }
-      
-      public Builder mergeFrom(com.google.protobuf.Message other) {
-        if (other instanceof org.apache.accumulo.examples.wikisearch.protobuf.Uid.List) {
-          return mergeFrom((org.apache.accumulo.examples.wikisearch.protobuf.Uid.List) other);
-        } else {
-          super.mergeFrom(other);
-          return this;
-        }
-      }
-      
-      public Builder mergeFrom(org.apache.accumulo.examples.wikisearch.protobuf.Uid.List other) {
-        if (other == org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDefaultInstance())
-          return this;
-        if (other.hasIGNORE()) {
-          setIGNORE(other.getIGNORE());
-        }
-        if (other.hasCOUNT()) {
-          setCOUNT(other.getCOUNT());
-        }
-        if (!other.uID_.isEmpty()) {
-          if (result.uID_.isEmpty()) {
-            result.uID_ = new java.util.ArrayList<java.lang.String>();
-          }
-          result.uID_.addAll(other.uID_);
-        }
-        this.mergeUnknownFields(other.getUnknownFields());
-        return this;
-      }
-      
-      public Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
-          throws java.io.IOException {
-        com.google.protobuf.UnknownFieldSet.Builder unknownFields = com.google.protobuf.UnknownFieldSet.newBuilder(this.getUnknownFields());
-        while (true) {
-          int tag = input.readTag();
-          switch (tag) {
-            case 0:
-              this.setUnknownFields(unknownFields.build());
-              return this;
-            default: {
-              if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) {
-                this.setUnknownFields(unknownFields.build());
-                return this;
-              }
-              break;
-            }
-            case 8: {
-              setIGNORE(input.readBool());
-              break;
-            }
-            case 16: {
-              setCOUNT(input.readUInt64());
-              break;
-            }
-            case 26: {
-              addUID(input.readString());
-              break;
-            }
-          }
-        }
-      }
-      
-      // required bool IGNORE = 1;
-      public boolean hasIGNORE() {
-        return result.hasIGNORE();
-      }
-      
-      public boolean getIGNORE() {
-        return result.getIGNORE();
-      }
-      
-      public Builder setIGNORE(boolean value) {
-        result.hasIGNORE = true;
-        result.iGNORE_ = value;
-        return this;
-      }
-      
-      public Builder clearIGNORE() {
-        result.hasIGNORE = false;
-        result.iGNORE_ = false;
-        return this;
-      }
-      
-      // required uint64 COUNT = 2;
-      public boolean hasCOUNT() {
-        return result.hasCOUNT();
-      }
-      
-      public long getCOUNT() {
-        return result.getCOUNT();
-      }
-      
-      public Builder setCOUNT(long value) {
-        result.hasCOUNT = true;
-        result.cOUNT_ = value;
-        return this;
-      }
-      
-      public Builder clearCOUNT() {
-        result.hasCOUNT = false;
-        result.cOUNT_ = 0L;
-        return this;
-      }
-      
-      // repeated string UID = 3;
-      public java.util.List<java.lang.String> getUIDList() {
-        return java.util.Collections.unmodifiableList(result.uID_);
-      }
-      
-      public int getUIDCount() {
-        return result.getUIDCount();
-      }
-      
-      public java.lang.String getUID(int index) {
-        return result.getUID(index);
-      }
-      
-      public Builder setUID(int index, java.lang.String value) {
-        if (value == null) {
-          throw new NullPointerException();
-        }
-        result.uID_.set(index, value);
-        return this;
-      }
-      
-      public Builder addUID(java.lang.String value) {
-        if (value == null) {
-          throw new NullPointerException();
-        }
-        if (result.uID_.isEmpty()) {
-          result.uID_ = new java.util.ArrayList<java.lang.String>();
-        }
-        result.uID_.add(value);
-        return this;
-      }
-      
-      public Builder addAllUID(java.lang.Iterable<? extends java.lang.String> values) {
-        if (result.uID_.isEmpty()) {
-          result.uID_ = new java.util.ArrayList<java.lang.String>();
-        }
-        super.addAll(values, result.uID_);
-        return this;
-      }
-      
-      public Builder clearUID() {
-        result.uID_ = java.util.Collections.emptyList();
-        return this;
-      }
-      
-      // @@protoc_insertion_point(builder_scope:protobuf.List)
-    }
-    
-    static {
-      defaultInstance = new List(true);
-      org.apache.accumulo.examples.wikisearch.protobuf.Uid.internalForceInit();
-      defaultInstance.initFields();
-    }
-    
-    // @@protoc_insertion_point(class_scope:protobuf.List)
-  }
-  
-  private static com.google.protobuf.Descriptors.Descriptor internal_static_protobuf_List_descriptor;
-  private static com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_protobuf_List_fieldAccessorTable;
-  
-  public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() {
-    return descriptor;
-  }
-  
-  private static com.google.protobuf.Descriptors.FileDescriptor descriptor;
-  static {
-    java.lang.String[] descriptorData = {"\n\tUid.proto\022\010protobuf\"2\n\004List\022\016\n\006IGNORE\030"
-        + "\001 \002(\010\022\r\n\005COUNT\030\002 \002(\004\022\013\n\003UID\030\003 \003(\tB\014\n\010pro" + "tobufH\001"};
-    com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
-      public com.google.protobuf.ExtensionRegistry assignDescriptors(com.google.protobuf.Descriptors.FileDescriptor root) {
-        descriptor = root;
-        internal_static_protobuf_List_descriptor = getDescriptor().getMessageTypes().get(0);
-        internal_static_protobuf_List_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable(
-            internal_static_protobuf_List_descriptor, new java.lang.String[] {"IGNORE", "COUNT", "UID",}, org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.class,
-            org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder.class);
-        return null;
-      }
-    };
-    com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] {},
-        assigner);
-  }
-  
-  public static void internalForceInit() {}
-  
-  // @@protoc_insertion_point(outer_class_scope)
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
deleted file mode 100644
index 09755c0..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-
-import java.io.IOException;
-
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.accumulo.examples.wikisearch.util.TextUtil;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-
-
-/**
- * This class aggregates Text values based on a start and end filter. An example use case for this would be XML data. This will not work with data that has
- * nested start and stop tokens.
- * 
- */
-public class AggregatingRecordReader extends LongLineRecordReader {
-  
-  public static final String START_TOKEN = "aggregating.token.start";
-  public static final String END_TOKEN = "aggregating.token.end";
-  public static final String RETURN_PARTIAL_MATCHES = "aggregating.allow.partial";
-  
-  private LongWritable key = new LongWritable();
-  private String startToken = null;
-  private String endToken = null;
-  private long counter = 0;
-  private Text aggValue = new Text();
-  private boolean startFound = false;
-  private StringBuilder remainder = new StringBuilder(0);
-  private boolean returnPartialMatches = false;
-  
-  @Override
-  public LongWritable getCurrentKey() {
-    key.set(counter);
-    return key;
-  }
-  
-  @Override
-  public Text getCurrentValue() {
-    return aggValue;
-  }
-  
-  @Override
-  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
-    super.initialize(((WikipediaInputSplit)genericSplit).getFileSplit(), context);
-    this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class);
-    this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class);
-    this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false);
-    
-    /*
-     * Text-appending works almost exactly like the + operator on Strings- it creates a byte array exactly the size of [prefix + suffix] and dumps the bytes
-     * into the new array. This module works by doing lots of little additions, one line at a time. With most XML, the documents are partitioned on line
-     * boundaries, so we will generally have lots of additions. Setting a large default byte array for a text object can avoid this and give us
-     * StringBuilder-like functionality for Text objects.
-     */
-    byte[] txtBuffer = new byte[2048];
-    aggValue.set(txtBuffer);
-  }
-  
-  @Override
-  public boolean nextKeyValue() throws IOException {
-    aggValue.clear();
-    boolean hasNext = false;
-    boolean finished = false;
-    // Find the start token
-    while (!finished && (((hasNext = super.nextKeyValue()) == true) || remainder.length() > 0)) {
-      if (hasNext)
-        finished = process(super.getCurrentValue());
-      else
-        finished = process(null);
-      if (finished) {
-        startFound = false;
-        counter++;
-        return true;
-      }
-    }
-    // If we have anything loaded in the agg value (and we found a start)
-    // then we ran out of data before finding the end. Just return the
-    // data we have and if it's not valid, downstream parsing of the data
-    // will fail.
-    if (returnPartialMatches && startFound && aggValue.getLength() > 0) {
-      startFound = false;
-      counter++;
-      return true;
-    }
-    return false;
-  }
-  
-  /**
-   * Populates aggValue with the contents of the Text object.
-   * 
-   * @param t
-   * @return true if aggValue is complete, else false and needs more data.
-   */
-  private boolean process(Text t) {
-    
-    if (null != t)
-      remainder.append(t.toString());
-    while (remainder.length() > 0) {
-      if (!startFound) {
-        // If found, then begin aggregating at the start offset
-        int start = remainder.indexOf(startToken);
-        if (-1 != start) {
-          // Append the start token to the aggregate value
-          TextUtil.textAppendNoNull(aggValue, remainder.substring(start, start + startToken.length()), false);
-          // Remove to the end of the start token from the remainder
-          remainder.delete(0, start + startToken.length());
-          startFound = true;
-        } else {
-          // If we are looking for the start and have not found it, then remove
-          // the bytes
-          remainder.delete(0, remainder.length());
-        }
-      } else {
-        // Try to find the end
-        int end = remainder.indexOf(endToken);
-        // Also try to find the start
-        int start = remainder.indexOf(startToken);
-        if (-1 == end) {
-          if (returnPartialMatches && start >= 0) {
-            // End token not found, but another start token was found...
-            // The amount to copy is up to the beginning of the next start token
-            TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false);
-            remainder.delete(0, start);
-            return true;
-          } else {
-            // Not found, aggregate the entire remainder
-            TextUtil.textAppendNoNull(aggValue, remainder.toString(), false);
-            // Delete all chars from remainder
-            remainder.delete(0, remainder.length());
-          }
-        } else {
-          if (returnPartialMatches && start >= 0 && start < end) {
-            // We found the end token, but found another start token first, so
-            // deal with that.
-            TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false);
-            remainder.delete(0, start);
-            return true;
-          } else {
-            // END_TOKEN was found. Extract to the end of END_TOKEN
-            TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false);
-            // Remove from remainder up to the end of END_TOKEN
-            remainder.delete(0, end + endToken.length());
-            return true;
-          }
-        }
-      }
-    }
-    return false;
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
deleted file mode 100644
index a4da0ad..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-
-/**
- * A class that provides a line reader from an input stream.
- */
-public class LfLineReader {
-  private static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
-  private int bufferSize = DEFAULT_BUFFER_SIZE;
-  private InputStream in;
-  private byte[] buffer;
-  // the number of bytes of real data in the buffer
-  private int bufferLength = 0;
-  // the current position in the buffer
-  private int bufferPosn = 0;
-  
-  private static final byte LF = '\n';
-  
-  /**
-   * Create a line reader that reads from the given stream using the default buffer-size (64k).
-   * 
-   * @param in
-   *          The input stream
-   * @throws IOException
-   */
-  public LfLineReader(InputStream in) {
-    this(in, DEFAULT_BUFFER_SIZE);
-  }
-  
-  /**
-   * Create a line reader that reads from the given stream using the given buffer-size.
-   * 
-   * @param in
-   *          The input stream
-   * @param bufferSize
-   *          Size of the read buffer
-   * @throws IOException
-   */
-  public LfLineReader(InputStream in, int bufferSize) {
-    this.in = in;
-    this.bufferSize = bufferSize;
-    this.buffer = new byte[this.bufferSize];
-  }
-  
-  /**
-   * Create a line reader that reads from the given stream using the <code>io.file.buffer.size</code> specified in the given <code>Configuration</code>.
-   * 
-   * @param in
-   *          input stream
-   * @param conf
-   *          configuration
-   * @throws IOException
-   */
-  public LfLineReader(InputStream in, Configuration conf) throws IOException {
-    this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE));
-  }
-  
-  /**
-   * Close the underlying stream.
-   * 
-   * @throws IOException
-   */
-  public void close() throws IOException {
-    in.close();
-  }
-  
-  /**
-   * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). EOF also terminates an otherwise unterminated line.
-   * 
-   * @param str
-   *          the object to store the given line (without newline)
-   * @param maxLineLength
-   *          the maximum number of bytes to store into str; the rest of the line is silently discarded.
-   * @param maxBytesToConsume
-   *          the maximum number of bytes to consume in this call. This is only a hint, because if the line cross this threshold, we allow it to happen. It can
-   *          overshoot potentially by as much as one buffer length.
-   * 
-   * @return the number of bytes read including the (longest) newline found.
-   * 
-   * @throws IOException
-   *           if the underlying stream throws
-   */
-  public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
-    /*
-     * We're reading data from in, but the head of the stream may be already buffered in buffer, so we have several cases: 1. No newline characters are in the
-     * buffer, so we need to copy everything and read another buffer from the stream. 2. An unambiguously terminated line is in buffer, so we just copy to str.
-     */
-    str.clear();
-    int txtLength = 0; // tracks str.getLength(), as an optimization
-    int newlineLength = 0; // length of terminating newline
-    long bytesConsumed = 0;
-    do {
-      int startPosn = bufferPosn; // starting from where we left off the last time
-      if (bufferPosn >= bufferLength) {
-        startPosn = bufferPosn = 0;
-        bufferLength = in.read(buffer);
-        if (bufferLength <= 0)
-          break; // EOF
-      }
-      for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline
-        if (buffer[bufferPosn] == LF) {
-          newlineLength = 1;
-          ++bufferPosn; // at next invocation proceed from following byte
-          break;
-        }
-      }
-      int readLength = bufferPosn - startPosn;
-      bytesConsumed += readLength;
-      int appendLength = readLength - newlineLength;
-      if (appendLength > maxLineLength - txtLength) {
-        appendLength = maxLineLength - txtLength;
-      }
-      if (appendLength > 0) {
-        str.append(buffer, startPosn, appendLength);
-        txtLength += appendLength;
-      }
-    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
-    
-    if (bytesConsumed > Integer.MAX_VALUE)
-      throw new IOException("Too many bytes before newline: " + bytesConsumed);
-    return (int) bytesConsumed;
-  }
-  
-  /**
-   * Read from the InputStream into the given Text.
-   * 
-   * @param str
-   *          the object to store the given line
-   * @param maxLineLength
-   *          the maximum number of bytes to store into str.
-   * @return the number of bytes read including the newline
-   * @throws IOException
-   *           if the underlying stream throws
-   */
-  public int readLine(Text str, int maxLineLength) throws IOException {
-    return readLine(str, maxLineLength, Integer.MAX_VALUE);
-  }
-  
-  /**
-   * Read from the InputStream into the given Text.
-   * 
-   * @param str
-   *          the object to store the given line
-   * @return the number of bytes read including the newline
-   * @throws IOException
-   *           if the underlying stream throws
-   */
-  public int readLine(Text str) throws IOException {
-    return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE);
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java
deleted file mode 100644
index f36c373..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.reader;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.CompressionCodecFactory;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
-import org.apache.hadoop.util.LineReader;
-
-/**
- * A copy of {@link LineRecordReader} which does not discard lines longer than "mapred.linerecordreader.maxlength". Instead, it returns them, leaving it to the
- * mapper to decide what to do with it. It also does not treat '\r' (CR) characters as new lines -- it uses {@link LfLineReader} instead of {@link LineReader}
- * to read lines.
- */
-public class LongLineRecordReader extends RecordReader<LongWritable,Text> {
-  private CompressionCodecFactory compressionCodecs = null;
-  private long start;
-  private long pos;
-  private long end;
-  private LfLineReader in;
-  private int maxLineLength;
-  private LongWritable key = null;
-  private Text value = null;
-  
-  @Override
-  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
-    FileSplit split = (FileSplit) genericSplit;
-    Configuration job = context.getConfiguration();
-    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
-    start = split.getStart();
-    end = start + split.getLength();
-    final Path file = split.getPath();
-    compressionCodecs = new CompressionCodecFactory(job);
-    final CompressionCodec codec = compressionCodecs.getCodec(file);
-    
-    // open the file and seek to the start of the split
-    FileSystem fs = file.getFileSystem(job);
-    FSDataInputStream fileIn = fs.open(split.getPath());
-    boolean skipFirstLine = false;
-    if (codec != null) {
-      in = new LfLineReader(codec.createInputStream(fileIn), job);
-      end = Long.MAX_VALUE;
-    } else {
-      if (start != 0) {
-        skipFirstLine = true;
-        --start;
-        fileIn.seek(start);
-      }
-      in = new LfLineReader(fileIn, job);
-    }
-    if (skipFirstLine) { // skip first line and re-establish "start".
-      start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
-    }
-    this.pos = start;
-  }
-  
-  @Override
-  public boolean nextKeyValue() throws IOException {
-    if (key == null) {
-      key = new LongWritable();
-    }
-    key.set(pos);
-    if (value == null) {
-      value = new Text();
-    }
-    int newSize = 0;
-    if (pos < end) {
-      newSize = in.readLine(value, maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
-      if (newSize != 0) {
-        pos += newSize;
-      }
-    }
-    if (newSize == 0) {
-      key = null;
-      value = null;
-      return false;
-    } else {
-      return true;
-    }
-  }
-  
-  @Override
-  public LongWritable getCurrentKey() {
-    return key;
-  }
-  
-  @Override
-  public Text getCurrentValue() {
-    return value;
-  }
-  
-  /**
-   * Get the progress within the split
-   */
-  @Override
-  public float getProgress() {
-    if (start == end) {
-      return 0.0f;
-    } else {
-      return Math.min(1.0f, (pos - start) / (float) (end - start));
-    }
-  }
-  
-  @Override
-  public synchronized void close() throws IOException {
-    if (in != null) {
-      in.close();
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
deleted file mode 100644
index 1623d55..0000000
--- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.util;
-
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-
-import org.apache.accumulo.core.iterators.user.SummingCombiner;
-import org.apache.hadoop.io.Text;
-
-public class TextUtil {
-  
-  /**
-   * Appends a null byte followed by the UTF-8 bytes of the given string to the given {@link Text}
-   * 
-   * @param text
-   *          the Text to which to append
-   * @param string
-   *          the String to append
-   */
-  public static void textAppend(Text text, String string) {
-    appendNullByte(text);
-    textAppendNoNull(text, string);
-  }
-  
-  public static void textAppend(Text text, String string, boolean replaceBadChar) {
-    appendNullByte(text);
-    textAppendNoNull(text, string, replaceBadChar);
-  }
-  
-  public static void textAppend(Text t, long s) {
-    t.append(nullByte, 0, 1);
-    t.append(SummingCombiner.FIXED_LEN_ENCODER.encode(s), 0, 8);
-  }
-  
-  private static final byte[] nullByte = {0};
-  
-  /**
-   * Appends a null byte to the given text
-   * 
-   * @param text
-   *          the text to which to append the null byte
-   */
-  public static void appendNullByte(Text text) {
-    text.append(nullByte, 0, nullByte.length);
-  }
-  
-  /**
-   * Appends the UTF-8 bytes of the given string to the given {@link Text}
-   * 
-   * @param t
-   *          the Text to which to append
-   * @param s
-   *          the String to append
-   */
-  public static void textAppendNoNull(Text t, String s) {
-    textAppendNoNull(t, s, false);
-  }
-  
-  /**
-   * Appends the UTF-8 bytes of the given string to the given {@link Text}
-   * 
-   * @param t
-   * @param s
-   * @param replaceBadChar
-   */
-  public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
-    try {
-      ByteBuffer buffer = Text.encode(s, replaceBadChar);
-      t.append(buffer.array(), 0, buffer.limit());
-    } catch (CharacterCodingException cce) {
-      throw new IllegalArgumentException(cce);
-    }
-  }
-  
-  /**
-   * Converts the given string its UTF-8 bytes. This uses Hadoop's method for converting string to UTF-8 and is much faster than calling
-   * {@link String#getBytes(String)}.
-   * 
-   * @param string
-   *          the string to convert
-   * @return the UTF-8 representation of the string
-   */
-  public static byte[] toUtf8(String string) {
-    ByteBuffer buffer;
-    try {
-      buffer = Text.encode(string, false);
-    } catch (CharacterCodingException cce) {
-      throw new IllegalArgumentException(cce);
-    }
-    byte[] bytes = new byte[buffer.limit()];
-    System.arraycopy(buffer.array(), 0, bytes, 0, bytes.length);
-    return bytes;
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto b/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto
deleted file mode 100644
index 41ae188..0000000
--- a/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto
+++ /dev/null
@@ -1,28 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one or more
-// contributor license agreements.  See the NOTICE file distributed with
-// this work for additional information regarding copyright ownership.
-// The ASF licenses this file to You under the Apache License, Version 2.0
-// (the "License"); you may not use this file except in compliance with
-// the License.  You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// compile with protoc --java_out ../java
-// compile extra builder util with java accumulo.data.protobuf.builder.ProtoBufBuilder -d ../java accumulo.data.protobuf.UidList
-//      classpath for compile command should include ../../../target/classes and protobuf-java-2.2.0.jar
-
-package protobuf;
-
-option java_package = "protobuf";
-option optimize_for = SPEED;
-
-message Info {
-	required float normalizedTermFrequency = 1;
-	repeated uint32 wordOffset = 2;
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto b/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto
deleted file mode 100644
index 30aa446..0000000
--- a/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto
+++ /dev/null
@@ -1,29 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one or more
-// contributor license agreements.  See the NOTICE file distributed with
-// this work for additional information regarding copyright ownership.
-// The ASF licenses this file to You under the Apache License, Version 2.0
-// (the "License"); you may not use this file except in compliance with
-// the License.  You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// compile with protoc --java_out ../java
-// compile extra builder util with java accumulo.data.protobuf.builder.ProtoBufBuilder -d ../java accumulo.data.protobuf.UidList
-//      classpath for compile command should include ../../../target/classes and protobuf-java-2.2.0.jar
-
-package protobuf;
-
-option java_package = "protobuf";
-option optimize_for = SPEED;
-
-message List {
-  required bool IGNORE = 1;
-  required uint64 COUNT = 2;
-  repeated string UID = 3;
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh b/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh
deleted file mode 100755
index 6702998..0000000
--- a/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-for PROTO in `ls -1 *proto`; do protoc --java_out ../java $PROTO; done

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
deleted file mode 100644
index 6af1e9b..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.StatusReporter;
-
-public class StandaloneStatusReporter extends StatusReporter {
-  
-  private Counters c = new Counters();
-  
-  private long filesProcessed = 0;
-  private long recordsProcessed = 0;
-  
-  public Counters getCounters() {
-    return c;
-  }
-  
-  @Override
-  public Counter getCounter(Enum<?> name) {
-    return c.findCounter(name);
-  }
-  
-  @Override
-  public Counter getCounter(String group, String name) {
-    return c.findCounter(group, name);
-  }
-  
-  @Override
-  public void progress() {
-    // do nothing
-  }
-  
-  @Override
-  public void setStatus(String status) {
-    // do nothing
-  }
-  
-  public long getFilesProcessed() {
-    return filesProcessed;
-  }
-  
-  public long getRecordsProcessed() {
-    return recordsProcessed;
-  }
-  
-  public void incrementFilesProcessed() {
-    filesProcessed++;
-    recordsProcessed = 0;
-  }
-  
-  public void incrementRecordsProcessed() {
-    recordsProcessed++;
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
deleted file mode 100644
index f6b2791..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.junit.Test;
-
-public class WikipediaInputSplitTest {
-  @Test
-  public void testSerialization() throws IOException {
-    Path testPath = new Path("/foo/bar");
-    String[] hosts = new String[2];
-    hosts[0] = "abcd";
-    hosts[1] = "efgh";
-    FileSplit fSplit = new FileSplit(testPath, 1, 2, hosts);
-    WikipediaInputSplit split = new WikipediaInputSplit(fSplit, 7);
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    ObjectOutputStream out = new ObjectOutputStream(baos);
-    split.write(out);
-    out.close();
-    baos.close();
-    
-    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
-    DataInput in = new ObjectInputStream(bais);
-    
-    WikipediaInputSplit split2 = new WikipediaInputSplit();
-    split2.readFields(in);
-    Assert.assertTrue(bais.available() == 0);
-    bais.close();
-    
-    Assert.assertTrue(split.getPartition() == split2.getPartition());
-    
-    FileSplit fSplit2 = split2.getFileSplit();
-    Assert.assertTrue(fSplit.getPath().equals(fSplit2.getPath()));
-    Assert.assertTrue(fSplit.getStart() == fSplit2.getStart());
-    Assert.assertTrue(fSplit.getLength() == fSplit2.getLength());
-    
-    String[] hosts2 = fSplit2.getLocations();
-    Assert.assertEquals(hosts.length, hosts2.length);
-    for (int i = 0; i < hosts.length; i++) {
-      Assert.assertEquals(hosts[i], hosts2[i]);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
deleted file mode 100644
index c659ec4..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.HashMap;
-import java.util.Map.Entry;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.ContextFactory;
-import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RawLocalFileSystem;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.junit.Before;
-
-/**
- * Load some data into mock accumulo
- */
-public class WikipediaMapperTest {
-  
-  private static final String METADATA_TABLE_NAME = "wikiMetadata";
-  
-  private static final String TABLE_NAME = "wiki";
-  
-  private static final String INDEX_TABLE_NAME = "wikiIndex";
-  
-  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
-  
-  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
-    @Override
-    public void write(Text key, Mutation value) throws IOException, InterruptedException {
-      try {
-        writerMap.get(key).addMutation(value);
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error adding mutation", e);
-      }
-    }
-    
-    @Override
-    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
-      try {
-        for (BatchWriter w : writerMap.values()) {
-          w.flush();
-          w.close();
-        }
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error closing Batch Writer", e);
-      }
-    }
-    
-  }
-  
-  private Connector c = null;
-  private Configuration conf = new Configuration();
-  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
-  
-  @Before
-  public void setup() throws Exception {
-    
-    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
-    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
-    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
-    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
-    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
-    
-    MockInstance i = new MockInstance();
-    c = i.getConnector("root", "pass");
-    c.tableOperations().delete(METADATA_TABLE_NAME);
-    c.tableOperations().delete(TABLE_NAME);
-    c.tableOperations().delete(INDEX_TABLE_NAME);
-    c.tableOperations().delete(RINDEX_TABLE_NAME);
-    c.tableOperations().create(METADATA_TABLE_NAME);
-    c.tableOperations().create(TABLE_NAME);
-    c.tableOperations().create(INDEX_TABLE_NAME);
-    c.tableOperations().create(RINDEX_TABLE_NAME);
-    
-    writerMap.put(new Text(METADATA_TABLE_NAME), c.createBatchWriter(METADATA_TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(TABLE_NAME), c.createBatchWriter(TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(INDEX_TABLE_NAME), c.createBatchWriter(INDEX_TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(RINDEX_TABLE_NAME), c.createBatchWriter(RINDEX_TABLE_NAME, 1000L, 1000L, 1));
-    
-    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
-    
-    RawLocalFileSystem fs = new RawLocalFileSystem();
-    fs.setConf(conf);
-    
-    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
-    Assert.assertNotNull(url);
-    File data = new File(url.toURI());
-    Path tmpFile = new Path(data.getAbsolutePath());
-    
-    // Setup the Mapper
-    InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null);
-    AggregatingRecordReader rr = new AggregatingRecordReader();
-    Path ocPath = new Path(tmpFile, "oc");
-    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
-    fs.deleteOnExit(ocPath);
-    StandaloneStatusReporter sr = new StandaloneStatusReporter();
-    rr.initialize(split, context);
-    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
-    WikipediaMapper mapper = new WikipediaMapper();
-    
-    // Load data into Mock Accumulo
-    Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
-    mapper.run(con);
-    
-    // Flush and close record writers.
-    rw.close(context);
-    
-  }
-  
-  private void debugQuery(String tableName) throws Exception {
-    Scanner s = c.createScanner(tableName, new Authorizations("all"));
-    Range r = new Range();
-    s.setRange(r);
-    for (Entry<Key,Value> entry : s)
-      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
-  }
-  
-  public void testViewAllData() throws Exception {
-    debugQuery(METADATA_TABLE_NAME);
-    debugQuery(TABLE_NAME);
-    debugQuery(INDEX_TABLE_NAME);
-    debugQuery(RINDEX_TABLE_NAME);
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
deleted file mode 100644
index 6619ede..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.iterator;
-
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.Combiner;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.junit.Before;
-import org.junit.Test;
-
-public class GlobalIndexUidTest {
-  private GlobalIndexUidCombiner combiner;
-  private List<Value> values;
-  
-  @Before
-  public void setup() throws Exception {
-    combiner = new GlobalIndexUidCombiner();
-    combiner.init(null, Collections.singletonMap("all", "true"), null);
-    values = new ArrayList<Value>();
-  }
-  
-  private Uid.List.Builder createNewUidList() {
-    return Uid.List.newBuilder();
-  }
-  
-  @Test
-  public void testSingleUid() {
-    Builder b = createNewUidList();
-    b.setCOUNT(1);
-    b.setIGNORE(false);
-    b.addUID(UUID.randomUUID().toString());
-    Uid.List uidList = b.build();
-    Value val = new Value(uidList.toByteArray());
-    values.add(val);
-    Value result = combiner.reduce(new Key(), values.iterator());
-    assertTrue(val.compareTo(result.get()) == 0);
-  }
-  
-  @Test
-  public void testLessThanMax() throws Exception {
-    List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidCombiner.MAX - 1; i++) {
-      Builder b = createNewUidList();
-      b.setIGNORE(false);
-      String uuid = UUID.randomUUID().toString();
-      savedUUIDs.add(uuid);
-      b.setCOUNT(i);
-      b.addUID(uuid);
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX - 1));
-    List<String> resultListUUIDs = resultList.getUIDList();
-    for (String s : savedUUIDs)
-      assertTrue(resultListUUIDs.contains(s));
-  }
-  
-  @Test
-  public void testEqualsMax() throws Exception {
-    List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidCombiner.MAX; i++) {
-      Builder b = createNewUidList();
-      b.setIGNORE(false);
-      String uuid = UUID.randomUUID().toString();
-      savedUUIDs.add(uuid);
-      b.setCOUNT(i);
-      b.addUID(uuid);
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX));
-    List<String> resultListUUIDs = resultList.getUIDList();
-    for (String s : savedUUIDs)
-      assertTrue(resultListUUIDs.contains(s));
-  }
-  
-  @Test
-  public void testMoreThanMax() throws Exception {
-    List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidCombiner.MAX + 10; i++) {
-      Builder b = createNewUidList();
-      b.setIGNORE(false);
-      String uuid = UUID.randomUUID().toString();
-      savedUUIDs.add(uuid);
-      b.setCOUNT(1);
-      b.addUID(uuid);
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == true);
-    assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == (GlobalIndexUidCombiner.MAX + 10));
-  }
-  
-  @Test
-  public void testSeenIgnore() throws Exception {
-    Builder b = createNewUidList();
-    b.setIGNORE(true);
-    b.setCOUNT(0);
-    Uid.List uidList = b.build();
-    Value val = new Value(uidList.toByteArray());
-    values.add(val);
-    b = createNewUidList();
-    b.setIGNORE(false);
-    b.setCOUNT(1);
-    b.addUID(UUID.randomUUID().toString());
-    uidList = b.build();
-    val = new Value(uidList.toByteArray());
-    values.add(val);
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == true);
-    assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == 1);
-  }
-  
-  @Test
-  public void testInvalidValueType() throws Exception {
-    Combiner comb = new GlobalIndexUidCombiner();
-    IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
-    GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
-    GlobalIndexUidCombiner.setLossyness(setting, true);
-    comb.init(null, setting.getOptions(), null);
-    Logger.getLogger(GlobalIndexUidCombiner.class).setLevel(Level.OFF);
-    Value val = new Value(UUID.randomUUID().toString().getBytes());
-    values.add(val);
-    Value result = comb.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == 0);
-  }
-  
-  @Test
-  public void testCount() throws Exception {
-    UUID uuid = UUID.randomUUID();
-    // Collect the same UUID five times.
-    for (int i = 0; i < 5; i++) {
-      Builder b = createNewUidList();
-      b.setCOUNT(1);
-      b.setIGNORE(false);
-      b.addUID(uuid.toString());
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == 1);
-    assertTrue(resultList.getCOUNT() == 5);
-    
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
deleted file mode 100644
index 7297b5a..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.iterator;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
-import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.protobuf.InvalidProtocolBufferException;
-
-/**
- * Unit tests for {@link TextIndexCombiner}. The tests below demonstrate that
- * reducing one or more serialized {@link TermWeight.Info} values produces a
- * single Info whose word-offset list is the sorted merge of all input offsets
- * and whose normalized term frequency is the sum of the inputs, and that
- * zero-length values interleaved in the input are skipped.
- */
-public class TextIndexTest {
-  // Combiner under test; re-created for every test by setup().
-  private TextIndexCombiner combiner;
-  // Input values handed to reduce() by each test.
-  private List<Value> values;
-  
-  /** Configures the combiner to apply to all columns and resets the input list. */
-  @Before
-  public void setup() throws Exception {
-    combiner = new TextIndexCombiner();
-    combiner.init(null, Collections.singletonMap("all", "true"), null);
-    values = new ArrayList<Value>();
-  }
-  
-  /** No teardown needed; present for symmetry with setup(). */
-  @After
-  public void cleanup() {
-    
-  }
-  
-  /** Shorthand for a fresh TermWeight.Info protobuf builder. */
-  private TermWeight.Info.Builder createBuilder() {
-    return TermWeight.Info.newBuilder();
-  }
-  
-  /** A single input value must round-trip through reduce() unchanged. */
-  @Test
-  public void testSingleValue() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    
-    Value result = combiner.reduce(new Key(), values.iterator());
-    
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-    
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.1f);
-    
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 2);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 5);
-  }
-  
-  /**
-   * Two inputs: offsets {1,5} and {3} must merge to the sorted list {1,3,5},
-   * and frequencies 0.1f + 0.05f must sum to exactly 0.15f (exact float
-   * comparison — these particular operands sum exactly in float arithmetic).
-   */
-  @Test
-  public void testAggregateTwoValues() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    
-    builder = createBuilder();
-    builder.addWordOffset(3);
-    builder.setNormalizedTermFrequency(0.05f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    
-    Value result = combiner.reduce(new Key(), values.iterator());
-    
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-    
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.15f);
-    
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 3);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 3);
-    Assert.assertTrue(offsets.get(2) == 5);
-  }
-  
-  /**
-   * Three inputs: offsets {13,15,19}, {1,5}, {3} must merge to the sorted
-   * list {1,3,5,13,15,19}; frequencies 0.12f + 0.1f + 0.05f are expected to
-   * equal 0.27f (exact float comparison relied upon by the original test).
-   */
-  @Test
-  public void testAggregateManyValues() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(13);
-    builder.addWordOffset(15);
-    builder.addWordOffset(19);
-    builder.setNormalizedTermFrequency(0.12f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    
-    builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    
-    builder = createBuilder();
-    builder.addWordOffset(3);
-    builder.setNormalizedTermFrequency(0.05f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    
-    Value result = combiner.reduce(new Key(), values.iterator());
-    
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-    
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-    
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 6);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 3);
-    Assert.assertTrue(offsets.get(2) == 5);
-    Assert.assertTrue(offsets.get(3) == 13);
-    Assert.assertTrue(offsets.get(4) == 15);
-    Assert.assertTrue(offsets.get(5) == 19);
-  }
-  
-  /**
-   * Same aggregation as testAggregateManyValues, but with zero-length Values
-   * interleaved before, between, and after the real ones; the empty values
-   * must be ignored and the result must be identical.
-   */
-  @Test
-  public void testEmptyValue() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(13);
-    builder.addWordOffset(15);
-    builder.addWordOffset(19);
-    builder.setNormalizedTermFrequency(0.12f);
-    
-    values.add(new Value("".getBytes()));
-    values.add(new Value(builder.build().toByteArray()));
-    values.add(new Value("".getBytes()));
-    
-    builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    values.add(new Value("".getBytes()));
-    
-    builder = createBuilder();
-    builder.addWordOffset(3);
-    builder.setNormalizedTermFrequency(0.05f);
-    
-    values.add(new Value(builder.build().toByteArray()));
-    values.add(new Value("".getBytes()));
-    
-    Value result = combiner.reduce(new Key(), values.iterator());
-    
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-    
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-    
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 6);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 3);
-    Assert.assertTrue(offsets.get(2) == 5);
-    Assert.assertTrue(offsets.get(3) == 13);
-    Assert.assertTrue(offsets.get(4) == 15);
-    Assert.assertTrue(offsets.get(5) == 19);
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
deleted file mode 100644
index 470633c..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.normalizer;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.accumulo.examples.wikisearch.normalizer.NumberNormalizer;
-import org.junit.Test;
-
-/**
- * Unit tests for {@link NumberNormalizer}. Each test normalizes numeric
- * strings and checks, via String.compareTo on the results, that the
- * lexicographic order of the normalized forms matches the numeric order of
- * the inputs (and that numerically equal spellings normalize identically,
- * where asserted). NOTE(review): class name violates the UpperCamelCase
- * convention; kept as-is since this is a historical diff of deleted code.
- */
-public class testNumberNormalizer {
-  
-  /** Asserts that the normalized form of "1" sorts before that of "1.00000000". */
-  @Test
-  public void test1() throws Exception {
-    NumberNormalizer nn = new NumberNormalizer();
-    
-    String n1 = nn.normalizeFieldValue(null, "1");
-    String n2 = nn.normalizeFieldValue(null, "1.00000000");
-    
-    assertTrue(n1.compareTo(n2) < 0);
-    
-  }
-  
-  /** Negative values must sort before positive ones after normalization. */
-  @Test
-  public void test2() {
-    NumberNormalizer nn = new NumberNormalizer();
-    
-    String n1 = nn.normalizeFieldValue(null, "-1.0");
-    String n2 = nn.normalizeFieldValue(null, "1.0");
-    
-    assertTrue(n1.compareTo(n2) < 0);
-    
-  }
-  
-  /** Ordering must be preserved across zero: -0.0001 < 0 < 0.00001. */
-  @Test
-  public void test3() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String n1 = nn.normalizeFieldValue(null, "-0.0001");
-    String n2 = nn.normalizeFieldValue(null, "0");
-    String n3 = nn.normalizeFieldValue(null, "0.00001");
-    
-    assertTrue((n1.compareTo(n2) < 0) && (n2.compareTo(n3) < 0));
-  }
-  
-  /** Large-magnitude boundary: MAX_VALUE - 1 must sort before MAX_VALUE. */
-  @Test
-  public void test4() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String nn1 = nn.normalizeFieldValue(null, Integer.toString(Integer.MAX_VALUE));
-    String nn2 = nn.normalizeFieldValue(null, Integer.toString(Integer.MAX_VALUE - 1));
-    
-    assertTrue((nn2.compareTo(nn1) < 0));
-    
-  }
-  
-  /**
-   * Equal negatives with trailing zeros ("-0.0009" vs "-0.00090") must
-   * normalize identically, and both must sort after the more negative
-   * "-0.001".
-   */
-  @Test
-  public void test5() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String nn1 = nn.normalizeFieldValue(null, "-0.001");
-    String nn2 = nn.normalizeFieldValue(null, "-0.0009");
-    String nn3 = nn.normalizeFieldValue(null, "-0.00090");
-    
-    assertTrue((nn3.compareTo(nn2) == 0) && (nn2.compareTo(nn1) > 0));
-    
-  }
-  
-  /** All spellings of zero ("00.0", "0", "0.0") must normalize to the same string. */
-  @Test
-  public void test6() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String nn1 = nn.normalizeFieldValue(null, "00.0");
-    String nn2 = nn.normalizeFieldValue(null, "0");
-    String nn3 = nn.normalizeFieldValue(null, "0.0");
-    
-    assertTrue((nn3.compareTo(nn2) == 0) && (nn2.compareTo(nn1) == 0));
-    
-  }
-  
-}


Mime
View raw message