Return-Path: X-Original-To: apmail-accumulo-commits-archive@www.apache.org Delivered-To: apmail-accumulo-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7167010858 for ; Tue, 26 Nov 2013 15:19:06 +0000 (UTC) Received: (qmail 63080 invoked by uid 500); 26 Nov 2013 15:18:18 -0000 Delivered-To: apmail-accumulo-commits-archive@accumulo.apache.org Received: (qmail 62812 invoked by uid 500); 26 Nov 2013 15:18:14 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 62308 invoked by uid 99); 26 Nov 2013 15:18:08 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 26 Nov 2013 15:18:08 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 8D9FC91850D; Tue, 26 Nov 2013 15:17:56 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ecn@apache.org To: commits@accumulo.apache.org Date: Tue, 26 Nov 2013 15:18:10 -0000 Message-Id: <800a218695184413a8040c81301d121c@git.apache.org> In-Reply-To: <35172c83b6424670a9d2ec8231f9f5b0@git.apache.org> References: <35172c83b6424670a9d2ec8231f9f5b0@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [15/37] ACCUMULO-600 removed wikisearch from trunk http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java deleted file mode 100644 index c469748..0000000 --- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/protobuf/Uid.java +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -// Generated by the protocol buffer compiler. DO NOT EDIT! -// source: Uid.proto - -package org.apache.accumulo.examples.wikisearch.protobuf; - -public final class Uid { - private Uid() {} - - public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {} - - public static final class List extends com.google.protobuf.GeneratedMessage { - // Use List.newBuilder() to construct. 
- private List() { - initFields(); - } - - private List(boolean noInit) {} - - private static final List defaultInstance; - - public static List getDefaultInstance() { - return defaultInstance; - } - - public List getDefaultInstanceForType() { - return defaultInstance; - } - - public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { - return org.apache.accumulo.examples.wikisearch.protobuf.Uid.internal_static_protobuf_List_descriptor; - } - - protected com.google.protobuf.GeneratedMessage.FieldAccessorTable internalGetFieldAccessorTable() { - return org.apache.accumulo.examples.wikisearch.protobuf.Uid.internal_static_protobuf_List_fieldAccessorTable; - } - - // required bool IGNORE = 1; - public static final int IGNORE_FIELD_NUMBER = 1; - private boolean hasIGNORE; - private boolean iGNORE_ = false; - - public boolean hasIGNORE() { - return hasIGNORE; - } - - public boolean getIGNORE() { - return iGNORE_; - } - - // required uint64 COUNT = 2; - public static final int COUNT_FIELD_NUMBER = 2; - private boolean hasCOUNT; - private long cOUNT_ = 0L; - - public boolean hasCOUNT() { - return hasCOUNT; - } - - public long getCOUNT() { - return cOUNT_; - } - - // repeated string UID = 3; - public static final int UID_FIELD_NUMBER = 3; - private java.util.List uID_ = java.util.Collections.emptyList(); - - public java.util.List getUIDList() { - return uID_; - } - - public int getUIDCount() { - return uID_.size(); - } - - public java.lang.String getUID(int index) { - return uID_.get(index); - } - - private void initFields() {} - - public final boolean isInitialized() { - if (!hasIGNORE) - return false; - if (!hasCOUNT) - return false; - return true; - } - - public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException { - getSerializedSize(); - if (hasIGNORE()) { - output.writeBool(1, getIGNORE()); - } - if (hasCOUNT()) { - output.writeUInt64(2, getCOUNT()); - } - for (java.lang.String element : getUIDList()) { - output.writeString(3, element); - } - getUnknownFields().writeTo(output); - } - - private int memoizedSerializedSize = -1; - - public int getSerializedSize() { - int size = memoizedSerializedSize; - if (size != -1) - return size; - - size = 0; - if (hasIGNORE()) { - size += com.google.protobuf.CodedOutputStream.computeBoolSize(1, getIGNORE()); - } - if (hasCOUNT()) { - size += com.google.protobuf.CodedOutputStream.computeUInt64Size(2, getCOUNT()); - } - { - int dataSize = 0; - for (java.lang.String element : getUIDList()) { - dataSize += com.google.protobuf.CodedOutputStream.computeStringSizeNoTag(element); - } - size += dataSize; - size += 1 * getUIDList().size(); - } - size += getUnknownFields().getSerializedSize(); - memoizedSerializedSize = size; - return size; - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data).buildParsed(); - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) - throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data, extensionRegistry).buildParsed(); - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data).buildParsed(); - } - - 
public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) - throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data, extensionRegistry).buildParsed(); - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(java.io.InputStream input) throws java.io.IOException { - return newBuilder().mergeFrom(input).buildParsed(); - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) - throws java.io.IOException { - return newBuilder().mergeFrom(input, extensionRegistry).buildParsed(); - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException { - Builder builder = newBuilder(); - if (builder.mergeDelimitedFrom(input)) { - return builder.buildParsed(); - } else { - return null; - } - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) - throws java.io.IOException { - Builder builder = newBuilder(); - if (builder.mergeDelimitedFrom(input, extensionRegistry)) { - return builder.buildParsed(); - } else { - return null; - } - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input) throws java.io.IOException { - return newBuilder().mergeFrom(input).buildParsed(); - } - - public static org.apache.accumulo.examples.wikisearch.protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) - throws java.io.IOException { - return newBuilder().mergeFrom(input, extensionRegistry).buildParsed(); - } - - public static Builder newBuilder() { - return Builder.create(); - } - - public Builder newBuilderForType() { - return newBuilder(); - } - - public static Builder newBuilder(org.apache.accumulo.examples.wikisearch.protobuf.Uid.List prototype) { - return newBuilder().mergeFrom(prototype); - } - - public Builder toBuilder() { - return newBuilder(this); - } - - public static final class Builder extends com.google.protobuf.GeneratedMessage.Builder { - private org.apache.accumulo.examples.wikisearch.protobuf.Uid.List result; - - // Construct using protobuf.Uid.List.newBuilder() - private Builder() {} - - private static Builder create() { - Builder builder = new Builder(); - builder.result = new org.apache.accumulo.examples.wikisearch.protobuf.Uid.List(); - return builder; - } - - protected org.apache.accumulo.examples.wikisearch.protobuf.Uid.List internalGetResult() { - return result; - } - - public Builder clear() { - if (result == null) { - throw new IllegalStateException("Cannot call clear() after build()."); - } - result = new org.apache.accumulo.examples.wikisearch.protobuf.Uid.List(); - return this; - } - - public Builder clone() { - return create().mergeFrom(result); - } - - public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { - return org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDescriptor(); - } - - public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List getDefaultInstanceForType() { - return org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDefaultInstance(); - } - - public boolean isInitialized() { - 
return result.isInitialized(); - } - - public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List build() { - if (result != null && !isInitialized()) { - throw newUninitializedMessageException(result); - } - return buildPartial(); - } - - private org.apache.accumulo.examples.wikisearch.protobuf.Uid.List buildParsed() throws com.google.protobuf.InvalidProtocolBufferException { - if (!isInitialized()) { - throw newUninitializedMessageException(result).asInvalidProtocolBufferException(); - } - return buildPartial(); - } - - public org.apache.accumulo.examples.wikisearch.protobuf.Uid.List buildPartial() { - if (result == null) { - throw new IllegalStateException("build() has already been called on this Builder."); - } - if (result.uID_ != java.util.Collections.EMPTY_LIST) { - result.uID_ = java.util.Collections.unmodifiableList(result.uID_); - } - org.apache.accumulo.examples.wikisearch.protobuf.Uid.List returnMe = result; - result = null; - return returnMe; - } - - public Builder mergeFrom(com.google.protobuf.Message other) { - if (other instanceof org.apache.accumulo.examples.wikisearch.protobuf.Uid.List) { - return mergeFrom((org.apache.accumulo.examples.wikisearch.protobuf.Uid.List) other); - } else { - super.mergeFrom(other); - return this; - } - } - - public Builder mergeFrom(org.apache.accumulo.examples.wikisearch.protobuf.Uid.List other) { - if (other == org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.getDefaultInstance()) - return this; - if (other.hasIGNORE()) { - setIGNORE(other.getIGNORE()); - } - if (other.hasCOUNT()) { - setCOUNT(other.getCOUNT()); - } - if (!other.uID_.isEmpty()) { - if (result.uID_.isEmpty()) { - result.uID_ = new java.util.ArrayList(); - } - result.uID_.addAll(other.uID_); - } - this.mergeUnknownFields(other.getUnknownFields()); - return this; - } - - public Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) - throws java.io.IOException { - com.google.protobuf.UnknownFieldSet.Builder unknownFields = com.google.protobuf.UnknownFieldSet.newBuilder(this.getUnknownFields()); - while (true) { - int tag = input.readTag(); - switch (tag) { - case 0: - this.setUnknownFields(unknownFields.build()); - return this; - default: { - if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) { - this.setUnknownFields(unknownFields.build()); - return this; - } - break; - } - case 8: { - setIGNORE(input.readBool()); - break; - } - case 16: { - setCOUNT(input.readUInt64()); - break; - } - case 26: { - addUID(input.readString()); - break; - } - } - } - } - - // required bool IGNORE = 1; - public boolean hasIGNORE() { - return result.hasIGNORE(); - } - - public boolean getIGNORE() { - return result.getIGNORE(); - } - - public Builder setIGNORE(boolean value) { - result.hasIGNORE = true; - result.iGNORE_ = value; - return this; - } - - public Builder clearIGNORE() { - result.hasIGNORE = false; - result.iGNORE_ = false; - return this; - } - - // required uint64 COUNT = 2; - public boolean hasCOUNT() { - return result.hasCOUNT(); - } - - public long getCOUNT() { - return result.getCOUNT(); - } - - public Builder setCOUNT(long value) { - result.hasCOUNT = true; - result.cOUNT_ = value; - return this; - } - - public Builder clearCOUNT() { - result.hasCOUNT = false; - result.cOUNT_ = 0L; - return this; - } - - // repeated string UID = 3; - public java.util.List getUIDList() { - return java.util.Collections.unmodifiableList(result.uID_); - } - - public int getUIDCount() { - return 
result.getUIDCount(); - } - - public java.lang.String getUID(int index) { - return result.getUID(index); - } - - public Builder setUID(int index, java.lang.String value) { - if (value == null) { - throw new NullPointerException(); - } - result.uID_.set(index, value); - return this; - } - - public Builder addUID(java.lang.String value) { - if (value == null) { - throw new NullPointerException(); - } - if (result.uID_.isEmpty()) { - result.uID_ = new java.util.ArrayList(); - } - result.uID_.add(value); - return this; - } - - public Builder addAllUID(java.lang.Iterable values) { - if (result.uID_.isEmpty()) { - result.uID_ = new java.util.ArrayList(); - } - super.addAll(values, result.uID_); - return this; - } - - public Builder clearUID() { - result.uID_ = java.util.Collections.emptyList(); - return this; - } - - // @@protoc_insertion_point(builder_scope:protobuf.List) - } - - static { - defaultInstance = new List(true); - org.apache.accumulo.examples.wikisearch.protobuf.Uid.internalForceInit(); - defaultInstance.initFields(); - } - - // @@protoc_insertion_point(class_scope:protobuf.List) - } - - private static com.google.protobuf.Descriptors.Descriptor internal_static_protobuf_List_descriptor; - private static com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_protobuf_List_fieldAccessorTable; - - public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { - return descriptor; - } - - private static com.google.protobuf.Descriptors.FileDescriptor descriptor; - static { - java.lang.String[] descriptorData = {"\n\tUid.proto\022\010protobuf\"2\n\004List\022\016\n\006IGNORE\030" - + "\001 \002(\010\022\r\n\005COUNT\030\002 \002(\004\022\013\n\003UID\030\003 \003(\tB\014\n\010pro" + "tobufH\001"}; - com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { - public com.google.protobuf.ExtensionRegistry assignDescriptors(com.google.protobuf.Descriptors.FileDescriptor root) { - descriptor = root; - internal_static_protobuf_List_descriptor = getDescriptor().getMessageTypes().get(0); - internal_static_protobuf_List_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( - internal_static_protobuf_List_descriptor, new java.lang.String[] {"IGNORE", "COUNT", "UID",}, org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.class, - org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder.class); - return null; - } - }; - com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] {}, - assigner); - } - - public static void internalForceInit() {} - - // @@protoc_insertion_point(outer_class_scope) -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java deleted file mode 100644 index 09755c0..0000000 --- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.reader; - - -import java.io.IOException; - -import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration; -import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit; -import org.apache.accumulo.examples.wikisearch.util.TextUtil; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.TaskAttemptContext; - - -/** - * This class aggregates Text values based on a start and end filter. An example use case for this would be XML data. This will not work with data that has - * nested start and stop tokens. - * - */ -public class AggregatingRecordReader extends LongLineRecordReader { - - public static final String START_TOKEN = "aggregating.token.start"; - public static final String END_TOKEN = "aggregating.token.end"; - public static final String RETURN_PARTIAL_MATCHES = "aggregating.allow.partial"; - - private LongWritable key = new LongWritable(); - private String startToken = null; - private String endToken = null; - private long counter = 0; - private Text aggValue = new Text(); - private boolean startFound = false; - private StringBuilder remainder = new StringBuilder(0); - private boolean returnPartialMatches = false; - - @Override - public LongWritable getCurrentKey() { - key.set(counter); - return key; - } - - @Override - public Text getCurrentValue() { - return aggValue; - } - - @Override - public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { - super.initialize(((WikipediaInputSplit)genericSplit).getFileSplit(), context); - this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class); - this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class); - this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false); - - /* - * Text-appending works almost exactly like the + operator on Strings- it creates a byte array exactly the size of [prefix + suffix] and dumps the bytes - * into the new array. This module works by doing lots of little additions, one line at a time. With most XML, the documents are partitioned on line - * boundaries, so we will generally have lots of additions. Setting a large default byte array for a text object can avoid this and give us - * StringBuilder-like functionality for Text objects. 
- */ - byte[] txtBuffer = new byte[2048]; - aggValue.set(txtBuffer); - } - - @Override - public boolean nextKeyValue() throws IOException { - aggValue.clear(); - boolean hasNext = false; - boolean finished = false; - // Find the start token - while (!finished && (((hasNext = super.nextKeyValue()) == true) || remainder.length() > 0)) { - if (hasNext) - finished = process(super.getCurrentValue()); - else - finished = process(null); - if (finished) { - startFound = false; - counter++; - return true; - } - } - // If we have anything loaded in the agg value (and we found a start) - // then we ran out of data before finding the end. Just return the - // data we have and if it's not valid, downstream parsing of the data - // will fail. - if (returnPartialMatches && startFound && aggValue.getLength() > 0) { - startFound = false; - counter++; - return true; - } - return false; - } - - /** - * Populates aggValue with the contents of the Text object. - * - * @param t - * @return true if aggValue is complete, else false and needs more data. - */ - private boolean process(Text t) { - - if (null != t) - remainder.append(t.toString()); - while (remainder.length() > 0) { - if (!startFound) { - // If found, then begin aggregating at the start offset - int start = remainder.indexOf(startToken); - if (-1 != start) { - // Append the start token to the aggregate value - TextUtil.textAppendNoNull(aggValue, remainder.substring(start, start + startToken.length()), false); - // Remove to the end of the start token from the remainder - remainder.delete(0, start + startToken.length()); - startFound = true; - } else { - // If we are looking for the start and have not found it, then remove - // the bytes - remainder.delete(0, remainder.length()); - } - } else { - // Try to find the end - int end = remainder.indexOf(endToken); - // Also try to find the start - int start = remainder.indexOf(startToken); - if (-1 == end) { - if (returnPartialMatches && start >= 0) { - // End token not found, but another start token was found... - // The amount to copy is up to the beginning of the next start token - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false); - remainder.delete(0, start); - return true; - } else { - // Not found, aggregate the entire remainder - TextUtil.textAppendNoNull(aggValue, remainder.toString(), false); - // Delete all chars from remainder - remainder.delete(0, remainder.length()); - } - } else { - if (returnPartialMatches && start >= 0 && start < end) { - // We found the end token, but found another start token first, so - // deal with that. - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false); - remainder.delete(0, start); - return true; - } else { - // END_TOKEN was found. 
Extract to the end of END_TOKEN - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false); - // Remove from remainder up to the end of END_TOKEN - remainder.delete(0, end + endToken.length()); - return true; - } - } - } - } - return false; - } - -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java deleted file mode 100644 index a4da0ad..0000000 --- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.reader; - -import java.io.IOException; -import java.io.InputStream; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; - -/** - * A class that provides a line reader from an input stream. - */ -public class LfLineReader { - private static final int DEFAULT_BUFFER_SIZE = 64 * 1024; - private int bufferSize = DEFAULT_BUFFER_SIZE; - private InputStream in; - private byte[] buffer; - // the number of bytes of real data in the buffer - private int bufferLength = 0; - // the current position in the buffer - private int bufferPosn = 0; - - private static final byte LF = '\n'; - - /** - * Create a line reader that reads from the given stream using the default buffer-size (64k). - * - * @param in - * The input stream - * @throws IOException - */ - public LfLineReader(InputStream in) { - this(in, DEFAULT_BUFFER_SIZE); - } - - /** - * Create a line reader that reads from the given stream using the given buffer-size. - * - * @param in - * The input stream - * @param bufferSize - * Size of the read buffer - * @throws IOException - */ - public LfLineReader(InputStream in, int bufferSize) { - this.in = in; - this.bufferSize = bufferSize; - this.buffer = new byte[this.bufferSize]; - } - - /** - * Create a line reader that reads from the given stream using the io.file.buffer.size specified in the given Configuration. - * - * @param in - * input stream - * @param conf - * configuration - * @throws IOException - */ - public LfLineReader(InputStream in, Configuration conf) throws IOException { - this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE)); - } - - /** - * Close the underlying stream. 
- * - * @throws IOException - */ - public void close() throws IOException { - in.close(); - } - - /** - * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). EOF also terminates an otherwise unterminated line. - * - * @param str - * the object to store the given line (without newline) - * @param maxLineLength - * the maximum number of bytes to store into str; the rest of the line is silently discarded. - * @param maxBytesToConsume - * the maximum number of bytes to consume in this call. This is only a hint, because if the line cross this threshold, we allow it to happen. It can - * overshoot potentially by as much as one buffer length. - * - * @return the number of bytes read including the (longest) newline found. - * - * @throws IOException - * if the underlying stream throws - */ - public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { - /* - * We're reading data from in, but the head of the stream may be already buffered in buffer, so we have several cases: 1. No newline characters are in the - * buffer, so we need to copy everything and read another buffer from the stream. 2. An unambiguously terminated line is in buffer, so we just copy to str. - */ - str.clear(); - int txtLength = 0; // tracks str.getLength(), as an optimization - int newlineLength = 0; // length of terminating newline - long bytesConsumed = 0; - do { - int startPosn = bufferPosn; // starting from where we left off the last time - if (bufferPosn >= bufferLength) { - startPosn = bufferPosn = 0; - bufferLength = in.read(buffer); - if (bufferLength <= 0) - break; // EOF - } - for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline - if (buffer[bufferPosn] == LF) { - newlineLength = 1; - ++bufferPosn; // at next invocation proceed from following byte - break; - } - } - int readLength = bufferPosn - startPosn; - bytesConsumed += readLength; - int appendLength = readLength - newlineLength; - if (appendLength > maxLineLength - txtLength) { - appendLength = maxLineLength - txtLength; - } - if (appendLength > 0) { - str.append(buffer, startPosn, appendLength); - txtLength += appendLength; - } - } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); - - if (bytesConsumed > Integer.MAX_VALUE) - throw new IOException("Too many bytes before newline: " + bytesConsumed); - return (int) bytesConsumed; - } - - /** - * Read from the InputStream into the given Text. - * - * @param str - * the object to store the given line - * @param maxLineLength - * the maximum number of bytes to store into str. - * @return the number of bytes read including the newline - * @throws IOException - * if the underlying stream throws - */ - public int readLine(Text str, int maxLineLength) throws IOException { - return readLine(str, maxLineLength, Integer.MAX_VALUE); - } - - /** - * Read from the InputStream into the given Text. 
- * - * @param str - * the object to store the given line - * @return the number of bytes read including the newline - * @throws IOException - * if the underlying stream throws - */ - public int readLine(Text str) throws IOException { - return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE); - } - -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java deleted file mode 100644 index f36c373..0000000 --- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LongLineRecordReader.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.reader; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.CompressionCodecFactory; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.hadoop.mapreduce.lib.input.LineRecordReader; -import org.apache.hadoop.util.LineReader; - -/** - * A copy of {@link LineRecordReader} which does not discard lines longer than "mapred.linerecordreader.maxlength". Instead, it returns them, leaving it to the - * mapper to decide what to do with it. It also does not treat '\r' (CR) characters as new lines -- it uses {@link LfLineReader} instead of {@link LineReader} - * to read lines. 
- */ -public class LongLineRecordReader extends RecordReader { - private CompressionCodecFactory compressionCodecs = null; - private long start; - private long pos; - private long end; - private LfLineReader in; - private int maxLineLength; - private LongWritable key = null; - private Text value = null; - - @Override - public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { - FileSplit split = (FileSplit) genericSplit; - Configuration job = context.getConfiguration(); - this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); - start = split.getStart(); - end = start + split.getLength(); - final Path file = split.getPath(); - compressionCodecs = new CompressionCodecFactory(job); - final CompressionCodec codec = compressionCodecs.getCodec(file); - - // open the file and seek to the start of the split - FileSystem fs = file.getFileSystem(job); - FSDataInputStream fileIn = fs.open(split.getPath()); - boolean skipFirstLine = false; - if (codec != null) { - in = new LfLineReader(codec.createInputStream(fileIn), job); - end = Long.MAX_VALUE; - } else { - if (start != 0) { - skipFirstLine = true; - --start; - fileIn.seek(start); - } - in = new LfLineReader(fileIn, job); - } - if (skipFirstLine) { // skip first line and re-establish "start". - start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start)); - } - this.pos = start; - } - - @Override - public boolean nextKeyValue() throws IOException { - if (key == null) { - key = new LongWritable(); - } - key.set(pos); - if (value == null) { - value = new Text(); - } - int newSize = 0; - if (pos < end) { - newSize = in.readLine(value, maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength)); - if (newSize != 0) { - pos += newSize; - } - } - if (newSize == 0) { - key = null; - value = null; - return false; - } else { - return true; - } - } - - @Override - public LongWritable getCurrentKey() { - return key; - } - - @Override - public Text getCurrentValue() { - return value; - } - - /** - * Get the progress within the split - */ - @Override - public float getProgress() { - if (start == end) { - return 0.0f; - } else { - return Math.min(1.0f, (pos - start) / (float) (end - start)); - } - } - - @Override - public synchronized void close() throws IOException { - if (in != null) { - in.close(); - } - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java b/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java deleted file mode 100644 index 1623d55..0000000 --- a/src/examples/wikisearch/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.util; - -import java.nio.ByteBuffer; -import java.nio.charset.CharacterCodingException; - -import org.apache.accumulo.core.iterators.user.SummingCombiner; -import org.apache.hadoop.io.Text; - -public class TextUtil { - - /** - * Appends a null byte followed by the UTF-8 bytes of the given string to the given {@link Text} - * - * @param text - * the Text to which to append - * @param string - * the String to append - */ - public static void textAppend(Text text, String string) { - appendNullByte(text); - textAppendNoNull(text, string); - } - - public static void textAppend(Text text, String string, boolean replaceBadChar) { - appendNullByte(text); - textAppendNoNull(text, string, replaceBadChar); - } - - public static void textAppend(Text t, long s) { - t.append(nullByte, 0, 1); - t.append(SummingCombiner.FIXED_LEN_ENCODER.encode(s), 0, 8); - } - - private static final byte[] nullByte = {0}; - - /** - * Appends a null byte to the given text - * - * @param text - * the text to which to append the null byte - */ - public static void appendNullByte(Text text) { - text.append(nullByte, 0, nullByte.length); - } - - /** - * Appends the UTF-8 bytes of the given string to the given {@link Text} - * - * @param t - * the Text to which to append - * @param s - * the String to append - */ - public static void textAppendNoNull(Text t, String s) { - textAppendNoNull(t, s, false); - } - - /** - * Appends the UTF-8 bytes of the given string to the given {@link Text} - * - * @param t - * @param s - * @param replaceBadChar - */ - public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) { - try { - ByteBuffer buffer = Text.encode(s, replaceBadChar); - t.append(buffer.array(), 0, buffer.limit()); - } catch (CharacterCodingException cce) { - throw new IllegalArgumentException(cce); - } - } - - /** - * Converts the given string its UTF-8 bytes. This uses Hadoop's method for converting string to UTF-8 and is much faster than calling - * {@link String#getBytes(String)}. - * - * @param string - * the string to convert - * @return the UTF-8 representation of the string - */ - public static byte[] toUtf8(String string) { - ByteBuffer buffer; - try { - buffer = Text.encode(string, false); - } catch (CharacterCodingException cce) { - throw new IllegalArgumentException(cce); - } - byte[] bytes = new byte[buffer.limit()]; - System.arraycopy(buffer.array(), 0, bytes, 0, bytes.length); - return bytes; - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto b/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto deleted file mode 100644 index 41ae188..0000000 --- a/src/examples/wikisearch/ingest/src/main/protobuf/TermWeight.proto +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. 
See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// compile with protoc --java_out ../java -// compile extra builder util with java accumulo.data.protobuf.builder.ProtoBufBuilder -d ../java accumulo.data.protobuf.UidList -// classpath for compile command should include ../../../target/classes and protobuf-java-2.2.0.jar - -package protobuf; - -option java_package = "protobuf"; -option optimize_for = SPEED; - -message Info { - required float normalizedTermFrequency = 1; - repeated uint32 wordOffset = 2; -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto b/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto deleted file mode 100644 index 30aa446..0000000 --- a/src/examples/wikisearch/ingest/src/main/protobuf/Uid.proto +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// compile with protoc --java_out ../java -// compile extra builder util with java accumulo.data.protobuf.builder.ProtoBufBuilder -d ../java accumulo.data.protobuf.UidList -// classpath for compile command should include ../../../target/classes and protobuf-java-2.2.0.jar - -package protobuf; - -option java_package = "protobuf"; -option optimize_for = SPEED; - -message List { - required bool IGNORE = 1; - required uint64 COUNT = 2; - repeated string UID = 3; -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh b/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh deleted file mode 100755 index 6702998..0000000 --- a/src/examples/wikisearch/ingest/src/main/protobuf/compile_protos.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. 
See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -for PROTO in `ls -1 *proto`; do protoc --java_out ../java $PROTO; done http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java deleted file mode 100644 index 6af1e9b..0000000 --- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.accumulo.examples.wikisearch.ingest; - -import org.apache.hadoop.mapreduce.Counter; -import org.apache.hadoop.mapreduce.Counters; -import org.apache.hadoop.mapreduce.StatusReporter; - -public class StandaloneStatusReporter extends StatusReporter { - - private Counters c = new Counters(); - - private long filesProcessed = 0; - private long recordsProcessed = 0; - - public Counters getCounters() { - return c; - } - - @Override - public Counter getCounter(Enum name) { - return c.findCounter(name); - } - - @Override - public Counter getCounter(String group, String name) { - return c.findCounter(group, name); - } - - @Override - public void progress() { - // do nothing - } - - @Override - public void setStatus(String status) { - // do nothing - } - - public long getFilesProcessed() { - return filesProcessed; - } - - public long getRecordsProcessed() { - return recordsProcessed; - } - - public void incrementFilesProcessed() { - filesProcessed++; - recordsProcessed = 0; - } - - public void incrementRecordsProcessed() { - recordsProcessed++; - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java deleted file mode 100644 index f6b2791..0000000 --- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.accumulo.examples.wikisearch.ingest; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; - -import junit.framework.Assert; - -import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.junit.Test; - -public class WikipediaInputSplitTest { - @Test - public void testSerialization() throws IOException { - Path testPath = new Path("/foo/bar"); - String[] hosts = new String[2]; - hosts[0] = "abcd"; - hosts[1] = "efgh"; - FileSplit fSplit = new FileSplit(testPath, 1, 2, hosts); - WikipediaInputSplit split = new WikipediaInputSplit(fSplit, 7); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - ObjectOutputStream out = new ObjectOutputStream(baos); - split.write(out); - out.close(); - baos.close(); - - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - DataInput in = new ObjectInputStream(bais); - - WikipediaInputSplit split2 = new WikipediaInputSplit(); - split2.readFields(in); - Assert.assertTrue(bais.available() == 0); - bais.close(); - - Assert.assertTrue(split.getPartition() == split2.getPartition()); - - FileSplit fSplit2 = split2.getFileSplit(); - Assert.assertTrue(fSplit.getPath().equals(fSplit2.getPath())); - Assert.assertTrue(fSplit.getStart() == fSplit2.getStart()); - Assert.assertTrue(fSplit.getLength() == fSplit2.getLength()); - - String[] hosts2 = fSplit2.getLocations(); - Assert.assertEquals(hosts.length, hosts2.length); - for (int i = 0; i < hosts.length; i++) { - Assert.assertEquals(hosts[i], hosts2[i]); - } - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java deleted file mode 100644 index c659ec4..0000000 --- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.accumulo.examples.wikisearch.ingest; - -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.util.HashMap; -import java.util.Map.Entry; - -import junit.framework.Assert; - -import org.apache.accumulo.core.client.BatchWriter; -import org.apache.accumulo.core.client.Connector; -import org.apache.accumulo.core.client.MutationsRejectedException; -import org.apache.accumulo.core.client.Scanner; -import org.apache.accumulo.core.client.mock.MockInstance; -import org.apache.accumulo.core.data.Key; -import org.apache.accumulo.core.data.Mutation; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.core.util.ContextFactory; -import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RawLocalFileSystem; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; -import org.junit.Before; - -/** - * Load some data into mock accumulo - */ -public class WikipediaMapperTest { - - private static final String METADATA_TABLE_NAME = "wikiMetadata"; - - private static final String TABLE_NAME = "wiki"; - - private static final String INDEX_TABLE_NAME = "wikiIndex"; - - private static final String RINDEX_TABLE_NAME = "wikiReverseIndex"; - - private class MockAccumuloRecordWriter extends RecordWriter { - @Override - public void write(Text key, Mutation value) throws IOException, InterruptedException { - try { - writerMap.get(key).addMutation(value); - } catch (MutationsRejectedException e) { - throw new IOException("Error adding mutation", e); - } - } - - @Override - public void close(TaskAttemptContext context) throws IOException, InterruptedException { - try { - for (BatchWriter w : writerMap.values()) { - w.flush(); - w.close(); - } - } catch (MutationsRejectedException e) { - throw new IOException("Error closing Batch Writer", e); - } - } - - } - - private Connector c = null; - private Configuration conf = new Configuration(); - private HashMap writerMap = new HashMap(); - - @Before - public void setup() throws Exception { - - conf.set(AggregatingRecordReader.START_TOKEN, ""); - conf.set(AggregatingRecordReader.END_TOKEN, ""); - conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME); - conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1"); - conf.set(WikipediaConfiguration.NUM_GROUPS, "1"); - - MockInstance i = new MockInstance(); - c = i.getConnector("root", "pass"); - c.tableOperations().delete(METADATA_TABLE_NAME); - c.tableOperations().delete(TABLE_NAME); - c.tableOperations().delete(INDEX_TABLE_NAME); - c.tableOperations().delete(RINDEX_TABLE_NAME); - c.tableOperations().create(METADATA_TABLE_NAME); - c.tableOperations().create(TABLE_NAME); - c.tableOperations().create(INDEX_TABLE_NAME); - c.tableOperations().create(RINDEX_TABLE_NAME); - - writerMap.put(new Text(METADATA_TABLE_NAME), c.createBatchWriter(METADATA_TABLE_NAME, 1000L, 1000L, 1)); - writerMap.put(new Text(TABLE_NAME), c.createBatchWriter(TABLE_NAME, 
1000L, 1000L, 1)); - writerMap.put(new Text(INDEX_TABLE_NAME), c.createBatchWriter(INDEX_TABLE_NAME, 1000L, 1000L, 1)); - writerMap.put(new Text(RINDEX_TABLE_NAME), c.createBatchWriter(RINDEX_TABLE_NAME, 1000L, 1000L, 1)); - - TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf); - - RawLocalFileSystem fs = new RawLocalFileSystem(); - fs.setConf(conf); - - URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml"); - Assert.assertNotNull(url); - File data = new File(url.toURI()); - Path tmpFile = new Path(data.getAbsolutePath()); - - // Setup the Mapper - InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null); - AggregatingRecordReader rr = new AggregatingRecordReader(); - Path ocPath = new Path(tmpFile, "oc"); - OutputCommitter oc = new FileOutputCommitter(ocPath, context); - fs.deleteOnExit(ocPath); - StandaloneStatusReporter sr = new StandaloneStatusReporter(); - rr.initialize(split, context); - MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter(); - WikipediaMapper mapper = new WikipediaMapper(); - - // Load data into Mock Accumulo - Mapper.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split); - mapper.run(con); - - // Flush and close record writers. - rw.close(context); - - } - - private void debugQuery(String tableName) throws Exception { - Scanner s = c.createScanner(tableName, new Authorizations("all")); - Range r = new Range(); - s.setRange(r); - for (Entry entry : s) - System.out.println(entry.getKey().toString() + " " + entry.getValue().toString()); - } - - public void testViewAllData() throws Exception { - debugQuery(METADATA_TABLE_NAME); - debugQuery(TABLE_NAME); - debugQuery(INDEX_TABLE_NAME); - debugQuery(RINDEX_TABLE_NAME); - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java deleted file mode 100644 index 6619ede..0000000 --- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/GlobalIndexUidTest.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.accumulo.examples.wikisearch.iterator;
-
-import static org.junit.Assert.assertTrue;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.UUID;
-
-import org.apache.accumulo.core.client.IteratorSetting;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.iterators.Combiner;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid;
-import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.junit.Before;
-import org.junit.Test;
-
-public class GlobalIndexUidTest {
-  private GlobalIndexUidCombiner combiner;
-  private List<Value> values;
-
-  @Before
-  public void setup() throws Exception {
-    combiner = new GlobalIndexUidCombiner();
-    combiner.init(null, Collections.singletonMap("all", "true"), null);
-    values = new ArrayList<Value>();
-  }
-
-  private Uid.List.Builder createNewUidList() {
-    return Uid.List.newBuilder();
-  }
-
-  @Test
-  public void testSingleUid() {
-    Builder b = createNewUidList();
-    b.setCOUNT(1);
-    b.setIGNORE(false);
-    b.addUID(UUID.randomUUID().toString());
-    Uid.List uidList = b.build();
-    Value val = new Value(uidList.toByteArray());
-    values.add(val);
-    Value result = combiner.reduce(new Key(), values.iterator());
-    assertTrue(val.compareTo(result.get()) == 0);
-  }
-
-  @Test
-  public void testLessThanMax() throws Exception {
-    List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidCombiner.MAX - 1; i++) {
-      Builder b = createNewUidList();
-      b.setIGNORE(false);
-      String uuid = UUID.randomUUID().toString();
-      savedUUIDs.add(uuid);
-      b.setCOUNT(i);
-      b.addUID(uuid);
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX - 1));
-    List<String> resultListUUIDs = resultList.getUIDList();
-    for (String s : savedUUIDs)
-      assertTrue(resultListUUIDs.contains(s));
-  }
-
-  @Test
-  public void testEqualsMax() throws Exception {
-    List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidCombiner.MAX; i++) {
-      Builder b = createNewUidList();
-      b.setIGNORE(false);
-      String uuid = UUID.randomUUID().toString();
-      savedUUIDs.add(uuid);
-      b.setCOUNT(i);
-      b.addUID(uuid);
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == (GlobalIndexUidCombiner.MAX));
-    List<String> resultListUUIDs = resultList.getUIDList();
-    for (String s : savedUUIDs)
-      assertTrue(resultListUUIDs.contains(s));
-  }
-
-  @Test
-  public void testMoreThanMax() throws Exception {
-    List<String> savedUUIDs = new ArrayList<String>();
-    for (int i = 0; i < GlobalIndexUidCombiner.MAX + 10; i++) {
-      Builder b = createNewUidList();
-      b.setIGNORE(false);
-      String uuid = UUID.randomUUID().toString();
-      savedUUIDs.add(uuid);
-      b.setCOUNT(1);
-      b.addUID(uuid);
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == true);
-    assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == (GlobalIndexUidCombiner.MAX + 10));
-  }
-
-  @Test
-  public void testSeenIgnore() throws Exception {
-    Builder b = createNewUidList();
-    b.setIGNORE(true);
-    b.setCOUNT(0);
-    Uid.List uidList = b.build();
-    Value val = new Value(uidList.toByteArray());
-    values.add(val);
-    b = createNewUidList();
-    b.setIGNORE(false);
-    b.setCOUNT(1);
-    b.addUID(UUID.randomUUID().toString());
-    uidList = b.build();
-    val = new Value(uidList.toByteArray());
-    values.add(val);
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == true);
-    assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == 1);
-  }
-
-  @Test
-  public void testInvalidValueType() throws Exception {
-    Combiner comb = new GlobalIndexUidCombiner();
-    IteratorSetting setting = new IteratorSetting(1, GlobalIndexUidCombiner.class);
-    GlobalIndexUidCombiner.setCombineAllColumns(setting, true);
-    GlobalIndexUidCombiner.setLossyness(setting, true);
-    comb.init(null, setting.getOptions(), null);
-    Logger.getLogger(GlobalIndexUidCombiner.class).setLevel(Level.OFF);
-    Value val = new Value(UUID.randomUUID().toString().getBytes());
-    values.add(val);
-    Value result = comb.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == 0);
-    assertTrue(resultList.getCOUNT() == 0);
-  }
-
-  @Test
-  public void testCount() throws Exception {
-    UUID uuid = UUID.randomUUID();
-    // Collect the same UUID five times.
-    for (int i = 0; i < 5; i++) {
-      Builder b = createNewUidList();
-      b.setCOUNT(1);
-      b.setIGNORE(false);
-      b.addUID(uuid.toString());
-      Uid.List uidList = b.build();
-      Value val = new Value(uidList.toByteArray());
-      values.add(val);
-    }
-    Value result = combiner.reduce(new Key(), values.iterator());
-    Uid.List resultList = Uid.List.parseFrom(result.get());
-    assertTrue(resultList.getIGNORE() == false);
-    assertTrue(resultList.getUIDCount() == 1);
-    assertTrue(resultList.getCOUNT() == 5);
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
deleted file mode 100644
index 7297b5a..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.iterator;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight;
-import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import com.google.protobuf.InvalidProtocolBufferException;
-
-public class TextIndexTest {
-  private TextIndexCombiner combiner;
-  private List<Value> values;
-
-  @Before
-  public void setup() throws Exception {
-    combiner = new TextIndexCombiner();
-    combiner.init(null, Collections.singletonMap("all", "true"), null);
-    values = new ArrayList<Value>();
-  }
-
-  @After
-  public void cleanup() {
-
-  }
-
-  private TermWeight.Info.Builder createBuilder() {
-    return TermWeight.Info.newBuilder();
-  }
-
-  @Test
-  public void testSingleValue() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-
-    values.add(new Value(builder.build().toByteArray()));
-
-    Value result = combiner.reduce(new Key(), values.iterator());
-
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.1f);
-
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 2);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 5);
-  }
-
-  @Test
-  public void testAggregateTwoValues() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-
-    values.add(new Value(builder.build().toByteArray()));
-
-    builder = createBuilder();
-    builder.addWordOffset(3);
-    builder.setNormalizedTermFrequency(0.05f);
-
-    values.add(new Value(builder.build().toByteArray()));
-
-    Value result = combiner.reduce(new Key(), values.iterator());
-
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.15f);
-
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 3);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 3);
-    Assert.assertTrue(offsets.get(2) == 5);
-  }
-
-  @Test
-  public void testAggregateManyValues() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(13);
-    builder.addWordOffset(15);
-    builder.addWordOffset(19);
-    builder.setNormalizedTermFrequency(0.12f);
-
-    values.add(new Value(builder.build().toByteArray()));
-
-    builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-
-    values.add(new Value(builder.build().toByteArray()));
-
-    builder = createBuilder();
-    builder.addWordOffset(3);
-    builder.setNormalizedTermFrequency(0.05f);
-
-    values.add(new Value(builder.build().toByteArray()));
-
-    Value result = combiner.reduce(new Key(), values.iterator());
-
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 6);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 3);
-    Assert.assertTrue(offsets.get(2) == 5);
-    Assert.assertTrue(offsets.get(3) == 13);
-    Assert.assertTrue(offsets.get(4) == 15);
-    Assert.assertTrue(offsets.get(5) == 19);
-  }
-
-  @Test
-  public void testEmptyValue() throws InvalidProtocolBufferException {
-    Builder builder = createBuilder();
-    builder.addWordOffset(13);
-    builder.addWordOffset(15);
-    builder.addWordOffset(19);
-    builder.setNormalizedTermFrequency(0.12f);
-
-    values.add(new Value("".getBytes()));
-    values.add(new Value(builder.build().toByteArray()));
-    values.add(new Value("".getBytes()));
-
-    builder = createBuilder();
-    builder.addWordOffset(1);
-    builder.addWordOffset(5);
-    builder.setNormalizedTermFrequency(0.1f);
-
-    values.add(new Value(builder.build().toByteArray()));
-    values.add(new Value("".getBytes()));
-
-    builder = createBuilder();
-    builder.addWordOffset(3);
-    builder.setNormalizedTermFrequency(0.05f);
-
-    values.add(new Value(builder.build().toByteArray()));
-    values.add(new Value("".getBytes()));
-
-    Value result = combiner.reduce(new Key(), values.iterator());
-
-    TermWeight.Info info = TermWeight.Info.parseFrom(result.get());
-
-    Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f);
-
-    List<Integer> offsets = info.getWordOffsetList();
-    Assert.assertTrue(offsets.size() == 6);
-    Assert.assertTrue(offsets.get(0) == 1);
-    Assert.assertTrue(offsets.get(1) == 3);
-    Assert.assertTrue(offsets.get(2) == 5);
-    Assert.assertTrue(offsets.get(3) == 13);
-    Assert.assertTrue(offsets.get(4) == 15);
-    Assert.assertTrue(offsets.get(5) == 19);
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
----------------------------------------------------------------------
diff --git a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java b/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
deleted file mode 100644
index 470633c..0000000
--- a/src/examples/wikisearch/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/normalizer/testNumberNormalizer.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.normalizer;
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.accumulo.examples.wikisearch.normalizer.NumberNormalizer;
-import org.junit.Test;
-
-public class testNumberNormalizer {
-
-  @Test
-  public void test1() throws Exception {
-    NumberNormalizer nn = new NumberNormalizer();
-
-    String n1 = nn.normalizeFieldValue(null, "1");
-    String n2 = nn.normalizeFieldValue(null, "1.00000000");
-
-    assertTrue(n1.compareTo(n2) < 0);
-
-  }
-
-  @Test
-  public void test2() {
-    NumberNormalizer nn = new NumberNormalizer();
-
-    String n1 = nn.normalizeFieldValue(null, "-1.0");
-    String n2 = nn.normalizeFieldValue(null, "1.0");
-
-    assertTrue(n1.compareTo(n2) < 0);
-
-  }
-
-  @Test
-  public void test3() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String n1 = nn.normalizeFieldValue(null, "-0.0001");
-    String n2 = nn.normalizeFieldValue(null, "0");
-    String n3 = nn.normalizeFieldValue(null, "0.00001");
-
-    assertTrue((n1.compareTo(n2) < 0) && (n2.compareTo(n3) < 0));
-  }
-
-  @Test
-  public void test4() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String nn1 = nn.normalizeFieldValue(null, Integer.toString(Integer.MAX_VALUE));
-    String nn2 = nn.normalizeFieldValue(null, Integer.toString(Integer.MAX_VALUE - 1));
-
-    assertTrue((nn2.compareTo(nn1) < 0));
-
-  }
-
-  @Test
-  public void test5() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String nn1 = nn.normalizeFieldValue(null, "-0.001");
-    String nn2 = nn.normalizeFieldValue(null, "-0.0009");
-    String nn3 = nn.normalizeFieldValue(null, "-0.00090");
-
-    assertTrue((nn3.compareTo(nn2) == 0) && (nn2.compareTo(nn1) > 0));
-
-  }
-
-  @Test
-  public void test6() {
-    NumberNormalizer nn = new NumberNormalizer();
-    String nn1 = nn.normalizeFieldValue(null, "00.0");
-    String nn2 = nn.normalizeFieldValue(null, "0");
-    String nn3 = nn.normalizeFieldValue(null, "0.0");
-
-    assertTrue((nn3.compareTo(nn2) == 0) && (nn2.compareTo(nn1) == 0));
-
-  }
-
-}