Return-Path: X-Original-To: apmail-incubator-accumulo-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-accumulo-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 59C3D7578 for ; Mon, 5 Dec 2011 20:06:20 +0000 (UTC) Received: (qmail 15721 invoked by uid 500); 5 Dec 2011 20:06:20 -0000 Delivered-To: apmail-incubator-accumulo-commits-archive@incubator.apache.org Received: (qmail 15692 invoked by uid 500); 5 Dec 2011 20:06:20 -0000 Mailing-List: contact accumulo-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: accumulo-dev@incubator.apache.org Delivered-To: mailing list accumulo-commits@incubator.apache.org Received: (qmail 15683 invoked by uid 99); 5 Dec 2011 20:06:20 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 05 Dec 2011 20:06:20 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 05 Dec 2011 20:06:16 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 1FCA9238899C; Mon, 5 Dec 2011 20:05:55 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1210600 [2/16] - in /incubator/accumulo/trunk/contrib/accumulo_sample: ./ ingest/ ingest/src/main/java/aggregator/ ingest/src/main/java/ingest/ ingest/src/main/java/iterator/ ingest/src/main/java/normalizer/ ingest/src/main/java/protobuf/ ... Date: Mon, 05 Dec 2011 20:05:51 -0000 To: accumulo-commits@incubator.apache.org From: billie@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111205200555.1FCA9238899C@eris.apache.org> Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/iterator/TotalAggregatingIterator.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/iterator/TotalAggregatingIterator.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/iterator/TotalAggregatingIterator.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/iterator/TotalAggregatingIterator.java Mon Dec 5 20:05:49 2011 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package iterator; import java.io.IOException; @@ -35,174 +35,170 @@ import org.apache.accumulo.core.iterator import org.apache.accumulo.start.classloader.AccumuloClassLoader; /** - * Aggregate all values with the same key (row, colf, colq, colVis.). - * + * Aggregate all values with the same key (row, colf, colq, colVis.). + * */ -public class TotalAggregatingIterator implements SortedKeyValueIterator, OptionDescriber { - - private SortedKeyValueIterator iterator; - - private Key workKey = new Key(); - - private Key aggrKey; - private Value aggrValue; - - private Aggregator agg; - - public TotalAggregatingIterator deepCopy(IteratorEnvironment env) - { - return new TotalAggregatingIterator(this, env); - } - - private TotalAggregatingIterator(TotalAggregatingIterator other, IteratorEnvironment env) - { - iterator = other.iterator.deepCopy(env); - agg = other.agg; - } - - public TotalAggregatingIterator(){} - - private void aggregateRowColumn(Aggregator aggr) throws IOException { - //this function assumes that first value is not delete - - workKey.set(iterator.getTopKey()); - - Key keyToAggregate = workKey; - - aggr.reset(); - - aggr.collect(iterator.getTopValue()); - iterator.next(); - - while(iterator.hasTop() && iterator.getTopKey().equals(keyToAggregate, PartialKey.ROW_COLFAM_COLQUAL_COLVIS)){ - aggr.collect(iterator.getTopValue()); - iterator.next(); - } - - aggrKey = workKey; - aggrValue = aggr.aggregate(); - - } - - private void findTop() throws IOException{ - //check if aggregation is needed - if(iterator.hasTop()){ - aggregateRowColumn(agg); - } - } - - public TotalAggregatingIterator(SortedKeyValueIterator iterator, ColumnToClassMapping aggregators) throws IOException{ - this.iterator = iterator; - } - - @Override - public Key getTopKey() { - if(aggrKey != null){ - return aggrKey; - } - return iterator.getTopKey(); - } - - @Override - public Value getTopValue() { - if(aggrKey != null){ - return aggrValue; - } - return iterator.getTopValue(); - } - - @Override - public boolean hasTop() { - return aggrKey != null || iterator.hasTop(); - } - - @Override - public void next() throws IOException { - if(aggrKey != null){ - aggrKey = null; - aggrValue = null; - }else{ - iterator.next(); - } - - findTop(); - } - - @Override - public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { - //do not want to seek to the middle of a value that should be - //aggregated... - - Range seekRange = maximizeStartKeyTimeStamp(range); - - iterator.seek(seekRange, columnFamilies, inclusive); - findTop(); - - if(range.getStartKey() != null){ - while(hasTop() && - getTopKey().equals(range.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL_COLVIS) && - getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) - { - //the value has a more recent time stamp, so - //pass it up - //log.debug("skipping "+getTopKey()); - next(); - } - - while(hasTop() && range.beforeStartKey(getTopKey())){ - next(); - } - } - - } - - @Override - public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { - agg = createAggregator(options); - this.iterator = source; - } - - @Override - public IteratorOptions describeOptions() { - return new IteratorOptions("agg","Aggregators apply aggregating functions to values with identical keys", - null, Collections.singletonList("* ")); - } - - @Override - public boolean validateOptions(Map options) { - if (options.size() > 1) - throw new IllegalArgumentException("This iterator only accepts one configuration option, the name of the aggregating class"); - agg = createAggregator(options); - return true; - } - - private Aggregator createAggregator(Map options) { - Aggregator a = null; - for (Entry entry : options.entrySet()) { - try { - Class clazz = AccumuloClassLoader.loadClass(entry.getValue(), Aggregator.class); - a = clazz.newInstance(); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException("class not found: "+entry.getValue()); - } catch (InstantiationException e) { - throw new IllegalArgumentException("instantiation exception: "+entry.getValue()); - } catch (IllegalAccessException e) { - throw new IllegalArgumentException("illegal access exception: "+entry.getValue()); - } - } - return a; - } - - static Range maximizeStartKeyTimeStamp(Range range) { - Range seekRange = range; - - if(range.getStartKey() != null && range.getStartKey().getTimestamp() != Long.MAX_VALUE){ - Key seekKey = new Key(seekRange.getStartKey()); - seekKey.setTimestamp(Long.MAX_VALUE); - seekRange = new Range(seekKey, true, range.getEndKey(), range.isEndKeyInclusive()); - } - - return seekRange; - } +public class TotalAggregatingIterator implements SortedKeyValueIterator, OptionDescriber { + + private SortedKeyValueIterator iterator; + + private Key workKey = new Key(); + + private Key aggrKey; + private Value aggrValue; + + private Aggregator agg; + + public TotalAggregatingIterator deepCopy(IteratorEnvironment env) { + return new TotalAggregatingIterator(this, env); + } + + private TotalAggregatingIterator(TotalAggregatingIterator other, IteratorEnvironment env) { + iterator = other.iterator.deepCopy(env); + agg = other.agg; + } + + public TotalAggregatingIterator() {} + + private void aggregateRowColumn(Aggregator aggr) throws IOException { + // this function assumes that first value is not delete + + workKey.set(iterator.getTopKey()); + + Key keyToAggregate = workKey; + + aggr.reset(); + + aggr.collect(iterator.getTopValue()); + iterator.next(); + + while (iterator.hasTop() && iterator.getTopKey().equals(keyToAggregate, PartialKey.ROW_COLFAM_COLQUAL_COLVIS)) { + aggr.collect(iterator.getTopValue()); + iterator.next(); + } + + aggrKey = workKey; + aggrValue = aggr.aggregate(); + + } + + private void findTop() throws IOException { + // check if aggregation is needed + if (iterator.hasTop()) { + aggregateRowColumn(agg); + } + } + + public TotalAggregatingIterator(SortedKeyValueIterator iterator, ColumnToClassMapping aggregators) throws IOException { + this.iterator = iterator; + } + + @Override + public Key getTopKey() { + if (aggrKey != null) { + return aggrKey; + } + return iterator.getTopKey(); + } + + @Override + public Value getTopValue() { + if (aggrKey != null) { + return aggrValue; + } + return iterator.getTopValue(); + } + + @Override + public boolean hasTop() { + return aggrKey != null || iterator.hasTop(); + } + + @Override + public void next() throws IOException { + if (aggrKey != null) { + aggrKey = null; + aggrValue = null; + } else { + iterator.next(); + } + + findTop(); + } + + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + // do not want to seek to the middle of a value that should be + // aggregated... + + Range seekRange = maximizeStartKeyTimeStamp(range); + + iterator.seek(seekRange, columnFamilies, inclusive); + findTop(); + + if (range.getStartKey() != null) { + while (hasTop() && getTopKey().equals(range.getStartKey(), PartialKey.ROW_COLFAM_COLQUAL_COLVIS) + && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) { + // the value has a more recent time stamp, so + // pass it up + // log.debug("skipping "+getTopKey()); + next(); + } + + while (hasTop() && range.beforeStartKey(getTopKey())) { + next(); + } + } + + } + + @Override + public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { + agg = createAggregator(options); + this.iterator = source; + } + + @Override + public IteratorOptions describeOptions() { + return new IteratorOptions("agg", "Aggregators apply aggregating functions to values with identical keys", null, + Collections.singletonList("* ")); + } + + @Override + public boolean validateOptions(Map options) { + if (options.size() > 1) + throw new IllegalArgumentException("This iterator only accepts one configuration option, the name of the aggregating class"); + agg = createAggregator(options); + return true; + } + + private Aggregator createAggregator(Map options) { + Aggregator a = null; + for (Entry entry : options.entrySet()) { + try { + Class clazz = AccumuloClassLoader.loadClass(entry.getValue(), Aggregator.class); + a = clazz.newInstance(); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("class not found: " + entry.getValue()); + } catch (InstantiationException e) { + throw new IllegalArgumentException("instantiation exception: " + entry.getValue()); + } catch (IllegalAccessException e) { + throw new IllegalArgumentException("illegal access exception: " + entry.getValue()); + } + } + return a; + } + + static Range maximizeStartKeyTimeStamp(Range range) { + Range seekRange = range; + + if (range.getStartKey() != null && range.getStartKey().getTimestamp() != Long.MAX_VALUE) { + Key seekKey = new Key(seekRange.getStartKey()); + seekKey.setTimestamp(Long.MAX_VALUE); + seekRange = new Range(seekKey, true, range.getEndKey(), range.isEndKeyInclusive()); + } + + return seekRange; + } } Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/LcNoDiacriticsNormalizer.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/LcNoDiacriticsNormalizer.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/LcNoDiacriticsNormalizer.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/LcNoDiacriticsNormalizer.java Mon Dec 5 20:05:49 2011 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package normalizer; import java.text.Normalizer; @@ -25,28 +25,25 @@ import java.util.regex.Pattern; /** * An {@link Normalizer} which performs the following steps: *
    - *
  1. Unicode canonical decomposition ({@link Form#NFD})
  2. - *
  3. Removal of diacritical marks
  4. - *
  5. Unicode canonical composition ({@link Form#NFC})
  6. - *
  7. lower casing in the {@link Locale#ENGLISH English local} + *
  8. Unicode canonical decomposition ({@link Form#NFD})
  9. + *
  10. Removal of diacritical marks
  11. + *
  12. Unicode canonical composition ({@link Form#NFC})
  13. + *
  14. lower casing in the {@link Locale#ENGLISH English local} *
*/ -public class LcNoDiacriticsNormalizer implements normalizer.Normalizer -{ - private static final Pattern diacriticals = Pattern.compile( - "\\p{InCombiningDiacriticalMarks}"); - - public String normalizeFieldValue(String fieldName, Object fieldValue) - { - String decomposed = Normalizer.normalize(fieldValue.toString(), Form.NFD); - String noDiacriticals = removeDiacriticalMarks(decomposed); - String recomposed = Normalizer.normalize(noDiacriticals, Form.NFC); - return recomposed.toLowerCase(Locale.ENGLISH); - } - - private String removeDiacriticalMarks(String str) { - Matcher matcher = diacriticals.matcher(str); - return matcher.replaceAll(""); - } - +public class LcNoDiacriticsNormalizer implements normalizer.Normalizer { + private static final Pattern diacriticals = Pattern.compile("\\p{InCombiningDiacriticalMarks}"); + + public String normalizeFieldValue(String fieldName, Object fieldValue) { + String decomposed = Normalizer.normalize(fieldValue.toString(), Form.NFD); + String noDiacriticals = removeDiacriticalMarks(decomposed); + String recomposed = Normalizer.normalize(noDiacriticals, Form.NFC); + return recomposed.toLowerCase(Locale.ENGLISH); + } + + private String removeDiacriticalMarks(String str) { + Matcher matcher = diacriticals.matcher(str); + return matcher.replaceAll(""); + } + } Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NoOpNormalizer.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NoOpNormalizer.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NoOpNormalizer.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NoOpNormalizer.java Mon Dec 5 20:05:49 2011 @@ -1,24 +1,23 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package normalizer; -public class NoOpNormalizer implements Normalizer -{ - public String normalizeFieldValue(String field, Object value) { - return value.toString(); - } +public class NoOpNormalizer implements Normalizer { + public String normalizeFieldValue(String field, Object value) { + return value.toString(); + } } Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/Normalizer.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/Normalizer.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/Normalizer.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/Normalizer.java Mon Dec 5 20:05:49 2011 @@ -1,30 +1,32 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package normalizer; - public interface Normalizer { - - /** - * Creates normalized content for ingest based upon implemented logic. - * @param field The field being normalized - * @param value The value to normalize - * @return a normalized value - */ - public String normalizeFieldValue(String field, Object value); - + + /** + * Creates normalized content for ingest based upon implemented logic. + * + * @param field + * The field being normalized + * @param value + * The value to normalize + * @return a normalized value + */ + public String normalizeFieldValue(String field, Object value); + } Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NumberNormalizer.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NumberNormalizer.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NumberNormalizer.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/normalizer/NumberNormalizer.java Mon Dec 5 20:05:49 2011 @@ -1,43 +1,42 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package normalizer; import org.apache.commons.lang.math.NumberUtils; import org.apache.lucene.util.NumericUtils; - public class NumberNormalizer implements Normalizer { - - public String normalizeFieldValue(String field, Object value) { - if (NumberUtils.isNumber(value.toString())) { - Number n = NumberUtils.createNumber(value.toString()); - if (n instanceof Integer) - return NumericUtils.intToPrefixCoded((Integer) n); - else if (n instanceof Long) - return NumericUtils.longToPrefixCoded((Long) n); - else if (n instanceof Float) - return NumericUtils.floatToPrefixCoded((Float) n); - else if (n instanceof Double) - return NumericUtils.doubleToPrefixCoded((Double) n); - else - throw new IllegalArgumentException("Unhandled numeric type: " + n.getClass()); - } else { - throw new IllegalArgumentException("Value is not a number: " + value); - } - } - + + public String normalizeFieldValue(String field, Object value) { + if (NumberUtils.isNumber(value.toString())) { + Number n = NumberUtils.createNumber(value.toString()); + if (n instanceof Integer) + return NumericUtils.intToPrefixCoded((Integer) n); + else if (n instanceof Long) + return NumericUtils.longToPrefixCoded((Long) n); + else if (n instanceof Float) + return NumericUtils.floatToPrefixCoded((Float) n); + else if (n instanceof Double) + return NumericUtils.doubleToPrefixCoded((Double) n); + else + throw new IllegalArgumentException("Unhandled numeric type: " + n.getClass()); + } else { + throw new IllegalArgumentException("Value is not a number: " + value); + } + } + } Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/TermWeight.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/TermWeight.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/TermWeight.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/TermWeight.java Mon Dec 5 20:05:49 2011 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ // Generated by the protocol buffer compiler. DO NOT EDIT! // source: TermWeight.proto @@ -21,18 +21,19 @@ package protobuf; public final class TermWeight { private TermWeight() {} - public static void registerAllExtensions( - com.google.protobuf.ExtensionRegistry registry) { - } - public static final class Info extends - com.google.protobuf.GeneratedMessage { + + public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {} + + public static final class Info extends com.google.protobuf.GeneratedMessage { // Use Info.newBuilder() to construct. private Info() { initFields(); } + private Info(boolean noInit) {} private static final Info defaultInstance; + public static Info getDefaultInstance() { return defaultInstance; } @@ -41,13 +42,11 @@ public final class TermWeight { return defaultInstance; } - public static final com.google.protobuf.Descriptors.Descriptor - getDescriptor() { + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return protobuf.TermWeight.internal_static_protobuf_Info_descriptor; } - protected com.google.protobuf.GeneratedMessage.FieldAccessorTable - internalGetFieldAccessorTable() { + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable internalGetFieldAccessorTable() { return protobuf.TermWeight.internal_static_protobuf_Info_fieldAccessorTable; } @@ -55,30 +54,40 @@ public final class TermWeight { public static final int NORMALIZEDTERMFREQUENCY_FIELD_NUMBER = 1; private boolean hasNormalizedTermFrequency; private float normalizedTermFrequency_ = 0F; - public boolean hasNormalizedTermFrequency() { return hasNormalizedTermFrequency; } - public float getNormalizedTermFrequency() { return normalizedTermFrequency_; } + + public boolean hasNormalizedTermFrequency() { + return hasNormalizedTermFrequency; + } + + public float getNormalizedTermFrequency() { + return normalizedTermFrequency_; + } // repeated uint32 wordOffset = 2; public static final int WORDOFFSET_FIELD_NUMBER = 2; - private java.util.List wordOffset_ = - java.util.Collections.emptyList(); + private java.util.List wordOffset_ = java.util.Collections.emptyList(); + public java.util.List getWordOffsetList() { return wordOffset_; } - public int getWordOffsetCount() { return wordOffset_.size(); } + + public int getWordOffsetCount() { + return wordOffset_.size(); + } + public int getWordOffset(int index) { return wordOffset_.get(index); } - private void initFields() { - } + private void initFields() {} + public final boolean isInitialized() { - if (!hasNormalizedTermFrequency) return false; + if (!hasNormalizedTermFrequency) + return false; return true; } - public void writeTo(com.google.protobuf.CodedOutputStream output) - throws java.io.IOException { + public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException { getSerializedSize(); if (hasNormalizedTermFrequency()) { output.writeFloat(1, getNormalizedTermFrequency()); @@ -90,20 +99,20 @@ public final class TermWeight { } private int memoizedSerializedSize = -1; + public int getSerializedSize() { int size = memoizedSerializedSize; - if (size != -1) return size; - + if (size != -1) + return size; + size = 0; if (hasNormalizedTermFrequency()) { - size += com.google.protobuf.CodedOutputStream - .computeFloatSize(1, getNormalizedTermFrequency()); + size += com.google.protobuf.CodedOutputStream.computeFloatSize(1, getNormalizedTermFrequency()); } { int dataSize = 0; for (int element : getWordOffsetList()) { - dataSize += com.google.protobuf.CodedOutputStream - .computeUInt32SizeNoTag(element); + dataSize += com.google.protobuf.CodedOutputStream.computeUInt32SizeNoTag(element); } size += dataSize; size += 1 * getWordOffsetList().size(); @@ -113,42 +122,34 @@ public final class TermWeight { return size; } - public static protobuf.TermWeight.Info parseFrom( - com.google.protobuf.ByteString data) - throws com.google.protobuf.InvalidProtocolBufferException { + public static protobuf.TermWeight.Info parseFrom(com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException { return newBuilder().mergeFrom(data).buildParsed(); } - public static protobuf.TermWeight.Info parseFrom( - com.google.protobuf.ByteString data, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.TermWeight.Info parseFrom(com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(data, extensionRegistry).buildParsed(); } - public static protobuf.TermWeight.Info parseFrom(byte[] data) - throws com.google.protobuf.InvalidProtocolBufferException { + + public static protobuf.TermWeight.Info parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException { return newBuilder().mergeFrom(data).buildParsed(); } - public static protobuf.TermWeight.Info parseFrom( - byte[] data, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.TermWeight.Info parseFrom(byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(data, extensionRegistry).buildParsed(); } - public static protobuf.TermWeight.Info parseFrom(java.io.InputStream input) - throws java.io.IOException { + + public static protobuf.TermWeight.Info parseFrom(java.io.InputStream input) throws java.io.IOException { return newBuilder().mergeFrom(input).buildParsed(); } - public static protobuf.TermWeight.Info parseFrom( - java.io.InputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.TermWeight.Info parseFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { - return newBuilder().mergeFrom(input, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(input, extensionRegistry).buildParsed(); } - public static protobuf.TermWeight.Info parseDelimitedFrom(java.io.InputStream input) - throws java.io.IOException { + + public static protobuf.TermWeight.Info parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException { Builder builder = newBuilder(); if (builder.mergeDelimitedFrom(input)) { return builder.buildParsed(); @@ -156,9 +157,8 @@ public final class TermWeight { return null; } } - public static protobuf.TermWeight.Info parseDelimitedFrom( - java.io.InputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.TermWeight.Info parseDelimitedFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { Builder builder = newBuilder(); if (builder.mergeDelimitedFrom(input, extensionRegistry)) { @@ -167,28 +167,33 @@ public final class TermWeight { return null; } } - public static protobuf.TermWeight.Info parseFrom( - com.google.protobuf.CodedInputStream input) - throws java.io.IOException { + + public static protobuf.TermWeight.Info parseFrom(com.google.protobuf.CodedInputStream input) throws java.io.IOException { return newBuilder().mergeFrom(input).buildParsed(); } - public static protobuf.TermWeight.Info parseFrom( - com.google.protobuf.CodedInputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.TermWeight.Info parseFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { - return newBuilder().mergeFrom(input, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(input, extensionRegistry).buildParsed(); + } + + public static Builder newBuilder() { + return Builder.create(); + } + + public Builder newBuilderForType() { + return newBuilder(); } - public static Builder newBuilder() { return Builder.create(); } - public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder(protobuf.TermWeight.Info prototype) { return newBuilder().mergeFrom(prototype); } - public Builder toBuilder() { return newBuilder(this); } - public static final class Builder extends - com.google.protobuf.GeneratedMessage.Builder { + public Builder toBuilder() { + return newBuilder(this); + } + + public static final class Builder extends com.google.protobuf.GeneratedMessage.Builder { private protobuf.TermWeight.Info result; // Construct using protobuf.TermWeight.Info.newBuilder() @@ -206,8 +211,7 @@ public final class TermWeight { public Builder clear() { if (result == null) { - throw new IllegalStateException( - "Cannot call clear() after build()."); + throw new IllegalStateException("Cannot call clear() after build()."); } result = new protobuf.TermWeight.Info(); return this; @@ -217,8 +221,7 @@ public final class TermWeight { return create().mergeFrom(result); } - public com.google.protobuf.Descriptors.Descriptor - getDescriptorForType() { + public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return protobuf.TermWeight.Info.getDescriptor(); } @@ -229,6 +232,7 @@ public final class TermWeight { public boolean isInitialized() { return result.isInitialized(); } + public protobuf.TermWeight.Info build() { if (result != null && !isInitialized()) { throw newUninitializedMessageException(result); @@ -236,23 +240,19 @@ public final class TermWeight { return buildPartial(); } - private protobuf.TermWeight.Info buildParsed() - throws com.google.protobuf.InvalidProtocolBufferException { + private protobuf.TermWeight.Info buildParsed() throws com.google.protobuf.InvalidProtocolBufferException { if (!isInitialized()) { - throw newUninitializedMessageException( - result).asInvalidProtocolBufferException(); + throw newUninitializedMessageException(result).asInvalidProtocolBufferException(); } return buildPartial(); } public protobuf.TermWeight.Info buildPartial() { if (result == null) { - throw new IllegalStateException( - "build() has already been called on this Builder."); + throw new IllegalStateException("build() has already been called on this Builder."); } if (result.wordOffset_ != java.util.Collections.EMPTY_LIST) { - result.wordOffset_ = - java.util.Collections.unmodifiableList(result.wordOffset_); + result.wordOffset_ = java.util.Collections.unmodifiableList(result.wordOffset_); } protobuf.TermWeight.Info returnMe = result; result = null; @@ -261,7 +261,7 @@ public final class TermWeight { public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof protobuf.TermWeight.Info) { - return mergeFrom((protobuf.TermWeight.Info)other); + return mergeFrom((protobuf.TermWeight.Info) other); } else { super.mergeFrom(other); return this; @@ -269,7 +269,8 @@ public final class TermWeight { } public Builder mergeFrom(protobuf.TermWeight.Info other) { - if (other == protobuf.TermWeight.Info.getDefaultInstance()) return this; + if (other == protobuf.TermWeight.Info.getDefaultInstance()) + return this; if (other.hasNormalizedTermFrequency()) { setNormalizedTermFrequency(other.getNormalizedTermFrequency()); } @@ -283,13 +284,9 @@ public final class TermWeight { return this; } - public Builder mergeFrom( - com.google.protobuf.CodedInputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + public Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { - com.google.protobuf.UnknownFieldSet.Builder unknownFields = - com.google.protobuf.UnknownFieldSet.newBuilder( - this.getUnknownFields()); + com.google.protobuf.UnknownFieldSet.Builder unknownFields = com.google.protobuf.UnknownFieldSet.newBuilder(this.getUnknownFields()); while (true) { int tag = input.readTag(); switch (tag) { @@ -297,8 +294,7 @@ public final class TermWeight { this.setUnknownFields(unknownFields.build()); return this; default: { - if (!parseUnknownField(input, unknownFields, - extensionRegistry, tag)) { + if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) { this.setUnknownFields(unknownFields.build()); return this; } @@ -325,19 +321,21 @@ public final class TermWeight { } } - // required float normalizedTermFrequency = 1; public boolean hasNormalizedTermFrequency() { return result.hasNormalizedTermFrequency(); } + public float getNormalizedTermFrequency() { return result.getNormalizedTermFrequency(); } + public Builder setNormalizedTermFrequency(float value) { result.hasNormalizedTermFrequency = true; result.normalizedTermFrequency_ = value; return this; } + public Builder clearNormalizedTermFrequency() { result.hasNormalizedTermFrequency = false; result.normalizedTermFrequency_ = 0F; @@ -348,16 +346,20 @@ public final class TermWeight { public java.util.List getWordOffsetList() { return java.util.Collections.unmodifiableList(result.wordOffset_); } + public int getWordOffsetCount() { return result.getWordOffsetCount(); } + public int getWordOffset(int index) { return result.getWordOffset(index); } + public Builder setWordOffset(int index, int value) { result.wordOffset_.set(index, value); return this; } + public Builder addWordOffset(int value) { if (result.wordOffset_.isEmpty()) { result.wordOffset_ = new java.util.ArrayList(); @@ -365,14 +367,15 @@ public final class TermWeight { result.wordOffset_.add(value); return this; } - public Builder addAllWordOffset( - java.lang.Iterable values) { + + public Builder addAllWordOffset(java.lang.Iterable values) { if (result.wordOffset_.isEmpty()) { result.wordOffset_ = new java.util.ArrayList(); } super.addAll(values, result.wordOffset_); return this; } + public Builder clearWordOffset() { result.wordOffset_ = java.util.Collections.emptyList(); return this; @@ -390,44 +393,29 @@ public final class TermWeight { // @@protoc_insertion_point(class_scope:protobuf.Info) } - private static com.google.protobuf.Descriptors.Descriptor - internal_static_protobuf_Info_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_protobuf_Info_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor internal_static_protobuf_Info_descriptor; + private static com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_protobuf_Info_fieldAccessorTable; - public static com.google.protobuf.Descriptors.FileDescriptor - getDescriptor() { + public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { return descriptor; } - private static com.google.protobuf.Descriptors.FileDescriptor - descriptor; + + private static com.google.protobuf.Descriptors.FileDescriptor descriptor; static { - java.lang.String[] descriptorData = { - "\n\020TermWeight.proto\022\010protobuf\";\n\004Info\022\037\n\027" + - "normalizedTermFrequency\030\001 \002(\002\022\022\n\nwordOff" + - "set\030\002 \003(\rB\014\n\010protobufH\001" + java.lang.String[] descriptorData = {"\n\020TermWeight.proto\022\010protobuf\";\n\004Info\022\037\n\027" + + "normalizedTermFrequency\030\001 \002(\002\022\022\n\nwordOff" + "set\030\002 \003(\rB\014\n\010protobufH\001"}; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors(com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_protobuf_Info_descriptor = getDescriptor().getMessageTypes().get(0); + internal_static_protobuf_Info_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_protobuf_Info_descriptor, new java.lang.String[] {"NormalizedTermFrequency", "WordOffset",}, protobuf.TermWeight.Info.class, + protobuf.TermWeight.Info.Builder.class); + return null; + } }; - com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = - new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { - public com.google.protobuf.ExtensionRegistry assignDescriptors( - com.google.protobuf.Descriptors.FileDescriptor root) { - descriptor = root; - internal_static_protobuf_Info_descriptor = - getDescriptor().getMessageTypes().get(0); - internal_static_protobuf_Info_fieldAccessorTable = new - com.google.protobuf.GeneratedMessage.FieldAccessorTable( - internal_static_protobuf_Info_descriptor, - new java.lang.String[] { "NormalizedTermFrequency", "WordOffset", }, - protobuf.TermWeight.Info.class, - protobuf.TermWeight.Info.Builder.class); - return null; - } - }; - com.google.protobuf.Descriptors.FileDescriptor - .internalBuildGeneratedFileFrom(descriptorData, - new com.google.protobuf.Descriptors.FileDescriptor[] { - }, assigner); + com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] {}, + assigner); } public static void internalForceInit() {} Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/Uid.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/Uid.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/Uid.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/protobuf/Uid.java Mon Dec 5 20:05:49 2011 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ // Generated by the protocol buffer compiler. DO NOT EDIT! // source: Uid.proto @@ -21,18 +21,19 @@ package protobuf; public final class Uid { private Uid() {} - public static void registerAllExtensions( - com.google.protobuf.ExtensionRegistry registry) { - } - public static final class List extends - com.google.protobuf.GeneratedMessage { + + public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {} + + public static final class List extends com.google.protobuf.GeneratedMessage { // Use List.newBuilder() to construct. private List() { initFields(); } + private List(boolean noInit) {} private static final List defaultInstance; + public static List getDefaultInstance() { return defaultInstance; } @@ -41,13 +42,11 @@ public final class Uid { return defaultInstance; } - public static final com.google.protobuf.Descriptors.Descriptor - getDescriptor() { + public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() { return protobuf.Uid.internal_static_protobuf_List_descriptor; } - protected com.google.protobuf.GeneratedMessage.FieldAccessorTable - internalGetFieldAccessorTable() { + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable internalGetFieldAccessorTable() { return protobuf.Uid.internal_static_protobuf_List_fieldAccessorTable; } @@ -55,38 +54,55 @@ public final class Uid { public static final int IGNORE_FIELD_NUMBER = 1; private boolean hasIGNORE; private boolean iGNORE_ = false; - public boolean hasIGNORE() { return hasIGNORE; } - public boolean getIGNORE() { return iGNORE_; } + + public boolean hasIGNORE() { + return hasIGNORE; + } + + public boolean getIGNORE() { + return iGNORE_; + } // required uint64 COUNT = 2; public static final int COUNT_FIELD_NUMBER = 2; private boolean hasCOUNT; private long cOUNT_ = 0L; - public boolean hasCOUNT() { return hasCOUNT; } - public long getCOUNT() { return cOUNT_; } + + public boolean hasCOUNT() { + return hasCOUNT; + } + + public long getCOUNT() { + return cOUNT_; + } // repeated string UID = 3; public static final int UID_FIELD_NUMBER = 3; - private java.util.List uID_ = - java.util.Collections.emptyList(); + private java.util.List uID_ = java.util.Collections.emptyList(); + public java.util.List getUIDList() { return uID_; } - public int getUIDCount() { return uID_.size(); } + + public int getUIDCount() { + return uID_.size(); + } + public java.lang.String getUID(int index) { return uID_.get(index); } - private void initFields() { - } + private void initFields() {} + public final boolean isInitialized() { - if (!hasIGNORE) return false; - if (!hasCOUNT) return false; + if (!hasIGNORE) + return false; + if (!hasCOUNT) + return false; return true; } - public void writeTo(com.google.protobuf.CodedOutputStream output) - throws java.io.IOException { + public void writeTo(com.google.protobuf.CodedOutputStream output) throws java.io.IOException { getSerializedSize(); if (hasIGNORE()) { output.writeBool(1, getIGNORE()); @@ -101,24 +117,23 @@ public final class Uid { } private int memoizedSerializedSize = -1; + public int getSerializedSize() { int size = memoizedSerializedSize; - if (size != -1) return size; - + if (size != -1) + return size; + size = 0; if (hasIGNORE()) { - size += com.google.protobuf.CodedOutputStream - .computeBoolSize(1, getIGNORE()); + size += com.google.protobuf.CodedOutputStream.computeBoolSize(1, getIGNORE()); } if (hasCOUNT()) { - size += com.google.protobuf.CodedOutputStream - .computeUInt64Size(2, getCOUNT()); + size += com.google.protobuf.CodedOutputStream.computeUInt64Size(2, getCOUNT()); } { int dataSize = 0; for (java.lang.String element : getUIDList()) { - dataSize += com.google.protobuf.CodedOutputStream - .computeStringSizeNoTag(element); + dataSize += com.google.protobuf.CodedOutputStream.computeStringSizeNoTag(element); } size += dataSize; size += 1 * getUIDList().size(); @@ -128,42 +143,34 @@ public final class Uid { return size; } - public static protobuf.Uid.List parseFrom( - com.google.protobuf.ByteString data) - throws com.google.protobuf.InvalidProtocolBufferException { + public static protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data) throws com.google.protobuf.InvalidProtocolBufferException { return newBuilder().mergeFrom(data).buildParsed(); } - public static protobuf.Uid.List parseFrom( - com.google.protobuf.ByteString data, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.Uid.List parseFrom(com.google.protobuf.ByteString data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(data, extensionRegistry).buildParsed(); } - public static protobuf.Uid.List parseFrom(byte[] data) - throws com.google.protobuf.InvalidProtocolBufferException { + + public static protobuf.Uid.List parseFrom(byte[] data) throws com.google.protobuf.InvalidProtocolBufferException { return newBuilder().mergeFrom(data).buildParsed(); } - public static protobuf.Uid.List parseFrom( - byte[] data, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.Uid.List parseFrom(byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws com.google.protobuf.InvalidProtocolBufferException { - return newBuilder().mergeFrom(data, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(data, extensionRegistry).buildParsed(); } - public static protobuf.Uid.List parseFrom(java.io.InputStream input) - throws java.io.IOException { + + public static protobuf.Uid.List parseFrom(java.io.InputStream input) throws java.io.IOException { return newBuilder().mergeFrom(input).buildParsed(); } - public static protobuf.Uid.List parseFrom( - java.io.InputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.Uid.List parseFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { - return newBuilder().mergeFrom(input, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(input, extensionRegistry).buildParsed(); } - public static protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input) - throws java.io.IOException { + + public static protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input) throws java.io.IOException { Builder builder = newBuilder(); if (builder.mergeDelimitedFrom(input)) { return builder.buildParsed(); @@ -171,9 +178,8 @@ public final class Uid { return null; } } - public static protobuf.Uid.List parseDelimitedFrom( - java.io.InputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.Uid.List parseDelimitedFrom(java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { Builder builder = newBuilder(); if (builder.mergeDelimitedFrom(input, extensionRegistry)) { @@ -182,28 +188,33 @@ public final class Uid { return null; } } - public static protobuf.Uid.List parseFrom( - com.google.protobuf.CodedInputStream input) - throws java.io.IOException { + + public static protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input) throws java.io.IOException { return newBuilder().mergeFrom(input).buildParsed(); } - public static protobuf.Uid.List parseFrom( - com.google.protobuf.CodedInputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + + public static protobuf.Uid.List parseFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { - return newBuilder().mergeFrom(input, extensionRegistry) - .buildParsed(); + return newBuilder().mergeFrom(input, extensionRegistry).buildParsed(); + } + + public static Builder newBuilder() { + return Builder.create(); + } + + public Builder newBuilderForType() { + return newBuilder(); } - public static Builder newBuilder() { return Builder.create(); } - public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder(protobuf.Uid.List prototype) { return newBuilder().mergeFrom(prototype); } - public Builder toBuilder() { return newBuilder(this); } - public static final class Builder extends - com.google.protobuf.GeneratedMessage.Builder { + public Builder toBuilder() { + return newBuilder(this); + } + + public static final class Builder extends com.google.protobuf.GeneratedMessage.Builder { private protobuf.Uid.List result; // Construct using protobuf.Uid.List.newBuilder() @@ -221,8 +232,7 @@ public final class Uid { public Builder clear() { if (result == null) { - throw new IllegalStateException( - "Cannot call clear() after build()."); + throw new IllegalStateException("Cannot call clear() after build()."); } result = new protobuf.Uid.List(); return this; @@ -232,8 +242,7 @@ public final class Uid { return create().mergeFrom(result); } - public com.google.protobuf.Descriptors.Descriptor - getDescriptorForType() { + public com.google.protobuf.Descriptors.Descriptor getDescriptorForType() { return protobuf.Uid.List.getDescriptor(); } @@ -244,6 +253,7 @@ public final class Uid { public boolean isInitialized() { return result.isInitialized(); } + public protobuf.Uid.List build() { if (result != null && !isInitialized()) { throw newUninitializedMessageException(result); @@ -251,23 +261,19 @@ public final class Uid { return buildPartial(); } - private protobuf.Uid.List buildParsed() - throws com.google.protobuf.InvalidProtocolBufferException { + private protobuf.Uid.List buildParsed() throws com.google.protobuf.InvalidProtocolBufferException { if (!isInitialized()) { - throw newUninitializedMessageException( - result).asInvalidProtocolBufferException(); + throw newUninitializedMessageException(result).asInvalidProtocolBufferException(); } return buildPartial(); } public protobuf.Uid.List buildPartial() { if (result == null) { - throw new IllegalStateException( - "build() has already been called on this Builder."); + throw new IllegalStateException("build() has already been called on this Builder."); } if (result.uID_ != java.util.Collections.EMPTY_LIST) { - result.uID_ = - java.util.Collections.unmodifiableList(result.uID_); + result.uID_ = java.util.Collections.unmodifiableList(result.uID_); } protobuf.Uid.List returnMe = result; result = null; @@ -276,7 +282,7 @@ public final class Uid { public Builder mergeFrom(com.google.protobuf.Message other) { if (other instanceof protobuf.Uid.List) { - return mergeFrom((protobuf.Uid.List)other); + return mergeFrom((protobuf.Uid.List) other); } else { super.mergeFrom(other); return this; @@ -284,7 +290,8 @@ public final class Uid { } public Builder mergeFrom(protobuf.Uid.List other) { - if (other == protobuf.Uid.List.getDefaultInstance()) return this; + if (other == protobuf.Uid.List.getDefaultInstance()) + return this; if (other.hasIGNORE()) { setIGNORE(other.getIGNORE()); } @@ -301,13 +308,9 @@ public final class Uid { return this; } - public Builder mergeFrom( - com.google.protobuf.CodedInputStream input, - com.google.protobuf.ExtensionRegistryLite extensionRegistry) + public Builder mergeFrom(com.google.protobuf.CodedInputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry) throws java.io.IOException { - com.google.protobuf.UnknownFieldSet.Builder unknownFields = - com.google.protobuf.UnknownFieldSet.newBuilder( - this.getUnknownFields()); + com.google.protobuf.UnknownFieldSet.Builder unknownFields = com.google.protobuf.UnknownFieldSet.newBuilder(this.getUnknownFields()); while (true) { int tag = input.readTag(); switch (tag) { @@ -315,8 +318,7 @@ public final class Uid { this.setUnknownFields(unknownFields.build()); return this; default: { - if (!parseUnknownField(input, unknownFields, - extensionRegistry, tag)) { + if (!parseUnknownField(input, unknownFields, extensionRegistry, tag)) { this.setUnknownFields(unknownFields.build()); return this; } @@ -338,19 +340,21 @@ public final class Uid { } } - // required bool IGNORE = 1; public boolean hasIGNORE() { return result.hasIGNORE(); } + public boolean getIGNORE() { return result.getIGNORE(); } + public Builder setIGNORE(boolean value) { result.hasIGNORE = true; result.iGNORE_ = value; return this; } + public Builder clearIGNORE() { result.hasIGNORE = false; result.iGNORE_ = false; @@ -361,14 +365,17 @@ public final class Uid { public boolean hasCOUNT() { return result.hasCOUNT(); } + public long getCOUNT() { return result.getCOUNT(); } + public Builder setCOUNT(long value) { result.hasCOUNT = true; result.cOUNT_ = value; return this; } + public Builder clearCOUNT() { result.hasCOUNT = false; result.cOUNT_ = 0L; @@ -379,37 +386,42 @@ public final class Uid { public java.util.List getUIDList() { return java.util.Collections.unmodifiableList(result.uID_); } + public int getUIDCount() { return result.getUIDCount(); } + public java.lang.String getUID(int index) { return result.getUID(index); } + public Builder setUID(int index, java.lang.String value) { if (value == null) { - throw new NullPointerException(); - } - result.uID_.set(index, value); + throw new NullPointerException(); + } + result.uID_.set(index, value); return this; } + public Builder addUID(java.lang.String value) { if (value == null) { - throw new NullPointerException(); - } - if (result.uID_.isEmpty()) { + throw new NullPointerException(); + } + if (result.uID_.isEmpty()) { result.uID_ = new java.util.ArrayList(); } result.uID_.add(value); return this; } - public Builder addAllUID( - java.lang.Iterable values) { + + public Builder addAllUID(java.lang.Iterable values) { if (result.uID_.isEmpty()) { result.uID_ = new java.util.ArrayList(); } super.addAll(values, result.uID_); return this; } + public Builder clearUID() { result.uID_ = java.util.Collections.emptyList(); return this; @@ -427,44 +439,29 @@ public final class Uid { // @@protoc_insertion_point(class_scope:protobuf.List) } - private static com.google.protobuf.Descriptors.Descriptor - internal_static_protobuf_List_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_protobuf_List_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor internal_static_protobuf_List_descriptor; + private static com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_protobuf_List_fieldAccessorTable; - public static com.google.protobuf.Descriptors.FileDescriptor - getDescriptor() { + public static com.google.protobuf.Descriptors.FileDescriptor getDescriptor() { return descriptor; } - private static com.google.protobuf.Descriptors.FileDescriptor - descriptor; + + private static com.google.protobuf.Descriptors.FileDescriptor descriptor; static { - java.lang.String[] descriptorData = { - "\n\tUid.proto\022\010protobuf\"2\n\004List\022\016\n\006IGNORE\030" + - "\001 \002(\010\022\r\n\005COUNT\030\002 \002(\004\022\013\n\003UID\030\003 \003(\tB\014\n\010pro" + - "tobufH\001" + java.lang.String[] descriptorData = {"\n\tUid.proto\022\010protobuf\"2\n\004List\022\016\n\006IGNORE\030" + + "\001 \002(\010\022\r\n\005COUNT\030\002 \002(\004\022\013\n\003UID\030\003 \003(\tB\014\n\010pro" + "tobufH\001"}; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors(com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_protobuf_List_descriptor = getDescriptor().getMessageTypes().get(0); + internal_static_protobuf_List_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_protobuf_List_descriptor, new java.lang.String[] {"IGNORE", "COUNT", "UID",}, protobuf.Uid.List.class, + protobuf.Uid.List.Builder.class); + return null; + } }; - com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = - new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { - public com.google.protobuf.ExtensionRegistry assignDescriptors( - com.google.protobuf.Descriptors.FileDescriptor root) { - descriptor = root; - internal_static_protobuf_List_descriptor = - getDescriptor().getMessageTypes().get(0); - internal_static_protobuf_List_fieldAccessorTable = new - com.google.protobuf.GeneratedMessage.FieldAccessorTable( - internal_static_protobuf_List_descriptor, - new java.lang.String[] { "IGNORE", "COUNT", "UID", }, - protobuf.Uid.List.class, - protobuf.Uid.List.Builder.class); - return null; - } - }; - com.google.protobuf.Descriptors.FileDescriptor - .internalBuildGeneratedFileFrom(descriptorData, - new com.google.protobuf.Descriptors.FileDescriptor[] { - }, assigner); + com.google.protobuf.Descriptors.FileDescriptor.internalBuildGeneratedFileFrom(descriptorData, new com.google.protobuf.Descriptors.FileDescriptor[] {}, + assigner); } public static void internalForceInit() {} Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/AggregatingRecordReader.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/AggregatingRecordReader.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/AggregatingRecordReader.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/AggregatingRecordReader.java Mon Dec 5 20:05:49 2011 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package reader; import ingest.WikipediaConfiguration; @@ -27,19 +27,17 @@ import org.apache.hadoop.mapreduce.TaskA import util.TextUtil; - /** - * This class aggregates Text values based on a start and end filter. An example - * use case for this would be XML data. This will not work with data that has + * This class aggregates Text values based on a start and end filter. An example use case for this would be XML data. This will not work with data that has * nested start and stop tokens. * */ public class AggregatingRecordReader extends LongLineRecordReader { - + public static final String START_TOKEN = "aggregating.token.start"; public static final String END_TOKEN = "aggregating.token.end"; public static final String RETURN_PARTIAL_MATCHES = "aggregating.allow.partial"; - + private LongWritable key = new LongWritable(); private String startToken = null; private String endToken = null; @@ -48,38 +46,35 @@ public class AggregatingRecordReader ext private boolean startFound = false; private StringBuilder remainder = new StringBuilder(0); private boolean returnPartialMatches = false; - + @Override public LongWritable getCurrentKey() { key.set(counter); return key; } - + @Override public Text getCurrentValue() { return aggValue; } - + @Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { super.initialize(genericSplit, context); this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class); this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class); this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false); - + /* - * Text-appending works almost exactly like the + operator on Strings- it - * creates a byte array exactly the size of [prefix + suffix] and dumps the - * bytes into the new array. This module works by doing lots of little - * additions, one line at a time. With most XML, the documents are - * partitioned on line boundaries, so we will generally have lots of - * additions. Setting a large default byte array for a text object can avoid - * this and give us StringBuilder-like functionality for Text objects. + * Text-appending works almost exactly like the + operator on Strings- it creates a byte array exactly the size of [prefix + suffix] and dumps the bytes + * into the new array. This module works by doing lots of little additions, one line at a time. With most XML, the documents are partitioned on line + * boundaries, so we will generally have lots of additions. Setting a large default byte array for a text object can avoid this and give us + * StringBuilder-like functionality for Text objects. */ byte[] txtBuffer = new byte[2048]; aggValue.set(txtBuffer); } - + @Override public boolean nextKeyValue() throws IOException { aggValue.clear(); @@ -108,7 +103,7 @@ public class AggregatingRecordReader ext } return false; } - + /** * Populates aggValue with the contents of the Text object. * @@ -116,7 +111,7 @@ public class AggregatingRecordReader ext * @return true if aggValue is complete, else false and needs more data. */ private boolean process(Text t) { - + if (null != t) remainder.append(t.toString()); while (remainder.length() > 0) { @@ -143,12 +138,12 @@ public class AggregatingRecordReader ext if (returnPartialMatches && start >= 0) { // End token not found, but another start token was found... // The amount to copy is up to the beginning of the next start token - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false); + TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false); remainder.delete(0, start); return true; } else { // Not found, aggregate the entire remainder - TextUtil.textAppendNoNull(aggValue, remainder.toString(), false); + TextUtil.textAppendNoNull(aggValue, remainder.toString(), false); // Delete all chars from remainder remainder.delete(0, remainder.length()); } @@ -156,12 +151,12 @@ public class AggregatingRecordReader ext if (returnPartialMatches && start >= 0 && start < end) { // We found the end token, but found another start token first, so // deal with that. - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false); + TextUtil.textAppendNoNull(aggValue, remainder.substring(0, start), false); remainder.delete(0, start); return true; } else { // END_TOKEN was found. Extract to the end of END_TOKEN - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false); + TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false); // Remove from remainder up to the end of END_TOKEN remainder.delete(0, end + endToken.length()); return true; Modified: incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/LfLineReader.java URL: http://svn.apache.org/viewvc/incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/LfLineReader.java?rev=1210600&r1=1210599&r2=1210600&view=diff ============================================================================== --- incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/LfLineReader.java (original) +++ incubator/accumulo/trunk/contrib/accumulo_sample/ingest/src/main/java/reader/LfLineReader.java Mon Dec 5 20:05:49 2011 @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package reader; import java.io.IOException; @@ -34,24 +34,27 @@ public class LfLineReader { private int bufferLength = 0; // the current position in the buffer private int bufferPosn = 0; - + private static final byte LF = '\n'; - + /** - * Create a line reader that reads from the given stream using the - * default buffer-size (64k). - * @param in The input stream + * Create a line reader that reads from the given stream using the default buffer-size (64k). + * + * @param in + * The input stream * @throws IOException */ public LfLineReader(InputStream in) { this(in, DEFAULT_BUFFER_SIZE); } - + /** - * Create a line reader that reads from the given stream using the - * given buffer-size. - * @param in The input stream - * @param bufferSize Size of the read buffer + * Create a line reader that reads from the given stream using the given buffer-size. + * + * @param in + * The input stream + * @param bufferSize + * Size of the read buffer * @throws IOException */ public LfLineReader(InputStream in, int bufferSize) { @@ -59,67 +62,63 @@ public class LfLineReader { this.bufferSize = bufferSize; this.buffer = new byte[this.bufferSize]; } - + /** - * Create a line reader that reads from the given stream using the - * io.file.buffer.size specified in the given - * Configuration. - * @param in input stream - * @param conf configuration + * Create a line reader that reads from the given stream using the io.file.buffer.size specified in the given Configuration. + * + * @param in + * input stream + * @param conf + * configuration * @throws IOException */ public LfLineReader(InputStream in, Configuration conf) throws IOException { this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE)); } - + /** * Close the underlying stream. + * * @throws IOException */ public void close() throws IOException { in.close(); } - + /** - * Read one line from the InputStream into the given Text. A line - * can be terminated by '\n' (LF). EOF also terminates an - * otherwise unterminated line. - * - * @param str the object to store the given line (without newline) - * @param maxLineLength the maximum number of bytes to store into str; - * the rest of the line is silently discarded. - * @param maxBytesToConsume the maximum number of bytes to consume - * in this call. This is only a hint, because if the line cross - * this threshold, we allow it to happen. It can overshoot - * potentially by as much as one buffer length. - * - * @return the number of bytes read including the (longest) newline - * found. - * - * @throws IOException if the underlying stream throws - */ - public int readLine(Text str, int maxLineLength, - int maxBytesToConsume) throws IOException { - /* We're reading data from in, but the head of the stream may be - * already buffered in buffer, so we have several cases: - * 1. No newline characters are in the buffer, so we need to copy - * everything and read another buffer from the stream. - * 2. An unambiguously terminated line is in buffer, so we just - * copy to str. + * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). EOF also terminates an otherwise unterminated line. + * + * @param str + * the object to store the given line (without newline) + * @param maxLineLength + * the maximum number of bytes to store into str; the rest of the line is silently discarded. + * @param maxBytesToConsume + * the maximum number of bytes to consume in this call. This is only a hint, because if the line cross this threshold, we allow it to happen. It can + * overshoot potentially by as much as one buffer length. + * + * @return the number of bytes read including the (longest) newline found. + * + * @throws IOException + * if the underlying stream throws + */ + public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { + /* + * We're reading data from in, but the head of the stream may be already buffered in buffer, so we have several cases: 1. No newline characters are in the + * buffer, so we need to copy everything and read another buffer from the stream. 2. An unambiguously terminated line is in buffer, so we just copy to str. */ str.clear(); - int txtLength = 0; //tracks str.getLength(), as an optimization - int newlineLength = 0; //length of terminating newline + int txtLength = 0; // tracks str.getLength(), as an optimization + int newlineLength = 0; // length of terminating newline long bytesConsumed = 0; do { - int startPosn = bufferPosn; //starting from where we left off the last time + int startPosn = bufferPosn; // starting from where we left off the last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); if (bufferLength <= 0) break; // EOF } - for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline + for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline if (buffer[bufferPosn] == LF) { newlineLength = 1; ++bufferPosn; // at next invocation proceed from following byte @@ -137,31 +136,38 @@ public class LfLineReader { txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); - + if (bytesConsumed > Integer.MAX_VALUE) throw new IOException("Too many bytes before newline: " + bytesConsumed); - return (int)bytesConsumed; + return (int) bytesConsumed; } - + /** * Read from the InputStream into the given Text. - * @param str the object to store the given line - * @param maxLineLength the maximum number of bytes to store into str. + * + * @param str + * the object to store the given line + * @param maxLineLength + * the maximum number of bytes to store into str. * @return the number of bytes read including the newline - * @throws IOException if the underlying stream throws + * @throws IOException + * if the underlying stream throws */ public int readLine(Text str, int maxLineLength) throws IOException { return readLine(str, maxLineLength, Integer.MAX_VALUE); -} - + } + /** * Read from the InputStream into the given Text. - * @param str the object to store the given line + * + * @param str + * the object to store the given line * @return the number of bytes read including the newline - * @throws IOException if the underlying stream throws + * @throws IOException + * if the underlying stream throws */ public int readLine(Text str) throws IOException { return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE); } - + }