Return-Path: X-Original-To: apmail-crunch-commits-archive@www.apache.org Delivered-To: apmail-crunch-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 647B218645 for ; Tue, 28 Jul 2015 03:08:39 +0000 (UTC) Received: (qmail 32849 invoked by uid 500); 28 Jul 2015 03:08:36 -0000 Delivered-To: apmail-crunch-commits-archive@crunch.apache.org Received: (qmail 32807 invoked by uid 500); 28 Jul 2015 03:08:36 -0000 Mailing-List: contact commits-help@crunch.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@crunch.apache.org Delivered-To: mailing list commits@crunch.apache.org Received: (qmail 32798 invoked by uid 99); 28 Jul 2015 03:08:36 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 28 Jul 2015 03:08:36 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 29688E0AFA; Tue, 28 Jul 2015 03:08:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jwills@apache.org To: commits@crunch.apache.org Message-Id: <366a28931ed9445e98e5aae30d48ff36@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: crunch git commit: CRUNCH-553: Fix record drop issue that can occur w/From.formattedFile TableSources Date: Tue, 28 Jul 2015 03:08:36 +0000 (UTC) Repository: crunch Updated Branches: refs/heads/master 2f5b33ead -> 3a1d474b0 CRUNCH-553: Fix record drop issue that can occur w/From.formattedFile TableSources Project: http://git-wip-us.apache.org/repos/asf/crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/3a1d474b Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/3a1d474b Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/3a1d474b Branch: refs/heads/master Commit: 3a1d474b0514f02a5ce46f6db1fbe2e8abcb03be 
Parents: 2f5b33e Author: Josh Wills Authored: Mon Jul 27 18:45:23 2015 -0700 Committer: Josh Wills Committed: Mon Jul 27 19:35:30 2015 -0700 ---------------------------------------------------------------------- .../it/java/org/apache/crunch/RecordDropIT.java | 77 ++++++++++++++++++++ .../impl/dist/collect/BaseInputTable.java | 3 + 2 files changed, 80 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/crunch/blob/3a1d474b/crunch-core/src/it/java/org/apache/crunch/RecordDropIT.java ---------------------------------------------------------------------- diff --git a/crunch-core/src/it/java/org/apache/crunch/RecordDropIT.java b/crunch-core/src/it/java/org/apache/crunch/RecordDropIT.java new file mode 100644 index 0000000..8c4c57f --- /dev/null +++ b/crunch-core/src/it/java/org/apache/crunch/RecordDropIT.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.crunch; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import org.apache.crunch.impl.mr.MRPipeline; +import org.apache.crunch.io.From; +import org.apache.crunch.test.TemporaryPath; +import org.apache.crunch.test.TemporaryPaths; +import org.apache.crunch.types.writable.Writables; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.junit.Rule; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class RecordDropIT { + @Rule + public TemporaryPath tmpDir = TemporaryPaths.create(); + + @Test + public void testMultiReadCount() throws Exception { + int numReads = 10; + MRPipeline p = new MRPipeline(RecordDropIT.class, tmpDir.getDefaultConfiguration()); + Path shakes = tmpDir.copyResourcePath("shakes.txt"); + TableSource<LongWritable, Text> src = From.formattedFile(shakes, + TextInputFormat.class, LongWritable.class, Text.class); + PTable<LongWritable, Text> in = p.read(src); + List<Iterable<Integer>> values = Lists.newArrayList(); + for (int i = 0; i < numReads; i++) { + PCollection<Integer> cnt = in.parallelDo(new LineCountFn<Pair<LongWritable, Text>>(), Writables.ints()); + values.add(cnt.materialize()); + } + int index = 0; + for (Iterable<Integer> iter : values) { + assertEquals("Checking index = " + index, 3667, Iterables.getFirst(iter, 0).intValue()); + index++; + } + p.done(); + } + + public static class LineCountFn<T> extends DoFn<T, Integer> { + + private int count = 0; + + @Override + public void process(T input, Emitter<Integer> emitter) { + count++; + } + + @Override + public void cleanup(Emitter<Integer> emitter) { + emitter.emit(count); + } + } +} http://git-wip-us.apache.org/repos/asf/crunch/blob/3a1d474b/crunch-core/src/main/java/org/apache/crunch/impl/dist/collect/BaseInputTable.java ---------------------------------------------------------------------- diff --git 
a/crunch-core/src/main/java/org/apache/crunch/impl/dist/collect/BaseInputTable.java b/crunch-core/src/main/java/org/apache/crunch/impl/dist/collect/BaseInputTable.java index 18a671b..6315eb4 100644 --- a/crunch-core/src/main/java/org/apache/crunch/impl/dist/collect/BaseInputTable.java +++ b/crunch-core/src/main/java/org/apache/crunch/impl/dist/collect/BaseInputTable.java @@ -98,6 +98,9 @@ public class BaseInputTable<K, V> extends PTableBase<K, V> { @Override public boolean equals(Object other) { + if (other == null || !(other instanceof BaseInputTable)) { + return false; + } return asCollection.equals(other); } }