Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 9353F2004F1 for ; Wed, 30 Aug 2017 19:57:13 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 91C11162687; Wed, 30 Aug 2017 17:57:13 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id B1C82161A11 for ; Wed, 30 Aug 2017 19:57:12 +0200 (CEST) Received: (qmail 23257 invoked by uid 500); 30 Aug 2017 17:57:11 -0000 Mailing-List: contact issues-help@drill.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@drill.apache.org Delivered-To: mailing list issues@drill.apache.org Received: (qmail 23248 invoked by uid 99); 30 Aug 2017 17:57:11 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 30 Aug 2017 17:57:11 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 651051A02F8 for ; Wed, 30 Aug 2017 17:57:11 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -100.002 X-Spam-Level: X-Spam-Status: No, score=-100.002 tagged_above=-999 required=6.31 tests=[RP_MATCHES_RCVD=-0.001, SPF_PASS=-0.001, USER_IN_WHITELIST=-100] autolearn=disabled Received: from mx1-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id IYqbMmEyEQYw for ; Wed, 30 Aug 2017 17:57:09 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-lw-eu.apache.org (ASF Mail Server at mx1-lw-eu.apache.org) with ESMTP id 
070E96104E for ; Wed, 30 Aug 2017 17:57:03 +0000 (UTC) Received: from jira-lw-us.apache.org (unknown [207.244.88.139]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 7FA49E06BD for ; Wed, 30 Aug 2017 17:57:01 +0000 (UTC) Received: from jira-lw-us.apache.org (localhost [127.0.0.1]) by jira-lw-us.apache.org (ASF Mail Server at jira-lw-us.apache.org) with ESMTP id 7EE412416E for ; Wed, 30 Aug 2017 17:57:00 +0000 (UTC) Date: Wed, 30 Aug 2017 17:57:00 +0000 (UTC) From: "ASF GitHub Bot (JIRA)" To: issues@drill.apache.org Message-ID: In-Reply-To: References: Subject: [jira] [Commented] (DRILL-5546) Schema change problems caused by empty batch MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit X-JIRA-FingerPrint: 30527f35849b9dde25b450d4833f0394 archived-at: Wed, 30 Aug 2017 17:57:13 -0000 [ https://issues.apache.org/jira/browse/DRILL-5546?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16147691#comment-16147691 ] ASF GitHub Bot commented on DRILL-5546: --------------------------------------- Github user jinfengni commented on a diff in the pull request: https://github.com/apache/drill/pull/906#discussion_r136143881 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/union/UnionAllRecordBatch.java --- @@ -39,88 +35,107 @@ import org.apache.drill.exec.expr.ValueVectorWriteExpression; import org.apache.drill.exec.ops.FragmentContext; import org.apache.drill.exec.physical.config.UnionAll; -import org.apache.drill.exec.record.AbstractRecordBatch; +import org.apache.drill.exec.record.AbstractBinaryRecordBatch; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.MaterializedField; import org.apache.drill.exec.record.RecordBatch; import org.apache.drill.exec.record.TransferPair; import org.apache.drill.exec.record.TypedFieldId; import org.apache.drill.exec.record.VectorWrapper; -import 
org.apache.drill.exec.record.WritableBatch; -import org.apache.drill.exec.record.selection.SelectionVector2; -import org.apache.drill.exec.record.selection.SelectionVector4; import org.apache.drill.exec.resolver.TypeCastRules; import org.apache.drill.exec.vector.AllocationHelper; import org.apache.drill.exec.vector.FixedWidthVector; import org.apache.drill.exec.vector.SchemaChangeCallBack; import org.apache.drill.exec.vector.ValueVector; -import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Stack; -public class UnionAllRecordBatch extends AbstractRecordBatch { +public class UnionAllRecordBatch extends AbstractBinaryRecordBatch { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(UnionAllRecordBatch.class); - private List outputFields; + private SchemaChangeCallBack callBack = new SchemaChangeCallBack(); private UnionAller unionall; - private UnionAllInput unionAllInput; - private RecordBatch current; - private final List transfers = Lists.newArrayList(); - private List allocationVectors; - protected SchemaChangeCallBack callBack = new SchemaChangeCallBack(); + private List allocationVectors = Lists.newArrayList(); private int recordCount = 0; - private boolean schemaAvailable = false; + private UnionInputIterator unionInputIterator; public UnionAllRecordBatch(UnionAll config, List children, FragmentContext context) throws OutOfMemoryException { - super(config, context, false); - assert (children.size() == 2) : "The number of the operands of Union must be 2"; - unionAllInput = new UnionAllInput(this, children.get(0), children.get(1)); - } - - @Override - public int getRecordCount() { - return recordCount; + super(config, context, true, children.get(0), children.get(1)); } @Override protected void killIncoming(boolean sendUpstream) { - unionAllInput.getLeftRecordBatch().kill(sendUpstream); - 
unionAllInput.getRightRecordBatch().kill(sendUpstream); + left.kill(sendUpstream); + right.kill(sendUpstream); } - @Override - public SelectionVector2 getSelectionVector2() { - throw new UnsupportedOperationException("UnionAllRecordBatch does not support selection vector"); - } + protected void buildSchema() throws SchemaChangeException { + if (! prefetchFirstBatchFromBothSides()) { + return; + } - @Override - public SelectionVector4 getSelectionVector4() { - throw new UnsupportedOperationException("UnionAllRecordBatch does not support selection vector"); + unionInputIterator = new UnionInputIterator(leftUpstream, left, rightUpstream, right); + + if (leftUpstream == IterOutcome.NONE && rightUpstream == IterOutcome.OK_NEW_SCHEMA) { + inferOutputFieldsOneSide(right.getSchema()); + } else if (rightUpstream == IterOutcome.NONE && leftUpstream == IterOutcome.OK_NEW_SCHEMA) { + inferOutputFieldsOneSide((left.getSchema())); + } else if (leftUpstream == IterOutcome.OK_NEW_SCHEMA && rightUpstream == IterOutcome.OK_NEW_SCHEMA) { + inferOutputFieldsBothSide(left.getSchema(), right.getSchema()); + } + + container.buildSchema(BatchSchema.SelectionVectorMode.NONE); + + for (VectorWrapper vv: container) { + vv.getValueVector().allocateNew(); + vv.getValueVector().getMutator().setValueCount(0); + } } @Override public IterOutcome innerNext() { try { - IterOutcome upstream = unionAllInput.nextBatch(); - logger.debug("Upstream of Union-All: {}", upstream); + if (!unionInputIterator.hasNext()) { + return IterOutcome.NONE; + } + + Pair nextBatch = unionInputIterator.next(); + + IterOutcome upstream = nextBatch.left; + RecordBatch incoming = nextBatch.right; + + // skip batches with same schema as the previous one yet having 0 row. 
+ if (upstream == IterOutcome.OK && incoming.getRecordCount() == 0) { + do { + for (final VectorWrapper w : incoming) { + w.clear(); + } + if (!unionInputIterator.hasNext()) { + return IterOutcome.NONE; + } + nextBatch = unionInputIterator.next(); + upstream = nextBatch.left; + incoming = nextBatch.right; + } while ((upstream == IterOutcome.OK) && + incoming.getRecordCount() == 0); + } + --- End diff -- The loop does not have to handle `STOP` or `OOM` as they are handled in the `switch` statement. Previously, the loop was intended to handle only the case of batches with the same schema yet having 0 rows. The revised patch adopted your suggestion, by putting everything in the loop. > Schema change problems caused by empty batch > -------------------------------------------- > > Key: DRILL-5546 > URL: https://issues.apache.org/jira/browse/DRILL-5546 > Project: Apache Drill > Issue Type: Bug > Reporter: Jinfeng Ni > Assignee: Jinfeng Ni > > There have been a few JIRAs opened related to schema change failure caused by empty batch. This JIRA is opened as an umbrella for all those related JIRAs (such as DRILL-4686, DRILL-4734, DRILL-4476, DRILL-4255, etc.). > -- This message was sent by Atlassian JIRA (v6.4.14#64029)