drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-5657) Implement size-aware result set loader
Date Wed, 09 Aug 2017 00:03:00 GMT

    [ https://issues.apache.org/jira/browse/DRILL-5657?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16119217#comment-16119217 ]

ASF GitHub Bot commented on DRILL-5657:
---------------------------------------

Github user bitblender commented on a diff in the pull request:

    https://github.com/apache/drill/pull/866#discussion_r131554894
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/rowSet/impl/ResultSetLoaderImpl.java
---
    @@ -0,0 +1,412 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.physical.rowSet.impl;
    +
    +import java.util.Collection;
    +
    +import org.apache.drill.common.exceptions.UserException;
    +import org.apache.drill.exec.memory.BufferAllocator;
    +import org.apache.drill.exec.physical.rowSet.ResultSetLoader;
    +import org.apache.drill.exec.physical.rowSet.TupleLoader;
    +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode;
    +import org.apache.drill.exec.record.VectorContainer;
    +import org.apache.drill.exec.vector.ValueVector;
    +
    +/**
    + * Implementation of the result set loader.
    + * @see {@link ResultSetLoader}
    + */
    +
    +public class ResultSetLoaderImpl implements ResultSetLoader, WriterIndexImpl.WriterIndexListener {
    +
      // Immutable configuration for the result-set loader. Public fields are
      // read directly by the loader rather than through getters.
      // NOTE(review): defaults leave selection and inventory null — presumably
      // null selection means "no projection list" and null inventory means
      // "no vector cache"; confirm against the consuming code.
    +  public static class ResultSetOptions {
    +    public final int vectorSizeLimit;
    +    public final int rowCountLimit;
    +    public final boolean caseSensitive;
    +    public final ResultVectorCache inventory;
    +    private final Collection<String> selection;
    +
      // Default options: size and row-count limits taken from the
      // ValueVector constants, case-insensitive column names, no
      // column selection, no vector cache.
    +    public ResultSetOptions() {
    +      vectorSizeLimit = ValueVector.MAX_BUFFER_SIZE;
    +      rowCountLimit = ValueVector.MAX_ROW_COUNT;
    +      caseSensitive = false;
    +      selection = null;
    +      inventory = null;
    +    }
    +
      // Copies every field from the builder; invoked only by
      // OptionBuilder.build().
    +    public ResultSetOptions(OptionBuilder builder) {
    +      this.vectorSizeLimit = builder.vectorSizeLimit;
    +      this.rowCountLimit = builder.rowCountLimit;
    +      this.caseSensitive = builder.caseSensitive;
    +      this.selection = builder.selection;
    +      this.inventory = builder.inventory;
    +    }
    +  }
    +
      // Fluent builder for ResultSetOptions. Seeds its numeric/boolean
      // fields from the default ResultSetOptions; selection and inventory
      // start null and are set only via their setters.
    +  public static class OptionBuilder {
    +    private int vectorSizeLimit;
    +    private int rowCountLimit;
    +    private boolean caseSensitive;
    +    private Collection<String> selection;
    +    private ResultVectorCache inventory;
    +
      // Builds a throw-away default options object purely to copy its
      // default limit values; selection/inventory are left at null here.
    +    public OptionBuilder() {
    +      ResultSetOptions options = new ResultSetOptions();
    +      vectorSizeLimit = options.vectorSizeLimit;
    +      rowCountLimit = options.rowCountLimit;
    +      caseSensitive = options.caseSensitive;
    +    }
    +
      // Whether column-name lookup is case sensitive. Returns this for chaining.
    +    public OptionBuilder setCaseSensitive(boolean flag) {
    +      caseSensitive = flag;
    +      return this;
    +    }
    +
      // Caps the per-batch row count; values above ValueVector.MAX_ROW_COUNT
      // are silently clamped down to that maximum.
      // NOTE(review): no lower-bound check — a zero or negative limit is
      // accepted as-is; confirm callers never pass one.
    +    public OptionBuilder setRowCountLimit(int limit) {
    +      rowCountLimit = Math.min(limit, ValueVector.MAX_ROW_COUNT);
    +      return this;
    +    }
    +
      // Set of column names to project; stored by reference (not copied).
    +    public OptionBuilder setSelection(Collection<String> selection) {
    +      this.selection = selection;
    +      return this;
    +    }
    +
      // Optional cache used to reuse vectors across batches.
    +    public OptionBuilder setVectorCache(ResultVectorCache inventory) {
    +      this.inventory = inventory;
    +      return this;
    +    }
    +
    +    // TODO: No setter for vector length yet: is hard-coded
    +    // at present in the value vector.
    +
      // Freezes the current builder state into an immutable options object.
    +    public ResultSetOptions build() {
    +      return new ResultSetOptions(this);
    +    }
    +  }
    +
      // Lazily materializes the loader's vectors into a VectorContainer.
      // The container is rebuilt only when the loader's schema version has
      // advanced past the version captured at the last update(), so repeated
      // calls with an unchanged schema are cheap.
    +  public static class VectorContainerBuilder {
    +    private final ResultSetLoaderImpl rowSetMutator;
      // Schema version at the time of the last rebuild; -1 forces the
      // first update() call to build the container.
    +    private int lastUpdateVersion = -1;
    +    private VectorContainer container;
    +
    +    public VectorContainerBuilder(ResultSetLoaderImpl rowSetMutator) {
    +      this.rowSetMutator = rowSetMutator;
    +      container = new VectorContainer(rowSetMutator.allocator);
    +    }
    +
      // Rebuilds the container iff the loader's schema has changed since the
      // last call: the root tuple re-adds its vectors (via add() below), the
      // batch schema is rebuilt with no selection vector, and the version
      // watermark is advanced.
    +    public void update() {
    +      if (lastUpdateVersion < rowSetMutator.schemaVersion()) {
    +        rowSetMutator.rootTuple.buildContainer(this);
    +        container.buildSchema(SelectionVectorMode.NONE);
    +        lastUpdateVersion = rowSetMutator.schemaVersion();
    +      }
    +    }
    +
      // Current container; callers should invoke update() first to ensure
      // it reflects the latest schema.
    +    public VectorContainer container() { return container; }
    +
    +    public int lastUpdateVersion() { return lastUpdateVersion; }
    +
      // Callback used by the tuple set during buildContainer() to register
      // each vector with the container.
    +    public void add(ValueVector vector) {
    +      container.add(vector);
    +    }
    +  }
    +
      // Lifecycle of the loader/mutator. The loader's methods check this
      // state to decide which operations are legal at any moment.
    +  private enum State {
    +    /**
    +     * Before the first batch.
    +     */
    +    START,
    +    /**
    +     * Writing to a batch normally.
    +     */
    +    ACTIVE,
    +    /**
    +     * Batch overflowed a vector while writing. Can continue
    +     * to write to a temporary "overflow" batch until the
    +     * end of the current row.
    +     */
    +    OVERFLOW,
    +    /**
    +     * Batch is full due to reaching the row count limit
    +     * when saving a row.
    +     * No more writes allowed until harvesting the current batch.
    +     */
    +    FULL_BATCH,
    +
    +    /**
    +     * Current batch was harvested: data is gone. A lookahead
    +     * row may exist for the next batch.
    +     */
    +    HARVESTED,
    +    /**
    +     * Mutator is closed: no more operations are allowed.
    +     */
    +    CLOSED
    +  }
    +
    +  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ResultSetLoaderImpl.class);
    +
    +  private final ResultSetOptions options;
    +  private final BufferAllocator allocator;
    +  private final TupleSetImpl rootTuple;
    +  private final TupleLoader rootWriter;
    +  private final WriterIndexImpl writerIndex;
    +  private final ResultVectorCache inventory;
    +  private ResultSetLoaderImpl.State state = State.START;
    +  private int activeSchemaVersion = 0;
    +  private int harvestSchemaVersion = 0;
    --- End diff --
    
    It is not obvious how the schema version is used. Comments would be helpful.


> Implement size-aware result set loader
> --------------------------------------
>
>                 Key: DRILL-5657
>                 URL: https://issues.apache.org/jira/browse/DRILL-5657
>             Project: Apache Drill
>          Issue Type: Improvement
>    Affects Versions: Future
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>             Fix For: Future
>
>
> A recent extension to Drill's set of test tools created a "row set" abstraction to allow
us to create, and verify, record batches with very few lines of code. Part of this work involved
creating a set of "column accessors" in the vector subsystem. Column readers provide a uniform
API to obtain data from columns (vectors), while column writers provide a uniform writing
interface.
> DRILL-5211 discusses a set of changes to limit value vectors to 16 MB in size (to avoid
memory fragmentation due to Drill's two memory allocators.) The column accessors have proven
to be so useful that they will be the basis for the new, size-aware writers used by Drill's
record readers.
> A step in that direction is to retrofit the column writers to use the size-aware {{setScalar()}}
and {{setArray()}} methods introduced in DRILL-5517.
> Since the test framework row set classes are (at present) the only consumer of the accessors,
those classes must also be updated with the changes.
> This then allows us to add a new "row mutator" class that handles size-aware vector writing,
including the case in which a vector fills in the middle of a row.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Mime
View raw message