drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-5657) Implement size-aware result set loader
Date Fri, 10 Nov 2017 17:47:00 GMT

    [ https://issues.apache.org/jira/browse/DRILL-5657?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16247831#comment-16247831 ]

ASF GitHub Bot commented on DRILL-5657:
---------------------------------------

Github user parthchandra commented on a diff in the pull request:

    https://github.com/apache/drill/pull/914#discussion_r149758695
  
    --- Diff: exec/vector/src/main/codegen/templates/ColumnAccessors.java ---
    @@ -191,141 +180,268 @@ public void bind(RowIndex vectorIndex, ValueVector vector) {
         <#if accessorType=="BigDecimal">
           <#assign label="Decimal">
         </#if>
    +    <#if drillType == "VarChar" || drillType == "Var16Char">
    +      <#assign accessorType = "byte[]">
    +      <#assign label = "Bytes">
    +    </#if>
         <#if ! notyet>
       //------------------------------------------------------------------------
       // ${drillType} readers and writers
     
    -  public static class ${drillType}ColumnReader extends AbstractColumnReader {
    +  public static class ${drillType}ColumnReader extends BaseScalarReader {
     
    -    <@bindReader "" drillType />
    +    <@bindReader "" drillType false />
     
    -    <@getType label />
    +    <@getType drillType label />
     
         <@get drillType accessorType label false/>
       }
     
    -  public static class Nullable${drillType}ColumnReader extends AbstractColumnReader {
    +  public static class Nullable${drillType}ColumnReader extends BaseScalarReader {
     
    -    <@bindReader "Nullable" drillType />
    +    <@bindReader "Nullable" drillType false />
     
    -    <@getType label />
    +    <@getType drillType label />
     
         @Override
         public boolean isNull() {
    -      return accessor().isNull(vectorIndex.index());
    -    }
    -
    -    <@get drillType accessorType label false/>
    -  }
    -
    -  public static class Repeated${drillType}ColumnReader extends AbstractArrayReader {
    -
    -    <@bindReader "Repeated" drillType />
    -
    -    <@getType label />
    -
    -    @Override
    -    public int size() {
    -      return accessor().getInnerValueCountAt(vectorIndex.index());
    +      return accessor().isNull(vectorIndex.vectorIndex());
         }
     
    -    <@get drillType accessorType label true/>
    +    <@get drillType accessorType label false />
       }
     
    -  public static class ${drillType}ColumnWriter extends AbstractColumnWriter {
    +  public static class Repeated${drillType}ColumnReader extends BaseElementReader {
     
    -    <@bindWriter "" drillType />
    +    <@bindReader "" drillType true />
     
    -    <@getType label />
    +    <@getType drillType label />
     
    -    <@set drillType accessorType label false "set" />
    +    <@get drillType accessorType label true />
       }
     
    -  public static class Nullable${drillType}ColumnWriter extends AbstractColumnWriter {
    -
    -    <@bindWriter "Nullable" drillType />
    +      <#assign varWidth = drillType == "VarChar" || drillType == "Var16Char" || drillType
== "VarBinary" />
    +      <#if varWidth>
    +  public static class ${drillType}ColumnWriter extends BaseVarWidthWriter {
    +      <#else>
    +  public static class ${drillType}ColumnWriter extends BaseFixedWidthWriter {
    +        <#if drillType = "Decimal9" || drillType == "Decimal18" ||
    +             drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
    +    private MajorType type;
    +        </#if>
    +    private static final int VALUE_WIDTH = ${drillType}Vector.VALUE_WIDTH;
    +      </#if>
    +    private final ${drillType}Vector vector;
    +
    +    public ${drillType}ColumnWriter(final ValueVector vector) {
    +      <#if varWidth>
    +      super(((${drillType}Vector) vector).getOffsetVector());
    +      <#else>
    +        <#if drillType = "Decimal9" || drillType == "Decimal18" ||
    +             drillType == "Decimal28Sparse" || drillType == "Decimal38Sparse">
    +      type = vector.getField().getType();
    +        </#if>
    +      </#if>
    +      this.vector = (${drillType}Vector) vector;
    +    }
     
    -    <@getType label />
    +    @Override public ValueVector vector() { return vector; }
     
    +        <#-- All change of buffer comes through this function to allow capturing
    +             the buffer address and capacity. Only two ways to set the buffer:
    +             by binding to a vector in bindVector(), or by resizing the vector
    +             in writeIndex(). -->
         @Override
    -    public void setNull() {
    -      mutator.setNull(vectorIndex.index());
    +    protected final void setAddr() {
    +      final DrillBuf buf = vector.getBuffer();
    +      bufAddr = buf.addr();
    +          <#if varWidth>
    +      capacity = buf.capacity();
    +          <#else>
    +          <#-- Turns out that keeping track of capacity as the count of
    +               values simplifies the per-value code path. -->
    +      capacity = buf.capacity() / VALUE_WIDTH;
    +          </#if>
         }
     
    -    <@set drillType accessorType label true "set" />
    -  }
    -
    -  public static class Repeated${drillType}ColumnWriter extends AbstractArrayWriter {
    -
    -    <@bindWriter "Repeated" drillType />
    +        <#-- reallocRaw() is type specific. -->
    +    @Override
    +    protected void realloc(int size) {
    +      vector.reallocRaw(size);
    +      setAddr();
    +    }
     
    -    <@getType label />
    +       <#if ! varWidth>
    +    @Override public int width() { return VALUE_WIDTH; }
    +
    +      </#if>
    +    <@getType drillType label />
    +
    +      <#if accessorType == "byte[]">
    +        <#assign args = ", int len">
    +      <#else>
    +        <#assign args = "">
    +      </#if>
    +      <#if javaType == "char">
    +        <#assign putType = "short" />
    +        <#assign doCast = true />
    +      <#else>
    +        <#assign putType = javaType />
    +        <#assign doCast = (cast == "set") />
    +      </#if>
    +      <#if ! varWidth>
    +    @Override
    +    protected final void fillEmpties(final int writeIndex) {
    +      <#-- Fill empties. This is required because the allocated memory is not
    +           zero-filled. -->
    +      while (lastWriteIndex < writeIndex - 1) {
    +        <#-- Implemented in a type-specific way because PlatformDependent does not
    +             provide a general-purpose "fillBytes" function that we could use instead.
    +             If that was provided, we'd just fill the entire missing span of data
    +             with zeros. -->
    +        <#assign putAddr = "bufAddr + ++lastWriteIndex * VALUE_WIDTH" />
    --- End diff --
    
    Three +'s in a row are a trifle tricky to read. 


> Implement size-aware result set loader
> --------------------------------------
>
>                 Key: DRILL-5657
>                 URL: https://issues.apache.org/jira/browse/DRILL-5657
>             Project: Apache Drill
>          Issue Type: Improvement
>    Affects Versions: Future
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>             Fix For: Future
>
>
> A recent extension to Drill's set of test tools created a "row set" abstraction to allow
> us to create and verify record batches with very few lines of code. Part of this work involved
> creating a set of "column accessors" in the vector subsystem. Column readers provide a uniform
> API to obtain data from columns (vectors), while column writers provide a uniform writing
> interface.
> DRILL-5211 discusses a set of changes to limit value vectors to 16 MB in size (to avoid
> memory fragmentation due to Drill's two memory allocators). The column accessors have proven
> to be so useful that they will be the basis for the new, size-aware writers used by Drill's
> record readers.
> A step in that direction is to retrofit the column writers to use the size-aware {{setScalar()}}
> and {{setArray()}} methods introduced in DRILL-5517.
> Since the test framework row set classes are (at present) the only consumer of the accessors,
> those classes must also be updated with the changes.
> This then allows us to add a new "row mutator" class that handles size-aware vector writing,
> including the case in which a vector fills in the middle of a row.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Mime
View raw message