drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-5152) Enhance the mock data source: better data, SQL access
Date Sat, 07 Jan 2017 21:01:58 GMT

    [ https://issues.apache.org/jira/browse/DRILL-5152?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15808137#comment-15808137 ]

ASF GitHub Bot commented on DRILL-5152:
---------------------------------------

Github user paul-rogers commented on a diff in the pull request:

    https://github.com/apache/drill/pull/708#discussion_r95063773
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/ColumnDef.java
---
    @@ -0,0 +1,178 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.store.mock;
    +
    +import org.apache.drill.common.types.TypeProtos.MinorType;
    +import org.apache.drill.exec.expr.TypeHelper;
    +import org.apache.drill.exec.store.mock.MockGroupScanPOP.MockColumn;
    +
    +/**
    + * Defines a column for the "enhanced" version of the mock data
    + * source. This class is built from the column definitions in either
    + * the physical plan or an SQL statement (which gives rise to a
    + * physical plan.)
    + */
    +
    +public class ColumnDef {
    +  public MockColumn mockCol;
    +  public String name;
    +  public int width;
    +  public FieldGen generator;
    +
    +  public ColumnDef( MockColumn mockCol ) {
    +    this.mockCol = mockCol;
    +    name = mockCol.getName();
    +    width = TypeHelper.getSize(mockCol.getMajorType());
    +    makeGenerator( );
    +  }
    +
    +  /**
    +   * Create the data generator class for this column. The generator is
    +   * created to match the data type by default. Or, the plan can
    +   * specify a generator class (in which case the plan must ensure that
    +   * the generator produces the correct value for the column data type.)
    +   * The generator names a class: either a fully qualified name, or a
    +   * class in this package.
    +   */
    +
    +  private void makeGenerator( ) {
    +    String genName = mockCol.getGenerator( );
    +    if ( genName != null ) {
    +      if ( ! genName.contains(".") ) {
    +        genName = "org.apache.drill.exec.store.mock." + genName;
    +      }
    +      try {
    +        ClassLoader cl = getClass( ).getClassLoader();
    +        Class<?> genClass = cl.loadClass(genName);
    +        generator = (FieldGen) genClass.newInstance( );
    +      } catch (ClassNotFoundException | InstantiationException
    +          | IllegalAccessException | ClassCastException e) {
    +        throw new IllegalArgumentException( "Generator " + genName + " is undefined for
mock field " + name );
    +      }
    +      generator.setup( this );
    +      return;
    +    }
    +
    +    makeDefaultGenerator( );
    +  }
    +
    +  private void makeDefaultGenerator( ) {
    +
    +    MinorType minorType = mockCol.getMinorType();
    +    switch ( minorType ) {
    +    case BIGINT:
    +      break;
    +    case BIT:
    +      break;
    +    case DATE:
    +      break;
    +    case DECIMAL18:
    +      break;
    +    case DECIMAL28DENSE:
    +      break;
    +    case DECIMAL28SPARSE:
    +      break;
    +    case DECIMAL38DENSE:
    +      break;
    +    case DECIMAL38SPARSE:
    +      break;
    +    case DECIMAL9:
    +      break;
    +    case FIXED16CHAR:
    +      break;
    +    case FIXEDBINARY:
    +      break;
    +    case FIXEDCHAR:
    +      break;
    +    case FLOAT4:
    +      break;
    +    case FLOAT8:
    +      generator = new DoubleGen( );
    +      break;
    +    case GENERIC_OBJECT:
    +      break;
    +    case INT:
    +      generator = new IntGen( );
    +      break;
    +    case INTERVAL:
    +      break;
    +    case INTERVALDAY:
    +      break;
    +    case INTERVALYEAR:
    +      break;
    +    case LATE:
    +      break;
    +    case LIST:
    +      break;
    +    case MAP:
    +      break;
    +    case MONEY:
    +      break;
    +    case NULL:
    +      break;
    +    case SMALLINT:
    +      break;
    +    case TIME:
    +      break;
    +    case TIMESTAMP:
    +      break;
    +    case TIMESTAMPTZ:
    +      break;
    +    case TIMETZ:
    +      break;
    +    case TINYINT:
    +      break;
    +    case UINT1:
    +      break;
    +    case UINT2:
    +      break;
    +    case UINT4:
    +      break;
    +    case UINT8:
    +      break;
    +    case UNION:
    +      break;
    +    case VAR16CHAR:
    +      break;
    +    case VARBINARY:
    +      break;
    +    case VARCHAR:
    +      generator = new StringGen( );
    +      break;
    +    default:
    +      break;
    +    }
    +    if ( generator == null ) {
    +      throw new IllegalArgumentException( "No default column generator for column " +
name + " of type " + minorType );
    +    }
    +    generator.setup(this);
    +  }
    +
    +  public ColumnDef( MockColumn mockCol, int rep ) {
    +    this( mockCol );
    +    name = name += Integer.toString(rep);
    --- End diff --
    
    Typo. Fixed.


> Enhance the mock data source: better data, SQL access
> -----------------------------------------------------
>
>                 Key: DRILL-5152
>                 URL: https://issues.apache.org/jira/browse/DRILL-5152
>             Project: Apache Drill
>          Issue Type: Improvement
>          Components: Tools, Build & Test
>    Affects Versions: 1.9.0
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>            Priority: Minor
>
> Drill provides a mock data storage engine that generates random data. The mock engine
is used in some older unit tests that need a volume of data, but that are not too particular
about the details of the data.
> The mock data source continues to have use even for modern tests. For example, the work
in the external storage batch requires tests with varying amounts of data, but the exact form
of the data is not important, just the quantity. For example, if we want to ensure that spilling
happens at various trigger points, we need to read the right amount of data for that trigger.
> The existing mock data source has two limitations:
> 1. It generates only "black/white" (alternating) values, which is awkward for use in
sorting.
> 2. The mock generator is accessible only from a physical plan, but not from SQL queries.
> This enhancement proposes to fix both limitations:
> 1. Generate a uniform, randomly distributed set of values.
> 2. Provide an encoding that lets a SQL query specify the data to be generated.
> Example SQL query:
> {code}
> SELECT id_i, name_s50 FROM `mock`.employee_10K;
> {code}
> The above says to generate two fields: INTEGER (the "_i" suffix) and VARCHAR(50) (the
"_s50" suffix); and to generate 10,000 rows (the "_10K" suffix on the table).



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message