drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (DRILL-5080) Create a memory-managed version of the External Sort operator
Date Thu, 02 Feb 2017 03:14:52 GMT

    [ https://issues.apache.org/jira/browse/DRILL-5080?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15849325#comment-15849325
] 

ASF GitHub Bot commented on DRILL-5080:
---------------------------------------

Github user Ben-Zvi commented on a diff in the pull request:

    https://github.com/apache/drill/pull/717#discussion_r99039654
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/OperatorCodeGenerator.java
---
    @@ -0,0 +1,271 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.physical.impl.xsort.managed;
    +
    +import java.io.IOException;
    +import java.util.List;
    +
    +import org.apache.calcite.rel.RelFieldCollation.Direction;
    +import org.apache.drill.common.exceptions.UserException;
    +import org.apache.drill.common.expression.ErrorCollector;
    +import org.apache.drill.common.expression.ErrorCollectorImpl;
    +import org.apache.drill.common.expression.LogicalExpression;
    +import org.apache.drill.common.logical.data.Order.Ordering;
    +import org.apache.drill.exec.compile.sig.GeneratorMapping;
    +import org.apache.drill.exec.compile.sig.MappingSet;
    +import org.apache.drill.exec.exception.ClassTransformationException;
    +import org.apache.drill.exec.expr.ClassGenerator;
    +import org.apache.drill.exec.expr.ClassGenerator.HoldingContainer;
    +import org.apache.drill.exec.expr.CodeGenerator;
    +import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
    +import org.apache.drill.exec.expr.fn.FunctionGenerationHelper;
    +import org.apache.drill.exec.ops.FragmentContext;
    +import org.apache.drill.exec.physical.config.ExternalSort;
    +import org.apache.drill.exec.physical.config.Sort;
    +import org.apache.drill.exec.physical.impl.xsort.SingleBatchSorter;
    +import org.apache.drill.exec.record.BatchSchema;
    +import org.apache.drill.exec.record.VectorAccessible;
    +import org.apache.drill.exec.vector.CopyUtil;
    +
    +import com.sun.codemodel.JConditional;
    +import com.sun.codemodel.JExpr;
    +
    +/**
    + * Generates and manages the data-specific classes for this operator.
    + * <p>
    + * Several of the code generation methods take a batch, but the methods
    + * are called for many batches, and generate code only for the first one.
    + * Better would be to generate code from a schema; but Drill is not set
    + * up for that at present.
    + */
    +
    +public class OperatorCodeGenerator {
    +  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(OperatorCodeGenerator.class);
    +
    +  protected static final MappingSet MAIN_MAPPING = new MappingSet((String) null, null,
ClassGenerator.DEFAULT_SCALAR_MAP, ClassGenerator.DEFAULT_SCALAR_MAP);
    +  protected static final MappingSet LEFT_MAPPING = new MappingSet("leftIndex", null,
ClassGenerator.DEFAULT_SCALAR_MAP, ClassGenerator.DEFAULT_SCALAR_MAP);
    +  protected static final MappingSet RIGHT_MAPPING = new MappingSet("rightIndex", null,
ClassGenerator.DEFAULT_SCALAR_MAP, ClassGenerator.DEFAULT_SCALAR_MAP);
    +
    +  private static final GeneratorMapping COPIER_MAPPING = new GeneratorMapping("doSetup",
"doCopy", null, null);
    +  private static final MappingSet COPIER_MAPPING_SET = new MappingSet(COPIER_MAPPING,
COPIER_MAPPING);
    +
    +  private final FragmentContext context;
    +  @SuppressWarnings("unused")
    +  private BatchSchema schema;
    +
    +  /**
    +   * A single PriorityQueueCopier instance is used for 2 purposes:
    +   * 1. Merge sorted batches before spilling
    +   * 2. Merge sorted batches when all incoming data fits in memory
    +   */
    +
    +  private PriorityQueueCopier copier;
    +  private final Sort popConfig;
    +  private MSorter mSorter;
    +
    +  /**
    +   * Generated sort operation used to sort each incoming batch according to
    +   * the sort criteria specified in the {@link ExternalSort} definition of
    +   * this operator.
    +   */
    +
    +  private SingleBatchSorter sorter;
    +
    +  public OperatorCodeGenerator(FragmentContext context, Sort popConfig) {
    +    this.context = context;
    +    this.popConfig = popConfig;
    +  }
    +
    +  public void setSchema(BatchSchema schema) {
    +    close();
    +    this.schema = schema;
    +  }
    +
    +  public void close() {
    +    closeCopier();
    +    closeMSorter();
    +    sorter = null;
    +  }
    +
    +  private void closeMSorter() {
    +    if (mSorter == null) {
    +      return; }
    +    mSorter.clear();
    +    mSorter = null;
    +  }
    +
    +  public void closeCopier() {
    +    if (copier == null) {
    +      return; }
    +    try {
    +      copier.close();
    +      copier = null;
    +    } catch (IOException e) {
    +      throw UserException.dataWriteError(e)
    +            .message("Failure while flushing spilled data")
    +            .build(logger);
    +    }
    +  }
    +
    +  public PriorityQueueCopier getCopier(VectorAccessible batch) {
    +    if (copier == null) {
    +      copier = generateCopier(batch);
    +    }
    +    return copier;
    +  }
    +
    +  private PriorityQueueCopier generateCopier(VectorAccessible batch) {
    +    // Generate the copier code and obtain the resulting class
    +
    +    CodeGenerator<PriorityQueueCopier> cg = CodeGenerator.get(PriorityQueueCopier.TEMPLATE_DEFINITION,
context.getFunctionRegistry(), context.getOptions());
    +    ClassGenerator<PriorityQueueCopier> g = cg.getRoot();
    +    cg.plainJavaCapable(true);
    +    // Uncomment out this line to debug the generated code.
    +//  cg.saveCodeForDebugging(true);
    +
    +    generateComparisons(g, batch);
    +
    +    g.setMappingSet(COPIER_MAPPING_SET);
    +    CopyUtil.generateCopies(g, batch, true);
    +    g.setMappingSet(MAIN_MAPPING);
    +    return getInstance(cg);
    +  }
    +
    +  public MSorter createNewMSorter(VectorAccessible batch) {
    --- End diff --
    
    Maybe change the method name to getMSorter() ?  As internally it does not always create
a new MSorter.


> Create a memory-managed version of the External Sort operator
> -------------------------------------------------------------
>
>                 Key: DRILL-5080
>                 URL: https://issues.apache.org/jira/browse/DRILL-5080
>             Project: Apache Drill
>          Issue Type: Improvement
>    Affects Versions: 1.8.0
>            Reporter: Paul Rogers
>            Assignee: Paul Rogers
>             Fix For: 1.10.0
>
>         Attachments: ManagedExternalSortDesign.pdf
>
>
> We propose to create a "managed" version of the external sort operator that works to
a clearly-defined memory limit. Attached is a design specification for the work.
> The project will include fixing a number of bugs related to the external sort, include
as sub-tasks of this umbrella task.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Mime
View raw message