drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From paul-rogers <...@git.apache.org>
Subject [GitHub] drill pull request #808: DRILL-5325: Unit tests for the managed sort
Date Tue, 06 Jun 2017 22:40:46 GMT
Github user paul-rogers commented on a diff in the pull request:

    https://github.com/apache/drill/pull/808#discussion_r120485596
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/xsort/managed/SortImpl.java
---
    @@ -0,0 +1,495 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.drill.exec.physical.impl.xsort.managed;
    +
    +import java.io.IOException;
    +import java.util.List;
    +
    +import org.apache.drill.exec.memory.BufferAllocator;
    +import org.apache.drill.exec.ops.OperExecContext;
    +import org.apache.drill.exec.physical.impl.spill.RecordBatchSizer;
    +import org.apache.drill.exec.physical.impl.xsort.MSortTemplate;
    +import org.apache.drill.exec.physical.impl.xsort.managed.BatchGroup.InputBatch;
    +import org.apache.drill.exec.physical.impl.xsort.managed.SortMemoryManager.MergeTask;
    +import org.apache.drill.exec.record.BatchSchema;
    +import org.apache.drill.exec.record.VectorAccessible;
    +import org.apache.drill.exec.record.VectorAccessibleUtilities;
    +import org.apache.drill.exec.record.VectorContainer;
    +import org.apache.drill.exec.record.selection.SelectionVector2;
    +import org.apache.drill.exec.record.selection.SelectionVector4;
    +
    +/**
    + * Implementation of the external sort which is wrapped into the Drill
    + * "next" protocol by the {@link ExternalSortBatch} class.
    + * <p>
    + * Accepts incoming batches. Sorts each and will spill to disk as needed.
    + * When all input is delivered, can either do an in-memory merge or a
    + * merge from disk. If runs spilled, may have to do one or more "consolidation"
    + * passes to reduce the number of runs to the level that will fit in memory.
    + */
    +
    +public class SortImpl {
    +  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ExternalSortBatch.class);
    +
    +  /**
    +   * Iterates over the final sorted results. Implemented differently
    +   * depending on whether the results are in-memory or spilled to
    +   * disk.
    +   */
    +
    +  public interface SortResults {
    +    /**
    +     * Container into which results are delivered. May the
    +     * the original operator container, or may be a different
    +     * one. This is the container that should be sent
    +     * downstream. This is a fixed value for all returned
    +     * results.
    +     * @return
    +     */
    +    VectorContainer getContainer();
    +    boolean next();
    +    void close();
    +    int getBatchCount();
    +    int getRecordCount();
    +    SelectionVector2 getSv2();
    +    SelectionVector4 getSv4();
    +  }
    +
    +  private final SortConfig config;
    +  private final SortMetrics metrics;
    +  private final SortMemoryManager memManager;
    +  private VectorContainer outputBatch;
    +  private OperExecContext context;
    +
    +  /**
    +   * Memory allocator for this operator itself. Incoming batches are
    +   * transferred into this allocator. Intermediate batches used during
    +   * merge also reside here.
    +   */
    +
    +  private final BufferAllocator allocator;
    +
    +  private final SpilledRuns spilledRuns;
    +
    +  private final BufferedBatches bufferedBatches;
    +
    +  public SortImpl(OperExecContext opContext, SortConfig sortConfig, SpilledRuns spilledRuns,
VectorContainer batch) {
    +    this.context = opContext;
    +    outputBatch = batch;
    +    this.spilledRuns = spilledRuns;
    +    allocator = opContext.getAllocator();
    +    config = sortConfig;
    +    memManager = new SortMemoryManager(config, allocator.getLimit());
    +    metrics = new SortMetrics(opContext.getStats());
    +    bufferedBatches = new BufferedBatches(opContext);
    +
    +    // Reset the allocator to allow a 10% safety margin. This is done because
    +    // the memory manager will enforce the original limit. Changing the hard
    +    // limit will reduce the probability that random chance causes the allocator
    +    // to kill the query because of a small, spurious over-allocation.
    +
    +    allocator.setLimit((long)(allocator.getLimit() * 1.10));
    +  }
    +
    +  public void setSchema(BatchSchema schema) {
    +    bufferedBatches.setSchema(schema);
    +    spilledRuns.setSchema(schema);
    +  }
    +
    +  public boolean forceSpill() {
    +    if (bufferedBatches.size() < 2) {
    --- End diff --
    
    The logic is this. If we spill less than two batches (really, one batch), then we've gained
nothing. Progress can only be made if spilling two or more batches. So, if we don't have enough
batches to make spilling worthwhile, this method returns false. (It is used when reacting
to the bogus OUT_OF_MEMORY iterator outcome.)


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message