Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 0D71D200CA3 for ; Thu, 1 Jun 2017 10:24:22 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 0C1BB160BB5; Thu, 1 Jun 2017 08:24:22 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id CB534160BDF for ; Thu, 1 Jun 2017 10:24:19 +0200 (CEST) Received: (qmail 27760 invoked by uid 500); 1 Jun 2017 08:24:16 -0000 Mailing-List: contact commits-help@cassandra.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@cassandra.apache.org Received: (qmail 27075 invoked by uid 99); 1 Jun 2017 08:24:14 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 01 Jun 2017 08:24:13 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 104E0DFF81; Thu, 1 Jun 2017 08:24:13 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: blerer@apache.org To: commits@cassandra.apache.org Date: Thu, 01 Jun 2017 08:24:17 -0000 Message-Id: <3df063c5a3a44cbebeeb3b343bd36a55@git.apache.org> In-Reply-To: <1d3bf1f4e7cd4d3ab6c8895f15c96e2f@git.apache.org> References: <1d3bf1f4e7cd4d3ab6c8895f15c96e2f@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [6/8] cassandra git commit: Merge branch cassandra-3.0 into cassandra-3.11 archived-at: Thu, 01 Jun 2017 08:24:22 -0000 http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java index 4536036,0000000..84a742b mode 100644,000000..100644 --- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java +++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java @@@ -1,261 -1,0 +1,263 @@@ +/* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ +package org.apache.cassandra.db.rows; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Comparator; +import java.util.List; + +import org.apache.cassandra.config.CFMetaData; +import org.apache.cassandra.db.*; +import org.apache.cassandra.db.filter.ClusteringIndexFilter; +import org.apache.cassandra.db.filter.ColumnFilter; +import org.apache.cassandra.io.sstable.IndexInfo; +import org.apache.cassandra.io.sstable.format.SSTableReader; ++import org.apache.cassandra.io.sstable.format.SSTableReadsListener; +import org.apache.cassandra.io.sstable.metadata.StatsMetadata; +import org.apache.cassandra.thrift.ThriftResultsMerger; +import org.apache.cassandra.utils.IteratorWithLowerBound; + +/** + * An unfiltered row iterator with a lower bound retrieved from either the global + * sstable statistics or the row index lower bounds (if available in the cache). + * Before initializing the sstable unfiltered row iterator, we return an empty row + * with the clustering set to the lower bound. The empty row will be filtered out and + * the result is that if we don't need to access this sstable, i.e. due to the LIMIT conditon, + * then we will not. See CASSANDRA-8180 for examples of why this is useful. + */ +public class UnfilteredRowIteratorWithLowerBound extends LazilyInitializedUnfilteredRowIterator implements IteratorWithLowerBound +{ + private final SSTableReader sstable; + private final ClusteringIndexFilter filter; + private final ColumnFilter selectedColumns; + private final boolean isForThrift; + private final int nowInSec; + private final boolean applyThriftTransformation; ++ private final SSTableReadsListener listener; + private ClusteringBound lowerBound; + private boolean firstItemRetrieved; + + public UnfilteredRowIteratorWithLowerBound(DecoratedKey partitionKey, + SSTableReader sstable, + ClusteringIndexFilter filter, + ColumnFilter selectedColumns, + boolean isForThrift, + int nowInSec, - boolean applyThriftTransformation) ++ boolean applyThriftTransformation, ++ SSTableReadsListener listener) + { + super(partitionKey); + this.sstable = sstable; + this.filter = filter; + this.selectedColumns = selectedColumns; + this.isForThrift = isForThrift; + this.nowInSec = nowInSec; + this.applyThriftTransformation = applyThriftTransformation; ++ this.listener = listener; + this.lowerBound = null; + this.firstItemRetrieved = false; + } + + public Unfiltered lowerBound() + { + if (lowerBound != null) + return makeBound(lowerBound); + + // The partition index lower bound is more accurate than the sstable metadata lower bound but it is only + // present if the iterator has already been initialized, which we only do when there are tombstones since in + // this case we cannot use the sstable metadata clustering values + ClusteringBound ret = getPartitionIndexLowerBound(); + return ret != null ? makeBound(ret) : makeBound(getMetadataLowerBound()); + } + + private Unfiltered makeBound(ClusteringBound bound) + { + if (bound == null) + return null; + + if (lowerBound != bound) + lowerBound = bound; + + return new RangeTombstoneBoundMarker(lowerBound, DeletionTime.LIVE); + } + + @Override + protected UnfilteredRowIterator initializeIterator() + { - sstable.incrementReadCount(); - + @SuppressWarnings("resource") // 'iter' is added to iterators which is closed on exception, or through the closing of the final merged iterator - UnfilteredRowIterator iter = sstable.iterator(partitionKey(), filter.getSlices(metadata()), selectedColumns, filter.isReversed(), isForThrift); ++ UnfilteredRowIterator iter = sstable.iterator(partitionKey(), filter.getSlices(metadata()), selectedColumns, filter.isReversed(), isForThrift, listener); + return isForThrift && applyThriftTransformation + ? ThriftResultsMerger.maybeWrap(iter, nowInSec) + : iter; + } + + @Override + protected Unfiltered computeNext() + { + Unfiltered ret = super.computeNext(); + if (firstItemRetrieved) + return ret; + + // Check that the lower bound is not bigger than the first item retrieved + firstItemRetrieved = true; + if (lowerBound != null && ret != null) + assert comparator().compare(lowerBound, ret.clustering()) <= 0 + : String.format("Lower bound [%s ]is bigger than first returned value [%s] for sstable %s", + lowerBound.toString(sstable.metadata), + ret.toString(sstable.metadata), + sstable.getFilename()); + + return ret; + } + + private Comparator comparator() + { + return filter.isReversed() ? sstable.metadata.comparator.reversed() : sstable.metadata.comparator; + } + + @Override + public CFMetaData metadata() + { + return sstable.metadata; + } + + @Override + public boolean isReverseOrder() + { + return filter.isReversed(); + } + + @Override + public PartitionColumns columns() + { + return selectedColumns.fetchedColumns(); + } + + @Override + public EncodingStats stats() + { + return sstable.stats(); + } + + @Override + public DeletionTime partitionLevelDeletion() + { + if (!sstable.mayHaveTombstones()) + return DeletionTime.LIVE; + + return super.partitionLevelDeletion(); + } + + @Override + public Row staticRow() + { + if (columns().statics.isEmpty()) + return Rows.EMPTY_STATIC_ROW; + + return super.staticRow(); + } + + /** + * @return the lower bound stored on the index entry for this partition, if available. + */ + private ClusteringBound getPartitionIndexLowerBound() + { + // NOTE: CASSANDRA-11206 removed the lookup against the key-cache as the IndexInfo objects are no longer + // in memory for not heap backed IndexInfo objects (so, these are on disk). + // CASSANDRA-11369 is there to fix this afterwards. + + // Creating the iterator ensures that rowIndexEntry is loaded if available (partitions bigger than + // DatabaseDescriptor.column_index_size_in_kb) + if (!canUseMetadataLowerBound()) + maybeInit(); + + RowIndexEntry rowIndexEntry = sstable.getCachedPosition(partitionKey(), false); + if (rowIndexEntry == null || !rowIndexEntry.indexOnHeap()) + return null; + + try (RowIndexEntry.IndexInfoRetriever onHeapRetriever = rowIndexEntry.openWithIndex(null)) + { + IndexInfo column = onHeapRetriever.columnsIndex(filter.isReversed() ? rowIndexEntry.columnsIndexCount() - 1 : 0); + ClusteringPrefix lowerBoundPrefix = filter.isReversed() ? column.lastName : column.firstName; + assert lowerBoundPrefix.getRawValues().length <= sstable.metadata.comparator.size() : + String.format("Unexpected number of clustering values %d, expected %d or fewer for %s", + lowerBoundPrefix.getRawValues().length, + sstable.metadata.comparator.size(), + sstable.getFilename()); + return ClusteringBound.inclusiveOpen(filter.isReversed(), lowerBoundPrefix.getRawValues()); + } + catch (IOException e) + { + throw new RuntimeException("should never occur", e); + } + } + + /** + * Whether we can use the clustering values in the stats of the sstable to build the lower bound. + *

+ * Currently, the clustering values of the stats file records for each clustering component the min and max + * value seen, null excluded. In other words, having a non-null value for a component in those min/max clustering + * values does _not_ guarantee that there isn't an unfiltered in the sstable whose clustering has either no value for + * that component (it's a prefix) or a null value. + *

+ * This is problematic as this means we can't in general build a lower bound from those values since the "min" + * values doesn't actually guarantee minimality. + *

+ * However, we can use those values if we can guarantee that no clustering in the sstable 1) is a true prefix and + * 2) uses null values. Nat having true prefixes means having no range tombstone markers since rows use + * {@link Clustering} which is always "full" (all components are always present). As for null values, we happen to + * only allow those in compact tables (for backward compatibility), so we can simply exclude those tables. + *

+ * Note that the information we currently have at our disposal make this condition less precise that it could be. + * In particular, {@link SSTableReader#mayHaveTombstones} could return {@code true} (making us not use the stats) + * because of cell tombstone or even expiring cells even if the sstable has no range tombstone markers, even though + * it's really only markers we want to exclude here (more precisely, as said above, we want to exclude anything + * whose clustering is not "full", but that's only markers). It wouldn't be very hard to collect whether a sstable + * has any range tombstone marker however so it's a possible improvement. + */ + private boolean canUseMetadataLowerBound() + { + // Side-note: pre-2.1 sstable stat file had clustering value arrays whose size may not match the comparator size + // and that would break getMetadataLowerBound. We don't support upgrade from 2.0 to 3.0 directly however so it's + // not a true concern. Besides, !sstable.mayHaveTombstones already ensure this is a 3.0 sstable anyway. + return !sstable.mayHaveTombstones() && !sstable.metadata.isCompactTable(); + } + + /** + * @return a global lower bound made from the clustering values stored in the sstable metadata, note that + * this currently does not correctly compare tombstone bounds, especially ranges. + */ + private ClusteringBound getMetadataLowerBound() + { + if (!canUseMetadataLowerBound()) + return null; + + final StatsMetadata m = sstable.getSSTableMetadata(); + List vals = filter.isReversed() ? m.maxClusteringValues : m.minClusteringValues; + assert vals.size() <= sstable.metadata.comparator.size() : + String.format("Unexpected number of clustering values %d, expected %d or fewer for %s", + vals.size(), + sstable.metadata.comparator.size(), + sstable.getFilename()); + return ClusteringBound.inclusiveOpen(filter.isReversed(), vals.toArray(new ByteBuffer[vals.size()])); + } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java index f6f30db,f38738d..e9b2491 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java @@@ -1524,31 -1501,48 +1524,56 @@@ public abstract class SSTableReader ext } /** -- * Get position updating key cache and stats. -- * @see #getPosition(PartitionPosition, SSTableReader.Operator, boolean) ++ * Retrieves the position while updating the key cache and the stats. ++ * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed to ++ * allow key selection by token bounds but only if op != * EQ ++ * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins. + */ + public final RowIndexEntry getPosition(PartitionPosition key, Operator op) + { + return getPosition(key, op, SSTableReadsListener.NOOP_LISTENER); + } + ++ /** ++ * Retrieves the position while updating the key cache and the stats. ++ * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed to ++ * allow key selection by token bounds but only if op != * EQ ++ * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins. ++ * @param listener the {@code SSTableReaderListener} that must handle the notifications. + */ - public RowIndexEntry getPosition(PartitionPosition key, Operator op) + public final RowIndexEntry getPosition(PartitionPosition key, Operator op, SSTableReadsListener listener) { - return getPosition(key, op, true, false); + return getPosition(key, op, true, false, listener); } - public RowIndexEntry getPosition(PartitionPosition key, Operator op, boolean updateCacheAndStats) + public final RowIndexEntry getPosition(PartitionPosition key, Operator op, boolean updateCacheAndStats) { - return getPosition(key, op, updateCacheAndStats, false); + return getPosition(key, op, updateCacheAndStats, false, SSTableReadsListener.NOOP_LISTENER); } /** * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed to * allow key selection by token bounds but only if op != * EQ * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins. * @param updateCacheAndStats true if updating stats and cache ++ * @param listener a listener used to handle internal events * @return The index entry corresponding to the key, or null if the key is not present */ - protected abstract RowIndexEntry getPosition(PartitionPosition key, Operator op, boolean updateCacheAndStats, boolean permitMatchPastLast); + protected abstract RowIndexEntry getPosition(PartitionPosition key, + Operator op, + boolean updateCacheAndStats, + boolean permitMatchPastLast, + SSTableReadsListener listener); + - public abstract SliceableUnfilteredRowIterator iterator(DecoratedKey key, - ColumnFilter selectedColumns, - boolean reversed, - boolean isForThrift, - SSTableReadsListener listener); ++ public abstract UnfilteredRowIterator iterator(DecoratedKey key, ++ Slices slices, ++ ColumnFilter selectedColumns, ++ boolean reversed, ++ boolean isForThrift, ++ SSTableReadsListener listener); + - public abstract UnfilteredRowIterator iterator(DecoratedKey key, Slices slices, ColumnFilter selectedColumns, boolean reversed, boolean isForThrift); + public abstract UnfilteredRowIterator iterator(FileDataInput file, DecoratedKey key, RowIndexEntry indexEntry, Slices slices, ColumnFilter selectedColumns, boolean reversed, boolean isForThrift); - public abstract SliceableUnfilteredRowIterator iterator(FileDataInput file, - DecoratedKey key, - RowIndexEntry indexEntry, - ColumnFilter selectedColumns, - boolean reversed, - boolean isForThrift); + public abstract UnfilteredRowIterator simpleIterator(FileDataInput file, DecoratedKey key, RowIndexEntry indexEntry, boolean tombstoneOnly); /** * Finds and returns the first key beyond a given token in this SSTable or null if no such key exists. @@@ -2017,23 -1990,40 +2051,11 @@@ readMeter.mark(); } - private int compare(List values1, List values2) - /** - * Checks if this sstable can overlap with another one based on the min/man clustering values. - * If this methods return false, we're guarantee that {@code this} and {@code other} have no overlapping - * data, i.e. no cells to reconcile. - */ - public boolean mayOverlapsWith(SSTableReader other) -- { - ClusteringComparator comparator = metadata.comparator; - for (int i = 0; i < Math.min(values1.size(), values2.size()); i++) - { - int cmp = comparator.subtype(i).compare(values1.get(i), values2.get(i)); - if (cmp != 0) - return cmp; - } - return 0; - StatsMetadata m1 = getSSTableMetadata(); - StatsMetadata m2 = other.getSSTableMetadata(); - - if (m1.minClusteringValues.isEmpty() || m1.maxClusteringValues.isEmpty() || m2.minClusteringValues.isEmpty() || m2.maxClusteringValues.isEmpty()) - return true; - - return !(compare(m1.maxClusteringValues, m2.minClusteringValues) < 0 || compare(m1.minClusteringValues, m2.maxClusteringValues) > 0); -- } -- - private int compare(List values1, List values2) + public EncodingStats stats() { - ClusteringComparator comparator = metadata.comparator; - for (int i = 0; i < Math.min(values1.size(), values2.size()); i++) - { - int cmp = comparator.subtype(i).compare(values1.get(i), values2.get(i)); - if (cmp != 0) - return cmp; - } - return 0; - } - - public static class SizeComparator implements Comparator - { - public int compare(SSTableReader o1, SSTableReader o2) - { - return Longs.compare(o1.onDiskLength(), o2.onDiskLength()); - } + // We could return sstable.header.stats(), but this may not be as accurate than the actual sstable stats (see + // SerializationHeader.make() for details) so we use the latter instead. + return new EncodingStats(getMinTimestamp(), getMinLocalDeletionTime(), getMinTTL()); } public Ref tryRef() http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/src/java/org/apache/cassandra/io/sstable/format/SSTableReadsListener.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/io/sstable/format/SSTableReadsListener.java index 0000000,6d384bf..8f6e3c0 mode 000000,100644..100644 --- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReadsListener.java +++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReadsListener.java @@@ -1,0 -1,81 +1,82 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.cassandra.io.sstable.format; + + import org.apache.cassandra.db.RowIndexEntry; ++import org.apache.cassandra.io.sstable.format.SSTableReadsListener.SelectionReason; + + /** + * Listener for receiving notifications associated with reading SSTables. + */ + public interface SSTableReadsListener + { + /** + * The reasons for skipping an SSTable + */ + enum SkippingReason + { + BLOOM_FILTER, + MIN_MAX_KEYS, + PARTITION_INDEX_LOOKUP, + INDEX_ENTRY_NOT_FOUND; + } + + /** + * The reasons for selecting an SSTable + */ + enum SelectionReason + { + KEY_CACHE_HIT, + INDEX_ENTRY_FOUND; + } + + /** + * Listener that does nothing. + */ + static final SSTableReadsListener NOOP_LISTENER = new SSTableReadsListener() {}; + + /** + * Handles notification that the specified SSTable has been skipped during a single partition query. + * + * @param sstable the SSTable reader + * @param reason the reason for which the SSTable has been skipped + */ + default void onSSTableSkipped(SSTableReader sstable, SkippingReason reason) + { + } + + /** + * Handles notification that the specified SSTable has been selected during a single partition query. + * + * @param sstable the SSTable reader + * @param indexEntry the index entry + * @param reason the reason for which the SSTable has been selected + */ + default void onSSTableSelected(SSTableReader sstable, RowIndexEntry indexEntry, SelectionReason reason) + { + } + + /** + * Handles notification that the specified SSTable is being scanned during a partition range query. + * + * @param sstable the SSTable reader of the SSTable being scanned. + */ + default void onScanningStarted(SSTableReader sstable) + { + } + } http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java index 8c64b01,eeea18f..8551819 --- a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableReader.java @@@ -30,11 -28,16 +30,13 @@@ import org.apache.cassandra.db.rows.Unf import org.apache.cassandra.dht.AbstractBounds; import org.apache.cassandra.dht.Range; import org.apache.cassandra.dht.Token; -import org.apache.cassandra.io.sstable.Component; -import org.apache.cassandra.io.sstable.CorruptSSTableException; -import org.apache.cassandra.io.sstable.Descriptor; -import org.apache.cassandra.io.sstable.ISSTableScanner; +import org.apache.cassandra.io.sstable.*; import org.apache.cassandra.io.sstable.format.SSTableReader; + import org.apache.cassandra.io.sstable.format.SSTableReadsListener; + import org.apache.cassandra.io.sstable.format.SSTableReadsListener.SkippingReason; + import org.apache.cassandra.io.sstable.format.SSTableReadsListener.SelectionReason; import org.apache.cassandra.io.sstable.metadata.StatsMetadata; import org.apache.cassandra.io.util.FileDataInput; - import org.apache.cassandra.io.util.RandomAccessReader; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.ByteBufferUtil; import org.slf4j.Logger; @@@ -57,29 -60,32 +59,29 @@@ public class BigTableReader extends SST super(desc, components, metadata, maxDataAge, sstableMetadata, openReason, header); } - public UnfilteredRowIterator iterator(DecoratedKey key, Slices slices, ColumnFilter selectedColumns, boolean reversed, boolean isForThrift) - public SliceableUnfilteredRowIterator iterator(DecoratedKey key, - ColumnFilter selectedColumns, - boolean reversed, - boolean isForThrift, - SSTableReadsListener listener) ++ public UnfilteredRowIterator iterator(DecoratedKey key, Slices slices, ColumnFilter selectedColumns, boolean reversed, boolean isForThrift, SSTableReadsListener listener) { - RowIndexEntry rie = getPosition(key, SSTableReader.Operator.EQ); - return reversed - ? new SSTableReversedIterator(this, key, selectedColumns, isForThrift, listener) - : new SSTableIterator(this, key, selectedColumns, isForThrift, listener); ++ RowIndexEntry rie = getPosition(key, SSTableReader.Operator.EQ, listener); + return iterator(null, key, rie, slices, selectedColumns, reversed, isForThrift); } - public SliceableUnfilteredRowIterator iterator(FileDataInput file, DecoratedKey key, RowIndexEntry indexEntry, ColumnFilter selectedColumns, boolean reversed, boolean isForThrift) + public UnfilteredRowIterator iterator(FileDataInput file, DecoratedKey key, RowIndexEntry indexEntry, Slices slices, ColumnFilter selectedColumns, boolean reversed, boolean isForThrift) { + if (indexEntry == null) + return UnfilteredRowIterators.noRowsIterator(metadata, key, Rows.EMPTY_STATIC_ROW, DeletionTime.LIVE, reversed); return reversed - ? new SSTableReversedIterator(this, file, key, indexEntry, selectedColumns, isForThrift) - : new SSTableIterator(this, file, key, indexEntry, selectedColumns, isForThrift); + ? new SSTableReversedIterator(this, file, key, indexEntry, slices, selectedColumns, isForThrift, ifile) + : new SSTableIterator(this, file, key, indexEntry, slices, selectedColumns, isForThrift, ifile); } - /** - * @param columns the columns to return. - * @param dataRange filter to use when reading the columns - * @return A Scanner for seeking over the rows of the SSTable. - */ - public ISSTableScanner getScanner(ColumnFilter columns, DataRange dataRange, RateLimiter limiter, boolean isForThrift) + @Override + public ISSTableScanner getScanner(ColumnFilter columns, + DataRange dataRange, + RateLimiter limiter, + boolean isForThrift, + SSTableReadsListener listener) { - return BigTableScanner.getScanner(this, columns, dataRange, limiter, isForThrift); + return BigTableScanner.getScanner(this, columns, dataRange, limiter, isForThrift, listener); } /** @@@ -118,21 -124,19 +120,19 @@@ } - /** - * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed to - * allow key selection by token bounds but only if op != * EQ - * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins. - * @param updateCacheAndStats true if updating stats and cache - * @param listener a listener used to handle internal events - * @return The index entry corresponding to the key, or null if the key is not present - */ + @SuppressWarnings("resource") // caller to close + @Override + public UnfilteredRowIterator simpleIterator(FileDataInput dfile, DecoratedKey key, RowIndexEntry position, boolean tombstoneOnly) + { + return SSTableIdentityIterator.create(this, dfile, position, key, tombstoneOnly); + } + - /** - * @param key The key to apply as the rhs to the given Operator. A 'fake' key is allowed to - * allow key selection by token bounds but only if op != * EQ - * @param op The Operator defining matching keys: the nearest key to the target matching the operator wins. - * @param updateCacheAndStats true if updating stats and cache - * @return The index entry corresponding to the key, or null if the key is not present - */ - protected RowIndexEntry getPosition(PartitionPosition key, Operator op, boolean updateCacheAndStats, boolean permitMatchPastLast) ++ @Override + protected RowIndexEntry getPosition(PartitionPosition key, + Operator op, + boolean updateCacheAndStats, + boolean permitMatchPastLast, + SSTableReadsListener listener) { if (op == Operator.EQ) { @@@ -257,7 -265,8 +261,8 @@@ } if (op == Operator.EQ && updateCacheAndStats) bloomFilterTracker.addTruePositive(); + listener.onSSTableSelected(this, indexEntry, SelectionReason.INDEX_ENTRY_FOUND); - Tracing.trace("Partition index with {} entries found for sstable {}", indexEntry.columnsIndex().size(), descriptor.generation); + Tracing.trace("Partition index with {} entries found for sstable {}", indexEntry.columnsIndexCount(), descriptor.generation); return indexEntry; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java ---------------------------------------------------------------------- diff --cc src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java index e465a02,82d8211..f4bd1ea --- a/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java +++ b/src/java/org/apache/cassandra/io/sstable/format/big/BigTableScanner.java @@@ -62,20 -63,30 +63,29 @@@ public class BigTableScanner implement private final DataRange dataRange; private final RowIndexEntry.IndexSerializer rowIndexEntrySerializer; private final boolean isForThrift; + private final SSTableReadsListener listener; + private long startScan = -1; + private long bytesScanned = 0; protected Iterator iterator; // Full scan of the sstables public static ISSTableScanner getScanner(SSTableReader sstable, RateLimiter limiter) { - return new BigTableScanner(sstable, ColumnFilter.all(sstable.metadata), null, limiter, false, Iterators.singletonIterator(fullRange(sstable))); - return new BigTableScanner(sstable, limiter, Iterators.singletonIterator(fullRange(sstable))); ++ return new BigTableScanner(sstable, ++ ColumnFilter.all(sstable.metadata), ++ limiter, ++ Iterators.singletonIterator(fullRange(sstable))); } - public static ISSTableScanner getScanner(SSTableReader sstable, ColumnFilter columns, DataRange dataRange, RateLimiter limiter, boolean isForThrift) + public static ISSTableScanner getScanner(SSTableReader sstable, + ColumnFilter columns, + DataRange dataRange, + RateLimiter limiter, + boolean isForThrift, + SSTableReadsListener listener) { - return new BigTableScanner(sstable, columns, dataRange, limiter, isForThrift, makeBounds(sstable, dataRange).iterator()); - return new BigTableScanner(sstable, - columns, - dataRange, - limiter, - isForThrift, - makeBounds(sstable, dataRange).iterator(), - listener); ++ return new BigTableScanner(sstable, columns, dataRange, limiter, isForThrift, makeBounds(sstable, dataRange).iterator(), listener); } public static ISSTableScanner getScanner(SSTableReader sstable, Collection> tokenRanges, RateLimiter limiter) @@@ -85,15 -96,28 +95,32 @@@ if (positions.isEmpty()) return new EmptySSTableScanner(sstable); - return new BigTableScanner(sstable, ColumnFilter.all(sstable.metadata), null, limiter, false, makeBounds(sstable, tokenRanges).iterator()); - return new BigTableScanner(sstable, limiter, makeBounds(sstable, tokenRanges).iterator()); ++ return new BigTableScanner(sstable, ++ ColumnFilter.all(sstable.metadata), ++ limiter, ++ makeBounds(sstable, tokenRanges).iterator()); } public static ISSTableScanner getScanner(SSTableReader sstable, Iterator> rangeIterator) { - return new BigTableScanner(sstable, ColumnFilter.all(sstable.metadata), null, null, false, rangeIterator); - return new BigTableScanner(sstable, null, rangeIterator); ++ return new BigTableScanner(sstable, ColumnFilter.all(sstable.metadata), null, rangeIterator); } - private BigTableScanner(SSTableReader sstable, ColumnFilter columns, DataRange dataRange, RateLimiter limiter, boolean isForThrift, Iterator> rangeIterator) + private BigTableScanner(SSTableReader sstable, ++ ColumnFilter columns, + RateLimiter limiter, + Iterator> rangeIterator) + { - this(sstable, ColumnFilter.all(sstable.metadata), null, limiter, false, rangeIterator, SSTableReadsListener.NOOP_LISTENER); ++ this(sstable, columns, null, limiter, false, rangeIterator, SSTableReadsListener.NOOP_LISTENER); + } + + private BigTableScanner(SSTableReader sstable, + ColumnFilter columns, + DataRange dataRange, + RateLimiter limiter, + boolean isForThrift, + Iterator> rangeIterator, + SSTableReadsListener listener) { assert sstable != null; http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java index fd9f221,2cf518a..d363ecf --- a/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java @@@ -71,422 -69,68 +70,488 @@@ public class SSTablesIteratedTest exten } @Test + public void testSinglePartitionQuery() throws Throwable + { + createTable("CREATE TABLE %s (pk int, c int, v text, PRIMARY KEY (pk, c))"); + + execute("INSERT INTO %s (pk, c, v) VALUES (?, ?, ?)", 1, 40, "41"); + execute("INSERT INTO %s (pk, c, v) VALUES (?, ?, ?)", 2, 10, "12"); + flush(); + + execute("INSERT INTO %s (pk, c, v) VALUES (?, ?, ?)", 1, 10, "11"); + execute("INSERT INTO %s (pk, c, v) VALUES (?, ?, ?)", 3, 30, "33"); + flush(); + + execute("INSERT INTO %s (pk, c, v) VALUES (?, ?, ?)", 1, 20, "21"); + execute("INSERT INTO %s (pk, c, v) VALUES (?, ?, ?)", 2, 40, "42"); + execute("UPDATE %s SET v = '12' WHERE pk = 2 AND c = 10"); + flush(); + + // Test with all the table being merged + executeAndCheck("SELECT * FROM %s WHERE pk = 1", 3, + row(1, 10, "11"), + row(1, 20, "21"), + row(1, 40, "41")); + + // Test with only 2 of the 3 SSTables being merged + executeAndCheck("SELECT * FROM %s WHERE pk = 2", 2, + row(2, 10, "12"), + row(2, 40, "42")); + + executeAndCheck("SELECT * FROM %s WHERE pk = 2 ORDER BY c DESC", 2, + row(2, 40, "42"), + row(2, 10, "12")); + + // Test with only 2 of the 3 SSTables being merged and a Slice filter + executeAndCheck("SELECT * FROM %s WHERE pk = 2 AND c > 20", 2, + row(2, 40, "42")); + + executeAndCheck("SELECT * FROM %s WHERE pk = 2 AND c > 20 ORDER BY c DESC", 2, + row(2, 40, "42")); + + // Test with only 2 of the 3 SSTables being merged and a Name filter + // This test checks the SinglePartitionReadCommand::queryMemtableAndSSTablesInTimestampOrder which is only + // used for ClusteringIndexNamesFilter when there are no multi-cell columns + executeAndCheck("SELECT * FROM %s WHERE pk = 2 AND c = 10", 2, + row(2, 10, "12")); + + // For partition range queries the metric must not be updated. The reason being that range queries simply + // scan all the SSTables containing data within the partition range. Due to that they might pollute the metric + // and give a wrong view of the system. + executeAndCheck("SELECT * FROM %s", 0, + row(1, 10, "11"), + row(1, 20, "21"), + row(1, 40, "41"), + row(2, 10, "12"), + row(2, 40, "42"), + row(3, 30, "33")); + + executeAndCheck("SELECT * FROM %s WHERE token(pk) = token(1)", 0, + row(1, 10, "11"), + row(1, 20, "21"), + row(1, 40, "41")); + + assertInvalidMessage("ORDER BY is only supported when the partition key is restricted by an EQ or an IN", + "SELECT * FROM %s WHERE token(pk) = token(1) ORDER BY C DESC"); + } -} ++ ++ @Test + public void testSSTablesOnlyASC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col ASC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 1, row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 2, row(1, 10, "10"), row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 3, row(1, 10, "10"), row(1, 20, "20"), row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 3, row(1, 10, "10"), row(1, 20, "20"), row(1, 30, "30")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 1, row(1, 10, "10")); + } + + @Test + public void testMixedMemtableSStablesASC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col ASC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 0, row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 1, row(1, 10, "10"), row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 2, row(1, 10, "10"), row(1, 20, "20"), row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, row(1, 10, "10"), row(1, 20, "20"), row(1, 30, "30")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 0, row(1, 10, "10")); + } + + @Test + public void testOverlappingSStablesASC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col ASC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 1, row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 2, row(1, 10, "10"), row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 2, row(1, 10, "10"), row(1, 20, "20"), row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, row(1, 10, "10"), row(1, 20, "20"), row(1, 30, "30")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 1, row(1, 10, "10")); + } + + @Test + public void testSSTablesOnlyDESC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 2, row(1, 30, "30"), row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 3, row(1, 30, "30"), row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 3, row(1, 30, "30"), row(1, 20, "20"), row(1, 10, "10")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 1, row(1, 30, "30")); + } + + @Test + public void testMixedMemtableSStablesDESC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 0, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 1, row(1, 30, "30"), row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 2, row(1, 30, "30"), row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, row(1, 30, "30"), row(1, 20, "20"), row(1, 10, "10")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 0, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 0, row(1, 30, "30")); + } + + @Test + public void testOverlappingSStablesDESC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 2, row(1, 30, "30"), row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 2, row(1, 30, "30"), row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, row(1, 30, "30"), row(1, 20, "20"), row(1, 10, "10")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 1, row(1, 30, "30")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 1, row(1, 30, "30")); + } + + @Test + public void testDeletionOnDifferentSSTables() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + flush(); + + execute("DELETE FROM %s WHERE id=1 and col=30"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 3, row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 4, row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 4, row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 4, row(1, 20, "20"), row(1, 10, "10")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 2); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 3, row(1, 20, "20")); + } + + @Test + public void testDeletionOnSameSSTable() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + execute("DELETE FROM %s WHERE id=1 and col=30"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 2, row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 3, row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 3, row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 3, row(1, 20, "20"), row(1, 10, "10")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 1); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 2, row(1, 20, "20")); + } + + @Test + public void testDeletionOnMemTable() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + execute("DELETE FROM %s WHERE id=1 and col=30"); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 1, row(1, 20, "20")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 2, row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 3", 2, row(1, 20, "20"), row(1, 10, "10")); + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, row(1, 20, "20"), row(1, 10, "10")); + + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 25 LIMIT 1", 0); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col < 40 LIMIT 1", 1, row(1, 20, "20")); + } + + @Test + public void testDeletionOnIndexedSSTableDESC() throws Throwable + { + testDeletionOnIndexedSSTableDESC(true); + testDeletionOnIndexedSSTableDESC(false); + } + + private void testDeletionOnIndexedSSTableDESC(boolean deleteWithRange) throws Throwable + { + // reduce the column index size so that columns get indexed during flush + DatabaseDescriptor.setColumnIndexSize(1); + + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + for (int i = 1; i <= 1000; i++) + { + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + } + flush(); + + Object[][] allRows = new Object[1000][]; + for (int i = 1001; i <= 2000; i++) + { + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + allRows[2000 - i] = row(1, i, Integer.toString(i)); + } + + if (deleteWithRange) + { + execute("DELETE FROM %s WHERE id=1 and col <= ?", 1000); + } + else + { + for (int i = 1; i <= 1000; i++) + execute("DELETE FROM %s WHERE id=1 and col = ?", i); + } + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 1, row(1, 2000, "2000")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 1, row(1, 2000, "2000"), row(1, 1999, "1999")); + + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, allRows); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 1000 LIMIT 1", 1, row(1, 2000, "2000")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col <= 2000 LIMIT 1", 1, row(1, 2000, "2000")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 1000", 1, allRows); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col <= 2000", 2, allRows); + } + + @Test + public void testDeletionOnIndexedSSTableASC() throws Throwable + { + testDeletionOnIndexedSSTableASC(true); + testDeletionOnIndexedSSTableASC(false); + } + + private void testDeletionOnIndexedSSTableASC(boolean deleteWithRange) throws Throwable + { + // reduce the column index size so that columns get indexed during flush + DatabaseDescriptor.setColumnIndexSize(1); + + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col ASC)"); + + for (int i = 1; i <= 1000; i++) + { + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + } + flush(); + + Object[][] allRows = new Object[1000][]; + for (int i = 1001; i <= 2000; i++) + { + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + allRows[i - 1001] = row(1, i, Integer.toString(i)); + } + flush(); + + if (deleteWithRange) + { + execute("DELETE FROM %s WHERE id =1 and col <= ?", 1000); + } + else + { + for (int i = 1; i <= 1000; i++) + execute("DELETE FROM %s WHERE id=1 and col = ?", i); + } + flush(); + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 3, row(1, 1001, "1001")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 3, row(1, 1001, "1001"), row(1, 1002, "1002")); + + executeAndCheck("SELECT * FROM %s WHERE id=1", 3, allRows); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 1000 LIMIT 1", 2, row(1, 1001, "1001")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col <= 2000 LIMIT 1", 3, row(1, 1001, "1001")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 1000", 2, allRows); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col <= 2000", 3, allRows); + } + + @Test + public void testDeletionOnOverlappingIndexedSSTable() throws Throwable + { + testDeletionOnOverlappingIndexedSSTable(true); + testDeletionOnOverlappingIndexedSSTable(false); + } + + private void testDeletionOnOverlappingIndexedSSTable(boolean deleteWithRange) throws Throwable + { + // reduce the column index size so that columns get indexed during flush + DatabaseDescriptor.setColumnIndexSize(1); + + createTable("CREATE TABLE %s (id int, col int, val1 text, val2 text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col ASC)"); + + for (int i = 1; i <= 500; i++) + { + if (i % 2 == 0) + execute("INSERT INTO %s (id, col, val1) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + else + execute("INSERT INTO %s (id, col, val1, val2) VALUES (?, ?, ?, ?)", 1, i, Integer.toString(i), Integer.toString(i)); + } + + for (int i = 1001; i <= 1500; i++) + { + if (i % 2 == 0) + execute("INSERT INTO %s (id, col, val1) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + else + execute("INSERT INTO %s (id, col, val1, val2) VALUES (?, ?, ?, ?)", 1, i, Integer.toString(i), Integer.toString(i)); + } + + flush(); + + for (int i = 501; i <= 1000; i++) + { + if (i % 2 == 0) + execute("INSERT INTO %s (id, col, val1) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + else + execute("INSERT INTO %s (id, col, val1, val2) VALUES (?, ?, ?, ?)", 1, i, Integer.toString(i), Integer.toString(i)); + } + + for (int i = 1501; i <= 2000; i++) + { + if (i % 2 == 0) + execute("INSERT INTO %s (id, col, val1) VALUES (?, ?, ?)", 1, i, Integer.toString(i)); + else + execute("INSERT INTO %s (id, col, val1, val2) VALUES (?, ?, ?, ?)", 1, i, Integer.toString(i), Integer.toString(i)); + } + + if (deleteWithRange) + { + execute("DELETE FROM %s WHERE id=1 and col > ? and col <= ?", 250, 750); + } + else + { + for (int i = 251; i <= 750; i++) + execute("DELETE FROM %s WHERE id=1 and col = ?", i); + } + + flush(); + + Object[][] allRows = new Object[1500][]; // non deleted rows + for (int i = 1; i <= 2000; i++) + { + if (i > 250 && i <= 750) + continue; // skip deleted records + + int idx = (i <= 250 ? i - 1 : i - 501); + + if (i % 2 == 0) + allRows[idx] = row(1, i, Integer.toString(i), null); + else + allRows[idx] = row(1, i, Integer.toString(i), Integer.toString(i)); + } + + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 1", 2, row(1, 1, "1", "1")); + executeAndCheck("SELECT * FROM %s WHERE id=1 LIMIT 2", 2, row(1, 1, "1", "1"), row(1, 2, "2", null)); + + executeAndCheck("SELECT * FROM %s WHERE id=1", 2, allRows); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 1000 LIMIT 1", 2, row(1, 1001, "1001", "1001")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col <= 2000 LIMIT 1", 2, row(1, 1, "1", "1")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col > 500 LIMIT 1", 2, row(1, 751, "751", "751")); + executeAndCheck("SELECT * FROM %s WHERE id=1 AND col <= 500 LIMIT 1", 2, row(1, 1, "1", "1")); + } + + @Test + public void testMultiplePartitionsDESC() throws Throwable + { + createTable("CREATE TABLE %s (id int, col int, val text, PRIMARY KEY (id, col)) WITH CLUSTERING ORDER BY (col DESC)"); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 10, "10"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 2, 10, "10"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 3, 10, "10"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 20, "20"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 2, 20, "20"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 3, 20, "20"); + flush(); + + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 1, 30, "30"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 2, 30, "30"); + execute("INSERT INTO %s (id, col, val) VALUES (?, ?, ?)", 3, 30, "30"); + flush(); + + for (int i = 1; i <= 3; i++) + { + String base = "SELECT * FROM %s "; + + executeAndCheck(base + String.format("WHERE id=%d LIMIT 1", i), 1, row(i, 30, "30")); + executeAndCheck(base + String.format("WHERE id=%d LIMIT 2", i), 2, row(i, 30, "30"), row(i, 20, "20")); + executeAndCheck(base + String.format("WHERE id=%d LIMIT 3", i), 3, row(i, 30, "30"), row(i, 20, "20"), row(i, 10, "10")); + executeAndCheck(base + String.format("WHERE id=%d", i), 3, row(i, 30, "30"), row(i, 20, "20"), row(i, 10, "10")); + + executeAndCheck(base + String.format("WHERE id=%d AND col > 25 LIMIT 1", i), 1, row(i, 30, "30")); + executeAndCheck(base + String.format("WHERE id=%d AND col < 40 LIMIT 1", i), 1, row(i, 30, "30")); + } + } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/test/unit/org/apache/cassandra/db/compaction/TTLExpiryTest.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java index 88ed52e,451af25..f7ced23 --- a/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableCorruptionDetectionTest.java @@@ -210,7 -208,11 +211,12 @@@ public class SSTableCorruptionDetection for (int i = 0; i < numberOfPks; i++) { DecoratedKey dk = Util.dk(String.format("pkvalue_%07d", i)); - try (UnfilteredRowIterator rowIter = sstable.iterator(dk, Slices.ALL, ColumnFilter.all(cfs.metadata), false, false)) + try (UnfilteredRowIterator rowIter = sstable.iterator(dk, ++ Slices.ALL, + ColumnFilter.all(cfs.metadata), + false, + false, + SSTableReadsListener.NOOP_LISTENER)) { while (rowIter.hasNext()) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java index d73c278,cf57b17..d1db09a --- a/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableScannerTest.java @@@ -182,7 -183,10 +183,7 @@@ public class SSTableScannerTes assert boundaries.length % 2 == 0; for (DataRange range : dataRanges(sstable.metadata, scanStart, scanEnd)) { - try(ISSTableScanner scanner = sstable.getScanner(ColumnFilter.all(sstable.metadata), range, false)) - try(ISSTableScanner scanner = sstable.getScanner(ColumnFilter.all(sstable.metadata), - range, - false, - SSTableReadsListener.NOOP_LISTENER)) ++ try(ISSTableScanner scanner = sstable.getScanner(ColumnFilter.all(sstable.metadata), range, false, SSTableReadsListener.NOOP_LISTENER)) { for (int b = 0; b < boundaries.length; b += 2) for (int i = boundaries[b]; i <= boundaries[b + 1]; i++) http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/test/unit/org/apache/cassandra/io/sstable/SSTableWriterTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/io/sstable/SSTableWriterTest.java index 1c7d61f,e714c60..391927c --- a/test/unit/org/apache/cassandra/io/sstable/SSTableWriterTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/SSTableWriterTest.java @@@ -223,7 -224,11 +224,12 @@@ public class SSTableWriterTest extends try { DecoratedKey dk = Util.dk("large_value"); - UnfilteredRowIterator rowIter = sstable.iterator(dk, Slices.ALL, ColumnFilter.all(cfs.metadata), false, false); + UnfilteredRowIterator rowIter = sstable.iterator(dk, ++ Slices.ALL, + ColumnFilter.all(cfs.metadata), + false, + false, + SSTableReadsListener.NOOP_LISTENER); while (rowIter.hasNext()) { rowIter.next(); http://git-wip-us.apache.org/repos/asf/cassandra/blob/5c9db9af/test/unit/org/apache/cassandra/io/sstable/format/ClientModeSSTableTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/io/sstable/format/ClientModeSSTableTest.java index 7a741f9,48a8af5..d86a44f --- a/test/unit/org/apache/cassandra/io/sstable/format/ClientModeSSTableTest.java +++ b/test/unit/org/apache/cassandra/io/sstable/format/ClientModeSSTableTest.java @@@ -105,7 -107,11 +105,12 @@@ public class ClientModeSSTableTes ByteBuffer key = bytes(Integer.toString(100)); - try (UnfilteredRowIterator iter = reader.iterator(metadata.decorateKey(key), Slices.ALL, ColumnFilter.selection(metadata.partitionColumns()), false, false)) - try (SliceableUnfilteredRowIterator iter = reader.iterator(metadata.decorateKey(key), - ColumnFilter.selection(metadata.partitionColumns()), - false, - false, - SSTableReadsListener.NOOP_LISTENER)) ++ try (UnfilteredRowIterator iter = reader.iterator(metadata.decorateKey(key), ++ Slices.ALL, ++ ColumnFilter.selection(metadata.partitionColumns()), ++ false, ++ false, ++ SSTableReadsListener.NOOP_LISTENER)) { assert iter.next().clustering().get(0).equals(key); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org For additional commands, e-mail: commits-help@cassandra.apache.org