cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From slebre...@apache.org
Subject [1/3] cassandra git commit: Possible AssertionError in UnfilteredRowIteratorWithLowerBound
Date Thu, 23 Mar 2017 09:30:11 GMT
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-3.11 ec9ce3dfb -> f55cb88ab
  refs/heads/trunk 8b74ae4b6 -> a87b15d1d


Possible AssertionError in UnfilteredRowIteratorWithLowerBound

patch by Sylvain Lebresne; reviewed by Stefania for CASSANDRA-13366


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f55cb88a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f55cb88a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f55cb88a

Branch: refs/heads/cassandra-3.11
Commit: f55cb88ab595ccb941ebb4a088ab90f860f463d5
Parents: ec9ce3d
Author: Sylvain Lebresne <sylvain@datastax.com>
Authored: Wed Mar 22 15:41:49 2017 +0100
Committer: Sylvain Lebresne <sylvain@datastax.com>
Committed: Thu Mar 23 10:26:37 2017 +0100

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../db/SinglePartitionReadCommand.java          |  4 +--
 .../db/compaction/CompactionController.java     |  2 +-
 .../UnfilteredRowIteratorWithLowerBound.java    | 31 ++++++++++++++---
 .../io/sstable/format/SSTableReader.java        | 35 ++++++++------------
 5 files changed, 44 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8386c20..f4e48ff 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.11.0
+ * Possible AssertionError in UnfilteredRowIteratorWithLowerBound (CASSANDRA-13366)
  * Support unaligned memory access for AArch64 (CASSANDRA-13326)
  * Improve SASI range iterator efficiency on intersection with an empty range (CASSANDRA-12915).
  * Fix equality comparisons of columns using the duration type (CASSANDRA-13174)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
index f6d10f5..724f59e 100644
--- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
+++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java
@@ -584,7 +584,7 @@ public class SinglePartitionReadCommand extends ReadCommand
                 if (!shouldInclude(sstable))
                 {
                     nonIntersectingSSTables++;
-                    if (sstable.hasTombstones())
+                    if (sstable.mayHaveTombstones())
                     { // if sstable has tombstones we need to check after one pass if it
can be safely skipped
                         if (skippedSSTablesWithTombstones == null)
                             skippedSSTablesWithTombstones = new ArrayList<>();
@@ -773,7 +773,7 @@ public class SinglePartitionReadCommand extends ReadCommand
                 // however: if it is set, it impacts everything and must be included. Getting
that top-level partition deletion costs us
                 // some seek in general however (unless the partition is indexed and is in
the key cache), so we first check if the sstable
                 // has any tombstone at all as a shortcut.
-                if (!sstable.hasTombstones())
+                if (!sstable.mayHaveTombstones())
                     continue; // no tombstone at all, we can skip that sstable
 
                 // We need to get the partition deletion and include it if it's live. In
any case though, we're done with that sstable.

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/db/compaction/CompactionController.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionController.java b/src/java/org/apache/cassandra/db/compaction/CompactionController.java
index 64c35d9..bf3647a 100644
--- a/src/java/org/apache/cassandra/db/compaction/CompactionController.java
+++ b/src/java/org/apache/cassandra/db/compaction/CompactionController.java
@@ -297,7 +297,7 @@ public class CompactionController implements AutoCloseable
     {
         if (reader.isMarkedSuspect() ||
             reader.getMaxTimestamp() <= minTimestamp ||
-            tombstoneOnly && !reader.hasTombstones())
+            tombstoneOnly && !reader.mayHaveTombstones())
             return null;
         RowIndexEntry<?> position = reader.getPosition(key, SSTableReader.Operator.EQ);
         if (position == null)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
index 14730ac..4536036 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredRowIteratorWithLowerBound.java
@@ -159,7 +159,7 @@ public class UnfilteredRowIteratorWithLowerBound extends LazilyInitializedUnfilt
     @Override
     public DeletionTime partitionLevelDeletion()
     {
-        if (!sstable.hasTombstones())
+        if (!sstable.mayHaveTombstones())
             return DeletionTime.LIVE;
 
         return super.partitionLevelDeletion();
@@ -210,13 +210,34 @@ public class UnfilteredRowIteratorWithLowerBound extends LazilyInitializedUnfilt
     }
 
     /**
-     * @return true if we can use the clustering values in the stats of the sstable:
-     * - we need the latest stats file format (or else the clustering values create clusterings
with the wrong size)
-     * - we cannot create tombstone bounds from these values only and so we rule out sstables
with tombstones
+     * Whether we can use the clustering values in the stats of the sstable to build the
lower bound.
+     * <p>
+     * Currently, the clustering values of the stats file records for each clustering component
the min and max
+     * value seen, null excluded. In other words, having a non-null value for a component
in those min/max clustering
+     * values does _not_ guarantee that there isn't an unfiltered in the sstable whose clustering
has either no value for
+     * that component (it's a prefix) or a null value.
+     * <p>
+     * This is problematic as this means we can't in general build a lower bound from those
values since the "min"
+     * values doesn't actually guarantee minimality.
+     * <p>
+     * However, we can use those values if we can guarantee that no clustering in the sstable
1) is a true prefix and
+     * 2) uses null values. Nat having true prefixes means having no range tombstone markers
since rows use
+     * {@link Clustering} which is always "full" (all components are always present). As
for null values, we happen to
+     * only allow those in compact tables (for backward compatibility), so we can simply
exclude those tables.
+     * <p>
+     * Note that the information we currently have at our disposal make this condition less
precise that it could be.
+     * In particular, {@link SSTableReader#mayHaveTombstones} could return {@code true} (making
us not use the stats)
+     * because of cell tombstone or even expiring cells even if the sstable has no range
tombstone markers, even though
+     * it's really only markers we want to exclude here (more precisely, as said above, we
want to exclude anything
+     * whose clustering is not "full", but that's only markers). It wouldn't be very hard
to collect whether a sstable
+     * has any range tombstone marker however so it's a possible improvement.
      */
     private boolean canUseMetadataLowerBound()
     {
-        return !sstable.hasTombstones() && sstable.descriptor.version.hasNewStatsFile();
+        // Side-note: pre-2.1 sstable stat file had clustering value arrays whose size may
not match the comparator size
+        // and that would break getMetadataLowerBound. We don't support upgrade from 2.0
to 3.0 directly however so it's
+        // not a true concern. Besides, !sstable.mayHaveTombstones already ensure this is
a 3.0 sstable anyway.
+        return !sstable.mayHaveTombstones() && !sstable.metadata.isCompactTable();
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/f55cb88a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
index 56609b3..321abc7 100644
--- a/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
+++ b/src/java/org/apache/cassandra/io/sstable/format/SSTableReader.java
@@ -51,6 +51,7 @@ import org.apache.cassandra.config.Schema;
 import org.apache.cassandra.config.SchemaConstants;
 import org.apache.cassandra.db.*;
 import org.apache.cassandra.db.filter.ColumnFilter;
+import org.apache.cassandra.db.rows.Cell;
 import org.apache.cassandra.db.rows.EncodingStats;
 import org.apache.cassandra.db.rows.UnfilteredRowIterator;
 import org.apache.cassandra.dht.AbstractBounds;
@@ -1881,12 +1882,20 @@ public abstract class SSTableReader extends SSTable implements SelfRefCounted<SS
         return sstableMetadata.maxLocalDeletionTime;
     }
 
-    /** sstable contains no tombstones if minLocalDeletionTime == Integer.MAX_VALUE */
-    public boolean hasTombstones()
+    /**
+     * Whether the sstable may contain tombstones or if it is guaranteed to not contain any.
+     * <p>
+     * Note that having that method return {@code false} guarantees the sstable has no tombstones
whatsoever (so no
+     * cell tombstone, no range tombstone maker and no expiring columns), but having it return
{@code true} doesn't
+     * guarantee it contains any as 1) it may simply have non-expired cells and 2) old-format
sstables didn't contain
+     * enough information to decide this and so always return {@code true}.
+     */
+    public boolean mayHaveTombstones()
     {
-        // sstable contains no tombstone if minLocalDeletionTime is still set to  the default
value Integer.MAX_VALUE
-        // which is bigger than any valid deletion times
-        return getMinLocalDeletionTime() != Integer.MAX_VALUE;
+        // A sstable is guaranteed to have no tombstones if it properly tracked the minLocalDeletionTime
(which we only
+        // do since 3.0 - see CASSANDRA-13366) and that value is still set to its default,
Cell.NO_DELETION_TIME, which
+        // is bigger than any valid deletion times.
+        return !descriptor.version.storeRows() || getMinLocalDeletionTime() != Cell.NO_DELETION_TIME;
     }
 
     public int getMinTTL()
@@ -2008,22 +2017,6 @@ public abstract class SSTableReader extends SSTable implements SelfRefCounted<SS
             readMeter.mark();
     }
 
-    /**
-     * Checks if this sstable can overlap with another one based on the min/man clustering
values.
-     * If this methods return false, we're guarantee that {@code this} and {@code other}
have no overlapping
-     * data, i.e. no cells to reconcile.
-     */
-    public boolean mayOverlapsWith(SSTableReader other)
-    {
-        StatsMetadata m1 = getSSTableMetadata();
-        StatsMetadata m2 = other.getSSTableMetadata();
-
-        if (m1.minClusteringValues.isEmpty() || m1.maxClusteringValues.isEmpty() || m2.minClusteringValues.isEmpty()
|| m2.maxClusteringValues.isEmpty())
-            return true;
-
-        return !(compare(m1.maxClusteringValues, m2.minClusteringValues) < 0 || compare(m1.minClusteringValues,
m2.maxClusteringValues) > 0);
-    }
-
     private int compare(List<ByteBuffer> values1, List<ByteBuffer> values2)
     {
         ClusteringComparator comparator = metadata.comparator;


Mime
View raw message