cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From slebre...@apache.org
Subject [2/6] cassandra git commit: Fix problem with undeleteable rows on upgrade to new sstable format.
Date Tue, 19 Jul 2016 10:07:18 GMT
Fix problem with undeleteable rows on upgrade to new sstable format.

Patch by Alex Petrov; reviewed by Sylvain Lebresne for CASSANDRA-12144.


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/c481e8dc
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/c481e8dc
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/c481e8dc

Branch: refs/heads/cassandra-3.9
Commit: c481e8dc84c713bda21724368094850ff9150011
Parents: 557c596
Author: Alex Petrov <oleksandr.petrov@gmail.com>
Authored: Mon Jul 18 18:22:48 2016 +0200
Committer: Sylvain Lebresne <sylvain@datastax.com>
Committed: Tue Jul 19 11:57:09 2016 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../org/apache/cassandra/db/LegacyLayout.java   |  19 +++-
 .../cassandra/db/compaction/Scrubber.java       |  49 +++++++++--
 .../io/sstable/SSTableIdentityIterator.java     |   9 +-
 .../lb-1-big-CompressionInfo.db                 | Bin 0 -> 43 bytes
 .../cf_with_duplicates_2_0/lb-1-big-Data.db     | Bin 0 -> 84 bytes
 .../lb-1-big-Digest.adler32                     |   1 +
 .../cf_with_duplicates_2_0/lb-1-big-Filter.db   | Bin 0 -> 16 bytes
 .../cf_with_duplicates_2_0/lb-1-big-Index.db    | Bin 0 -> 18 bytes
 .../lb-1-big-Statistics.db                      | Bin 0 -> 4474 bytes
 .../cf_with_duplicates_2_0/lb-1-big-Summary.db  | Bin 0 -> 84 bytes
 .../cf_with_duplicates_2_0/lb-1-big-TOC.txt     |   8 ++
 .../mb-3-big-CompressionInfo.db                 | Bin 0 -> 51 bytes
 .../cf_with_duplicates_3_0/mb-3-big-Data.db     | Bin 0 -> 72 bytes
 .../mb-3-big-Digest.crc32                       |   1 +
 .../cf_with_duplicates_3_0/mb-3-big-Filter.db   | Bin 0 -> 16 bytes
 .../cf_with_duplicates_3_0/mb-3-big-Index.db    | Bin 0 -> 8 bytes
 .../mb-3-big-Statistics.db                      | Bin 0 -> 4664 bytes
 .../cf_with_duplicates_3_0/mb-3-big-Summary.db  | Bin 0 -> 56 bytes
 .../cf_with_duplicates_3_0/mb-3-big-TOC.txt     |   8 ++
 .../unit/org/apache/cassandra/db/ScrubTest.java |  88 ++++++++++++++++++-
 21 files changed, 173 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 59f0a5f..f205e0b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.9
+ * Fix problem with undeleteable rows on upgrade to new sstable format (CASSANDRA-12144)
  * Fix paging logic for deleted partitions with static columns (CASSANDRA-12107)
  * Wait until the message is being send to decide which serializer must be used (CASSANDRA-11393)
  * Fix migration of static thrift column names with non-text comparators (CASSANDRA-12147)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/src/java/org/apache/cassandra/db/LegacyLayout.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/LegacyLayout.java b/src/java/org/apache/cassandra/db/LegacyLayout.java
index 495a12a..3feb1f4 100644
--- a/src/java/org/apache/cassandra/db/LegacyLayout.java
+++ b/src/java/org/apache/cassandra/db/LegacyLayout.java
@@ -1205,9 +1205,26 @@ public abstract class LegacyLayout
         {
             if (tombstone.isRowDeletion(metadata))
             {
-                // If we're already within a row, it can't be the same one
                 if (clustering != null)
+                {
+                    // If we're already in the row, there might be a chance that there were two range tombstones
+                    // written, as 2.x storage format does not guarantee just one range tombstone, unlike 3.x.
+                    // We have to make sure that clustering matches, which would mean that tombstone is for the
+                    // same row.
+                    if (rowDeletion != null && clustering.equals(tombstone.start.getAsClustering(metadata)))
+                    {
+                        // If the tombstone superceeds the previous delete, we discard the previous one
+                        if (tombstone.deletionTime.supersedes(rowDeletion.deletionTime))
+                        {
+                            builder.addRowDeletion(Row.Deletion.regular(tombstone.deletionTime));
+                            rowDeletion = tombstone;
+                        }
+                        return true;
+                    }
+
+                    // If we're already within a row and there was no delete written before that one, it can't be the same one
                     return false;
+                }
 
                 clustering = tombstone.start.getAsClustering(metadata);
                 builder.newRow(clustering);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/src/java/org/apache/cassandra/db/compaction/Scrubber.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/compaction/Scrubber.java b/src/java/org/apache/cassandra/db/compaction/Scrubber.java
index d824d04..539c4c7 100644
--- a/src/java/org/apache/cassandra/db/compaction/Scrubber.java
+++ b/src/java/org/apache/cassandra/db/compaction/Scrubber.java
@@ -34,10 +34,7 @@ import org.apache.cassandra.io.sstable.format.SSTableWriter;
 import org.apache.cassandra.io.util.FileUtils;
 import org.apache.cassandra.io.util.RandomAccessReader;
 import org.apache.cassandra.service.ActiveRepairService;
-import org.apache.cassandra.utils.ByteBufferUtil;
-import org.apache.cassandra.utils.JVMStabilityInspector;
-import org.apache.cassandra.utils.OutputHandler;
-import org.apache.cassandra.utils.UUIDGen;
+import org.apache.cassandra.utils.*;
 
 public class Scrubber implements Closeable
 {
@@ -216,7 +213,7 @@ public class Scrubber implements Closeable
                     if (indexFile != null && dataStart != dataStartFromIndex)
                         outputHandler.warn(String.format("Data file row position %d differs from index file row position %d", dataStart, dataStartFromIndex));
 
-                    try (UnfilteredRowIterator iterator = withValidation(new SSTableIdentityIterator(sstable, dataFile, key), dataFile.getPath()))
+                    try (UnfilteredRowIterator iterator = withValidation(new RowMergingSSTableIterator(sstable, dataFile, key), dataFile.getPath()))
                     {
                         if (prevKey != null && prevKey.compareTo(key) > 0)
                         {
@@ -470,4 +467,46 @@ public class Scrubber implements Closeable
             this.emptyRows = scrubber.emptyRows;
         }
     }
+
+    /**
+     * During 2.x migration, under some circumstances rows might have gotten duplicated.
+     * Merging iterator merges rows with same clustering.
+     *
+     * For more details, refer to CASSANDRA-12144.
+     */
+    private static class RowMergingSSTableIterator extends SSTableIdentityIterator
+    {
+        RowMergingSSTableIterator(SSTableReader sstable, RandomAccessReader file, DecoratedKey key)
+        {
+            super(sstable, file, key);
+        }
+
+        @Override
+        protected Unfiltered doCompute()
+        {
+            if (!iterator.hasNext())
+                return endOfData();
+
+            Unfiltered next = iterator.next();
+            if (!next.isRow())
+                return next;
+
+            while (iterator.hasNext())
+            {
+                Unfiltered peek = iterator.peek();
+                // If there was a duplicate row, merge it.
+                if (next.clustering().equals(peek.clustering()) && peek.isRow())
+                {
+                    iterator.next(); // Make sure that the peeked item was consumed.
+                    next = Rows.merge((Row) next, (Row) peek, FBUtilities.nowInSeconds());
+                }
+                else
+                {
+                    break;
+                }
+            }
+
+            return next;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
index 6fbc690..a5af334 100644
--- a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
+++ b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java
@@ -34,7 +34,7 @@ public class SSTableIdentityIterator extends AbstractIterator<Unfiltered> implem
     private final DeletionTime partitionLevelDeletion;
     private final String filename;
 
-    private final SSTableSimpleIterator iterator;
+    protected final SSTableSimpleIterator iterator;
     private final Row staticRow;
 
     /**
@@ -97,7 +97,7 @@ public class SSTableIdentityIterator extends AbstractIterator<Unfiltered> implem
     {
         try
         {
-            return iterator.hasNext() ? iterator.next() : endOfData();
+            return doCompute();
         }
         catch (IndexOutOfBoundsException e)
         {
@@ -118,6 +118,11 @@ public class SSTableIdentityIterator extends AbstractIterator<Unfiltered> implem
         }
     }
 
+    protected Unfiltered doCompute()
+    {
+        return iterator.hasNext() ? iterator.next() : endOfData();
+    }
+
     public void close()
     {
         // creator is responsible for closing file when finished

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-CompressionInfo.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-CompressionInfo.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-CompressionInfo.db
new file mode 100644
index 0000000..307eeb3
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-CompressionInfo.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Data.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Data.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Data.db
new file mode 100644
index 0000000..175a5b6
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Data.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Digest.adler32
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Digest.adler32 b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Digest.adler32
new file mode 100644
index 0000000..ad624d2
--- /dev/null
+++ b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Digest.adler32
@@ -0,0 +1 @@
+408097082
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Filter.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Filter.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Filter.db
new file mode 100644
index 0000000..00a88b4
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Filter.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Index.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Index.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Index.db
new file mode 100644
index 0000000..c3b42d8
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Index.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Statistics.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Statistics.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Statistics.db
new file mode 100644
index 0000000..056cf17
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Statistics.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Summary.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Summary.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Summary.db
new file mode 100644
index 0000000..453753f
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-Summary.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-TOC.txt
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-TOC.txt b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-TOC.txt
new file mode 100644
index 0000000..ceb1dab
--- /dev/null
+++ b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_2_0/lb-1-big-TOC.txt
@@ -0,0 +1,8 @@
+CompressionInfo.db
+Digest.adler32
+TOC.txt
+Filter.db
+Data.db
+Index.db
+Statistics.db
+Summary.db

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-CompressionInfo.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-CompressionInfo.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-CompressionInfo.db
new file mode 100644
index 0000000..3c39b5d
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-CompressionInfo.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Data.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Data.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Data.db
new file mode 100644
index 0000000..1f90815
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Data.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Digest.crc32
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Digest.crc32 b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Digest.crc32
new file mode 100644
index 0000000..eeb8a5f
--- /dev/null
+++ b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Digest.crc32
@@ -0,0 +1 @@
+3332428483
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Filter.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Filter.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Filter.db
new file mode 100644
index 0000000..f9c2d6e
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Filter.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Index.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Index.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Index.db
new file mode 100644
index 0000000..b077026
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Index.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Statistics.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Statistics.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Statistics.db
new file mode 100644
index 0000000..0b49b88
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Statistics.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Summary.db
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Summary.db b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Summary.db
new file mode 100644
index 0000000..4547a94
Binary files /dev/null and b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-Summary.db differ

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-TOC.txt
----------------------------------------------------------------------
diff --git a/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-TOC.txt b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-TOC.txt
new file mode 100644
index 0000000..9a29338
--- /dev/null
+++ b/test/data/invalid-legacy-sstables/Keyspace1/cf_with_duplicates_3_0/mb-3-big-TOC.txt
@@ -0,0 +1,8 @@
+Statistics.db
+Digest.crc32
+Summary.db
+Index.db
+TOC.txt
+CompressionInfo.db
+Filter.db
+Data.db

http://git-wip-us.apache.org/repos/asf/cassandra/blob/c481e8dc/test/unit/org/apache/cassandra/db/ScrubTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/ScrubTest.java b/test/unit/org/apache/cassandra/db/ScrubTest.java
index 936ccd8..f97d9a9 100644
--- a/test/unit/org/apache/cassandra/db/ScrubTest.java
+++ b/test/unit/org/apache/cassandra/db/ScrubTest.java
@@ -20,6 +20,9 @@ package org.apache.cassandra.db;
 
 import java.io.*;
 import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.*;
 import java.util.concurrent.ExecutionException;
 
@@ -43,14 +46,12 @@ import org.apache.cassandra.db.marshal.UUIDType;
 import org.apache.cassandra.db.partitions.Partition;
 import org.apache.cassandra.db.partitions.PartitionUpdate;
 import org.apache.cassandra.db.rows.EncodingStats;
-import org.apache.cassandra.dht.ByteOrderedPartitioner;
-import org.apache.cassandra.dht.IPartitioner;
+import org.apache.cassandra.dht.*;
 import org.apache.cassandra.exceptions.ConfigurationException;
 import org.apache.cassandra.exceptions.RequestExecutionException;
 import org.apache.cassandra.exceptions.WriteTimeoutException;
 import org.apache.cassandra.io.compress.CompressionMetadata;
 import org.apache.cassandra.io.sstable.*;
-import org.apache.cassandra.io.sstable.format.SSTableFormat;
 import org.apache.cassandra.io.sstable.format.SSTableReader;
 import org.apache.cassandra.io.sstable.format.SSTableWriter;
 import org.apache.cassandra.io.sstable.format.big.BigTableWriter;
@@ -68,6 +69,8 @@ import static org.junit.Assume.assumeTrue;
 @RunWith(OrderedJUnit4ClassRunner.class)
 public class ScrubTest
 {
+    public static final String INVALID_LEGACY_SSTABLE_ROOT_PROP = "invalid-legacy-sstable-root";
+
     public static final String KEYSPACE = "Keyspace1";
     public static final String CF = "Standard1";
     public static final String CF2 = "Standard2";
@@ -661,4 +664,83 @@ public class ScrubTest
             return dataFile.position();
         }
     }
+
+    /**
+     * Tests with invalid sstables (containing duplicate entries in 2.0 and 3.0 storage format),
+     * that were caused by upgrading from 2.x with duplicate range tombstones.
+     *
+     * See CASSANDRA-12144 for details.
+     */
+    @Test
+    public void testFilterOutDuplicates() throws Exception
+    {
+        DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+        QueryProcessor.process(String.format("CREATE TABLE \"%s\".cf_with_duplicates_3_0 (a int, b int, c int, PRIMARY KEY (a, b))", KEYSPACE), ConsistencyLevel.ONE);
+
+        Keyspace keyspace = Keyspace.open(KEYSPACE);
+        ColumnFamilyStore cfs = keyspace.getColumnFamilyStore("cf_with_duplicates_3_0");
+
+        Path legacySSTableRoot = Paths.get(System.getProperty(INVALID_LEGACY_SSTABLE_ROOT_PROP),
+                                           "Keyspace1",
+                                           "cf_with_duplicates_3_0");
+
+        for (String filename : new String[]{ "mb-3-big-CompressionInfo.db",
+                                             "mb-3-big-Digest.crc32",
+                                             "mb-3-big-Index.db",
+                                             "mb-3-big-Summary.db",
+                                             "mb-3-big-Data.db",
+                                             "mb-3-big-Filter.db",
+                                             "mb-3-big-Statistics.db",
+                                             "mb-3-big-TOC.txt" })
+        {
+            Files.copy(Paths.get(legacySSTableRoot.toString(), filename), cfs.getDirectories().getDirectoryForNewSSTables().toPath().resolve(filename));
+        }
+
+        cfs.loadNewSSTables();
+
+        cfs.scrub(true, true, true, 1);
+
+        UntypedResultSet rs = QueryProcessor.executeInternal(String.format("SELECT * FROM \"%s\".cf_with_duplicates_3_0", KEYSPACE));
+        assertEquals(1, rs.size());
+        QueryProcessor.executeInternal(String.format("DELETE FROM \"%s\".cf_with_duplicates_3_0 WHERE a=1 AND b =2", KEYSPACE));
+        rs = QueryProcessor.executeInternal(String.format("SELECT * FROM \"%s\".cf_with_duplicates_3_0", KEYSPACE));
+        assertEquals(0, rs.size());
+    }
+
+    @Test
+    public void testUpgradeSstablesWithDuplicates() throws Exception
+    {
+        DatabaseDescriptor.setPartitionerUnsafe(Murmur3Partitioner.instance);
+        String cf = "cf_with_duplicates_2_0";
+        QueryProcessor.process(String.format("CREATE TABLE \"%s\".%s (a int, b int, c int, PRIMARY KEY (a, b))", KEYSPACE, cf), ConsistencyLevel.ONE);
+
+        Keyspace keyspace = Keyspace.open(KEYSPACE);
+        ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(cf);
+
+        Path legacySSTableRoot = Paths.get(System.getProperty(INVALID_LEGACY_SSTABLE_ROOT_PROP),
+                                           "Keyspace1",
+                                           cf);
+
+        for (String filename : new String[]{ "lb-1-big-CompressionInfo.db",
+                                             "lb-1-big-Data.db",
+                                             "lb-1-big-Digest.adler32",
+                                             "lb-1-big-Filter.db",
+                                             "lb-1-big-Index.db",
+                                             "lb-1-big-Statistics.db",
+                                             "lb-1-big-Summary.db",
+                                             "lb-1-big-TOC.txt" })
+        {
+            Files.copy(Paths.get(legacySSTableRoot.toString(), filename), cfs.getDirectories().getDirectoryForNewSSTables().toPath().resolve(filename));
+        }
+
+        cfs.loadNewSSTables();
+
+        cfs.sstablesRewrite(true, 1);
+
+        UntypedResultSet rs = QueryProcessor.executeInternal(String.format("SELECT * FROM \"%s\".%s", KEYSPACE, cf));
+        assertEquals(1, rs.size());
+        QueryProcessor.executeInternal(String.format("DELETE FROM \"%s\".%s WHERE a=1 AND b =2", KEYSPACE, cf));
+        rs = QueryProcessor.executeInternal(String.format("SELECT * FROM \"%s\".%s", KEYSPACE, cf));
+        assertEquals(0, rs.size());
+    }
 }


Mime
View raw message