cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alek...@apache.org
Subject [1/2] git commit: Optimize CounterColumn#reconcile()
Date Mon, 31 Mar 2014 10:28:02 GMT
Repository: cassandra
Updated Branches:
  refs/heads/trunk 6f78382fa -> cbde96724


Optimize CounterColumn#reconcile()

patch by Aleksey Yeschenko; reviewed by Sylvain Lebresne for
CASSANDRA-6953


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/a79d54ee
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/a79d54ee
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/a79d54ee

Branch: refs/heads/trunk
Commit: a79d54eeafc7880e0257775e344b11c1252a8e89
Parents: 86c79d6
Author: Aleksey Yeschenko <aleksey@apache.org>
Authored: Mon Mar 31 13:23:36 2014 +0300
Committer: Aleksey Yeschenko <aleksey@apache.org>
Committed: Mon Mar 31 13:23:36 2014 +0300

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../org/apache/cassandra/db/CounterCell.java    |  17 ++-
 .../cassandra/db/context/CounterContext.java    | 138 ++++++++++++-------
 3 files changed, 98 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/a79d54ee/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index b27a52d..0457b5e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -36,6 +36,7 @@
  * Add optimized CF.hasColumns() implementations (CASSANDRA-6941)
  * Serialize batchlog mutations with the version of the target node
    (CASSANDRA-6931)
+ * Optimize CounterColumn#reconcile() (CASSANDRA-6953)
 Merged from 2.0:
  * Restrict Windows to parallel repairs (CASSANDRA-6907)
  * (Hadoop) Allow manually specifying start/end tokens in CFIF (CASSANDRA-6436)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/a79d54ee/src/java/org/apache/cassandra/db/CounterCell.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/CounterCell.java b/src/java/org/apache/cassandra/db/CounterCell.java
index cc26ef5..6b588ef 100644
--- a/src/java/org/apache/cassandra/db/CounterCell.java
+++ b/src/java/org/apache/cassandra/db/CounterCell.java
@@ -168,11 +168,18 @@ public class CounterCell extends Cell
         // live last delete > live
         if (timestampOfLastDelete() > cell.timestamp())
             return this;
-        // live + live: merge clocks; update value
-        return new CounterCell(name(),
-                               contextManager.merge(value(), cell.value()),
-                               Math.max(timestamp(), cell.timestamp()),
-                               Math.max(timestampOfLastDelete(), ((CounterCell) cell).timestampOfLastDelete()));
+
+        // live + live. return one of the cells if its context is a superset of the other's,
or merge them otherwise
+        ByteBuffer context = contextManager.merge(value(), cell.value());
+        if (context == value() && timestamp() >= cell.timestamp() && timestampOfLastDelete()
>= ((CounterCell) cell).timestampOfLastDelete())
+            return this;
+        else if (context == cell.value() && cell.timestamp() >= timestamp() &&
((CounterCell) cell).timestampOfLastDelete() >= timestampOfLastDelete())
+            return cell;
+        else // merge clocks and timsestamps.
+            return new CounterCell(name(),
+                                   context,
+                                   Math.max(timestamp(), cell.timestamp()),
+                                   Math.max(timestampOfLastDelete(), ((CounterCell) cell).timestampOfLastDelete()));
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/cassandra/blob/a79d54ee/src/java/org/apache/cassandra/db/context/CounterContext.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/context/CounterContext.java b/src/java/org/apache/cassandra/db/context/CounterContext.java
index 1e830a6..0b1677b 100644
--- a/src/java/org/apache/cassandra/db/context/CounterContext.java
+++ b/src/java/org/apache/cassandra/db/context/CounterContext.java
@@ -22,7 +22,6 @@ import java.security.MessageDigest;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.cassandra.serializers.MarshalException;
 import com.google.common.annotations.VisibleForTesting;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -30,6 +29,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.cassandra.db.ClockAndCount;
 import org.apache.cassandra.db.TypeSizes;
 import org.apache.cassandra.db.compaction.CompactionManager;
+import org.apache.cassandra.serializers.MarshalException;
 import org.apache.cassandra.utils.*;
 
 /**
@@ -256,6 +256,9 @@ public class CounterContext
      */
     public ByteBuffer merge(ByteBuffer left, ByteBuffer right)
     {
+        boolean leftIsSuperSet = true;
+        boolean rightIsSuperSet = true;
+
         int globalCount = 0;
         int localCount = 0;
         int remoteCount = 0;
@@ -268,6 +271,14 @@ public class CounterContext
             int cmp = leftState.compareIdTo(rightState);
             if (cmp == 0)
             {
+                Relationship rel = compare(leftState, rightState);
+                if (rel == Relationship.GREATER_THAN)
+                    rightIsSuperSet = false;
+                else if (rel == Relationship.LESS_THAN)
+                    leftIsSuperSet = false;
+                else if (rel == Relationship.DISJOINT)
+                    leftIsSuperSet = rightIsSuperSet = false;
+
                 if (leftState.isGlobal() || rightState.isGlobal())
                     globalCount += 1;
                 else if (leftState.isLocal() || rightState.isLocal())
@@ -280,6 +291,8 @@ public class CounterContext
             }
             else if (cmp > 0)
             {
+                leftIsSuperSet = false;
+
                 if (rightState.isGlobal())
                     globalCount += 1;
                 else if (rightState.isLocal())
@@ -291,6 +304,8 @@ public class CounterContext
             }
             else // cmp < 0
             {
+                rightIsSuperSet = false;
+
                 if (leftState.isGlobal())
                     globalCount += 1;
                 else if (leftState.isLocal())
@@ -302,6 +317,17 @@ public class CounterContext
             }
         }
 
+        if (leftState.hasRemaining())
+            rightIsSuperSet = false;
+        else if (rightState.hasRemaining())
+            leftIsSuperSet = false;
+
+        // if one of the contexts is a superset, return it early.
+        if (leftIsSuperSet)
+            return left;
+        else if (rightIsSuperSet)
+            return right;
+
         while (leftState.hasRemaining())
         {
             if (leftState.isGlobal())
@@ -339,7 +365,16 @@ public class CounterContext
             int cmp = leftState.compareIdTo(rightState);
             if (cmp == 0)
             {
-                mergeTie(mergedState, leftState, rightState);
+                Relationship rel = compare(leftState, rightState);
+                if (rel == Relationship.DISJOINT) // two local shards
+                    mergedState.writeLocal(leftState.getCounterId(),
+                                           leftState.getClock() + rightState.getClock(),
+                                           leftState.getCount() + rightState.getCount());
+                else if (rel == Relationship.GREATER_THAN)
+                    leftState.copyTo(mergedState);
+                else // EQUAL or LESS_THAN
+                    rightState.copyTo(mergedState);
+
                 rightState.moveToNext();
                 leftState.moveToNext();
             }
@@ -370,20 +405,26 @@ public class CounterContext
         return mergedState.context;
     }
 
-    private void mergeTie(ContextState mergedState, ContextState leftState, ContextState
rightState)
+    /*
+     * Compares two shards, returns:
+     * - GREATER_THAN if leftState overrides rightState
+     * - LESS_THAN if rightState overrides leftState
+     * - EQUAL for two equal, non-local, shards
+     * - DISJOINT for any two local shards
+     */
+    private Relationship compare(ContextState leftState, ContextState rightState)
     {
+        long leftClock = leftState.getClock();
+        long leftCount = leftState.getCount();
+        long rightClock = rightState.getClock();
+        long rightCount = rightState.getCount();
+
         if (leftState.isGlobal() || rightState.isGlobal())
         {
             if (leftState.isGlobal() && rightState.isGlobal())
             {
-                long leftClock = leftState.getClock();
-                long rightClock = rightState.getClock();
-
                 if (leftClock == rightClock)
                 {
-                    long leftCount = leftState.getCount();
-                    long rightCount = rightState.getCount();
-
                     // Can happen if an sstable gets lost and disk failure policy is set
to 'best effort'
                     if (leftCount != rightCount && CompactionManager.isCompactionManager.get())
                     {
@@ -394,69 +435,60 @@ public class CounterContext
                     }
 
                     if (leftCount > rightCount)
-                        leftState.copyTo(mergedState);
+                        return Relationship.GREATER_THAN;
+                    else if (leftCount == rightCount)
+                        return Relationship.EQUAL;
                     else
-                        rightState.copyTo(mergedState);
+                        return Relationship.LESS_THAN;
                 }
                 else
                 {
-                    (leftClock > rightClock ? leftState : rightState).copyTo(mergedState);
+                    return leftClock > rightClock ? Relationship.GREATER_THAN : Relationship.LESS_THAN;
                 }
             }
             else // only one is global - keep that one
             {
-                (leftState.isGlobal() ? leftState : rightState).copyTo(mergedState);
+                return leftState.isGlobal() ? Relationship.GREATER_THAN : Relationship.LESS_THAN;
             }
         }
-        else if (leftState.isLocal() || rightState.isLocal())
+
+        if (leftState.isLocal() || rightState.isLocal())
         {
             // Local id and at least one is a local shard.
             if (leftState.isLocal() && rightState.isLocal())
-            {
-                // both local - sum
-                long clock = leftState.getClock() + rightState.getClock();
-                long count = leftState.getCount() + rightState.getCount();
-                mergedState.writeLocal(leftState.getCounterId(), clock, count);
-            }
+                return Relationship.DISJOINT;
             else // only one is local - keep that one
-            {
-                (leftState.isLocal() ? leftState : rightState).copyTo(mergedState);
-            }
+                return leftState.isLocal() ? Relationship.GREATER_THAN : Relationship.LESS_THAN;
         }
-        else // both are remote shards
-        {
-            long leftClock = leftState.getClock();
-            long rightClock = rightState.getClock();
 
-            if (leftClock == rightClock)
+        // both are remote shards
+        if (leftClock == rightClock)
+        {
+            // We should never see non-local shards w/ same id+clock but different counts.
However, if we do
+            // we should "heal" the problem by being deterministic in our selection of shard
- and
+            // log the occurrence so that the operator will know something is wrong.
+            if (leftCount != rightCount && CompactionManager.isCompactionManager.get())
             {
-                // We should never see non-local shards w/ same id+clock but different counts.
However, if we do
-                // we should "heal" the problem by being deterministic in our selection of
shard - and
-                // log the occurrence so that the operator will know something is wrong.
-                long leftCount = leftState.getCount();
-                long rightCount = rightState.getCount();
-
-                if (leftCount != rightCount && CompactionManager.isCompactionManager.get())
-                {
-                    logger.warn("invalid remote counter shard detected; ({}, {}, {}) and
({}, {}, {}) differ only in "
-                                + "count; will pick highest to self-heal on compaction",
-                                leftState.getCounterId(), leftClock, leftCount,
-                                rightState.getCounterId(), rightClock, rightCount);
-                }
-
-                if (leftCount > rightCount)
-                    leftState.copyTo(mergedState);
-                else
-                    rightState.copyTo(mergedState);
+                logger.warn("invalid remote counter shard detected; ({}, {}, {}) and ({},
{}, {}) differ only in "
+                            + "count; will pick highest to self-heal on compaction",
+                            leftState.getCounterId(), leftClock, leftCount,
+                            rightState.getCounterId(), rightClock, rightCount);
             }
+
+            if (leftCount > rightCount)
+                return Relationship.GREATER_THAN;
+            else if (leftCount == rightCount)
+                return Relationship.EQUAL;
             else
-            {
-                if ((leftClock >= 0 && rightClock > 0 && leftClock
>= rightClock)
-                        || (leftClock < 0 && (rightClock > 0 || leftClock <
rightClock)))
-                    leftState.copyTo(mergedState);
-                else
-                    rightState.copyTo(mergedState);
-            }
+                return Relationship.LESS_THAN;
+        }
+        else
+        {
+            if ((leftClock >= 0 && rightClock > 0 && leftClock >=
rightClock)
+                    || (leftClock < 0 && (rightClock > 0 || leftClock <
rightClock)))
+                return Relationship.GREATER_THAN;
+            else
+                return Relationship.LESS_THAN;
         }
     }
 


Mime
View raw message