cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bened...@apache.org
Subject [2/3] cassandra git commit: Improve BTree
Date Wed, 15 Jul 2015 10:29:12 GMT
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/BTreeSet.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java
new file mode 100644
index 0000000..d1271fc
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java
@@ -0,0 +1,642 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cassandra.utils.btree;
+
+import java.util.*;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Ordering;
+
+import static org.apache.cassandra.utils.btree.BTree.findIndex;
+import static org.apache.cassandra.utils.btree.BTree.lower;
+import static org.apache.cassandra.utils.btree.BTree.toArray;
+
+public class BTreeSet<V> implements NavigableSet<V>, List<V>
+{
+    protected final Comparator<? super V> comparator;
+    protected final Object[] tree;
+
+    public BTreeSet(Object[] tree, Comparator<? super V> comparator)
+    {
+        this.tree = tree;
+        this.comparator = comparator;
+    }
+
+    public BTreeSet<V> update(Collection<V> updateWith)
+    {
+        return new BTreeSet<>(BTree.update(tree, comparator, updateWith, UpdateFunction.<V>noOp()), comparator);
+    }
+
+    @Override
+    public Comparator<? super V> comparator()
+    {
+        return comparator;
+    }
+
+    protected BTreeSearchIterator<V, V> slice(boolean forwards)
+    {
+        return BTree.slice(tree, comparator, forwards);
+    }
+
+    public Object[] tree()
+    {
+        return tree;
+    }
+
+    /**
+     * The index of the item within the list, or its insertion point otherwise. i.e. binarySearch semantics
+     */
+    public int indexOf(Object item)
+    {
+        return findIndex(tree, comparator, (V) item);
+    }
+
+    /**
+     * The converse of indexOf: provided an index between 0 and size, returns the i'th item, in set order.
+     */
+    public V get(int index)
+    {
+        return BTree.<V>findByIndex(tree, index);
+    }
+
+    public int lastIndexOf(Object o)
+    {
+        return indexOf(o);
+    }
+
+    public BTreeSet<V> subList(int fromIndex, int toIndex)
+    {
+        return new BTreeRange<V>(tree, comparator, fromIndex, toIndex - 1);
+    }
+
+    @Override
+    public int size()
+    {
+        return BTree.size(tree);
+    }
+
+    @Override
+    public boolean isEmpty()
+    {
+        return BTree.isEmpty(tree);
+    }
+
+    @Override
+    public BTreeSearchIterator<V, V> iterator()
+    {
+        return slice(true);
+    }
+
+    @Override
+    public BTreeSearchIterator<V, V> descendingIterator()
+    {
+        return slice(false);
+    }
+
+    @Override
+    public Object[] toArray()
+    {
+        return toArray(new Object[0]);
+    }
+
+    @Override
+    public <T> T[] toArray(T[] a)
+    {
+        return toArray(a, 0);
+    }
+
+    public <T> T[] toArray(T[] a, int offset)
+    {
+        int size = size();
+        if (a.length < size + offset)
+            a = Arrays.copyOf(a, size);
+        BTree.toArray(tree, a, offset);
+        return a;
+    }
+
+    public Spliterator<V> spliterator()
+    {
+        return Spliterators.spliterator(this, Spliterator.ORDERED | Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.SIZED);
+    }
+
+    @Override
+    public BTreeSet<V> subSet(V fromElement, boolean fromInclusive, V toElement, boolean toInclusive)
+    {
+        return new BTreeRange<>(tree, comparator, fromElement, fromInclusive, toElement, toInclusive);
+    }
+
+    @Override
+    public BTreeSet<V> headSet(V toElement, boolean inclusive)
+    {
+        return new BTreeRange<>(tree, comparator, null, true, toElement, inclusive);
+    }
+
+    @Override
+    public BTreeSet<V> tailSet(V fromElement, boolean inclusive)
+    {
+        return new BTreeRange<>(tree, comparator, fromElement, inclusive, null, true);
+    }
+
+    @Override
+    public SortedSet<V> subSet(V fromElement, V toElement)
+    {
+        return subSet(fromElement, true, toElement, false);
+    }
+
+    @Override
+    public SortedSet<V> headSet(V toElement)
+    {
+        return headSet(toElement, false);
+    }
+
+    @Override
+    public SortedSet<V> tailSet(V fromElement)
+    {
+        return tailSet(fromElement, true);
+    }
+
+    @Override
+    public BTreeSet<V> descendingSet()
+    {
+        return new BTreeRange<>(this.tree, this.comparator).descendingSet();
+    }
+
+    @Override
+    public V first()
+    {
+        return get(0);
+    }
+
+    @Override
+    public V last()
+    {
+        return get(size() - 1);
+    }
+
+    @Override
+    public V lower(V v)
+    {
+        return BTree.lower(tree, comparator, v);
+    }
+
+    @Override
+    public V floor(V v)
+    {
+        return BTree.floor(tree, comparator, v);
+    }
+
+    @Override
+    public V ceiling(V v)
+    {
+        return BTree.ceil(tree, comparator, v);
+    }
+
+    @Override
+    public V higher(V v)
+    {
+        return BTree.higher(tree, comparator, v);
+    }
+
+    @Override
+    public boolean contains(Object o)
+    {
+        return indexOf((V) o) >= 0;
+    }
+
+    @Override
+    public boolean containsAll(Collection<?> c)
+    {
+        // TODO: if we ever use this method, it can be specialized quite easily for SortedSet arguments
+        for (Object o : c)
+            if (!contains(o))
+                return false;
+        return true;
+    }
+
+    public int hashCode()
+    {
+        // we can't just delegate to Arrays.deepHashCode(),
+        // because two equivalent sets may be represented by differently shaped trees
+        int result = 1;
+        for (V v : this)
+            result = 31 * result + Objects.hashCode(v);
+        return result;
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends V> c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public boolean addAll(int index, Collection<? extends V> c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean retainAll(Collection<?> c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean removeAll(Collection<?> c)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clear()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public V pollFirst()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public V pollLast()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean add(V v)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean remove(Object o)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public V set(int index, V element)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public void add(int index, V element)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public V remove(int index)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public ListIterator<V> listIterator()
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public ListIterator<V> listIterator(int index)
+    {
+        throw new UnsupportedOperationException();
+    }
+
+    public static class BTreeRange<V> extends BTreeSet<V>
+    {
+        // both inclusive
+        protected final int lowerBound, upperBound;
+        BTreeRange(Object[] tree, Comparator<? super V> comparator)
+        {
+            this(tree, comparator, null, true, null, true);
+        }
+
+        BTreeRange(BTreeRange<V> from)
+        {
+            super(from.tree, from.comparator);
+            this.lowerBound = from.lowerBound;
+            this.upperBound = from.upperBound;
+        }
+
+        BTreeRange(Object[] tree, Comparator<? super V> comparator, int lowerBound, int upperBound)
+        {
+            super(tree, comparator);
+            if (upperBound < lowerBound - 1)
+                upperBound = lowerBound - 1;
+            this.lowerBound = lowerBound;
+            this.upperBound = upperBound;
+        }
+
+        BTreeRange(Object[] tree, Comparator<? super V> comparator, V lowerBound, boolean inclusiveLowerBound, V upperBound, boolean inclusiveUpperBound)
+        {
+            this(tree, comparator,
+                 lowerBound == null ? 0 : inclusiveLowerBound ? BTree.ceilIndex(tree, comparator, lowerBound)
+                                                              : BTree.higherIndex(tree, comparator, lowerBound),
+                 upperBound == null ? BTree.size(tree) - 1 : inclusiveUpperBound ? BTree.floorIndex(tree, comparator, upperBound)
+                                                                                 : BTree.lowerIndex(tree, comparator, upperBound));
+        }
+
+        // narrowing range constructor - makes this the intersection of the two ranges over the same tree b
+        BTreeRange(BTreeRange<V> a, BTreeRange<V> b)
+        {
+            this(a.tree, a.comparator, Math.max(a.lowerBound, b.lowerBound), Math.min(a.upperBound, b.upperBound));
+            assert a.tree == b.tree;
+        }
+
+        @Override
+        protected BTreeSearchIterator<V, V> slice(boolean forwards)
+        {
+            return new BTreeSearchIterator<>(tree, comparator, forwards, lowerBound, upperBound);
+        }
+
+        @Override
+        public boolean isEmpty()
+        {
+            return upperBound < lowerBound;
+        }
+
+        public int size()
+        {
+            return (upperBound - lowerBound) + 1;
+        }
+
+        boolean outOfBounds(int i)
+        {
+            return (i < lowerBound) | (i > upperBound);
+        }
+
+        public V get(int index)
+        {
+            index += lowerBound;
+            if (outOfBounds(index))
+                throw new NoSuchElementException();
+            return super.get(index);
+        }
+
+        public int indexOf(Object item)
+        {
+            int i = super.indexOf(item);
+            boolean negate = i < 0;
+            if (negate)
+                i = -1 - i;
+            if (outOfBounds(i))
+                return i < lowerBound ? -1 : -1 - size();
+            i = i - lowerBound;
+            if (negate)
+                i = -1 -i;
+            return i;
+        }
+
+        public V lower(V v)
+        {
+            return maybe(Math.min(upperBound, BTree.lowerIndex(tree, comparator, v)));
+        }
+
+        public V floor(V v)
+        {
+            return maybe(Math.min(upperBound, BTree.floorIndex(tree, comparator, v)));
+        }
+
+        public V ceiling(V v)
+        {
+            return maybe(Math.max(lowerBound, BTree.ceilIndex(tree, comparator, v)));
+        }
+
+        public V higher(V v)
+        {
+            return maybe(Math.max(lowerBound, BTree.higherIndex(tree, comparator, v)));
+        }
+
+        private V maybe(int i)
+        {
+            if (outOfBounds(i))
+                return null;
+            return super.get(i);
+        }
+
+        @Override
+        public BTreeSet<V> subSet(V fromElement, boolean fromInclusive, V toElement, boolean toInclusive)
+        {
+            return new BTreeRange<>(this, new BTreeRange<>(tree, comparator, fromElement, fromInclusive, toElement, toInclusive));
+        }
+
+        @Override
+        public BTreeSet<V> headSet(V toElement, boolean inclusive)
+        {
+            return new BTreeRange<>(this, new BTreeRange<>(tree, comparator, null, true, toElement, inclusive));
+        }
+
+        @Override
+        public BTreeSet<V> tailSet(V fromElement, boolean inclusive)
+        {
+            return new BTreeRange<>(this, new BTreeRange<>(tree, comparator, fromElement, inclusive, null, true));
+        }
+
+        @Override
+        public BTreeSet<V> descendingSet()
+        {
+            return new BTreeDescRange<>(this);
+        }
+
+        public BTreeSet<V> subList(int fromIndex, int toIndex)
+        {
+            if (fromIndex < 0 || toIndex > size())
+                throw new IndexOutOfBoundsException();
+            return new BTreeRange<V>(tree, comparator, lowerBound + fromIndex, lowerBound + toIndex - 1);
+        }
+
+        @Override
+        public <T> T[] toArray(T[] a)
+        {
+            return toArray(a, 0);
+        }
+
+        public <T> T[] toArray(T[] a, int offset)
+        {
+            if (size() + offset < a.length)
+                a = Arrays.copyOf(a, size() + offset);
+
+            BTree.toArray(tree, lowerBound, upperBound + 1, a, offset);
+            return a;
+        }
+    }
+
+    public static class BTreeDescRange<V> extends BTreeRange<V>
+    {
+        BTreeDescRange(BTreeRange<V> from)
+        {
+            super(from.tree, from.comparator, from.lowerBound, from.upperBound);
+        }
+
+        @Override
+        protected BTreeSearchIterator<V, V> slice(boolean forwards)
+        {
+            return super.slice(!forwards);
+        }
+
+        /* Flip the methods we call for inequality searches */
+
+        public V higher(V v)
+        {
+            return super.lower(v);
+        }
+
+        public V ceiling(V v)
+        {
+            return super.floor(v);
+        }
+
+        public V floor(V v)
+        {
+            return super.ceiling(v);
+        }
+
+        public V lower(V v)
+        {
+            return super.higher(v);
+        }
+
+        public V get(int index)
+        {
+            index = upperBound - index;
+            if (outOfBounds(index))
+                throw new NoSuchElementException();
+            return BTree.findByIndex(tree, index);
+        }
+
+        public int indexOf(Object item)
+        {
+            int i = super.indexOf(item);
+            // i is in range [-1 - size()..size())
+            // so we just need to invert by adding/subtracting from size
+            return i < 0 ? -2 - size() - i  : size() - (i + 1);
+        }
+
+        public BTreeSet<V> subList(int fromIndex, int toIndex)
+        {
+            if (fromIndex < 0 || toIndex > size())
+                throw new IndexOutOfBoundsException();
+            return new BTreeDescRange<V>(new BTreeRange<V>(tree, comparator, upperBound - (toIndex - 1), upperBound - fromIndex));
+        }
+
+        @Override
+        public BTreeSet<V> subSet(V fromElement, boolean fromInclusive, V toElement, boolean toInclusive)
+        {
+            return super.subSet(toElement, toInclusive, fromElement, fromInclusive).descendingSet();
+        }
+
+        @Override
+        public BTreeSet<V> headSet(V toElement, boolean inclusive)
+        {
+            return super.tailSet(toElement, inclusive).descendingSet();
+        }
+
+        @Override
+        public BTreeSet<V> tailSet(V fromElement, boolean inclusive)
+        {
+            return super.headSet(fromElement, inclusive).descendingSet();
+        }
+
+        @Override
+        public BTreeSet<V> descendingSet()
+        {
+            return new BTreeRange<>(this);
+        }
+
+        public Comparator<V> comparator()
+        {
+            return (a, b) -> comparator.compare(b, a);
+        }
+
+        public <T> T[] toArray(T[] a, int offset)
+        {
+            a = super.toArray(a, offset);
+            int count = size();
+            int flip = count / 2;
+            for (int i = 0 ; i < flip ; i++)
+            {
+                int j = count - (i + 1);
+                T t = a[i + offset];
+                a[i + offset] = a[j + offset];
+                a[j + offset] = t;
+            }
+            return a;
+        }
+    }
+
+    public static class Builder<V>
+    {
+        final BTree.Builder<V> builder;
+        protected Builder(Comparator<? super V> comparator)
+        {
+            builder= BTree.builder(comparator);
+        }
+
+        public Builder<V> add(V v)
+        {
+            builder.add(v);
+            return this;
+        }
+
+        public Builder<V> addAll(Collection<V> iter)
+        {
+            builder.addAll(iter);
+            return this;
+        }
+
+        public boolean isEmpty()
+        {
+            return builder.isEmpty();
+        }
+        public BTreeSet<V> build()
+        {
+            return new BTreeSet<>(builder.build(), builder.comparator);
+        }
+    }
+
+    public static <V> Builder<V> builder(Comparator<? super V> comparator)
+    {
+        return new Builder<>(comparator);
+    }
+
+    public static <V> BTreeSet<V> wrap(Object[] btree, Comparator<V> comparator)
+    {
+        return new BTreeSet<>(btree, comparator);
+    }
+
+    public static <V extends Comparable<V>> BTreeSet<V> of(Collection<V> sortedValues)
+    {
+        return new BTreeSet<>(BTree.build(sortedValues, UpdateFunction.<V>noOp()), Ordering.<V>natural());
+    }
+
+    public static <V extends Comparable<V>> BTreeSet<V> of(V value)
+    {
+        return new BTreeSet<>(BTree.build(ImmutableList.of(value), UpdateFunction.<V>noOp()), Ordering.<V>natural());
+    }
+
+    public static <V> BTreeSet<V> empty(Comparator<? super V> comparator)
+    {
+        return new BTreeSet<>(BTree.empty(), comparator);
+    }
+
+    public static <V> BTreeSet<V> of(Comparator<? super V> comparator, V value)
+    {
+        return new BTreeSet<>(BTree.build(ImmutableList.of(value), UpdateFunction.<V>noOp()), comparator);
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/Builder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/Builder.java b/src/java/org/apache/cassandra/utils/btree/Builder.java
deleted file mode 100644
index b835554..0000000
--- a/src/java/org/apache/cassandra/utils/btree/Builder.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.cassandra.utils.btree;
-
-import java.util.Comparator;
-
-import static org.apache.cassandra.utils.btree.BTree.EMPTY_LEAF;
-import static org.apache.cassandra.utils.btree.BTree.FAN_SHIFT;
-import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY;
-
-/**
- * A class for constructing a new BTree, either from an existing one and some set of modifications
- * or a new tree from a sorted collection of items.
- * <p/>
- * This is a fairly heavy-weight object, so a ThreadLocal instance is created for making modifications to a tree
- */
-final class Builder
-{
-    private final NodeBuilder rootBuilder = new NodeBuilder();
-
-    /**
-     * At the highest level, we adhere to the classic b-tree insertion algorithm:
-     *
-     * 1. Add to the appropriate leaf
-     * 2. Split the leaf if necessary, add the median to the parent
-     * 3. Split the parent if necessary, etc.
-     *
-     * There is one important difference: we don't actually modify the original tree, but copy each node that we
-     * modify.  Note that every node on the path to the key being inserted or updated will be modified; this
-     * implies that at a minimum, the root node will be modified for every update, so every root is a "snapshot"
-     * of a tree that can be iterated or sliced without fear of concurrent modifications.
-     *
-     * The NodeBuilder class handles the details of buffering the copied contents of the original tree and
-     * adding in our changes.  Since NodeBuilder maintains parent/child references, it also handles parent-splitting
-     * (easy enough, since any node affected by the split will already be copied into a NodeBuilder).
-     *
-     * One other difference from the simple algorithm is that we perform modifications in bulk;
-     * we assume @param source has been sorted, e.g. by BTree.update, so the update of each key resumes where
-     * the previous left off.
-     */
-    public <C, K extends C, V extends C> Object[] update(Object[] btree, Comparator<C> comparator, Iterable<K> source, UpdateFunction<K, V> updateF)
-    {
-        assert updateF != null;
-
-        NodeBuilder current = rootBuilder;
-        current.reset(btree, POSITIVE_INFINITY, updateF, comparator);
-
-        for (K key : source)
-        {
-            while (true)
-            {
-                if (updateF.abortEarly())
-                {
-                    rootBuilder.clear();
-                    return null;
-                }
-                NodeBuilder next = current.update(key);
-                if (next == null)
-                    break;
-                // we were in a subtree from a previous key that didn't contain this new key;
-                // retry against the correct subtree
-                current = next;
-            }
-        }
-
-        // finish copying any remaining keys from the original btree
-        while (true)
-        {
-            NodeBuilder next = current.finish();
-            if (next == null)
-                break;
-            current = next;
-        }
-
-        // updating with POSITIVE_INFINITY means that current should be back to the root
-        assert current.isRoot();
-
-        Object[] r = current.toNode();
-        current.clear();
-        return r;
-    }
-
-    public <C, K extends C, V extends C> Object[] build(Iterable<K> source, UpdateFunction<K, V> updateF, int size)
-    {
-        assert updateF != null;
-
-        NodeBuilder current = rootBuilder;
-        // we descend only to avoid wasting memory; in update() we will often descend into existing trees
-        // so here we want to descend also, so we don't have lg max(N) depth in both directions
-        while ((size >>= FAN_SHIFT) > 0)
-            current = current.ensureChild();
-
-        current.reset(EMPTY_LEAF, POSITIVE_INFINITY, updateF, null);
-        for (K key : source)
-            current.addNewKey(updateF.apply(key));
-
-        current = current.ascendToRoot();
-
-        Object[] r = current.toNode();
-        current.clear();
-        return r;
-    }
-}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/Cursor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/Cursor.java b/src/java/org/apache/cassandra/utils/btree/Cursor.java
deleted file mode 100644
index 6814d26..0000000
--- a/src/java/org/apache/cassandra/utils/btree/Cursor.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.cassandra.utils.btree;
-
-import java.util.Comparator;
-import java.util.Iterator;
-
-import static org.apache.cassandra.utils.btree.BTree.NEGATIVE_INFINITY;
-import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY;
-import static org.apache.cassandra.utils.btree.BTree.getLeafKeyEnd;
-import static org.apache.cassandra.utils.btree.BTree.isLeaf;
-
-/**
- * An extension of Path which provides a public interface for iterating over or counting a subrange of the tree
- *
- * @param <V>
- */
-public final class Cursor<K, V extends K> extends Path implements Iterator<V>
-{
-    /*
-     * Conceptually, a Cursor derives two Paths, one for the first object in the slice requested (inclusive),
-     * and one for the last (exclusive).  Then hasNext just checks, have we reached the last yet, and next
-     * calls successor() to get to the next item in the Tree.
-     *
-     * To optimize memory use, we summarize the last Path as just endNode/endIndex, and inherit from Path for
-     *
-     * the first one.
-     */
-
-    // the last node covered by the requested range
-    private Object[] endNode;
-    // the index within endNode that signals we're finished -- that is, endNode[endIndex] is NOT part of the Cursor
-    private byte endIndex;
-
-    private boolean forwards;
-
-    /**
-     * Reset this cursor for the provided tree, to iterate over its entire range
-     *
-     * @param btree    the tree to iterate over
-     * @param forwards if false, the cursor will start at the end and move backwards
-     */
-    public void reset(Object[] btree, boolean forwards)
-    {
-        _reset(btree, null, NEGATIVE_INFINITY, false, POSITIVE_INFINITY, false, forwards);
-    }
-
-    /**
-     * Reset this cursor for the provided tree, to iterate between the provided start and end
-     *
-     * @param btree      the tree to iterate over
-     * @param comparator the comparator that defines the ordering over the items in the tree
-     * @param lowerBound the first item to include, inclusive
-     * @param upperBound the last item to include, exclusive
-     * @param forwards   if false, the cursor will start at the end and move backwards
-     */
-    public void reset(Object[] btree, Comparator<K> comparator, K lowerBound, K upperBound, boolean forwards)
-    {
-        _reset(btree, comparator, lowerBound, true, upperBound, false, forwards);
-    }
-
-    /**
-     * Reset this cursor for the provided tree, to iterate between the provided start and end
-     *
-     * @param btree               the tree to iterate over
-     * @param comparator          the comparator that defines the ordering over the items in the tree
-     * @param lowerBound          the first item to include
-     * @param inclusiveLowerBound should include start in the iterator, if present in the tree
-     * @param upperBound          the last item to include
-     * @param inclusiveUpperBound should include end in the iterator, if present in the tree
-     * @param forwards            if false, the cursor will start at the end and move backwards
-     */
-    public void reset(Object[] btree, Comparator<K> comparator, K lowerBound, boolean inclusiveLowerBound, K upperBound, boolean inclusiveUpperBound, boolean forwards)
-    {
-        _reset(btree, comparator, lowerBound, inclusiveLowerBound, upperBound, inclusiveUpperBound, forwards);
-    }
-
-    private void _reset(Object[] btree, Comparator<K> comparator, Object lowerBound, boolean inclusiveLowerBound, Object upperBound, boolean inclusiveUpperBound, boolean forwards)
-    {
-        init(btree);
-        if (lowerBound == null)
-            lowerBound = NEGATIVE_INFINITY;
-        if (upperBound == null)
-            upperBound = POSITIVE_INFINITY;
-
-        this.forwards = forwards;
-
-        Path findLast = new Path(this.path.length, btree);
-        if (forwards)
-        {
-            findLast.find(comparator, upperBound, inclusiveUpperBound ? Op.HIGHER : Op.CEIL, true);
-            find(comparator, lowerBound, inclusiveLowerBound ? Op.CEIL : Op.HIGHER, true);
-        }
-        else
-        {
-            findLast.find(comparator, lowerBound, inclusiveLowerBound ? Op.LOWER : Op.FLOOR, false);
-            find(comparator, upperBound, inclusiveUpperBound ? Op.FLOOR : Op.LOWER, false);
-        }
-        int c = this.compareTo(findLast, forwards);
-        if (forwards ? c > 0 : c < 0)
-        {
-            endNode = currentNode();
-            endIndex = currentIndex();
-        }
-        else
-        {
-            endNode = findLast.currentNode();
-            endIndex = findLast.currentIndex();
-        }
-    }
-
-    public boolean hasNext()
-    {
-        return path[depth] != endNode || indexes[depth] != endIndex;
-    }
-
-    public V next()
-    {
-        Object r = currentKey();
-        if (forwards)
-            successor();
-        else
-            predecessor();
-        return (V) r;
-    }
-
-    public int count()
-    {
-        if (!forwards)
-            throw new IllegalStateException("Count can only be run on forward cursors");
-        int count = 0;
-        int next;
-        while ((next = consumeNextLeaf()) >= 0)
-            count += next;
-        return count;
-    }
-
-    /**
-     * @return the number of objects consumed by moving out of the next (possibly current) leaf
-     */
-    private int consumeNextLeaf()
-    {
-        Object[] node = currentNode();
-        int r = 0;
-
-        if (!isLeaf(node))
-        {
-            // if we're not in a leaf, then calling successor once will take us to a leaf, since the next
-            // key will be in the leftmost subtree of whichever branch is next.  For instance, if we
-            // are in the root node of the tree depicted by http://cis.stvincent.edu/html/tutorials/swd/btree/btree1.gif,
-            // successor() will take us to the leaf containing N and O.
-            int i = currentIndex();
-            if (node == endNode && i == endIndex)
-                return -1;
-            r = 1;
-            successor();
-            node = currentNode();
-        }
-
-        if (node == endNode)
-        {
-            // only count up to endIndex, and don't call successor()
-            if (currentIndex() == endIndex)
-                return r > 0 ? r : -1;
-            r += endIndex - currentIndex();
-            setIndex(endIndex);
-            return r;
-        }
-
-        // count the remaining objects in this leaf
-        int keyEnd = getLeafKeyEnd(node);
-        r += keyEnd - currentIndex();
-        setIndex(keyEnd);
-        successor();
-        return r;
-    }
-
-    public void remove()
-    {
-        throw new UnsupportedOperationException();
-    }
-}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java
index c715873..83fc661 100644
--- a/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java
+++ b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java
@@ -23,12 +23,7 @@ import org.apache.cassandra.utils.ObjectSizes;
 import java.util.Arrays;
 import java.util.Comparator;
 
-import static org.apache.cassandra.utils.btree.BTree.EMPTY_BRANCH;
-import static org.apache.cassandra.utils.btree.BTree.FAN_FACTOR;
-import static org.apache.cassandra.utils.btree.BTree.compare;
-import static org.apache.cassandra.utils.btree.BTree.find;
-import static org.apache.cassandra.utils.btree.BTree.getKeyEnd;
-import static org.apache.cassandra.utils.btree.BTree.isLeaf;
+import static org.apache.cassandra.utils.btree.BTree.*;
 
 /**
  * Represents a level / stack item of in progress modifications to a BTree.
@@ -151,7 +146,7 @@ final class NodeBuilder
             }
             else
             {
-                i = find(comparator, key, copyFrom, i + 1, copyFromKeyEnd);
+                i = Arrays.binarySearch(copyFrom, i + 1, copyFromKeyEnd, key, comparator);
                 found = i >= 0;
                 if (!found)
                     i = -i - 1;
@@ -167,7 +162,7 @@ final class NodeBuilder
                 return null;
             key = next;
         }
-        else if (i == copyFromKeyEnd && compare(comparator, key, upperBound) >= 0)
+        else if (i == copyFromKeyEnd && compareUpperBound(comparator, key, upperBound) >= 0)
             owns = false;
 
         if (isLeaf(copyFrom))
@@ -240,6 +235,10 @@ final class NodeBuilder
         return ascend();
     }
 
+    private static <V> int compareUpperBound(Comparator<V> comparator, Object value, Object upperBound)
+    {
+        return upperBound == POSITIVE_INFINITY ? -1 : comparator.compare((V)value, (V)upperBound);
+    }
 
     // UTILITY METHODS FOR IMPLEMENTATION OF UPDATE/BUILD/DELETE
 
@@ -264,7 +263,7 @@ final class NodeBuilder
     // builds a new root BTree node - must be called on root of operation
     Object[] toNode()
     {
-        assert buildKeyPosition <= FAN_FACTOR && (buildKeyPosition > 0 || copyFrom.length > 0) : buildKeyPosition;
+        assert buildKeyPosition <= FAN_FACTOR && (buildKeyPosition > 0 || getKeyEnd(copyFrom) > 0) : buildKeyPosition;
         return buildFromRange(0, buildKeyPosition, isLeaf(copyFrom), false);
     }
 
@@ -384,14 +383,25 @@ final class NodeBuilder
         Object[] a;
         if (isLeaf)
         {
-            a = new Object[keyLength + (keyLength & 1)];
+            a = new Object[keyLength | 1];
             System.arraycopy(buildKeys, offset, a, 0, keyLength);
         }
         else
         {
-            a = new Object[1 + (keyLength * 2)];
+            a = new Object[2 + (keyLength * 2)];
             System.arraycopy(buildKeys, offset, a, 0, keyLength);
             System.arraycopy(buildChildren, offset, a, keyLength, keyLength + 1);
+
+            // calculate the indexOffsets of each key in this node, within the sub-tree rooted at this node
+            int[] indexOffsets = new int[keyLength + 1];
+            int size = BTree.size((Object[]) a[keyLength]);
+            for (int i = 0 ; i < keyLength ; i++)
+            {
+                indexOffsets[i] = size;
+                size += 1 + BTree.size((Object[]) a[keyLength + 1 + i]);
+            }
+            indexOffsets[keyLength] = size;
+            a[a.length - 1] = indexOffsets;
         }
         if (isExtra)
             updateFunction.allocated(ObjectSizes.sizeOfArray(a));

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/NodeCursor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/NodeCursor.java b/src/java/org/apache/cassandra/utils/btree/NodeCursor.java
new file mode 100644
index 0000000..e9fa89e
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/btree/NodeCursor.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cassandra.utils.btree;
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+import static org.apache.cassandra.utils.btree.BTree.*;
+
+/**
+ * A class for searching within one node of a btree: a linear chain (stack) of these is built of tree height
+ * to form a Cursor. Some corollaries of the basic building block operations in TreeCursor (moveOne and seekTo),
+ * along with some other methods for helping implement movement between two NodeCursor
+ *
+ * The behaviour is not dissimilar to that of NodeBuilder and TreeBuilder, wherein functions that may move
+ * us to a different node pass us the node we should move to, from which we continue our operations.
+ * @param <K>
+ */
+class NodeCursor<K>
+{
+    // TODO: consider splitting forwards from backwards
+    final NodeCursor<K> parent, child;
+    final Comparator<? super K> comparator;
+
+    boolean inChild;
+    // if !inChild, this is the key position we are currently on;
+    // if inChild, this is the child position we are currently descending into
+    int position;
+    Object[] node;
+    int nodeOffset;
+
+    NodeCursor(Object[] node, NodeCursor<K> parent, Comparator<? super K> comparator)
+    {
+        this.node = node;
+        this.parent = parent;
+        this.comparator = comparator;
+        // a well formed b-tree (text book, or ours) must be balanced, so by building a stack following the left-most branch
+        // we have a stack capable of visiting any path in the tree
+        this.child = BTree.isLeaf(node) ? null : new NodeCursor<>((Object[]) node[getChildStart(node)], this, comparator);
+    }
+
+    void resetNode(Object[] node, int nodeOffset)
+    {
+        this.node = node;
+        this.nodeOffset = nodeOffset;
+    }
+
+    /**
+     * adapt child position to key position within branch, knowing it is safe to do so
+     */
+    void safeAdvanceIntoBranchFromChild(boolean forwards)
+    {
+        if (!forwards)
+            --position;
+    }
+
+    /**
+     * adapt child position to key position within branch, and return if this was successful or we're now out of bounds
+     */
+    boolean advanceIntoBranchFromChild(boolean forwards)
+    {
+        return forwards ? position < getBranchKeyEnd(node) : --position >= 0;
+    }
+
+    boolean advanceLeafNode(boolean forwards)
+    {
+        return forwards ? ++position < getLeafKeyEnd(node)
+                        : --position >= 0;
+    }
+
+    /**
+     * @return the upper/lower bound of the child we are currently descended in
+     */
+    K bound(boolean upper)
+    {
+        return (K) node[position - (upper ? 0 : 1)];
+    }
+
+    /**
+     * The parent that covers a range wider than ourselves, either ascending or descending,
+     * i.e. that defines the upper or lower bound on the subtree rooted at our node
+     * @param upper
+     * @return the NodeCursor parent that can tell us the upper/lower bound of ourselves
+     */
+    NodeCursor<K> boundIterator(boolean upper)
+    {
+        NodeCursor<K> bound = this.parent;
+        while (bound != null && (upper ? bound.position >= getChildCount(bound.node) - 1
+                                       : bound.position <= 0))
+            bound = bound.parent;
+        return bound;
+    }
+
+    /**
+     * look for the provided key in this node, in the specified direction:
+     * forwards => ceil search; otherwise floor
+     *
+     * we require that the node's "current" key (including the relevant bound if we are a parent we have ascended into)
+     * be already excluded by the search. this is useful for the following reasons:
+     *   1: we must ensure we never go backwards, so excluding that key from our binary search prevents our
+     *      descending into a child we have already visited (without any further checks)
+     *   2: we already check the bounds as we search upwards for our natural parent;
+     *   3: we want to cheaply check sequential access, so we always check the first key we're on anyway (if it can be done easily)
+     */
+    boolean seekInNode(K key, boolean forwards)
+    {
+        int position = this.position;
+        int lb, ub;
+        if (forwards)
+        {
+            lb = position + 1;
+            ub = getKeyEnd(node);
+        }
+        else
+        {
+            ub = position;
+            lb = 0;
+        }
+
+        int find = Arrays.binarySearch((K[]) node, lb, ub, key, comparator);
+        if (find >= 0)
+        {
+            // exact key match, so we're in the correct node already. return success
+            this.position = find;
+            inChild = false;
+            return true;
+        }
+
+        // if we are a branch, and we are an inequality match, the direction of travel doesn't matter
+        // so we only need to modify if we are going backwards on a leaf node, to produce floor semantics
+        int delta = isLeaf() & !forwards ? -1 : 0;
+        this.position = delta -1 -find;
+        return false;
+    }
+
+    NodeCursor<K> descendToFirstChild(boolean forwards)
+    {
+        if (isLeaf())
+        {
+            position = forwards ? 0 : getLeafKeyEnd(node) - 1;
+            return null;
+        }
+        inChild = true;
+        position = forwards ? 0 : getChildCount(node) - 1;
+        return descend();
+    }
+
+    // descend into the child at "position"
+    NodeCursor<K> descend()
+    {
+        Object[] childNode = (Object[]) node[position + getChildStart(node)];
+        int childOffset = nodeOffset + treeIndexOffsetOfChild(node, position);
+        child.resetNode(childNode, childOffset);
+        inChild = true;
+        return child;
+    }
+
+    boolean isLeaf()
+    {
+        return child == null;
+    }
+
+    int globalIndex()
+    {
+        return nodeOffset + treeIndexOfKey(node, position);
+    }
+
+    int globalLeafIndex()
+    {
+        return nodeOffset + treeIndexOfLeafKey(position);
+    }
+
+    int globalBranchIndex()
+    {
+        return nodeOffset + treeIndexOfBranchKey(node, position);
+    }
+
+    K value()
+    {
+        return (K) node[position];
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/Path.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/Path.java b/src/java/org/apache/cassandra/utils/btree/Path.java
deleted file mode 100644
index 9b6789c..0000000
--- a/src/java/org/apache/cassandra/utils/btree/Path.java
+++ /dev/null
@@ -1,354 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.cassandra.utils.btree;
-
-import java.util.Comparator;
-
-import static org.apache.cassandra.utils.btree.BTree.NEGATIVE_INFINITY;
-import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY;
-import static org.apache.cassandra.utils.btree.BTree.getBranchKeyEnd;
-import static org.apache.cassandra.utils.btree.BTree.getKeyEnd;
-import static org.apache.cassandra.utils.btree.BTree.getLeafKeyEnd;
-import static org.apache.cassandra.utils.btree.BTree.isLeaf;
-
-/**
- * An internal class for searching and iterating through a tree.  As it traverses the tree,
- * it adds the nodes visited to a stack.  This allows us to backtrack from a child node
- * to its parent.
- *
- * As we navigate the tree, we destructively modify this stack.
- *
- * Path is only intended to be used via Cursor.
- */
-public class Path<V>
-{
-    // operations corresponding to the ones in NavigableSet
-    static enum Op
-    {
-        CEIL,   // the least element greater than or equal to the given element
-        FLOOR,  // the greatest element less than or equal to the given element
-        HIGHER, // the least element strictly greater than the given element
-        LOWER   // the greatest element strictly less than the given element
-    }
-
-    // the path to the searched-for key
-    Object[][] path;
-    // the index within the node of our path at a given depth
-    byte[] indexes;
-    // current depth.  nothing in path[i] for i > depth is valid.
-    byte depth;
-
-    Path() { }
-    Path(int depth, Object[] btree)
-    {
-        this.path = new Object[depth][];
-        this.indexes = new byte[depth];
-        this.path[0] = btree;
-    }
-
-    void init(Object[] btree)
-    {
-        int depth = BTree.depth(btree);
-        if (path == null || path.length < depth)
-        {
-            path = new Object[depth][];
-            indexes = new byte[depth];
-        }
-        path[0] = btree;
-    }
-
-    void moveEnd(Object[] node, boolean forwards)
-    {
-        push(node, getKeyEnd(node));
-        if (!forwards)
-            predecessor();
-    }
-
-    void moveStart(Object[] node, boolean forwards)
-    {
-        push(node, -1);
-        if (forwards)
-            successor();
-    }
-
-    /**
-     * Find the provided key in the tree rooted at node, and store the root to it in the path
-     *
-     * @param comparator the comparator defining the order on the tree
-     * @param target     the key to search for
-     * @param mode       the type of search to perform
-     * @param forwards   if the path should be setup for forward or backward iteration
-     * @param <K>
-     */
-    <K> boolean find(Comparator<K> comparator, Object target, Op mode, boolean forwards)
-    {
-        // TODO : should not require parameter 'forwards' - consider modifying index to represent both
-        // child and key position, as opposed to just key position (which necessitates a different value depending
-        // on which direction you're moving in. Prerequisite for making Path public and using to implement general
-        // search
-
-        Object[] node = path[depth];
-        int lb = forwards ? indexes[depth] : 0;
-        pop();
-
-        if (target instanceof BTree.Special)
-        {
-            if (target == POSITIVE_INFINITY)
-                moveEnd(node, forwards);
-            else if (target == NEGATIVE_INFINITY)
-                moveStart(node, forwards);
-            else
-                throw new AssertionError();
-            return false;
-        }
-
-        while (true)
-        {
-            int keyEnd = getKeyEnd(node);
-
-            // search for the target in the current node
-            int i = BTree.find(comparator, target, node, lb, keyEnd);
-            lb = 0;
-            if (i >= 0)
-            {
-                // exact match. transform exclusive bounds into the correct index by moving back or forwards one
-                push(node, i);
-                switch (mode)
-                {
-                    case HIGHER:
-                        successor();
-                        break;
-                    case LOWER:
-                        predecessor();
-                }
-                return true;
-            }
-            i = -i - 1;
-
-            // traverse into the appropriate child
-            if (!isLeaf(node))
-            {
-                push(node, forwards ? i - 1 : i);
-                node = (Object[]) node[keyEnd + i];
-                continue;
-            }
-
-            // bottom of the tree and still not found.  pick the right index to satisfy Op
-            switch (mode)
-            {
-                case FLOOR:
-                case LOWER:
-                    i--;
-            }
-
-            if (i < 0)
-            {
-                push(node, 0);
-                predecessor();
-            }
-            else if (i >= keyEnd)
-            {
-                push(node, keyEnd - 1);
-                successor();
-            }
-            else
-            {
-                push(node, i);
-            }
-
-            return false;
-        }
-    }
-
-    boolean isRoot()
-    {
-        return depth == 0;
-    }
-
-    void pop()
-    {
-        depth--;
-    }
-
-    Object[] currentNode()
-    {
-        return path[depth];
-    }
-
-    byte currentIndex()
-    {
-        return indexes[depth];
-    }
-
-    void push(Object[] node, int index)
-    {
-        path[++depth] = node;
-        indexes[depth] = (byte) index;
-    }
-
-    void setIndex(int index)
-    {
-        indexes[depth] = (byte) index;
-    }
-
-    byte findSuccessorParentDepth()
-    {
-        byte depth = this.depth;
-        depth--;
-        while (depth >= 0)
-        {
-            int ub = indexes[depth] + 1;
-            Object[] node = path[depth];
-            if (ub < getBranchKeyEnd(node))
-                return depth;
-            depth--;
-        }
-        return -1;
-    }
-
-    byte findPredecessorParentDepth()
-    {
-        byte depth = this.depth;
-        depth--;
-        while (depth >= 0)
-        {
-            int ub = indexes[depth] - 1;
-            if (ub >= 0)
-                return depth;
-            depth--;
-        }
-        return -1;
-    }
-
-    // move to the next key in the tree
-    void successor()
-    {
-        Object[] node = currentNode();
-        int i = currentIndex();
-
-        if (!isLeaf(node))
-        {
-            // if we're on a key in a branch, we MUST have a descendant either side of us,
-            // so we always go down the left-most child until we hit a leaf
-            node = (Object[]) node[getBranchKeyEnd(node) + i + 1];
-            while (!isLeaf(node))
-            {
-                push(node, -1);
-                node = (Object[]) node[getBranchKeyEnd(node)];
-            }
-            push(node, 0);
-            return;
-        }
-
-        // if we haven't reached the end of this leaf, just increment our index and return
-        i += 1;
-        if (i < getLeafKeyEnd(node))
-        {
-            // moved to the next key in the same leaf
-            setIndex(i);
-            return;
-        }
-
-        // we've reached the end of this leaf,
-        // so go up until we reach something we've not finished visiting
-        while (!isRoot())
-        {
-            pop();
-            i = currentIndex() + 1;
-            node = currentNode();
-            if (i < getKeyEnd(node))
-            {
-                setIndex(i);
-                return;
-            }
-        }
-
-        // we've visited the last key in the root node, so we're done
-        setIndex(getKeyEnd(node));
-    }
-
-    // move to the previous key in the tree
-    void predecessor()
-    {
-        Object[] node = currentNode();
-        int i = currentIndex();
-
-        if (!isLeaf(node))
-        {
-            // if we're on a key in a branch, we MUST have a descendant either side of us
-            // so we always go down the right-most child until we hit a leaf
-            node = (Object[]) node[getBranchKeyEnd(node) + i];
-            while (!isLeaf(node))
-            {
-                i = getBranchKeyEnd(node);
-                push(node, i);
-                node = (Object[]) node[i * 2];
-            }
-            push(node, getLeafKeyEnd(node) - 1);
-            return;
-        }
-
-        // if we haven't reached the beginning of this leaf, just decrement our index and return
-        i -= 1;
-        if (i >= 0)
-        {
-            setIndex(i);
-            return;
-        }
-
-        // we've reached the beginning of this leaf,
-        // so go up until we reach something we've not finished visiting
-        while (!isRoot())
-        {
-            pop();
-            i = currentIndex() - 1;
-            if (i >= 0)
-            {
-                setIndex(i);
-                return;
-            }
-        }
-
-        // we've visited the last key in the root node, so we're done
-        setIndex(-1);
-    }
-
-    Object currentKey()
-    {
-        return currentNode()[currentIndex()];
-    }
-
-    int compareTo(Path<V> that, boolean forwards)
-    {
-        int d = Math.min(this.depth, that.depth);
-        for (int i = 0; i <= d; i++)
-        {
-            int c = this.indexes[i] - that.indexes[i];
-            if (c != 0)
-                return c;
-        }
-        // identical indices up to depth, so if somebody is lower depth they are on a later item if iterating forwards
-        // and an earlier item if iterating backwards, as the node at max common depth must be a branch if they are
-        // different depths, and branches that are currently descended into lag the child index they are in when iterating forwards,
-        // i.e. if they are in child 0 they record an index of -1 forwards, or 0 when backwards
-        d = this.depth - that.depth;
-        return forwards ? d : -d;
-    }
-}
-

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java b/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
new file mode 100644
index 0000000..cb3ddc5
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cassandra.utils.btree;
+
+import java.util.Comparator;
+
+import static org.apache.cassandra.utils.btree.BTree.EMPTY_LEAF;
+import static org.apache.cassandra.utils.btree.BTree.FAN_SHIFT;
+import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY;
+
+/**
+ * A class for constructing a new BTree, either from an existing one and some set of modifications
+ * or a new tree from a sorted collection of items.
+ * <p/>
+ * This is a fairly heavy-weight object, so a ThreadLocal instance is created for making modifications to a tree
+ */
+final class TreeBuilder
+{
+    private final NodeBuilder rootBuilder = new NodeBuilder();
+
+    /**
+     * At the highest level, we adhere to the classic b-tree insertion algorithm:
+     *
+     * 1. Add to the appropriate leaf
+     * 2. Split the leaf if necessary, add the median to the parent
+     * 3. Split the parent if necessary, etc.
+     *
+     * There is one important difference: we don't actually modify the original tree, but copy each node that we
+     * modify.  Note that every node on the path to the key being inserted or updated will be modified; this
+     * implies that at a minimum, the root node will be modified for every update, so every root is a "snapshot"
+     * of a tree that can be iterated or sliced without fear of concurrent modifications.
+     *
+     * The NodeBuilder class handles the details of buffering the copied contents of the original tree and
+     * adding in our changes.  Since NodeBuilder maintains parent/child references, it also handles parent-splitting
+     * (easy enough, since any node affected by the split will already be copied into a NodeBuilder).
+     *
+     * One other difference from the simple algorithm is that we perform modifications in bulk;
+     * we assume @param source has been sorted, e.g. by BTree.update, so the update of each key resumes where
+     * the previous left off.
+     */
+    public <C, K extends C, V extends C> Object[] update(Object[] btree, Comparator<C> comparator, Iterable<K> source, UpdateFunction<K, V> updateF)
+    {
+        assert updateF != null;
+
+        NodeBuilder current = rootBuilder;
+        current.reset(btree, POSITIVE_INFINITY, updateF, comparator);
+
+        for (K key : source)
+        {
+            while (true)
+            {
+                if (updateF.abortEarly())
+                {
+                    rootBuilder.clear();
+                    return null;
+                }
+                NodeBuilder next = current.update(key);
+                if (next == null)
+                    break;
+                // we were in a subtree from a previous key that didn't contain this new key;
+                // retry against the correct subtree
+                current = next;
+            }
+        }
+
+        // finish copying any remaining keys from the original btree
+        while (true)
+        {
+            NodeBuilder next = current.finish();
+            if (next == null)
+                break;
+            current = next;
+        }
+
+        // updating with POSITIVE_INFINITY means that current should be back to the root
+        assert current.isRoot();
+
+        Object[] r = current.toNode();
+        current.clear();
+        return r;
+    }
+
+    public <C, K extends C, V extends C> Object[] build(Iterable<K> source, UpdateFunction<K, V> updateF, int size)
+    {
+        assert updateF != null;
+
+        NodeBuilder current = rootBuilder;
+        // we descend only to avoid wasting memory; in update() we will often descend into existing trees
+        // so here we want to descend also, so we don't have lg max(N) depth in both directions
+        while ((size >>= FAN_SHIFT) > 0)
+            current = current.ensureChild();
+
+        current.reset(EMPTY_LEAF, POSITIVE_INFINITY, updateF, null);
+        for (K key : source)
+            current.addNewKey(updateF.apply(key));
+
+        current = current.ascendToRoot();
+
+        Object[] r = current.toNode();
+        current.clear();
+        return r;
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/TreeCursor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/TreeCursor.java b/src/java/org/apache/cassandra/utils/btree/TreeCursor.java
new file mode 100644
index 0000000..9e946f9
--- /dev/null
+++ b/src/java/org/apache/cassandra/utils/btree/TreeCursor.java
@@ -0,0 +1,272 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cassandra.utils.btree;
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+import static org.apache.cassandra.utils.btree.BTree.*;
+
+/**
+ * Supports two basic operations for moving around a BTree, either forwards or backwards:
+ * moveOne(), and seekTo()
+ *
+ * These two methods, along with movement to the start/end, permit us to construct any desired
+ * movement around a btree, without much cognitive burden.
+ *
+ * This TreeCursor is (and has its methods) package private. This is to avoid polluting the BTreeSearchIterator
+ * that extends it, and uses its functionality. If this class is useful for wider consumption, a public extension
+ * class can be provided, that just makes all of its methods public.
+ */
+class TreeCursor<K> extends NodeCursor<K>
+{
+    // TODO: spend some time optimising compiler inlining decisions: many of these methods have only one primary call-site
+
+    NodeCursor<K> cur;
+
+    TreeCursor(Comparator<? super K> comparator, Object[] node)
+    {
+        super(node, null, comparator);
+    }
+
+    /**
+     * Move the cursor to either the first or last item in the btree
+     * @param start true if should move to the first item; false moves to the last
+     */
+    void reset(boolean start)
+    {
+        cur = root();
+        root().inChild = false;
+        // this is a corrupt position, but we ensure we never use it except to start our search from
+        root().position = start ? -1 : getKeyEnd(root().node);
+    }
+
+    /**
+     * move the Cursor one item, either forwards or backwards
+     * @param forwards direction of travel
+     * @return false iff the cursor is exhausted in the direction of travel
+     */
+    int moveOne(boolean forwards)
+    {
+        NodeCursor<K> cur = this.cur;
+        if (cur.isLeaf())
+        {
+            // if we're a leaf, we try to step forwards inside ourselves
+            if (cur.advanceLeafNode(forwards))
+                return cur.globalLeafIndex();
+
+            // if we fail, we just find our bounding parent
+            this.cur = cur = moveOutOfLeaf(forwards, cur, root());
+            return cur.globalIndex();
+        }
+
+        // otherwise we descend directly into our next child
+        if (forwards)
+            ++cur.position;
+        cur = cur.descend();
+
+        // and go to its first item
+        NodeCursor<K> next;
+        while ( null != (next = cur.descendToFirstChild(forwards)) )
+            cur = next;
+
+        this.cur = cur;
+        return cur.globalLeafIndex();
+    }
+
+    /**
+     * seeks from the current position, forwards or backwards, for the provided key
+     * while the direction could be inferred (or ignored), it is required so that (e.g.) we do not infinitely loop on bad inputs
+     * if there is no such key, it moves to the key that would naturally proceed it (i.e. it behaves as ceil when ascending; floor when descending)
+     */
+    int seekTo(K key, boolean forwards, boolean skipOne)
+    {
+        NodeCursor<K> cur = this.cur;
+
+        /**
+         * decide if we will "try one" value by itself, as a sequential access;
+         * we actually *require* that we try the "current key" for any node before we call seekInNode on it.
+         *
+         * if we are already on a value, we just check it irregardless of if it is a leaf or not;
+         * if we are not, we have already excluded it (as we have consumed it), so:
+         *    if we are on a branch we consider that good enough;
+         *    otherwise, we move onwards one, and we try the new value
+         *
+         */
+        boolean tryOne = !skipOne;
+        if ((!tryOne & cur.isLeaf()) && !(tryOne = (cur.advanceLeafNode(forwards) || (cur = moveOutOfLeaf(forwards, cur, null)) != null)))
+        {
+            // we moved out of the tree; return out-of-bounds
+            this.cur = root();
+            return forwards ? -1 - size(rootNode()) : -1;
+        }
+
+        if (tryOne)
+        {
+            // we're presently on a value we can (and *must*) cheaply test
+            K test = cur.value();
+
+            int cmp;
+            if (key == test) cmp = 0; // check object identity first, since we utilise that in some places and it's very cheap
+            else cmp = comparator.compare(key, test);
+            if (forwards ? cmp <= 0 : cmp >= 0)
+            {
+                // we've either matched, or excluded the value from being present
+                int index = cur.globalIndex();
+                this.cur = cur;
+                return cmp == 0 ? index : -1 -index;
+            }
+        }
+
+        // if we failed to match with the cheap test, first look to see if we're even in the correct sub-tree
+        while (cur != root())
+        {
+            NodeCursor<K> bound = cur.boundIterator(forwards);
+            if (bound == null)
+                break; // we're all that's left
+
+            int cmpbound = comparator.compare(key, bound.bound(forwards));
+            if (forwards ? cmpbound < 0 : cmpbound > 0)
+                break; //  already in correct sub-tree
+
+            // bound is on-or-before target, so ascend to that bound and continue looking upwards
+            cur = bound;
+            cur.safeAdvanceIntoBranchFromChild(forwards);
+            if (cmpbound == 0) // it was an exact match, so terminate here
+            {
+                this.cur = cur;
+                return cur.globalBranchIndex();
+            }
+        }
+
+        // we must now be able to find our target in the sub-tree rooted at cur
+        boolean match;
+        while (!(match = cur.seekInNode(key, forwards)) && !cur.isLeaf())
+        {
+            cur = cur.descend();
+            cur.position = forwards ? -1 : getKeyEnd(cur.node);
+        }
+
+        if (!match)
+            cur = ensureValidLocation(forwards, cur);
+
+        this.cur = cur;
+        assert !cur.inChild;
+        int index = cur.globalIndex();
+        return match ? index : -1 -index;
+    }
+
+    /**
+     * ensures a leaf node we have seeked in, is not positioned outside of its bounds,
+     * by moving us into its parents (if any); if it is the root, we're permitted to be out-of-bounds
+     * as this indicates exhaustion
+     */
+    private NodeCursor<K> ensureValidLocation(boolean forwards, NodeCursor<K> cur)
+    {
+        assert cur.isLeaf();
+        int position = cur.position;
+        // if we're out of bounds of the leaf, move once in direction of travel
+        if ((position < 0) | (position >= getLeafKeyEnd(cur.node)))
+            cur = moveOutOfLeaf(forwards, cur, root());
+        return cur;
+    }
+
+    /**
+     * move out of a leaf node that is currently out of (its own) bounds
+     * @return null if we're now out-of-bounds of the whole true
+     */
+    private <K> NodeCursor<K> moveOutOfLeaf(boolean forwards, NodeCursor<K> cur, NodeCursor<K> ifFail)
+    {
+        while (true)
+        {
+            cur = cur.parent;
+            if (cur == null)
+            {
+                root().inChild = false;
+                return ifFail;
+            }
+            if (cur.advanceIntoBranchFromChild(forwards))
+                break;
+        }
+        cur.inChild = false;
+        return cur;
+    }
+
+    /**
+     * resets the cursor and seeks to the specified position; does not assume locality or take advantage of the cursor's current position
+     */
+    void seekTo(int index)
+    {
+        if ((index < 0) | (index >= BTree.size(rootNode())))
+        {
+            if ((index < -1) | (index > BTree.size(rootNode())))
+                throw new IndexOutOfBoundsException(index + " not in range [0.." + BTree.size(rootNode()) + ")");
+            reset(index == -1);
+            return;
+        }
+
+        NodeCursor<K> cur = this.cur;
+        cur = root();
+        assert cur.nodeOffset == 0;
+        while (true)
+        {
+            int relativeIndex = index - cur.nodeOffset; // index within subtree rooted at cur
+            Object[] node = cur.node;
+
+            if (cur.isLeaf())
+            {
+                assert relativeIndex < getLeafKeyEnd(node);
+                cur.position = relativeIndex;
+                this.cur = cur;
+                return;
+            }
+
+            int[] sizeMap = getSizeMap(node);
+            int boundary = Arrays.binarySearch(sizeMap, relativeIndex);
+            if (boundary >= 0)
+            {
+                // exact match, in this branch node
+                assert boundary < sizeMap.length - 1;
+                cur.position = boundary;
+                cur.inChild = false;
+                this.cur = cur;
+                return;
+            }
+
+            cur.inChild = true;
+            cur.position = -1 -boundary;
+            cur = cur.descend();
+        }
+    }
+
+    private NodeCursor<K> root()
+    {
+        return this;
+    }
+
+    Object[] rootNode()
+    {
+        return this.node;
+    }
+
+    K currentValue()
+    {
+        return cur.value();
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java
index 93c02ae..9a7fa1b 100644
--- a/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java
+++ b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java
@@ -42,4 +42,30 @@ public interface UpdateFunction<K, V> extends Function<K, V>
      */
     void allocated(long heapSize);
 
+    static final UpdateFunction<Object, Object> noOp = new UpdateFunction<Object, Object>()
+    {
+        public Object apply(Object replacing, Object updating)
+        {
+            return updating;
+        }
+
+        public boolean abortEarly()
+        {
+            return false;
+        }
+
+        public void allocated(long heapSize)
+        {
+        }
+
+        public Object apply(Object k)
+        {
+            return k;
+        }
+    };
+
+    public static <K> UpdateFunction<K, K> noOp()
+    {
+        return (UpdateFunction<K, K>) noOp;
+    }
 }


Mime
View raw message