ignite-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From agoncha...@apache.org
Subject [32/47] ignite git commit: IGNITE-5267 - Moved ignite-ps module to ignite-core
Date Sun, 11 Jun 2017 20:03:58 GMT
http://git-wip-us.apache.org/repos/asf/ignite/blob/6bf5ce46/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java
----------------------------------------------------------------------
diff --git a/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java
new file mode 100644
index 0000000..74d251a
--- /dev/null
+++ b/modules/core/src/main/java/org/apache/ignite/internal/processors/cache/persistence/tree/BPlusTree.java
@@ -0,0 +1,4808 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.processors.cache.persistence.tree;
+
+import java.io.Externalizable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.IgniteException;
+import org.apache.ignite.internal.IgniteInterruptedCheckedException;
+import org.apache.ignite.internal.pagemem.PageIdUtils;
+import org.apache.ignite.internal.pagemem.PageMemory;
+import org.apache.ignite.internal.pagemem.wal.IgniteWriteAheadLogManager;
+import org.apache.ignite.internal.pagemem.wal.record.delta.FixCountRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.FixLeftmostChildRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.FixRemoveId;
+import org.apache.ignite.internal.pagemem.wal.record.delta.InsertRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageAddRootRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageCutRootRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageInitRootInlineRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.NewRootInitRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.RemoveRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.ReplaceRecord;
+import org.apache.ignite.internal.pagemem.wal.record.delta.SplitExistingPageRecord;
+import org.apache.ignite.internal.processors.cache.persistence.DataStructure;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusInnerIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusLeafIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.BPlusMetaIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.IOVersions;
+import org.apache.ignite.internal.processors.cache.persistence.tree.io.PageIO;
+import org.apache.ignite.internal.processors.cache.persistence.tree.reuse.ReuseBag;
+import org.apache.ignite.internal.processors.cache.persistence.tree.reuse.ReuseList;
+import org.apache.ignite.internal.processors.cache.persistence.tree.util.PageHandler;
+import org.apache.ignite.internal.util.GridArrays;
+import org.apache.ignite.internal.util.GridLongList;
+import org.apache.ignite.internal.util.IgniteTree;
+import org.apache.ignite.internal.util.lang.GridCursor;
+import org.apache.ignite.internal.util.lang.GridTreePrinter;
+import org.apache.ignite.internal.util.typedef.F;
+import org.apache.ignite.internal.util.typedef.internal.S;
+import org.apache.ignite.internal.util.typedef.internal.SB;
+import org.apache.ignite.internal.util.typedef.internal.U;
+import org.apache.ignite.lang.IgniteInClosure;
+
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Bool.DONE;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Bool.FALSE;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Bool.READY;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Bool.TRUE;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Result.FOUND;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Result.GO_DOWN;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Result.GO_DOWN_X;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Result.NOT_FOUND;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Result.RETRY;
+import static org.apache.ignite.internal.processors.cache.persistence.tree.BPlusTree.Result.RETRY_ROOT;
+
+/**
+ * Abstract B+Tree.
+ */
+@SuppressWarnings({"RedundantThrowsDeclaration", "ConstantValueVariableUse"})
+public abstract class BPlusTree<L, T extends L> extends DataStructure implements IgniteTree<L, T> {
+    /** */
+    private static final Object[] EMPTY = {};
+
+    /** */
+    private static volatile boolean interrupted;
+
+    /** */
+    private final AtomicBoolean destroyed = new AtomicBoolean(false);
+
+    /** */
+    private final String name;
+
+    /** */
+    private final float minFill;
+
+    /** */
+    private final float maxFill;
+
+    /** */
+    protected final long metaPageId;
+
+    /** */
+    private boolean canGetRowFromInner;
+
+    /** */
+    private IOVersions<? extends BPlusInnerIO<L>> innerIos;
+
+    /** */
+    private IOVersions<? extends BPlusLeafIO<L>> leafIos;
+
+    /** */
+    private final AtomicLong globalRmvId;
+
+    /** */
+    private volatile TreeMetaData treeMeta;
+
+    /** */
+    private final GridTreePrinter<Long> treePrinter = new GridTreePrinter<Long>() {
+        /** */
+        private boolean keys = true;
+
+        @Override protected List<Long> getChildren(final Long pageId) {
+            if (pageId == null || pageId == 0L)
+                return null;
+
+            try {
+                long page = acquirePage(pageId);
+
+                try {
+                    long pageAddr = readLock(pageId, page); // No correctness guaranties.
+
+                    try {
+                        BPlusIO io = io(pageAddr);
+
+                        if (io.isLeaf())
+                            return null;
+
+                        int cnt = io.getCount(pageAddr);
+
+                        assert cnt >= 0 : cnt;
+
+                        List<Long> res;
+
+                        if (cnt > 0) {
+                            res = new ArrayList<>(cnt + 1);
+
+                            for (int i = 0; i < cnt; i++)
+                                res.add(inner(io).getLeft(pageAddr, i));
+
+                            res.add(inner(io).getRight(pageAddr, cnt - 1));
+                        }
+                        else {
+                            long left = inner(io).getLeft(pageAddr, 0);
+
+                            res = left == 0 ? Collections.<Long>emptyList() : Collections.singletonList(left);
+                        }
+
+                        return res;
+                    }
+                    finally {
+                        readUnlock(pageId, page, pageAddr);
+                    }
+                }
+                finally {
+                    releasePage(pageId, page);
+                }
+            }
+            catch (IgniteCheckedException ignored) {
+                throw new AssertionError("Can not acquire page.");
+            }
+        }
+
+        @Override protected String formatTreeNode(final Long pageId) {
+            if (pageId == null)
+                return ">NPE<";
+
+            if (pageId == 0L)
+                return "<Zero>";
+
+            try {
+                long page = acquirePage(pageId);
+                try {
+                    long pageAddr = readLock(pageId, page); // No correctness guaranties.
+                    try {
+                        BPlusIO<L> io = io(pageAddr);
+
+                        return printPage(io, pageAddr, keys);
+                    }
+                    finally {
+                        readUnlock(pageId, page, pageAddr);
+                    }
+                }
+                finally {
+                    releasePage(pageId, page);
+                }
+            }
+            catch (IgniteCheckedException e) {
+                throw new IllegalStateException(e);
+            }
+        }
+    };
+
+    /** */
+    private final GetPageHandler<Get> askNeighbor = new AskNeighbor();
+
+    /**
+     *
+     */
+    private class AskNeighbor extends GetPageHandler<Get> {
+        /** {@inheritDoc} */
+        @Override public Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Get g, int isBack) {
+            assert !io.isLeaf(); // Inner page.
+
+            boolean back = isBack == TRUE.ordinal();
+
+            long res = doAskNeighbor(io, pageAddr, back);
+
+            if (back) {
+                if (io.getForward(pageAddr) != g.backId) // See how g.backId is setup in removeDown for this check.
+                    return RETRY;
+
+                g.backId(res);
+            }
+            else {
+                assert isBack == FALSE.ordinal() : isBack;
+
+                g.fwdId(res);
+            }
+
+            return FOUND;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Get> search = new Search();
+
+    /**
+     *
+     */
+    private class Search extends GetPageHandler<Get> {
+        /** {@inheritDoc} */
+        @Override public Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Get g, int lvl)
+            throws IgniteCheckedException {
+            // Check the triangle invariant.
+            if (io.getForward(pageAddr) != g.fwdId)
+                return RETRY;
+
+            boolean needBackIfRouting = g.backId != 0;
+
+            g.backId(0L); // Usually we'll go left down and don't need it.
+
+            int cnt = io.getCount(pageAddr);
+
+            int idx;
+
+            if (g.findLast)
+                idx = io.isLeaf() ? cnt - 1 : -cnt - 1; // (-cnt - 1) mimics not_found result of findInsertionPoint
+                // in case of cnt = 0 we end up in 'not found' branch below with idx being 0 after fix() adjustment
+            else
+                idx = findInsertionPoint(lvl, io, pageAddr, 0, cnt, g.row, g.shift);
+
+            boolean found = idx >= 0;
+
+            if (found) { // Found exact match.
+                assert g.getClass() != GetCursor.class;
+
+                if (g.found(io, pageAddr, idx, lvl))
+                    return FOUND;
+
+                // Else we need to reach leaf page, go left down.
+            }
+            else {
+                idx = fix(idx);
+
+                if (g.notFound(io, pageAddr, idx, lvl)) // No way down, stop here.
+                    return NOT_FOUND;
+            }
+
+            assert !io.isLeaf() : io;
+
+            // If idx == cnt then we go right down, else left down: getLeft(cnt) == getRight(cnt - 1).
+            g.pageId(inner(io).getLeft(pageAddr, idx));
+
+            // If we see the tree in consistent state, then our right down page must be forward for our left down page,
+            // we need to setup fwdId and/or backId to be able to check this invariant on lower level.
+            if (idx < cnt) {
+                // Go left down here.
+                g.fwdId(inner(io).getRight(pageAddr, idx));
+            }
+            else {
+                // Go right down here or it is an empty branch.
+                assert idx == cnt;
+
+                // Here child's forward is unknown to us (we either go right or it is an empty "routing" page),
+                // need to ask our forward about the child's forward (it must be leftmost child of our forward page).
+                // This is ok from the locking standpoint because we take all locks in the forward direction.
+                long fwdId = io.getForward(pageAddr);
+
+                // Setup fwdId.
+                if (fwdId == 0)
+                    g.fwdId(0L);
+                else {
+                    // We can do askNeighbor on forward page here because we always take locks in forward direction.
+                    Result res = askNeighbor(fwdId, g, false);
+
+                    if (res != FOUND)
+                        return res; // Retry.
+                }
+
+                // Setup backId.
+                if (cnt != 0) // It is not a routing page and we are going to the right, can get backId here.
+                    g.backId(inner(io).getLeft(pageAddr, cnt - 1));
+                else if (needBackIfRouting) {
+                    // Can't get backId here because of possible deadlock and it is only needed for remove operation.
+                    return GO_DOWN_X;
+                }
+            }
+
+            return GO_DOWN;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Put> replace = new Replace();
+
+    /**
+     *
+     */
+    private class Replace extends GetPageHandler<Put> {
+        /** {@inheritDoc} */
+        @SuppressWarnings("unchecked")
+        @Override public Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Put p, int lvl)
+            throws IgniteCheckedException  {
+            // Check the triangle invariant.
+            if (io.getForward(pageAddr) != p.fwdId)
+                return RETRY;
+
+            assert p.btmLvl == 0 : "split is impossible with replace";
+
+            final int cnt = io.getCount(pageAddr);
+            final int idx = findInsertionPoint(lvl, io, pageAddr, 0, cnt, p.row, 0);
+
+            if (idx < 0) // Not found, split or merge happened.
+                return RETRY;
+
+            // Replace link at idx with new one.
+            // Need to read link here because `p.finish()` will clear row.
+            L newRow = p.row;
+
+            // Detach the old row if we are on a leaf page.
+            if (lvl == 0) {
+                assert p.oldRow == null; // The old row must be set only once.
+
+                // Inner replace state must be consistent by the end of the operation.
+                assert p.needReplaceInner == FALSE || p.needReplaceInner == DONE : p.needReplaceInner;
+
+                // Need to replace inner key if now we are replacing the rightmost row and have a forward page.
+                if (canGetRowFromInner && idx + 1 == cnt && p.fwdId != 0L && p.needReplaceInner == FALSE) {
+                    // Can happen only for invoke, otherwise inner key must be replaced on the way down.
+                    assert p.invoke != null;
+
+                    // We need to restart the operation from root to perform inner replace.
+                    // On the second pass we will not get here (will avoid infinite loop) because
+                    // needReplaceInner will be DONE or our key will not be the rightmost anymore.
+                    return RETRY_ROOT;
+                }
+
+                // Get old row in leaf page to reduce contention at upper level.
+                p.oldRow = p.needOld ? getRow(io, pageAddr, idx) : (T)Boolean.TRUE;
+
+                p.finish();
+            }
+
+            boolean needWal = needWalDeltaRecord(pageId, page, null);
+
+            byte[] newRowBytes = io.store(pageAddr, idx, newRow, null, needWal);
+
+            if (needWal)
+                wal.log(new ReplaceRecord<>(cacheId, pageId, io, newRowBytes, idx));
+
+            return FOUND;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Put> insert = new Insert();
+
+    /**
+     *
+     */
+    private class Insert extends GetPageHandler<Put> {
+        /** {@inheritDoc} */
+        @Override public Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Put p, int lvl)
+            throws IgniteCheckedException {
+            assert p.btmLvl == lvl : "we must always insert at the bottom level: " + p.btmLvl + " " + lvl;
+
+            // Check triangle invariant.
+            if (io.getForward(pageAddr) != p.fwdId)
+                return RETRY;
+
+            int cnt = io.getCount(pageAddr);
+            int idx = findInsertionPoint(lvl, io, pageAddr, 0, cnt, p.row, 0);
+
+            if (idx >= 0) // We do not support concurrent put of the same key.
+                throw new IllegalStateException("Duplicate row in index.");
+
+            idx = fix(idx);
+
+            // Do insert.
+            L moveUpRow = p.insert(pageId, page, pageAddr, io, idx, lvl);
+
+            // Check if split happened.
+            if (moveUpRow != null) {
+                p.btmLvl++; // Get high.
+                p.row = moveUpRow;
+
+                if (p.invoke != null)
+                    p.invoke.row = moveUpRow;
+
+                // Here forward page can't be concurrently removed because we keep write lock on tail which is the only
+                // page who knows about the forward page, because it was just produced by split.
+                p.rightId = io.getForward(pageAddr);
+                p.tail(pageId, page, pageAddr);
+
+                assert p.rightId != 0;
+            }
+            else
+                p.finish();
+
+            return FOUND;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Remove> rmvFromLeaf = new RemoveFromLeaf();
+
+    /**
+     *
+     */
+    private class RemoveFromLeaf extends GetPageHandler<Remove> {
+        /** {@inheritDoc} */
+        @Override public Result run0(long leafId, long leafPage, long leafAddr, BPlusIO<L> io, Remove r, int lvl)
+            throws IgniteCheckedException {
+            assert lvl == 0 : lvl; // Leaf.
+
+            // Check the triangle invariant.
+            if (io.getForward(leafAddr) != r.fwdId)
+                return RETRY;
+
+            final int cnt = io.getCount(leafAddr);
+
+            assert cnt <= Short.MAX_VALUE: cnt;
+
+            int idx = findInsertionPoint(lvl, io, leafAddr, 0, cnt, r.row, 0);
+
+            if (idx < 0)
+                return RETRY; // We've found exact match on search but now it's gone.
+
+            assert idx >= 0 && idx < cnt: idx;
+
+            // Need to do inner replace when we remove the rightmost element and the leaf have no forward page,
+            // i.e. it is not the rightmost leaf of the tree.
+            boolean needReplaceInner = canGetRowFromInner && idx == cnt - 1 && io.getForward(leafAddr) != 0;
+
+            // !!! Before modifying state we have to make sure that we will not go for retry.
+
+            // We may need to replace inner key or want to merge this leaf with sibling after the remove -> keep lock.
+            if (needReplaceInner ||
+                // We need to make sure that we have back or forward to be able to merge.
+                ((r.fwdId != 0 || r.backId != 0) && mayMerge(cnt - 1, io.getMaxCount(leafAddr, pageSize())))) {
+                // If we have backId then we've already locked back page, nothing to do here.
+                if (r.fwdId != 0 && r.backId == 0) {
+                    Result res = r.lockForward(0);
+
+                    if (res != FOUND) {
+                        assert r.tail == null;
+
+                        return res; // Retry.
+                    }
+
+                    assert r.tail != null; // We've just locked forward page.
+                }
+
+                // Retry must reset these fields when we release the whole branch without remove.
+                assert r.needReplaceInner == FALSE: "needReplaceInner";
+                assert r.needMergeEmptyBranch == FALSE: "needMergeEmptyBranch";
+
+                if (cnt == 1) // It was the last element on the leaf.
+                    r.needMergeEmptyBranch = TRUE;
+
+                if (needReplaceInner)
+                    r.needReplaceInner = TRUE;
+
+                Tail<L> t = r.addTail(leafId, leafPage, leafAddr, io, 0, Tail.EXACT);
+
+                t.idx = (short)idx;
+
+                // We will do the actual remove only when we made sure that
+                // we've locked the whole needed branch correctly.
+                return FOUND;
+            }
+
+            r.removeDataRowFromLeaf(leafId, leafPage, leafAddr, null, io, cnt, idx);
+
+            return FOUND;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Remove> lockBackAndRmvFromLeaf = new LockBackAndRmvFromLeaf();
+
+    /**
+     *
+     */
+    private class LockBackAndRmvFromLeaf extends GetPageHandler<Remove> {
+        /** {@inheritDoc} */
+        @Override protected Result run0(long backId, long backPage, long backAddr, BPlusIO<L> io, Remove r, int lvl)
+            throws IgniteCheckedException {
+            // Check that we have consistent view of the world.
+            if (io.getForward(backAddr) != r.pageId)
+                return RETRY;
+
+            // Correct locking order: from back to forward.
+            Result res = r.doRemoveFromLeaf();
+
+            // Keep locks on back and leaf pages for subsequent merges.
+            if (res == FOUND && r.tail != null)
+                r.addTail(backId, backPage, backAddr, io, lvl, Tail.BACK);
+
+            return res;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Remove> lockBackAndTail = new LockBackAndTail();
+
+    /**
+     *
+     */
+    private class LockBackAndTail extends GetPageHandler<Remove> {
+        /** {@inheritDoc} */
+        @Override public Result run0(long backId, long backPage, long backAddr, BPlusIO<L> io, Remove r, int lvl)
+            throws IgniteCheckedException {
+            // Check that we have consistent view of the world.
+            if (io.getForward(backAddr) != r.pageId)
+                return RETRY;
+
+            // Correct locking order: from back to forward.
+            Result res = r.doLockTail(lvl);
+
+            if (res == FOUND)
+                r.addTail(backId, backPage, backAddr, io, lvl, Tail.BACK);
+
+            return res;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Remove> lockTailForward = new LockTailForward();
+
+    /**
+     *
+     */
+    private class LockTailForward extends GetPageHandler<Remove> {
+        /** {@inheritDoc} */
+        @Override protected Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Remove r, int lvl)
+            throws IgniteCheckedException {
+            r.addTail(pageId, page, pageAddr, io, lvl, Tail.FORWARD);
+
+            return FOUND;
+        }
+    }
+
+    /** */
+    private final GetPageHandler<Remove> lockTail = new LockTail();
+
+    /**
+     *
+     */
+    private class LockTail extends GetPageHandler<Remove> {
+        /** {@inheritDoc} */
+        @Override public Result run0(long pageId, long page, long pageAddr, BPlusIO<L> io, Remove r, int lvl)
+            throws IgniteCheckedException {
+            assert lvl > 0 : lvl; // We are not at the bottom.
+
+            // Check that we have a correct view of the world.
+            if (io.getForward(pageAddr) != r.fwdId)
+                return RETRY;
+
+            // We don't have a back page, need to lock our forward and become a back for it.
+            if (r.fwdId != 0 && r.backId == 0) {
+                Result res = r.lockForward(lvl);
+
+                if (res != FOUND)
+                    return res; // Retry.
+            }
+
+            r.addTail(pageId, page, pageAddr, io, lvl, Tail.EXACT);
+
+            return FOUND;
+        }
+    }
+
+    /** */
+    private final PageHandler<Void, Bool> cutRoot = new CutRoot();
+
+    /**
+     *
+     */
+    private class CutRoot extends PageHandler<Void, Bool> {
+        /** {@inheritDoc} */
+        @Override public Bool run(int cacheId, long metaId, long metaPage, long metaAddr, PageIO iox, Boolean walPlc, Void ignore, int lvl)
+            throws IgniteCheckedException {
+            // Safe cast because we should never recycle meta page until the tree is destroyed.
+            BPlusMetaIO io = (BPlusMetaIO)iox;
+
+            assert lvl == io.getRootLevel(metaAddr); // Can drop only root.
+
+            io.cutRoot(metaAddr, pageSize());
+
+            if (needWalDeltaRecord(metaId, metaPage, walPlc))
+                wal.log(new MetaPageCutRootRecord(cacheId, metaId));
+
+            int newLvl = lvl - 1;
+
+            assert io.getRootLevel(metaAddr) == newLvl;
+
+            treeMeta = new TreeMetaData(newLvl, io.getFirstPageId(metaAddr, newLvl));
+
+            return TRUE;
+        }
+    }
+
+    /** */
+    private final PageHandler<Long, Bool> addRoot = new AddRoot();
+
+    /**
+     *
+     */
+    private class AddRoot extends PageHandler<Long, Bool> {
+        /** {@inheritDoc} */
+        @Override public Bool run(int cacheId, long metaId, long metaPage, long pageAddr, PageIO iox, Boolean walPlc, Long rootPageId, int lvl)
+            throws IgniteCheckedException {
+            assert rootPageId != null;
+
+            // Safe cast because we should never recycle meta page until the tree is destroyed.
+            BPlusMetaIO io = (BPlusMetaIO)iox;
+
+            assert lvl == io.getLevelsCount(pageAddr);
+
+            io.addRoot(pageAddr, rootPageId, pageSize());
+
+            if (needWalDeltaRecord(metaId, metaPage, walPlc))
+                wal.log(new MetaPageAddRootRecord(cacheId, metaId, rootPageId));
+
+            assert io.getRootLevel(pageAddr) == lvl;
+            assert io.getFirstPageId(pageAddr, lvl) == rootPageId;
+
+            treeMeta = new TreeMetaData(lvl, rootPageId);
+
+            return TRUE;
+        }
+    }
+
+    /** */
+    private final PageHandler<Long, Bool> initRoot = new InitRoot();
+
+    /**
+     *
+     */
+    private class InitRoot extends PageHandler<Long, Bool> {
+        /** {@inheritDoc} */
+        @Override public Bool run(int cacheId, long metaId, long metaPage, long pageAddr, PageIO iox, Boolean walPlc, Long rootId, int inlineSize)
+            throws IgniteCheckedException {
+            assert rootId != null;
+
+            // Safe cast because we should never recycle meta page until the tree is destroyed.
+            BPlusMetaIO io = (BPlusMetaIO)iox;
+
+            io.initRoot(pageAddr, rootId, pageSize());
+            io.setInlineSize(pageAddr, inlineSize);
+
+            if (needWalDeltaRecord(metaId, metaPage, walPlc))
+                wal.log(new MetaPageInitRootInlineRecord(cacheId, metaId, rootId, inlineSize));
+
+            assert io.getRootLevel(pageAddr) == 0;
+            assert io.getFirstPageId(pageAddr, 0) == rootId;
+
+            treeMeta = new TreeMetaData(0, rootId);
+
+            return TRUE;
+        }
+    }
+
+    /**
+     * @param name Tree name.
+     * @param cacheId Cache ID.
+     * @param pageMem Page memory.
+     * @param wal Write ahead log manager.
+     * @param globalRmvId Remove ID.
+     * @param metaPageId Meta page ID.
+     * @param reuseList Reuse list.
+     * @param innerIos Inner IO versions.
+     * @param leafIos Leaf IO versions.
+     * @throws IgniteCheckedException If failed.
+     */
+    protected BPlusTree(
+        String name,
+        int cacheId,
+        PageMemory pageMem,
+        IgniteWriteAheadLogManager wal,
+        AtomicLong globalRmvId,
+        long metaPageId,
+        ReuseList reuseList,
+        IOVersions<? extends BPlusInnerIO<L>> innerIos,
+        IOVersions<? extends BPlusLeafIO<L>> leafIos
+    ) throws IgniteCheckedException {
+        this(name, cacheId, pageMem, wal, globalRmvId, metaPageId, reuseList);
+        setIos(innerIos, leafIos);
+    }
+
+    /**
+     * @param name Tree name.
+     * @param cacheId Cache ID.
+     * @param pageMem Page memory.
+     * @param wal Write ahead log manager.
+     * @param globalRmvId Remove ID.
+     * @param metaPageId Meta page ID.
+     * @param reuseList Reuse list.
+     * @throws IgniteCheckedException If failed.
+     */
+    protected BPlusTree(
+        String name,
+        int cacheId,
+        PageMemory pageMem,
+        IgniteWriteAheadLogManager wal,
+        AtomicLong globalRmvId,
+        long metaPageId,
+        ReuseList reuseList
+    ) throws IgniteCheckedException {
+        super(cacheId, pageMem, wal);
+
+        assert !F.isEmpty(name);
+
+        // TODO make configurable: 0 <= minFill <= maxFill <= 1
+        minFill = 0f; // Testing worst case when merge happens only on empty page.
+        maxFill = 0f; // Avoiding random effects on testing.
+
+        assert metaPageId != 0L;
+
+        this.metaPageId = metaPageId;
+        this.name = name;
+        this.reuseList = reuseList;
+        this.globalRmvId = globalRmvId;
+    }
+
+    /**
+     * @param innerIos Inner IO versions.
+     * @param leafIos Leaf IO versions.
+     */
+    protected void setIos(IOVersions<? extends BPlusInnerIO<L>> innerIos,
+        IOVersions<? extends BPlusLeafIO<L>> leafIos) {
+        assert innerIos != null;
+        assert leafIos != null;
+
+        this.canGetRowFromInner = innerIos.latest().canGetRow(); // TODO refactor
+        this.innerIos = innerIos;
+        this.leafIos = leafIos;
+    }
+
+    /**
+     * @return Tree name.
+     */
+    public final String getName() {
+        return name;
+    }
+
+    /**
+     * Initialize new tree.
+     *
+     * @param initNew {@code True} if new tree should be created.
+     * @throws IgniteCheckedException If failed.
+     */
+    protected final void initTree(boolean initNew) throws IgniteCheckedException {
+        initTree(initNew, 0);
+    }
+
+    /**
+     * Initialize new tree.
+     *
+     * @param initNew {@code True} if new tree should be created.
+     * @param inlineSize Inline size.
+     * @throws IgniteCheckedException If failed.
+     */
+    protected final void initTree(boolean initNew, int inlineSize) throws IgniteCheckedException {
+        if (initNew) {
+            // Allocate the first leaf page, it will be our root.
+            long rootId = allocatePage(null);
+
+            init(rootId, latestLeafIO());
+
+            // Initialize meta page with new root page.
+            Bool res = write(metaPageId, initRoot, BPlusMetaIO.VERSIONS.latest(), rootId, inlineSize, FALSE);
+
+            assert res == TRUE: res;
+
+            assert treeMeta != null;
+        }
+    }
+
+    /**
+     * @return Tree meta data.
+     * @throws IgniteCheckedException If failed.
+     */
+    private TreeMetaData treeMeta() throws IgniteCheckedException {
+        TreeMetaData meta0 = treeMeta;
+
+        if (meta0 != null)
+            return meta0;
+
+        final long metaPage = acquirePage(metaPageId);
+        try {
+            long pageAddr = readLock(metaPageId, metaPage); // Meta can't be removed.
+
+            assert pageAddr != 0 : "Failed to read lock meta page [metaPageId=" +
+                U.hexLong(metaPageId) + ']';
+
+            try {
+                BPlusMetaIO io = BPlusMetaIO.VERSIONS.forPage(pageAddr);
+
+                int rootLvl = io.getRootLevel(pageAddr);
+                long rootId = io.getFirstPageId(pageAddr, rootLvl);
+
+                treeMeta = meta0 = new TreeMetaData(rootLvl, rootId);
+            }
+            finally {
+                readUnlock(metaPageId, metaPage, pageAddr);
+            }
+        }
+        finally {
+            releasePage(metaPageId, metaPage);
+        }
+
+        return meta0;
+    }
+
+    /**
+     * @return Root level.
+     * @throws IgniteCheckedException If failed.
+     */
+    private int getRootLevel() throws IgniteCheckedException {
+        TreeMetaData meta0 = treeMeta();
+
+        assert meta0 != null;
+
+        return meta0.rootLvl;
+    }
+
+    /**
+     * @param metaId Meta page ID.
+     * @param metaPage Meta page pointer.
+     * @param lvl Level, if {@code 0} then it is a bottom level, if negative then root.
+     * @return Page ID.
+     */
+    private long getFirstPageId(long metaId, long metaPage, int lvl) {
+        long pageAddr = readLock(metaId, metaPage); // Meta can't be removed.
+
+        try {
+            BPlusMetaIO io = BPlusMetaIO.VERSIONS.forPage(pageAddr);
+
+            if (lvl < 0)
+                lvl = io.getRootLevel(pageAddr);
+
+            if (lvl >= io.getLevelsCount(pageAddr))
+                return 0;
+
+            return io.getFirstPageId(pageAddr, lvl);
+        }
+        finally {
+            readUnlock(metaId, metaPage, pageAddr);
+        }
+    }
+
+    /**
+     * @param upper Upper bound.
+     * @param x Implementation specific argument, {@code null} always means that we need to return full detached data row.
+     * @return Cursor.
+     * @throws IgniteCheckedException If failed.
+     */
+    private GridCursor<T> findLowerUnbounded(L upper, Object x) throws IgniteCheckedException {
+        ForwardCursor cursor = new ForwardCursor(null, upper, x);
+
+        long firstPageId;
+
+        long metaPage = acquirePage(metaPageId);
+        try  {
+            firstPageId = getFirstPageId(metaPageId, metaPage, 0); // Level 0 is always at the bottom.
+        }
+        finally {
+            releasePage(metaPageId, metaPage);
+        }
+
+        long firstPage = acquirePage(firstPageId);
+
+        try {
+            long pageAddr = readLock(firstPageId, firstPage); // We always merge pages backwards, the first page is never removed.
+
+            try {
+                cursor.init(pageAddr, io(pageAddr), 0);
+            }
+            finally {
+                readUnlock(firstPageId, firstPage, pageAddr);
+            }
+        }
+        finally {
+            releasePage(firstPageId, firstPage);
+        }
+
+        return cursor;
+    }
+
+    /**
+     * Check if the tree is getting destroyed.
+     */
+    private void checkDestroyed() {
+        if (destroyed.get())
+            throw new IllegalStateException("Tree is being concurrently destroyed: " + getName());
+    }
+
+    /**
+     * @param lower Lower bound inclusive or {@code null} if unbounded.
+     * @param upper Upper bound inclusive or {@code null} if unbounded.
+     * @return Cursor.
+     * @throws IgniteCheckedException If failed.
+     */
+    @Override public GridCursor<T> find(L lower, L upper) throws IgniteCheckedException {
+        return find(lower, upper, null);
+    }
+
+    /**
+     * @param lower Lower bound inclusive or {@code null} if unbounded.
+     * @param upper Upper bound inclusive or {@code null} if unbounded.
+     * @param x Implementation specific argument, {@code null} always means that we need to return full detached data row.
+     * @return Cursor.
+     * @throws IgniteCheckedException If failed.
+     */
+    public final GridCursor<T> find(L lower, L upper, Object x) throws IgniteCheckedException {
+        checkDestroyed();
+
+        try {
+            if (lower == null)
+                return findLowerUnbounded(upper, x);
+
+            ForwardCursor cursor = new ForwardCursor(lower, upper, x);
+
+            cursor.find();
+
+            return cursor;
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on bounds: [lower=" + lower + ", upper=" + upper + "]", e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on bounds: [lower=" + lower + ", upper=" + upper + "]", e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on bounds: [lower=" + lower + ", upper=" + upper + "]", e);
+        }
+        finally {
+            checkDestroyed();
+        }
+    }
+
+    /** {@inheritDoc} */
+    @Override public T findFirst() throws IgniteCheckedException {
+        checkDestroyed();
+
+        try {
+            long firstPageId;
+
+            long metaPage = acquirePage(metaPageId);
+            try {
+                firstPageId = getFirstPageId(metaPageId, metaPage, 0);
+            }
+            finally {
+                releasePage(metaPageId, metaPage);
+            }
+
+            long page = acquirePage(firstPageId);
+
+            try {
+                long pageAddr = readLock(firstPageId, page);
+
+                try {
+                    BPlusIO<L> io = io(pageAddr);
+
+                    int cnt = io.getCount(pageAddr);
+
+                    if (cnt == 0)
+                        return null;
+
+                    return getRow(io, pageAddr, 0);
+                }
+                finally {
+                    readUnlock(firstPageId, page, pageAddr);
+                }
+            }
+            finally {
+                releasePage(firstPageId, page);
+            }
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on first row lookup", e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on first row lookup", e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on first row lookup", e);
+        }
+        finally {
+            checkDestroyed();
+        }
+    }
+
+    /** {@inheritDoc} */
+    @SuppressWarnings("unchecked")
+    @Override public T findLast() throws IgniteCheckedException {
+        checkDestroyed();
+
+        try {
+            GetOne g = new GetOne(null, null, true);
+            doFind(g);
+
+            return (T)g.row;
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on last row lookup", e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on last row lookup", e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on last row lookup", e);
+        }
+        finally {
+            checkDestroyed();
+        }
+    }
+
+    /**
+     * @param row Lookup row for exact match.
+     * @param x Implementation specific argument, {@code null} always means that we need to return full detached data row.
+     * @return Found result or {@code null}.
+     * @throws IgniteCheckedException If failed.
+     */
+    @SuppressWarnings("unchecked")
+    public final <R> R findOne(L row, Object x) throws IgniteCheckedException {
+        checkDestroyed();
+
+        try {
+            GetOne g = new GetOne(row, x, false);
+
+            doFind(g);
+
+            return (R)g.row;
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on lookup row: " + row, e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on lookup row: " + row, e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on lookup row: " + row, e);
+        }
+        finally {
+            checkDestroyed();
+        }
+    }
+
+    /**
+     * @param row Lookup row for exact match.
+     * @return Found row.
+     * @throws IgniteCheckedException If failed.
+     */
+    @SuppressWarnings("unchecked")
+    @Override public final T findOne(L row) throws IgniteCheckedException {
+        return findOne(row, null);
+    }
+
+    /**
+     * @param g Get.
+     * @throws IgniteCheckedException If failed.
+     */
+    private void doFind(Get g) throws IgniteCheckedException {
+        for (;;) { // Go down with retries.
+            g.init();
+
+            switch (findDown(g, g.rootId, 0L, g.rootLvl)) {
+                case RETRY:
+                case RETRY_ROOT:
+                    checkInterrupted();
+
+                    continue;
+
+                default:
+                    return;
+            }
+        }
+    }
+
+    /**
+     * @param g Get.
+     * @param pageId Page ID.
+     * @param fwdId Expected forward page ID.
+     * @param lvl Level.
+     * @return Result code.
+     * @throws IgniteCheckedException If failed.
+     */
+    private Result findDown(final Get g, final long pageId, final long fwdId, final int lvl)
+        throws IgniteCheckedException {
+        long page = acquirePage(pageId);
+
+        try {
+            for (;;) {
+                // Init args.
+                g.pageId = pageId;
+                g.fwdId = fwdId;
+
+                Result res = read(pageId, page, search, g, lvl, RETRY);
+
+                switch (res) {
+                    case GO_DOWN:
+                    case GO_DOWN_X:
+                        assert g.pageId != pageId;
+                        assert g.fwdId != fwdId || fwdId == 0;
+
+                        // Go down recursively.
+                        res = findDown(g, g.pageId, g.fwdId, lvl - 1);
+
+                        switch (res) {
+                            case RETRY:
+                                checkInterrupted();
+
+                                continue; // The child page got split, need to reread our page.
+
+                            default:
+                                return res;
+                        }
+
+                    case NOT_FOUND:
+                        assert lvl == 0 : lvl;
+
+                        g.row = null; // Mark not found result.
+
+                        return res;
+
+                    default:
+                        return res;
+                }
+            }
+        }
+        finally {
+            if (g.canRelease(pageId, lvl))
+                releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param instance Instance name.
+     * @param type Tree type.
+     * @return Tree name.
+     */
+    public static String treeName(String instance, String type) {
+        return instance + "##" + type;
+    }
+
+    /**
+     * For debug.
+     *
+     * @return Tree as {@link String}.
+     * @throws IgniteCheckedException If failed.
+     */
+    @SuppressWarnings("unused")
+    public final String printTree() throws IgniteCheckedException {
+        long rootPageId;
+
+        long metaPage = acquirePage(metaPageId);
+        try {
+            rootPageId = getFirstPageId(metaPageId, metaPage, -1);
+        }
+        finally {
+            releasePage(metaPageId, metaPage);
+        }
+
+        return treePrinter.print(rootPageId);
+    }
+
+    /**
+     * @throws IgniteCheckedException If failed.
+     */
+    public final void validateTree() throws IgniteCheckedException {
+        long rootPageId;
+        int rootLvl;
+
+        long metaPage = acquirePage(metaPageId);
+        try  {
+            rootLvl = getRootLevel();
+
+            if (rootLvl < 0)
+                fail("Root level: " + rootLvl);
+
+            validateFirstPages(metaPageId, metaPage, rootLvl);
+
+            rootPageId = getFirstPageId(metaPageId, metaPage, rootLvl);
+
+            validateDownPages(rootPageId, 0L, rootLvl);
+
+            validateDownKeys(rootPageId, null, rootLvl);
+        }
+        finally {
+            releasePage(metaPageId, metaPage);
+        }
+    }
+
+    /**
+     * @param pageId Page ID.
+     * @param minRow Minimum row.
+     * @throws IgniteCheckedException If failed.
+     */
+    private void validateDownKeys(long pageId, L minRow, int lvl) throws IgniteCheckedException {
+        long page = acquirePage(pageId);
+        try {
+            long pageAddr = readLock(pageId, page); // No correctness guaranties.
+
+            try {
+                BPlusIO<L> io = io(pageAddr);
+
+                int cnt = io.getCount(pageAddr);
+
+                if (cnt < 0)
+                    fail("Negative count: " + cnt);
+
+                if (io.isLeaf()) {
+                    for (int i = 0; i < cnt; i++) {
+                        if (minRow != null && compare(lvl, io, pageAddr, i, minRow) <= 0)
+                            fail("Wrong sort order: " + U.hexLong(pageId) + " , at " + i + " , minRow: " + minRow);
+
+                        minRow = io.getLookupRow(this, pageAddr, i);
+                    }
+
+                    return;
+                }
+
+                // To find our inner key we have to go left and then always go to the right.
+                for (int i = 0; i < cnt; i++) {
+                    L row = io.getLookupRow(this, pageAddr, i);
+
+                    if (minRow != null && compare(lvl, io, pageAddr, i, minRow) <= 0)
+                        fail("Min row violated: " + row + " , minRow: " + minRow);
+
+                    long leftId = inner(io).getLeft(pageAddr, i);
+
+                    L leafRow = getGreatestRowInSubTree(leftId);
+
+                    int cmp = compare(lvl, io, pageAddr, i, leafRow);
+
+                    if (cmp < 0 || (cmp != 0 && canGetRowFromInner))
+                        fail("Wrong inner row: " + U.hexLong(pageId) + " , at: " + i + " , leaf:  " + leafRow +
+                            " , inner: " + row);
+
+                    validateDownKeys(leftId, minRow, lvl - 1);
+
+                    minRow = row;
+                }
+
+                // Need to handle the rightmost child subtree separately or handle empty routing page.
+                long rightId = inner(io).getLeft(pageAddr, cnt); // The same as getRight(cnt - 1)
+
+                validateDownKeys(rightId, minRow, lvl - 1);
+            }
+            finally {
+                readUnlock(pageId, page, pageAddr);
+            }
+        }
+        finally {
+            releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param pageId Page ID.
+     * @return Search row.
+     * @throws IgniteCheckedException If failed.
+     */
+    private L getGreatestRowInSubTree(long pageId) throws IgniteCheckedException {
+        long page = acquirePage(pageId);
+        try {
+            long pageAddr = readLock(pageId, page); // No correctness guaranties.
+
+            try {
+                BPlusIO<L> io = io(pageAddr);
+
+                int cnt = io.getCount(pageAddr);
+
+                if (io.isLeaf()) {
+                    if (cnt <= 0) // This code is called only if the tree is not empty, so we can't see empty leaf.
+                        fail("Invalid leaf count: " + cnt + " " + U.hexLong(pageId));
+
+                    return io.getLookupRow(this, pageAddr, cnt - 1);
+                }
+
+                long rightId = inner(io).getLeft(pageAddr, cnt);// The same as getRight(cnt - 1), but good for routing pages.
+
+                return getGreatestRowInSubTree(rightId);
+            }
+            finally {
+                readUnlock(pageId, page, pageAddr);
+            }
+        }
+        finally {
+            releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param metaId Meta page ID.
+     * @param metaPage Meta page pointer.
+     * @param rootLvl Root level.
+     * @throws IgniteCheckedException If failed.
+     */
+    private void validateFirstPages(long metaId, long metaPage, int rootLvl) throws IgniteCheckedException {
+        for (int lvl = rootLvl; lvl > 0; lvl--)
+            validateFirstPage(metaId, metaPage, lvl);
+    }
+
+    /**
+     * @param msg Message.
+     */
+    private static void fail(Object msg) {
+        throw new AssertionError(msg);
+    }
+
+    /**
+     * @param metaId Meta page ID.
+     * @param metaPage Meta page pointer.
+     * @param lvl Level.
+     * @throws IgniteCheckedException If failed.
+     */
+    private void validateFirstPage(long metaId, long metaPage, int lvl) throws IgniteCheckedException {
+        if (lvl == 0)
+            fail("Leaf level: " + lvl);
+
+        long pageId = getFirstPageId(metaId, metaPage, lvl);
+
+        long leftmostChildId;
+
+        long page = acquirePage(pageId);
+        try {
+            long pageAddr = readLock(pageId, page); // No correctness guaranties.
+
+            try {
+                BPlusIO<L> io = io(pageAddr);
+
+                if (io.isLeaf())
+                    fail("Leaf.");
+
+                leftmostChildId = inner(io).getLeft(pageAddr, 0);
+            }
+            finally {
+                readUnlock(pageId, page, pageAddr);
+            }
+        }
+        finally {
+            releasePage(pageId, page);
+        }
+
+        long firstDownPageId = getFirstPageId(metaId, metaPage, lvl - 1);
+
+        if (firstDownPageId != leftmostChildId)
+            fail(new SB("First: meta ").appendHex(firstDownPageId).a(", child ").appendHex(leftmostChildId));
+    }
+
+    /**
+     * @param pageId Page ID.
+     * @param fwdId Forward ID.
+     * @param lvl Level.
+     * @throws IgniteCheckedException If failed.
+     */
+    private void validateDownPages(long pageId, long fwdId, int lvl) throws IgniteCheckedException {
+        long page = acquirePage(pageId);
+        try {
+            long pageAddr = readLock(pageId, page); // No correctness guaranties.
+
+            try {
+                long realPageId = BPlusIO.getPageId(pageAddr);
+
+                if (realPageId != pageId)
+                    fail(new SB("ABA on page ID: ref ").appendHex(pageId).a(", buf ").appendHex(realPageId));
+
+                BPlusIO<L> io = io(pageAddr);
+
+                if (io.isLeaf() != (lvl == 0)) // Leaf pages only at the level 0.
+                    fail("Leaf level mismatch: " + lvl);
+
+                long actualFwdId = io.getForward(pageAddr);
+
+                if (actualFwdId != fwdId)
+                    fail(new SB("Triangle: expected fwd ").appendHex(fwdId).a(", actual fwd ").appendHex(actualFwdId));
+
+                int cnt = io.getCount(pageAddr);
+
+                if (cnt < 0)
+                    fail("Negative count: " + cnt);
+
+                if (io.isLeaf()) {
+                    if (cnt == 0 && getRootLevel() != 0)
+                        fail("Empty leaf page.");
+                }
+                else {
+                    // Recursively go down if we are on inner level.
+                    for (int i = 0; i < cnt; i++)
+                        validateDownPages(inner(io).getLeft(pageAddr, i), inner(io).getRight(pageAddr, i), lvl - 1);
+
+                    if (fwdId != 0) {
+                        // For the rightmost child ask neighbor.
+                        long fwdId0 = fwdId;
+                        long fwdPage = acquirePage(fwdId0);
+                        try {
+                            long fwdPageAddr = readLock(fwdId0, fwdPage); // No correctness guaranties.
+
+                            try {
+                                if (io(fwdPageAddr) != io)
+                                    fail("IO on the same level must be the same");
+
+                                fwdId = inner(io).getLeft(fwdPageAddr, 0);
+                            }
+                            finally {
+                                readUnlock(fwdId0, fwdPage, fwdPageAddr);
+                            }
+                        }
+                        finally {
+                            releasePage(fwdId0, fwdPage);
+                        }
+                    }
+
+                    long leftId = inner(io).getLeft(pageAddr, cnt); // The same as io.getRight(cnt - 1) but works for routing pages.
+
+                    validateDownPages(leftId, fwdId, lvl - 1);
+                }
+            }
+            finally {
+                readUnlock(pageId, page, pageAddr);
+            }
+        }
+        finally {
+            releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param io IO.
+     * @param pageAddr Page address.
+     * @param keys Keys.
+     * @return String.
+     * @throws IgniteCheckedException If failed.
+     */
+    private String printPage(BPlusIO<L> io, long pageAddr, boolean keys) throws IgniteCheckedException {
+        StringBuilder b = new StringBuilder();
+
+        b.append(formatPageId(PageIO.getPageId(pageAddr)));
+
+        b.append(" [ ");
+        b.append(io.isLeaf() ? "L " : "I ");
+
+        int cnt = io.getCount(pageAddr);
+        long fwdId = io.getForward(pageAddr);
+
+        b.append("cnt=").append(cnt).append(' ');
+        b.append("fwd=").append(formatPageId(fwdId)).append(' ');
+
+        if (!io.isLeaf()) {
+            b.append("lm=").append(formatPageId(inner(io).getLeft(pageAddr, 0))).append(' ');
+
+            if (cnt > 0)
+                b.append("rm=").append(formatPageId(inner(io).getRight(pageAddr, cnt - 1))).append(' ');
+        }
+
+        if (keys)
+            b.append("keys=").append(printPageKeys(io, pageAddr)).append(' ');
+
+        b.append(']');
+
+        return b.toString();
+    }
+
+    /**
+     * @param io IO.
+     * @param pageAddr Page address.
+     * @return Keys as String.
+     * @throws IgniteCheckedException If failed.
+     */
+    private String printPageKeys(BPlusIO<L> io, long pageAddr) throws IgniteCheckedException {
+        int cnt = io.getCount(pageAddr);
+
+        StringBuilder b = new StringBuilder();
+
+        b.append('[');
+
+        for (int i = 0; i < cnt; i++) {
+            if (i != 0)
+                b.append(',');
+
+            b.append(io.isLeaf() || canGetRowFromInner ? getRow(io, pageAddr, i) : io.getLookupRow(this, pageAddr, i));
+        }
+
+        b.append(']');
+
+        return b.toString();
+    }
+
+    /**
+     * @param x Long.
+     * @return String.
+     */
+    private static String formatPageId(long x) {
+        return U.hexLong(x);
+    }
+
+    /**
+     * @param idx Index after binary search, which can be negative.
+     * @return Always positive index.
+     */
+    private static int fix(int idx) {
+        assert checkIndex(idx): idx;
+
+        if (idx < 0)
+            idx = -idx - 1;
+
+        return idx;
+    }
+
+    /**
+     * @param idx Index.
+     * @return {@code true} If correct.
+     */
+    private static boolean checkIndex(int idx) {
+        return idx > -Short.MAX_VALUE && idx < Short.MAX_VALUE;
+    }
+
+    /**
+     * Check if interrupted.
+     * @throws IgniteInterruptedCheckedException If interrupted.
+     */
+    private static void checkInterrupted() throws IgniteInterruptedCheckedException {
+        // We should not interrupt operations in the middle, because otherwise we'll end up in inconsistent state.
+        // Because of that we do not check for Thread.interrupted()
+        if (interrupted)
+            throw new IgniteInterruptedCheckedException("Interrupted.");
+    }
+
+    /**
+     * Interrupt all operations on all threads and all indexes.
+     */
+    @SuppressWarnings("unused")
+    public static void interruptAll() {
+        interrupted = true;
+    }
+
+    /**
+     * @param row Lookup row.
+     * @return Removed row.
+     * @throws IgniteCheckedException If failed.
+     */
+    @Override public final T remove(L row) throws IgniteCheckedException {
+        return doRemove(row, true);
+    }
+
+    /**
+     * @param row Lookup row.
+     * @throws IgniteCheckedException If failed.
+     * @return {@code True} if removed row.
+     */
+    public final boolean removex(L row) throws IgniteCheckedException {
+        Boolean res = (Boolean)doRemove(row, false);
+
+        return res != null ? res : false;
+    }
+
+    /** {@inheritDoc} */
+    @Override public void invoke(L row, Object z, InvokeClosure<T> c) throws IgniteCheckedException {
+        checkDestroyed();
+
+        Invoke x = new Invoke(row, z, c);
+
+        try {
+            for (;;) {
+                x.init();
+
+                Result res = invokeDown(x, x.rootId, 0L, 0L, x.rootLvl);
+
+                switch (res) {
+                    case RETRY:
+                    case RETRY_ROOT:
+                        checkInterrupted();
+
+                        continue;
+
+                    default:
+                        if (!x.isFinished()) {
+                            res = x.tryFinish();
+
+                            if (res == RETRY || res == RETRY_ROOT) {
+                                checkInterrupted();
+
+                                continue;
+                            }
+
+                            assert x.isFinished(): res;
+                        }
+
+                        return;
+                }
+            }
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on search row: " + row, e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on search row: " + row, e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on search row: " + row, e);
+        }
+        finally {
+            x.releaseAll();
+            checkDestroyed();
+        }
+    }
+
+    /**
+     * @param x Invoke operation.
+     * @param pageId Page ID.
+     * @param backId Expected backward page ID if we are going to the right.
+     * @param fwdId Expected forward page ID.
+     * @param lvl Level.
+     * @return Result code.
+     * @throws IgniteCheckedException If failed.
+     */
+    private Result invokeDown(final Invoke x, final long pageId, final long backId, final long fwdId, final int lvl)
+        throws IgniteCheckedException {
+        assert lvl >= 0 : lvl;
+
+        if (x.isTail(pageId, lvl))
+            return FOUND; // We've already locked this page, so return that we are ok.
+
+        long page = acquirePage(pageId);
+
+        try {
+            for (;;) {
+                // Init args.
+                x.pageId(pageId);
+                x.fwdId(fwdId);
+                x.backId(backId);
+
+                Result res = read(pageId, page, search, x, lvl, RETRY);
+
+                switch (res) {
+                    case GO_DOWN_X:
+                        assert backId != 0;
+                        assert x.backId == 0; // We did not setup it yet.
+
+                        x.backId(pageId); // Dirty hack to setup a check inside of askNeighbor.
+
+                        // We need to get backId here for our child page, it must be the last child of our back.
+                        res = askNeighbor(backId, x, true);
+
+                        if (res != FOUND)
+                            return res; // Retry.
+
+                        assert x.backId != pageId; // It must be updated in askNeighbor.
+
+                        // Intentional fallthrough.
+                    case GO_DOWN:
+                        res = x.tryReplaceInner(pageId, page, fwdId, lvl);
+
+                        if (res != RETRY)
+                            res = invokeDown(x, x.pageId, x.backId, x.fwdId, lvl - 1);
+
+                        if (res == RETRY_ROOT || x.isFinished())
+                            return res;
+
+                        if (res == RETRY) {
+                            checkInterrupted();
+
+                            continue;
+                        }
+
+                        // Unfinished Put does insertion on the same level.
+                        if (x.isPut())
+                            continue;
+
+                        assert x.isRemove(); // Guarded by isFinished.
+
+                        res = x.finishOrLockTail(pageId, page, backId, fwdId, lvl);
+
+                        return res;
+
+                    case NOT_FOUND:
+                        if (lvl == 0)
+                            x.invokeClosure();
+
+                        return x.onNotFound(pageId, page, fwdId, lvl);
+
+                    case FOUND:
+                        if (lvl == 0)
+                            x.invokeClosure();
+
+                        return x.onFound(pageId, page, backId, fwdId, lvl);
+
+                    default:
+                        return res;
+                }
+            }
+        }
+        finally {
+            x.levelExit();
+
+            if (x.canRelease(pageId, lvl))
+                releasePage(pageId, page);
+        }
+    }
+
+
+    /**
+     * @param row Lookup row.
+     * @param needOld {@code True} if need return removed row.
+     * @return Removed row.
+     * @throws IgniteCheckedException If failed.
+     */
+    private T doRemove(L row, boolean needOld) throws IgniteCheckedException {
+        checkDestroyed();
+
+        Remove r = new Remove(row, needOld);
+
+        try {
+            for (;;) {
+                r.init();
+
+                Result res = removeDown(r, r.rootId, 0L, 0L, r.rootLvl);
+
+                switch (res) {
+                    case RETRY:
+                    case RETRY_ROOT:
+                        checkInterrupted();
+
+                        continue;
+
+                    default:
+                        if (!r.isFinished()) {
+                            res = r.finishTail();
+
+                            // If not found, then the tree grew beyond our call stack -> retry from the actual root.
+                            if (res == RETRY || res == NOT_FOUND) {
+                                assert r.checkTailLevel(getRootLevel()) : "tail=" + r.tail + ", res=" + res;
+
+                                checkInterrupted();
+
+                                continue;
+                            }
+
+                            assert res == FOUND: res;
+                        }
+
+                        assert r.isFinished();
+
+                        return r.rmvd;
+                }
+            }
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on search row: " + row, e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on search row: " + row, e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on search row: " + row, e);
+        }
+        finally {
+            r.releaseAll();
+            checkDestroyed();
+        }
+    }
+
+    /**
+     * @param r Remove operation.
+     * @param pageId Page ID.
+     * @param backId Expected backward page ID if we are going to the right.
+     * @param fwdId Expected forward page ID.
+     * @param lvl Level.
+     * @return Result code.
+     * @throws IgniteCheckedException If failed.
+     */
+    private Result removeDown(final Remove r, final long pageId, final long backId, final long fwdId, final int lvl)
+        throws IgniteCheckedException {
+        assert lvl >= 0 : lvl;
+
+        if (r.isTail(pageId, lvl))
+            return FOUND; // We've already locked this page, so return that we are ok.
+
+        long page = acquirePage(pageId);
+
+        try {
+            for (;;) {
+                // Init args.
+                r.pageId = pageId;
+                r.fwdId = fwdId;
+                r.backId = backId;
+
+                Result res = read(pageId, page, search, r, lvl, RETRY);
+
+                switch (res) {
+                    case GO_DOWN_X:
+                        assert backId != 0;
+                        assert r.backId == 0; // We did not setup it yet.
+
+                        r.backId = pageId; // Dirty hack to setup a check inside of askNeighbor.
+
+                        // We need to get backId here for our child page, it must be the last child of our back.
+                        res = askNeighbor(backId, r, true);
+
+                        if (res != FOUND)
+                            return res; // Retry.
+
+                        assert r.backId != pageId; // It must be updated in askNeighbor.
+
+                        // Intentional fallthrough.
+                    case GO_DOWN:
+                        res = removeDown(r, r.pageId, r.backId, r.fwdId, lvl - 1);
+
+                        if (res == RETRY) {
+                            checkInterrupted();
+
+                            continue;
+                        }
+
+                        if (res == RETRY_ROOT || r.isFinished())
+                            return res;
+
+                        res = r.finishOrLockTail(pageId, page, backId, fwdId, lvl);
+
+                        return res;
+
+                    case NOT_FOUND:
+                        // We are at the bottom.
+                        assert lvl == 0 : lvl;
+
+                        r.finish();
+
+                        return res;
+
+                    case FOUND:
+                        return r.tryRemoveFromLeaf(pageId, page, backId, fwdId, lvl);
+
+                    default:
+                        return res;
+                }
+            }
+        }
+        finally {
+            r.page = 0L;
+
+            if (r.canRelease(pageId, lvl))
+                releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param cnt Count.
+     * @param cap Capacity.
+     * @return {@code true} If may merge.
+     */
+    private boolean mayMerge(int cnt, int cap) {
+        int minCnt = (int)(minFill * cap);
+
+        if (cnt <= minCnt) {
+            assert cnt == 0; // TODO remove
+
+            return true;
+        }
+
+        assert cnt > 0;
+
+        int maxCnt = (int)(maxFill * cap);
+
+        if (cnt > maxCnt)
+            return false;
+
+        assert false; // TODO remove
+
+        // Randomization is for smoothing worst case scenarios. Probability of merge attempt
+        // is proportional to free space in our page (discounted on fill factor).
+        return randomInt(maxCnt - minCnt) >= cnt - minCnt;
+    }
+
+    /**
+     * @return Root level.
+     * @throws IgniteCheckedException If failed.
+     */
+    public final int rootLevel() throws IgniteCheckedException {
+        checkDestroyed();
+
+        return getRootLevel();
+    }
+
+    /**
+     * !!! For debug only! May produce wrong results on concurrent access.
+     *
+     * @return Size.
+     * @throws IgniteCheckedException If failed.
+     */
+    @Override public final long size() throws IgniteCheckedException {
+        checkDestroyed();
+
+        long pageId;
+
+        long metaPage = acquirePage(metaPageId);
+        try {
+            pageId = getFirstPageId(metaPageId, metaPage, 0); // Level 0 is always at the bottom.
+        }
+        finally {
+            releasePage(metaPageId, metaPage);
+        }
+
+        BPlusIO<L> io = null;
+
+        long cnt = 0;
+
+        while (pageId != 0) {
+            long curId = pageId;
+            long curPage = acquirePage(curId);
+            try {
+                long curAddr = readLock(curId, curPage); // No correctness guaranties.
+
+                try {
+                    if (io == null) {
+                        io = io(curAddr);
+
+                        assert io.isLeaf();
+                    }
+
+                    cnt += io.getCount(curAddr);
+
+                    pageId = io.getForward(curAddr);
+                }
+                finally {
+                    readUnlock(curId, curPage, curAddr);
+                }
+            }
+            finally {
+                releasePage(curId, curPage);
+            }
+        }
+
+        checkDestroyed();
+
+        return cnt;
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override public final T put(T row) throws IgniteCheckedException {
+        return doPut(row, true);
+    }
+
+    /**
+     * @param row New value.
+     * @throws IgniteCheckedException If failed.
+     * @return {@code True} if replaced existing row.
+     */
+    public boolean putx(T row) throws IgniteCheckedException {
+        Boolean res = (Boolean)doPut(row, false);
+
+        return res != null ? res : false;
+    }
+
+    /**
+     * @param row New value.
+     * @param needOld {@code True} If need return old value.
+     * @return Old row.
+     * @throws IgniteCheckedException If failed.
+     */
+    private T doPut(T row, boolean needOld) throws IgniteCheckedException {
+        checkDestroyed();
+
+        Put p = new Put(row, needOld);
+
+        try {
+            for (;;) { // Go down with retries.
+                p.init();
+
+                Result res = putDown(p, p.rootId, 0L, p.rootLvl);
+
+                switch (res) {
+                    case RETRY:
+                    case RETRY_ROOT:
+                        checkInterrupted();
+
+                        continue;
+
+                    case FOUND:
+                        // We may need to insert split key into upper level here.
+                        if (!p.isFinished()) {
+                            // It must be impossible to have an insert higher than the current root,
+                            // because we are making decision about creating new root while keeping
+                            // write lock on current root, so it can't concurrently change.
+                            assert p.btmLvl <= getRootLevel();
+
+                            checkInterrupted();
+
+                            continue;
+                        }
+
+                        return p.oldRow;
+
+                    default:
+                        throw new IllegalStateException("Result: " + res);
+                }
+            }
+        }
+        catch (IgniteCheckedException e) {
+            throw new IgniteCheckedException("Runtime failure on row: " + row, e);
+        }
+        catch (RuntimeException e) {
+            throw new IgniteException("Runtime failure on row: " + row, e);
+        }
+        catch (AssertionError e) {
+            throw new AssertionError("Assertion error on row: " + row, e);
+        }
+        finally {
+            checkDestroyed();
+        }
+    }
+
+    /**
+     * Destroys tree. This method is allowed to be invoked only when the tree is out of use (no concurrent operations
+     * are trying to read or update the tree after destroy beginning).
+     *
+     * @return Number of pages recycled from this tree. If the tree was destroyed by someone else concurrently returns
+     *     {@code 0}, otherwise it should return at least {@code 2} (for meta page and root page), unless this tree is
+     *     used as metadata storage, or {@code -1} if we don't have a reuse list and did not do recycling at all.
+     * @throws IgniteCheckedException If failed.
+     */
+    public final long destroy() throws IgniteCheckedException {
+        return destroy(null);
+    }
+
+    /**
+     * Destroys tree. This method is allowed to be invoked only when the tree is out of use (no concurrent operations
+     * are trying to read or update the tree after destroy beginning).
+     *
+     * @param c Visitor closure. Visits only leaf pages.
+     * @return Number of pages recycled from this tree. If the tree was destroyed by someone else concurrently returns
+     *     {@code 0}, otherwise it should return at least {@code 2} (for meta page and root page), unless this tree is
+     *     used as metadata storage, or {@code -1} if we don't have a reuse list and did not do recycling at all.
+     * @throws IgniteCheckedException If failed.
+     */
+    public final long destroy(IgniteInClosure<L> c) throws IgniteCheckedException {
+        if (!markDestroyed())
+            return 0;
+
+        if (reuseList == null)
+            return -1;
+
+        DestroyBag bag = new DestroyBag();
+
+        long pagesCnt = 0;
+
+        long metaPage = acquirePage(metaPageId);
+        try {
+            long metaPageAddr = writeLock(metaPageId, metaPage); // No checks, we must be out of use.
+
+            assert metaPageAddr != 0L;
+
+            try {
+                for (long pageId : getFirstPageIds(metaPageAddr)) {
+                    assert pageId != 0;
+
+                    do {
+                        final long pId = pageId;
+
+                        long page = acquirePage(pId);
+
+                        try {
+                            long pageAddr = writeLock(pId, page); // No checks, we must be out of use.
+
+                            try {
+                                BPlusIO<L> io = io(pageAddr);
+
+                                if (c != null && io.isLeaf())
+                                    io.visit(pageAddr, c);
+
+                                long fwdPageId = io.getForward(pageAddr);
+
+                                bag.addFreePage(recyclePage(pageId, page, pageAddr, null));
+                                pagesCnt++;
+
+                                pageId = fwdPageId;
+                            }
+                            finally {
+                                writeUnlock(pId, page, pageAddr, true);
+                            }
+                        }
+                        finally {
+                            releasePage(pId, page);
+                        }
+
+                        if (bag.size() == 128) {
+                            reuseList.addForRecycle(bag);
+
+                            assert bag.isEmpty() : bag.size();
+                        }
+                    }
+                    while (pageId != 0);
+                }
+
+                bag.addFreePage(recyclePage(metaPageId, metaPage, metaPageAddr, null));
+                pagesCnt++;
+            }
+            finally {
+                writeUnlock(metaPageId, metaPage, metaPageAddr, true);
+            }
+        }
+        finally {
+            releasePage(metaPageId, metaPage);
+        }
+
+        reuseList.addForRecycle(bag);
+
+        assert bag.size() == 0 : bag.size();
+
+        return pagesCnt;
+    }
+
+    /**
+     * @return {@code True} if state was changed.
+     */
+    private boolean markDestroyed() {
+        return destroyed.compareAndSet(false, true);
+    }
+
+    /**
+     * @param pageAddr Meta page address.
+     * @return First page IDs.
+     */
+    protected Iterable<Long> getFirstPageIds(long pageAddr) {
+        List<Long> res = new ArrayList<>();
+
+        BPlusMetaIO mio = BPlusMetaIO.VERSIONS.forPage(pageAddr);
+
+        for (int lvl = mio.getRootLevel(pageAddr); lvl >= 0; lvl--)
+            res.add(mio.getFirstPageId(pageAddr, lvl));
+
+        return res;
+    }
+
+    /**
+     * @param pageId Page ID.
+     * @param page Page pointer.
+     * @param pageAddr Page address
+     * @param io IO.
+     * @param fwdId Forward page ID.
+     * @param fwdBuf Forward buffer.
+     * @param idx Insertion index.
+     * @return {@code true} The middle index was shifted to the right.
+     * @throws IgniteCheckedException If failed.
+     */
+    private boolean splitPage(
+        long pageId, long page, long pageAddr, BPlusIO io, long fwdId, long fwdBuf, int idx
+    ) throws IgniteCheckedException {
+        int cnt = io.getCount(pageAddr);
+        int mid = cnt >>> 1;
+
+        boolean res = false;
+
+        if (idx > mid) { // If insertion is going to be to the forward page, keep more in the back page.
+            mid++;
+
+            res = true;
+        }
+
+        // Update forward page.
+        io.splitForwardPage(pageAddr, fwdId, fwdBuf, mid, cnt, pageSize());
+
+        // Update existing page.
+        io.splitExistingPage(pageAddr, mid, fwdId);
+
+        if (needWalDeltaRecord(pageId, page, null))
+            wal.log(new SplitExistingPageRecord(cacheId, pageId, mid, fwdId));
+
+        return res;
+    }
+
+    /**
+     * @param pageId Page ID.
+     * @param page Page pointer.
+     * @param pageAddr Page address
+     * @param walPlc Full page WAL record policy.
+     */
+    private void writeUnlockAndClose(long pageId, long page, long pageAddr, Boolean walPlc) {
+        try {
+            writeUnlock(pageId, page, pageAddr, walPlc, true);
+        }
+        finally {
+            releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param pageId Inner page ID.
+     * @param g Get.
+     * @param back Get back (if {@code true}) or forward page (if {@code false}).
+     * @return Operation result.
+     * @throws IgniteCheckedException If failed.
+     */
+    private Result askNeighbor(long pageId, Get g, boolean back) throws IgniteCheckedException {
+        return read(pageId, askNeighbor, g, back ? TRUE.ordinal() : FALSE.ordinal(), RETRY);
+    }
+
+    /**
+     * @param p Put.
+     * @param pageId Page ID.
+     * @param fwdId Expected forward page ID.
+     * @param lvl Level.
+     * @return Result code.
+     * @throws IgniteCheckedException If failed.
+     */
+    private Result putDown(final Put p, final long pageId, final long fwdId, final int lvl)
+        throws IgniteCheckedException {
+        assert lvl >= 0 : lvl;
+
+        final long page = acquirePage(pageId);
+
+        try {
+            for (;;) {
+                // Init args.
+                p.pageId = pageId;
+                p.fwdId = fwdId;
+
+                Result res = read(pageId, page, search, p, lvl, RETRY);
+
+                switch (res) {
+                    case GO_DOWN:
+                    case GO_DOWN_X:
+                        assert lvl > 0 : lvl;
+                        assert p.pageId != pageId;
+                        assert p.fwdId != fwdId || fwdId == 0;
+
+                        res = p.tryReplaceInner(pageId, page, fwdId, lvl);
+
+                        if (res != RETRY) // Go down recursively.
+                            res = putDown(p, p.pageId, p.fwdId, lvl - 1);
+
+                        if (res == RETRY_ROOT || p.isFinished())
+                            return res;
+
+                        if (res == RETRY)
+                            checkInterrupted();
+
+                        continue; // We have to insert split row to this level or it is a retry.
+
+                    case FOUND: // Do replace.
+                        assert lvl == 0 : "This replace can happen only at the bottom level.";
+
+                        return p.tryReplace(pageId, page, fwdId, lvl);
+
+                    case NOT_FOUND: // Do insert.
+                        assert lvl == p.btmLvl : "must insert at the bottom level";
+                        assert p.needReplaceInner == FALSE : p.needReplaceInner + " " + lvl;
+
+                        return p.tryInsert(pageId, page, fwdId, lvl);
+
+                    default:
+                        return res;
+                }
+            }
+        }
+        finally {
+            if (p.canRelease(pageId, lvl))
+                releasePage(pageId, page);
+        }
+    }
+
+    /**
+     * @param io IO.
+     * @param pageAddr Page address.
+     * @param back Backward page.
+     * @return Page ID.
+     */
+    private long doAskNeighbor(BPlusIO<L> io, long pageAddr, boolean back) {
+        long res;
+
+        if (back) {
+            // Count can be 0 here if it is a routing page, in this case we have a single child.
+            int cnt = io.getCount(pageAddr);
+
+            // We need to do get the rightmost child: io.getRight(cnt - 1),
+            // here io.getLeft(cnt) is the same, but handles negative index if count is 0.
+            res = inner(io).getLeft(pageAddr, cnt);
+        }
+        else // Leftmost child.
+            res = inner(io).getLeft(pageAddr, 0);
+
+        assert res != 0 : "inner page with no route down: " + U.hexLong(PageIO.getPageId(pageAddr));
+
+        return res;
+    }
+
+    /** {@inheritDoc} */
+    @Override public String toString() {
+        return S.toString(BPlusTree.class, this);
+    }
+
+    /**
+     * Get operation.
+     */
+    private abstract class Get {
+        /** */
+        long rmvId;
+
+        /** Starting point root level. May be outdated. Must be modified only in {@link Get#init()}. */
+        int rootLvl;
+
+        /** Starting point root ID. May be outdated. Must be modified only in {@link Get#init()}. */
+        long rootId;
+
+        /** */
+        L row;
+
+        /** In/Out parameter: Page ID. */
+        long pageId;
+
+        /** In/Out parameter: expected forward page ID. */
+        long fwdId;
+
+        /** In/Out parameter: in case of right turn this field will contain backward page ID for the child. */
+        long backId;
+
+        /** */
+        int shift;
+
+        /** If this operation is a part of invoke. */
+        Invoke invoke;
+
+        /** Ignore row passed, find last row */
+        boolean findLast;
+
+        /**
+         * @param row Row.
+         * @param findLast find last row.
+         */
+        Get(L row, boolean findLast) {
+            assert findLast ^ row != null;
+
+            this.row = row;
+            this.findLast = findLast;
+        }
+
+        /**
+         * @param g Other operation to copy from.
+         */
+        final void copyFrom(Get g) {
+            rmvId = g.rmvId;
+            rootLvl = g.rootLvl;
+            pageId = g.pageId;
+            fwdId = g.fwdId;
+            backId = g.backId;
+            shift = g.shift;
+            findLast = g.findLast;
+        }
+
+        /**
+         * Initialize operation.
+         *
+         * @throws IgniteCheckedException If failed.
+         */
+        final void init() throws IgniteCheckedException {
+            TreeMetaData meta0 = treeMeta();
+
+            assert meta0 != null;
+
+            restartFromRoot(meta0.rootId, meta0.rootLvl, globalRmvId.get());
+        }
+
+        /**
+         * @param rootId Root page ID.
+         * @param rootLvl Root level.
+         * @param rmvId Remove ID to be afraid of.
+         */
+        void restartFromRoot(long rootId, int rootLvl, long rmvId) {
+            this.rootId = rootId;
+            this.rootLvl = rootLvl;
+            this.rmvId = rmvId;
+        }
+
+        /**
+         * @param io IO.
+         * @param pageAddr Page address.
+         * @param idx Index of found entry.
+         * @param lvl Level.
+         * @return {@code true} If we need to stop.
+         * @throws IgniteCheckedException If failed.
+         */
+        boolean found(BPlusIO<L> io, long pageAddr, int idx, int lvl) throws IgniteCheckedException {
+            assert lvl >= 0;
+
+            return lvl == 0; // Stop if we are at the bottom.
+        }
+
+        /**
+         * @param io IO.
+         * @param pageAddr Page address.
+         * @param idx Insertion point.
+         * @param lvl Level.
+         * @return {@code true} If we need to stop.
+         * @throws IgniteCheckedException If failed.
+         */
+        boolean notFound(BPlusIO<L> io, long pageAddr, int idx, int lvl) throws IgniteCheckedException {
+            assert lvl >= 0;
+
+            return lvl == 0; // Stop if we are at the bottom.
+        }
+
+        /**
+         * @param pageId Page.
+         * @param lvl Level.
+         * @return {@code true} If we can release the given page.
+         */
+        boolean canRelease(long pageId, int lvl) {
+            return pageId != 0L;
+        }
+
+        /**
+         * @param backId Back page ID.
+         */
+        void backId(long backId) {
+            this.backId = backId;
+        }
+
+        /**
+         * @param pageId Page ID.
+         */
+        void pageId(long pageId) {
+            this.pageId = pageId;
+        }
+
+        /**
+         * @param fwdId Forward page ID.
+         */
+        void fwdId(long fwdId) {
+            this.fwdId = fwdId;
+        }
+
+        /**
+         * @return {@code true} If the operation is finished.
+         */
+        boolean isFinished() {
+            throw new IllegalStateException();
+        }
+    }
+
+    /**
+     * Get a single entry.
+     */
+    private final class GetOne extends Get {
+        /** */
+        Object x;
+
+        /**
+         * @param row Row.
+         * @param x Implementation specific argument.
+         * @param findLast Ignore row passed, find last row
+         */
+        private GetOne(L row, Object x, boolean findLast) {
+            super(row, findLast);
+
+            this.x = x;
+        }
+
+        /** {@inheritDoc} */
+        @SuppressWarnings("unchecked")
+        @Override boolean found(BPlusIO<L> io, long pageAddr, int idx, int lvl) throws IgniteCheckedException {
+            // Check if we are on an inner page and can't get row from it.
+            if (lvl != 0 && !canGetRowFromInner)
+                return false;
+
+            row = getRow(io, pageAddr, idx, x);
+
+            return true;
+        }
+    }
+
+    /**
+     * Get a cursor for range.
+     */
+    private final class GetCursor extends Get {
+        /** */
+        ForwardCursor cursor;
+
+        /**
+         * @param lower Lower bound.
+         * @param shift Shift.
+         * @param cursor Cursor.
+         */
+        GetCursor(L lower, int shift, ForwardCursor cursor) {
+            super(lower, false);
+
+            assert shift != 0; // Either handle range of equal rows or find a greater row after concurrent merge.
+
+            this.shift = shift;
+            this.cursor = cursor;
+        }
+
+        /** {@inheritDoc} */
+        @Override boolean found(BPlusIO<L> io, long pageAddr, int idx, int lvl) throws IgniteCheckedException {
+            throw new IllegalStateException(); // Must never be called because we always have a shift.
+        }
+
+        /** {@inheritDoc} */
+        @Override boolean notFound(BPlusIO<L> io, long pageAddr, int idx, int lvl) throws IgniteCheckedException {
+            if (lvl != 0)
+                return false;
+
+            cursor.init(pageAddr, io, idx);
+
+            return true;
+        }
+    }
+
+    /**
+     * Put operation.
+     */
+    private final class Put extends Get {
+        /** Right child page ID for split row. */
+        long rightId;
+
+        /** Replaced row if any. */
+        T oldRow;
+
+        /**
+         * This page is kept locked after split until insert to the upper level will not be finished.
+         * It is needed because split row will be "in flight" and if we'll release tail, remove on
+         * split row may fail.
+         */
+        long tailId;
+
+        /** */
+        long tailPage;
+
+        /** */
+        long tailAddr;
+
+        /**
+         * Bottom level for insertion (insert can't go deeper). Will be incremented on split on each level.
+         */
+        short btmLvl;
+
+        /** */
+        Bool needReplaceInner = FALSE;
+
+        /** */
+        final boolean needOld;
+
+        /**
+         * @param row Row.
+         * @param needOld {@code True} If need return old value.
+         */
+        private Put(T row, boolean needOld) {
+            super(row, false);
+
+            this.needOld = needOld;
+        }
+
+        /** {@inheritDoc} */
+        @Override boolean found(BPlusIO<L> io, long pageAddr, int idx, int lvl) {
+            if (lvl == 0) // Leaf: need to stop.
+                return true;
+
+            assert btmLvl == 0; // It can not be insert.
+
+            // If we can get full row from the inner page, we have to replace it with the new one. On the way down
+            // we can not miss inner key even in presence of concurrent operations because of `triangle` invariant +
+            // concurrent inner replace handling by retrying from root.
+            if (canGetRowFromInner && needReplaceInner == FALSE)
+                needReplaceInner = TRUE;
+
+            return false;
+        }
+
+        /** {@inheritDoc} */
+        @Override boolean notFound(BPlusIO<L> io, long pageAddr, int idx, int lvl) {
+            assert btmLvl >= 0 : btmLvl;
+            assert lvl >= btmLvl : lvl;
+
+            return lvl == btmLvl;
+        }
+
+        /**
+         * @param tailId Tail page ID.
+         * @param tailPage Tail page pointer.
+         * @param tailPageAddr Tail page address
+         */
+        private void tail(long tailId, long tailPage, long tailPageAddr) {
+            assert (tailId == 0L) == (tailPage == 0L);
+            assert (tailPage == 0L) == (tailPageAddr == 0L);
+
+            if (this.tailPage != 0L)
+                writeUnlockAndClose(this.tailId, this.tailPage, this.tailAddr, null);
+
+            this.tailId = tailId;
+            this.tailPage = tailPage;
+            this.tailAddr = tailPageAddr;
+        }
+
+        /** {@inheritDoc} */
+        @Override boolean canRelease(long pageId, int lvl) {
+            return pageId != 0L && tailId != pageId;
+        }
+
+        /**
+         * Finish put.
+         */
+        private void finish() {
+            row = null;
+            rightId = 0;
+
+            tail(0L, 0L, 0L);
+        }
+
+        /** {@inheritDoc} */
+        @Override boolean isFinished() {
+            return row == null;
+        }
+
+        /**
+         * @param pageId Page ID.
+         * @param page Page pointer.
+         * @param pageAddr Page address.
+         * @param io IO.
+         * @param idx Index.
+         * @param lvl Level.
+         * @return Move up row.
+         * @throws IgniteCheckedException If failed.
+         */
+        private L insert(long pageId, long page, long pageAddr, BPlusIO<L> io, int idx, int lvl)
+            throws IgniteCheckedException {

<TRUNCATED>

Mime
View raw message