asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mb...@apache.org
Subject [02/12] incubator-asterixdb git commit: ASTERIXDB-1436: Big Object Support For Storage
Date Fri, 13 May 2016 02:40:52 GMT
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrame.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrame.java
index 5b2bdfc..c2b18b7 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrame.java
@@ -26,12 +26,13 @@ import java.util.Collections;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.data.std.primitive.IntegerPointable;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import org.apache.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
 import org.apache.hyracks.storage.am.btree.impls.BTreeOpContext.PageValidationInfo;
 import org.apache.hyracks.storage.am.btree.impls.RangePredicate;
+import org.apache.hyracks.storage.am.common.api.IMetaDataPageManager;
 import org.apache.hyracks.storage.am.common.api.ISplitKey;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
 import org.apache.hyracks.storage.am.common.api.TreeIndexException;
@@ -41,10 +42,11 @@ import org.apache.hyracks.storage.am.common.ophelpers.FindTupleMode;
 import org.apache.hyracks.storage.am.common.ophelpers.FindTupleNoExactMatchPolicy;
 import org.apache.hyracks.storage.am.common.ophelpers.MultiComparator;
 import org.apache.hyracks.storage.am.common.ophelpers.SlotOffTupleOff;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 
 public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeInteriorFrame {
 
-    private static final int rightLeafOff = smFlagOff + 1;
+    private static final int rightLeafOff = flagOff + 1; // 22
     private static final int childPtrSize = 4;
 
     private final ITreeIndexTupleReference cmpFrameTuple;
@@ -53,7 +55,7 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
     private MultiComparator cmp;
 
     public BTreeNSMInteriorFrame(ITreeIndexTupleWriter tupleWriter) {
-        super(tupleWriter, new OrderedSlotManager());
+        super(tupleWriter, new OrderedSlotManager(), null);
         cmpFrameTuple = tupleWriter.createTupleReference();
         previousFt = tupleWriter.createTupleReference();
     }
@@ -80,9 +82,13 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
     }
 
     @Override
-    public FrameOpSpaceStatus hasSpaceInsert(ITupleReference tuple) {
+    public FrameOpSpaceStatus hasSpaceInsert(ITupleReference tuple) throws HyracksDataException {
+        int tupleSize = tupleWriter.bytesRequired(tuple) + childPtrSize;
+        if (tupleSize > getMaxTupleSize(buf.capacity())) {
+            return FrameOpSpaceStatus.TOO_LARGE;
+        }
         // Tuple bytes + child pointer + slot.
-        int bytesRequired = tupleWriter.bytesRequired(tuple) + childPtrSize + slotManager.getSlotSize();
+        int bytesRequired = tupleSize + slotManager.getSlotSize();
         if (bytesRequired <= getFreeContiguousSpace()) {
             return FrameOpSpaceStatus.SUFFICIENT_CONTIGUOUS_SPACE;
         }
@@ -194,8 +200,9 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
     }
 
     @Override
-    public void split(ITreeIndexFrame rightFrame, ITupleReference tuple, ISplitKey splitKey)
-            throws HyracksDataException {
+    public void split(ITreeIndexFrame rightFrame, ITupleReference tuple, ISplitKey splitKey,
+            IMetaDataPageManager freePageManager, ITreeIndexMetaDataFrame metaFrame, IBufferCache bufferCache)
+                    throws HyracksDataException, TreeIndexException {
         ByteBuffer right = rightFrame.getBuffer();
         int tupleCount = getTupleCount();
 
@@ -233,8 +240,8 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
 
             // On the right page we need to copy rightmost slots to left.
             int src = rightFrame.getSlotManager().getSlotEndOff();
-            int dest = rightFrame.getSlotManager().getSlotEndOff() + tuplesToLeft
-                    * rightFrame.getSlotManager().getSlotSize();
+            int dest = rightFrame.getSlotManager().getSlotEndOff()
+                    + tuplesToLeft * rightFrame.getSlotManager().getSlotSize();
             int length = rightFrame.getSlotManager().getSlotSize() * tuplesToRight;
             System.arraycopy(right.array(), src, right.array(), dest, length);
             right.putInt(tupleCountOff, tuplesToRight);
@@ -365,12 +372,6 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
     }
 
     @Override
-    protected void resetSpaceParams() {
-        buf.putInt(freeSpaceOff, rightLeafOff + childPtrSize);
-        buf.putInt(totalFreeSpaceOff, buf.capacity() - (rightLeafOff + childPtrSize));
-    }
-
-    @Override
     public int getLeftmostChildPageId() {
         int tupleOff = slotManager.getTupleOff(slotManager.getSlotStartOff());
         frameTuple.resetByTupleOffset(buf, tupleOff);
@@ -398,20 +399,6 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
     }
 
     @Override
-    public boolean getSmFlag() {
-        return buf.get(smFlagOff) != 0;
-    }
-
-    @Override
-    public void setSmFlag(boolean smFlag) {
-        if (smFlag) {
-            buf.put(smFlagOff, (byte) 1);
-        } else {
-            buf.put(smFlagOff, (byte) 0);
-        }
-    }
-
-    @Override
     public void setMultiComparator(MultiComparator cmp) {
         this.cmp = cmp;
         cmpFrameTuple.setFieldCount(cmp.getKeyFieldCount());
@@ -434,8 +421,9 @@ public class BTreeNSMInteriorFrame extends TreeIndexNSMFrame implements IBTreeIn
         for (int i = 0; i < tupleCount; i++) {
             int tupleOff = slotManager.getTupleOff(slotManager.getSlotOff(i));
             frameTuple.resetByTupleOffset(buf, tupleOff);
-            int intVal = IntegerPointable.getInteger(buf.array(), frameTuple.getFieldStart(frameTuple.getFieldCount() - 1)
-            + frameTuple.getFieldLength(frameTuple.getFieldCount() - 1));
+            int intVal = IntegerPointable.getInteger(buf.array(),
+                    frameTuple.getFieldStart(frameTuple.getFieldCount() - 1)
+                            + frameTuple.getFieldLength(frameTuple.getFieldCount() - 1));
             ret.add(intVal);
         }
         if (!isLeaf()) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrameFactory.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrameFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrameFactory.java
index 029be11..3737486 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrameFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMInteriorFrameFactory.java
@@ -22,6 +22,7 @@ package org.apache.hyracks.storage.am.btree.frames;
 import org.apache.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriterFactory;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 
 public class BTreeNSMInteriorFrameFactory implements ITreeIndexFrameFactory {
 
@@ -42,4 +43,9 @@ public class BTreeNSMInteriorFrameFactory implements ITreeIndexFrameFactory {
     public ITreeIndexTupleWriterFactory getTupleWriterFactory() {
         return tupleWriterFactory;
     }
+
+    @Override
+    public ILargePageHelper getLargePageHelper() {
+        return null;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
index fef9661..8f560fe 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
@@ -25,31 +25,43 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
 import org.apache.hyracks.storage.am.btree.impls.BTreeOpContext.PageValidationInfo;
+import org.apache.hyracks.storage.am.common.api.IMetaDataPageManager;
 import org.apache.hyracks.storage.am.common.api.ISplitKey;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
 import org.apache.hyracks.storage.am.common.api.TreeIndexException;
 import org.apache.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
 import org.apache.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
+import org.apache.hyracks.storage.am.common.frames.FrameOpSpaceStatus;
 import org.apache.hyracks.storage.am.common.frames.TreeIndexNSMFrame;
 import org.apache.hyracks.storage.am.common.ophelpers.FindTupleMode;
 import org.apache.hyracks.storage.am.common.ophelpers.FindTupleNoExactMatchPolicy;
 import org.apache.hyracks.storage.am.common.ophelpers.MultiComparator;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 
 public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFrame {
-    protected static final int nextLeafOff = smFlagOff + 1;
+    protected static final int nextLeafOff = flagOff + 1; // 22
+    protected static final int supplementalNumPagesOff = nextLeafOff + 4; // 26
+    protected static final int supplementalPageIdOff = supplementalNumPagesOff + 4; // 30
 
     private MultiComparator cmp;
 
     private final ITreeIndexTupleReference previousFt;
 
-    public BTreeNSMLeafFrame(ITreeIndexTupleWriter tupleWriter) {
-        super(tupleWriter, new OrderedSlotManager());
+    public BTreeNSMLeafFrame(ITreeIndexTupleWriter tupleWriter, ILargePageHelper largePageHelper) {
+        super(tupleWriter, new OrderedSlotManager(), largePageHelper);
         previousFt = tupleWriter.createTupleReference();
     }
 
     @Override
+    public int getPageHeaderSize() {
+        return supplementalPageIdOff + 4;
+    }
+
+    @Override
     public int getBytesRequiredToWriteTuple(ITupleReference tuple) {
         return tupleWriter.bytesRequired(tuple) + slotManager.getSlotSize();
     }
@@ -58,6 +70,8 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
     public void initBuffer(byte level) {
         super.initBuffer(level);
         buf.putInt(nextLeafOff, -1);
+        buf.putInt(supplementalNumPagesOff, 0);
+        buf.putInt(supplementalPageIdOff, -1);
     }
 
     @Override
@@ -70,6 +84,28 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
         return buf.getInt(nextLeafOff);
     }
 
+    public static int getSupplementalNumPages(ByteBuffer buf) {
+        return buf.getInt(supplementalNumPagesOff);
+    }
+
+    public int getSupplementalNumPages() {
+        return getSupplementalNumPages(buf);
+    }
+
+    public static int getSupplementalPageId(ByteBuffer buf) {
+        return buf.getInt(supplementalPageIdOff);
+    }
+
+    public int getSupplementalPageId() {
+        return getSupplementalPageId(buf);
+    }
+
+    public void configureLargePage(int supplementalPages, int supplementalBlockPageId) {
+        setLargeFlag(true);
+        buf.putInt(supplementalNumPagesOff, supplementalPages);
+        buf.putInt(supplementalPageIdOff, supplementalBlockPageId);
+    }
+
     @Override
     public int findInsertTupleIndex(ITupleReference tuple) throws TreeIndexException {
         int tupleIndex;
@@ -166,19 +202,58 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
         insert(tuple, slotManager.getGreatestKeyIndicator());
     }
 
+    boolean isLargeTuple(int tupleSize) {
+        // TODO(mblow): make page size available to avoid calculating it
+        int pageSize = isLargePage() ? buf.capacity() / (getSupplementalNumPages() + 1) : buf.capacity();
+
+        return tupleSize > getMaxTupleSize(pageSize);
+    }
+
     @Override
-    public void split(ITreeIndexFrame rightFrame, ITupleReference tuple, ISplitKey splitKey)
-            throws HyracksDataException {
-        ByteBuffer right = rightFrame.getBuffer();
+    public FrameOpSpaceStatus hasSpaceInsert(ITupleReference tuple) throws HyracksDataException {
+        int tupleSize = getBytesRequiredToWriteTuple(tuple);
+
+        if (isLargeTuple(tupleSize)) {
+            // when do we want to overload this frame instead of creating a new one?
+            // If we have fewer than two tuples in the frame, grow the current page
+            return getTupleCount() < 2 ? FrameOpSpaceStatus.EXPAND : FrameOpSpaceStatus.INSUFFICIENT_SPACE;
+        } else {
+            return super.hasSpaceInsert(tuple);
+        }
+    }
+
+    @Override
+    public FrameOpSpaceStatus hasSpaceUpdate(ITupleReference newTuple, int oldTupleIndex) {
+        frameTuple.resetByTupleIndex(this, oldTupleIndex);
+        int oldTupleBytes = frameTuple.getTupleSize();
+        int newTupleBytes = tupleWriter.bytesRequired(newTuple);
+        FrameOpSpaceStatus status = hasSpaceUpdate(oldTupleBytes, newTupleBytes);
+        if (status == FrameOpSpaceStatus.INSUFFICIENT_SPACE && (isLargePage() || getTupleCount() == 1)
+                && isLargeTuple(newTupleBytes)) {
+            return FrameOpSpaceStatus.EXPAND;
+        }
+        return status;
+    }
+
+    @Override
+    public void split(ITreeIndexFrame rightFrame, ITupleReference tuple, ISplitKey splitKey,
+            IMetaDataPageManager freePageManager, ITreeIndexMetaDataFrame metaFrame, IBufferCache bufferCache)
+                    throws HyracksDataException {
+
+        int tupleSize = getBytesRequiredToWriteTuple(tuple);
+
+        boolean tupleLarge = isLargeTuple(tupleSize);
+
+        // normal case.
         int tupleCount = getTupleCount();
 
         // Find split point, and determine into which frame the new tuple should
         // be inserted into.
-        ITreeIndexFrame targetFrame = null;
+        BTreeNSMLeafFrame targetFrame = null;
         frameTuple.resetByTupleIndex(this, tupleCount - 1);
         if (cmp.compare(tuple, frameTuple) > 0) {
             // This is a special optimization case when the tuple to be inserted is the largest key on the page.
-            targetFrame = rightFrame;
+            targetFrame = (BTreeNSMLeafFrame) rightFrame;
         } else {
             int tuplesToLeft;
             int totalSize = 0;
@@ -194,20 +269,33 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
 
             if (cmp.compare(tuple, frameTuple) >= 0) {
                 tuplesToLeft = i + 1;
-                targetFrame = rightFrame;
+                targetFrame = (BTreeNSMLeafFrame) rightFrame;
             } else {
                 tuplesToLeft = i;
                 targetFrame = this;
             }
             int tuplesToRight = tupleCount - tuplesToLeft;
+            int supplementalPages = 0;
+            int supplementalPageId = -1;
+            if (isLargePage()) {
+                ((BTreeNSMLeafFrame) rightFrame).growCapacity(freePageManager, metaFrame, bufferCache,
+                        buf.capacity() - rightFrame.getBuffer().capacity());
+                supplementalPages = ((BTreeNSMLeafFrame) rightFrame).getSupplementalNumPages();
+                supplementalPageId = ((BTreeNSMLeafFrame) rightFrame).getSupplementalPageId();
+            }
 
+            ByteBuffer right = rightFrame.getBuffer();
             // Copy entire page.
             System.arraycopy(buf.array(), 0, right.array(), 0, buf.capacity());
+            if (isLargePage()) {
+                // restore the supplemental page metadata
+                ((BTreeNSMLeafFrame) rightFrame).configureLargePage(supplementalPages, supplementalPageId);
+            }
 
             // On the right page we need to copy rightmost slots to the left.
             int src = rightFrame.getSlotManager().getSlotEndOff();
-            int dest = rightFrame.getSlotManager().getSlotEndOff() + tuplesToLeft
-                    * rightFrame.getSlotManager().getSlotSize();
+            int dest = rightFrame.getSlotManager().getSlotEndOff()
+                    + tuplesToLeft * rightFrame.getSlotManager().getSlotSize();
             int length = rightFrame.getSlotManager().getSlotSize() * tuplesToRight;
             System.arraycopy(right.array(), src, right.array(), dest, length);
             right.putInt(tupleCountOff, tuplesToRight);
@@ -220,6 +308,10 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
             compact();
         }
 
+        if (tupleLarge) {
+            targetFrame.ensureCapacity(freePageManager, metaFrame, bufferCache, tuple);
+        }
+
         // Insert the new tuple.
         int targetTupleIndex;
         // it's safe to catch this exception since it will have been caught
@@ -240,10 +332,49 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
         splitKey.getTuple().resetByTupleOffset(splitKey.getBuffer(), 0);
     }
 
-    @Override
-    protected void resetSpaceParams() {
-        buf.putInt(freeSpaceOff, nextLeafOff + 4);
-        buf.putInt(totalFreeSpaceOff, buf.capacity() - (nextLeafOff + 4));
+    public void ensureCapacity(IMetaDataPageManager freePageManager, ITreeIndexMetaDataFrame metaFrame,
+            IBufferCache bufferCache, ITupleReference tuple) throws HyracksDataException {
+        int gapBytes = getBytesRequiredToWriteTuple(tuple) - getFreeContiguousSpace();
+        growCapacity(freePageManager, metaFrame, bufferCache, gapBytes);
+    }
+
+    public void growCapacity(IMetaDataPageManager freePageManager, ITreeIndexMetaDataFrame metaFrame,
+            IBufferCache bufferCache, int delta) throws HyracksDataException {
+        if (delta <= 0) {
+            setLargeFlag(true);
+            return;
+        }
+        int deltaPages = (int) Math.ceil((double) delta / bufferCache.getPageSize());
+        int framePagesOld = getBuffer().capacity() / bufferCache.getPageSize();
+        int oldSupplementalPages = 0;
+        int oldSupplementalPageId = -1;
+        if (isLargePage()) {
+            oldSupplementalPages = getSupplementalNumPages();
+            oldSupplementalPageId = getSupplementalPageId();
+        }
+
+        configureLargePage(framePagesOld + deltaPages - 1,
+                freePageManager.getFreePageBlock(metaFrame, framePagesOld + deltaPages - 1));
+
+        int pageDelta = (framePagesOld + deltaPages) - 1 - oldSupplementalPages;
+
+        // we need to get the old slot offsets before we grow
+        int oldSlotEnd = slotManager.getSlotEndOff();
+        int oldSlotStart = slotManager.getSlotStartOff() + slotManager.getSlotSize();
+
+        bufferCache.resizePage(getPage(), framePagesOld + deltaPages);
+        buf = getPage().getBuffer();
+
+        // return the dropped supplemental pages to the page manager...
+        if (oldSupplementalPages > 0) {
+            freePageManager.addFreePageBlock(metaFrame, oldSupplementalPageId, oldSupplementalPages);
+        }
+
+        // fixup the slots
+        System.arraycopy(buf.array(), oldSlotEnd, buf.array(), slotManager.getSlotEndOff(), oldSlotStart - oldSlotEnd);
+
+        // fixup total free space counter
+        buf.putInt(totalFreeSpaceOff, buf.getInt(totalFreeSpaceOff) + (bufferCache.getPageSize() * pageDelta));
     }
 
     @Override
@@ -258,25 +389,6 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
     }
 
     @Override
-    public int getPageHeaderSize() {
-        return nextLeafOff + 4;
-    }
-
-    @Override
-    public boolean getSmFlag() {
-        return buf.get(smFlagOff) != 0;
-    }
-
-    @Override
-    public void setSmFlag(boolean smFlag) {
-        if (smFlag) {
-            buf.put(smFlagOff, (byte) 1);
-        } else {
-            buf.put(smFlagOff, (byte) 0);
-        }
-    }
-
-    @Override
     public void setMultiComparator(MultiComparator cmp) {
         this.cmp = cmp;
     }
@@ -299,4 +411,13 @@ public class BTreeNSMLeafFrame extends TreeIndexNSMFrame implements IBTreeLeafFr
             }
         }
     }
+
+    @Override
+    public String printHeader() {
+        StringBuilder strBuilder = new StringBuilder(super.printHeader());
+        strBuilder.append("nextLeafOff:       " + nextLeafOff + "\n");
+        strBuilder.append("supplementalNumPagesOff: " + supplementalNumPagesOff + "\n");
+        strBuilder.append("supplementalPageIdOff: " + supplementalPageIdOff + "\n");
+        return strBuilder.toString();
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrameFactory.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrameFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrameFactory.java
index 5712b38..2b7f12b 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrameFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/frames/BTreeNSMLeafFrameFactory.java
@@ -22,6 +22,7 @@ package org.apache.hyracks.storage.am.btree.frames;
 import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriterFactory;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 
 public class BTreeNSMLeafFrameFactory implements ITreeIndexFrameFactory {
 
@@ -35,11 +36,16 @@ public class BTreeNSMLeafFrameFactory implements ITreeIndexFrameFactory {
 
     @Override
     public IBTreeLeafFrame createFrame() {
-        return new BTreeNSMLeafFrame(tupleWriterFactory.createTupleWriter());
+        return new BTreeNSMLeafFrame(tupleWriterFactory.createTupleWriter(), getLargePageHelper());
     }
 
     @Override
     public ITreeIndexTupleWriterFactory getTupleWriterFactory() {
         return tupleWriterFactory;
     }
+
+    @Override
+    public ILargePageHelper getLargePageHelper() {
+        return BTreeLargeFrameHelper.INSTANCE;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
index a017ea5..3043940 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
@@ -40,8 +40,22 @@ import org.apache.hyracks.storage.am.btree.exceptions.BTreeException;
 import org.apache.hyracks.storage.am.btree.exceptions.BTreeNotUpdateableException;
 import org.apache.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrame;
 import org.apache.hyracks.storage.am.btree.impls.BTreeOpContext.PageValidationInfo;
-import org.apache.hyracks.storage.am.common.api.*;
+import org.apache.hyracks.storage.am.common.api.IIndexAccessor;
+import org.apache.hyracks.storage.am.common.api.IIndexBulkLoader;
+import org.apache.hyracks.storage.am.common.api.IIndexCursor;
 import org.apache.hyracks.storage.am.common.api.IMetaDataPageManager;
+import org.apache.hyracks.storage.am.common.api.IModificationOperationCallback;
+import org.apache.hyracks.storage.am.common.api.ISearchOperationCallback;
+import org.apache.hyracks.storage.am.common.api.ISearchPredicate;
+import org.apache.hyracks.storage.am.common.api.ISplitKey;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexAccessor;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexCursor;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
+import org.apache.hyracks.storage.am.common.api.IndexException;
+import org.apache.hyracks.storage.am.common.api.TreeIndexException;
+import org.apache.hyracks.storage.am.common.api.UnsortedInputException;
 import org.apache.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
 import org.apache.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
 import org.apache.hyracks.storage.am.common.frames.FrameOpSpaceStatus;
@@ -52,6 +66,7 @@ import org.apache.hyracks.storage.am.common.impls.TreeIndexDiskOrderScanCursor;
 import org.apache.hyracks.storage.am.common.ophelpers.IndexOperation;
 import org.apache.hyracks.storage.am.common.ophelpers.MultiComparator;
 import org.apache.hyracks.storage.common.buffercache.BufferCache;
+import org.apache.hyracks.storage.common.buffercache.CachedPage;
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.file.BufferedFileHandle;
@@ -88,7 +103,8 @@ public class BTree extends AbstractTreeIndex {
         RangePredicate diskOrderScanPred = new RangePredicate(null, null, true, true, ctx.cmp, ctx.cmp);
         int maxPageId = freePageManager.getMaxPage(ctx.metaFrame);
         int currentPageId = bulkloadLeafStart;
-        ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), false);
+        ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), false,
+                largePageHelper);
         page.acquireReadLatch();
         try {
             cursor.setBufferCache(bufferCache);
@@ -120,7 +136,7 @@ public class BTree extends AbstractTreeIndex {
     }
 
     private void validate(BTreeOpContext ctx, int pageId) throws HyracksDataException {
-        ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
+        ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false, largePageHelper);
         ctx.interiorFrame.setPage(page);
         PageValidationInfo currentPvi = ctx.validationInfos.peekFirst();
 
@@ -204,7 +220,8 @@ public class BTree extends AbstractTreeIndex {
         ICachedPage originalPage = ctx.interiorFrame.getPage();
         for (int i = 0; i < ctx.smPages.size(); i++) {
             int pageId = ctx.smPages.get(i);
-            ICachedPage smPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
+            ICachedPage smPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false,
+                    largePageHelper);
             smPage.acquireWriteLatch();
             try {
                 ctx.interiorFrame.setPage(smPage);
@@ -229,22 +246,35 @@ public class BTree extends AbstractTreeIndex {
     private void createNewRoot(BTreeOpContext ctx) throws HyracksDataException, TreeIndexException {
         // Make sure the root is always in the same page.
         ICachedPage leftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, ctx.splitKey.getLeftPage()),
-                false);
+                false, largePageHelper);
         leftNode.acquireWriteLatch();
         try {
             int newLeftId = freePageManager.getFreePage(ctx.metaFrame);
-            ICachedPage newLeftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, newLeftId), true);
+            ICachedPage newLeftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, newLeftId), true,
+                    largePageHelper);
             newLeftNode.acquireWriteLatch();
             try {
+                boolean largePage = false;
+                if (leftNode.getBuffer().capacity() > newLeftNode.getBuffer().capacity()) {
+                    bufferCache.resizePage(newLeftNode, leftNode.getBuffer().capacity() / bufferCache.getPageSize());
+                    largePage = true;
+                }
                 // Copy left child to new left child.
-                System.arraycopy(leftNode.getBuffer().array(), 0, newLeftNode.getBuffer().array(), 0, newLeftNode
-                        .getBuffer().capacity());
+                System.arraycopy(leftNode.getBuffer().array(), 0, newLeftNode.getBuffer().array(), 0,
+                        newLeftNode.getBuffer().capacity());
                 ctx.interiorFrame.setPage(newLeftNode);
                 ctx.interiorFrame.setSmFlag(false);
                 // Remember LSN to set it in the root.
                 long leftNodeLSN = ctx.interiorFrame.getPageLsn();
                 // Initialize new root (leftNode becomes new root).
-                ctx.interiorFrame.setPage(leftNode);
+                if (largePage) {
+                    bufferCache.resizePage(leftNode, 1);
+                    ctx.interiorFrame.setPage(leftNode);
+                    ctx.interiorFrame.setLargeFlag(false);
+                } else {
+                    ctx.interiorFrame.setPage(leftNode);
+                    ctx.interiorFrame.setLargeFlag(false);
+                }
                 ctx.interiorFrame.initBuffer((byte) (ctx.interiorFrame.getLevel() + 1));
                 // Copy over LSN.
                 ctx.interiorFrame.setPageLsn(leftNodeLSN);
@@ -252,6 +282,11 @@ public class BTree extends AbstractTreeIndex {
                 ctx.interiorFrame.setSmFlag(true);
                 ctx.splitKey.setLeftPage(newLeftId);
                 int targetTupleIndex = ctx.interiorFrame.findInsertTupleIndex(ctx.splitKey.getTuple());
+                int tupleSize = ctx.interiorFrame.getBytesRequiredToWriteTuple(ctx.splitKey.getTuple());
+                if (tupleSize > maxTupleSize) {
+                    throw new TreeIndexException("Space required for record (" + tupleSize
+                            + ") larger than maximum acceptable size (" + maxTupleSize + ")");
+                }
                 ctx.interiorFrame.insert(ctx.splitKey.getTuple(), targetTupleIndex);
             } finally {
                 newLeftNode.releaseWriteLatch(true);
@@ -263,8 +298,8 @@ public class BTree extends AbstractTreeIndex {
         }
     }
 
-    private void insertUpdateOrDelete(ITupleReference tuple, BTreeOpContext ctx) throws HyracksDataException,
-            TreeIndexException {
+    private void insertUpdateOrDelete(ITupleReference tuple, BTreeOpContext ctx)
+            throws HyracksDataException, TreeIndexException {
         ctx.reset();
         ctx.pred.setLowKeyComparator(ctx.cmp);
         ctx.pred.setHighKeyComparator(ctx.cmp);
@@ -304,23 +339,11 @@ public class BTree extends AbstractTreeIndex {
     }
 
     private void insert(ITupleReference tuple, BTreeOpContext ctx) throws HyracksDataException, TreeIndexException {
-        int tupleSize = Math.max(ctx.leafFrame.getBytesRequiredToWriteTuple(tuple),
-                ctx.interiorFrame.getBytesRequiredToWriteTuple(tuple));
-        if (tupleSize > maxTupleSize) {
-            throw new TreeIndexException("Space required for record (" + tupleSize
-                    + ") larger than maximum acceptable size (" + maxTupleSize + ")");
-        }
         ctx.modificationCallback.before(tuple);
         insertUpdateOrDelete(tuple, ctx);
     }
 
     private void upsert(ITupleReference tuple, BTreeOpContext ctx) throws HyracksDataException, TreeIndexException {
-        int tupleSize = Math.max(ctx.leafFrame.getBytesRequiredToWriteTuple(tuple),
-                ctx.interiorFrame.getBytesRequiredToWriteTuple(tuple));
-        if (tupleSize > maxTupleSize) {
-            throw new TreeIndexException("Space required for record (" + tupleSize
-                    + ") larger than maximum acceptable size (" + maxTupleSize + ")");
-        }
         ctx.modificationCallback.before(tuple);
         insertUpdateOrDelete(tuple, ctx);
     }
@@ -332,12 +355,6 @@ public class BTree extends AbstractTreeIndex {
         if (fieldCount == ctx.cmp.getKeyFieldCount()) {
             throw new BTreeNotUpdateableException("Cannot perform updates when the entire tuple forms the key.");
         }
-        int tupleSize = Math.max(ctx.leafFrame.getBytesRequiredToWriteTuple(tuple),
-                ctx.interiorFrame.getBytesRequiredToWriteTuple(tuple));
-        if (tupleSize > maxTupleSize) {
-            throw new TreeIndexException("Space required for record (" + tupleSize
-                    + ") larger than maximum acceptable size (" + maxTupleSize + ")");
-        }
         ctx.modificationCallback.before(tuple);
         insertUpdateOrDelete(tuple, ctx);
     }
@@ -351,7 +368,13 @@ public class BTree extends AbstractTreeIndex {
             throws Exception {
         boolean restartOp = false;
         FrameOpSpaceStatus spaceStatus = ctx.leafFrame.hasSpaceInsert(tuple);
+
         switch (spaceStatus) {
+            case EXPAND: {
+                // TODO: avoid repeated calculation of tuple size
+                ctx.leafFrame.ensureCapacity(freePageManager, ctx.metaFrame, bufferCache, tuple);
+            }
+                // fall-through
             case SUFFICIENT_CONTIGUOUS_SPACE: {
                 ctx.modificationCallback.found(null, tuple);
                 ctx.leafFrame.insert(tuple, targetTupleIndex);
@@ -385,6 +408,9 @@ public class BTree extends AbstractTreeIndex {
                 }
                 break;
             }
+            default: {
+                throw new IllegalStateException("NYI: " + spaceStatus);
+            }
         }
         return restartOp;
     }
@@ -406,7 +432,8 @@ public class BTree extends AbstractTreeIndex {
             }
         }
         int rightPageId = freePageManager.getFreePage(ctx.metaFrame);
-        ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightPageId), true);
+        ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightPageId), true,
+                largePageHelper);
         rightNode.acquireWriteLatch();
         try {
             IBTreeLeafFrame rightFrame = ctx.createLeafFrame();
@@ -422,7 +449,7 @@ public class BTree extends AbstractTreeIndex {
             } else {
                 ctx.modificationCallback.found(null, tuple);
             }
-            ctx.leafFrame.split(rightFrame, tuple, ctx.splitKey);
+            ctx.leafFrame.split(rightFrame, tuple, ctx.splitKey, freePageManager, ctx.metaFrame, bufferCache);
 
             ctx.smPages.add(pageId);
             ctx.smPages.add(rightPageId);
@@ -458,6 +485,19 @@ public class BTree extends AbstractTreeIndex {
                 ctx.splitKey.reset();
                 break;
             }
+            case EXPAND: {
+                // TODO: avoid repeated calculation of tuple size
+                // TODO: in-place update on expand
+                // Delete the old tuple, compact the frame, and insert the new tuple.
+                ctx.modificationCallback.found(beforeTuple, tuple);
+                ctx.leafFrame.delete(tuple, oldTupleIndex);
+                ctx.leafFrame.compact();
+                ctx.leafFrame.ensureCapacity(freePageManager, ctx.metaFrame, bufferCache, tuple);
+                int targetTupleIndex = ctx.leafFrame.findInsertTupleIndex(tuple);
+                ctx.leafFrame.insert(tuple, targetTupleIndex);
+                ctx.splitKey.reset();
+                break;
+            }
             case SUFFICIENT_CONTIGUOUS_SPACE: {
                 ctx.modificationCallback.found(beforeTuple, tuple);
                 ctx.leafFrame.update(tuple, oldTupleIndex, false);
@@ -478,6 +518,9 @@ public class BTree extends AbstractTreeIndex {
                 restartOp = performLeafSplit(pageId, tuple, ctx, oldTupleIndex);
                 break;
             }
+            default: {
+                throw new IllegalStateException("NYI: " + spaceStatus);
+            }
         }
         return restartOp;
     }
@@ -507,7 +550,8 @@ public class BTree extends AbstractTreeIndex {
         switch (spaceStatus) {
             case INSUFFICIENT_SPACE: {
                 int rightPageId = freePageManager.getFreePage(ctx.metaFrame);
-                ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightPageId), true);
+                ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightPageId), true,
+                        largePageHelper);
                 rightNode.acquireWriteLatch();
                 try {
                     IBTreeFrame rightFrame = ctx.createInteriorFrame();
@@ -516,7 +560,8 @@ public class BTree extends AbstractTreeIndex {
                     rightFrame.setMultiComparator(ctx.cmp);
                     // instead of creating a new split key, use the existing
                     // splitKey
-                    ctx.interiorFrame.split(rightFrame, ctx.splitKey.getTuple(), ctx.splitKey);
+                    ctx.interiorFrame.split(rightFrame, ctx.splitKey.getTuple(), ctx.splitKey, freePageManager,
+                            ctx.metaFrame, bufferCache);
                     ctx.smPages.add(pageId);
                     ctx.smPages.add(rightPageId);
                     ctx.interiorFrame.setSmFlag(true);
@@ -547,6 +592,16 @@ public class BTree extends AbstractTreeIndex {
                 ctx.splitKey.reset();
                 break;
             }
+
+            case TOO_LARGE: {
+                int tupleSize = ctx.interiorFrame.getBytesRequiredToWriteTuple(tuple);
+                throw new TreeIndexException("Space required for record (" + tupleSize
+                        + ") larger than maximum acceptable size (" + maxTupleSize + ")");
+            }
+
+            default: {
+                throw new IllegalStateException("NYI: " + spaceStatus);
+            }
         }
     }
 
@@ -576,7 +631,7 @@ public class BTree extends AbstractTreeIndex {
     }
 
     private ICachedPage isConsistent(int pageId, BTreeOpContext ctx) throws Exception {
-        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
+        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false, largePageHelper);
         node.acquireReadLatch();
         ctx.interiorFrame.setPage(node);
         boolean isConsistent = ctx.pageLsns.getLast() == ctx.interiorFrame.getPageLsn();
@@ -590,7 +645,7 @@ public class BTree extends AbstractTreeIndex {
 
     private void performOp(int pageId, ICachedPage parent, boolean parentIsReadLatched, BTreeOpContext ctx)
             throws HyracksDataException, TreeIndexException {
-        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
+        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false, largePageHelper);
         ctx.interiorFrame.setPage(node);
         // this check performs an unprotected read in the page
         // the following could happen: TODO fill out
@@ -622,6 +677,7 @@ public class BTree extends AbstractTreeIndex {
                     while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
                         int childPageId = ctx.interiorFrame.getChildPageId(ctx.pred);
                         performOp(childPageId, node, isReadLatched, ctx);
+                        node = null;
 
                         if (!ctx.pageLsns.isEmpty()) {
                             if (ctx.pageLsns.getLast() == FULL_RESTART_OP) {
@@ -651,7 +707,7 @@ public class BTree extends AbstractTreeIndex {
                                 // Is there a propagated split key?
                                 if (ctx.splitKey.getBuffer() != null) {
                                     ICachedPage interiorNode = bufferCache.pin(
-                                            BufferedFileHandle.getDiskPageId(fileId, pageId), false);
+                                            BufferedFileHandle.getDiskPageId(fileId, pageId), false, largePageHelper);
                                     interiorNode.acquireWriteLatch();
                                     try {
                                         // Insert or update op. Both can cause split keys to propagate upwards.
@@ -780,10 +836,11 @@ public class BTree extends AbstractTreeIndex {
         }
     }
 
-    private BTreeOpContext createOpContext(IIndexAccessor accessor,
-            IModificationOperationCallback modificationCallback, ISearchOperationCallback searchCallback) {
-        return new BTreeOpContext(accessor, leafFrameFactory, interiorFrameFactory, freePageManager
-                .getMetaDataFrameFactory().createFrame(), cmpFactories, modificationCallback, searchCallback);
+    private BTreeOpContext createOpContext(IIndexAccessor accessor, IModificationOperationCallback modificationCallback,
+            ISearchOperationCallback searchCallback) {
+        return new BTreeOpContext(accessor, leafFrameFactory, interiorFrameFactory,
+                freePageManager.getMetaDataFrameFactory().createFrame(), cmpFactories, modificationCallback,
+                searchCallback);
     }
 
     @SuppressWarnings("rawtypes")
@@ -800,7 +857,7 @@ public class BTree extends AbstractTreeIndex {
     public void printTree(int pageId, ICachedPage parent, boolean unpin, IBTreeLeafFrame leafFrame,
             IBTreeInteriorFrame interiorFrame, byte treeHeight, ISerializerDeserializer[] keySerdes,
             StringBuilder strBuilder, MultiComparator cmp) throws Exception {
-        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
+        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false, largePageHelper);
         node.acquireReadLatch();
         try {
             if (parent != null && unpin == true) {
@@ -910,8 +967,8 @@ public class BTree extends AbstractTreeIndex {
         }
 
         @Override
-        public void search(IIndexCursor cursor, ISearchPredicate searchPred) throws HyracksDataException,
-                TreeIndexException {
+        public void search(IIndexCursor cursor, ISearchPredicate searchPred)
+                throws HyracksDataException, TreeIndexException {
             ctx.setOperation(IndexOperation.SEARCH);
             btree.search((ITreeIndexCursor) cursor, searchPred, ctx);
         }
@@ -960,8 +1017,8 @@ public class BTree extends AbstractTreeIndex {
         protected final ISplitKey splitKey;
         protected final boolean verifyInput;
 
-        public BTreeBulkLoader(float fillFactor, boolean verifyInput, boolean appendOnly) throws TreeIndexException,
-                HyracksDataException {
+        public BTreeBulkLoader(float fillFactor, boolean verifyInput, boolean appendOnly)
+                throws TreeIndexException, HyracksDataException {
             super(fillFactor, appendOnly);
             this.verifyInput = verifyInput;
             splitKey = new BTreeSplitKey(leafFrame.getTupleWriter().createTupleReference());
@@ -973,10 +1030,6 @@ public class BTree extends AbstractTreeIndex {
             try {
                 int tupleSize = Math.max(leafFrame.getBytesRequiredToWriteTuple(tuple),
                         interiorFrame.getBytesRequiredToWriteTuple(tuple));
-                if (tupleSize > maxTupleSize) {
-                    throw new TreeIndexException("Space required for record (" + tupleSize
-                            + ") larger than maximum acceptable size (" + maxTupleSize + ")");
-                }
 
                 NodeFrontier leafFrontier = nodeFrontiers.get(0);
 
@@ -990,41 +1043,64 @@ public class BTree extends AbstractTreeIndex {
                 }
                 //full, allocate new page
                 if (spaceUsed + spaceNeeded > leafMaxBytes) {
-                    leafFrontier.lastTuple.resetByTupleIndex(leafFrame, leafFrame.getTupleCount() - 1);
-                    if (verifyInput) {
-                        verifyInputTuple(tuple, leafFrontier.lastTuple);
-                    }
-                    int splitKeySize = tupleWriter.bytesRequired(leafFrontier.lastTuple, 0, cmp.getKeyFieldCount());
-                    splitKey.initData(splitKeySize);
-                    tupleWriter.writeTupleFields(leafFrontier.lastTuple, 0, cmp.getKeyFieldCount(), splitKey
-                            .getBuffer().array(), 0);
-                    splitKey.getTuple().resetByTupleOffset(splitKey.getBuffer(), 0);
-                    splitKey.setLeftPage(leafFrontier.pageId);
+                    if (leafFrame.getTupleCount() == 0) {
+                        bufferCache.returnPage(leafFrontier.page, false);
+                    } else {
+                        leafFrontier.lastTuple.resetByTupleIndex(leafFrame, leafFrame.getTupleCount() - 1);
+                        if (verifyInput) {
+                            verifyInputTuple(tuple, leafFrontier.lastTuple);
+                        }
+                        int splitKeySize = tupleWriter.bytesRequired(leafFrontier.lastTuple, 0, cmp.getKeyFieldCount());
+                        splitKey.initData(splitKeySize);
+                        tupleWriter.writeTupleFields(leafFrontier.lastTuple, 0, cmp.getKeyFieldCount(),
+                                splitKey.getBuffer().array(), 0);
+                        splitKey.getTuple().resetByTupleOffset(splitKey.getBuffer(), 0);
+                        splitKey.setLeftPage(leafFrontier.pageId);
 
-                    propagateBulk(1, pagesToWrite);
-                    leafFrontier.pageId = freePageManager.getFreePage(metaFrame);
+                        propagateBulk(1, pagesToWrite);
 
-                    ((IBTreeLeafFrame) leafFrame).setNextLeaf(leafFrontier.pageId);
+                        leafFrontier.pageId = freePageManager.getFreePage(metaFrame);
 
-                    queue.put(leafFrontier.page);
-                    for (ICachedPage c : pagesToWrite) {
-                        queue.put(c);
-                    }
-                    pagesToWrite.clear();
+                        ((IBTreeLeafFrame) leafFrame).setNextLeaf(leafFrontier.pageId);
+
+                        queue.put(leafFrontier.page);
+                        for (ICachedPage c : pagesToWrite) {
+                            queue.put(c);
+                        }
+                        pagesToWrite.clear();
 
-                    splitKey.setRightPage(leafFrontier.pageId);
-                    leafFrontier.page = bufferCache.confiscatePage(BufferedFileHandle.getDiskPageId(fileId,
-                            leafFrontier.pageId));
-                    leafFrame.setPage(leafFrontier.page);
-                    leafFrame.initBuffer((byte) 0);
+                        splitKey.setRightPage(leafFrontier.pageId);
+                    }
+                    if (tupleSize > maxTupleSize) {
+                        final long dpid = BufferedFileHandle.getDiskPageId(fileId, leafFrontier.pageId);
+                        // calculate required number of pages.
+                        int headerSize = Math.max(leafFrame.getPageHeaderSize(), interiorFrame.getPageHeaderSize());
+                        final int multiplier = (int) Math
+                                .ceil((double) tupleSize / (bufferCache.getPageSize() - headerSize));
+                        leafFrontier.page = bufferCache.confiscateLargePage(dpid, multiplier);
+                        ((CachedPage) leafFrontier.page).setLargePageHelper(largePageHelper);
+                        leafFrame.setPage(leafFrontier.page);
+                        leafFrame.initBuffer((byte) 0);
+                        if (multiplier > 1) {
+                            int supplementalPages = multiplier - 1;
+                            ((IBTreeLeafFrame) leafFrame).configureLargePage(supplementalPages,
+                                    freePageManager.getFreePageBlock(metaFrame, supplementalPages));
+                        } else {
+                            ((IBTreeLeafFrame) leafFrame).setLargeFlag(true);
+                        }
+                    } else {
+                        final long dpid = BufferedFileHandle.getDiskPageId(fileId, leafFrontier.pageId);
+                        leafFrontier.page = bufferCache.confiscatePage(dpid);
+                        ((CachedPage) leafFrontier.page).setLargePageHelper(largePageHelper);
+                        leafFrame.setPage(leafFrontier.page);
+                        leafFrame.initBuffer((byte) 0);
+                    }
                 } else {
                     if (verifyInput && leafFrame.getTupleCount() > 0) {
                         leafFrontier.lastTuple.resetByTupleIndex(leafFrame, leafFrame.getTupleCount() - 1);
                         verifyInputTuple(tuple, leafFrontier.lastTuple);
                     }
                 }
-
-                leafFrame.setPage(leafFrontier.page);
                 ((IBTreeLeafFrame) leafFrame).insertSorted(tuple);
             } catch (IndexException | HyracksDataException | RuntimeException e) {
                 handleException();
@@ -1032,8 +1108,8 @@ public class BTree extends AbstractTreeIndex {
             }
         }
 
-        protected void verifyInputTuple(ITupleReference tuple, ITupleReference prevTuple) throws IndexException,
-                HyracksDataException {
+        protected void verifyInputTuple(ITupleReference tuple, ITupleReference prevTuple)
+                throws IndexException, HyracksDataException {
             // New tuple should be strictly greater than last tuple.
             int cmpResult = cmp.compare(tuple, prevTuple);
             if (cmpResult < 0) {
@@ -1044,7 +1120,8 @@ public class BTree extends AbstractTreeIndex {
             }
         }
 
-        protected void propagateBulk(int level, List<ICachedPage> pagesToWrite) throws HyracksDataException {
+        protected void propagateBulk(int level, List<ICachedPage> pagesToWrite)
+                throws HyracksDataException, TreeIndexException {
             if (splitKey.getBuffer() == null)
                 return;
 
@@ -1055,7 +1132,14 @@ public class BTree extends AbstractTreeIndex {
             interiorFrame.setPage(frontier.page);
 
             ITupleReference tuple = splitKey.getTuple();
-            int spaceNeeded = tupleWriter.bytesRequired(tuple, 0, cmp.getKeyFieldCount()) + slotSize + 4;
+            int tupleBytes = tupleWriter.bytesRequired(tuple, 0, cmp.getKeyFieldCount());
+            int spaceNeeded = tupleBytes + slotSize + 4;
+            if (tupleBytes > interiorFrame.getMaxTupleSize(BTree.this.bufferCache.getPageSize())) {
+                throw new TreeIndexException(
+                        "Space required for record (" + tupleBytes + ") larger than maximum acceptable size ("
+                                + interiorFrame.getMaxTupleSize(BTree.this.bufferCache.getPageSize()) + ")");
+            }
+
             int spaceUsed = interiorFrame.getBuffer().capacity() - interiorFrame.getTotalFreeSpace();
             if (spaceUsed + spaceNeeded > interiorMaxBytes) {
 
@@ -1065,8 +1149,8 @@ public class BTree extends AbstractTreeIndex {
                 frontier.lastTuple.resetByTupleIndex(interiorFrame, interiorFrame.getTupleCount() - 1);
                 int splitKeySize = tupleWriter.bytesRequired(frontier.lastTuple, 0, cmp.getKeyFieldCount());
                 splitKey.initData(splitKeySize);
-                tupleWriter.writeTupleFields(frontier.lastTuple, 0, cmp.getKeyFieldCount(), splitKey.getBuffer()
-                        .array(), 0);
+                tupleWriter.writeTupleFields(frontier.lastTuple, 0, cmp.getKeyFieldCount(),
+                        splitKey.getBuffer().array(), 0);
                 splitKey.getTuple().resetByTupleOffset(splitKey.getBuffer(), 0);
 
                 ((IBTreeInteriorFrame) interiorFrame).deleteGreatest();
@@ -1077,6 +1161,7 @@ public class BTree extends AbstractTreeIndex {
 
                 propagateBulk(level + 1, pagesToWrite);
                 frontier.page = bufferCache.confiscatePage(BufferCache.INVALID_DPID);
+                ((CachedPage) frontier.page).setLargePageHelper(largePageHelper);
                 interiorFrame.setPage(frontier.page);
                 interiorFrame.initBuffer((byte) level);
             }
@@ -1095,7 +1180,7 @@ public class BTree extends AbstractTreeIndex {
             if (level < 1) {
                 ICachedPage lastLeaf = nodeFrontiers.get(level).page;
                 int lastLeafPage = nodeFrontiers.get(level).pageId;
-                setPageDpid(lastLeaf,nodeFrontiers.get(level).pageId);
+                setPageDpid(lastLeaf, nodeFrontiers.get(level).pageId);
                 queue.put(lastLeaf);
                 nodeFrontiers.get(level).page = null;
                 persistFrontiers(level + 1, lastLeafPage);
@@ -1105,7 +1190,8 @@ public class BTree extends AbstractTreeIndex {
             interiorFrame.setPage(frontier.page);
             //just finalize = the layer right above the leaves has correct righthand pointers already
             if (rightPage < 0) {
-                throw new HyracksDataException("Error in index creation. Internal node appears to have no rightmost guide");
+                throw new HyracksDataException(
+                        "Error in index creation. Internal node appears to have no rightmost guide");
             }
             ((IBTreeInteriorFrame) interiorFrame).setRightmostChildPageId(rightPage);
             int finalPageId = freePageManager.getFreePage(metaFrame);
@@ -1118,10 +1204,10 @@ public class BTree extends AbstractTreeIndex {
 
         @Override
         public void end() throws HyracksDataException {
-            try{
+            try {
                 persistFrontiers(0, -1);
                 super.end();
-            } catch ( HyracksDataException | RuntimeException e) {
+            } catch (HyracksDataException | RuntimeException e) {
                 handleException();
                 throw e;
             }
@@ -1132,8 +1218,8 @@ public class BTree extends AbstractTreeIndex {
             super.handleException();
         }
 
-        private void setPageDpid(ICachedPage page, int pageId){
-            bufferCache.setPageDiskId(page, BufferedFileHandle.getDiskPageId(fileId,pageId));
+        private void setPageDpid(ICachedPage page, int pageId) {
+            bufferCache.setPageDiskId(page, BufferedFileHandle.getDiskPageId(fileId, pageId));
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
index 6e8ab65..974860e 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
@@ -24,8 +24,8 @@ import org.apache.hyracks.data.std.primitive.IntegerPointable;
 import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
 import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
-import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import org.apache.hyracks.storage.am.btree.frames.BTreeLargeFrameHelper;
 import org.apache.hyracks.storage.am.common.api.ICursorInitialState;
 import org.apache.hyracks.storage.am.common.api.ISearchPredicate;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexCursor;
@@ -119,7 +119,8 @@ public class BTreeCountingSearchCursor implements ITreeIndexCursor {
 
     private void fetchNextLeafPage(int nextLeafPage) throws HyracksDataException {
         do {
-            ICachedPage nextLeaf = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, nextLeafPage), false);
+            ICachedPage nextLeaf = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, nextLeafPage), false,
+                    BTreeLargeFrameHelper.INSTANCE);
             if (exclusiveLatchNodes) {
                 nextLeaf.acquireWriteLatch();
                 page.releaseWriteLatch(isPageDirty);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
index 3301e37..975bd9b 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTreeRangeSearchCursor.java
@@ -25,6 +25,7 @@ import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleReference;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 import org.apache.hyracks.dataflow.common.util.TupleUtils;
 import org.apache.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import org.apache.hyracks.storage.am.btree.frames.BTreeLargeFrameHelper;
 import org.apache.hyracks.storage.am.common.api.ICursorInitialState;
 import org.apache.hyracks.storage.am.common.api.IIndexAccessor;
 import org.apache.hyracks.storage.am.common.api.ISearchOperationCallback;
@@ -117,7 +118,8 @@ public class BTreeRangeSearchCursor implements ITreeIndexCursor {
 
     private void fetchNextLeafPage(int nextLeafPage) throws HyracksDataException {
         do {
-            ICachedPage nextLeaf = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, nextLeafPage), false);
+            ICachedPage nextLeaf = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, nextLeafPage), false,
+                    BTreeLargeFrameHelper.INSTANCE);
             if (exclusiveLatchNodes) {
                 nextLeaf.acquireWriteLatch();
                 page.releaseWriteLatch(isPageDirty);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/util/BTreeUtils.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/util/BTreeUtils.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/util/BTreeUtils.java
index 7c2abb1..5b6cee9 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/util/BTreeUtils.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/util/BTreeUtils.java
@@ -38,6 +38,7 @@ import org.apache.hyracks.storage.am.common.freepage.LinkedMetaDataPageManager;
 import org.apache.hyracks.storage.am.common.ophelpers.MultiComparator;
 import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 import org.apache.hyracks.storage.common.file.IFileMapProvider;
 
 public class BTreeUtils {
@@ -48,6 +49,7 @@ public class BTreeUtils {
         ITreeIndexFrameFactory leafFrameFactory = getLeafFrameFactory(tupleWriterFactory, leafType);
         ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
         ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
+        ILargePageHelper largePageHelper = leafFrameFactory.getLargePageHelper();
         IMetaDataPageManager freePageManager;
         freePageManager = new LinkedMetaDataPageManager(bufferCache, metaFrameFactory);
         BTree btree = new BTree(bufferCache, fileMapProvider, freePageManager, interiorFrameFactory, leafFrameFactory,
@@ -61,6 +63,7 @@ public class BTreeUtils {
         TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
         ITreeIndexFrameFactory leafFrameFactory = getLeafFrameFactory(tupleWriterFactory, leafType);
         ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
+        ILargePageHelper largePageHelper = leafFrameFactory.getLargePageHelper();
         BTree btree = new BTree(bufferCache, fileMapProvider, freePageManager, interiorFrameFactory, leafFrameFactory,
                 cmpFactories, typeTraits.length, file);
         return btree;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IMetaDataPageManager.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IMetaDataPageManager.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IMetaDataPageManager.java
index 48d9e26..2550ab4 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IMetaDataPageManager.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/IMetaDataPageManager.java
@@ -43,7 +43,7 @@ public interface IMetaDataPageManager {
     public void close() throws HyracksDataException;
 
     /**
-     * Get the locaiton of a free page to use for index operations
+     * Get the location of a free page to use for index operations
      * @param metaFrame A metadata frame to use to wrap the raw page
      * @return A page location, or -1 if no free page could be found or allocated
      * @throws HyracksDataException
@@ -52,6 +52,15 @@ public interface IMetaDataPageManager {
     public int getFreePage(ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException;
 
     /**
+     * Get the location of a block of free pages to use for index operations
+     * @param metaFrame A metadata frame to use to wrap the raw page
+     * @return The starting page location, or -1 if a block of free pages could not be found or allocated
+     * @throws HyracksDataException
+     */
+
+    public int getFreePageBlock(ITreeIndexMetaDataFrame metaFrame, int count) throws HyracksDataException;
+
+    /**
      * Add a page back to the pool of free pages within an index file
      * @param metaFrame A metadata frame to use to wrap the raw page
      * @param freePage The page which to return to the free space
@@ -60,6 +69,9 @@ public interface IMetaDataPageManager {
 
     public void addFreePage(ITreeIndexMetaDataFrame metaFrame, int freePage) throws HyracksDataException;
 
+    public void addFreePageBlock(ITreeIndexMetaDataFrame metaFrame, int startingPage, int count)
+            throws HyracksDataException;
+
     /**
      * Gets the highest page offset according to the metadata
      * @param metaFrame A metadata frame to use to wrap the raw page
@@ -137,5 +149,4 @@ public interface IMetaDataPageManager {
      * @throws HyracksDataException
      */
     long getLSNOffset() throws HyracksDataException;
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
index 9ac09a3..711db9e 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
@@ -25,7 +25,9 @@ import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 import org.apache.hyracks.storage.am.common.frames.FrameOpSpaceStatus;
 import org.apache.hyracks.storage.am.common.ophelpers.MultiComparator;
+import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 
 public interface ITreeIndexFrame {
 
@@ -70,8 +72,9 @@ public interface ITreeIndexFrame {
     // for debugging
     public String printHeader();
 
-    public void split(ITreeIndexFrame rightFrame, ITupleReference tuple, ISplitKey splitKey)
-            throws HyracksDataException;
+    public void split(ITreeIndexFrame rightFrame, ITupleReference tuple, ISplitKey splitKey,
+                      IMetaDataPageManager freePageManager, ITreeIndexMetaDataFrame metaFrame, IBufferCache bufferCache)
+            throws HyracksDataException, TreeIndexException;
 
     public ISlotManager getSlotManager();
 
@@ -102,4 +105,5 @@ public interface ITreeIndexFrame {
 
     public void setMultiComparator(MultiComparator cmp);
 
+    ILargePageHelper getLargePageHelper();
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrameFactory.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrameFactory.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrameFactory.java
index 2fd3009..248baf5 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrameFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrameFactory.java
@@ -20,7 +20,10 @@ package org.apache.hyracks.storage.am.common.api;
 
 import java.io.Serializable;
 
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
+
 public interface ITreeIndexFrameFactory extends Serializable {
-    public ITreeIndexFrame createFrame();
-    public ITreeIndexTupleWriterFactory getTupleWriterFactory();
+    ITreeIndexFrame createFrame();
+    ITreeIndexTupleWriterFactory getTupleWriterFactory();
+    ILargePageHelper getLargePageHelper();
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/FrameOpSpaceStatus.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/FrameOpSpaceStatus.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/FrameOpSpaceStatus.java
index 0af94a3..dabd5f8 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/FrameOpSpaceStatus.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/FrameOpSpaceStatus.java
@@ -20,5 +20,10 @@
 package org.apache.hyracks.storage.am.common.frames;
 
 public enum FrameOpSpaceStatus {
-    INSUFFICIENT_SPACE, SUFFICIENT_CONTIGUOUS_SPACE, SUFFICIENT_SPACE, SUFFICIENT_INPLACE_SPACE
+    INSUFFICIENT_SPACE,
+    SUFFICIENT_CONTIGUOUS_SPACE,
+    SUFFICIENT_SPACE,
+    SUFFICIENT_INPLACE_SPACE,
+    EXPAND,
+    TOO_LARGE
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
index 414ce27..ea6bcac 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
@@ -23,6 +23,7 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Collections;
 
+import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
 import org.apache.hyracks.storage.am.common.api.ISlotManager;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
@@ -30,6 +31,7 @@ import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
 import org.apache.hyracks.storage.am.common.ophelpers.SlotOffTupleOff;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 
 public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
 
@@ -38,7 +40,10 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
     protected static final int freeSpaceOff = tupleCountOff + 4; // 12
     protected static final int totalFreeSpaceOff = freeSpaceOff + 4; // 16
     protected static final int levelOff = totalFreeSpaceOff + 4; // 20
-    protected static final int smFlagOff = levelOff + 1; // 21
+    protected static final int flagOff = levelOff + 1; // 21
+
+    protected static final byte smFlagBit           = 0x1;
+    protected static final byte largeFlagBit        = 0x2;
 
     protected ICachedPage page = null;
     protected ByteBuffer buf = null;
@@ -46,12 +51,14 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
 
     protected ITreeIndexTupleWriter tupleWriter;
     protected ITreeIndexTupleReference frameTuple;
+    protected ILargePageHelper largePageHelper;
 
-    public TreeIndexNSMFrame(ITreeIndexTupleWriter tupleWriter, ISlotManager slotManager) {
+    public TreeIndexNSMFrame(ITreeIndexTupleWriter tupleWriter, ISlotManager slotManager, ILargePageHelper largePageHelper) {
         this.tupleWriter = tupleWriter;
         this.frameTuple = tupleWriter.createTupleReference();
         this.slotManager = slotManager;
         this.slotManager.setFrame(this);
+        this.largePageHelper = largePageHelper;
     }
 
     @Override
@@ -61,7 +68,7 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
         buf.putInt(tupleCountOff, 0);
         resetSpaceParams();
         buf.put(levelOff, level);
-        buf.put(smFlagOff, (byte) 0);
+        buf.put(flagOff, (byte) 0);
     }
 
     @Override
@@ -74,6 +81,34 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
         return buf.get(levelOff) == 0;
     }
 
+    public boolean getSmFlag() {
+        return (buf.get(flagOff) & smFlagBit) != 0;
+    }
+
+    public void setSmFlag(boolean smFlag) {
+        if (smFlag) {
+            buf.put(flagOff, (byte) (buf.get(flagOff) | smFlagBit));
+        } else {
+            buf.put(flagOff, (byte) (buf.get(flagOff) & ~smFlagBit));
+        }
+    }
+
+    public void setLargeFlag(boolean largeFlag) {
+        if (largeFlag) {
+            buf.put(flagOff, (byte) (buf.get(flagOff) | largeFlagBit));
+        } else {
+            buf.put(flagOff, (byte) (buf.get(flagOff) & ~largeFlagBit));
+        }
+    }
+
+    public static boolean isLargePage(ByteBuffer buf) {
+        return (buf.get(flagOff) & largeFlagBit) != 0;
+    }
+
+    public boolean isLargePage() {
+        return isLargePage(buf);
+    }
+
     @Override
     public boolean isInterior() {
         return buf.get(levelOff) > 0;
@@ -165,7 +200,7 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
     }
 
     @Override
-    public FrameOpSpaceStatus hasSpaceInsert(ITupleReference tuple) {
+    public FrameOpSpaceStatus hasSpaceInsert(ITupleReference tuple) throws HyracksDataException {
         int bytesRequired = tupleWriter.bytesRequired(tuple);
         // Enough space in the contiguous space region?
         if (bytesRequired + slotManager.getSlotSize() <= buf.capacity() - buf.getInt(freeSpaceOff)
@@ -184,6 +219,10 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
         frameTuple.resetByTupleIndex(this, oldTupleIndex);
         int oldTupleBytes = frameTuple.getTupleSize();
         int newTupleBytes = tupleWriter.bytesRequired(newTuple);
+        return hasSpaceUpdate(oldTupleBytes, newTupleBytes);
+    }
+
+    protected FrameOpSpaceStatus hasSpaceUpdate(int oldTupleBytes, int newTupleBytes) {
         int additionalBytesRequired = newTupleBytes - oldTupleBytes;
         // Enough space for an in-place update?
         if (additionalBytesRequired <= 0) {
@@ -203,8 +242,8 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
     }
 
     protected void resetSpaceParams() {
-        buf.putInt(freeSpaceOff, smFlagOff + 1);
-        buf.putInt(totalFreeSpaceOff, buf.capacity() - (smFlagOff + 1));
+        buf.putInt(freeSpaceOff, getPageHeaderSize());
+        buf.putInt(totalFreeSpaceOff, buf.capacity() - getPageHeaderSize());
     }
 
     @Override
@@ -246,7 +285,7 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
         strBuilder.append("freeSpaceOff:      " + freeSpaceOff + "\n");
         strBuilder.append("totalFreeSpaceOff: " + totalFreeSpaceOff + "\n");
         strBuilder.append("levelOff:          " + levelOff + "\n");
-        strBuilder.append("smFlagOff:         " + smFlagOff + "\n");
+        strBuilder.append("flagOff:           " + flagOff + "\n");
         return strBuilder.toString();
     }
 
@@ -302,4 +341,9 @@ public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
     public int getFreeContiguousSpace() {
         return buf.capacity() - getFreeSpaceOff() - (getTupleCount() * slotManager.getSlotSize());
     }
+
+    @Override
+    public ILargePageHelper getLargePageHelper() {
+        return largePageHelper;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/freepage/LinkedMetaDataPageManager.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/freepage/LinkedMetaDataPageManager.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/freepage/LinkedMetaDataPageManager.java
index 61f5919..468654a 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/freepage/LinkedMetaDataPageManager.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/freepage/LinkedMetaDataPageManager.java
@@ -96,6 +96,14 @@ public class LinkedMetaDataPageManager implements IMetaDataPageManager {
     }
 
     @Override
+    public void addFreePageBlock(ITreeIndexMetaDataFrame metaFrame, int startingPage, int count)
+            throws HyracksDataException {
+        for (int i = 0; i < count; i++) {
+            addFreePage(metaFrame, startingPage + i);
+        }
+    }
+
+    @Override
     public int getFreePage(ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException {
         ICachedPage metaNode;
         if (!appendOnly) {
@@ -164,6 +172,13 @@ public class LinkedMetaDataPageManager implements IMetaDataPageManager {
     }
 
     @Override
+    public int getFreePageBlock(ITreeIndexMetaDataFrame metaFrame, int count) throws HyracksDataException {
+        int maxPage = metaFrame.getMaxPage();
+        metaFrame.setMaxPage(maxPage + count);
+        return maxPage + 1;
+    }
+
+    @Override
     public int getMaxPage(ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException {
         ICachedPage metaNode;
         if (!appendOnly || (appendOnly && confiscatedMetaNode == null)) {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
index 83e39f2..b7b39d4 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/AbstractTreeIndex.java
@@ -31,6 +31,7 @@ import org.apache.hyracks.storage.am.common.ophelpers.MultiComparator;
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.buffercache.IFIFOPageQueue;
+import org.apache.hyracks.storage.common.buffercache.ILargePageHelper;
 import org.apache.hyracks.storage.common.file.BufferedFileHandle;
 import org.apache.hyracks.storage.common.file.IFileMapProvider;
 
@@ -61,10 +62,14 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
 
     protected int bulkloadLeafStart = 0;
 
+    protected final ILargePageHelper largePageHelper;
+
+
     public AbstractTreeIndex(IBufferCache bufferCache, IFileMapProvider fileMapProvider,
-            IMetaDataPageManager freePageManager, ITreeIndexFrameFactory interiorFrameFactory,
-            ITreeIndexFrameFactory leafFrameFactory, IBinaryComparatorFactory[] cmpFactories, int fieldCount,
-            FileReference file) {
+                             IMetaDataPageManager freePageManager, ITreeIndexFrameFactory interiorFrameFactory,
+                             ITreeIndexFrameFactory leafFrameFactory, IBinaryComparatorFactory[] cmpFactories,
+                             int fieldCount,
+                             FileReference file) {
         this.bufferCache = bufferCache;
         this.fileMapProvider = fileMapProvider;
         this.freePageManager = freePageManager;
@@ -73,6 +78,7 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         this.cmpFactories = cmpFactories;
         this.fieldCount = fieldCount;
         this.file = file;
+        this.largePageHelper = leafFrameFactory.getLargePageHelper();
     }
 
     public synchronized void create() throws HyracksDataException {
@@ -119,7 +125,8 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         ITreeIndexFrame frame = leafFrameFactory.createFrame();
         ITreeIndexMetaDataFrame metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
         freePageManager.init(metaFrame, rootPage);
-        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
+        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true,
+                largePageHelper);
         rootNode.acquireWriteLatch();
         try {
             frame.setPage(rootNode);
@@ -177,7 +184,7 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         int mdPageLoc = freePageManager.getFirstMetadataPage();
         ITreeIndexMetaDataFrame metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
         int numPages = freePageManager.getMaxPage(metaFrame);
-        if(mdPageLoc > 1 || (mdPageLoc == 1 && numPages <= MINIMAL_TREE_PAGE_COUNT -1  )){ //md page doesn't count itself
+        if(mdPageLoc > 1 || (mdPageLoc == 1 && numPages <= MINIMAL_TREE_PAGE_COUNT -1)) { //md page doesn't count itself
             appendOnly = true;
         }
         else{
@@ -237,7 +244,8 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         if(freePageManager.appendOnlyMode() && bufferCache.getNumPagesOfFile(fileId) <= MINIMAL_TREE_PAGE_COUNT){
             return true;
         }
-        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
+        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false,
+                largePageHelper);
         rootNode.acquireReadLatch();
         try {
             frame.setPage(rootNode);
@@ -255,7 +263,8 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
 
 
     public byte getTreeHeight(ITreeIndexFrame frame) throws HyracksDataException {
-        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
+        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false,
+                largePageHelper);
         rootNode.acquireReadLatch();
         try {
             frame.setPage(rootNode);
@@ -312,9 +321,9 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
         protected final ITreeIndexTupleWriter tupleWriter;
         protected ITreeIndexFrame leafFrame;
         protected ITreeIndexFrame interiorFrame;
-        // Immutable bulk loaders write their root page at page -2, as needed e.g. by append-only file systems such as HDFS.
-        // Since loading this tree relies on the root page actually being at that point, no further inserts into that tree are allowed.
-        // Currently, this is not enforced.
+        // Immutable bulk loaders write their root page at page -2, as needed e.g. by append-only file systems such as
+        // HDFS.  Since loading this tree relies on the root page actually being at that point, no further inserts into
+        // that tree are allowed.  Currently, this is not enforced.
         protected boolean releasedLatches;
         public boolean appendOnly = false;
         protected final IFIFOPageQueue queue;
@@ -385,12 +394,13 @@ public abstract class AbstractTreeIndex implements ITreeIndex {
             //move the root page to the first data page if necessary
             bufferCache.finishQueue();
             if (!appendOnly) {
-                ICachedPage newRoot = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
+                ICachedPage newRoot = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true,
+                        largePageHelper);
                 newRoot.acquireWriteLatch();
                 //root will be the highest frontier
                 NodeFrontier lastNodeFrontier = nodeFrontiers.get(nodeFrontiers.size() - 1);
                 ICachedPage oldRoot = bufferCache.pin(
-                        BufferedFileHandle.getDiskPageId(fileId, lastNodeFrontier.pageId), false);
+                        BufferedFileHandle.getDiskPageId(fileId, lastNodeFrontier.pageId), false, largePageHelper);
                 oldRoot.acquireReadLatch();
                 lastNodeFrontier.page = oldRoot;
                 try {

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/1defc92a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/TreeIndexDiskOrderScanCursor.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/TreeIndexDiskOrderScanCursor.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/TreeIndexDiskOrderScanCursor.java
index bd8c67b..87449eb 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/TreeIndexDiskOrderScanCursor.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/impls/TreeIndexDiskOrderScanCursor.java
@@ -70,7 +70,8 @@ public class TreeIndexDiskOrderScanCursor implements ITreeIndexCursor {
             page.releaseReadLatch();
             bufferCache.unpin(page);
 
-            ICachedPage nextPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), false);
+            ICachedPage nextPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), false,
+                    frame.getLargePageHelper());
             nextPage.acquireReadLatch();
 
             page = nextPage;



Mime
View raw message