phoenix-commits mailing list archives

From jamestay...@apache.org
Subject [6/7] phoenix git commit: PHOENIX-2067 Sort order incorrect for variable length DESC columns
Date Tue, 14 Jul 2015 18:21:12 GMT
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/compile/UnionCompiler.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/UnionCompiler.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/UnionCompiler.java
index 269232e..942e244 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/compile/UnionCompiler.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/compile/UnionCompiler.java
@@ -80,8 +80,9 @@ public class UnionCompiler {
         }
         Long scn = statement.getConnection().getSCN();
         PTable tempTable = PTableImpl.makePTable(statement.getConnection().getTenantId(), UNION_SCHEMA_NAME, UNION_TABLE_NAME, 
-                PTableType.SUBQUERY, null, HConstants.LATEST_TIMESTAMP, scn == null ? HConstants.LATEST_TIMESTAMP : scn, null, null, projectedColumns, null, null, null,
-                        true, null, null, null, true, true, true, null, null, null);
+                PTableType.SUBQUERY, null, HConstants.LATEST_TIMESTAMP, scn == null ? HConstants.LATEST_TIMESTAMP : scn, null, null,
+                        projectedColumns, null, null, null,
+                        true, null, null, null, true, true, true, null, null, null, false);
         TableRef tableRef = new TableRef(null, tempTable, 0, false);
         return tableRef;
     }

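A note for readers following the patch series: the trailing false added to this makePTable call is the new rowKeyOrderOptimizable flag that PHOENIX-2067 threads through the code base. It records whether a table's row keys use the corrected separator for variable-length DESC columns. A minimal, self-contained sketch of why the legacy separator is wrong, assuming 0x00 as the legacy separator (QueryConstants.SEPARATOR_BYTE) and 0xFF as the new DESC separator; the byte literals hand-encode the bit-inverted DESC representation:

    import org.apache.hadoop.hbase.util.Bytes;

    class DescSeparatorDemo {
        public static void main(String[] args) {
            // DESC values are stored bit-inverted, so "ab" must sort *after* "abc".
            byte[] abOld  = {(byte) ~'a', (byte) ~'b', 0x00};               // legacy separator
            byte[] abcOld = {(byte) ~'a', (byte) ~'b', (byte) ~'c', 0x00};
            System.out.println(Bytes.compareTo(abOld, abcOld) < 0);  // true: the bug, "ab" sorts first

            byte[] abNew  = {(byte) ~'a', (byte) ~'b', (byte) 0xFF};        // new DESC separator
            byte[] abcNew = {(byte) ~'a', (byte) ~'b', (byte) ~'c', (byte) 0xFF};
            System.out.println(Bytes.compareTo(abNew, abcNew) > 0);  // true: fixed ordering
        }
    }
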
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/compile/UpsertCompiler.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/UpsertCompiler.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/UpsertCompiler.java
index 7b39a28..e12f5a4 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/compile/UpsertCompiler.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/compile/UpsertCompiler.java
@@ -150,8 +150,10 @@ public class UpsertCompiler {
                             SQLExceptionCode.DATA_EXCEEDS_MAX_CAPACITY).setColumnName(column.getName().getString())
                             .setMessage("value=" + column.getDataType().toStringLiteral(ptr, null)).build()
                             .buildException(); }
-                    column.getDataType().coerceBytes(ptr, value, column.getDataType(), precision, scale,
-                            SortOrder.getDefault(), column.getMaxLength(), column.getScale(), column.getSortOrder());
+                    column.getDataType().coerceBytes(ptr, value, column.getDataType(), 
+                            precision, scale, SortOrder.getDefault(), 
+                            column.getMaxLength(), column.getScale(), column.getSortOrder(),
+                            table.rowKeyOrderOptimizable());
                     values[i] = ByteUtil.copyKeyBytesIfNecessary(ptr);
                 }
                 setValues(values, pkSlotIndexes, columnIndexes, table, mutation, statement);
@@ -772,6 +774,7 @@ public class UpsertCompiler {
                 final SequenceManager sequenceManager = context.getSequenceManager();
                 // Next evaluate all the expressions
                 int nodeIndex = nodeIndexOffset;
+                PTable table = tableRef.getTable();
                 Tuple tuple = sequenceManager.getSequenceCount() == 0 ? null :
                     sequenceManager.newSequenceTuple(null);
                 for (Expression constantExpression : constantExpressions) {
@@ -793,9 +796,10 @@ public class UpsertCompiler {
                                 .setMessage("value=" + constantExpression.toString()).build().buildException();
                         }
                     }
-                    column.getDataType().coerceBytes(ptr, value,
-                            constantExpression.getDataType(), constantExpression.getMaxLength(), constantExpression.getScale(), constantExpression.getSortOrder(),
-                            column.getMaxLength(), column.getScale(),column.getSortOrder());
+                    column.getDataType().coerceBytes(ptr, value, constantExpression.getDataType(), 
+                            constantExpression.getMaxLength(), constantExpression.getScale(), constantExpression.getSortOrder(),
+                            column.getMaxLength(), column.getScale(),column.getSortOrder(),
+                            table.rowKeyOrderOptimizable());
                     if (overlapViewColumns.contains(column) && Bytes.compareTo(ptr.get(), ptr.getOffset(), ptr.getLength(), column.getViewConstant(), 0, column.getViewConstant().length-1) != 0) {
                         throw new SQLExceptionInfo.Builder(
                                 SQLExceptionCode.CANNOT_UPDATE_VIEW_COLUMN)
@@ -814,7 +818,7 @@ public class UpsertCompiler {
                     }
                 }
                 Map<ImmutableBytesPtr, RowMutationState> mutation = Maps.newHashMapWithExpectedSize(1);
-                setValues(values, pkSlotIndexes, columnIndexes, tableRef.getTable(), mutation, statement);
+                setValues(values, pkSlotIndexes, columnIndexes, table, mutation, statement);
                 return new MutationState(tableRef, mutation, 0, maxSize, connection);
             }
 

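Both hunks above make the same change: PDataType#coerceBytes gains a trailing boolean so the coercion knows which separator format to emit. Annotated shape of the widened call (this is the second hunk's call, with parameter roles spelled out; no new names are introduced):

    column.getDataType().coerceBytes(
            ptr,                                   // in/out: the bytes being coerced
            value,                                 // source object, when available
            constantExpression.getDataType(),      // from: type
            constantExpression.getMaxLength(),
            constantExpression.getScale(),
            constantExpression.getSortOrder(),     // from: sort order
            column.getMaxLength(),
            column.getScale(),
            column.getSortOrder(),                 // to: sort order
            table.rowKeyOrderOptimizable());       // new: which separator format to write
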
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java b/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java
index 0cbef11..332f293 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/compile/WhereOptimizer.java
@@ -61,7 +61,9 @@ import org.apache.phoenix.schema.PTable;
 import org.apache.phoenix.schema.RowKeySchema;
 import org.apache.phoenix.schema.SaltingUtil;
 import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.ValueSchema.Field;
 import org.apache.phoenix.schema.tuple.Tuple;
+import org.apache.phoenix.schema.types.PArrayDataType;
 import org.apache.phoenix.schema.types.PChar;
 import org.apache.phoenix.schema.types.PDataType;
 import org.apache.phoenix.schema.types.PVarbinary;
@@ -194,8 +196,9 @@ public class WhereOptimizer {
             if (hasMinMaxRange) {
                 System.arraycopy(tenantIdBytes, 0, minMaxRangePrefix, minMaxRangeOffset, tenantIdBytes.length);
                 minMaxRangeOffset += tenantIdBytes.length;
-                if (!schema.getField(pkPos).getDataType().isFixedWidth()) {
-                    minMaxRangePrefix[minMaxRangeOffset] = QueryConstants.SEPARATOR_BYTE;
+                Field f = schema.getField(pkPos);
+                if (!f.getDataType().isFixedWidth()) {
+                    minMaxRangePrefix[minMaxRangeOffset] = SchemaUtil.getSeparatorByte(schema.rowKeyOrderOptimizable(), tenantIdBytes.length==0, f);
                     minMaxRangeOffset++;
                 }
             }
@@ -259,6 +262,8 @@ public class WhereOptimizer {
                     hasNonPointKey = true;
                 }
             }
+            keyRanges = transformKeyRangesIfNecessary(slot, context);
+            
             hasMultiRanges |= keyRanges.size() > 1;
             // Force a range scan if we've encountered a multi-span slot (i.e. RVC)
             // and a non point key, as our skip scan only handles fully qualified
@@ -317,6 +322,45 @@ public class WhereOptimizer {
         }
     }
     
+    // Special hack for PHOENIX-2067 to change the constant array to match
+    // the separators used for descending, variable length arrays.
+    // Note that there'd already be a coerce expression around the constant
+    // to convert it to the right type, so we only rewrite the separators here.
+    private static List<KeyRange> transformKeyRangesIfNecessary(KeyExpressionVisitor.KeySlot slot, StatementContext context) {
+        KeyPart keyPart = slot.getKeyPart();
+        List<KeyRange> keyRanges = slot.getKeyRanges();
+        PColumn column = keyPart.getColumn();
+        if (column != null) {
+            PDataType type = column.getDataType();
+            PTable table = context.getCurrentTable().getTable();
+            // Constants are always built with rowKeyOrderOptimizable as true, using the correct separators
+            // We only need to do this conversion if we have a table that has not yet been converted.
+            if (type != null && type.isArrayType() && column.getSortOrder() == SortOrder.DESC && !PArrayDataType.arrayBaseType(type).isFixedWidth() && !table.rowKeyOrderOptimizable()) {
+                ImmutableBytesWritable ptr = context.getTempPtr();
+                List<KeyRange> newKeyRanges = Lists.newArrayListWithExpectedSize(keyRanges.size());
+                for (KeyRange keyRange : keyRanges) {
+                    byte[] lower = keyRange.getLowerRange();
+                    if (!keyRange.lowerUnbound()) {
+                        ptr.set(lower);
+                        type.coerceBytes(ptr, null, type, null, null, SortOrder.DESC, null, null, SortOrder.DESC, false);
+                        lower = ByteUtil.copyKeyBytesIfNecessary(ptr);
+                    }
+                    byte[] upper = keyRange.getUpperRange();
+                    if (!keyRange.upperUnbound()) {
+                        ptr.set(upper);
+                        type.coerceBytes(ptr, null, type, null, null, SortOrder.DESC, null, null, SortOrder.DESC, false);
+                        upper = ByteUtil.copyKeyBytesIfNecessary(ptr);
+                    }
+                    keyRange = KeyRange.getKeyRange(lower, keyRange.isLowerInclusive(), upper, keyRange.isUpperInclusive());
+                    newKeyRanges.add(keyRange);
+                }
+                
+                return newKeyRanges;
+            }
+        }
+        return keyRanges;
+    }
+    
     /**
      * Get an optimal combination of key expressions for hash join key range optimization.
      * @return returns true if the entire combined expression is covered by key range optimization
@@ -996,6 +1040,9 @@ public class WhereOptimizer {
             // Handles cases like WHERE substr(foo,1,3) IN ('aaa','bbb')
             for (Expression key : keyExpressions) {
                 KeyRange range = childPart.getKeyRange(CompareOp.EQUAL, key);
+                if (range == null) {
+                    return null;
+                }
                 if (range != KeyRange.EMPTY_RANGE) { // null means it can't possibly be in range
                     ranges.add(range);
                 }
@@ -1393,7 +1440,7 @@ public class WhereOptimizer {
                     return null; // Shouldn't happen
                 }
                 ImmutableBytesWritable ptr = context.getTempPtr();
-                if (!rhs.evaluate(null, ptr) || ptr.getLength()==0) {
+                if (!rhs.evaluate(null, ptr)) { // Don't return if evaluated to null
                     return null; 
                 }
                 byte[] key = ByteUtil.copyKeyBytesIfNecessary(ptr);

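A detail that is easy to miss in this file: SchemaUtil.getSeparatorByte(rowKeyOrderOptimizable, isNullValue, field) replaces the hard-coded QueryConstants.SEPARATOR_BYTE. Its body is outside this diff, but from the call site it plausibly reduces to the sketch below; treat the constants and the exact logic as assumptions:

    import org.apache.phoenix.schema.SortOrder;
    import org.apache.phoenix.schema.ValueSchema.Field;

    static byte getSeparatorByte(boolean rowKeyOrderOptimizable, boolean isNullValue, Field f) {
        // Legacy tables, null values, and ASC fields keep the historical 0x00
        // separator; only upgraded tables invert it for non-null DESC fields so
        // that unsigned byte order matches the declared sort order.
        if (!rowKeyOrderOptimizable || isNullValue || f.getSortOrder() != SortOrder.DESC) {
            return (byte) 0x00;
        }
        return (byte) 0xFF;
    }

Note also that transformKeyRangesIfNecessary above coerces with identical from/to type and sort order, so only the array separators are rewritten, never the element bytes.
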
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/BaseScannerRegionObserver.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/BaseScannerRegionObserver.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/BaseScannerRegionObserver.java
index a2269b4..aedee2b 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/BaseScannerRegionObserver.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/BaseScannerRegionObserver.java
@@ -87,6 +87,8 @@ abstract public class BaseScannerRegionObserver extends BaseRegionObserver {
     public static final String ANALYZE_TABLE = "_ANALYZETABLE";
     public static final String GUIDEPOST_WIDTH_BYTES = "_GUIDEPOST_WIDTH_BYTES";
     public static final String GUIDEPOST_PER_REGION = "_GUIDEPOST_PER_REGION";
+    public static final String UPGRADE_DESC_ROW_KEY = "_UPGRADE_DESC_ROW_KEY";
+    
     /**
      * Attribute name used to pass custom annotations in Scans and Mutations (later). Custom annotations
      * are used to augment log lines emitted by Phoenix. See https://issues.apache.org/jira/browse/PHOENIX-1198.
@@ -389,4 +391,4 @@ abstract public class BaseScannerRegionObserver extends BaseRegionObserver {
             }
         };
     }
-}
\ No newline at end of file
+}

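The new UPGRADE_DESC_ROW_KEY constant is an ordinary scan attribute. The client side of the upgrade is not part of this hunk, but tagging a scan would look roughly like the sketch below; serializedTableBytes (a serialized PTable, per the deserializeTable call in UngroupedAggregateRegionObserver further down) is a placeholder:

    import org.apache.hadoop.hbase.client.Scan;

    Scan scan = new Scan();
    // Presence of this attribute flips doPostScannerOpen into row-key-rewrite mode.
    scan.setAttribute(BaseScannerRegionObserver.UPGRADE_DESC_ROW_KEY, serializedTableBytes);
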
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataEndpointImpl.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataEndpointImpl.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataEndpointImpl.java
index f286bdc..e385a8f 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataEndpointImpl.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/MetaDataEndpointImpl.java
@@ -183,6 +183,7 @@ import org.apache.phoenix.util.QueryUtil;
 import org.apache.phoenix.util.ScanUtil;
 import org.apache.phoenix.util.SchemaUtil;
 import org.apache.phoenix.util.ServerUtil;
+import org.apache.phoenix.util.UpgradeUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -213,6 +214,10 @@ import com.google.protobuf.Service;
 public class MetaDataEndpointImpl extends MetaDataProtocol implements CoprocessorService, Coprocessor {
     private static final Logger logger = LoggerFactory.getLogger(MetaDataEndpointImpl.class);
 
+    // Column to track tables that have been upgraded based on PHOENIX-2067
+    public static final String ROW_KEY_ORDER_OPTIMIZABLE = "ROW_KEY_ORDER_OPTIMIZABLE";
+    public static final byte[] ROW_KEY_ORDER_OPTIMIZABLE_BYTES = Bytes.toBytes(ROW_KEY_ORDER_OPTIMIZABLE);
+
     // KeyValues for Table
     private static final KeyValue TABLE_TYPE_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, TABLE_TYPE_BYTES);
     private static final KeyValue TABLE_SEQ_NUM_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, TABLE_SEQ_NUM_BYTES);
@@ -233,6 +238,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
     private static final KeyValue STORE_NULLS_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, STORE_NULLS_BYTES);
     private static final KeyValue EMPTY_KEYVALUE_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES);
     private static final KeyValue BASE_COLUMN_COUNT_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, PhoenixDatabaseMetaData.BASE_COLUMN_COUNT_BYTES);
+    private static final KeyValue ROW_KEY_ORDER_OPTIMIZABLE_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, ROW_KEY_ORDER_OPTIMIZABLE_BYTES);
     
     private static final List<KeyValue> TABLE_KV_COLUMNS = Arrays.<KeyValue>asList(
             EMPTY_KEYVALUE_KV,
@@ -253,7 +259,8 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
             INDEX_TYPE_KV,
             INDEX_DISABLE_TIMESTAMP_KV,
             STORE_NULLS_KV,
-            BASE_COLUMN_COUNT_KV
+            BASE_COLUMN_COUNT_KV,
+            ROW_KEY_ORDER_OPTIMIZABLE_KV
             );
     static {
         Collections.sort(TABLE_KV_COLUMNS, KeyValue.COMPARATOR);
@@ -276,6 +283,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
     private static final int INDEX_TYPE_INDEX = TABLE_KV_COLUMNS.indexOf(INDEX_TYPE_KV);
     private static final int STORE_NULLS_INDEX = TABLE_KV_COLUMNS.indexOf(STORE_NULLS_KV);
     private static final int BASE_COLUMN_COUNT_INDEX = TABLE_KV_COLUMNS.indexOf(BASE_COLUMN_COUNT_KV);
+    private static final int ROW_KEY_ORDER_OPTIMIZABLE_INDEX = TABLE_KV_COLUMNS.indexOf(ROW_KEY_ORDER_OPTIMIZABLE_KV);
 
     // KeyValues for Column
     private static final KeyValue DECIMAL_DIGITS_KV = createFirstOnRow(ByteUtil.EMPTY_BYTE_ARRAY, TABLE_FAMILY_BYTES, DECIMAL_DIGITS_BYTES);
@@ -782,6 +790,8 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
         Cell baseColumnCountKv = tableKeyValues[BASE_COLUMN_COUNT_INDEX];
         int baseColumnCount = baseColumnCountKv == null ? 0 : PInteger.INSTANCE.getCodec().decodeInt(baseColumnCountKv.getValueArray(),
             baseColumnCountKv.getValueOffset(), SortOrder.getDefault());
+        Cell rowKeyOrderOptimizableKv = tableKeyValues[ROW_KEY_ORDER_OPTIMIZABLE_INDEX];
+        boolean rowKeyOrderOptimizable = rowKeyOrderOptimizableKv == null ? false : Boolean.TRUE.equals(PBoolean.INSTANCE.toObject(rowKeyOrderOptimizableKv.getValueArray(), rowKeyOrderOptimizableKv.getValueOffset(), rowKeyOrderOptimizableKv.getValueLength()));
 
         List<PColumn> columns = Lists.newArrayListWithExpectedSize(columnCount);
         List<PTable> indexes = new ArrayList<PTable>();
@@ -826,7 +836,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
         return PTableImpl.makePTable(tenantId, schemaName, tableName, tableType, indexState, timeStamp,
             tableSeqNum, pkName, saltBucketNum, columns, tableType == INDEX ? schemaName : null,
             tableType == INDEX ? dataTableName : null, indexes, isImmutableRows, physicalTables, defaultFamilyName, viewStatement,
-            disableWAL, multiTenant, storeNulls, viewType, viewIndexId, indexType, stats, baseColumnCount);
+            disableWAL, multiTenant, storeNulls, viewType, viewIndexId, indexType, stats, baseColumnCount, rowKeyOrderOptimizable);
     }
 
     private PFunction getFunction(RegionScanner scanner, final boolean isReplace, long clientTimeStamp, List<Mutation> deleteMutationsForReplace)
@@ -1058,7 +1068,6 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
             return null;
         }
 
-
     @Override
     public void createTable(RpcController controller, CreateTableRequest request,
             RpcCallback<MetaDataResponse> done) {
@@ -1145,7 +1154,15 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                         return;
                     }
                 }
-                // TODO: Switch this to HRegion#batchMutate when we want to support indexes on the
+                PTableType tableType = MetaDataUtil.getTableType(tableMetadata, GenericKeyValueBuilder.INSTANCE,
+                        new ImmutableBytesWritable());
+                // Add cell for ROW_KEY_ORDER_OPTIMIZABLE = true, as we know that new tables
+                // conform to the correct row key format. The exception is a VIEW, for which
+                // the client sends the cell over depending on its base physical table.
+                if (tableType != PTableType.VIEW) {
+                    UpgradeUtil.addRowKeyOrderOptimizableCell(tableMetadata, key, clientTimeStamp);
+                }
+                // TODO: Switch this to Region#batchMutate when we want to support indexes on the
                 // system
                 // table. Basically, we get all the locks that we don't already hold for all the
                 // tableMetadata rows. This ensures we don't have deadlock situations (ensuring
@@ -1560,7 +1577,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                 } else {
                     // server-side, except for indexing, we always expect the keyvalues to be standard KeyValues
                     PTableType expectedType = MetaDataUtil.getTableType(tableMetadata, GenericKeyValueBuilder.INSTANCE,
-                            new ImmutableBytesPtr());
+                            new ImmutableBytesWritable());
                     // We said to drop a table, but found a view or vice versa
                     if (type != expectedType) { return new MetaDataMutationResult(MutationCode.TABLE_NOT_FOUND,
                             EnvironmentEdgeManager.currentTimeMillis(), null); }
@@ -1610,6 +1627,8 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
             if (view.getBaseColumnCount() == QueryConstants.DIVERGED_VIEW_BASE_COLUMN_COUNT) {
                 // if a view has divorced itself from the base table, we don't allow schema changes
                 // to be propagated to it.
+                // FIXME: we should allow the update, but just not propagate it to this view
+                // The one exception is PK changes which need to be propagated to diverged views as well
             	return new MetaDataMutationResult(MutationCode.UNALLOWED_TABLE_MUTATION, EnvironmentEdgeManager.currentTimeMillis(), null);
             }
             int numColsAddedToBaseTable = 0;
@@ -1844,6 +1863,9 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                     
                     // add index row header key to the invalidate list to force clients to fetch the latest meta-data
                     invalidateList.add(new ImmutableBytesPtr(indexHeaderRowKey));
+                    if (index.rowKeyOrderOptimizable()) {
+                        UpgradeUtil.addRowKeyOrderOptimizableCell(mutationsForAddingColumnsToViews, indexHeaderRowKey, clientTimeStamp);
+                    }
                     mutationsForAddingColumnsToViews.add(indexHeaderRowMutation);
                 }
             }
@@ -1864,6 +1886,9 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
             viewHeaderRowPut.addColumn(PhoenixDatabaseMetaData.TABLE_FAMILY_BYTES,
                     PhoenixDatabaseMetaData.TABLE_SEQ_NUM_BYTES, clientTimeStamp, viewSequencePtr);
             mutationsForAddingColumnsToViews.add(viewHeaderRowPut);
+            if (view.rowKeyOrderOptimizable()) {
+                UpgradeUtil.addRowKeyOrderOptimizableCell(mutationsForAddingColumnsToViews, viewKey, clientTimeStamp);
+            }
 
             // Update positions of view columns
             for (PColumn column : view.getColumns()) {
@@ -1910,7 +1935,10 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                     byte[] schemaName = rowKeyMetaData[SCHEMA_NAME_INDEX];
                     byte[] tableName = rowKeyMetaData[TABLE_NAME_INDEX];
                     PTableType type = table.getType();
-                    List<Mutation> mutationsForAddingColumnsToViews = Collections.emptyList();
+                    byte[] tableHeaderRowKey = SchemaUtil.getTableKey(tenantId,
+                            schemaName, tableName);
+                    // Size for worst case - all new columns are PK columns
+                    List<Mutation> mutationsForAddingColumnsToViews = Lists.newArrayListWithExpectedSize(tableMetaData.size() * ( 1 + table.getIndexes().size()));
                     /*
                      * If adding a column to a view, we don't want to propagate those meta-data changes to the child
                      * view hierarchy. This is because our check of finding child views is expensive and we want making
@@ -1971,6 +1999,17 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                                 continue;
                             } catch (ColumnNotFoundException e) {
                                 if (addingPKColumn) {
+                                    // We may be adding a DESC column, so if the table is already
+                                    // rowKeyOrderOptimizable, it should continue to be so.
+                                    if (table.rowKeyOrderOptimizable()) {
+                                        UpgradeUtil.addRowKeyOrderOptimizableCell(mutationsForAddingColumnsToViews, tableHeaderRowKey, clientTimeStamp);
+                                    } else if (table.getType() == PTableType.VIEW){
+                                        // Don't allow view PK to diverge from table PK as our upgrade code
+                                        // does not handle this.
+                                        return new MetaDataMutationResult(
+                                                MutationCode.UNALLOWED_TABLE_MUTATION, EnvironmentEdgeManager
+                                                        .currentTimeMillis(), null);
+                                    }
                                     // Add all indexes to invalidate list, as they will all be
                                     // adding the same PK column. No need to lock them, as we
                                     // have the parent table lock at this point.
@@ -1979,6 +2018,13 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                                                 .getTableKey(tenantId, index.getSchemaName()
                                                         .getBytes(), index.getTableName()
                                                         .getBytes())));
+                                        // We may be adding a DESC column, so if the index is already
+                                        // rowKeyOrderOptimizable, it should continue to be so.
+                                        if (index.rowKeyOrderOptimizable()) {
+                                            byte[] indexHeaderRowKey = SchemaUtil.getTableKey(index.getTenantId() == null ? ByteUtil.EMPTY_BYTE_ARRAY : index.getTenantId().getBytes(),
+                                                    index.getSchemaName().getBytes(), index.getTableName().getBytes());
+                                            UpgradeUtil.addRowKeyOrderOptimizableCell(mutationsForAddingColumnsToViews, indexHeaderRowKey, clientTimeStamp);
+                                        }
                                     }
                                 }
                                 continue;
@@ -2348,6 +2394,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                 get.addColumn(TABLE_FAMILY_BYTES, DATA_TABLE_NAME_BYTES);
                 get.addColumn(TABLE_FAMILY_BYTES, INDEX_STATE_BYTES);
                 get.addColumn(TABLE_FAMILY_BYTES, INDEX_DISABLE_TIMESTAMP_BYTES);
+                get.addColumn(TABLE_FAMILY_BYTES, ROW_KEY_ORDER_OPTIMIZABLE_BYTES);
                 Result currentResult = region.get(get);
                 if (currentResult.rawCells().length == 0) {
                     builder.setReturnCode(MetaDataProtos.MutationCode.TABLE_NOT_FOUND);
@@ -2358,6 +2405,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                 Cell dataTableKV = currentResult.getColumnLatestCell(TABLE_FAMILY_BYTES, DATA_TABLE_NAME_BYTES);
                 Cell currentStateKV = currentResult.getColumnLatestCell(TABLE_FAMILY_BYTES, INDEX_STATE_BYTES);
                 Cell currentDisableTimeStamp = currentResult.getColumnLatestCell(TABLE_FAMILY_BYTES, INDEX_DISABLE_TIMESTAMP_BYTES);
+                boolean rowKeyOrderOptimizable = currentResult.getColumnLatestCell(TABLE_FAMILY_BYTES, ROW_KEY_ORDER_OPTIMIZABLE_BYTES) != null;
 
                 PIndexState currentState =
                         PIndexState.fromSerializedValue(currentStateKV.getValueArray()[currentStateKV
@@ -2416,6 +2464,7 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                         INDEX_STATE_BYTES, timeStamp, Bytes.toBytes(newState.getSerializedValue())));
                 }
 
+                PTable returnTable = null;
                 if (currentState != newState) {
                     byte[] dataTableKey = null;
                     if(dataTableKV != null) {
@@ -2429,6 +2478,12 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                         p.add(TABLE_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES, timeStamp, ByteUtil.EMPTY_BYTE_ARRAY);
                         tableMetadata.add(p);
                     }
+                    boolean setRowKeyOrderOptimizableCell = newState == PIndexState.BUILDING && !rowKeyOrderOptimizable;
+                    // We're starting a rebuild of the index, so add our rowKeyOrderOptimizable cell
+                    // so that the row keys get generated using the new row key format
+                    if (setRowKeyOrderOptimizableCell) {
+                        UpgradeUtil.addRowKeyOrderOptimizableCell(tableMetadata, key, timeStamp);
+                    }
                     region.mutateRowsWithLocks(tableMetadata, Collections.<byte[]> emptySet());
                     // Invalidate from cache
                     Cache<ImmutableBytesPtr,PMetaDataEntity> metaDataCache = GlobalCache.getInstance(this.env).getMetaDataCache();
@@ -2436,12 +2491,18 @@ public class MetaDataEndpointImpl extends MetaDataProtocol implements Coprocesso
                     if(dataTableKey != null) {
                         metaDataCache.invalidate(new ImmutableBytesPtr(dataTableKey));
                     }
+                    if (setRowKeyOrderOptimizableCell) {
+                        returnTable = doGetTable(key, HConstants.LATEST_TIMESTAMP, rowLock);
+                    }
                 }
                 // Get client timeStamp from mutations, since it may get updated by the
                 // mutateRowsWithLocks call
                 long currentTime = MetaDataUtil.getClientTimeStamp(tableMetadata);
                 builder.setReturnCode(MetaDataProtos.MutationCode.TABLE_ALREADY_EXISTS);
                 builder.setMutationTime(currentTime);
+                if (returnTable != null) {
+                    builder.setTable(PTableImpl.toProto(returnTable));
+                }
                 done.run(builder.build());
                 return;
             } finally {

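UpgradeUtil.addRowKeyOrderOptimizableCell is called throughout this file but defined elsewhere. Given the decode pattern above (any non-null header cell reads as true) and the Put patterns this class already uses, a plausible shape is the following sketch; it is an assumption, not the actual implementation:

    public static void addRowKeyOrderOptimizableCell(List<Mutation> tableMetadata,
            byte[] tableHeaderRowKey, long clientTimeStamp) {
        // Mark the table header row as using the corrected row key format.
        Put put = new Put(tableHeaderRowKey);
        put.add(PhoenixDatabaseMetaData.TABLE_FAMILY_BYTES,
                MetaDataEndpointImpl.ROW_KEY_ORDER_OPTIMIZABLE_BYTES,
                clientTimeStamp, PBoolean.INSTANCE.toBytes(true));
        tableMetadata.add(put);
    }
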
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/UngroupedAggregateRegionObserver.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/UngroupedAggregateRegionObserver.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/UngroupedAggregateRegionObserver.java
index e43e5e5..14e62fd 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/UngroupedAggregateRegionObserver.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/UngroupedAggregateRegionObserver.java
@@ -28,6 +28,7 @@ import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -79,9 +80,12 @@ import org.apache.phoenix.schema.PColumn;
 import org.apache.phoenix.schema.PRow;
 import org.apache.phoenix.schema.PTable;
 import org.apache.phoenix.schema.PTableImpl;
+import org.apache.phoenix.schema.RowKeySchema;
 import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.ValueSchema.Field;
 import org.apache.phoenix.schema.stats.StatisticsCollector;
 import org.apache.phoenix.schema.tuple.MultiKeyValueTuple;
+import org.apache.phoenix.schema.types.PArrayDataType;
 import org.apache.phoenix.schema.types.PDataType;
 import org.apache.phoenix.util.ByteUtil;
 import org.apache.phoenix.util.IndexUtil;
@@ -126,14 +130,15 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
     }
 
     private static void commitBatch(HRegion region, List<Mutation> mutations, byte[] indexUUID) throws IOException {
-      if (indexUUID != null) {
-          for (Mutation m : mutations) {
-              m.setAttribute(PhoenixIndexCodec.INDEX_UUID, indexUUID);
-          }
-      }
-      Mutation[] mutationArray = new Mutation[mutations.size()];
-      // TODO: should we use the one that is all or none?
-      region.batchMutate(mutations.toArray(mutationArray));
+        if (indexUUID != null) {
+            for (Mutation m : mutations) {
+                m.setAttribute(PhoenixIndexCodec.INDEX_UUID, indexUUID);
+            }
+        }
+        Mutation[] mutationArray = new Mutation[mutations.size()];
+        // TODO: should we use the one that is all or none?
+        logger.warn("Committing bactch of " + mutations.size() + " mutations for " + region.getRegionInfo().getTable().getNameAsString());
+        region.batchMutate(mutations.toArray(mutationArray));
     }
 
     public static void serializeIntoScan(Scan scan) {
@@ -157,7 +162,6 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
     
     @Override
     protected RegionScanner doPostScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> c, final Scan scan, final RegionScanner s) throws IOException {
-        int offset = 0;
         HRegion region = c.getEnvironment().getRegion();
         long ts = scan.getTimeRange().getMax();
         StatisticsCollector stats = null;
@@ -167,15 +171,33 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
             // Let this throw, as this scan is being done for the sole purpose of collecting stats
             stats = new StatisticsCollector(c.getEnvironment(), region.getRegionInfo().getTable().getNameAsString(), ts, gp_width_bytes, gp_per_region_bytes);
         }
+        int offsetToBe = 0;
         if (ScanUtil.isLocalIndex(scan)) {
             /*
              * For local indexes, we need to set an offset on row key expressions to skip
              * the region start key.
              */
-            offset = region.getStartKey().length != 0 ? region.getStartKey().length:region.getEndKey().length;
-            ScanUtil.setRowKeyOffset(scan, offset);
+            offsetToBe = region.getRegionInfo().getStartKey().length != 0 ? region.getRegionInfo().getStartKey().length :
+                region.getRegionInfo().getEndKey().length;
+            ScanUtil.setRowKeyOffset(scan, offsetToBe);
         }
+        final int offset = offsetToBe;
 
+        PTable projectedTable = null;
+        PTable writeToTable = null;
+        byte[][] values = null;
+        byte[] descRowKeyTableBytes = scan.getAttribute(UPGRADE_DESC_ROW_KEY);
+        boolean isDescRowKeyOrderUpgrade = descRowKeyTableBytes != null;
+        if (isDescRowKeyOrderUpgrade) {
+            logger.warn("Upgrading row key for " + region.getRegionInfo().getTable().getNameAsString());
+            projectedTable = deserializeTable(descRowKeyTableBytes);
+            try {
+                writeToTable = PTableImpl.makePTable(projectedTable, true);
+            } catch (SQLException e) {
+                ServerUtil.throwIOException("Upgrade failed", e); // Impossible
+            }
+            values = new byte[projectedTable.getPKColumns().size()][];
+        }
         byte[] localIndexBytes = scan.getAttribute(LOCAL_INDEX_BUILD);
         List<IndexMaintainer> indexMaintainers = localIndexBytes == null ? null : IndexMaintainer.deserialize(localIndexBytes);
         List<Mutation> indexMutations = localIndexBytes == null ? Collections.<Mutation>emptyList() : Lists.<Mutation>newArrayListWithExpectedSize(1024);
@@ -183,22 +205,19 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
         RegionScanner theScanner = s;
         
         byte[] indexUUID = scan.getAttribute(PhoenixIndexCodec.INDEX_UUID);
-        PTable projectedTable = null;
         List<Expression> selectExpressions = null;
         byte[] upsertSelectTable = scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_TABLE);
         boolean isUpsert = false;
         boolean isDelete = false;
         byte[] deleteCQ = null;
         byte[] deleteCF = null;
-        byte[][] values = null;
         byte[] emptyCF = null;
-        ImmutableBytesWritable ptr = null;
+        ImmutableBytesWritable ptr = new ImmutableBytesWritable();
         if (upsertSelectTable != null) {
             isUpsert = true;
             projectedTable = deserializeTable(upsertSelectTable);
             selectExpressions = deserializeExpressions(scan.getAttribute(BaseScannerRegionObserver.UPSERT_SELECT_EXPRS));
             values = new byte[projectedTable.getPKColumns().size()][];
-            ptr = new ImmutableBytesWritable();
         } else {
             byte[] isDeleteAgg = scan.getAttribute(BaseScannerRegionObserver.DELETE_AGG);
             isDelete = isDeleteAgg != null && Bytes.compareTo(PDataType.TRUE_BYTES, isDeleteAgg) == 0;
@@ -208,9 +227,6 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
             }
             emptyCF = scan.getAttribute(BaseScannerRegionObserver.EMPTY_CF);
         }
-        if(localIndexBytes != null) {
-            ptr = new ImmutableBytesWritable();
-        }
         TupleProjector tupleProjector = null;
         HRegion dataRegion = null;
         byte[][] viewConstants = null;
@@ -218,7 +234,7 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
         boolean localIndexScan = ScanUtil.isLocalIndex(scan);
         final TupleProjector p = TupleProjector.deserializeProjectorFromScan(scan);
         final HashJoinInfo j = HashJoinInfo.deserializeHashJoinFromScan(scan);
-        if ((localIndexScan && !isDelete) || (j == null && p != null)) {
+        if ((localIndexScan && !isDelete && !isDescRowKeyOrderUpgrade) || (j == null && p != null)) {
             if (dataColumns != null) {
                 tupleProjector = IndexUtil.getTupleProjector(scan, dataColumns);
                 dataRegion = IndexUtil.getDataRegion(c.getEnvironment());
@@ -237,7 +253,7 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
         int batchSize = 0;
         List<Mutation> mutations = Collections.emptyList();
         boolean buildLocalIndex = indexMaintainers != null && dataColumns==null && !localIndexScan;
-        if (isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null || buildLocalIndex) {
+        if (isDescRowKeyOrderUpgrade || isDelete || isUpsert || (deleteCQ != null && deleteCF != null) || emptyCF != null || buildLocalIndex) {
             // TODO: size better
             mutations = Lists.newArrayListWithExpectedSize(1024);
             batchSize = c.getEnvironment().getConfiguration().getInt(MUTATE_BATCH_SIZE_ATTRIB, QueryServicesOptions.DEFAULT_MUTATE_BATCH_SIZE);
@@ -269,7 +285,72 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
                         rowCount++;
                         result.setKeyValues(results);
                         try {
-                            if (buildLocalIndex) {
+                            if (isDescRowKeyOrderUpgrade) {
+                                Arrays.fill(values, null);
+                                Cell firstKV = results.get(0);
+                                RowKeySchema schema = projectedTable.getRowKeySchema();
+                                int maxOffset = schema.iterator(firstKV.getRowArray(), firstKV.getRowOffset() + offset, firstKV.getRowLength(), ptr);
+                                for (int i = 0; i < schema.getFieldCount(); i++) {
+                                    Boolean hasValue = schema.next(ptr, i, maxOffset);
+                                    if (hasValue == null) {
+                                        break;
+                                    }
+                                    Field field = schema.getField(i);
+                                    // Special case for re-writing DESC ARRAY, as the actual byte value needs to change in this case
+                                    if (field.getDataType().isArrayType() && field.getSortOrder() == SortOrder.DESC && !PArrayDataType.arrayBaseType(field.getDataType()).isFixedWidth()) {
+                                        field.getDataType().coerceBytes(ptr, null, field.getDataType(),
+                                                field.getMaxLength(), field.getScale(), field.getSortOrder(), 
+                                                field.getMaxLength(), field.getScale(), field.getSortOrder(), true); // force to use correct separator byte
+                                    }
+                                    values[i] = ptr.copyBytes();
+                                }
+                                writeToTable.newKey(ptr, values);
+                                if (Bytes.compareTo(
+                                        firstKV.getRowArray(), firstKV.getRowOffset() + offset, firstKV.getRowLength(), 
+                                        ptr.get(),ptr.getOffset() + offset,ptr.getLength()) == 0) {
+                                    continue;
+                                }
+                                byte[] newRow = ByteUtil.copyKeyBytesIfNecessary(ptr);
+                                if (offset > 0) { // for local indexes (prepend region start key)
+                                    byte[] newRowWithOffset = new byte[offset + newRow.length];
+                                    System.arraycopy(firstKV.getRowArray(), firstKV.getRowOffset(), newRowWithOffset, 0, offset);
+                                    System.arraycopy(newRow, 0, newRowWithOffset, offset, newRow.length);
+                                    newRow = newRowWithOffset;
+                                }
+                                byte[] oldRow = Bytes.copy(firstKV.getRowArray(), firstKV.getRowOffset(), firstKV.getRowLength());
+                                for (Cell cell : results) {
+                                    // Copy existing cell but with new row key
+                                    Cell newCell = new KeyValue(newRow, 0, newRow.length,
+                                            cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
+                                            cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength(),
+                                            cell.getTimestamp(), KeyValue.Type.codeToType(cell.getTypeByte()),
+                                            cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
+                                    switch (KeyValue.Type.codeToType(cell.getTypeByte())) {
+                                    case Put:
+                                        // If Put, point delete old Put
+                                        Delete del = new Delete(oldRow);
+                                        del.addDeleteMarker(new KeyValue(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
+                                                cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength(),
+                                                cell.getQualifierArray(), cell.getQualifierOffset(),
+                                                cell.getQualifierLength(), cell.getTimestamp(), KeyValue.Type.Delete,
+                                                ByteUtil.EMPTY_BYTE_ARRAY, 0, 0));
+                                        mutations.add(del);
+                                        
+                                        Put put = new Put(newRow);
+                                        put.add(newCell);
+                                        mutations.add(put);
+                                        break;
+                                    case Delete:
+                                    case DeleteColumn:
+                                    case DeleteFamily:
+                                    case DeleteFamilyVersion:
+                                        Delete delete = new Delete(newRow);
+                                        delete.addDeleteMarker(newCell);
+                                        mutations.add(delete);
+                                        break;
+                                    }
+                                }
+                            } else if (buildLocalIndex) {
                                 for (IndexMaintainer maintainer : indexMaintainers) {
                                     if (!results.isEmpty()) {
                                         result.getKey(ptr);
@@ -332,7 +413,7 @@ public class UngroupedAggregateRegionObserver extends BaseScannerRegionObserver{
                                             expression.getDataType(), expression.getMaxLength(),
                                             expression.getScale(), expression.getSortOrder(), 
                                             column.getMaxLength(), column.getScale(),
-                                            column.getSortOrder());
+                                            column.getSortOrder(), projectedTable.rowKeyOrderOptimizable());
                                         byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
                                         row.setValue(column, bytes);
                                     }

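The upgrade branch in doPostScannerOpen boils down to moving every cell from the old row key to the recomputed one: a point delete at the old key plus a put of the same cell at the new key, batched through commitBatch. Reduced to a self-contained helper (cell fields abbreviated to family/qualifier/ts/value; a sketch of the pattern, not code from the patch):

    import java.io.IOException;
    import java.util.List;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.client.Delete;
    import org.apache.hadoop.hbase.client.Mutation;
    import org.apache.hadoop.hbase.client.Put;

    static void moveCell(List<Mutation> mutations, byte[] oldRow, byte[] newRow,
            byte[] family, byte[] qualifier, long ts, byte[] value) throws IOException {
        Delete del = new Delete(oldRow);  // point delete of the old version
        del.addDeleteMarker(new KeyValue(oldRow, family, qualifier, ts,
                KeyValue.Type.Delete, new byte[0]));
        mutations.add(del);
        Put put = new Put(newRow);        // same cell re-keyed under the new row
        put.add(new KeyValue(newRow, family, qualifier, ts,
                KeyValue.Type.Put, value));
        mutations.add(put);
    }
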
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PTableProtos.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PTableProtos.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PTableProtos.java
index dd6e303..5b98b5e 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PTableProtos.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PTableProtos.java
@@ -3118,6 +3118,16 @@ public final class PTableProtos {
      * <code>optional int32 baseColumnCount = 25;</code>
      */
     int getBaseColumnCount();
+
+    // optional bool rowKeyOrderOptimizable = 26;
+    /**
+     * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+     */
+    boolean hasRowKeyOrderOptimizable();
+    /**
+     * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+     */
+    boolean getRowKeyOrderOptimizable();
   }
   /**
    * Protobuf type {@code PTable}
@@ -3313,6 +3323,11 @@ public final class PTableProtos {
               baseColumnCount_ = input.readInt32();
               break;
             }
+            case 208: {
+              bitField0_ |= 0x00200000;
+              rowKeyOrderOptimizable_ = input.readBool();
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -3859,6 +3874,22 @@ public final class PTableProtos {
       return baseColumnCount_;
     }
 
+    // optional bool rowKeyOrderOptimizable = 26;
+    public static final int ROWKEYORDEROPTIMIZABLE_FIELD_NUMBER = 26;
+    private boolean rowKeyOrderOptimizable_;
+    /**
+     * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+     */
+    public boolean hasRowKeyOrderOptimizable() {
+      return ((bitField0_ & 0x00200000) == 0x00200000);
+    }
+    /**
+     * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+     */
+    public boolean getRowKeyOrderOptimizable() {
+      return rowKeyOrderOptimizable_;
+    }
+
     private void initFields() {
       schemaNameBytes_ = com.google.protobuf.ByteString.EMPTY;
       tableNameBytes_ = com.google.protobuf.ByteString.EMPTY;
@@ -3885,6 +3916,7 @@ public final class PTableProtos {
       statsTimeStamp_ = 0L;
       storeNulls_ = false;
       baseColumnCount_ = 0;
+      rowKeyOrderOptimizable_ = false;
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -4027,6 +4059,9 @@ public final class PTableProtos {
       if (((bitField0_ & 0x00100000) == 0x00100000)) {
         output.writeInt32(25, baseColumnCount_);
       }
+      if (((bitField0_ & 0x00200000) == 0x00200000)) {
+        output.writeBool(26, rowKeyOrderOptimizable_);
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -4141,6 +4176,10 @@ public final class PTableProtos {
         size += com.google.protobuf.CodedOutputStream
           .computeInt32Size(25, baseColumnCount_);
       }
+      if (((bitField0_ & 0x00200000) == 0x00200000)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBoolSize(26, rowKeyOrderOptimizable_);
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -4277,6 +4316,11 @@ public final class PTableProtos {
         result = result && (getBaseColumnCount()
             == other.getBaseColumnCount());
       }
+      result = result && (hasRowKeyOrderOptimizable() == other.hasRowKeyOrderOptimizable());
+      if (hasRowKeyOrderOptimizable()) {
+        result = result && (getRowKeyOrderOptimizable()
+            == other.getRowKeyOrderOptimizable());
+      }
       result = result &&
           getUnknownFields().equals(other.getUnknownFields());
       return result;
@@ -4390,6 +4434,10 @@ public final class PTableProtos {
         hash = (37 * hash) + BASECOLUMNCOUNT_FIELD_NUMBER;
         hash = (53 * hash) + getBaseColumnCount();
       }
+      if (hasRowKeyOrderOptimizable()) {
+        hash = (37 * hash) + ROWKEYORDEROPTIMIZABLE_FIELD_NUMBER;
+        hash = (53 * hash) + hashBoolean(getRowKeyOrderOptimizable());
+      }
       hash = (29 * hash) + getUnknownFields().hashCode();
       memoizedHashCode = hash;
       return hash;
@@ -4564,6 +4612,8 @@ public final class PTableProtos {
         bitField0_ = (bitField0_ & ~0x00800000);
         baseColumnCount_ = 0;
         bitField0_ = (bitField0_ & ~0x01000000);
+        rowKeyOrderOptimizable_ = false;
+        bitField0_ = (bitField0_ & ~0x02000000);
         return this;
       }
 
@@ -4708,6 +4758,10 @@ public final class PTableProtos {
           to_bitField0_ |= 0x00100000;
         }
         result.baseColumnCount_ = baseColumnCount_;
+        if (((from_bitField0_ & 0x02000000) == 0x02000000)) {
+          to_bitField0_ |= 0x00200000;
+        }
+        result.rowKeyOrderOptimizable_ = rowKeyOrderOptimizable_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -4877,6 +4931,9 @@ public final class PTableProtos {
         if (other.hasBaseColumnCount()) {
           setBaseColumnCount(other.getBaseColumnCount());
         }
+        if (other.hasRowKeyOrderOptimizable()) {
+          setRowKeyOrderOptimizable(other.getRowKeyOrderOptimizable());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -6514,6 +6571,39 @@ public final class PTableProtos {
         return this;
       }
 
+      // optional bool rowKeyOrderOptimizable = 26;
+      private boolean rowKeyOrderOptimizable_ ;
+      /**
+       * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+       */
+      public boolean hasRowKeyOrderOptimizable() {
+        return ((bitField0_ & 0x02000000) == 0x02000000);
+      }
+      /**
+       * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+       */
+      public boolean getRowKeyOrderOptimizable() {
+        return rowKeyOrderOptimizable_;
+      }
+      /**
+       * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+       */
+      public Builder setRowKeyOrderOptimizable(boolean value) {
+        bitField0_ |= 0x02000000;
+        rowKeyOrderOptimizable_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional bool rowKeyOrderOptimizable = 26;</code>
+       */
+      public Builder clearRowKeyOrderOptimizable() {
+        bitField0_ = (bitField0_ & ~0x02000000);
+        rowKeyOrderOptimizable_ = false;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:PTable)
     }
 
@@ -6560,7 +6650,7 @@ public final class PTableProtos {
       "values\030\002 \003(\014\022\033\n\023guidePostsByteCount\030\003 \001(" +
       "\003\022\025\n\rkeyBytesCount\030\004 \001(\003\022\027\n\017guidePostsCo",
       "unt\030\005 \001(\005\022!\n\013pGuidePosts\030\006 \001(\0132\014.PGuideP" +
-      "osts\"\317\004\n\006PTable\022\027\n\017schemaNameBytes\030\001 \002(\014" +
+      "osts\"\357\004\n\006PTable\022\027\n\017schemaNameBytes\030\001 \002(\014" +
       "\022\026\n\016tableNameBytes\030\002 \002(\014\022\036\n\ttableType\030\003 " +
       "\002(\0162\013.PTableType\022\022\n\nindexState\030\004 \001(\t\022\026\n\016" +
       "sequenceNumber\030\005 \002(\003\022\021\n\ttimeStamp\030\006 \002(\003\022" +
@@ -6574,11 +6664,12 @@ public final class PTableProtos {
       "nt\030\022 \001(\014\022\025\n\rphysicalNames\030\023 \003(\014\022\020\n\010tenan" +
       "tId\030\024 \001(\014\022\023\n\013viewIndexId\030\025 \001(\005\022\021\n\tindexT" +
       "ype\030\026 \001(\014\022\026\n\016statsTimeStamp\030\027 \001(\003\022\022\n\nsto" +
-      "reNulls\030\030 \001(\010\022\027\n\017baseColumnCount\030\031 \001(\005*A" +
-      "\n\nPTableType\022\n\n\006SYSTEM\020\000\022\010\n\004USER\020\001\022\010\n\004VI" +
-      "EW\020\002\022\t\n\005INDEX\020\003\022\010\n\004JOIN\020\004B@\n(org.apache." +
-      "phoenix.coprocessor.generatedB\014PTablePro" +
-      "tosH\001\210\001\001\240\001\001"
+      "reNulls\030\030 \001(\010\022\027\n\017baseColumnCount\030\031 \001(\005\022\036" +
+      "\n\026rowKeyOrderOptimizable\030\032 \001(\010*A\n\nPTable" +
+      "Type\022\n\n\006SYSTEM\020\000\022\010\n\004USER\020\001\022\010\n\004VIEW\020\002\022\t\n\005" +
+      "INDEX\020\003\022\010\n\004JOIN\020\004B@\n(org.apache.phoenix." +
+      "coprocessor.generatedB\014PTableProtosH\001\210\001\001",
+      "\240\001\001"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -6602,7 +6693,7 @@ public final class PTableProtos {
           internal_static_PTable_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_PTable_descriptor,
-              new java.lang.String[] { "SchemaNameBytes", "TableNameBytes", "TableType", "IndexState", "SequenceNumber", "TimeStamp", "PkNameBytes", "BucketNum", "Columns", "Indexes", "IsImmutableRows", "GuidePosts", "DataTableNameBytes", "DefaultFamilyName", "DisableWAL", "MultiTenant", "ViewType", "ViewStatement", "PhysicalNames", "TenantId", "ViewIndexId", "IndexType", "StatsTimeStamp", "StoreNulls", "BaseColumnCount", });
+              new java.lang.String[] { "SchemaNameBytes", "TableNameBytes", "TableType", "IndexState", "SequenceNumber", "TimeStamp", "PkNameBytes", "BucketNum", "Columns", "Indexes", "IsImmutableRows", "GuidePosts", "DataTableNameBytes", "DefaultFamilyName", "DisableWAL", "MultiTenant", "ViewType", "ViewStatement", "PhysicalNames", "TenantId", "ViewIndexId", "IndexType", "StatsTimeStamp", "StoreNulls", "BaseColumnCount", "RowKeyOrderOptimizable", });
           return null;
         }
       };

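All of the regenerated code above corresponds to a single line added to the PTable message in PTable.proto, per the embedded comments: optional bool rowKeyOrderOptimizable = 26. Because the field is optional with a false default, a PTable serialized by an older client or server deserializes as not optimizable, which is the safe interpretation. Round trip through the generated API (existingProto is a placeholder for any PTableProtos.PTable instance):

    PTableProtos.PTable.Builder builder = existingProto.toBuilder();
    builder.setRowKeyOrderOptimizable(true);
    boolean present = builder.hasRowKeyOrderOptimizable(); // true once set
    boolean value   = builder.getRowKeyOrderOptimizable(); // true
    // On a proto without field 26, has...() is false and get...() returns false.
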
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/exception/SQLExceptionCode.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/exception/SQLExceptionCode.java b/phoenix-core/src/main/java/org/apache/phoenix/exception/SQLExceptionCode.java
index 195450c..acc3c86 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/exception/SQLExceptionCode.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/exception/SQLExceptionCode.java
@@ -249,6 +249,7 @@ public enum SQLExceptionCode {
     NO_LOCAL_INDEXES(1054, "43A11", "Local secondary indexes are not supported for HBase versions " + 
         MetaDataUtil.decodeHBaseVersionAsString(PhoenixDatabaseMetaData.MIN_LOCAL_SI_VERSION_DISALLOW) + " through " + MetaDataUtil.decodeHBaseVersionAsString(PhoenixDatabaseMetaData.MAX_LOCAL_SI_VERSION_DISALLOW) + " inclusive."),
     UNALLOWED_LOCAL_INDEXES(1055, "43A12", "Local secondary indexes are configured to not be allowed."),
+    DESC_VARBINARY_NOT_SUPPORTED(1056, "43A13", "Descending VARBINARY columns not supported"),
 
     /** Sequence related */
     SEQUENCE_ALREADY_EXIST(1200, "42Z00", "Sequence already exists.", new Factory() {

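A hedged sketch of how the new 1056/43A13 code might be raised at a DDL check site. The guard, method, and variable names below are hypothetical, not the commit's actual call site; SQLExceptionInfo.Builder is Phoenix's usual factory for these exceptions:

    import java.sql.SQLException;
    import org.apache.phoenix.exception.SQLExceptionCode;
    import org.apache.phoenix.exception.SQLExceptionInfo;
    import org.apache.phoenix.schema.SortOrder;
    import org.apache.phoenix.schema.types.PDataType;
    import org.apache.phoenix.schema.types.PVarbinary;

    // Hypothetical guard: reject a VARBINARY column declared DESC.
    static void checkDescVarbinary(PDataType<?> type, SortOrder order, String columnName)
            throws SQLException {
        if (type == PVarbinary.INSTANCE && order == SortOrder.DESC) {
            throw new SQLExceptionInfo.Builder(SQLExceptionCode.DESC_VARBINARY_NOT_SUPPORTED)
                    .setColumnName(columnName)
                    .build().buildException();
        }
    }
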
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/execute/BaseQueryPlan.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/execute/BaseQueryPlan.java b/phoenix-core/src/main/java/org/apache/phoenix/execute/BaseQueryPlan.java
index 5739e3e..5f63df8 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/execute/BaseQueryPlan.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/execute/BaseQueryPlan.java
@@ -156,6 +156,7 @@ public abstract class BaseQueryPlan implements QueryPlan {
         return iterator(Collections.<SQLCloseable>emptyList(), scanGrouper);
     }
     
+    @Override
     public final ResultIterator iterator() throws SQLException {
         return iterator(Collections.<SQLCloseable>emptyList(), DefaultParallelScanGrouper.getInstance());
     }
@@ -172,6 +173,7 @@ public abstract class BaseQueryPlan implements QueryPlan {
         // Set miscellaneous scan attributes. This is the last chance to set them before we
         // clone the scan for each parallelized chunk.
         Scan scan = context.getScan();
+        PTable table = context.getCurrentTable().getTable();
         
         if (OrderBy.REV_ROW_KEY_ORDER_BY.equals(orderBy)) {
             ScanUtil.setReversed(scan);
@@ -203,11 +205,12 @@ public abstract class BaseQueryPlan implements QueryPlan {
         } else {
             ScanUtil.setTimeRange(scan, context.getScanTimeRange());
         }
+        
         ScanUtil.setTenantId(scan, connection.getTenantId() == null ? null : connection.getTenantId().getBytes());
         String customAnnotations = LogUtil.customAnnotationsToString(connection);
         ScanUtil.setCustomAnnotations(scan, customAnnotations == null ? null : customAnnotations.getBytes());
         // Set local index related scan attributes. 
-        if (context.getCurrentTable().getTable().getIndexType() == IndexType.LOCAL) {
+        if (table.getIndexType() == IndexType.LOCAL) {
             ScanUtil.setLocalIndex(scan);
             Set<PColumn> dataColumns = context.getDataColumns();
             // If any data columns to join back from data table are present then we set following attributes
@@ -221,8 +224,8 @@ public abstract class BaseQueryPlan implements QueryPlan {
                 KeyValueSchema schema = ProjectedColumnExpression.buildSchema(dataColumns);
                 // Set key value schema of the data columns.
                 serializeSchemaIntoScan(scan, schema);
-                String parentSchema = context.getCurrentTable().getTable().getParentSchemaName().getString();
-                String parentTable = context.getCurrentTable().getTable().getParentTableName().getString();
+                String parentSchema = table.getParentSchemaName().getString();
+                String parentTable = table.getParentTableName().getString();
                 final ParseNodeFactory FACTORY = new ParseNodeFactory();
                 TableRef dataTableRef =
                         FromCompiler.getResolver(

http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/execute/DescVarLengthFastByteComparisons.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/execute/DescVarLengthFastByteComparisons.java b/phoenix-core/src/main/java/org/apache/phoenix/execute/DescVarLengthFastByteComparisons.java
new file mode 100644
index 0000000..40960e0
--- /dev/null
+++ b/phoenix-core/src/main/java/org/apache/phoenix/execute/DescVarLengthFastByteComparisons.java
@@ -0,0 +1,219 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.phoenix.execute;
+
+import java.lang.reflect.Field;
+import java.nio.ByteOrder;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+
+import sun.misc.Unsafe;
+
+import com.google.common.primitives.Longs;
+import com.google.common.primitives.UnsignedBytes;
+
+/**
+ * Utility code to do optimized byte-array comparison.
+ * This is borrowed from org.apache.hadoop.io.FastByteComparisons
+ * which was borrowed and slightly modified from Guava's {@link UnsignedBytes}
+ * class to be able to compare arrays that start at non-zero offsets.
+ * 
+ * The only difference is that we sort shorter byte arrays as *larger*
+ * than longer ones when all of their shared bytes are the same.
+ */
+@SuppressWarnings("restriction")
+public class DescVarLengthFastByteComparisons {
+
+    private DescVarLengthFastByteComparisons() {}
+
+    /**
+     * Lexicographically compare two byte arrays.
+     */
+    public static int compareTo(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+        return LexicographicalComparerHolder.BEST_COMPARER.compareTo(b1, s1, l1, b2, s2, l2);
+    }
+
+    private interface Comparer<T> {
+        abstract public int compareTo(T buffer1, int offset1, int length1, T buffer2, int offset2, int length2);
+    }
+
+    private static Comparer<byte[]> lexicographicalComparerJavaImpl() {
+        return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
+    }
+
+    /**
+     * Provides a lexicographical comparer implementation; either a Java implementation or a faster implementation based
+     * on {@link Unsafe}.
+     * <p>
+     * Uses reflection to gracefully fall back to the Java implementation if {@code Unsafe} isn't available.
+     */
+    private static class LexicographicalComparerHolder {
+        static final String UNSAFE_COMPARER_NAME = LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";
+
+        static final Comparer<byte[]> BEST_COMPARER = getBestComparer();
+
+        /**
+         * Returns the Unsafe-using Comparer, or falls back to the pure-Java implementation if unable to do so.
+         */
+        static Comparer<byte[]> getBestComparer() {
+            try {
+                Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME);
+
+                // yes, UnsafeComparer does implement Comparer<byte[]>
+                @SuppressWarnings("unchecked")
+                Comparer<byte[]> comparer = (Comparer<byte[]>)theClass.getEnumConstants()[0];
+                return comparer;
+            } catch (Throwable t) { // ensure we really catch *everything*
+                return lexicographicalComparerJavaImpl();
+            }
+        }
+
+        private enum PureJavaComparer implements Comparer<byte[]> {
+            INSTANCE;
+
+            @Override
+            public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) {
+                // Short circuit equal case
+                if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) { return 0; }
+                // Bring WritableComparator code local
+                int end1 = offset1 + length1;
+                int end2 = offset2 + length2;
+                for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+                    int a = (buffer1[i] & 0xff);
+                    int b = (buffer2[j] & 0xff);
+                    if (a != b) { return a - b; }
+                }
+                return length2 - length1;
+            }
+        }
+
+        @SuppressWarnings("unused")
+        // used via reflection
+        private enum UnsafeComparer implements Comparer<byte[]> {
+            INSTANCE;
+
+            static final Unsafe theUnsafe;
+
+            /** The offset to the first element in a byte array. */
+            static final int BYTE_ARRAY_BASE_OFFSET;
+
+            static {
+                theUnsafe = (Unsafe)AccessController.doPrivileged(new PrivilegedAction<Object>() {
+                    @Override
+                    public Object run() {
+                        try {
+                            Field f = Unsafe.class.getDeclaredField("theUnsafe");
+                            f.setAccessible(true);
+                            return f.get(null);
+                        } catch (NoSuchFieldException e) {
+                            // It doesn't matter what we throw;
+                            // it's swallowed in getBestComparer().
+                            throw new Error();
+                        } catch (IllegalAccessException e) {
+                            throw new Error();
+                        }
+                    }
+                });
+
+                BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);
+
+                // sanity check - this should never fail
+                if (theUnsafe.arrayIndexScale(byte[].class) != 1) { throw new AssertionError(); }
+            }
+
+            static final boolean littleEndian = ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);
+
+            /**
+             * Returns true if x1 is less than x2, when both values are treated as unsigned.
+             */
+            static boolean lessThanUnsigned(long x1, long x2) {
+                return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE);
+            }
+
+            /**
+             * Lexicographically compare two arrays.
+             *
+             * @param buffer1
+             *            left operand
+             * @param buffer2
+             *            right operand
+             * @param offset1
+             *            Where to start comparing in the left buffer
+             * @param offset2
+             *            Where to start comparing in the right buffer
+             * @param length1
+             *            How much to compare from the left buffer
+             * @param length2
+             *            How much to compare from the right buffer
+             * @return 0 if equal, < 0 if left is less than right, etc.
+             */
+            @Override
+            public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, int offset2, int length2) {
+                // Short circuit equal case
+                if (buffer1 == buffer2 && offset1 == offset2 && length1 == length2) { return 0; }
+                int minLength = Math.min(length1, length2);
+                int minWords = minLength / Longs.BYTES;
+                int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
+                int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
+
+                /*
+                 * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than comparing
+                 * 4 bytes at a time even on 32-bit. On the other hand, it is substantially faster on 64-bit.
+                 */
+                for (int i = 0; i < minWords * Longs.BYTES; i += Longs.BYTES) {
+                    long lw = theUnsafe.getLong(buffer1, offset1Adj + (long)i);
+                    long rw = theUnsafe.getLong(buffer2, offset2Adj + (long)i);
+                    long diff = lw ^ rw;
+
+                    if (diff != 0) {
+                        if (!littleEndian) { return lessThanUnsigned(lw, rw) ? -1 : 1; }
+
+                        // Use binary search
+                        int n = 0;
+                        int y;
+                        int x = (int)diff;
+                        if (x == 0) {
+                            x = (int)(diff >>> 32);
+                            n = 32;
+                        }
+
+                        y = x << 16;
+                        if (y == 0) {
+                            n += 16;
+                        } else {
+                            x = y;
+                        }
+
+                        y = x << 8;
+                        if (y == 0) {
+                            n += 8;
+                        }
+                        return (int)(((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL));
+                    }
+                }
+
+                // The epilogue to cover the last (minLength % 8) elements.
+                for (int i = minWords * Longs.BYTES; i < minLength; i++) {
+                    int result = UnsignedBytes.compare(buffer1[offset1 + i], buffer2[offset2 + i]);
+                    if (result != 0) { return result; }
+                }
+                return length2 - length1;
+            }
+        }
+    }
+}
\ No newline at end of file
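
To make the inverted tie-break concrete: with DESC-encoded variable-length keys, a shorter value must sort after any extension of itself once the bytes are inverted, so when one array is a prefix of the other the shorter one is treated as larger. A quick usage sketch:

    byte[] shorter = { 'a', 'b' };
    byte[] longer  = { 'a', 'b', 'c' };
    // Plain lexicographic order would put "ab" first; here the shared bytes tie
    // and the comparer returns length2 - length1, so the shorter array is larger.
    int cmp = DescVarLengthFastByteComparisons.compareTo(
            shorter, 0, shorter.length, longer, 0, longer.length);
    assert cmp > 0;   // the shorter array sorts after the longer one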

http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/ArrayConstructorExpression.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/ArrayConstructorExpression.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/ArrayConstructorExpression.java
index da581ce..d1b4a0e 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/ArrayConstructorExpression.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/ArrayConstructorExpression.java
@@ -123,7 +123,7 @@ public class ArrayConstructorExpression extends BaseCompoundExpression {
             if (position >= 0) position = elements.length;
             if (!baseType.isFixedWidth()) {
                // Double separator byte to show the end of the non-null array
-                PArrayDataType.writeEndSeperatorForVarLengthArray(oStream);
+                PArrayDataType.writeEndSeperatorForVarLengthArray(oStream, getSortOrder());
                 noOfElements = PArrayDataType.serailizeOffsetArrayIntoStream(oStream, byteStream, noOfElements,
                         offsetPos[offsetPos.length - 1], offsetPos);
                 PArrayDataType.serializeHeaderInfoIntoStream(oStream, noOfElements);

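The extra SortOrder argument above exists because a DESC array's bytes are stored inverted, so its terminator must be inverted too or serialized arrays stop comparing correctly byte-for-byte. A sketch of the selection, assuming the 0x00/0xFF separator convention this commit introduces (the real constants live in QueryConstants):

    static byte endSeparator(SortOrder sortOrder) {
        final byte SEPARATOR_BYTE = (byte) 0x00;      // terminator byte for ASC arrays
        final byte DESC_SEPARATOR_BYTE = (byte) 0xFF; // one's complement, used for DESC
        return sortOrder == SortOrder.DESC ? DESC_SEPARATOR_BYTE : SEPARATOR_BYTE;
    }
    // The array writer appends this byte twice to mark the end of a non-null array.
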
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/OrderByExpression.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/OrderByExpression.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/OrderByExpression.java
index 14bba68..456e58b 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/OrderByExpression.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/OrderByExpression.java
@@ -26,6 +26,7 @@ import java.io.IOException;
 
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.phoenix.schema.SortOrder;
 
 /**
  * A container for a column that appears in ORDER BY clause.
@@ -83,7 +84,17 @@ public class OrderByExpression implements Writable {
     
     @Override
     public String toString() {
-        return this.getExpression() + (isAscending ? "" : " DESC") + (isNullsLast ? " NULLS LAST" : "");
+        Expression e = this.getExpression();
+        boolean isNullsLast = this.isNullsLast;
+        boolean isAscending = this.isAscending;
+        // Flip back here based on sort order, as the compiler
+        // flips this, but we want to display the original back
+        // to the user.
+        if (e.getSortOrder() == SortOrder.DESC) {
+            isAscending = !isAscending;
+            isNullsLast = !isNullsLast;
+        }
+        return e + (isAscending ? "" : " DESC") + (isNullsLast ? " NULLS LAST" : "");
     }
     
     @Override

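The flip matters for EXPLAIN and debug output: for a column stored DESC the compiler inverts the requested direction internally (the bytes are already inverted), so printing the compiled flags verbatim would show the opposite of what the user wrote. A hypothetical helper mirroring the toString() logic:

    static String render(String name, SortOrder declared,
                         boolean compiledAscending, boolean compiledNullsLast) {
        boolean asc = compiledAscending;
        boolean nullsLast = compiledNullsLast;
        if (declared == SortOrder.DESC) {  // undo the compiler's flip for display
            asc = !asc;
            nullsLast = !nullsLast;
        }
        return name + (asc ? "" : " DESC") + (nullsLast ? " NULLS LAST" : "");
    }
    // render("K", SortOrder.DESC, true, false) -> "K DESC NULLS LAST"
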
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/RowValueConstructorExpression.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/RowValueConstructorExpression.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/RowValueConstructorExpression.java
index 940f909..481368c 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/RowValueConstructorExpression.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/RowValueConstructorExpression.java
@@ -184,7 +184,7 @@ public class RowValueConstructorExpression extends BaseCompoundExpression {
                         } else {
                             output.write(tempPtr.get(), tempPtr.getOffset(), tempPtr.getLength());
                             if (!childType.isFixedWidth()) {
-                                output.write(QueryConstants.SEPARATOR_BYTE);
+                                output.write(SchemaUtil.getSeparatorByte(true, false, child));
                             }
                             if (previousCarryOver) {
                                 previousCarryOver = !ByteUtil.previousKey(output.getBuffer(), output.size());
@@ -193,8 +193,12 @@ public class RowValueConstructorExpression extends BaseCompoundExpression {
                     }
                     int outputSize = output.size();
                     byte[] outputBytes = output.getBuffer();
+                    // Don't remove trailing separator byte unless it's the one for ASC
+                    // as otherwise we need it to ensure sort order is correct
                     for (int k = expressionCount -1 ; 
-                            k >=0 &&  getChildren().get(k).getDataType() != null && !getChildren().get(k).getDataType().isFixedWidth() && outputBytes[outputSize-1] == QueryConstants.SEPARATOR_BYTE ; k--) {
+                            k >=0 &&  getChildren().get(k).getDataType() != null 
+                                  && !getChildren().get(k).getDataType().isFixedWidth() 
+                                  && outputBytes[outputSize-1] == QueryConstants.SEPARATOR_BYTE ; k--) {
                         outputSize--;
                     }
                     ptr.set(outputBytes, 0, outputSize);

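The loop above stops at the first trailing byte that is not the ASC separator: a DESC field's inverted terminator must be kept, since without it a value would no longer sort after its own extensions. A simplified sketch of the trim rule (the real loop also checks each child's type, as shown in the diff):

    static int trimTrailingAscSeparators(byte[] bytes, int size) {
        final byte SEPARATOR_BYTE = (byte) 0x00;  // only the ASC separator is removable
        while (size > 0 && bytes[size - 1] == SEPARATOR_BYTE) {
            size--;
        }
        return size;
    }
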
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayConcatFunction.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayConcatFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayConcatFunction.java
index d2b846a..77790b9 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayConcatFunction.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayConcatFunction.java
@@ -50,6 +50,7 @@ public class ArrayConcatFunction extends ArrayModifierFunction {
         if (!getLHSExpr().evaluate(tuple, ptr)|| ptr.getLength() == 0){
             return false;
         }
+        boolean isLHSRowKeyOrderOptimized = PArrayDataType.isRowKeyOrderOptimized(getLHSExpr().getDataType(), getLHSExpr().getSortOrder(), ptr);
 
         int actualLengthOfArray1 = Math.abs(PArrayDataType.getArrayLength(ptr, getLHSBaseType(), getLHSExpr().getMaxLength()));
         int lengthArray1 = ptr.getLength();
@@ -62,8 +63,13 @@ public class ArrayConcatFunction extends ArrayModifierFunction {
 
         checkSizeCompatibility(ptr, getLHSExpr(), getLHSExpr().getDataType(), getRHSExpr(),getRHSExpr().getDataType());
 
-        // Coerce array2 to array1 type
-        coerceBytes(ptr, getLHSExpr(), getLHSExpr().getDataType(), getRHSExpr(),getRHSExpr().getDataType());
+        // FIXME: calling the version of coerceBytes that takes into account the separator used by the LHS.
+        // If the RHS does not have the same separator, it'll be coerced to use it. It's unclear
+        // if we should do the same for all classes derived from the base class.
+        // Coerce RHS to LHS type
+        getLHSExpr().getDataType().coerceBytes(ptr, null, getRHSExpr().getDataType(), getRHSExpr().getMaxLength(),
+                getRHSExpr().getScale(), getRHSExpr().getSortOrder(), getLHSExpr().getMaxLength(),
+                getLHSExpr().getScale(), getLHSExpr().getSortOrder(), isLHSRowKeyOrderOptimized);
         return modifierFunction(ptr, lengthArray1, offsetArray1, array1Bytes, getLHSBaseType(), actualLengthOfArray1, getMaxLength(), getLHSExpr());
     }
 
@@ -72,6 +78,7 @@ public class ArrayConcatFunction extends ArrayModifierFunction {
                                        byte[] array1Bytes, PDataType baseDataType, int actualLengthOfArray1, Integer maxLength,
                                        Expression array1Exp) {
         int actualLengthOfArray2 = Math.abs(PArrayDataType.getArrayLength(ptr, baseDataType, array1Exp.getMaxLength()));
+        // FIXME: concatArrays will be fine if it's copying the separator bytes, including the terminating bytes.
         return PArrayDataType.concatArrays(ptr, len, offset, array1Bytes, baseDataType, actualLengthOfArray1, actualLengthOfArray2);
     }
 

http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayModifierFunction.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayModifierFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayModifierFunction.java
index 3177c29..9bd7372 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayModifierFunction.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/ArrayModifierFunction.java
@@ -27,7 +27,8 @@ import org.apache.phoenix.expression.LiteralExpression;
 import org.apache.phoenix.schema.SortOrder;
 import org.apache.phoenix.schema.TypeMismatchException;
 import org.apache.phoenix.schema.tuple.Tuple;
-import org.apache.phoenix.schema.types.*;
+import org.apache.phoenix.schema.types.PArrayDataType;
+import org.apache.phoenix.schema.types.PDataType;
 
 public abstract class ArrayModifierFunction extends ScalarFunction {
 

http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LpadFunction.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LpadFunction.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LpadFunction.java
index 9dfc8fc..eee03bf 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LpadFunction.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/function/LpadFunction.java
@@ -22,12 +22,12 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.phoenix.expression.Expression;
 import org.apache.phoenix.parse.FunctionParseNode.Argument;
 import org.apache.phoenix.parse.FunctionParseNode.BuiltInFunction;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.schema.tuple.Tuple;
 import org.apache.phoenix.schema.types.PChar;
-import org.apache.phoenix.schema.types.PInteger;
 import org.apache.phoenix.schema.types.PDataType;
+import org.apache.phoenix.schema.types.PInteger;
 import org.apache.phoenix.schema.types.PVarchar;
-import org.apache.phoenix.schema.SortOrder;
-import org.apache.phoenix.schema.tuple.Tuple;
 import org.apache.phoenix.util.StringUtil;
 
 /**
@@ -146,7 +146,7 @@ public class LpadFunction extends ScalarFunction {
         int numFillCharsPrepended = padLen % fillLen;
 
         // byte length of the input string
-        int strByteLength = getSubstringByteLength(ptr, ptr.getLength(), strSortOrder, isStrCharType);
+        int strByteLength = ptr.getLength();
         // byte length of the fill string
         int fillByteLength = getSubstringByteLength(fillPtr, fillPtr.getLength(), fillSortOrder, isFillCharType);
         // byte length of the full fills to be prepended

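A worked example of the padding arithmetic above, counting in characters (per standard LPAD semantics):

    int targetLen = 8, strLen = 3, fillLen = 2;   // LPAD('abc', 8, 'xy')
    int padLen = targetLen - strLen;              // 5 pad characters needed
    int fullFills = padLen / fillLen;             // 2 full copies of the fill
    int numFillCharsPrepended = padLen % fillLen; // 1 leftover fill character
    // Fill repeated and truncated to padLen: "xyxyx" + "abc" -> "xyxyxabc"
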
http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/expression/util/regex/JONIPattern.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/expression/util/regex/JONIPattern.java b/phoenix-core/src/main/java/org/apache/phoenix/expression/util/regex/JONIPattern.java
index 69f9eaf..af5bc2b 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/expression/util/regex/JONIPattern.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/expression/util/regex/JONIPattern.java
@@ -158,8 +158,9 @@ public class JONIPattern extends AbstractBasePattern implements AbstractBaseSpli
 
     private boolean
             split(byte[] srcBytes, int srcOffset, int srcLen, ImmutableBytesWritable outPtr) {
+        SortOrder sortOrder = SortOrder.ASC;
         PArrayDataTypeBytesArrayBuilder builder =
-                new PArrayDataTypeBytesArrayBuilder(PVarchar.INSTANCE, SortOrder.ASC);
+                new PArrayDataTypeBytesArrayBuilder(PVarchar.INSTANCE, sortOrder);
         int srcRange = srcOffset + srcLen;
         Matcher matcher = pattern.matcher(srcBytes, 0, srcRange);
         int cur = srcOffset;
@@ -191,7 +192,7 @@ public class JONIPattern extends AbstractBasePattern implements AbstractBaseSpli
                 break;
             }
         }
-        byte[] bytes = builder.getBytesAndClose();
+        byte[] bytes = builder.getBytesAndClose(SortOrder.ASC);
         if (bytes == null) return false;
         outPtr.set(bytes);
         return true;

http://git-wip-us.apache.org/repos/asf/phoenix/blob/b31608f9/phoenix-core/src/main/java/org/apache/phoenix/filter/SkipScanFilter.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/filter/SkipScanFilter.java b/phoenix-core/src/main/java/org/apache/phoenix/filter/SkipScanFilter.java
index 1923856..4dc888d 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/filter/SkipScanFilter.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/filter/SkipScanFilter.java
@@ -37,7 +37,6 @@ import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.io.Writable;
 import org.apache.phoenix.query.KeyRange;
 import org.apache.phoenix.query.KeyRange.Bound;
-import org.apache.phoenix.query.QueryConstants;
 import org.apache.phoenix.schema.RowKeySchema;
 import org.apache.phoenix.util.ByteUtil;
 import org.apache.phoenix.util.ScanUtil;
@@ -517,7 +516,7 @@ public class SkipScanFilter extends FilterBase implements Writable {
         startKeyLength = length;
         // Add separator byte if we're at the end of the buffer, since trailing separator bytes are stripped
         if (ptr.getOffset() + ptr.getLength() == offset + length && i-1 > 0 && !schema.getField(i-1).getDataType().isFixedWidth()) {
-            startKey[startKeyLength++] = QueryConstants.SEPARATOR_BYTE;
+            startKey[startKeyLength++] = SchemaUtil.getSeparatorByte(schema.rowKeyOrderOptimizable(), ptr.getLength()==0, schema.getField(i-1));
         }
         startKeyLength += setKey(Bound.LOWER, startKey, startKeyLength, i);
         return length;

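The SchemaUtil.getSeparatorByte call above picks the separator from three inputs. A sketch of the decision it implies; treat this as an approximation, since the real overloads take a Field or Expression and read its sort order rather than taking a SortOrder directly:

    static byte separatorByte(boolean rowKeyOrderOptimizable, boolean isNullValue,
                              SortOrder sortOrder) {
        // Legacy tables (written before this fix) and null values keep the 0x00
        // separator for compatibility; an optimizable DESC field inverts it to
        // 0xFF so shorter values sort after their extensions.
        if (!rowKeyOrderOptimizable || isNullValue || sortOrder != SortOrder.DESC) {
            return (byte) 0x00;
        }
        return (byte) 0xFF;
    }
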
