phoenix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ramkris...@apache.org
Subject [1/4] phoenix git commit: Phoenix-1453 - Collect row counts per region in stats table (Ramkrishna)
Date Wed, 14 Jan 2015 04:19:53 GMT
Repository: phoenix
Updated Branches:
  refs/heads/4.0 ba3fa3424 -> 2fb019a2f


http://git-wip-us.apache.org/repos/asf/phoenix/blob/2fb019a2/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java
b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java
index bb624f8..f46c3c1 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java
@@ -43,10 +43,10 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
 import org.apache.phoenix.jdbc.PhoenixDatabaseMetaData;
 import org.apache.phoenix.query.QueryConstants;
+import org.apache.phoenix.schema.SortOrder;
 import org.apache.phoenix.schema.types.PDate;
 import org.apache.phoenix.schema.types.PLong;
 import org.apache.phoenix.schema.types.PVarbinary;
-import org.apache.phoenix.schema.SortOrder;
 import org.apache.phoenix.util.ByteUtil;
 import org.apache.phoenix.util.ServerUtil;
 import org.apache.phoenix.util.TimeKeeper;
@@ -100,49 +100,81 @@ public class StatisticsWriter implements Closeable {
         statsWriterTable.close();
     }
 
-    public void splitStats(HRegion p, HRegion l, HRegion r, StatisticsCollector tracker,
ImmutableBytesPtr cfKey, List<Mutation> mutations) throws IOException {
+    public void splitStats(HRegion p, HRegion l, HRegion r, StatisticsCollector tracker,
ImmutableBytesPtr cfKey,
+            List<Mutation> mutations) throws IOException {
         if (tracker == null) { return; }
         boolean useMaxTimeStamp = clientTimeStamp == StatisticsCollector.NO_TIMESTAMP;
         if (!useMaxTimeStamp) {
             mutations.add(getLastStatsUpdatedTimePut(clientTimeStamp));
         }
         long readTimeStamp = useMaxTimeStamp ? HConstants.LATEST_TIMESTAMP : clientTimeStamp;
-        Result result = StatisticsUtil.readRegionStatistics(statsReaderTable, tableName,
cfKey, p.getRegionName(), readTimeStamp);
+        Result result = StatisticsUtil.readRegionStatistics(statsReaderTable, tableName,
cfKey, p.getRegionName(),
+                readTimeStamp);
         if (result != null && !result.isEmpty()) {
         	Cell cell = result.getColumnLatestCell(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES,
PhoenixDatabaseMetaData.GUIDE_POSTS_BYTES);
-
+        	Cell rowCountCell = result.getColumnLatestCell(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES,
PhoenixDatabaseMetaData.GUIDE_POSTS_ROW_COUNT_BYTES);
+            long rowCount = 0;
         	if (cell != null) {
                 long writeTimeStamp = useMaxTimeStamp ? cell.getTimestamp() : clientTimeStamp;
 
-                GuidePostsInfo guidePosts = GuidePostsInfo.fromBytes(cell.getValueArray(),
cell.getValueOffset(), cell.getValueLength());
+                GuidePostsInfo guidePostsRegionInfo = GuidePostsInfo.deserializeGuidePostsInfo(cell.getValueArray(),
+                        cell.getValueOffset(), cell.getValueLength(), rowCount);
                 byte[] pPrefix = StatisticsUtil.getRowKey(tableName, cfKey, p.getRegionName());
                 mutations.add(new Delete(pPrefix, writeTimeStamp));
                 
 	        	long byteSize = 0;
-	        	Cell byteSizeCell = result.getColumnLatestCell(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES,
PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH_BYTES);
-	        	if (byteSizeCell != null) {
-	        		byteSize = PLong.INSTANCE.getCodec().decodeLong(byteSizeCell.getValueArray(),
byteSizeCell.getValueOffset(), SortOrder.getDefault()) / 2;
-	        	}
-	        	int midEndIndex, midStartIndex;
-	            int index = Collections.binarySearch(guidePosts.getGuidePosts(), r.getStartKey(),
Bytes.BYTES_COMPARATOR);
-	            if (index < 0) {
-	                midEndIndex = midStartIndex = -(index + 1);
-	            } else {
-	                // For an exact match, we want to get rid of the exact match guidepost,
-	                // since it's replaced by the region boundary.
-	                midEndIndex = index;
-	                midStartIndex = index + 1;
-	            }
+                Cell byteSizeCell = result.getColumnLatestCell(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES,
+                        PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH_BYTES);
+                int index = Collections.binarySearch(guidePostsRegionInfo.getGuidePosts(),
r.getStartKey(),
+                        Bytes.BYTES_COMPARATOR);
+                int size = guidePostsRegionInfo.getGuidePosts().size();
+                int midEndIndex, midStartIndex;
+                if (index < 0) {
+                    midEndIndex = midStartIndex = -(index + 1);
+                } else {
+                    // For an exact match, we want to get rid of the exact match guidepost,
+                    // since it's replaced by the region boundary.
+                    midEndIndex = index;
+                    midStartIndex = index + 1;
+                }
+                double per = (double)(midEndIndex + 1) / size;
+                long leftRowCount = 0;
+                long rightRowCount = 0;
+                long leftByteCount = 0;
+                long rightByteCount = 0;
+                if (rowCountCell != null) {
+                    rowCount = PLong.INSTANCE.getCodec().decodeLong(rowCountCell.getValueArray(),
+                            rowCountCell.getValueOffset(), SortOrder.getDefault());
+                    leftRowCount = (long)(per * rowCount);
+                    if (leftRowCount == rowCount) {
+                        leftRowCount = (rightRowCount = rowCount / 2);
+                    } else {
+                        rightRowCount = (long)((1 - per) * rowCount);
+                    }
+                }
+                if (byteSizeCell != null) {
+                    byteSize = PLong.INSTANCE.getCodec().decodeLong(byteSizeCell.getValueArray(),
+                            byteSizeCell.getValueOffset(), SortOrder.getDefault());
+                    leftByteCount = (long)(per * byteSize);
+                    if (leftByteCount == byteSize) {
+                        leftByteCount = (rightByteCount = byteSize / 2);
+                    } else {
+                        rightByteCount = (long)((1 - per) * byteSize);
+                    }
+                }
 	            if (midEndIndex > 0) {
-	                GuidePostsInfo lguidePosts = new GuidePostsInfo(byteSize, guidePosts.getGuidePosts().subList(0,
midEndIndex));
-	                tracker.clear();
-	                tracker.addGuidePost(cfKey, lguidePosts, byteSize, cell.getTimestamp());
+	                GuidePostsInfo lguidePosts = new GuidePostsInfo(leftByteCount, guidePostsRegionInfo
+                            .getGuidePosts().subList(0, midEndIndex), leftRowCount);
+                    tracker.clear();
+	                tracker.addGuidePost(cfKey, lguidePosts, leftByteCount, cell.getTimestamp());
 	                addStats(l.getRegionName(), tracker, cfKey, mutations);
 	            }
-	            if (midStartIndex < guidePosts.getGuidePosts().size()) {
-	                GuidePostsInfo rguidePosts = new GuidePostsInfo(byteSize, guidePosts.getGuidePosts().subList(midStartIndex,
guidePosts.getGuidePosts().size()));
+	            if (midStartIndex < size) {
+	                GuidePostsInfo rguidePosts = new GuidePostsInfo(rightByteCount, guidePostsRegionInfo
+                            .getGuidePosts().subList(midStartIndex, size),
+                            rightRowCount);
 	                tracker.clear();
-	                tracker.addGuidePost(cfKey, rguidePosts, byteSize, cell.getTimestamp());
+	                tracker.addGuidePost(cfKey, rguidePosts, rightByteCount, cell.getTimestamp());
 	                addStats(r.getRegionName(), tracker, cfKey, mutations);
 	            }
         	}
@@ -150,7 +182,7 @@ public class StatisticsWriter implements Closeable {
     }
     
     /**
-     * Update a list of statistics for a given region.  If the ANALYZE <tablename>
query is issued
+     * Update a list of statistics for a given region.  If the UPDATE STATISTICS <tablename>
query is issued
      * then we use Upsert queries to update the table
      * If the region gets splitted or the major compaction happens we update using HTable.put()
      * @param tracker - the statistics tracker
@@ -160,7 +192,8 @@ public class StatisticsWriter implements Closeable {
      *             if we fail to do any of the puts. Any single failure will prevent any
future attempts for the remaining list of stats to
      *             update
      */
-    public void addStats(byte[] regionName, StatisticsCollector tracker, ImmutableBytesPtr
cfKey, List<Mutation> mutations) throws IOException {
+    public void addStats(byte[] regionName, StatisticsCollector tracker, ImmutableBytesPtr
cfKey,
+            List<Mutation> mutations) throws IOException {
         if (tracker == null) { return; }
         boolean useMaxTimeStamp = clientTimeStamp == StatisticsCollector.NO_TIMESTAMP;
         long timeStamp = clientTimeStamp;
@@ -174,14 +207,17 @@ public class StatisticsWriter implements Closeable {
         if (gp != null) {
             put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_COUNT_BYTES,
                     timeStamp, PLong.INSTANCE.toBytes((gp.getGuidePosts().size())));
-            put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_BYTES,
-                    timeStamp, PVarbinary.INSTANCE.toBytes(gp.toBytes()));
+            put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_BYTES,
timeStamp,
+                    PVarbinary.INSTANCE.toBytes(gp.serializeGuidePostsInfo()));
             put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH_BYTES,
                     timeStamp, PLong.INSTANCE.toBytes(gp.getByteCount()));
+            // Write as long_array
+            put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_ROW_COUNT_BYTES,
+                    timeStamp, PLong.INSTANCE.toBytes(gp.getRowCount()));
         }
         // Add our empty column value so queries behave correctly
-        put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES,
-                timeStamp, ByteUtil.EMPTY_BYTE_ARRAY);
+        put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES,
timeStamp,
+                ByteUtil.EMPTY_BYTE_ARRAY);
         mutations.add(put);
     }
 

http://git-wip-us.apache.org/repos/asf/phoenix/blob/2fb019a2/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
index 8c55f1b..b0d55e7 100644
--- a/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
+++ b/phoenix-core/src/test/java/org/apache/phoenix/util/TestUtil.java
@@ -37,6 +37,7 @@ import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Properties;
 
@@ -79,6 +80,9 @@ import org.apache.phoenix.query.QueryConstants;
 import org.apache.phoenix.schema.PColumn;
 import org.apache.phoenix.schema.types.PDataType;
 import org.apache.phoenix.schema.RowKeyValueAccessor;
+import org.apache.phoenix.schema.TableRef;
+import org.apache.phoenix.schema.stats.GuidePostsInfo;
+import org.apache.phoenix.schema.stats.PTableStats;
 import org.apache.phoenix.schema.tuple.Tuple;
 
 import com.google.common.collect.Lists;
@@ -489,7 +493,38 @@ public class TestUtil {
         List<KeyRange> keyRanges = pstmt.getQueryPlan().getSplits();
         return keyRanges;
     }
-    
+
+    public static Collection<GuidePostsInfo> getGuidePostsList(Connection conn, String
tableName) throws SQLException {
+        return getGuidePostsList(conn, tableName, null, null, null, null);
+    }
+
+    public static Collection<GuidePostsInfo> getGuidePostsList(Connection conn, String
tableName, String where)
+            throws SQLException {
+        return getGuidePostsList(conn, tableName, null, null, null, where);
+    }
+
+    public static Collection<GuidePostsInfo> getGuidePostsList(Connection conn, String
tableName, String pkCol,
+            byte[] lowerRange, byte[] upperRange, String whereClauseSuffix) throws SQLException
{
+        String whereClauseStart = (lowerRange == null && upperRange == null ? ""
+                : " WHERE "
+                        + ((lowerRange != null ? (pkCol + " >= ? " + (upperRange != null
? " AND " : "")) : "") + (upperRange != null ? (pkCol + " < ?")
+                                : "")));
+        String whereClause = whereClauseSuffix == null ? whereClauseStart
+                : whereClauseStart.length() == 0 ? (" WHERE " + whereClauseSuffix) : (" AND
" + whereClauseSuffix);
+        String query = "SELECT /*+ NO_INDEX */ COUNT(*) FROM " + tableName + whereClause;
+        PhoenixPreparedStatement pstmt = conn.prepareStatement(query).unwrap(PhoenixPreparedStatement.class);
+        if (lowerRange != null) {
+            pstmt.setBytes(1, lowerRange);
+        }
+        if (upperRange != null) {
+            pstmt.setBytes(lowerRange != null ? 2 : 1, upperRange);
+        }
+        pstmt.execute();
+        TableRef tableRef = pstmt.getQueryPlan().getTableRef();
+        PTableStats tableStats = tableRef.getTable().getTableStats();
+        return tableStats.getGuidePosts().values();
+    }
+
     public static List<KeyRange> getSplits(Connection conn, byte[] lowerRange, byte[]
upperRange) throws SQLException {
         return getSplits(conn, STABLE_NAME, STABLE_PK_NAME, lowerRange, upperRange, null);
     }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/2fb019a2/phoenix-protocol/src/main/PGuidePosts.proto
----------------------------------------------------------------------
diff --git a/phoenix-protocol/src/main/PGuidePosts.proto b/phoenix-protocol/src/main/PGuidePosts.proto
new file mode 100644
index 0000000..1550391
--- /dev/null
+++ b/phoenix-protocol/src/main/PGuidePosts.proto
@@ -0,0 +1,10 @@
+option java_package = "org.apache.phoenix.coprocessor.generated";
+option java_outer_classname = "PGuidePostsProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+option optimize_for = SPEED;
+message PGuidePosts {
+  repeated bytes guidePosts = 1;
+  optional int64 byteCount = 2;
+  optional int64 rowCount = 3;
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/phoenix/blob/2fb019a2/phoenix-protocol/src/main/PTable.proto
----------------------------------------------------------------------
diff --git a/phoenix-protocol/src/main/PTable.proto b/phoenix-protocol/src/main/PTable.proto
index 772d51d..03f63d5 100644
--- a/phoenix-protocol/src/main/PTable.proto
+++ b/phoenix-protocol/src/main/PTable.proto
@@ -24,6 +24,8 @@ option java_generic_services = true;
 option java_generate_equals_and_hash = true;
 option optimize_for = SPEED;
 
+import "PGuidePosts.proto";
+
 enum PTableType {
   SYSTEM = 0;
   USER = 1;
@@ -52,6 +54,7 @@ message PTableStats {
   optional int64 guidePostsByteCount = 3;
   optional int64 keyBytesCount = 4;
   optional int32 guidePostsCount = 5;
+  optional PGuidePosts pGuidePosts = 6;
 }
 
 message PTable {


Mime
View raw message