phoenix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sama...@apache.org
Subject phoenix git commit: PHOENIX-4289 UPDATE STATISTICS command does not collect stats for local indexes
Date Mon, 30 Oct 2017 06:08:46 GMT
Repository: phoenix
Updated Branches:
  refs/heads/4.12-HBase-1.2 e90daafda -> 4765a8bf8


PHOENIX-4289 UPDATE STATISTICS command does not collect stats for local indexes


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/4765a8bf
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/4765a8bf
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/4765a8bf

Branch: refs/heads/4.12-HBase-1.2
Commit: 4765a8bf8915787b15de6c03f6f16f7732efa9ce
Parents: e90daaf
Author: Samarth Jain <samarth@apache.org>
Authored: Sun Oct 29 23:08:39 2017 -0700
Committer: Samarth Jain <samarth@apache.org>
Committed: Sun Oct 29 23:08:39 2017 -0700

----------------------------------------------------------------------
 .../end2end/ExplainPlanWithStatsEnabledIT.java  |  12 ++
 .../phoenix/end2end/index/BaseLocalIndexIT.java |   1 +
 .../phoenix/end2end/index/LocalIndexIT.java     |  46 ++++++-
 .../parse/UpdateStatisticsStatement.java        |   4 +
 .../apache/phoenix/schema/MetaDataClient.java   | 125 +++++++++++--------
 5 files changed, 138 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
index cd4555c..b4f9871 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java
@@ -306,6 +306,18 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT
{
         final Long estimatedRows;
         final Long estimateInfoTs;
 
+        public Long getEstimatedBytes() {
+            return estimatedBytes;
+        }
+
+        public Long getEstimatedRows() {
+            return estimatedRows;
+        }
+
+        public Long getEstimateInfoTs() {
+            return estimateInfoTs;
+        }
+
         Estimate(Long rows, Long bytes, Long ts) {
             this.estimatedBytes = bytes;
             this.estimatedRows = rows;

http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java
b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java
index 30baec4..4f58ead 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java
@@ -59,6 +59,7 @@ public abstract class BaseLocalIndexIT extends BaseUniqueNamesOwnClusterIT
{
         serverProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, "true");
         Map<String, String> clientProps = Maps.newHashMapWithExpectedSize(1);
         clientProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, "true");
+        clientProps.put(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, "120000");
         setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator()));
     }
 

http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
index 48221ab..0dcf1d5 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
@@ -17,6 +17,7 @@
  */
 package org.apache.phoenix.end2end.index;
 
+import static org.apache.phoenix.end2end.ExplainPlanWithStatsEnabledIT.getByteRowEstimates;
 import static org.apache.phoenix.util.MetaDataUtil.getViewIndexSequenceName;
 import static org.apache.phoenix.util.MetaDataUtil.getViewIndexSequenceSchemaName;
 import static org.junit.Assert.assertArrayEquals;
@@ -55,8 +56,10 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.phoenix.compile.QueryPlan;
 import org.apache.phoenix.coprocessor.BaseScannerRegionObserver;
+import org.apache.phoenix.end2end.ExplainPlanWithStatsEnabledIT.Estimate;
 import org.apache.phoenix.hbase.index.IndexRegionSplitPolicy;
 import org.apache.phoenix.jdbc.PhoenixConnection;
+import org.apache.phoenix.jdbc.PhoenixResultSet;
 import org.apache.phoenix.jdbc.PhoenixStatement;
 import org.apache.phoenix.query.QueryConstants;
 import org.apache.phoenix.schema.PNameFactory;
@@ -67,9 +70,10 @@ import org.apache.phoenix.schema.TableNotFoundException;
 import org.apache.phoenix.util.QueryUtil;
 import org.apache.phoenix.util.SchemaUtil;
 import org.apache.phoenix.util.TestUtil;
-import org.junit.Ignore;
 import org.junit.Test;
 
+import com.google.common.collect.Lists;
+
 public class LocalIndexIT extends BaseLocalIndexIT {
     public LocalIndexIT(boolean isNamespaceMapped) {
         super(isNamespaceMapped);
@@ -714,4 +718,44 @@ public class LocalIndexIT extends BaseLocalIndexIT {
         }
     }
 
+    @Test // See https://issues.apache.org/jira/browse/PHOENIX-4289
+    public void testEstimatesWithLocalIndexes() throws Exception {
+        String tableName = generateUniqueName();
+        String indexName = "IDX_" + generateUniqueName();
+        try (Connection conn = DriverManager.getConnection(getUrl())) {
+            int guidePostWidth = 20;
+            conn.createStatement()
+                    .execute("CREATE TABLE " + tableName
+                            + " (k INTEGER PRIMARY KEY, a bigint, b bigint)"
+                            + " GUIDE_POSTS_WIDTH=" + guidePostWidth);
+            conn.createStatement().execute("upsert into " + tableName + " values (100,1,3)");
+            conn.createStatement().execute("upsert into " + tableName + " values (101,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (102,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (103,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (104,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (105,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (106,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (107,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (108,2,4)");
+            conn.createStatement().execute("upsert into " + tableName + " values (109,2,4)");
+            conn.commit();
+            conn.createStatement().execute(
+                "CREATE LOCAL INDEX " + indexName + " ON " + tableName + " (a) INCLUDE (b)
");
+            String ddl = "ALTER TABLE " + tableName + " SET USE_STATS_FOR_PARALLELIZATION
= false";
+            conn.createStatement().execute(ddl);
+            conn.createStatement().execute("UPDATE STATISTICS " + tableName + "");
+        }
+        List<Object> binds = Lists.newArrayList();
+        try (Connection conn = DriverManager.getConnection(getUrl())) {
+            String sql =
+                    "SELECT COUNT(*) " + " FROM " + tableName;
+            ResultSet rs = conn.createStatement().executeQuery(sql);
+            assertTrue("Index " + indexName + " should have been used",
+                rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan().getTableRef()
+                        .getTable().getName().getString().equals(indexName));
+            Estimate info = getByteRowEstimates(conn, sql, binds);
+            assertEquals((Long) 10l, info.getEstimatedRows());
+            assertTrue(info.getEstimateInfoTs() > 0);
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java
b/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java
index 6f7b736..0331295 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/parse/UpdateStatisticsStatement.java
@@ -46,6 +46,10 @@ public class UpdateStatisticsStatement extends SingleTableStatement {
         return scope == INDEX || scope == ALL;
     }
 
+    public boolean updateIndexOnly() {
+        return scope == INDEX;
+    }
+
     public boolean updateAll() {
         return scope == ALL;
     }

http://git-wip-us.apache.org/repos/asf/phoenix/blob/4765a8bf/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
index 0f6bab2..67fc2ed 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java
@@ -1076,6 +1076,23 @@ public class MetaDataClient {
         }
     }
 
+    private boolean hasItBeenLongEnoughSinceLastUpdateStats(PName physicalName) throws SQLException
{
+        ReadOnlyProps props = connection.getQueryServices().getProps();
+        final long msMinBetweenUpdates = props
+                .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB,
+                        props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB,
+                                QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2);
+        String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME
+                + " WHERE " + PHYSICAL_NAME + "='" + physicalName.getString() + "' AND "
+ COLUMN_FAMILY
+                + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL";
+        ResultSet rs = connection.createStatement().executeQuery(query);
+        long msSinceLastUpdate = Long.MAX_VALUE;
+        if (rs.next()) {
+            msSinceLastUpdate = rs.getLong(1) - rs.getLong(2);
+        }
+        return msSinceLastUpdate >= msMinBetweenUpdates;
+    }
+
     public MutationState updateStatistics(UpdateStatisticsStatement updateStatisticsStmt)
             throws SQLException {
         // Don't mistakenly commit pending rows
@@ -1085,7 +1102,9 @@ public class MetaDataClient {
         PTable table = resolver.getTables().get(0).getTable();
         long rowCount = 0;
         if (updateStatisticsStmt.updateColumns()) {
-            rowCount += updateStatisticsInternal(table.getPhysicalName(), table, updateStatisticsStmt.getProps());
+            if (hasItBeenLongEnoughSinceLastUpdateStats(table.getPhysicalName())) {
+                rowCount += updateStatisticsInternal(table.getPhysicalName(), table, updateStatisticsStmt.getProps());
+            }
         }
         if (updateStatisticsStmt.updateIndex()) {
             // TODO: If our table is a VIEW with multiple indexes or a TABLE with local indexes,
@@ -1093,7 +1112,10 @@ public class MetaDataClient {
             // across all indexes in that case so that we don't re-calculate the same stats
             // multiple times.
             for (PTable index : table.getIndexes()) {
-                rowCount += updateStatisticsInternal(index.getPhysicalName(), index, updateStatisticsStmt.getProps());
+                // Local indexes are handled below
+                if (index.getIndexType() != IndexType.LOCAL && hasItBeenLongEnoughSinceLastUpdateStats(index.getPhysicalName()))
{
+                    rowCount += updateStatisticsInternal(index.getPhysicalName(), index,
updateStatisticsStmt.getProps());
+                }
             }
             // If analyzing the indexes of a multi-tenant table or a table with view indexes
             // then analyze all of those indexes too.
@@ -1106,12 +1128,32 @@ public class MetaDataClient {
                             return physicalName;
                         }
                     };
-                    rowCount += updateStatisticsInternal(physicalName, indexLogicalTable,
updateStatisticsStmt.getProps());
+                    if (hasItBeenLongEnoughSinceLastUpdateStats(physicalName)) {
+                        rowCount += updateStatisticsInternal(physicalName, indexLogicalTable,
updateStatisticsStmt.getProps());
+                    }
                 }
                 PName physicalName = table.getPhysicalName();
                 List<byte[]> localCFs = MetaDataUtil.getLocalIndexColumnFamilies(connection,
physicalName.getBytes());
                 if (!localCFs.isEmpty()) {
-                    rowCount += updateStatisticsInternal(physicalName, table, updateStatisticsStmt.getProps(),
localCFs);
+                    /*
+                     * Because local indexes share the same physical table as the data table,
when
+                     * stats collection has been requested for both the data table and the
local
+                     * index on it, checking when update stats was last run on the physical
table
+                     * can prevent update stats from being run for the local index. Consequently
and
+                     * unfortunately, we cannot tell when was last stats updated for a local
index.
+                     * As a result, if a user requests that he/she wants to collect stats
for a
+                     * local index only, we don't have any state available to prevent it
from
+                     * running if it is being requested to run too soon. For now, in this
special
+                     * case, we are just going to have to rely on the last_stats_update_time
for the
+                     * physical table.
+                     */
+                    boolean collectStatsForLocalIndexes = true;
+                    if (updateStatisticsStmt.updateIndexOnly()) {
+                        collectStatsForLocalIndexes = hasItBeenLongEnoughSinceLastUpdateStats(physicalName);
+                    }
+                    if (collectStatsForLocalIndexes) {
+                        rowCount += updateStatisticsInternal(physicalName, table, updateStatisticsStmt.getProps(),
localCFs);
+                    }
                 }
             }
         }
@@ -1130,57 +1172,42 @@ public class MetaDataClient {
     
     private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String,
Object> statsProps, List<byte[]> cfs) throws SQLException {
         ReadOnlyProps props = connection.getQueryServices().getProps();
-        final long msMinBetweenUpdates = props
-                .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB,
-                        props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB,
-                                QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2);
-        byte[] tenantIdBytes = ByteUtil.EMPTY_BYTE_ARRAY;
         Long scn = connection.getSCN();
         // Always invalidate the cache
         long clientTimeStamp = connection.getSCN() == null ? HConstants.LATEST_TIMESTAMP
: scn;
-        String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME
-                + " WHERE " + PHYSICAL_NAME + "='" + physicalName.getString() + "' AND "
+ COLUMN_FAMILY
-                + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL";
-        ResultSet rs = connection.createStatement().executeQuery(query);
-        long msSinceLastUpdate = Long.MAX_VALUE;
-        if (rs.next()) {
-            msSinceLastUpdate = rs.getLong(1) - rs.getLong(2);
-        }
         long rowCount = 0;
-        if (msSinceLastUpdate >= msMinBetweenUpdates) {
-            /*
-             * Execute a COUNT(*) through PostDDLCompiler as we need to use the logicalTable
passed through,
-             * since it may not represent a "real" table in the case of the view indexes
of a base table.
-             */
-            PostDDLCompiler compiler = new PostDDLCompiler(connection);
-            //even if table is transactional, while calculating stats we scan the table non-transactionally
to
-            //view all the data belonging to the table
-            PTable nonTxnLogicalTable = new DelegateTable(logicalTable) {
-                @Override
-                public boolean isTransactional() {
-                    return false;
-                }
-            };
-            TableRef tableRef = new TableRef(null, nonTxnLogicalTable, clientTimeStamp, false);
-            MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null,
cfs, null, clientTimeStamp);
-            Scan scan = plan.getContext().getScan();
-            scan.setCacheBlocks(false);
-            scan.setAttribute(ANALYZE_TABLE, TRUE_BYTES);
-            boolean runUpdateStatsAsync = props.getBoolean(QueryServices.RUN_UPDATE_STATS_ASYNC,
DEFAULT_RUN_UPDATE_STATS_ASYNC);
-            scan.setAttribute(RUN_UPDATE_STATS_ASYNC_ATTRIB, runUpdateStatsAsync ? TRUE_BYTES
: FALSE_BYTES);
-            if (statsProps != null) {
-                Object gp_width = statsProps.get(QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB);
-                if (gp_width != null) {
-                    scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_WIDTH_BYTES, PLong.INSTANCE.toBytes(gp_width));
-                }
-                Object gp_per_region = statsProps.get(QueryServices.STATS_GUIDEPOST_PER_REGION_ATTRIB);
-                if (gp_per_region != null) {
-                    scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_PER_REGION, PInteger.INSTANCE.toBytes(gp_per_region));
-                }
+        /*
+         * Execute a COUNT(*) through PostDDLCompiler as we need to use the logicalTable
passed through,
+         * since it may not represent a "real" table in the case of the view indexes of a
base table.
+         */
+        PostDDLCompiler compiler = new PostDDLCompiler(connection);
+        //even if table is transactional, while calculating stats we scan the table non-transactionally
to
+        //view all the data belonging to the table
+        PTable nonTxnLogicalTable = new DelegateTable(logicalTable) {
+            @Override
+            public boolean isTransactional() {
+                return false;
+            }
+        };
+        TableRef tableRef = new TableRef(null, nonTxnLogicalTable, clientTimeStamp, false);
+        MutationPlan plan = compiler.compile(Collections.singletonList(tableRef), null, cfs,
null, clientTimeStamp);
+        Scan scan = plan.getContext().getScan();
+        scan.setCacheBlocks(false);
+        scan.setAttribute(ANALYZE_TABLE, TRUE_BYTES);
+        boolean runUpdateStatsAsync = props.getBoolean(QueryServices.RUN_UPDATE_STATS_ASYNC,
DEFAULT_RUN_UPDATE_STATS_ASYNC);
+        scan.setAttribute(RUN_UPDATE_STATS_ASYNC_ATTRIB, runUpdateStatsAsync ? TRUE_BYTES
: FALSE_BYTES);
+        if (statsProps != null) {
+            Object gp_width = statsProps.get(QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB);
+            if (gp_width != null) {
+                scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_WIDTH_BYTES, PLong.INSTANCE.toBytes(gp_width));
+            }
+            Object gp_per_region = statsProps.get(QueryServices.STATS_GUIDEPOST_PER_REGION_ATTRIB);
+            if (gp_per_region != null) {
+                scan.setAttribute(BaseScannerRegionObserver.GUIDEPOST_PER_REGION, PInteger.INSTANCE.toBytes(gp_per_region));
             }
-            MutationState mutationState = plan.execute();
-            rowCount = mutationState.getUpdateCount();
         }
+        MutationState mutationState = plan.execute();
+        rowCount = mutationState.getUpdateCount();
 
         /*
          *  Update the stats table so that client will pull the new one with the updated
stats.


Mime
View raw message