hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hubertzh...@apache.org
Subject incubator-hawq git commit: HAWQ-481. Optimise analyse virtual segment number for random tables.
Date Thu, 03 Mar 2016 01:31:40 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/master daa581d2a -> 74297b676


HAWQ-481. Optimise analyse virtual segment number for random tables.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/74297b67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/74297b67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/74297b67

Branch: refs/heads/master
Commit: 74297b676b97c8a330ba65fec21d3f3fb2c7c5d1
Parents: daa581d
Author: hubertzhang <hzhang@pivotal.io>
Authored: Thu Mar 3 09:31:19 2016 +0800
Committer: hubertzhang <hzhang@pivotal.io>
Committed: Thu Mar 3 09:31:19 2016 +0800

----------------------------------------------------------------------
 src/backend/commands/analyze.c | 60 +++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/74297b67/src/backend/commands/analyze.c
----------------------------------------------------------------------
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4dac537..b4ad0c9 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -141,6 +141,7 @@ static void analyzeComputeAttributeStatistics(Oid relationOid,
 static List* analyzableRelations(bool rootonly);
 static bool analyzePermitted(Oid relationOid);
 static List *analyzableAttributes(Relation candidateRelation);
+static int calculate_virtual_segment_number(List* candidateRelations);
 static List	*buildExplicitAttributeNames(Oid relationOid, VacuumStmt *stmt);
 
 /* Reltuples/relpages estimation functions */
@@ -255,7 +256,6 @@ void analyzeStatement(VacuumStmt *stmt, List *relids, int preferred_seg_num)
 	GpAutoStatsModeValue autostatvalBackup = gp_autostats_mode;
 	GpAutoStatsModeValue autostatInFunctionsvalBackup = gp_autostats_mode_in_functions;
 	bool optimizerBackup = optimizer;
-	int target_seg_num = (preferred_seg_num > 0) ? preferred_seg_num : GetUtilPartitionNum();
 
 	gp_autostats_mode = GP_AUTOSTATS_NONE;
 	gp_autostats_mode_in_functions = GP_AUTOSTATS_NONE;
@@ -263,7 +263,7 @@ void analyzeStatement(VacuumStmt *stmt, List *relids, int preferred_seg_num)
 
 	PG_TRY();
 	{
-		analyzeStmt(stmt, relids, target_seg_num);
+		analyzeStmt(stmt, relids, preferred_seg_num);
 		gp_autostats_mode = autostatvalBackup;
 		gp_autostats_mode_in_functions = autostatInFunctionsvalBackup;
 		optimizer = optimizerBackup;
@@ -292,7 +292,7 @@ void analyzeStatement(VacuumStmt *stmt, List *relids, int preferred_seg_num)
  * 	vacstmt - Vacuum statement.
  * 	relids  - Usually NULL except when called by autovacuum.
  */
-void analyzeStmt(VacuumStmt *stmt, List *relids, int target_seg_num)
+void analyzeStmt(VacuumStmt *stmt, List *relids, int preferred_seg_num)
 {
 	List	   			  	*lRelOids = NIL;
 	MemoryContext			callerContext = NULL;
@@ -494,6 +494,17 @@ void analyzeStmt(VacuumStmt *stmt, List *relids, int target_seg_num)
 	}
 
 	/**
+	 *  Use preferred_seg_num when the caller supplied a positive value;
+	 *  otherwise compute target_seg_num from the candidate relations'
+	 *  data size and distribution type.
+	 */
+	int target_seg_num = preferred_seg_num;
+	if (target_seg_num <= 0) {
+		target_seg_num = calculate_virtual_segment_number(candidateRelations);
+	}
+	elog(LOG, "virtual segment number of analyze is: %d\n", target_seg_num);
+
+	/**
 	 * We allocate query resource for analyze
 	 */
 	QueryResource *resource = AllocateResource(QRL_ONCE, 1, 0, target_seg_num, target_seg_num,
NULL, 0);
@@ -788,6 +799,49 @@ void analyzeStmt(VacuumStmt *stmt, List *relids, int target_seg_num)
 	MemoryContextDelete(analyzeStatementContext);
 }
 
+/*
+ * Choose the virtual segment (vseg) number for an ANALYZE statement from its candidate relations.
+ * If any relation is hash distributed (policy nattrs > 0), return the largest bucketnum seen.
+ * If all relations are random, return one vseg per 128MB of summed relation size, plus one.
+ */
+static int calculate_virtual_segment_number(List* candidateRelations) {
+	ListCell* le1;
+	int vsegNumber = 1;
+	int64_t totalDataSize = 0; /* running sum of calculate_relation_size() results */
+	bool isHashRelationExist = false;
+	int maxHashBucketNumber = 0;
+
+	foreach (le1, candidateRelations)
+	{
+		Relation rel = (Relation)lfirst(le1); /* NOTE(review): assumes each cell holds a Relation pointer - confirm against caller */
+		if (rel ) {
+			GpPolicy *targetPolicy = GpPolicyFetch(CurrentMemoryContext,
+					rel->rd_id); /* NOTE(review): result is dereferenced below without a NULL check - confirm it cannot be NULL */
+			if (targetPolicy->nattrs > 0) { /* nattrs > 0 is treated as hash distributed */
+				isHashRelationExist = true;
+				if(maxHashBucketNumber < targetPolicy->bucketnum){
+					maxHashBucketNumber = targetPolicy->bucketnum;
+				}
+			}
+			/*
+			 * Sum sizes only while no hash relation has been seen; the sum is unused if one appears later.
+			 */
+			if (!isHashRelationExist) {
+				totalDataSize += calculate_relation_size(rel);
+			}
+		}
+	}
+
+	if (isHashRelationExist) {
+		vsegNumber = maxHashBucketNumber;
+	} else {
+		/* one virtual segment per 128MB of data: >> 27 divides by 2^27 (presumably bytes - confirm) */
+		totalDataSize >>= 27;
+		vsegNumber = totalDataSize + 1; /* NOTE(review): int64_t narrowed to int and no upper bound is applied here */
+	}
+
+	return vsegNumber;
+}
 
 /*
  * This method extracts the explicit attributes listed in a vacuum statement. It must


Mime
View raw message