hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j.@apache.org
Subject [08/17] incubator-hawq git commit: HAWQ-532. Optimise vseg number for copy to statement.
Date Fri, 18 Mar 2016 23:04:09 GMT
HAWQ-532. Optimise vseg number for copy to statement.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/c29d6476
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/c29d6476
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/c29d6476

Branch: refs/heads/fix-enable-orca
Commit: c29d647658e5e4219a370b4869f68cdcd577ab65
Parents: eaaf945
Author: hzhang2 <zhanghuan929@163.com>
Authored: Fri Mar 18 10:30:20 2016 +0800
Committer: hzhang2 <zhanghuan929@163.com>
Committed: Fri Mar 18 10:42:44 2016 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbmutate.c          |  2 +-
 src/backend/commands/copy.c          | 96 ++++++++++++++++++++++++++++---
 src/backend/commands/prepare.c       |  2 +-
 src/backend/optimizer/util/relnode.c |  2 +-
 src/backend/parser/analyze.c         |  8 +--
 src/backend/postmaster/identity.c    |  2 +-
 src/backend/utils/misc/guc.c         | 12 ++--
 src/include/postmaster/identity.h    |  2 +-
 8 files changed, 103 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/cdb/cdbmutate.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c
index 6959c64..9a998da 100644
--- a/src/backend/cdb/cdbmutate.c
+++ b/src/backend/cdb/cdbmutate.c
@@ -312,7 +312,7 @@ apply_motion(PlannerInfo *root, Plan *plan, Query *query)
 				
 					targetPolicy = palloc0(sizeof(GpPolicy)- sizeof(targetPolicy->attrs) + maxattrs * sizeof(targetPolicy->attrs[0]));
 					targetPolicy->nattrs = 0;
-					targetPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(query->intoClause->rel, GetRandomDistPartitionNum());
+					targetPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(query->intoClause->rel, GetDefaultPartitionNum());
 					targetPolicy->ptype = POLICYTYPE_PARTITIONED;
 					
 					/* Find out what the flow is partitioned on */

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/commands/copy.c
----------------------------------------------------------------------
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index bcd5384..2f4cf39 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -91,6 +91,11 @@
 #include "postmaster/autovacuum.h"
 #include "cdb/dispatcher.h"
 
+/*
+ * in dbsize.c
+ */
+extern int64 calculate_relation_size(Relation rel);
+
 /* DestReceiver for COPY (SELECT) TO */
 typedef struct
 {
@@ -137,6 +142,7 @@ static void copy_in_error_callback(void *arg);
 static void CopyInitPartitioningState(EState *estate);
 static void CopyInitDataParser(CopyState cstate);
 static bool CopyCheckIsLastLine(CopyState cstate);
+static int calculate_virtual_segment_number(List* candidateRelations);
 
 /* ==========================================================================
  * The follwing macros aid in major refactoring of data processing code (in
@@ -1556,7 +1562,16 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
 
 			target_policy = GpPolicyFetch(CurrentMemoryContext, relid);
 			Assert(target_policy);
-			target_segment_num = target_policy->bucketnum;
+			/*
+			 * For hash table we use table bucket number to request vsegs
+			 * For random table, we use a fixed GUC value to request vsegs.
+			 */
+			if(target_policy->nattrs > 0){
+				target_segment_num = target_policy->bucketnum;
+			}
+			else{
+				target_segment_num = hawq_rm_nvseg_for_copy_from_perquery;
+			}
 			pfree(target_policy);
 
 			cstate->resource = AllocateResource(QRL_ONCE, 1, 1, target_segment_num, target_segment_num,NULL,0);
@@ -1749,6 +1764,63 @@ DoCopy(const CopyStmt *stmt, const char *queryString)
 }
 
 /*
+ * calculate virtual segment number for copy statement.
+ * if there is hash distributed relations exist, use the max bucket number.
+ * if all relation are random, use the data size to determine vseg number.
+ */
+static int calculate_virtual_segment_number(List* candidateOids) {
+	ListCell* le1;
+	int vsegNumber = 1;
+	int64 totalDataSize = 0;
+	bool isHashRelationExist = false;
+	int maxHashBucketNumber = 0;
+
+	foreach (le1, candidateOids)
+	{
+		Oid				candidateOid	  = InvalidOid;
+		candidateOid = lfirst_oid(le1);
+
+		//Relation rel = (Relation)lfirst(le1);
+		Relation rel = relation_open(candidateOid, AccessShareLock);
+		if (candidateOid > 0 ) {
+			GpPolicy *targetPolicy = GpPolicyFetch(CurrentMemoryContext,
+					candidateOid);
+			if(targetPolicy == NULL){
+				return GetAnalyzeVSegNumLimit();
+			}
+			if (targetPolicy->nattrs > 0) {
+				isHashRelationExist = true;
+				if(maxHashBucketNumber < targetPolicy->bucketnum){
+					maxHashBucketNumber = targetPolicy->bucketnum;
+				}
+			}
+			/*
+			 * if no hash relation, we calculate the data size of all the relations.
+			 */
+			if (!isHashRelationExist) {
+				totalDataSize += calculate_relation_size(rel);
+			}
+		}
+		relation_close(rel, AccessShareLock);
+	}
+
+	if (isHashRelationExist) {
+		vsegNumber = maxHashBucketNumber;
+	} else {
+		/*we allocate one virtual segment for each 128M data */
+		totalDataSize >>= 27;
+		vsegNumber = totalDataSize + 1;
+	}
+	Assert(vsegNumber > 0);
+	/*vsegNumber should be less than GetUtilPartitionNum*/
+	if(vsegNumber > GetQueryVsegNum()){
+		vsegNumber = GetQueryVsegNum();
+	}
+
+	return vsegNumber;
+}
+
+/*
  * This intermediate routine exists mainly to localize the effects of setjmp
  * so we don't need to plaster a lot of variables with "volatile".
  */
@@ -1809,13 +1881,21 @@ DoCopyTo(CopyState cstate)
 		 */
 		if (Gp_role == GP_ROLE_DISPATCH && cstate->rel && cstate->rel->rd_cdbpolicy)
 		{
-			GpPolicy *target_policy = NULL;
 			int target_segment_num = 0;
+			/*
+			 * copy hash table use table bucket number
+			 * copy random table use table size.
+			 */
+			PartitionNode *pn = get_parts(cstate->rel->rd_id, 0 /*level*/ ,
+													0 /*parent*/, false /* inctemplate */, CurrentMemoryContext, true /*includesubparts*/);
+			Assert(pn);
+			List		*lFullRelOids = NIL;
+			lFullRelOids = all_leaf_partition_relids(pn);
+			lFullRelOids = lappend_oid(lFullRelOids, cstate->rel->rd_id); /* root partition */
+			lFullRelOids = list_concat(lFullRelOids, all_interior_partition_relids(pn)); /* interior partitions */
 
-			target_policy = GpPolicyFetch(CurrentMemoryContext, cstate->rel->rd_id);
-			Assert(target_policy);
-			target_segment_num = target_policy->bucketnum;
-			pfree(target_policy);
+			target_segment_num = calculate_virtual_segment_number(lFullRelOids);
+			elog(LOG, "virtual segment number of copy to is: %d\n", target_segment_num);
 
 			cstate->resource = AllocateResource(QRL_ONCE, 1, 1, target_segment_num, target_segment_num,NULL,0);
 			CopyToDispatch(cstate);
@@ -4200,7 +4280,7 @@ CopyFrom(CopyState cstate)
 					if (cstate->oids && file_has_oids)
 						MemTupleSetOid(tuple, resultRelInfo->ri_aoInsertDesc->mt_bind, loaded_oid);
 				}
-				else if ((relstorage == RELSTORAGE_PARQUET))
+				else if (relstorage == RELSTORAGE_PARQUET)
 				{
                     tuple = NULL;
 				}
@@ -4231,7 +4311,7 @@ CopyFrom(CopyState cstate)
 				{
 					HeapTuple	newtuple;
 
-					if((relstorage == RELSTORAGE_PARQUET))
+					if(relstorage == RELSTORAGE_PARQUET)
 					{
 						Assert(!tuple);
 						elog(ERROR, "triggers are not supported on tables that use column-oriented storage");

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/commands/prepare.c
----------------------------------------------------------------------
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 76c04e9..013af9a 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -242,7 +242,7 @@ ExecuteQuery(ExecuteStmt *stmt, const char *queryString,
 									+ 255 * sizeof(pstmt->intoPolicy->attrs[0]));
 		pstmt->intoPolicy->nattrs = 0;
 		pstmt->intoPolicy->ptype = POLICYTYPE_PARTITIONED;
-		pstmt->intoPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(stmt->into->rel, GetRandomDistPartitionNum());
+		pstmt->intoPolicy->bucketnum = GetRelOpt_bucket_num_fromRangeVar(stmt->into->rel, GetDefaultPartitionNum());
 		
 		MemoryContextSwitchTo(oldContext);
 	}

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/optimizer/util/relnode.c
----------------------------------------------------------------------
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 9e94901..8fb676c 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -142,7 +142,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
 			{
 				rel->cdbpolicy = (GpPolicy *) palloc(sizeof(GpPolicy));
 				rel->cdbpolicy->ptype = POLICYTYPE_PARTITIONED;
-				rel->cdbpolicy->bucketnum = GetRandomDistPartitionNum();
+				rel->cdbpolicy->bucketnum = GetDefaultPartitionNum();
 				rel->cdbpolicy->nattrs = 0;
 				rel->cdbpolicy->attrs[0] = 1;
 			}

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/parser/analyze.c
----------------------------------------------------------------------
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index ad35484..f17c90a 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -2724,7 +2724,7 @@ transformETDistributedBy(ParseState *pstate, CreateStmtContext *cxt,
 										 sizeof(p->attrs[0]));
 			p->ptype = POLICYTYPE_PARTITIONED;
 			p->nattrs = 0;
-			p->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetRandomDistPartitionNum());
+			p->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetDefaultPartitionNum());
 			p->attrs[0] = 1;
 		}
 
@@ -2981,7 +2981,7 @@ transformDistributedBy(ParseState *pstate, CreateStmtContext *cxt,
 
 	if (policy->bucketnum == -1)
 	{
-		policy->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetRandomDistPartitionNum());
+		policy->bucketnum = GetRelOpt_bucket_num_fromOptions(options, GetDefaultPartitionNum());
 	}
 
 	*policyp = policy;
@@ -12187,7 +12187,7 @@ setQryDistributionPolicy(SelectStmt *stmt, Query *qry)
 	policy->nattrs = 0;
 	policy->attrs[0] = 1;
 	if(stmt->intoClause != NULL)
-		policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetRandomDistPartitionNum());
+		policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetDefaultPartitionNum());
 
 	if (stmt->distributedBy)
 	{
@@ -12241,7 +12241,7 @@ setQryDistributionPolicy(SelectStmt *stmt, Query *qry)
 		}
 		else
 		{
-			policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetRandomDistPartitionNum());
+			policy->bucketnum = GetRelOpt_bucket_num_fromOptions(stmt->intoClause->options, GetDefaultPartitionNum());
 		}
 	}
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/postmaster/identity.c
----------------------------------------------------------------------
diff --git a/src/backend/postmaster/identity.c b/src/backend/postmaster/identity.c
index b209ec5..36ea0cb 100644
--- a/src/backend/postmaster/identity.c
+++ b/src/backend/postmaster/identity.c
@@ -537,7 +537,7 @@ GetRelOpt_bucket_num_fromRangeVar(const RangeVar* rel_rv, int default_val)
 }
 
 int
-GetRandomDistPartitionNum(void)
+GetDefaultPartitionNum(void)
 {
 	return default_hash_table_bucket_number;
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a6a8bee..56909c9 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -4516,7 +4516,7 @@ static struct config_int ConfigureNamesInt[] =
 			NULL
 		},
 		&default_hash_table_bucket_number,
-		6, 1, 10000, NULL, NULL
+		6, 1, 65535, NULL, NULL
 	},
 
 	{
@@ -4525,7 +4525,7 @@ static struct config_int ConfigureNamesInt[] =
 			NULL
 		},
 		&hawq_rm_nvseg_for_copy_from_perquery,
-		6, 1, 10000, NULL, NULL
+		6, 1, 65535, NULL, NULL
 	},
 
 	{
@@ -4534,7 +4534,7 @@ static struct config_int ConfigureNamesInt[] =
 			NULL
 		},
 		&hawq_rm_nvseg_for_analyze_perquery_perseg_limit,
-		4, 1, 10000, NULL, NULL
+		4, 1, 65535, NULL, NULL
 	},
 
 	{
@@ -4543,7 +4543,7 @@ static struct config_int ConfigureNamesInt[] =
 			NULL
 		},
 		&hawq_rm_nvseg_for_analyze_perquery_limit,
-		256, 1, 10000, NULL, NULL
+		256, 1, 65535, NULL, NULL
 	},
 
 	{
@@ -6388,7 +6388,7 @@ static struct config_int ConfigureNamesInt[] =
                     NULL
             },
             &rm_nvseg_perquery_limit,
-            512, 1, 10000, NULL, NULL
+            512, 1, 65535, NULL, NULL
     },
 
     {
@@ -6398,7 +6398,7 @@ static struct config_int ConfigureNamesInt[] =
                     NULL
             },
             &rm_nvseg_perquery_perseg_limit,
-            6, 1, 10000, NULL, NULL
+            6, 1, 65535, NULL, NULL
     },
 
     {

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/c29d6476/src/include/postmaster/identity.h
----------------------------------------------------------------------
diff --git a/src/include/postmaster/identity.h b/src/include/postmaster/identity.h
index 814935e..f43ab62 100644
--- a/src/include/postmaster/identity.h
+++ b/src/include/postmaster/identity.h
@@ -91,7 +91,7 @@ extern int	 GetCopyFromVSegNum(void);
 extern int GetRelOpt_bucket_num_fromOptions(List *options, int default_val);
 extern int GetRelOpt_bucket_num_fromRel(Relation relation, int default_val);
 extern int GetRelOpt_bucket_num_fromRangeVar(const RangeVar* rel_rv, int default_val);
-extern int GetRandomDistPartitionNum(void);
+extern int GetDefaultPartitionNum(void);
 extern int GetHashDistPartitionNum(void);
 extern int GetExternalTablePartitionNum(void);
 extern int GetAllWorkerHostNum(void);


Mime
View raw message