hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From iw...@apache.org
Subject [1/3] incubator-hawq git commit: HAWQ-404. Add sort during INSERT of append only row oriented partition tables
Date Tue, 15 Mar 2016 01:54:57 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/master ac26f48fd -> 2a1506604


HAWQ-404. Add sort during INSERT of append only row oriented partition tables


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2c38b422
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2c38b422
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2c38b422

Branch: refs/heads/master
Commit: 2c38b42273d7daa0333fcd3827defe335ff4d89f
Parents: 74297b6
Author: Haisheng Yuan <hyuan@pivotal.io>
Authored: Thu Feb 25 14:59:49 2016 -0800
Committer: Haisheng Yuan <hyuan@pivotal.io>
Committed: Wed Mar 2 18:27:00 2016 -0800

----------------------------------------------------------------------
 src/backend/gpopt/ivy.xml                                |  2 +-
 src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp   |  2 ++
 src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp |  9 +++++++++
 src/backend/gpopt/utils/COptTasks.cpp                    |  4 +++-
 src/backend/nodes/copyfuncs.c                            |  1 +
 src/backend/nodes/outfast.c                              |  1 +
 src/backend/nodes/outfuncs.c                             |  1 +
 src/backend/nodes/readfast.c                             |  1 +
 src/backend/utils/misc/guc.c                             | 11 +++++++++++
 src/include/gpopt/translate/CTranslatorUtils.h           |  4 ----
 src/include/nodes/plannodes.h                            |  1 +
 src/include/utils/guc.h                                  |  1 +
 .../regress/expected/gpsql_alter_table_optimizer.out     |  2 +-
 src/test/regress/expected/insert_optimizer.out           |  2 +-
 14 files changed, 34 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/gpopt/ivy.xml
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/ivy.xml b/src/backend/gpopt/ivy.xml
index db5c89f..d8fcb85 100644
--- a/src/backend/gpopt/ivy.xml
+++ b/src/backend/gpopt/ivy.xml
@@ -38,7 +38,7 @@ under the License.
     </configurations>
 
     <dependencies>
-      <dependency org="emc"             name="optimizer"       rev="1.617"          conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
+      <dependency org="emc"             name="optimizer"       rev="1.623"          conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
       <dependency org="emc"             name="libgpos"         rev="1.133"          conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
       <dependency org="xerces"          name="xerces-c"        rev="3.1.1-p1"       conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
     </dependencies>

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
index 12ef75f..21da4fd 100644
--- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
+++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
@@ -4518,6 +4518,8 @@ CTranslatorDXLToPlStmt::PplanDML
 		plTargetListDML = plTargetListWithDroppedCols;
 	}
 	
+	pdml->inputSorted = pdxlop->FInputSorted();
+
 	// add ctid, action and oid columns to target list
 	pdml->oidColIdx = UlAddTargetEntryForColId(&plTargetListDML, &dxltrctxChild, pdxlop->UlOid(), true /*fResjunk*/);
 	pdml->actionColIdx = UlAddTargetEntryForColId(&plTargetListDML, &dxltrctxChild, pdxlop->UlAction(), true /*fResjunk*/);

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 80a81ee..8cb468a 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -597,6 +597,7 @@ CTranslatorRelcacheToDXL::Pmdrel
 	DrgPmdid *pdrgpmdidIndexes = NULL;
 	DrgPmdid *pdrgpmdidTriggers = NULL;
 	DrgPul *pdrgpulPartKeys = NULL;
+	ULONG ulLeafPartitions = 0;
 	BOOL fConvertHashToRandom = false;
 	DrgPdrgPul *pdrgpdrgpulKeys = NULL;
 	DrgPmdid *pdrgpmdidCheckConstraints = NULL;
@@ -652,6 +653,13 @@ CTranslatorRelcacheToDXL::Pmdrel
 				erelstorage = IMDRelation::ErelstorageAppendOnlyParquet;
 			}
 		}
+
+		// get number of leaf partitions
+		if (gpdb::FRelPartIsRoot(oid))
+		{
+		   ulLeafPartitions = gpdb::UlLeafPartitions(oid);
+		}
+
 		// get key sets
 		BOOL fAddDefaultKeys = FHasSystemColumns(rel->rd_rel->relkind);
 		pdrgpdrgpulKeys = PdrgpdrgpulKeys(pmp, oid, fAddDefaultKeys, fPartitioned, pulAttnoMapping);
@@ -722,6 +730,7 @@ CTranslatorRelcacheToDXL::Pmdrel
 							pdrgpmdcol,
 							pdrpulDistrCols,
 							pdrgpulPartKeys,
+							ulLeafPartitions,
 							fConvertHashToRandom,
 							pdrgpdrgpulKeys,
 							pdrgpmdidIndexes,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/gpopt/utils/COptTasks.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/utils/COptTasks.cpp b/src/backend/gpopt/utils/COptTasks.cpp
index 893cde2..4d10d39 100644
--- a/src/backend/gpopt/utils/COptTasks.cpp
+++ b/src/backend/gpopt/utils/COptTasks.cpp
@@ -774,13 +774,15 @@ COptTasks::PoconfCreate
 	DOUBLE dDampingFactorGroupBy = (DOUBLE) optimizer_damping_factor_groupby;
 
 	ULONG ulCTEInliningCutoff =  (ULONG) optimizer_cte_inlining_bound;
+	ULONG ulPartsToForceSortOnInsert =  (ULONG) optimizer_parts_to_force_sort_on_insert;
 
 	return GPOS_NEW(pmp) COptimizerConfig
 						(
 						GPOS_NEW(pmp) CEnumeratorConfig(pmp, ullPlanId, ullSamples, dCostThreshold),
 						GPOS_NEW(pmp) CStatisticsConfig(pmp, dDampingFactorFilter, dDampingFactorJoin, dDampingFactorGroupBy),
 						GPOS_NEW(pmp) CCTEConfig(ulCTEInliningCutoff),
-						pcm
+						pcm,
+						GPOS_NEW(pmp) CHint(ulPartsToForceSortOnInsert)
 						);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/nodes/copyfuncs.c
----------------------------------------------------------------------
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 6792e45..7082dac 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -1119,6 +1119,7 @@ _copyDML(const DML *from)
 	COPY_SCALAR_FIELD(actionColIdx);
 	COPY_SCALAR_FIELD(ctidColIdx);
 	COPY_SCALAR_FIELD(tupleoidColIdx);
+	COPY_SCALAR_FIELD(inputSorted);
 
 	return newnode;
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/nodes/outfast.c
----------------------------------------------------------------------
diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c
index ac943c9..348bb2c 100644
--- a/src/backend/nodes/outfast.c
+++ b/src/backend/nodes/outfast.c
@@ -981,6 +981,7 @@ _outDML(StringInfo str, DML *node)
 	WRITE_INT_FIELD(actionColIdx);
 	WRITE_INT_FIELD(ctidColIdx);
 	WRITE_INT_FIELD(tupleoidColIdx);
+	WRITE_BOOL_FIELD(inputSorted);
 
 	_outPlanInfo(str, (Plan *) node);
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/nodes/outfuncs.c
----------------------------------------------------------------------
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 0f35610..03a2550 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -987,6 +987,7 @@ _outDML(StringInfo str, DML *node)
 	WRITE_INT_FIELD(actionColIdx);
 	WRITE_INT_FIELD(ctidColIdx);
 	WRITE_INT_FIELD(tupleoidColIdx);
+	WRITE_BOOL_FIELD(inputSorted);
 
 	_outPlanInfo(str, (Plan *) node);
 }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/nodes/readfast.c
----------------------------------------------------------------------
diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c
index cfdcc06..a77a217 100644
--- a/src/backend/nodes/readfast.c
+++ b/src/backend/nodes/readfast.c
@@ -3604,6 +3604,7 @@ _readDML(const char ** str)
 	READ_INT_FIELD(actionColIdx);
 	READ_INT_FIELD(ctidColIdx);
 	READ_INT_FIELD(tupleoidColIdx);
+	READ_BOOL_FIELD(inputSorted);
 
 	readPlanInfo(str, (Plan *)local_node);
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a8f3f37..4e54ae8 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -762,6 +762,7 @@ bool 		optimizer_multilevel_partitioning;
 bool        optimizer_enable_derive_stats_all_groups;
 bool		optimizer_explain_show_status;
 bool		optimizer_prefer_scalar_dqa_multistage_agg;
+int		optimizer_parts_to_force_sort_on_insert;
 
 /* Security */
 bool		gp_reject_internal_tcp_conn = true;
@@ -6146,6 +6147,16 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
+		{"optimizer_parts_to_force_sort_on_insert", PGC_USERSET, DEVELOPER_OPTIONS,
+			gettext_noop("Minimum number of partitions required to force sorting tuples during insertion in an append only row-oriented partitioned table"),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&optimizer_parts_to_force_sort_on_insert,
+		INT_MAX, 0, INT_MAX, NULL, NULL
+	},
+
+	{
 		{"pxf_stat_max_fragments", PGC_USERSET, EXTERNAL_TABLES,
 			gettext_noop("Max number of fragments to be sampled during ANALYZE on a PXF table."),
 			NULL,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/include/gpopt/translate/CTranslatorUtils.h
----------------------------------------------------------------------
diff --git a/src/include/gpopt/translate/CTranslatorUtils.h b/src/include/gpopt/translate/CTranslatorUtils.h
index 1f8776a..d0a0f71 100644
--- a/src/include/gpopt/translate/CTranslatorUtils.h
+++ b/src/include/gpopt/translate/CTranslatorUtils.h
@@ -70,10 +70,6 @@ namespace gpdxl
 {
 	using namespace gpopt;
 
-	// hash maps mapping INT -> ULONG
-	typedef CHashMap<INT, ULONG, gpos::UlHash<INT>, gpos::FEqual<INT>,
-					CleanupDelete<INT>, CleanupDelete<ULONG> > HMIUl;
-
 	//---------------------------------------------------------------------------
 	//	@class:
 	//		CTranslatorUtils

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/include/nodes/plannodes.h
----------------------------------------------------------------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 842795e..ebccb9d 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -1149,6 +1149,7 @@ typedef struct DML
 	AttrNumber	actionColIdx;	/* index of action column into the target list */
 	AttrNumber	ctidColIdx;		/* index of ctid column into the target list */
 	AttrNumber	tupleoidColIdx;	/* index of tuple oid column into the target list */
+	bool		inputSorted;		/* needs the data to be sorted */
 
 } DML;
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/include/utils/guc.h
----------------------------------------------------------------------
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index befeaf6..475cae1 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -434,6 +434,7 @@ extern bool optimizer_multilevel_partitioning;
 extern bool optimizer_enable_derive_stats_all_groups;
 extern bool optimizer_explain_show_status;
 extern bool optimizer_prefer_scalar_dqa_multistage_agg;
+extern int  optimizer_parts_to_force_sort_on_insert;
 
 /**
  * Enable logging of DPE match in optimizer.

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/test/regress/expected/gpsql_alter_table_optimizer.out
----------------------------------------------------------------------
diff --git a/src/test/regress/expected/gpsql_alter_table_optimizer.out b/src/test/regress/expected/gpsql_alter_table_optimizer.out
index 8a10774..cc17057 100644
--- a/src/test/regress/expected/gpsql_alter_table_optimizer.out
+++ b/src/test/regress/expected/gpsql_alter_table_optimizer.out
@@ -41,7 +41,7 @@ SELECT a, b, c FROM altable WHERE a = 12;
 ROLLBACK;
 ALTER TABLE altable ALTER COLUMN c SET NOT NULL;
 INSERT INTO altable(a, b) VALUES(13, '13');
-ERROR:  NULL value in column "c" violates not-null constraint (COptTasks.cpp:1294)
+ERROR:  null value in column "c" violates not-null constraint (COptTasks.cpp:1756)
 ALTER TABLE altable ALTER COLUMN c DROP NOT NULL;
 INSERT INTO altable(a, b) VALUES(13, '13');
 SELECT a, b, c FROM altable WHERE a = 13;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c38b422/src/test/regress/expected/insert_optimizer.out
----------------------------------------------------------------------
diff --git a/src/test/regress/expected/insert_optimizer.out b/src/test/regress/expected/insert_optimizer.out
index 58121e5..f5f263a 100755
--- a/src/test/regress/expected/insert_optimizer.out
+++ b/src/test/regress/expected/insert_optimizer.out
@@ -5,7 +5,7 @@ create table inserttest (col1 int4, col2 int4 NOT NULL, col3 text default 'testi
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'col1' as the Greenplum Database data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into inserttest (col1, col2, col3) values (DEFAULT, DEFAULT, DEFAULT);
-ERROR:  NULL value in column "col2" violates not-null constraint (COptTasks.cpp:1688)
+ERROR:  null value in column "col2" violates not-null constraint (COptTasks.cpp:1756)
 insert into inserttest (col2, col3) values (3, DEFAULT);
 insert into inserttest (col1, col2, col3) values (DEFAULT, 5, DEFAULT);
 insert into inserttest values (DEFAULT, 5, 'test');


Mime
View raw message