tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jihoon...@apache.org
Subject tajo git commit: TAJO-1962: Add description for session variables.
Date Thu, 26 Nov 2015 16:43:03 GMT
Repository: tajo
Updated Branches:
  refs/heads/master a1e852b9f -> e52c33089


TAJO-1962: Add description for session variables.

Closes #848


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/e52c3308
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/e52c3308
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/e52c3308

Branch: refs/heads/master
Commit: e52c33089608f6621c590066d429bf11de3a4843
Parents: a1e852b
Author: Jihoon Son <jihoonson@apache.org>
Authored: Fri Nov 27 01:42:31 2015 +0900
Committer: Jihoon Son <jihoonson@apache.org>
Committed: Fri Nov 27 01:42:31 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |   2 +
 .../main/java/org/apache/tajo/SessionVars.java  |  19 +-
 .../TestTajoCli/testHelpSessionVars.result      |  14 +-
 .../main/sphinx/configuration/tajo-site-xml.rst |  42 ++
 tajo-docs/src/main/sphinx/tsql/variables.rst    | 506 +++++++++++++++++--
 5 files changed, 537 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/e52c3308/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 4d9c712..3aceb1e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -88,6 +88,8 @@ Release 0.12.0 - unreleased
 
   TASKS
 
+    TAJO-1962: Add description for session variables. (jihoon)
+
     TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill
     compression section of the 'Table Management' chapter. 
     (Contributed by Jongyoung Park. Committed by jihoon)

http://git-wip-us.apache.org/repos/asf/tajo/blob/e52c3308/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
index 08c12a0..8d99a53 100644
--- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
+++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
@@ -68,7 +68,7 @@ public enum SessionVars implements ConfigKey {
   CLI_PAGING_ENABLED(ConfVars.$CLI_PRINT_PAUSE, "Enable paging of result display", CLI_SIDE_VAR),
   CLI_DISPLAY_ERROR_TRACE(ConfVars.$CLI_PRINT_ERROR_TRACE, "Enable display of error trace",
CLI_SIDE_VAR),
 
-  ON_ERROR_STOP(ConfVars.$CLI_ERROR_STOP, "tsql will exist if an error occurs.", CLI_SIDE_VAR),
+  ON_ERROR_STOP(ConfVars.$CLI_ERROR_STOP, "tsql will exit if an error occurs.", CLI_SIDE_VAR),
 
   // Timezone & Date ----------------------------------------------------------
   TIMEZONE(ConfVars.$TIMEZONE, "Sets timezone", DEFAULT),
@@ -96,7 +96,7 @@ public enum SessionVars implements ConfigKey {
       "restriction for the total size of broadcasted table for cross join (kb)", DEFAULT,
Long.class,
       Validators.min("0")),
 
-  JOIN_TASK_INPUT_SIZE(ConfVars.$DIST_QUERY_JOIN_TASK_VOLUME, "join task input size (mb)
", DEFAULT,
+  JOIN_TASK_INPUT_SIZE(ConfVars.$DIST_QUERY_JOIN_TASK_VOLUME, "join task input size (mb)",
DEFAULT,
       Integer.class, Validators.min("1")),
   SORT_TASK_INPUT_SIZE(ConfVars.$DIST_QUERY_SORT_TASK_VOLUME, "sort task input size (mb)",
DEFAULT,
       Integer.class, Validators.min("1")),
@@ -130,15 +130,18 @@ public enum SessionVars implements ConfigKey {
       DEFAULT, Long.class, Validators.min("0")),
   MAX_OUTPUT_FILE_SIZE(ConfVars.$MAX_OUTPUT_FILE_SIZE, "Maximum per-output file size (mb).
0 means infinite.", DEFAULT,
       Long.class, Validators.min("0")),
-  NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT),
+  NULL_CHAR(ConfVars.$TEXT_NULL, "Null char of text file output. " +
+      "This value is used when the table property 'text.null' is not specified.", DEFAULT),
   CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT),
-  AGG_HASH_TABLE_SIZE(ConfVars.$AGG_HASH_TABLE_SIZE, "Aggregation hash table size", DEFAULT),
-  SORT_LIST_SIZE(ConfVars.$SORT_LIST_SIZE, "List size for in-memory sort", DEFAULT),
-  JOIN_HASH_TABLE_SIZE(ConfVars.$JOIN_HASH_TABLE_SIZE, "Join hash table size", DEFAULT),
+  AGG_HASH_TABLE_SIZE(ConfVars.$AGG_HASH_TABLE_SIZE, "The initial size of list for in-memory
aggregation", DEFAULT),
+  SORT_LIST_SIZE(ConfVars.$SORT_LIST_SIZE, "The initial size of list for in-memory sort",
DEFAULT),
+  JOIN_HASH_TABLE_SIZE(ConfVars.$JOIN_HASH_TABLE_SIZE, "The initial size of hash table for
in-memory hash join",
+      DEFAULT),
 
   // for index
   INDEX_ENABLED(ConfVars.$INDEX_ENABLED, "index scan enabled", DEFAULT),
-  INDEX_SELECTIVITY_THRESHOLD(ConfVars.$INDEX_SELECTIVITY_THRESHOLD, "the selectivity threshold
for index scan", DEFAULT),
+  INDEX_SELECTIVITY_THRESHOLD(ConfVars.$INDEX_SELECTIVITY_THRESHOLD, "the selectivity threshold
for index scan",
+      DEFAULT),
 
   // for partition overwrite
   PARTITION_NO_RESULT_OVERWRITE_ENABLED(ConfVars.$PARTITION_NO_RESULT_OVERWRITE_ENABLED,
@@ -150,7 +153,7 @@ public enum SessionVars implements ConfigKey {
       "If true, a running query will be terminated when an overflow or divide-by-zero occurs.",
DEFAULT),
 
   // ResultSet ----------------------------------------------------------------
-  FETCH_ROWNUM(ConfVars.$RESULT_SET_FETCH_ROWNUM, "Sets the number of rows at a time from
Master", DEFAULT,
+  FETCH_ROWNUM(ConfVars.$RESULT_SET_FETCH_ROWNUM, "The number of rows to be fetched from
Master at a time", DEFAULT,
       Integer.class, Validators.min("0")),
   BLOCK_ON_RESULT(ConfVars.$RESULT_SET_BLOCK_WAIT, "Whether to block result set on query
execution", DEFAULT,
       Boolean.class, Validators.bool()),

http://git-wip-us.apache.org/repos/asf/tajo/blob/e52c3308/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
b/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
index ec8344f..5c2468d 100644
--- a/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
+++ b/tajo-core-tests/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
@@ -7,7 +7,7 @@ Available Session Variables:
 \set CLI_PAGE_ROWS [int value] - Sets the number of rows for paging
 \set CLI_PAGING_ENABLED [true or false] - Enable paging of result display
 \set CLI_DISPLAY_ERROR_TRACE [true or false] - Enable display of error trace
-\set ON_ERROR_STOP [true or false] - tsql will exist if an error occurs.
+\set ON_ERROR_STOP [true or false] - tsql will exit if an error occurs.
 \set TIMEZONE [text value] - Sets timezone
 \set DATE_ORDER [text value] - date order (default is YMD)
 \set LANG [text value] - Language
@@ -20,7 +20,7 @@ Available Session Variables:
 \set LC_TIME [text value] - Formatting of dates and times
 \set BROADCAST_NON_CROSS_JOIN_THRESHOLD [long value] - restriction for the total size of
broadcasted table for non-cross join (kb)
 \set BROADCAST_CROSS_JOIN_THRESHOLD [long value] - restriction for the total size of broadcasted
table for cross join (kb)
-\set JOIN_TASK_INPUT_SIZE [int value] - join task input size (mb) 
+\set JOIN_TASK_INPUT_SIZE [int value] - join task input size (mb)
 \set SORT_TASK_INPUT_SIZE [int value] - sort task input size (mb)
 \set GROUPBY_TASK_INPUT_SIZE [int value] - group by task input size (mb)
 \set JOIN_PER_SHUFFLE_SIZE [int value] - shuffle output size for join (mb)
@@ -35,16 +35,16 @@ Available Session Variables:
 \set OUTER_HASH_JOIN_SIZE_LIMIT [long value] - limited size for hash outer join (mb)
 \set HASH_GROUPBY_SIZE_LIMIT [long value] - limited size for hash groupby (mb)
 \set MAX_OUTPUT_FILE_SIZE [int value] - Maximum per-output file size (mb). 0 means infinite.
-\set NULL_CHAR [text value] - null char of text file output
+\set NULL_CHAR [text value] - Null char of text file output. This value is used when the
table property 'text.null' is not specified.
 \set CODEGEN [true or false] - Runtime code generation enabled (experiment)
-\set AGG_HASH_TABLE_SIZE [int value] - Aggregation hash table size
-\set SORT_LIST_SIZE [int value] - List size for in-memory sort
-\set JOIN_HASH_TABLE_SIZE [int value] - Join hash table size
+\set AGG_HASH_TABLE_SIZE [int value] - The initial size of list for in-memory aggregation
+\set SORT_LIST_SIZE [int value] - The initial size of list for in-memory sort
+\set JOIN_HASH_TABLE_SIZE [int value] - The initial size of hash table for in-memory hash
join
 \set INDEX_ENABLED [true or false] - index scan enabled
 \set INDEX_SELECTIVITY_THRESHOLD [real value] - the selectivity threshold for index scan
 \set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If true, a partitioned table
is overwritten even if a sub query leads to no result. Otherwise, the table data will be kept
if there is no result
 \set ARITHABORT [true or false] - If true, a running query will be terminated when an overflow
or divide-by-zero occurs.
-\set FETCH_ROWNUM [int value] - Sets the number of rows at a time from Master
+\set FETCH_ROWNUM [int value] - The number of rows to be fetched from Master at a time
 \set BLOCK_ON_RESULT [true or false] - Whether to block result set on query execution
 \set COMPRESSED_RESULT_TRANSFER [true or false] - Use compression to optimize result transmission.
 \set DEBUG_ENABLED [true or false] - (debug only) debug mode enabled
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/e52c3308/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst b/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst
index cdf0362..2280897 100644
--- a/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst
+++ b/tajo-docs/src/main/sphinx/configuration/tajo-site-xml.rst
@@ -27,6 +27,8 @@ A flag to enable or disable the use of broadcast join.
     <value>true</value>
   </property>
 
+.. _tajo.dist-query.broadcast.non-cross-join.threshold-kb:
+
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.broadcast.non-cross-join.threshold-kb`
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -45,6 +47,8 @@ A threshold for non-cross joins. When a non-cross join query is executed
with th
     <value>5120</value>
   </property>
 
+.. _tajo.dist-query.broadcast.cross-join.threshold-kb:
+
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.broadcast.cross-join.threshold-kb`
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -66,6 +70,8 @@ A threshold for cross joins. When a cross join query is executed, the whole
size
 .. warning::
   In Tajo, the broadcast join is only the way to perform cross joins. Since the cross join
is a very expensive operation, this value need to be tuned carefully.
 
+.. _tajo.dist-query.join.task-volume-mb:
+
 """"""""""""""""""""""""""""""""""""""
 `tajo.dist-query.join.task-volume-mb`
 """"""""""""""""""""""""""""""""""""""
@@ -85,6 +91,8 @@ As a result, it determines the degree of the parallel processing of the
join que
     <value>64</value>
   </property>
 
+.. _tajo.dist-query.join.partition-volume-mb:
+
 """""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.join.partition-volume-mb`
 """""""""""""""""""""""""""""""""""""""""""
@@ -104,6 +112,8 @@ this value indicates the output size of each task at the first stage,
which dete
     <value>128</value>
   </property>
 
+.. _tajo.executor.join.common.in-memory-hash-threshold-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.executor.join.common.in-memory-hash-threshold-mb`
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -129,6 +139,8 @@ Otherwise, the sort-merge join is used.
   its actual size is usually much larger than the configured value, which means that too
large threshold can cause unexpected OutOfMemory errors.
   This value should be tuned carefully.
 
+.. _tajo.executor.join.inner.in-memory-hash-threshold-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.executor.join.inner.in-memory-hash-threshold-mb`
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -154,6 +166,8 @@ Otherwise, the sort-merge join is used.
   its actual size is usually much larger than the configured value, which means that too
large threshold can cause unexpected OutOfMemory errors.
   This value should be tuned carefully.
 
+.. _tajo.executor.join.outer.in-memory-hash-threshold-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.executor.join.outer.in-memory-hash-threshold-mb`
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -179,6 +193,8 @@ Otherwise, the sort-merge join is used.
   its actual size is usually much larger than the configured value, which means that too
large threshold can cause unexpected OutOfMemory errors.
   This value should be tuned carefully.
 
+.. _tajo.executor.join.hash-table.size:
+
 """""""""""""""""""""""""""""""""""""
 `tajo.executor.join.hash-table.size`
 """""""""""""""""""""""""""""""""""""
@@ -200,6 +216,8 @@ The initial size of hash table for in-memory hash join.
 Sort Query Settings
 ======================
 
+.. _tajo.dist-query.sort.task-volume-mb:
+
 """"""""""""""""""""""""""""""""""""""
 `tajo.dist-query.sort.task-volume-mb`
 """"""""""""""""""""""""""""""""""""""
@@ -219,6 +237,8 @@ As a result, it determines the degree of the parallel processing of the
sort que
     <value>64</value>
   </property>
 
+.. _tajo.executor.external-sort.buffer-mb:
+
 """"""""""""""""""""""""""""""""""""""""
 `tajo.executor.external-sort.buffer-mb`
 """"""""""""""""""""""""""""""""""""""""
@@ -237,6 +257,8 @@ A threshold to choose the sort algorithm. If the input data is larger
than this
     <value>200</value>
   </property>
 
+.. _tajo.executor.sort.list.size:
+
 """"""""""""""""""""""""""""""""""""""
 `tajo.executor.sort.list.size`
 """"""""""""""""""""""""""""""""""""""
@@ -258,6 +280,8 @@ The initial size of list for in-memory sort.
 Group by Query Settings
 =========================
 
+.. _tajo.dist-query.groupby.multi-level-aggr:
+
 """"""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.groupby.multi-level-aggr`
 """"""""""""""""""""""""""""""""""""""""""""
@@ -276,6 +300,8 @@ Otherwise, 2-phase aggregation algorithm is used.
     <value>true</value>
   </property>
 
+.. _tajo.dist-query.groupby.partition-volume-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.groupby.partition-volume-mb`
 """"""""""""""""""""""""""""""""""""""""""""""
@@ -295,6 +321,8 @@ this value indicates the output size of each task at the first stage,
which dete
     <value>256</value>
   </property>
 
+.. _tajo.dist-query.groupby.task-volume-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.groupby.task-volume-mb`
 """"""""""""""""""""""""""""""""""""""""""""""
@@ -314,6 +342,8 @@ As a result, it determines the degree of the parallel processing of the
aggregat
     <value>64</value>
   </property>
 
+.. _tajo.executor.groupby.in-memory-hash-threshold-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.executor.groupby.in-memory-hash-threshold-mb`
 """"""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -339,6 +369,8 @@ Otherwise, the sort-based aggregation is used.
   its actual size is usually much larger than the configured value, which means that too
large threshold can cause unexpected OutOfMemory errors.
   This value should be tuned carefully.
 
+.. _tajo.executor.aggregate.hash-table.size:
+
 """"""""""""""""""""""""""""""""""""""""""
 `tajo.executor.aggregate.hash-table.size`
 """"""""""""""""""""""""""""""""""""""""""
@@ -360,6 +392,8 @@ The initial size of hash table for in-memory aggregation.
 Date/Time Settings
 ======================
 
+.. _tajo.timezone:
+
 """""""""""""""""""
 `tajo.timezone`
 """""""""""""""""""
@@ -377,6 +411,8 @@ Refer to :doc:`/time_zone`.
     <value>GMT+9</value>
   </property>
 
+.. _tajo.datetime.date-order:
+
 """""""""""""""""""""""""""
 `tajo.datetime.date-order`
 """""""""""""""""""""""""""
@@ -398,6 +434,8 @@ Date order specification.
 Table partitions
 ======================
 
+.. _tajo.partition.overwrite.even-if-no-result:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.partition.overwrite.even-if-no-result`
 """"""""""""""""""""""""""""""""""""""""""""""""""""
@@ -415,6 +453,8 @@ If this value is true, a partitioned table is overwritten even if a subquery
lea
     <value>false</value>
   </property>
 
+.. _tajo.dist-query.table-partition.task-volume-mb:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.dist-query.table-partition.task-volume-mb`
 """"""""""""""""""""""""""""""""""""""""""""""""""""
@@ -438,6 +478,8 @@ This value indicates the output size of a task of the former stage, which
determ
 Arithmetic Settings
 ======================
 
+.. _tajo.behavior.arithmetic-abort:
+
 """"""""""""""""""""""""""""""""""""""""""""""""""""
 `tajo.behavior.arithmetic-abort`
 """"""""""""""""""""""""""""""""""""""""""""""""""""

http://git-wip-us.apache.org/repos/asf/tajo/blob/e52c3308/tajo-docs/src/main/sphinx/tsql/variables.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/tsql/variables.rst b/tajo-docs/src/main/sphinx/tsql/variables.rst
index 2e5249e..3f787e2 100644
--- a/tajo-docs/src/main/sphinx/tsql/variables.rst
+++ b/tajo-docs/src/main/sphinx/tsql/variables.rst
@@ -2,8 +2,10 @@
 Session Variables
 *********************************
 
+Once a Tajo client connects to the Tajo master, it is assigned a unique session. This session
is kept until the client disconnects or the session expires.
 
-Each client connection to TajoMaster creates a unique session, and the client and TajoMaster
uses the session until disconnect. A session provides session variables which are used for
various configs per session.
+For the sake of more convenient user configuration, Tajo provides `session variables`.
+With session variables, different configurations are allowed for each session.
 
 ``tsql`` provides the meta command ``\set`` to manipulate session variables. Just ``\set``
command shows all session variables. ::
 
@@ -28,35 +30,477 @@ Each client connection to TajoMaster creates a unique session, and the
client an
 Also, ``\unset key`` will unset the session variable named *key*.
 
 
-Now, tajo provides the following session variables.
-
-* ``DIST_QUERY_BROADCAST_JOIN_THRESHOLD``
-* ``DIST_QUERY_JOIN_TASK_VOLUME``
-* ``DIST_QUERY_SORT_TASK_VOLUME``
-* ``DIST_QUERY_GROUPBY_TASK_VOLUME``
-* ``DIST_QUERY_JOIN_PARTITION_VOLUME``
-* ``DIST_QUERY_GROUPBY_PARTITION_VOLUME``
-* ``DIST_QUERY_TABLE_PARTITION_VOLUME``
-* ``EXECUTOR_EXTERNAL_SORT_BUFFER_SIZE``
-* ``EXECUTOR_HASH_JOIN_SIZE_THRESHOLD``
-* ``EXECUTOR_INNER_HASH_JOIN_SIZE_THRESHOLD``
-* ``EXECUTOR_OUTER_HASH_JOIN_SIZE_THRESHOLD``
-* ``EXECUTOR_GROUPBY_INMEMORY_HASH_THRESHOLD``
-* ``MAX_OUTPUT_FILE_SIZE``
-* ``CODEGEN``
-* ``CLIENT_SESSION_EXPIRY_TIME``
-* ``CLI_MAX_COLUMN``
-* ``CLI_NULL_CHAR``
-* ``CLI_PRINT_PAUSE_NUM_RECORDS``
-* ``CLI_PRINT_PAUSE``
-* ``CLI_PRINT_ERROR_TRACE``
-* ``CLI_OUTPUT_FORMATTER_CLASS``
-* ``CLI_ERROR_STOP``
-* ``TIMEZONE``
-* ``DATE_ORDER``
-* ``TEXT_NULL``
-* ``DEBUG_ENABLED``
-* ``BEHAVIOR_ARITHMETIC_ABORT``
-* ``RESULT_SET_FETCH_ROWNUM``
+Currently, Tajo provides the following session variables.
+
+.. describe:: BROADCAST_NON_CROSS_JOIN_THRESHOLD
+
+A threshold for non-cross joins. When a non-cross join query is executed with the broadcast
join, the whole size of broadcasted tables won't exceed this threshold.
+
+  * Configuration name: :ref:`tajo.dist-query.broadcast.non-cross-join.threshold-kb`
+  * Property value: Integer
+  * Unit: KB
+  * Default value: 5120
+  * Example
+
+.. code-block:: sh
+
+  \set BROADCAST_NON_CROSS_JOIN_THRESHOLD 5120
+
+.. describe:: BROADCAST_CROSS_JOIN_THRESHOLD
+
+A threshold for cross joins. When a cross join query is executed, the whole size of broadcasted
tables won't exceed this threshold.
+
+  * Configuration name: :ref:`tajo.dist-query.broadcast.cross-join.threshold-kb`
+  * Property value: Integer
+  * Unit: KB
+  * Default value: 1024
+  * Example
+
+.. code-block:: sh
+
+  \set BROADCAST_CROSS_JOIN_THRESHOLD 1024
+
+.. warning::
+  In Tajo, the broadcast join is the only way to perform cross joins. Since the cross join
is a very expensive operation, this value needs to be tuned carefully.
+
+.. describe:: JOIN_TASK_INPUT_SIZE
+
+The repartition join is executed in two stages. When a join query is executed with the repartition
join, this value indicates the amount of input data processed by each task at the second stage.
+As a result, it determines the degree of the parallel processing of the join query.
+
+  * Configuration name: :ref:`tajo.dist-query.join.task-volume-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set JOIN_TASK_INPUT_SIZE 64
+
+.. describe:: JOIN_PER_SHUFFLE_SIZE
+
+The repartition join is executed in two stages. When a join query is executed with the repartition
join,
+this value indicates the output size of each task at the first stage, which determines the
number of partitions to be shuffled between two stages.
+
+  * Configuration name: :ref:`tajo.dist-query.join.partition-volume-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 128
+  * Example
+
+.. code-block:: sh
+
+  \set JOIN_PER_SHUFFLE_SIZE 128
+
+.. describe:: HASH_JOIN_SIZE_LIMIT
+
+This value provides the criterion to decide the algorithm to perform a join in a task.
+If the input data is smaller than this value, join is performed with the in-memory hash join.
+Otherwise, the sort-merge join is used.
+
+  * Configuration name: :ref:`tajo.executor.join.common.in-memory-hash-threshold-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set HASH_JOIN_SIZE_LIMIT 64
+
+.. warning::
+  This value is the size of the input stored on file systems. So, when the input data is
loaded into JVM heap,
+  its actual size is usually much larger than the configured value, which means that a
threshold that is too large can cause unexpected OutOfMemory errors.
+  This value should be tuned carefully.
+
+.. describe:: INNER_HASH_JOIN_SIZE_LIMIT
+
+This value provides the criterion to decide the algorithm to perform an inner join in a task.
+If the input data is smaller than this value, the inner join is performed with the in-memory
hash join.
+Otherwise, the sort-merge join is used.
+
+  * Configuration name: :ref:`tajo.executor.join.inner.in-memory-hash-threshold-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set INNER_HASH_JOIN_SIZE_LIMIT 64
+
+.. warning::
+  This value is the size of the input stored on file systems. So, when the input data is
loaded into JVM heap,
+  its actual size is usually much larger than the configured value, which means that a
threshold that is too large can cause unexpected OutOfMemory errors.
+  This value should be tuned carefully.
+
+.. describe:: OUTER_HASH_JOIN_SIZE_LIMIT
+
+This value provides the criterion to decide the algorithm to perform an outer join in a task.
+If the input data is smaller than this value, the outer join is performed with the in-memory
hash join.
+Otherwise, the sort-merge join is used.
+
+  * Configuration name: :ref:`tajo.executor.join.outer.in-memory-hash-threshold-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set OUTER_HASH_JOIN_SIZE_LIMIT 64
+
+.. warning::
+  This value is the size of the input stored on file systems. So, when the input data is
loaded into JVM heap,
+  its actual size is usually much larger than the configured value, which means that a
threshold that is too large can cause unexpected OutOfMemory errors.
+  This value should be tuned carefully.
+
+.. describe:: JOIN_HASH_TABLE_SIZE
+
+The initial size of hash table for in-memory hash join.
+
+  * Configuration name: :ref:`tajo.executor.join.hash-table.size`
+  * Property value: Integer
+  * Default value: 100000
+  * Example
+
+.. code-block:: sh
+
+  \set JOIN_HASH_TABLE_SIZE 100000
+
+.. describe:: SORT_TASK_INPUT_SIZE
+
+The sort operation is executed in two stages. When a sort query is executed, this value indicates
the amount of input data processed by each task at the second stage.
+As a result, it determines the degree of the parallel processing of the sort query.
+
+  * Configuration name: :ref:`tajo.dist-query.sort.task-volume-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set SORT_TASK_INPUT_SIZE 64
+
+.. describe:: EXTSORT_BUFFER_SIZE
+
+A threshold to choose the sort algorithm. If the input data is larger than this threshold,
the external sort algorithm is used.
+
+  * Configuration name: :ref:`tajo.executor.external-sort.buffer-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 200
+  * Example
+
+.. code-block:: sh
+
+  \set EXTSORT_BUFFER_SIZE 200
+
+.. describe:: SORT_LIST_SIZE
+
+The initial size of list for in-memory sort.
+
+  * Configuration name: :ref:`tajo.executor.sort.list.size`
+  * Property value: Integer
+  * Default value: 100000
+  * Example
+
+.. code-block:: sh
+
+  \set SORT_LIST_SIZE 100000
+
+.. describe:: GROUPBY_MULTI_LEVEL_ENABLED
+
+A flag to enable the multi-level algorithm for distinct aggregation. If this value is set,
3-phase aggregation algorithm is used.
+Otherwise, 2-phase aggregation algorithm is used.
+
+  * Configuration name: :ref:`tajo.dist-query.groupby.multi-level-aggr`
+  * Property value: Boolean
+  * Default value: true
+  * Example
+
+.. code-block:: sh
+
+  \set GROUPBY_MULTI_LEVEL_ENABLED true
+
+.. describe:: GROUPBY_PER_SHUFFLE_SIZE
+
+The aggregation is executed in two stages. When an aggregation query is executed,
+this value indicates the output size of each task at the first stage, which determines the
number of partitions to be shuffled between two stages.
+
+  * Configuration name: :ref:`tajo.dist-query.groupby.partition-volume-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 256
+  * Example
+
+.. code-block:: sh
+
+  \set GROUPBY_PER_SHUFFLE_SIZE 256
+
+.. describe:: GROUPBY_TASK_INPUT_SIZE
+
+The aggregation operation is executed in two stages. When an aggregation query is executed,
this value indicates the amount of input data processed by each task at the second stage.
+As a result, it determines the degree of the parallel processing of the aggregation query.
+
+  * Configuration name: :ref:`tajo.dist-query.groupby.task-volume-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set GROUPBY_TASK_INPUT_SIZE 64
+
+.. describe:: HASH_GROUPBY_SIZE_LIMIT
+
+This value provides the criterion to decide the algorithm to perform an aggregation in a
task.
+If the input data is smaller than this value, the aggregation is performed with the in-memory
hash aggregation.
+Otherwise, the sort-based aggregation is used.
+
+  * Configuration name: :ref:`tajo.executor.groupby.in-memory-hash-threshold-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 64
+  * Example
+
+.. code-block:: sh
+
+  \set HASH_GROUPBY_SIZE_LIMIT 64
+
+.. warning::
+  This value is the size of the input stored on file systems. So, when the input data is
loaded into JVM heap,
+  its actual size is usually much larger than the configured value, which means that a
threshold that is too large can cause unexpected OutOfMemory errors.
+  This value should be tuned carefully.
+
+.. describe:: AGG_HASH_TABLE_SIZE
+
+The initial size of hash table for in-memory aggregation.
+
+  * Configuration name: :ref:`tajo.executor.aggregate.hash-table.size`
+  * Property value: Integer
+  * Default value: 10000
+  * Example
+
+.. code-block:: sh
+
+  \set AGG_HASH_TABLE_SIZE 10000
+
+.. describe:: TIMEZONE
+
+Refer to :doc:`/time_zone`.
+
+  * Configuration name: :ref:`tajo.timezone`
+  * Property value: Time zone id
+  * Default value: Default time zone of JVM
+  * Example
+
+.. code-block:: sh
+
+  \set TIMEZONE GMT+9
+
+.. describe:: DATE_ORDER
+
+Date order specification.
+
+  * Configuration name: :ref:`tajo.datetime.date-order`
+  * Property value: One of YMD, DMY, MDY.
+  * Default value: YMD
+  * Example
+
+.. code-block:: sh
+
+  \set DATE_ORDER YMD
+
+.. describe:: PARTITION_NO_RESULT_OVERWRITE_ENABLED
+
+If this value is true, a partitioned table is overwritten even if a subquery leads to no
result. Otherwise, the table data will be kept if there is no result.
+
+  * Configuration name: :ref:`tajo.partition.overwrite.even-if-no-result`
+  * Property value: Boolean
+  * Default value: false
+  * Example
+
+.. code-block:: sh
+
+  \set PARTITION_NO_RESULT_OVERWRITE_ENABLED false
+
+.. describe:: TABLE_PARTITION_PER_SHUFFLE_SIZE
+
+In Tajo, storing a partition table is executed in two stages.
+This value indicates the output size of a task of the former stage, which determines the
number of partitions to be shuffled between two stages.
+
+  * Configuration name: :ref:`tajo.dist-query.table-partition.task-volume-mb`
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 256
+  * Example
+
+.. code-block:: sh
+
+  \set TABLE_PARTITION_PER_SHUFFLE_SIZE 256
+
+.. describe:: ARITHABORT
+
+A flag to indicate how to handle the errors caused by invalid arithmetic operations. If true,
a running query will be terminated when an overflow or a divide-by-zero occurs.
+
+  * Configuration name: :ref:`tajo.behavior.arithmetic-abort`
+  * Property value: Boolean
+  * Default value: false
+  * Example
+
+.. code-block:: sh
+
+  \set ARITHABORT false
+
+.. describe:: MAX_OUTPUT_FILE_SIZE
+
+Maximum per-output file size. 0 means infinite.
+
+  * Property value: Integer
+  * Unit: MB
+  * Default value: 0
+  * Example
+
+.. code-block:: sh
+
+  \set MAX_OUTPUT_FILE_SIZE 0
+
+.. describe:: SESSION_EXPIRY_TIME
+
+Session expiry time.
+
+  * Property value: Integer
+  * Unit: seconds
+  * Default value: 3600
+  * Example
+
+.. code-block:: sh
+
+  \set SESSION_EXPIRY_TIME 3600
+
+.. describe:: CLI_COLUMNS
+
+Sets the width for the wrapped format.
+
+  * Property value: Integer
+  * Default value: 120
+  * Example
+
+.. code-block:: sh
+
+  \set CLI_COLUMNS 120
+
+.. describe:: CLI_NULL_CHAR
+
+Sets the string to be printed in place of a null value.
+
+  * Property value: String
+  * Default value: ''
+  * Example
+
+.. code-block:: sh
+
+  \set CLI_NULL_CHAR ''
+
+.. describe:: CLI_PAGE_ROWS
+
+Sets the number of rows for paging.
+
+  * Property value: Integer
+  * Default value: 100
+  * Example
+
+.. code-block:: sh
+
+  \set CLI_PAGE_ROWS 100
+
+.. describe:: CLI_PAGING_ENABLED
+
+Enable paging of result display.
+
+  * Property value: Boolean
+  * Default value: true
+  * Example
+
+.. code-block:: sh
+
+  \set CLI_PAGING_ENABLED true
+
+.. describe:: CLI_DISPLAY_ERROR_TRACE
+
+Enable display of error trace.
+
+  * Property value: Boolean
+  * Default value: true
+  * Example
+
+.. code-block:: sh
+
+  \set CLI_DISPLAY_ERROR_TRACE true
+
+.. describe:: CLI_FORMATTER_CLASS
+
+Sets the output format class to display results.
+
+  * Property value: Class name
+  * Default value: org.apache.tajo.cli.tsql.DefaultTajoCliOutputFormatter
+  * Example
+
+.. code-block:: sh
+
+  \set CLI_FORMATTER_CLASS org.apache.tajo.cli.tsql.DefaultTajoCliOutputFormatter
+
+.. describe:: ON_ERROR_STOP
+
+tsql will exit if an error occurs.
+
+  * Property value: Boolean
+  * Default value: false
+  * Example
+
+.. code-block:: sh
+
+  \set ON_ERROR_STOP false
+
+.. describe:: NULL_CHAR
+
+Null char of text file output. This value is used when the table property `text.null` is
not specified.
+
+  * Property value: String
+  * Default value: '\\N'
+  * Example
+
+.. code-block:: sh
+
+  \set NULL_CHAR '\\N'
+
+.. describe:: DEBUG_ENABLED
+
+A flag to enable debug mode.
+
+  * Property value: Boolean
+  * Default value: false
+  * Example
+
+.. code-block:: sh
+
+  \set DEBUG_ENABLED false
+
+.. describe:: FETCH_ROWNUM
+
+The number of rows to be fetched from Master each time.
+
+  * Property value: Integer
+  * Default value: 200
+  * Example
+
+.. code-block:: sh
+
+  \set FETCH_ROWNUM 200
+
 
 


Mime
View raw message