tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jihoon...@apache.org
Subject tajo git commit: TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill compression section of the 'Table Management' chapter.
Date Thu, 26 Nov 2015 01:32:34 GMT
Repository: tajo
Updated Branches:
  refs/heads/branch-0.11.1 346b08d21 -> c91bfdabc


TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill compression section
of the 'Table Management' chapter.

Closes #870

Signed-off-by: Jihoon Son <jihoonson@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/c91bfdab
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/c91bfdab
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/c91bfdab

Branch: refs/heads/branch-0.11.1
Commit: c91bfdabc79d719c5f52b83b311488ddcf468f29
Parents: 346b08d
Author: Jongyoung Park <eminency@gmail.com>
Authored: Thu Nov 26 10:32:03 2015 +0900
Committer: Jihoon Son <jihoonson@apache.org>
Committed: Thu Nov 26 10:32:03 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  4 ++++
 .../java/org/apache/tajo/conf/TajoConf.java     |  2 +-
 .../planner/physical/PhysicalPlanUtil.java      |  6 ++---
 tajo-docs/src/main/sphinx/table_management.rst  |  8 +++----
 .../sphinx/table_management/compression.rst     | 24 +++++++++++++++++---
 .../sphinx/table_management/data_formats.rst    | 15 ++++++++++++
 .../sphinx/table_management/file_formats.rst    | 15 ------------
 .../sphinx/table_management/table_overview.rst  | 17 +++++++++-----
 .../sphinx/table_management/tablespaces.rst     |  4 ++--
 .../apache/tajo/storage/StorageProperty.java    |  6 ++---
 10 files changed, 64 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 3ceb249..b2ae4b7 100644
--- a/CHANGES
+++ b/CHANGES
@@ -43,6 +43,10 @@ Release 0.11.1 - unreleased
 
   TASKS
 
+    TAJO-1857: Rename the section of 'File Formats' to 'Data Formats' and fill 
+    compression section of the 'Table Management' chapter.
+    (Contributed by Jongyoung Park. Committed by jihoon)
+
     TAJO-1989: Upgrade Maven Surefire Plugin to 2.19. (jinho)
 
     TAJO-1975: Gathering fine-grained column statistics for range shuffle. (jihoon)

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index 097d689..2e4d270 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -368,7 +368,7 @@ public class TajoConf extends Configuration {
     $TIMEZONE("tajo.timezone", TimeZone.getDefault().getID()),
     $DATE_ORDER("tajo.datetime.date-order", "YMD"),
 
-    // FILE FORMAT
+    // null character for text file output
     $TEXT_NULL("tajo.text.null", "\\\\N"),
 
     // Only for Debug and Testing

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
index 1bd38d1..e27ba5a 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
@@ -203,7 +203,7 @@ public class PhysicalPlanUtil {
   }
 
   /**
-   * Set nullChar to TableMeta according to file format
+   * Set nullChar to TableMeta according to data format
    *
    * @param meta TableMeta
    * @param nullChar A character for NULL representation
@@ -220,10 +220,10 @@ public class PhysicalPlanUtil {
   }
 
   /**
-   * Check if TableMeta contains NULL char property according to file format
+   * Check if TableMeta contains NULL char property according to data format
    *
    * @param meta Table Meta
-   * @return True if TableMeta contains NULL char property according to file format
+   * @return True if TableMeta contains NULL char property according to data format
    */
   public static boolean containsNullChar(TableMeta meta) {
     String dataFormat = meta.getDataFormat();

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-docs/src/main/sphinx/table_management.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management.rst b/tajo-docs/src/main/sphinx/table_management.rst
index 5a4693e..dd7b9ad 100644
--- a/tajo-docs/src/main/sphinx/table_management.rst
+++ b/tajo-docs/src/main/sphinx/table_management.rst
@@ -1,6 +1,6 @@
-******************
+****************
 Table Management
-******************
+****************
 
 In Tajo, a table is a logical view of one data sources. Logically, one table consists of
a logical schema, partitions, URL, and various properties. Physically, A table can be a directory
in HDFS, a single file, one HBase table, or a RDBMS table. In order to make good use of Tajo,
users need to understand features and physical characteristics of their physical layout. This
section explains all about table management.
 
@@ -9,5 +9,5 @@ In Tajo, a table is a logical view of one data sources. Logically, one table
con
 
     table_management/table_overview
     table_management/tablespaces
-    table_management/file_formats
-    table_management/compression
\ No newline at end of file
+    table_management/data_formats
+    table_management/compression

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-docs/src/main/sphinx/table_management/compression.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/compression.rst b/tajo-docs/src/main/sphinx/table_management/compression.rst
index 3d03ba8..67dd255 100644
--- a/tajo-docs/src/main/sphinx/table_management/compression.rst
+++ b/tajo-docs/src/main/sphinx/table_management/compression.rst
@@ -1,5 +1,23 @@
-*********************************
+***********
 Compression
-*********************************
+***********
 
-.. todo::
\ No newline at end of file
+Using compression makes data more compact, thereby enabling efficient use of network bandwidth
and storage. Most of Tajo's data formats support data compression.
+Currently, the compression configuration affects only the stored data format, and it is specified
when a table is created as table meta information (see `Create Table <../sql_language/ddl.html#create-table>`_).
+Compression for intermediate data or other data is not supported yet.
+
+===========================================
+Compression Properties for each Data Format
+===========================================
+
+ .. csv-table:: Compression Properties
+
+  **Data Format**,**Property Name**,**Available Values**
+  :doc:`text</table_management/text>`/:doc:`json</table_management/json>`/:doc:`rcfile</table_management/rcfile>`/:doc:`sequencefile</table_management/sequencefile>`
[#f1]_,compression.codec,Fully Qualified Classname in Hadoop [#f2]_
+  :doc:`parquet</table_management/parquet>`,parquet.compression,uncompressed/snappy/gzip/lzo
+  :doc:`orc</table_management/orc>`,orc.compression.kind,none/snappy/zlib
+
+.. rubric:: Footnotes
+
+.. [#f1] For sequence file, you should specify 'compression.type' in addition to 'compression.codec'.
Refer to :doc:`/table_management/sequencefile`.
+.. [#f2] All classes are available if they implement `org.apache.hadoop.io.compress.CompressionCodec
<https://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/compress/CompressionCodec.html>`_.

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-docs/src/main/sphinx/table_management/data_formats.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/data_formats.rst b/tajo-docs/src/main/sphinx/table_management/data_formats.rst
new file mode 100644
index 0000000..b17e3f3
--- /dev/null
+++ b/tajo-docs/src/main/sphinx/table_management/data_formats.rst
@@ -0,0 +1,15 @@
+************
+Data Formats
+************
+
+Currently, Tajo provides the following data formats:
+
+.. toctree::
+    :maxdepth: 1
+
+    text
+    json
+    rcfile
+    parquet
+    orc
+    sequencefile
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-docs/src/main/sphinx/table_management/file_formats.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/file_formats.rst b/tajo-docs/src/main/sphinx/table_management/file_formats.rst
deleted file mode 100644
index 966903c..0000000
--- a/tajo-docs/src/main/sphinx/table_management/file_formats.rst
+++ /dev/null
@@ -1,15 +0,0 @@
-*************************************
-File Formats
-*************************************
-
-Currently, Tajo provides following file formats:
-
-.. toctree::
-    :maxdepth: 1
-
-    text
-    json
-    rcfile
-    parquet
-    orc
-    sequencefile
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-docs/src/main/sphinx/table_management/table_overview.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/table_overview.rst b/tajo-docs/src/main/sphinx/table_management/table_overview.rst
index d030b4a..7ecf53b 100644
--- a/tajo-docs/src/main/sphinx/table_management/table_overview.rst
+++ b/tajo-docs/src/main/sphinx/table_management/table_overview.rst
@@ -1,10 +1,12 @@
-*************************************
+***********************
 Overview of Tajo Tables
-*************************************
+***********************
 
+========
 Overview
 ========
 
+===========
 Tablespaces
 ===========
 
@@ -12,8 +14,9 @@ Tablespaces is a physical location where files or data objects representing
data
 
 Please refer to :doc:`/table_management/tablespaces` if you want to know more information
about tablespaces.
 
+=============
 Managed Table
-================
+=============
 
 ``CREATE TABLE`` statement lets you create a table located in the warehouse directory specified
by the configuration property ``tajo.warehouse.directory`` or ``${tajo.root}/warehouse`` by
default. For example:
 
@@ -26,8 +29,9 @@ Managed Table
  );
 
 
+==============
 External Table
-================
+==============
 
 ``CREATE EXTERNAL TABLE`` statement lets you create a table located in a specify location
so that Tajo does not use a default data warehouse location for the table. External tables
are in common used if you already have data generated. LOCATION clause must be required for
an external table. 
 
@@ -65,7 +69,7 @@ The following example is to set a custom field delimiter, NULL character,
and co
                    'text.null'='\\N',
                    'compression.codec'='org.apache.hadoop.io.compress.SnappyCodec');
 
-Each physical table layout has its own specialized properties. They will be addressed in
:doc:`/table_management/file_formats`.
+Each physical table layout has its own specialized properties. They will be addressed in
:doc:`/table_management/data_formats`.
 
 
 Common Table Properties
@@ -75,7 +79,8 @@ There are some common table properties which are used in most tables.
 
 Compression
 -----------
-.. todo::
+
+See :doc:`compression`.
 
 Time zone
 ---------

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
----------------------------------------------------------------------
diff --git a/tajo-docs/src/main/sphinx/table_management/tablespaces.rst b/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
index 79ea65f..9316154 100644
--- a/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
+++ b/tajo-docs/src/main/sphinx/table_management/tablespaces.rst
@@ -1,6 +1,6 @@
-*************************************
+***********
 Tablespaces
-*************************************
+***********
 
 Tablespaces in Tajo allow users to define locations in the storage system where the files
or data objects representing database objects can be stored. Once defined, a tablespace can
be referred to by name when creating a database or a table. Especially, it is very useful
when a Tajo cluster instance should use heterogeneous storage systems such as HDFS, MySQL,
and Oracle.
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/c91bfdab/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
index 41ecd38..c3dc3e9 100644
--- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
+++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageProperty.java
@@ -22,7 +22,7 @@ package org.apache.tajo.storage;
  * Storage Properties
  */
 public class StorageProperty {
-  /** default file format */
+  /** default data format */
   private final String defaultFormat;
   /** if this storage supports move operator */
   private final boolean movable;
@@ -47,8 +47,8 @@ public class StorageProperty {
   }
 
   /**
-   * Return default file format
-   * @return Default file format
+   * Return default data format
+   * @return Default data format
    */
   public String defaultFormat() {
     return defaultFormat;


Mime
View raw message