drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From krish...@apache.org
Subject [03/11] drill git commit: 1.4 partition pruning
Date Mon, 14 Dec 2015 23:48:54 GMT
1.4 partition pruning


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/7b62a919
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/7b62a919
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/7b62a919

Branch: refs/heads/gh-pages
Commit: 7b62a919ed5c08c4a1682de768b30b886464dece
Parents: 965bfbf
Author: Kris Hahn <krishahn@apache.org>
Authored: Mon Dec 14 10:31:43 2015 -0800
Committer: Kris Hahn <krishahn@apache.org>
Committed: Mon Dec 14 15:46:37 2015 -0800

----------------------------------------------------------------------
 _data/docs.json                                 | 356 +++++++++++++++++--
 .../020-migrating-partitioned-data.md           |   2 +-
 .../partition-pruning/030-partition-pruning.md  | 111 ------
 .../030-using-partition-pruning.md              | 111 ++++++
 4 files changed, 443 insertions(+), 137 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/7b62a919/_data/docs.json
----------------------------------------------------------------------
diff --git a/_data/docs.json b/_data/docs.json
index 860cda2..24babff 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -371,12 +371,29 @@
             "next_title": "Apache Drill 1.2.0 Release Notes", 
             "next_url": "/docs/apache-drill-1-2-0-release-notes/", 
             "parent": "Release Notes", 
-            "previous_title": "Release Notes", 
-            "previous_url": "/docs/release-notes/", 
+            "previous_title": "Apache Drill 1.4.0 Release Notes", 
+            "previous_url": "/docs/apache-drill-1-4-0-release-notes/", 
             "relative_path": "_docs/rn/008-1.3.0-rn.md", 
             "title": "Apache Drill 1.3.0 Release Notes", 
             "url": "/docs/apache-drill-1-3-0-release-notes/"
         }, 
+        "Apache Drill 1.4.0 Release Notes": {
+            "breadcrumbs": [
+                {
+                    "title": "Release Notes", 
+                    "url": "/docs/release-notes/"
+                }
+            ], 
+            "children": [], 
+            "next_title": "Apache Drill 1.3.0 Release Notes", 
+            "next_url": "/docs/apache-drill-1-3-0-release-notes/", 
+            "parent": "Release Notes", 
+            "previous_title": "Release Notes", 
+            "previous_url": "/docs/release-notes/", 
+            "relative_path": "_docs/rn/007-1.4.0-rn.md", 
+            "title": "Apache Drill 1.4.0 Release Notes", 
+            "url": "/docs/apache-drill-1-4-0-release-notes/"
+        }, 
         "Apache Drill Contribution Guidelines": {
             "breadcrumbs": [
                 {
@@ -4847,6 +4864,27 @@
             "title": "Math and Trig", 
             "url": "/docs/math-and-trig/"
         }, 
+        "Migrating Partitioned Data": {
+            "breadcrumbs": [
+                {
+                    "title": "Partition Pruning", 
+                    "url": "/docs/partition-pruning/"
+                }, 
+                {
+                    "title": "Performance Tuning", 
+                    "url": "/docs/performance-tuning/"
+                }
+            ], 
+            "children": [], 
+            "next_title": "Using Partition Pruning", 
+            "next_url": "/docs/using-partition-pruning/", 
+            "parent": "Partition Pruning", 
+            "previous_title": "Partition Pruning Introduction", 
+            "previous_url": "/docs/partition-pruning-introduction/", 
+            "relative_path": "_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md",

+            "title": "Migrating Partitioned Data", 
+            "url": "/docs/migrating-partitioned-data/"
+        }, 
         "Modify logback.xml": {
             "breadcrumbs": [
                 {
@@ -5731,8 +5769,8 @@
             "next_title": "Choosing a Storage Format", 
             "next_url": "/docs/choosing-a-storage-format/", 
             "parent": "Performance Tuning", 
-            "previous_title": "Partition Pruning", 
-            "previous_url": "/docs/partition-pruning/", 
+            "previous_title": "Using Partition Pruning", 
+            "previous_url": "/docs/using-partition-pruning/", 
             "relative_path": "_docs/performance-tuning/025-optimizing-parquet-reading.md",

             "title": "Optimizing Parquet Metadata Reading", 
             "url": "/docs/optimizing-parquet-metadata-reading/"
@@ -5782,9 +5820,73 @@
                     "url": "/docs/performance-tuning/"
                 }
             ], 
-            "children": [], 
-            "next_title": "Optimizing Parquet Metadata Reading", 
-            "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+            "children": [
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Partition Pruning", 
+                            "url": "/docs/partition-pruning/"
+                        }, 
+                        {
+                            "title": "Performance Tuning", 
+                            "url": "/docs/performance-tuning/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Migrating Partitioned Data", 
+                    "next_url": "/docs/migrating-partitioned-data/", 
+                    "parent": "Partition Pruning", 
+                    "previous_title": "Partition Pruning", 
+                    "previous_url": "/docs/partition-pruning/", 
+                    "relative_path": "_docs/performance-tuning/partition-pruning/010-partition-pruning-introduction.md",

+                    "title": "Partition Pruning Introduction", 
+                    "url": "/docs/partition-pruning-introduction/"
+                }, 
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Partition Pruning", 
+                            "url": "/docs/partition-pruning/"
+                        }, 
+                        {
+                            "title": "Performance Tuning", 
+                            "url": "/docs/performance-tuning/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Using Partition Pruning", 
+                    "next_url": "/docs/using-partition-pruning/", 
+                    "parent": "Partition Pruning", 
+                    "previous_title": "Partition Pruning Introduction", 
+                    "previous_url": "/docs/partition-pruning-introduction/", 
+                    "relative_path": "_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md",

+                    "title": "Migrating Partitioned Data", 
+                    "url": "/docs/migrating-partitioned-data/"
+                }, 
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Partition Pruning", 
+                            "url": "/docs/partition-pruning/"
+                        }, 
+                        {
+                            "title": "Performance Tuning", 
+                            "url": "/docs/performance-tuning/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Optimizing Parquet Metadata Reading", 
+                    "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+                    "parent": "Partition Pruning", 
+                    "previous_title": "Migrating Partitioned Data", 
+                    "previous_url": "/docs/migrating-partitioned-data/", 
+                    "relative_path": "_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md",

+                    "title": "Using Partition Pruning", 
+                    "url": "/docs/using-partition-pruning/"
+                }
+            ], 
+            "next_title": "Partition Pruning Introduction", 
+            "next_url": "/docs/partition-pruning-introduction/", 
             "parent": "Performance Tuning", 
             "previous_title": "Performance Tuning Introduction", 
             "previous_url": "/docs/performance-tuning-introduction/", 
@@ -5792,6 +5894,27 @@
             "title": "Partition Pruning", 
             "url": "/docs/partition-pruning/"
         }, 
+        "Partition Pruning Introduction": {
+            "breadcrumbs": [
+                {
+                    "title": "Partition Pruning", 
+                    "url": "/docs/partition-pruning/"
+                }, 
+                {
+                    "title": "Performance Tuning", 
+                    "url": "/docs/performance-tuning/"
+                }
+            ], 
+            "children": [], 
+            "next_title": "Migrating Partitioned Data", 
+            "next_url": "/docs/migrating-partitioned-data/", 
+            "parent": "Partition Pruning", 
+            "previous_title": "Partition Pruning", 
+            "previous_url": "/docs/partition-pruning/", 
+            "relative_path": "_docs/performance-tuning/partition-pruning/010-partition-pruning-introduction.md",

+            "title": "Partition Pruning Introduction", 
+            "url": "/docs/partition-pruning-introduction/"
+        }, 
         "Performance": {
             "breadcrumbs": [
                 {
@@ -5836,9 +5959,73 @@
                             "url": "/docs/performance-tuning/"
                         }
                     ], 
-                    "children": [], 
-                    "next_title": "Optimizing Parquet Metadata Reading", 
-                    "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+                    "children": [
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Partition Pruning", 
+                                    "url": "/docs/partition-pruning/"
+                                }, 
+                                {
+                                    "title": "Performance Tuning", 
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ], 
+                            "children": [], 
+                            "next_title": "Migrating Partitioned Data", 
+                            "next_url": "/docs/migrating-partitioned-data/", 
+                            "parent": "Partition Pruning", 
+                            "previous_title": "Partition Pruning", 
+                            "previous_url": "/docs/partition-pruning/", 
+                            "relative_path": "_docs/performance-tuning/partition-pruning/010-partition-pruning-introduction.md",

+                            "title": "Partition Pruning Introduction", 
+                            "url": "/docs/partition-pruning-introduction/"
+                        }, 
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Partition Pruning", 
+                                    "url": "/docs/partition-pruning/"
+                                }, 
+                                {
+                                    "title": "Performance Tuning", 
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ], 
+                            "children": [], 
+                            "next_title": "Using Partition Pruning", 
+                            "next_url": "/docs/using-partition-pruning/", 
+                            "parent": "Partition Pruning", 
+                            "previous_title": "Partition Pruning Introduction", 
+                            "previous_url": "/docs/partition-pruning-introduction/", 
+                            "relative_path": "_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md",

+                            "title": "Migrating Partitioned Data", 
+                            "url": "/docs/migrating-partitioned-data/"
+                        }, 
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Partition Pruning", 
+                                    "url": "/docs/partition-pruning/"
+                                }, 
+                                {
+                                    "title": "Performance Tuning", 
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ], 
+                            "children": [], 
+                            "next_title": "Optimizing Parquet Metadata Reading", 
+                            "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+                            "parent": "Partition Pruning", 
+                            "previous_title": "Migrating Partitioned Data", 
+                            "previous_url": "/docs/migrating-partitioned-data/", 
+                            "relative_path": "_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md",

+                            "title": "Using Partition Pruning", 
+                            "url": "/docs/using-partition-pruning/"
+                        }
+                    ], 
+                    "next_title": "Partition Pruning Introduction", 
+                    "next_url": "/docs/partition-pruning-introduction/", 
                     "parent": "Performance Tuning", 
                     "previous_title": "Performance Tuning Introduction", 
                     "previous_url": "/docs/performance-tuning-introduction/", 
@@ -5857,8 +6044,8 @@
                     "next_title": "Choosing a Storage Format", 
                     "next_url": "/docs/choosing-a-storage-format/", 
                     "parent": "Performance Tuning", 
-                    "previous_title": "Partition Pruning", 
-                    "previous_url": "/docs/partition-pruning/", 
+                    "previous_title": "Using Partition Pruning", 
+                    "previous_url": "/docs/using-partition-pruning/", 
                     "relative_path": "_docs/performance-tuning/025-optimizing-parquet-reading.md",

                     "title": "Optimizing Parquet Metadata Reading", 
                     "url": "/docs/optimizing-parquet-metadata-reading/"
@@ -7742,11 +7929,28 @@
                         }
                     ], 
                     "children": [], 
-                    "next_title": "Apache Drill 1.2.0 Release Notes", 
-                    "next_url": "/docs/apache-drill-1-2-0-release-notes/", 
+                    "next_title": "Apache Drill 1.3.0 Release Notes", 
+                    "next_url": "/docs/apache-drill-1-3-0-release-notes/", 
                     "parent": "Release Notes", 
                     "previous_title": "Release Notes", 
                     "previous_url": "/docs/release-notes/", 
+                    "relative_path": "_docs/rn/007-1.4.0-rn.md", 
+                    "title": "Apache Drill 1.4.0 Release Notes", 
+                    "url": "/docs/apache-drill-1-4-0-release-notes/"
+                }, 
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Release Notes", 
+                            "url": "/docs/release-notes/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Apache Drill 1.2.0 Release Notes", 
+                    "next_url": "/docs/apache-drill-1-2-0-release-notes/", 
+                    "parent": "Release Notes", 
+                    "previous_title": "Apache Drill 1.4.0 Release Notes", 
+                    "previous_url": "/docs/apache-drill-1-4-0-release-notes/", 
                     "relative_path": "_docs/rn/008-1.3.0-rn.md", 
                     "title": "Apache Drill 1.3.0 Release Notes", 
                     "url": "/docs/apache-drill-1-3-0-release-notes/"
@@ -7939,8 +8143,8 @@
                     "url": "/docs/apache-drill-0-4-0-release-notes/"
                 }
             ], 
-            "next_title": "Apache Drill 1.3.0 Release Notes", 
-            "next_url": "/docs/apache-drill-1-3-0-release-notes/", 
+            "next_title": "Apache Drill 1.4.0 Release Notes", 
+            "next_url": "/docs/apache-drill-1-4-0-release-notes/", 
             "parent": "", 
             "previous_title": "Value Vectors", 
             "previous_url": "/docs/value-vectors/", 
@@ -11625,6 +11829,27 @@
             "title": "Using MicroStrategy Analytics with Apache Drill", 
             "url": "/docs/using-microstrategy-analytics-with-apache-drill/"
         }, 
+        "Using Partition Pruning": {
+            "breadcrumbs": [
+                {
+                    "title": "Partition Pruning", 
+                    "url": "/docs/partition-pruning/"
+                }, 
+                {
+                    "title": "Performance Tuning", 
+                    "url": "/docs/performance-tuning/"
+                }
+            ], 
+            "children": [], 
+            "next_title": "Optimizing Parquet Metadata Reading", 
+            "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+            "parent": "Partition Pruning", 
+            "previous_title": "Migrating Partitioned Data", 
+            "previous_url": "/docs/migrating-partitioned-data/", 
+            "relative_path": "_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md",

+            "title": "Using Partition Pruning", 
+            "url": "/docs/using-partition-pruning/"
+        }, 
         "Using Qlik Sense with Drill": {
             "breadcrumbs": [
                 {
@@ -14091,9 +14316,73 @@
                             "url": "/docs/performance-tuning/"
                         }
                     ], 
-                    "children": [], 
-                    "next_title": "Optimizing Parquet Metadata Reading", 
-                    "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+                    "children": [
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Partition Pruning", 
+                                    "url": "/docs/partition-pruning/"
+                                }, 
+                                {
+                                    "title": "Performance Tuning", 
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ], 
+                            "children": [], 
+                            "next_title": "Migrating Partitioned Data", 
+                            "next_url": "/docs/migrating-partitioned-data/", 
+                            "parent": "Partition Pruning", 
+                            "previous_title": "Partition Pruning", 
+                            "previous_url": "/docs/partition-pruning/", 
+                            "relative_path": "_docs/performance-tuning/partition-pruning/010-partition-pruning-introduction.md",

+                            "title": "Partition Pruning Introduction", 
+                            "url": "/docs/partition-pruning-introduction/"
+                        }, 
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Partition Pruning", 
+                                    "url": "/docs/partition-pruning/"
+                                }, 
+                                {
+                                    "title": "Performance Tuning", 
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ], 
+                            "children": [], 
+                            "next_title": "Using Partition Pruning", 
+                            "next_url": "/docs/using-partition-pruning/", 
+                            "parent": "Partition Pruning", 
+                            "previous_title": "Partition Pruning Introduction", 
+                            "previous_url": "/docs/partition-pruning-introduction/", 
+                            "relative_path": "_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md",

+                            "title": "Migrating Partitioned Data", 
+                            "url": "/docs/migrating-partitioned-data/"
+                        }, 
+                        {
+                            "breadcrumbs": [
+                                {
+                                    "title": "Partition Pruning", 
+                                    "url": "/docs/partition-pruning/"
+                                }, 
+                                {
+                                    "title": "Performance Tuning", 
+                                    "url": "/docs/performance-tuning/"
+                                }
+                            ], 
+                            "children": [], 
+                            "next_title": "Optimizing Parquet Metadata Reading", 
+                            "next_url": "/docs/optimizing-parquet-metadata-reading/", 
+                            "parent": "Partition Pruning", 
+                            "previous_title": "Migrating Partitioned Data", 
+                            "previous_url": "/docs/migrating-partitioned-data/", 
+                            "relative_path": "_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md",

+                            "title": "Using Partition Pruning", 
+                            "url": "/docs/using-partition-pruning/"
+                        }
+                    ], 
+                    "next_title": "Partition Pruning Introduction", 
+                    "next_url": "/docs/partition-pruning-introduction/", 
                     "parent": "Performance Tuning", 
                     "previous_title": "Performance Tuning Introduction", 
                     "previous_url": "/docs/performance-tuning-introduction/", 
@@ -14112,8 +14401,8 @@
                     "next_title": "Choosing a Storage Format", 
                     "next_url": "/docs/choosing-a-storage-format/", 
                     "parent": "Performance Tuning", 
-                    "previous_title": "Partition Pruning", 
-                    "previous_url": "/docs/partition-pruning/", 
+                    "previous_title": "Using Partition Pruning", 
+                    "previous_url": "/docs/using-partition-pruning/", 
                     "relative_path": "_docs/performance-tuning/025-optimizing-parquet-reading.md",

                     "title": "Optimizing Parquet Metadata Reading", 
                     "url": "/docs/optimizing-parquet-metadata-reading/"
@@ -16298,11 +16587,28 @@
                         }
                     ], 
                     "children": [], 
-                    "next_title": "Apache Drill 1.2.0 Release Notes", 
-                    "next_url": "/docs/apache-drill-1-2-0-release-notes/", 
+                    "next_title": "Apache Drill 1.3.0 Release Notes", 
+                    "next_url": "/docs/apache-drill-1-3-0-release-notes/", 
                     "parent": "Release Notes", 
                     "previous_title": "Release Notes", 
                     "previous_url": "/docs/release-notes/", 
+                    "relative_path": "_docs/rn/007-1.4.0-rn.md", 
+                    "title": "Apache Drill 1.4.0 Release Notes", 
+                    "url": "/docs/apache-drill-1-4-0-release-notes/"
+                }, 
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Release Notes", 
+                            "url": "/docs/release-notes/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Apache Drill 1.2.0 Release Notes", 
+                    "next_url": "/docs/apache-drill-1-2-0-release-notes/", 
+                    "parent": "Release Notes", 
+                    "previous_title": "Apache Drill 1.4.0 Release Notes", 
+                    "previous_url": "/docs/apache-drill-1-4-0-release-notes/", 
                     "relative_path": "_docs/rn/008-1.3.0-rn.md", 
                     "title": "Apache Drill 1.3.0 Release Notes", 
                     "url": "/docs/apache-drill-1-3-0-release-notes/"
@@ -16495,8 +16801,8 @@
                     "url": "/docs/apache-drill-0-4-0-release-notes/"
                 }
             ], 
-            "next_title": "Apache Drill 1.3.0 Release Notes", 
-            "next_url": "/docs/apache-drill-1-3-0-release-notes/", 
+            "next_title": "Apache Drill 1.4.0 Release Notes", 
+            "next_url": "/docs/apache-drill-1-4-0-release-notes/", 
             "parent": "", 
             "previous_title": "Value Vectors", 
             "previous_url": "/docs/value-vectors/", 

http://git-wip-us.apache.org/repos/asf/drill/blob/7b62a919/_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md
----------------------------------------------------------------------
diff --git a/_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md b/_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md
index d3ddcc8..729ab41 100755
--- a/_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md
+++ b/_docs/performance-tuning/partition-pruning/020-migrating-partitioned-data.md
@@ -1,6 +1,6 @@
 ---
 title: "Migrating Partitioned Data"
-parent: "Performance Pruning Introduction"
+parent: "Partition Pruning"
 --- 
 
 Migrating Parquet data that you partitioned and generated using Drill 1.1 and 1.2 is mandatory before using the data in Drill 1.3. The data in must be marked as Drill-generated. Use the [drill-upgrade tool](https://github.com/parthchandra/drill-upgrade) to migrate Parquet data that you partitioned and generated in Drill 1.1 or 1.2. 

http://git-wip-us.apache.org/repos/asf/drill/blob/7b62a919/_docs/performance-tuning/partition-pruning/030-partition-pruning.md
----------------------------------------------------------------------
diff --git a/_docs/performance-tuning/partition-pruning/030-partition-pruning.md b/_docs/performance-tuning/partition-pruning/030-partition-pruning.md
deleted file mode 100755
index e376d5d..0000000
--- a/_docs/performance-tuning/partition-pruning/030-partition-pruning.md
+++ /dev/null
@@ -1,111 +0,0 @@
----
-title: "Partition Pruning"
-parent: "Partition Pruning"
---- 
-
-In Drill 1.1.0 and later, if the data source is Parquet, no data organization tasks are required
to take advantage of partition pruning. To partition and query Parquet files generated from
other tools, use Drill to read and rewrite the files and metadata using the CTAS command with
the PARTITION BY clause, as described in the following section "How to Partition Data".
-
-## How to Partition Data
-
-Write Parquet data using the [PARTITION BY]({{site.baseurl}}/docs/partition-by-clause/) clause
in the CTAS statement. 
-
-The Parquet writer first sorts data by the partition keys, and then creates a new file when
it encounters a new value for the partition columns. During partitioning, Drill creates separate
files, but not separate directories, for different partitions. Each file contains exactly
one partition value, but there can be multiple files for the same partition value. 
-
-Partition pruning uses the Parquet column statistics to determine which columns to use to
prune. 
-
-Unlike using the Drill 1.0 partitioning, no view query is subsequently required, nor is it
necessary to use the [dir* variables]({{site.baseurl}}/docs/querying-directories) after you
use the PARTITION BY clause in a CTAS statement. 
-
-## Drill 1.0 Partitioning
-
-Drill 1.0 does not support the PARTITION BY clause of the CTAS command supported by later
versions. Partitioning Drill 1.0-generated data involves performing the following steps. 
 
- 
-1. Devise a logical way to store the data in a hierarchy of directories. 
-2. Use CTAS to create Parquet files from the original data, specifying filter conditions.
-3. Move the files into directories in the hierarchy. 
-
-After partitioning the data, you need to create a view of the partitioned data to query the
data. You can use the [dir* variables]({{site.baseurl}}/docs/querying-directories) in queries
to refer to subdirectories in your workspace path.
- 
-### Drill 1.0 Partitioning Example
-
-Suppose you have text files containing several years of log data. To partition the data by
year and quarter, create the following hierarchy of directories:  
-       
-       …/logs/1994/Q1  
-       …/logs/1994/Q2  
-       …/logs/1994/Q3  
-       …/logs/1994/Q4  
-       …/logs/1995/Q1  
-       …/logs/1995/Q2  
-       …/logs/1995/Q3  
-       …/logs/1995/Q4  
-       …/logs/1996/Q1  
-       …/logs/1996/Q2  
-       …/logs/1996/Q3  
-       …/logs/1996/Q4  
-
-Run the following CTAS statement, filtering on the Q1 1994 data.
- 
-          CREATE TABLE TT_1994_Q1 
-              AS SELECT * FROM <raw table data in text format >
-              WHERE columns[1] = 1994 AND columns[2] = 'Q1'
- 
-This creates a Parquet file with the log data for Q1 1994 in the current workspace.  You
can then move the file into the correlating directory, and repeat the process until all of
the files are stored in their respective directories.
-
-Now you can define views on the parquet files and query the views.  
-
-       0: jdbc:drill:zk=local> create view vv1 as select `dir0` as `year`, `dir1` as `qtr`
from dfs.`/Users/max/data/multilevel/parquet`;
-       +------------+------------+
-       |     ok     |  summary   |
-       +------------+------------+
-       | true       | View 'vv1' created successfully in 'dfs.tmp' schema |
-       +------------+------------+
-       1 row selected (0.16 seconds)  
-
-Query the view to see all of the logs.  
-
-       0: jdbc:drill:zk=local> select * from dfs.tmp.vv1;
-       +------------+------------+
-       |    year    |    qtr     |
-       +------------+------------+
-       | 1994       | Q1         |
-       | 1994       | Q3         |
-       | 1994       | Q3         |
-       | 1994       | Q4         |
-       | 1994       | Q4         |
-       | 1994       | Q4         |
-       | 1994       | Q4         |
-       | 1995       | Q2         |
-       | 1995       | Q2         |
-       | 1995       | Q2         |
-       | 1995       | Q2         |
-       | 1995       | Q4         |
-       | 1995       | Q4         |
-       | 1995       | Q4         |
-       | 1995       | Q4         |
-       | 1995       | Q4         |
-       | 1995       | Q4         |
-       | 1995       | Q4         |
-       | 1996       | Q1         |
-       | 1996       | Q1         |
-       | 1996       | Q1         |
-       | 1996       | Q1         |
-       | 1996       | Q1         |
-       | 1996       | Q2         |
-       | 1996       | Q3         |
-       | 1996       | Q3         |
-       | 1996       | Q3         |
-       +------------+------------+
-       ...
-
-
-When you query the view, Drill can apply partition pruning and read only the files and directories
required to return query results.
-
-       0: jdbc:drill:zk=local> explain plan for select * from dfs.tmp.vv1 where `year`
= 1996 and qtr = 'Q2';
-       +------------+------------+
-       |    text    |    json    |
-       +------------+------------+
-       | 00-00    Screen
-       00-01      Project(year=[$0], qtr=[$1])
-       00-02        Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=file:/Users/maxdata/multilevel/parquet/1996/Q2/orders_96_q2.parquet]],
selectionRoot=/Users/max/data/multilevel/parquet, numFiles=1, columns=[`dir0`, `dir1`]]])
-       
-
-

http://git-wip-us.apache.org/repos/asf/drill/blob/7b62a919/_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md
----------------------------------------------------------------------
diff --git a/_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md b/_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md
new file mode 100755
index 0000000..6660a39
--- /dev/null
+++ b/_docs/performance-tuning/partition-pruning/030-using-partition-pruning.md
@@ -0,0 +1,111 @@
+---
+title: "Using Partition Pruning"
+parent: "Partition Pruning"
+--- 
+
+In Drill 1.1.0 and later, if the data source is Parquet, no data organization tasks are required
to take advantage of partition pruning. To partition and query Parquet files generated from
other tools, use Drill to read and rewrite the files and metadata using the CTAS command with
the PARTITION BY clause, as described in the following section "How to Partition Data".
+
+## How to Partition Data
+
+Write Parquet data using the [PARTITION BY]({{site.baseurl}}/docs/partition-by-clause/) clause
in the CTAS statement. 
+
+The Parquet writer first sorts data by the partition keys, and then creates a new file when
it encounters a new value for the partition columns. During partitioning, Drill creates separate
files, but not separate directories, for different partitions. Each file contains exactly
one partition value, but there can be multiple files for the same partition value. 
+
+Partition pruning uses the Parquet column statistics to determine which columns to use for
pruning. 
+
+Unlike Drill 1.0 partitioning, this approach does not require you to subsequently query a view,
nor is it necessary to use the [dir* variables]({{site.baseurl}}/docs/querying-directories) after you
use the PARTITION BY clause in a CTAS statement. 
+
+## Drill 1.0 Partitioning
+
+Drill 1.0 does not support the PARTITION BY clause of the CTAS command supported by later
versions. Partitioning Drill 1.0-generated data involves performing the following steps. 
 
+ 
+1. Devise a logical way to store the data in a hierarchy of directories. 
+2. Use CTAS to create Parquet files from the original data, specifying filter conditions.
+3. Move the files into directories in the hierarchy. 
+
+After partitioning the data, you need to create a view of the partitioned data to query the
data. You can use the [dir* variables]({{site.baseurl}}/docs/querying-directories) in queries
to refer to subdirectories in your workspace path.
+ 
+### Drill 1.0 Partitioning Example
+
+Suppose you have text files containing several years of log data. To partition the data by
year and quarter, create the following hierarchy of directories:  
+       
+       …/logs/1994/Q1  
+       …/logs/1994/Q2  
+       …/logs/1994/Q3  
+       …/logs/1994/Q4  
+       …/logs/1995/Q1  
+       …/logs/1995/Q2  
+       …/logs/1995/Q3  
+       …/logs/1995/Q4  
+       …/logs/1996/Q1  
+       …/logs/1996/Q2  
+       …/logs/1996/Q3  
+       …/logs/1996/Q4  
+
+Run the following CTAS statement, filtering on the Q1 1994 data.
+ 
+          CREATE TABLE TT_1994_Q1 
+              AS SELECT * FROM <raw table data in text format >
+              WHERE columns[1] = 1994 AND columns[2] = 'Q1'
+ 
+This creates a Parquet file with the log data for Q1 1994 in the current workspace.  You
can then move the file into the corresponding directory, and repeat the process until all of
the files are stored in their respective directories.
+
+Now you can define views on the Parquet files and query the views.  
+
+       0: jdbc:drill:zk=local> create view vv1 as select `dir0` as `year`, `dir1` as `qtr`
from dfs.`/Users/max/data/multilevel/parquet`;
+       +------------+------------+
+       |     ok     |  summary   |
+       +------------+------------+
+       | true       | View 'vv1' created successfully in 'dfs.tmp' schema |
+       +------------+------------+
+       1 row selected (0.16 seconds)  
+
+Query the view to see all of the logs.  
+
+       0: jdbc:drill:zk=local> select * from dfs.tmp.vv1;
+       +------------+------------+
+       |    year    |    qtr     |
+       +------------+------------+
+       | 1994       | Q1         |
+       | 1994       | Q3         |
+       | 1994       | Q3         |
+       | 1994       | Q4         |
+       | 1994       | Q4         |
+       | 1994       | Q4         |
+       | 1994       | Q4         |
+       | 1995       | Q2         |
+       | 1995       | Q2         |
+       | 1995       | Q2         |
+       | 1995       | Q2         |
+       | 1995       | Q4         |
+       | 1995       | Q4         |
+       | 1995       | Q4         |
+       | 1995       | Q4         |
+       | 1995       | Q4         |
+       | 1995       | Q4         |
+       | 1995       | Q4         |
+       | 1996       | Q1         |
+       | 1996       | Q1         |
+       | 1996       | Q1         |
+       | 1996       | Q1         |
+       | 1996       | Q1         |
+       | 1996       | Q2         |
+       | 1996       | Q3         |
+       | 1996       | Q3         |
+       | 1996       | Q3         |
+       +------------+------------+
+       ...
+
+
+When you query the view, Drill can apply partition pruning and read only the files and directories
required to return query results.
+
+       0: jdbc:drill:zk=local> explain plan for select * from dfs.tmp.vv1 where `year`
= 1996 and qtr = 'Q2';
+       +------------+------------+
+       |    text    |    json    |
+       +------------+------------+
+       | 00-00    Screen
+       00-01      Project(year=[$0], qtr=[$1])
+       00-02        Scan(groupscan=[ParquetGroupScan [entries=[ReadEntryWithPath [path=file:/Users/max/data/multilevel/parquet/1996/Q2/orders_96_q2.parquet]],
selectionRoot=/Users/max/data/multilevel/parquet, numFiles=1, columns=[`dir0`, `dir1`]]])
+       
+
+


Mime
View raw message