impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mjac...@apache.org
Subject [2/9] incubator-impala git commit: [DOCS] Major update to Impala + Kudu page
Date Fri, 17 Feb 2017 23:17:05 GMT
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/661921b2/docs/topics/impala_tables.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_tables.xml b/docs/topics/impala_tables.xml
index 5041111..ae65045 100644
--- a/docs/topics/impala_tables.xml
+++ b/docs/topics/impala_tables.xml
@@ -73,14 +73,16 @@ under the License.
     </ul>
 
     <p rev="2.2.0">
-      Impala tables can also represent data that is stored in HBase, or in the Amazon S3 filesystem (CDH 5.4.0 or higher),
-      or on Isilon storage devices (CDH 5.4.3 or higher).  See <xref href="impala_hbase.xml#impala_hbase"/>,
+      Impala tables can also represent data that is stored in HBase, or in the Amazon S3 filesystem (<keyword keyref="impala22_full"/> or higher),
+      or on Isilon storage devices (<keyword keyref="impala223_full"/> or higher).  See <xref href="impala_hbase.xml#impala_hbase"/>,
       <xref href="impala_s3.xml#s3"/>, and <xref href="impala_isilon.xml#impala_isilon"/>
       for details about those special kinds of tables.
     </p>
 
     <p conref="../shared/impala_common.xml#common/ignore_file_extensions"/>
 
+    <p outputclass="toc inpage"/>
+
     <p>
       <b>Related statements:</b> <xref href="impala_create_table.xml#create_table"/>,
       <xref href="impala_drop_table.xml#drop_table"/>, <xref href="impala_alter_table.xml#alter_table"/>
@@ -241,6 +243,7 @@ under the License.
 
   <concept id="table_file_formats">
     <title>File Formats</title>
+
     <conbody>
       <p>
         Each table has an associated file format, which determines how Impala interprets
the
@@ -273,4 +276,142 @@ under the License.
     </conbody>
   </concept>
 
+  <concept rev="kudu" id="kudu_tables">
+    <title>Kudu Tables</title>
+    <prolog>
+      <metadata>
+        <data name="Category" value="Kudu"/>
+      </metadata>
+    </prolog>
+
+    <conbody>
+      <p>
+        Tables stored in Apache Kudu are treated specially, because Kudu manages its data
independently of HDFS files.
+        Some information about the table is stored in the metastore database for use by Impala.
Other table metadata is
+        managed internally by Kudu.
+      </p>
+
+      <p>
+        When you create a Kudu table through Impala, it is assigned an internal Kudu table
name of the form
+        <codeph>impala::<varname>db_name</varname>.<varname>table_name</varname></codeph>.
You can see the Kudu-assigned name
+        in the output of <codeph>DESCRIBE FORMATTED</codeph>, in the <codeph>kudu.table_name</codeph>
field of the table properties.
+        The Kudu-assigned name remains the same even if you use <codeph>ALTER TABLE</codeph>
to rename the Impala table
+        or move it to a different Impala database. If you issue the statement
+        <codeph>ALTER TABLE <varname>impala_name</varname> SET TBLPROPERTIES('kudu.table_name'
= '<varname>different_kudu_table_name</varname>')</codeph>,
+        the effect is different depending on whether the Impala table was created with a
regular <codeph>CREATE TABLE</codeph>
+        statement (that is, if it is an internal or managed table), or whether it was created
with a
+        <codeph>CREATE EXTERNAL TABLE</codeph> statement (and therefore is an
external table). Changing the <codeph>kudu.table_name</codeph>
+        property of an internal table physically renames the underlying Kudu table to match
the new name.
+        Changing the <codeph>kudu.table_name</codeph> property of an external
table switches which underlying Kudu table
+        the Impala table refers to; the underlying Kudu table must already exist.
+      </p>
+
+      <p>
+        The following example shows what happens with both internal and external Kudu tables
as the <codeph>kudu.table_name</codeph>
+        property is changed. In practice, external tables are typically used to access underlying
Kudu tables that were created
+        outside of Impala, that is, through the Kudu API.
+      </p>
+
+<codeblock>
+-- This is an internal table that we will create and then rename.
+create table old_name (id bigint primary key, s string)
+  partition by hash(id) partitions 2 stored as kudu;
+
+-- Initially, the name OLD_NAME is the same on the Impala and Kudu sides.
+describe formatted old_name;
+...
+| Location:          | hdfs://host.example.com:8020/path/user.db/old_name
+| Table Type:        | MANAGED_TABLE         | NULL
+| Table Parameters:  | NULL                  | NULL
+|                    | DO_NOT_UPDATE_STATS   | true
+|                    | kudu.master_addresses | kudu-master.example.com
+|                    | kudu.table_name       | impala::user.old_name
+
+-- ALTER TABLE RENAME TO changes the Impala name but not the underlying Kudu name.
+alter table old_name rename to new_name;
+
+describe formatted new_name;
+| Location:          | hdfs://host.example.com:8020/path/user.db/new_name
+| Table Type:        | MANAGED_TABLE         | NULL
+| Table Parameters:  | NULL                  | NULL
+|                    | DO_NOT_UPDATE_STATS   | true
+|                    | kudu.master_addresses | kudu-master.example.com
+|                    | kudu.table_name       | impala::user.old_name
+
+-- Setting TBLPROPERTIES changes the underlying Kudu name.
+alter table new_name
+  set tblproperties('kudu.table_name' = 'impala::user.new_name');
+
+describe formatted new_name;
+| Location:          | hdfs://host.example.com:8020/path/user.db/new_name
+| Table Type:        | MANAGED_TABLE         | NULL
+| Table Parameters:  | NULL                  | NULL
+|                    | DO_NOT_UPDATE_STATS   | true
+|                    | kudu.master_addresses | kudu-master.example.com
+|                    | kudu.table_name       | impala::user.new_name
+
+-- Put some data in the table to demonstrate how external tables can map to
+-- different underlying Kudu tables.
+insert into new_name values (0, 'zero'), (1, 'one'), (2, 'two');
+
+-- This external table points to the same underlying Kudu table, NEW_NAME,
+-- as we created above. No need to declare columns or other table aspects.
+create external table kudu_table_alias stored as kudu
+  tblproperties('kudu.table_name' = 'impala::user.new_name');
+
+-- The external table can fetch data from the NEW_NAME table that already
+-- existed and already had data.
+select * from kudu_table_alias limit 100;
++----+------+
+| id | s    |
++----+------+
+| 1  | one  |
+| 0  | zero |
+| 2  | two  |
++----+------+
+
+-- We cannot re-point the external table at a different underlying Kudu table
+-- unless that other underlying Kudu table already exists.
+alter table kudu_table_alias
+  set tblproperties('kudu.table_name' = 'impala::user.yet_another_name');
+ERROR:
+TableLoadingException: Error opening Kudu table 'impala::user.yet_another_name',
+  Kudu error: The table does not exist: table_name: "impala::user.yet_another_name"
+
+-- Once the underlying Kudu table exists, we can re-point the external table to it.
+create table yet_another_name (id bigint primary key, x int, y int, s string)
+  partition by hash(id) partitions 2 stored as kudu;
+
+alter table kudu_table_alias
+  set tblproperties('kudu.table_name' = 'impala::user.yet_another_name');
+
+-- Now no data is returned because this other table is empty.
+select * from kudu_table_alias limit 100;
+
+-- The Impala table automatically recognizes the table schema of the new table,
+-- for example the extra X and Y columns not present in the original table.
+describe kudu_table_alias;
++------+--------+---------+-------------+----------+...
+| name | type   | comment | primary_key | nullable |...
++------+--------+---------+-------------+----------+...
+| id   | bigint |         | true        | false    |...
+| x    | int    |         | false       | true     |...
+| y    | int    |         | false       | true     |...
+| s    | string |         | false       | true     |...
++------+--------+---------+-------------+----------+...
+</codeblock>
+
+      <p>
+        The <codeph>SHOW TABLE STATS</codeph> output for a Kudu table shows Kudu-specific
details about the layout of the table.
+        Instead of information about the number and sizes of files, the information is divided
by the Kudu tablets.
+        For each tablet, the output includes the fields
+        <codeph># Rows</codeph> (although this number is not currently computed),
<codeph>Start Key</codeph>, <codeph>Stop Key</codeph>, <codeph>Leader
Replica</codeph>, and <codeph># Replicas</codeph>.
+        The output of <codeph>SHOW COLUMN STATS</codeph>, illustrating the distribution
of values within each column, is the same for Kudu tables
+        as for HDFS-backed tables.
+      </p>
+
+      <p conref="../shared/impala_common.xml#common/kudu_internal_external_tables"/>
+    </conbody>
+  </concept>
+
 </concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/661921b2/docs/topics/impala_timestamp.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_timestamp.xml b/docs/topics/impala_timestamp.xml
index 9dc9753..2e807b0 100644
--- a/docs/topics/impala_timestamp.xml
+++ b/docs/topics/impala_timestamp.xml
@@ -436,6 +436,9 @@ insert into dates_and_times values
 
     <p conref="../shared/impala_common.xml#common/avro_no_timestamp"/>
 
+    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
+    <p conref="../shared/impala_common.xml#common/kudu_unsupported_data_type"/>
+
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <ul>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/661921b2/docs/topics/impala_truncate_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_truncate_table.xml b/docs/topics/impala_truncate_table.xml
index 3cf5ec3..e2de146 100644
--- a/docs/topics/impala_truncate_table.xml
+++ b/docs/topics/impala_truncate_table.xml
@@ -102,6 +102,9 @@ under the License.
       permission for all the files and directories that make up the table.
     </p>
 
+    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
+    <p conref="../shared/impala_common.xml#common/kudu_no_truncate_table"/>
+
     <p conref="../shared/impala_common.xml#common/example_blurb"/>
 
     <p>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/661921b2/docs/topics/impala_varchar.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_varchar.xml b/docs/topics/impala_varchar.xml
index 4226c3a..5917724 100644
--- a/docs/topics/impala_varchar.xml
+++ b/docs/topics/impala_varchar.xml
@@ -128,6 +128,9 @@ prefer to use an integer data type with sufficient range (<codeph>INT</codeph>,
 
     <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
 
+    <p conref="../shared/impala_common.xml#common/kudu_blurb"/>
+    <p conref="../shared/impala_common.xml#common/kudu_unsupported_data_type"/>
+
     <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
 
     <p conref="../shared/impala_common.xml#common/blobs_are_strings"/>


Mime
View raw message