impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jruss...@apache.org
Subject [01/22] incubator-impala git commit: First try at porting over the source files necessary for the Impala SQL Reference.
Date Tue, 26 Jul 2016 23:04:53 GMT
Repository: incubator-impala
Updated Branches:
  refs/heads/doc_prototype 0ad935b63 -> 463ddf924


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_varchar.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_varchar.xml b/docs/topics/impala_varchar.xml
new file mode 100644
index 0000000..32db4ae
--- /dev/null
+++ b/docs/topics/impala_varchar.xml
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="varchar" rev="2.0.0">
+
+  <title>VARCHAR Data Type (CDH 5.2 or higher only)</title>
+  <titlealts><navtitle>VARCHAR (CDH 5.2 or higher only)</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">VARCHAR data type</indexterm>
+      A variable-length character type, truncated during processing if necessary to fit within
the specified
+      length.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+    <p>
+      In the column definition of a <codeph>CREATE TABLE</codeph> statement:
+    </p>
+
+<codeblock><varname>column_name</varname> VARCHAR(<varname>max_length</varname>)</codeblock>
+
+    <p>
+      The maximum length you can specify is 65,535.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/partitioning_bad"/>
+
+<!--
+<p>
+This type can be used for partition key columns.
+Because of the efficiency advantage of numeric values over character-based values,
+if the partition key is a string representation of a number,
+prefer to use an integer data type with sufficient range (<codeph>INT</codeph>,
+<codeph>BIGINT</codeph>, and so on) rather than this type.
+</p>
+-->
+
+    <p conref="../shared/impala_common.xml#common/hbase_no"/>
+
+    <p conref="../shared/impala_common.xml#common/parquet_blurb"/>
+
+    <ul>
+      <li>
+        This type can be read from and written to Parquet files.
+      </li>
+
+      <li>
+        There is no requirement for a particular level of Parquet.
+      </li>
+
+      <li>
+        Parquet files generated by Impala and containing this type can be freely interchanged
with other components
+        such as Hive and MapReduce.
+      </li>
+
+      <li>
+        Parquet data files can contain values that are longer than allowed by the
+        <codeph>VARCHAR(<varname>n</varname>)</codeph> length limit.
Impala ignores any extra trailing characters
+        when it processes those values during a query.
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/text_blurb"/>
+
+    <p>
+      Text data files can contain values that are longer than allowed by the
+      <codeph>VARCHAR(<varname>n</varname>)</codeph> length limit.
Any extra trailing characters are ignored when
+      Impala processes those values during a query.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/schema_evolution_blurb"/>
+
+    <p>
+      You can use <codeph>ALTER TABLE ... CHANGE</codeph> to switch column data
types to and from
+      <codeph>VARCHAR</codeph>. You can convert from <codeph>STRING</codeph>
to
+      <codeph>VARCHAR(<varname>n</varname>)</codeph>, or from <codeph>VARCHAR(<varname>n</varname>)</codeph>
to
+      <codeph>STRING</codeph>, or from <codeph>CHAR(<varname>n</varname>)</codeph>
to
+      <codeph>VARCHAR(<varname>n</varname>)</codeph>, or from <codeph>VARCHAR(<varname>n</varname>)</codeph>
to
+      <codeph>CHAR(<varname>n</varname>)</codeph>. When switching
back and forth between <codeph>VARCHAR</codeph>
+      and <codeph>CHAR</codeph>, you can also change the length value. This schema
evolution works the same for
+      tables using any file format. If a table contains values longer than the maximum length
defined for a
+      <codeph>VARCHAR</codeph> column, Impala does not return an error. Any extra
trailing characters are ignored
+      when Impala processes those values during a query.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/compatibility_blurb"/>
+
+    <p>
+      This type is available using Impala 2.0 or higher under CDH 4, or with Impala on CDH
5.2 or higher. There are
+      no compatibility issues with other components when exchanging data files or running
Impala on CDH 4.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/internals_min_bytes"/>
+
+    <p conref="../shared/impala_common.xml#common/added_in_20"/>
+
+    <p conref="../shared/impala_common.xml#common/column_stats_variable"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/blobs_are_strings"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      The following examples show how long and short <codeph>VARCHAR</codeph>
values are treated. Values longer
+      than the maximum specified length are truncated by <codeph>CAST()</codeph>,
or when queried from existing
+      data files. Values shorter than the maximum specified length are represented as the
actual length of the
+      value, with no extra padding as seen with <codeph>CHAR</codeph> values.
+    </p>
+
+<codeblock>create table varchar_1 (s varchar(1));
+create table varchar_4 (s varchar(4));
+create table varchar_20 (s varchar(20));
+
+insert into varchar_1 values (cast('a' as varchar(1))), (cast('b' as varchar(1))), (cast('hello'
as varchar(1))), (cast('world' as varchar(1)));
+insert into varchar_4 values (cast('a' as varchar(4))), (cast('b' as varchar(4))), (cast('hello'
as varchar(4))), (cast('world' as varchar(4)));
+insert into varchar_20 values (cast('a' as varchar(20))), (cast('b' as varchar(20))), (cast('hello'
as varchar(20))), (cast('world' as varchar(20)));
+
+select * from varchar_1;
++---+
+| s |
++---+
+| a |
+| b |
+| h |
+| w |
++---+
+select * from varchar_4;
++------+
+| s    |
++------+
+| a    |
+| b    |
+| hell |
+| worl |
++------+
+[localhost:21000] &gt; select * from varchar_20;
++-------+
+| s     |
++-------+
+| a     |
+| b     |
+| hello |
+| world |
++-------+
+select concat('[',s,']') as s from varchar_20;
++---------+
+| s       |
++---------+
+| [a]     |
+| [b]     |
+| [hello] |
+| [world] |
++---------+
+</codeblock>
+
+    <p>
+      The following example shows how identical <codeph>VARCHAR</codeph> values
compare as equal, even if the
+      columns are defined with different maximum lengths. Both tables contain <codeph>'a'</codeph>
and
+      <codeph>'b'</codeph> values. The longer <codeph>'hello'</codeph>
and <codeph>'world'</codeph> values from the
+      <codeph>VARCHAR_20</codeph> table were truncated when inserted into the
<codeph>VARCHAR_1</codeph> table.
+    </p>
+
+<codeblock>select s from varchar_1 join varchar_20 using (s);
++-------+
+| s     |
++-------+
+| a     |
+| b     |
++-------+
+</codeblock>
+
+    <p>
+      The following examples show how <codeph>VARCHAR</codeph> values are freely
interchangeable with
+      <codeph>STRING</codeph> values in contexts such as comparison operators
and built-in functions:
+    </p>
+
+<codeblock>select length(cast('foo' as varchar(100))) as length;
++--------+
+| length |
++--------+
+| 3      |
++--------+
+select cast('xyz' as varchar(5)) &gt; cast('abc' as varchar(10)) as greater;
++---------+
+| greater |
++---------+
+| true    |
++---------+
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/udf_blurb_no"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      <xref href="impala_string.xml#string"/>, <xref href="impala_char.xml#char"/>,
+      <xref href="impala_literals.xml#string_literals"/>,
+      <xref href="impala_string_functions.xml#string_functions"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_variance.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_variance.xml b/docs/topics/impala_variance.xml
new file mode 100644
index 0000000..e0c5d02
--- /dev/null
+++ b/docs/topics/impala_variance.xml
@@ -0,0 +1,127 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.4" id="variance">
+
+  <title>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP Functions</title>
+  <titlealts><navtitle>VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Impala Functions"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="Cloudera">variance() function</indexterm>
+      <indexterm audience="Cloudera">variance_samp() function</indexterm>
+      <indexterm audience="Cloudera">variance_pop() function</indexterm>
+      <indexterm audience="Cloudera">var_samp() function</indexterm>
+      <indexterm audience="Cloudera">var_pop() function</indexterm>
+      An aggregate function that returns the
+      <xref href="http://en.wikipedia.org/wiki/Variance" scope="external" format="html">variance</xref>
of a set of
+      numbers. This is a mathematical property that signifies how far the values spread apart
from the mean. The
+      return value can be zero (if the input is a single value, or a set of identical values),
or a positive number
+      otherwise.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>{ VARIANCE | VAR[IANCE]_SAMP | VAR[IANCE]_POP } ([DISTINCT | ALL] <varname>expression</varname>)</codeblock>
+
+    <p>
+      This function works with any numeric data type.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/former_odd_return_type_string"/>
+
+    <p>
+      This function is typically used in mathematical formulas related to probability distributions.
+    </p>
+
+    <p>
+      The <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph>
functions compute the sample
+      variance and population variance, respectively, of the input values. (<codeph>VARIANCE()</codeph>
is an alias
+      for <codeph>VARIANCE_SAMP()</codeph>.) Both functions evaluate all input
rows matched by the query. The
+      difference is that <codeph>STDDEV_SAMP()</codeph> is scaled by <codeph>1/(N-1)</codeph>
while
+      <codeph>STDDEV_POP()</codeph> is scaled by <codeph>1/N</codeph>.
+    </p>
+
+    <p rev="2.0.0">
+      The functions <codeph>VAR_SAMP()</codeph> and <codeph>VAR_POP()</codeph>
are the same as
+      <codeph>VARIANCE_SAMP()</codeph> and <codeph>VARIANCE_POP()</codeph>,
respectively. These aliases are
+      available in Impala 2.0 and later.
+    </p>
+
+    <p>
+      If no input rows match the query, the result of any of these functions is <codeph>NULL</codeph>.
If a single
+      input row matches the query, the result of any of these functions is <codeph>"0.0"</codeph>.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+    <p>
+      This example demonstrates how <codeph>VARIANCE()</codeph> and <codeph>VARIANCE_SAMP()</codeph>
return the
+      same result, while <codeph>VARIANCE_POP()</codeph> uses a slightly different
calculation to reflect that the
+      input data is considered part of a larger <q>population</q>.
+    </p>
+
+<codeblock>[localhost:21000] &gt; select variance(score) from test_scores;
++-----------------+
+| variance(score) |
++-----------------+
+| 812.25          |
++-----------------+
+[localhost:21000] &gt; select variance_samp(score) from test_scores;
++----------------------+
+| variance_samp(score) |
++----------------------+
+| 812.25               |
++----------------------+
+[localhost:21000] &gt; select variance_pop(score) from test_scores;
++---------------------+
+| variance_pop(score) |
++---------------------+
+| 811.438             |
++---------------------+
+</codeblock>
+
+    <p>
+      This example demonstrates that, because the return value of these aggregate functions
is a
+      <codeph>STRING</codeph>, you convert the result with <codeph>CAST</codeph>
if you need to do further
+      calculations as a numeric value.
+    </p>
+
+<codeblock>[localhost:21000] &gt; create table score_stats as select cast(stddev(score)
as decimal(7,4)) `standard_deviation`, cast(variance(score) as decimal(7,4)) `variance` from
test_scores;
++-------------------+
+| summary           |
++-------------------+
+| Inserted 1 row(s) |
++-------------------+
+[localhost:21000] &gt; desc score_stats;
++--------------------+--------------+---------+
+| name               | type         | comment |
++--------------------+--------------+---------+
+| standard_deviation | decimal(7,4) |         |
+| variance           | decimal(7,4) |         |
++--------------------+--------------+---------+
+</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/analytic_not_allowed_caveat"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+
+    <p>
+      The <codeph>STDDEV()</codeph>, <codeph>STDDEV_POP()</codeph>,
and <codeph>STDDEV_SAMP()</codeph> functions
+      compute the standard deviation (square root of the variance) based on the results of
+      <codeph>VARIANCE()</codeph>, <codeph>VARIANCE_POP()</codeph>,
and <codeph>VARIANCE_SAMP()</codeph>
+      respectively. See <xref href="impala_stddev.xml#stddev"/> for details about the
standard deviation property.
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_views.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_views.xml b/docs/topics/impala_views.xml
new file mode 100644
index 0000000..a6c1a41
--- /dev/null
+++ b/docs/topics/impala_views.xml
@@ -0,0 +1,185 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="views">
+
+  <title>Overview of Impala Views</title>
+  <titlealts><navtitle>Views</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Schemas"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      Views are lightweight logical constructs that act as aliases for queries. You can specify
a view name in a
+      query (a <codeph>SELECT</codeph> statement or the <codeph>SELECT</codeph>
portion of an
+      <codeph>INSERT</codeph> statement) where you would usually specify a table
name.
+    </p>
+
+    <p>
+      A view lets you:
+    </p>
+
+    <ul>
+      <li>
+        Issue complicated queries with compact and simple syntax:
+<codeblock>-- Take a complicated reporting query, plug it into a CREATE VIEW statement...
+create view v1 as select c1, c2, avg(c3) from t1 group by c3 order by c1 desc limit 10;
+-- ... and now you can produce the report with 1 line of code.
+select * from v1;</codeblock>
+      </li>
+
+      <li>
+        Reduce maintenance, by avoiding the duplication of complicated queries across multiple
applications in
+        multiple languages:
+<codeblock>create view v2 as select t1.c1, t1.c2, t2.c3 from t1 join t2 on (t1.id =
t2.id);
+-- This simple query is safer to embed in reporting applications than the longer query above.
+-- The view definition can remain stable even if the structure of the underlying tables changes.
+select c1, c2, c3 from v2;</codeblock>
+      </li>
+
+      <li>
+        Build a new, more refined query on top of the original query by adding new clauses,
select-list
+        expressions, function calls, and so on:
+<codeblock>create view average_price_by_category as select category, avg(price) as
avg_price from products group by category;
+create view expensive_categories as select category, avg_price from average_price_by_category
order by avg_price desc limit 10000;
+create view top_10_expensive_categories as select category, avg_price from expensive_categories
limit 10;</codeblock>
+        This technique lets you build up several more or less granular variations of the
same query, and switch
+        between them when appropriate.
+<!-- My original assumption was confirmed correct by Alex: outer ORDER BY not actually
needed.
+In this case, we put an <codeph>ORDER BY</codeph> clause on the <q>top
10</q> view, even though there was already an <codeph>ORDER BY</codeph>
+on the <q>top 10000</q> view, because when a query is executed in parallel and
distributed among multiple nodes, the ordering is only
+guaranteed if there is an <codeph>ORDER BY</codeph> clause at the outermost level.
+-->
+      </li>
+
+      <li>
+        Set up aliases with intuitive names for tables, columns, result sets from joins,
and so on:
+<codeblock>-- The original tables might have cryptic names inherited from a legacy
system.
+create view action_items as select rrptsk as assignee, treq as due_date, dmisc as notes from
vxy_t1_br;
+-- You can leave original names for compatibility, build new applications using more intuitive
ones.
+select assignee, due_date, notes from action_items;</codeblock>
+      </li>
+
+      <li>
+        Swap tables with others that use different file formats, partitioning schemes, and
so on without any
+        downtime for data copying or conversion:
+<codeblock>create table slow (x int, s string) stored as textfile;
+create view report as select s from slow where x between 20 and 30;
+-- Query is kind of slow due to inefficient table definition, but it works.
+select * from report;
+
+create table fast (s string) partitioned by (x int) stored as parquet;
+-- ...Copy data from SLOW to FAST. Queries against REPORT view continue to work...
+
+-- After changing the view definition, queries will be faster due to partitioning,
+-- binary format, and compression in the new table.
+alter view report as select s from fast where x between 20 and 30;
+select * from report;</codeblock>
+      </li>
+
+      <li>
+        Avoid coding lengthy subqueries and repeating the same subquery text in many other
queries.
+      </li>
+
+      <li rev="2.3.0 collevelauth">
+        Set up fine-grained security where a user can query some columns from a table but
not other columns.
+        Because CDH 5.5 / Impala 2.3 and higher support column-level authorization, this
technique is no longer
+        required. If you formerly implemented column-level security through views, see
+        <xref href="sg_hive_sql.xml#concept_c2q_4qx_p4/col_level_auth_sentry"/> for
details about the
+        column-level authorization feature.
+        <!-- See <xref href="impala_authorization.xml#security_examples/sec_ex_views"/>
for details. -->
+      </li>
+    </ul>
+
+    <p>
+      The SQL statements that configure views are <xref href="impala_create_view.xml#create_view"/>,
+      <xref href="impala_alter_view.xml#alter_view"/>, and <xref href="impala_drop_view.xml#drop_view"/>.
You can
+      specify view names when querying data (<xref href="impala_select.xml#select"/>)
and copying data from one
+      table to another (<xref href="impala_insert.xml#insert"/>). The <xref href="impala_with.xml#with">WITH</xref>
+      clause creates an inline view, that only exists for the duration of a single query.
+    </p>
+
+<codeblock>[localhost:21000] &gt; create view trivial as select * from customer;
+[localhost:21000] &gt; create view some_columns as select c_first_name, c_last_name,
c_login from customer;
+[localhost:21000] &gt; select * from some_columns limit 5;
+Query finished, fetching results ...
++--------------+-------------+---------+
+| c_first_name | c_last_name | c_login |
++--------------+-------------+---------+
+| Javier       | Lewis       |         |
+| Amy          | Moses       |         |
+| Latisha      | Hamilton    |         |
+| Michael      | White       |         |
+| Robert       | Moran       |         |
++--------------+-------------+---------+
+[localhost:21000] &gt; create view ordered_results as select * from some_columns order
by c_last_name desc, c_first_name desc limit 1000;
+[localhost:21000] &gt; select * from ordered_results limit 5;
+Query: select * from ordered_results limit 5
+Query finished, fetching results ...
++--------------+-------------+---------+
+| c_first_name | c_last_name | c_login |
++--------------+-------------+---------+
+| Thomas       | Zuniga      |         |
+| Sarah        | Zuniga      |         |
+| Norma        | Zuniga      |         |
+| Lloyd        | Zuniga      |         |
+| Lisa         | Zuniga      |         |
++--------------+-------------+---------+
+Returned 5 row(s) in 0.48s</codeblock>
+
+    <p>
+      The previous example uses descending order for <codeph>ORDERED_RESULTS</codeph>
because in the sample TPCD-H
+      data, there are some rows with empty strings for both <codeph>C_FIRST_NAME</codeph>
and
+      <codeph>C_LAST_NAME</codeph>, making the lowest-ordered names unuseful
in a sample query.
+    </p>
+
+<codeblock>create view visitors_by_day as select day, count(distinct visitors) as howmany
from web_traffic group by day;
+create view top_10_days as select day, howmany from visitors_by_day order by howmany limit
10;
+select * from top_10_days;</codeblock>
+
+    <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/describe_formatted_view"/>
+
+    <p conref="../shared/impala_common.xml#common/create_table_like_view"/>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/complex_types_views"/>
+
+    <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
+
+    <ul>
+      <li>
+        <p>
+          You cannot insert into an Impala view. (In some database systems, this operation
is allowed and inserts
+          rows into the base table.) You can use a view name on the right-hand side of an
<codeph>INSERT</codeph>
+          statement, in the <codeph>SELECT</codeph> part.
+        </p>
+      </li>
+
+      <li>
+<!-- This same text is conref'ed in the #views and the #partition_pruning topics. -->
+        <p conref="../shared/impala_common.xml#common/partitions_and_views"/>
+      </li>
+
+      <li rev="1.4.0">
+        <p conref="../shared/impala_common.xml#common/order_by_view_restriction"/>
+      </li>
+    </ul>
+
+    <p>
+      <b>Related statements:</b> <xref href="impala_create_view.xml#create_view"/>,
+      <xref href="impala_alter_view.xml#alter_view"/>, <xref href="impala_drop_view.xml#drop_view"/>
+    </p>
+  </conbody>
+</concept>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_with.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_with.xml b/docs/topics/impala_with.xml
new file mode 100644
index 0000000..8d1001c
--- /dev/null
+++ b/docs/topics/impala_with.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="1.1" id="with">
+
+  <title>WITH Clause</title>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Querying"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      A clause that can be added before a <codeph>SELECT</codeph> statement,
to define aliases for complicated
+      expressions that are referenced multiple times within the body of the <codeph>SELECT</codeph>.
Similar to
+      <codeph>CREATE VIEW</codeph>, except that the table and column names defined
in the <codeph>WITH</codeph>
+      clause do not persist after the query finishes, and do not conflict with names used
in actual tables or
+      views. Also known as <q>subquery factoring</q>.
+    </p>
+
+    <p>
+      You can rewrite a query using subqueries to work the same as with the <codeph>WITH</codeph>
clause. The
+      purposes of the <codeph>WITH</codeph> clause are:
+    </p>
+
+    <ul>
+      <li>
+        Convenience and ease of maintenance from less repetition with the body of the query.
Typically used with
+        queries involving <codeph>UNION</codeph>, joins, or aggregation functions
where the similar complicated
+        expressions are referenced multiple times.
+      </li>
+
+      <li>
+        SQL code that is easier to read and understand by abstracting the most complex part
of the query into a
+        separate block.
+      </li>
+
+      <li>
+        Improved compatibility with SQL from other database systems that support the same
clause (primarily Oracle
+        Database).
+        <note>
+          <p>
+            The Impala <codeph>WITH</codeph> clause does not support recursive
queries in the
+            <codeph>WITH</codeph>, which is supported in some other database
systems.
+          </p>
+        </note>
+      </li>
+    </ul>
+
+    <p conref="../shared/impala_common.xml#common/sql1999"/>
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+<codeblock>-- Define 2 subqueries that can be referenced from the body of a longer
query.
+with t1 as (select 1), t2 as (select 2) insert into tab select * from t1 union all select
* from t2;
+
+-- Define one subquery at the outer level, and another at the inner level as part of the
+-- initial stage of the UNION ALL query.
+with t1 as (select 1) (with t2 as (select 2) select * from t2) union all select * from t1;</codeblock>
+  </conbody>
+</concept>


Mime
View raw message