hive-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Min Zhou <coderp...@gmail.com>
Subject Re: exception when query on a partitioned table w/o any data
Date Wed, 11 Nov 2009 05:14:44 GMT
Hi, Namit,

This is my configuration. a little bit lengthy.

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration>

<!-- Hive Configuration can either be stored in this file or in the
hadoop configuration files  -->
<!-- that are implied by Hadoop setup variables.
                         -->
<!-- Aside from Hadoop setup variables - this file is provided as a
convenience so that Hive    -->
<!-- users do not have to edit hadoop configuration files (that may be
managed as a centralized -->
<!-- resource).
                         -->

<!-- Hive Execution Parameters -->
<property>
  <name>mapred.reduce.tasks</name>
  <value>-1</value>
    <description>The default number of reduce tasks per job.  Typically set
  to a prime close to the number of available hosts.  Ignored when
mapred.job.tracker is "local". Hadoop sets this to 1 by default,
whereas hive uses -1 as its default value.
  By setting this property to -1, Hive will automatically figure out
what should be the number of reducers.
  </description>
</property>

<property>
  <name>hive.exec.reducers.bytes.per.reducer</name>
  <value>1000000000</value>
  <description>size per reducer. The default is 1G, i.e. if the input
size is 10G, it will use 10 reducers.</description>
</property>

<property>
  <name>hive.exec.reducers.max</name>
  <value>999</value>
  <description>max number of reducers that will be used. If the one
        specified in the configuration parameter mapred.reduce.tasks is
        negative, hive will use this as the max number of reducers when
        automatically determining the number of reducers.</description>
</property>

<property>
  <name>hive.exec.scratchdir</name>
  <value>/group/tbdev/zhoumin/hive-tmp</value>
  <description>Scratch space for Hive jobs</description>
</property>

<property>
  <name>hive.metastore.local</name>
  <value>true</value>
  <description>controls whether to connect to a remote metastore server
or open a new metastore server in Hive Client JVM</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://mysql/zhoumin_hive?createDatabaseIfNotExist=true</value>
  <description>JDBC connect string for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>

<property>
  <name>javax.jdo.PersistenceManagerFactoryClass</name>
  <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
  <description>class implementing the jdo persistence</description>
</property>

javax.jdo.option.DetachAllOnCommit=true
javax.jdo.option.NontransactionalRead=true
javax.jdo.option.ConnectionDriverName=com.mysql.jdbc.Driver
javax.jdo.option.ConnectionURL=jdbc:mysql://mysql/zhoumin_hive?createDatabaseIfNotExist=true
javax.jdo.option.ConnectionUserName=zhoumin
javax.jdo.option.ConnectionPassword=zhoumin
datanucleus.validateTables=false
datanucleus.validateColumns=false
datanucleus.validateConstraints=false
datanucleus.storeManagerType=rdbms
datanucleus.autoCreateSchema=true
datanucleus.autoStartMechanismMode=checked
datanucleus.transactionIsolation=read-committed
datanucleus.cache.level2=true
datanucleus.cache.level2.type=SOFT

<property>
  <name>javax.jdo.option.DetachAllOnCommit</name>
  <value>true</value>
  <description>detaches all objects from session so that they can be
used after transaction is committed</description>
</property>

<property>
  <name>javax.jdo.option.NonTransactionalRead</name>
  <value>true</value>
  <description>reads outside of transactions</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>zhoumin</value>
  <description>username to use against metastore database</description>
</property>

<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>zhoumin</value>
  <description>password to use against metastore database</description>
</property>

<property>
  <name>datanucleus.validateTables</name>
  <value>false</value>
  <description>validates existing schema against code. turn this on if
you want to verify existing schema </description>
</property>

<property>
  <name>datanucleus.validateColumns</name>
  <value>false</value>
  <description>validates existing schema against code. turn this on if
you want to verify existing schema </description>
</property>

<property>
  <name>datanucleus.validateConstraints</name>
  <value>false</value>
  <description>validates existing schema against code. turn this on if
you want to verify existing schema </description>
</property>

<property>
  <name>datanucleus.storeManagerType</name>
  <value>rdbms</value>
  <description>metadata store type</description>
</property>

<property>
  <name>datanucleus.autoCreateSchema</name>
  <value>true</value>
  <description>creates necessary schema on a startup if one doesn't
exist. set this to false, after creating it once</description>
</property>

<property>
  <name>datanucleus.autoStartMechanismMode</name>
  <value>checked</value>
  <description>throw exception if metadata tables are incorrect</description>
</property>

<property>
  <name>datanucleus.transactionIsolation</name>
  <value>read-committed</value>
  <description></description>
</property>

<property>
  <name>datanucleus.cache.level2</name>
  <value>true</value>
  <description>use a level 2 cache. turn this off if metadata is
changed independently of hive metastore server</description>
</property>

<property>
  <name>datanucleus.cache.level2.type</name>
  <value>SOFT</value>
  <description>SOFT=soft reference based cache, WEAK=weak reference
based cache.</description>
</property>

<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/group/tbdev/zhoumin/hive</value>
  <description>location of default database for the warehouse</description>
</property>

<property>
  <name>hive.metastore.connect.retries</name>
  <value>5</value>
  <description>Number of retries while opening a connection to
metastore</description>
</property>

<property>
  <name>hive.metastore.rawstore.impl</name>
  <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
  <description>Name of the class that implements
org.apache.hadoop.hive.metastore.rawstore interface. This class is
used to store and retrieval of raw metadata objects such as table,
database</description>
</property>

<property>
  <name>hive.default.fileformat</name>
  <value>TextFile</value>
  <description>Default file format for CREATE TABLE statement. Options
are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
</property>

<property>
  <name>hive.map.aggr</name>
  <value>true</value>
  <description>Whether to use map-side aggregation in Hive Group By
queries</description>
</property>

<property>
  <name>hive.groupby.skewindata</name>
  <value>false</value>
  <description>Whether there is skew in data to optimize group by
queries</description>
</property>

<property>
  <name>hive.groupby.mapaggr.checkinterval</name>
  <value>100000</value>
  <description>Number of rows after which size of the grouping
keys/aggregation classes is performed</description>
</property>

<property>
  <name>hive.mapred.local.mem</name>
  <value>0</value>
  <description>For local mode, memory of the mappers/reducers</description>
</property>

<property>
  <name>hive.map.aggr.hash.percentmemory</name>
  <value>0.5</value>
  <description>Portion of total memory to be used by map-side group
aggregation hash table</description>
</property>

<property>
  <name>hive.map.aggr.hash.min.reduction</name>
  <value>0.5</value>
  <description>Hash aggregation will be turned off if the ratio between hash
  table size and input rows is bigger than this number. Set to 1 to make sure
  hash aggregation is never turned off.</description>
</property>

<property>
  <name>hive.optimize.ppd</name>
  <value>true</value>
  <description>Whether to enable predicate pushdown</description>
</property>

<property>
  <name>hive.join.emit.interval</name>
  <value>1000</value>
  <description>How many rows in the right-most join operand Hive
should buffer before emitting the join result. </description>
</property>

<property>
  <name>hive.mapred.mode</name>
  <value>nonstrict</value>
  <description>The mode in which the hive operations are being
performed. In strict mode, some risky queries are not allowed to
run</description>
</property>

<property>
  <name>hive.exec.script.maxerrsize</name>
  <value>100000</value>
  <description>Maximum number of bytes a script is allowed to emit to
standard error (per map-reduce task). This prevents runaway scripts
from filling logs partitions to capacity </description>
</property>

<property>
  <name>hive.exec.compress.output</name>
  <value>true</value>
  <description> This controls whether the final outputs of a query (to
a local/hdfs file or a hive table) is compressed. The compression
codec and other options are determined from hadoop config variables
mapred.output.compress* </description>
</property>

<property>
  <name>hive.exec.compress.intermediate</name>
  <value>false</value>
  <description> This controls whether intermediate files produced by
hive between multiple map-reduce jobs are compressed. The compression
codec and other options are determined from hadoop config variables
mapred.output.compress* </description>
</property>

<property>
  <name>hive.exec.pre.hooks</name>
  <value></value>
  <description>Pre Execute Hook for Tests</description>
</property>

<property>
  <name>hive.merge.mapfiles</name>
  <value>false</value>
  <description>Merge small files at the end of the job</description>
</property>

<property>
  <name>hive.heartbeat.interval</name>
  <value>1000</value>
  <description>Send a heartbeat after this interval - used by mapjoin
and filter operators</description>
</property>

<property>
  <name>hive.merge.size.per.mapper</name>
  <value>1000000000</value>
  <description>Size of merged files at the end of the job</description>
</property>

<property>
  <name>mapred.output.compression.type</name>
  <value>BLOCK</value>
  <description>If the job outputs are to be compressed as SequenceFiles, how should
               they be compressed? Should be one of NONE, RECORD or BLOCK.
  </description>
</property>
</configuration>


Thanks,
Min

On Wed, Nov 11, 2009 at 1:08 PM, Namit Jain <njain@facebook.com> wrote:
> Looks like some configuration problem – this query should have failed at
> compile time, since it is not referencing any valid partition.
> Can you send your hive-site.xml ?
>
>
>
> On 11/10/09 8:01 PM, "Min Zhou" <coderplay@gmail.com> wrote:
>
> Hi, guys,
>
>
> hive> create table pokes(foo string, bar int) partitioned by (pt string);
> OK
> Time taken: 0.146 seconds
> hive> select * from pokes;
> Total MapReduce jobs = 1
> Number of reduce tasks is set to 0 since there's no reduce operator
> Job Submission failed with exception 'java.lang.NullPointerException(null)'
> FAILED: Execution Error, return code 1 from
> org.apache.hadoop.hive.ql.exec.ExecDriver
>
> exception stack from hive.log
> java.lang.NullPointerException
>         at
> org.apache.hadoop.hive.ql.exec.ExecDriver.addInputPath(ExecDriver.java:698)
>         at
> org.apache.hadoop.hive.ql.exec.ExecDriver.addInputPaths(ExecDriver.java:753)
>         at
> org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:372)
>         at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:379)
>         at org.apache.hadoop.hive.ql.Driver.run(Driver.java:285)
>         at
> org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:123)
>         at
> org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:181)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:287)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>         at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>         at java.lang.reflect.Method.invoke(Method.java:597)
>         at org.apache.hadoop.util.RunJar.main(RunJar.java:165)
>         at org.apache.hadoop.mapred.JobShell.run(JobShell.java:54)
>         at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
>         at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
>         at org.apache.hadoop.mapred.JobShell.main(JobShell.java:68)
>
>
> This should be a pitfall of hive, since if the table is not
> partitioned, that query will pass.
>
>
> Thanks,
> Min
> --
> My research interests are distributed systems, parallel computing and
> bytecode based virtual machine.
>
> My profile:
> http://www.linkedin.com/in/coderplay
> My blog:
> http://coderplay.javaeye.com
>
>



-- 
My research interests are distributed systems, parallel computing and
bytecode based virtual machine.

My profile:
http://www.linkedin.com/in/coderplay
My blog:
http://coderplay.javaeye.com

Mime
View raw message