tez-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Soundararajan Velu (JIRA)" <j...@apache.org>
Subject [jira] [Created] (TEZ-2622) Tez gives different responses when run on Physical tables and logical views
Date Wed, 15 Jul 2015 16:39:05 GMT
Soundararajan Velu created TEZ-2622:
---------------------------------------

             Summary: Tez gives different responses when run on Physical tables and logical
views
                 Key: TEZ-2622
                 URL: https://issues.apache.org/jira/browse/TEZ-2622
             Project: Apache Tez
          Issue Type: Bug
    Affects Versions: 0.7.0
         Environment: Hive 1.2.0 and Tez 0.7.0
            Reporter: Soundararajan Velu
            Priority: Critical


The same query yields different results when run against a view and against the underlying
physical table: the query on the view returns no records, or only a few.

CBO is turned on, and the following flags are used:


set hive.cli.print.current.db=true;
set hive.cli.print.header=true;

set hive.execution.engine=tez;
set mapreduce.job.queuename=admin;
set tez.queue.name=admin;

set hive.tez.container.size=5096;
set tez.task.resource.memory.mb=5096;

set hive.auto.convert.join=true;

set hive.auto.convert.sortmerge.join.to.mapjoin=true;
set hive.auto.convert.sortmerge.join=true;

set hive.enforce.bucketmapjoin=true;
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;
set hive.enforce.sortmergebucketmapjoin=true;

set hive.optimize.bucketmapjoin.sortedmerge=true; 
set hive.optimize.skewjoin=true;
set hive.optimize.skewjoin.compiletime=true;

set hive.groupby.skewindata=true;
set hive.convert.join.bucket.mapjoin.tez=true;
set hive.exec.parallel=true;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.groupby.maxentries=10240;
set hive.vectorized.groupby.flush.percent=0.1;
set hive.tez.auto.reducer.parallelism=true;
set hive.tez.min.partition.factor=50;
set hive.tez.max.partition.factor=100;
set io.sort.mb=400;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

set hive.hashtable.initialCapacity=1000000;
set hive.hashtable.key.count.adjustment=1.0;
set hive.hashtable.loadfactor=0.99;

set tez.runtime.io.sort.mb=1800;
set tez.runtime.sort.threads=4;
set tez.runtime.io.sort.factor=200;
set tez.runtime.shuffle.memory-to-memory.enable=false;
set tez.runtime.shuffle.memory-to-memory.segments=4;
set tez.runtime.pipelined-shuffle.enable=true;
set tez.runtime.optimize.shared.fetch=true;
set tez.runtime.shuffle.keep-alive.enabled=true;
set tez.runtime.optimize.local.fetch=false;

set hive.exec.reducers.max=300;

set hive.mapjoin.hybridgrace.hashtable=true;
set hive.mapjoin.hybridgrace.memcheckfrequency=1024;
set hive.mapjoin.optimized.hashtable=true;
set hive.mapjoin.optimized.hashtable.wbsize=880000;
set hive.mapjoin.localtask.max.memory.usage=0.99;

set hive.optimize.skewjoin.compiletime=false;
set hive.skewjoin.key=10000000;
set hive.skewjoin.mapjoin.map.tasks=200;
set hive.skewjoin.mapjoin.min.split=134217728;


set hive.compute.query.using.stats=true;



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message