carbondata-issues mailing list archives

From "SWATI RAO (JIRA)" <j...@apache.org>
Subject [jira] [Issue Comment Deleted] (CARBONDATA-864) After adding a column using an alter query, when we put any column in "Dictionary Exclude" and then perform a select query on that column, it throws an exception.
Date Fri, 07 Apr 2017 07:48:41 GMT

     [ https://issues.apache.org/jira/browse/CARBONDATA-864?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

SWATI RAO updated CARBONDATA-864:
---------------------------------
    Comment: was deleted

(was: When we put a string datatype in DICTIONARY_EXCLUDE, the column displays "NULL" values when we perform a select query.

CREATE TABLE uniqdata (CUST_ID int, CUST_NAME String, ACTIVE_EMUI_VERSION string, DOB timestamp,
DOJ timestamp, BIGINT_COLUMN1 bigint, BIGINT_COLUMN2 bigint, DECIMAL_COLUMN1 decimal(30,10),
DECIMAL_COLUMN2 decimal(36,10), Double_COLUMN1 double, Double_COLUMN2 double, INTEGER_COLUMN1 int)
STORED BY 'org.apache.carbondata.format' TBLPROPERTIES ("TABLE_BLOCKSIZE"="256 MB");



LOAD DATA INPATH 'HDFS_URL/BabuStore/Data/uniqdata/2000_UniqData.csv' into table uniqdata
OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='FORCE', 'FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');

LOAD DATA INPATH 'HDFS_URL/BabuStore/Data/uniqdata/2000_UniqData.csv' into table uniqdata
OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='FORCE', 'FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');


ALTER TABLE uniqdata RENAME TO uniqdata1;


alter table uniqdata1 drop columns(ACTIVE_EMUI_VERSION);


alter table uniqdata1 add columns(ACTIVE_EMUI_VERSION string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='ACTIVE_EMUI_VERSION',
'DEFAULT.VALUE.(ACTIVE_EMUI_VERSION'='abcd');


select distinct(ACTIVE_EMUI_VERSION) from uniqdata1;

Results:

0: jdbc:hive2://192.168.2.126:10000> alter table uniqdata1 drop columns(ACTIVE_EMUI_VERSION);
Error: java.lang.RuntimeException: Column active_emui_version does not exists in the table default.uniqdata1 (state=,code=0)
0: jdbc:hive2://192.168.2.126:10000>
0: jdbc:hive2://192.168.2.126:10000> alter table uniqdata1 add columns(ACTIVE_EMUI_VERSION string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='ACTIVE_EMUI_VERSION', 'DEFAULT.VALUE.(ACTIVE_EMUI_VERSION'='abcd');
+---------+--+
| Result  |
+---------+--+
+---------+--+
No rows selected (0.308 seconds)
0: jdbc:hive2://192.168.2.126:10000> select distinct(ACTIVE_EMUI_VERSION) from uniqdata1;
+----------------------+--+
| ACTIVE_EMUI_VERSION  |
+----------------------+--+
| NULL                 |
+----------------------+--+
1 row selected (0.88 seconds)
0: jdbc:hive2://192.168.2.126:10000>
)
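
A side note on the deleted comment above: the ALTER statement there passes the default value under the key 'DEFAULT.VALUE.(ACTIVE_EMUI_VERSION', with a stray opening parenthesis, whereas the working example later in this issue uses the form 'DEFAULT.VALUE.<column_name>' (e.g. 'DEFAULT.VALUE.dict'). If the key does not match the column name, no default value is registered for the added column, which by itself would explain the NULL in that select, independently of the crash reported below. A corrected statement would look like the following (untested sketch, reusing the table and column names above):

alter table uniqdata1 add columns(ACTIVE_EMUI_VERSION string)
TBLPROPERTIES('DICTIONARY_EXCLUDE'='ACTIVE_EMUI_VERSION',
              'DEFAULT.VALUE.ACTIVE_EMUI_VERSION'='abcd');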

> After adding a column using an alter query, when we put any column in "Dictionary Exclude" and then perform a select query on that column, it throws an exception.
> ----------------------------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: CARBONDATA-864
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-864
>             Project: CarbonData
>          Issue Type: Bug
>    Affects Versions: 1.1.0-incubating
>         Environment: Spark2.1
>            Reporter: SWATI RAO
>         Attachments: 2000_UniqData.csv, error.png
>
>
> CREATE TABLE uniqdata (CUST_ID int, CUST_NAME String, ACTIVE_EMUI_VERSION string, DOB timestamp, DOJ timestamp, BIGINT_COLUMN1 bigint, BIGINT_COLUMN2 bigint, DECIMAL_COLUMN1 decimal(30,10), DECIMAL_COLUMN2 decimal(36,10), Double_COLUMN1 double, Double_COLUMN2 double, INTEGER_COLUMN1 int) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES ("TABLE_BLOCKSIZE"="256 MB");
> LOAD DATA INPATH 'HDFS_URL/BabuStore/Data/uniqdata/2000_UniqData.csv' into table uniqdata OPTIONS('DELIMITER'=',', 'QUOTECHAR'='"', 'BAD_RECORDS_ACTION'='FORCE', 'FILEHEADER'='CUST_ID,CUST_NAME,ACTIVE_EMUI_VERSION,DOB,DOJ,BIGINT_COLUMN1,BIGINT_COLUMN2,DECIMAL_COLUMN1,DECIMAL_COLUMN2,Double_COLUMN1,Double_COLUMN2,INTEGER_COLUMN1');
> ALTER TABLE uniqdata RENAME TO uniqdata1;
> alter table uniqdata1 add columns(dict int) TBLPROPERTIES('DICTIONARY_INCLUDE'='dict', 'DEFAULT.VALUE.dict'='9999');
> select distinct(dict) from uniqdata1 ;
> When we perform a select query on the "DICTIONARY_INCLUDE" column, it works.
> But when we perform a select query on the "DICTIONARY_EXCLUDE" column, it throws an exception:
> alter table uniqdata1 add columns(nodict string) TBLPROPERTIES('DICTIONARY_EXCLUDE'='nodict', 'DEFAULT.VALUE.NoDict'='abcd');
> select distinct(nodict) from uniqdata1;
> 0: jdbc:hive2://192.168.2.126:10000> select distinct(nodict) from uniqdata1 ;
> Error: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 21.0 failed 1 times, most recent failure: Lost task 0.0 in stage 21.0 (TID 419, localhost, executor driver): java.lang.ArrayIndexOutOfBoundsException: 4186
> 	at org.apache.spark.sql.execution.vectorized.OnHeapColumnVector.putByteArray(OnHeapColumnVector.java:401)
> 	at org.apache.spark.sql.execution.vectorized.ColumnVector.putByteArray(ColumnVector.java:569)
> 	at org.apache.carbondata.spark.vectorreader.ColumnarVectorWrapper.putBytes(ColumnarVectorWrapper.java:77)
> 	at org.apache.carbondata.spark.vectorreader.ColumnarVectorWrapper.putBytes(ColumnarVectorWrapper.java:83)
> 	at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.fillNoDictionaryData(RestructureBasedVectorResultCollector.java:167)
> 	at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.fillDataForNonExistingDimensions(RestructureBasedVectorResultCollector.java:122)
> 	at org.apache.carbondata.core.scan.collector.impl.RestructureBasedVectorResultCollector.collectVectorBatch(RestructureBasedVectorResultCollector.java:97)
> 	at org.apache.carbondata.core.scan.processor.impl.DataBlockIteratorImpl.processNextBatch(DataBlockIteratorImpl.java:65)
> 	at org.apache.carbondata.core.scan.result.iterator.VectorDetailQueryResultIterator.processNextBatch(VectorDetailQueryResultIterator.java:46)
> 	at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextBatch(VectorizedCarbonRecordReader.java:246)
> 	at org.apache.carbondata.spark.vectorreader.VectorizedCarbonRecordReader.nextKeyValue(VectorizedCarbonRecordReader.java:140)
> 	at org.apache.carbondata.spark.rdd.CarbonScanRDD$$anon$1.hasNext(CarbonScanRDD.scala:222)
> 	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.scan_nextBatch$(Unknown Source)
> 	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.agg_doAggregateWithKeys$(Unknown Source)
> 	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
> 	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
> 	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
> 	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
> 	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:126)
> 	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
> 	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
> 	at org.apache.spark.scheduler.Task.run(Task.scala:99)
> 	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
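
Every frame in this trace sits on the vectorized read path (VectorizedCarbonRecordReader -> RestructureBasedVectorResultCollector.fillNoDictionaryData -> OnHeapColumnVector.putByteArray), so the crash appears specific to the vector reader filling in the default value for a newly added no-dictionary column. One way to narrow this down is to retry the query with the vector reader disabled. This is a diagnostic sketch, assuming carbon.enable.vector.reader can be toggled at the session level (if not, it goes into carbon.properties and requires a restart):

-- assumption: the session honours this property; its default is true
set carbon.enable.vector.reader=false;
-- rerun the failing query on the row-based reader
select distinct(nodict) from uniqdata1;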



