drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Rahul Challapalli (JIRA)" <j...@apache.org>
Subject [jira] [Created] (DRILL-5131) Parquet Writer fails with heap space not available error on TPCDS 1TB data set
Date Wed, 14 Dec 2016 23:36:58 GMT
Rahul Challapalli created DRILL-5131:
----------------------------------------

             Summary: Parquet Writer fails with heap space not available error on TPCDS 1TB
data set
                 Key: DRILL-5131
                 URL: https://issues.apache.org/jira/browse/DRILL-5131
             Project: Apache Drill
          Issue Type: Bug
          Components: Storage - Parquet
    Affects Versions: 1.9.0
            Reporter: Rahul Challapalli


git.commit.id.abbrev=cf2b7c7

The query below fails with an "Out of Heap Space" error and brings down the drillbit.

{code}
create table store_sales as select
case when (columns[0]='') then cast(null as integer) else cast(columns[0] as integer) end
as ss_sold_date_sk,
case when (columns[1]='') then cast(null as integer) else cast(columns[1] as integer) end
as ss_sold_time_sk,
case when (columns[2]='') then cast(null as integer) else cast(columns[2] as integer) end
as ss_item_sk,
case when (columns[3]='') then cast(null as integer) else cast(columns[3] as integer) end
as ss_customer_sk,
case when (columns[4]='') then cast(null as integer) else cast(columns[4] as integer) end
as ss_cdemo_sk,
case when (columns[5]='') then cast(null as integer) else cast(columns[5] as integer) end
as ss_hdemo_sk,
case when (columns[6]='') then cast(null as integer) else cast(columns[6] as integer) end
as ss_addr_sk,
case when (columns[7]='') then cast(null as integer) else cast(columns[7] as integer) end
as ss_store_sk,
case when (columns[8]='') then cast(null as integer) else cast(columns[8] as integer) end
as ss_promo_sk,
case when (columns[9]='') then cast(null as integer) else cast(columns[9] as integer) end
as ss_ticket_number,
case when (columns[10]='') then cast(null as integer) else cast(columns[10] as integer) end
as ss_quantity,
case when (columns[11]='') then cast(null as decimal(7,2)) else cast(columns[11] as decimal(7,2))
end as ss_wholesale_cost,
case when (columns[12]='') then cast(null as decimal(7,2)) else cast(columns[12] as decimal(7,2))
end as ss_list_price,
case when (columns[13]='') then cast(null as decimal(7,2)) else cast(columns[13] as decimal(7,2))
end as ss_sales_price,
case when (columns[14]='') then cast(null as decimal(7,2)) else cast(columns[14] as decimal(7,2))
end as ss_ext_discount_amt,
case when (columns[15]='') then cast(null as decimal(7,2)) else cast(columns[15] as decimal(7,2))
end as ss_ext_sales_price,
case when (columns[16]='') then cast(null as decimal(7,2)) else cast(columns[16] as decimal(7,2))
end as ss_ext_wholesale_cost,
case when (columns[17]='') then cast(null as decimal(7,2)) else cast(columns[17] as decimal(7,2))
end as ss_ext_list_price,
case when (columns[18]='') then cast(null as decimal(7,2)) else cast(columns[18] as decimal(7,2))
end as ss_ext_tax,
case when (columns[19]='') then cast(null as decimal(7,2)) else cast(columns[19] as decimal(7,2))
end as ss_coupon_amt,
case when (columns[20]='') then cast(null as decimal(7,2)) else cast(columns[20] as decimal(7,2))
end as ss_net_paid,
case when (columns[21]='') then cast(null as decimal(7,2)) else cast(columns[21] as decimal(7,2))
end as ss_net_paid_inc_tax,
case when (columns[22]='') then cast(null as decimal(7,2)) else cast(columns[22] as decimal(7,2))
end as ss_net_profit
from dfs.`/drill/testdata/tpcds/text/sf1000/store_sales.dat`;
{code}

Exception from the logs:
{code}
2016-12-14 14:23:49,303 [27ae4152-0fd4-aa0f-56db-a21e2f54d6c2:frag:1:14] ERROR o.a.drill.common.CatastrophicFailure
- Catastrophic Failure Occurred, exiting. Information message: Unable to handle out of memory
condition in FragmentExecutor.
java.lang.OutOfMemoryError: Java heap space
        at org.apache.parquet.bytes.CapacityByteArrayOutputStream.writeToOutput(CapacityByteArrayOutputStream.java:223)
~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.CapacityByteArrayOutputStream.writeTo(CapacityByteArrayOutputStream.java:239)
~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput$CapacityBAOSBytesInput.writeAllTo(BytesInput.java:355)
~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput$SequenceBytesIn.writeAllTo(BytesInput.java:266)
~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput.toByteArray(BytesInput.java:174) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.bytes.BytesInput.toByteBuffer(BytesInput.java:185) ~[parquet-encoding-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.hadoop.DirectCodecFactory$SnappyCompressor.compress(DirectCodecFactory.java:291)
~[parquet-hadoop-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.hadoop.ColumnChunkPageWriteStore$ColumnChunkPageWriter.writePage(ColumnChunkPageWriteStore.java:94)
~[parquet-hadoop-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.column.impl.ColumnWriterV1.writePage(ColumnWriterV1.java:154)
~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.column.impl.ColumnWriterV1.accountForValueWritten(ColumnWriterV1.java:115)
~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.column.impl.ColumnWriterV1.write(ColumnWriterV1.java:227) ~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.parquet.io.MessageColumnIO$MessageColumnIORecordConsumer.addInteger(MessageColumnIO.java:433)
~[parquet-column-1.8.1-drill-r0.jar:1.8.1-drill-r0]
        at org.apache.drill.exec.store.ParquetOutputRecordWriter$NullableIntParquetConverter.writeField(ParquetOutputRecordWriter.java:377)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.store.EventBasedRecordWriter.write(EventBasedRecordWriter.java:65)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.WriterRecordBatch.innerNext(WriterRecordBatch.java:106)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:162)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:215)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:104) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.SingleSenderCreator$SingleSenderRootExec.innerNext(SingleSenderCreator.java:92)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:94) ~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:232)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.exec.work.fragment.FragmentExecutor$1.run(FragmentExecutor.java:226)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at java.security.AccessController.doPrivileged(Native Method) ~[na:1.8.0_92]
        at javax.security.auth.Subject.doAs(Subject.java:422) ~[na:1.8.0_92]
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1595)
~[hadoop-common-2.7.0-mapr-1607.jar:na]
        at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:226)
~[drill-java-exec-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at org.apache.drill.common.SelfCleaningRunnable.run(SelfCleaningRunnable.java:38)
[drill-common-1.10.0-SNAPSHOT.jar:1.10.0-SNAPSHOT]
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[na:1.8.0_92]
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[na:1.8.0_92]
        at java.lang.Thread.run(Thread.java:745) [na:1.8.0_92]
{code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message