drill-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Amit Katti (JIRA)" <j...@apache.org>
Subject [jira] [Comment Edited] (DRILL-1058) Unable to read or write nested/repeated data in PARQUET format
Date Thu, 24 Jul 2014 18:14:39 GMT

    [ https://issues.apache.org/jira/browse/DRILL-1058?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14072297#comment-14072297
] 

Amit Katti edited comment on DRILL-1058 at 7/24/14 6:13 PM:
------------------------------------------------------------

When I try to write another JSON file, which has nested data in addition to repeated data,
as a Parquet table, Drill throws a different exception (NullPointerException). In both cases,
Drill is able to read the JSON itself perfectly.

The JSON looks like:
{"rownum":1,"name":"fred ovid","age":76,"gpa":1.55,"studentnum":692315658449,"create_time":"2014-05-27 00:26:07", "interests": [ "Reading", "Mountain Biking", "Hacking" ], "favorites": {"color": "Blue", "sport": "Soccer", "food": "Spaghetti"}}


QUERY: create table complex_student_tbl as select * from `/user/root/drill/complex_student.json`;

ERROR MESSAGE:
Query failed: org.apache.drill.exec.rpc.RpcException: Remote failure while running query.[error_id:
"e631faac-22de-41ab-a073-3a8354a1fda0"
endpoint {
  address: "perfnode154.perf.lab"
  user_port: 31010
  control_port: 31011
  data_port: 31012
}
error_type: 0
message: "Screen received stop request sent. < NullPointerException"
]
Error: exception while executing query (state=,code=0)

The Exception in drillbit.log is:
{code}
2014-07-23 17:28:05,254 [4121f743-b308-4af8-b356-cb7a3adc065e:frag:0:0] ERROR o.a.d.e.p.i.ScreenCreator$ScreenRoot
- Error 5cb4c5c6-1295-43bf-9fda-096afd97d940: Screen received stop request sent.
java.lang.NullPointerException: null
	at parquet.column.ParquetProperties.getValuesWriter(ParquetProperties.java:67) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.column.impl.ColumnWriterImpl.<init>(ColumnWriterImpl.java:82) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.column.impl.ColumnWriteStoreImpl.newMemColumn(ColumnWriteStoreImpl.java:63) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.column.impl.ColumnWriteStoreImpl.getColumnWriter(ColumnWriteStoreImpl.java:55)
~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.io.MessageColumnIO$MessageColumnIORecordConsumer.<init>(MessageColumnIO.java:124)
~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.io.MessageColumnIO.getRecordWriter(MessageColumnIO.java:315) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at org.apache.drill.exec.store.parquet.ParquetRecordWriter.newSchema(ParquetRecordWriter.java:130)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.store.parquet.ParquetRecordWriter.updateSchema(ParquetRecordWriter.java:102)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.WriterRecordBatch.setupNewSchema(WriterRecordBatch.java:158)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.WriterRecordBatch.innerNext(WriterRecordBatch.java:101)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:91) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:116)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:72) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:65) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:45)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:95)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:91) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:116)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:58) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.ScreenCreator$ScreenRoot.innerNext(ScreenCreator.java:97)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:48) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:100) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.work.WorkManager$RunnableWrapper.run(WorkManager.java:242) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) [na:1.7.0_60]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) [na:1.7.0_60]
	at java.lang.Thread.run(Thread.java:745) [na:1.7.0_60]
{code}


was (Author: amitskatti):
When I try to do the same with a different Json which has nested data in addition to repeated
data, it throws a different exception (NullPointerException). In both cases, Drill is able
to read the Json itself perfectly.

The JSON looks like:
{"rownum":1,"name":"fred ovid","age":76,"gpa":1.55,"studentnum":692315658449,"create_time":"2014-05-27
00:26:07", "interests": [ "Reading", "Mountain Biking", "Hacking" ], "favorites": {"color":
"Blue", "sport": "Soccer", "food": "Spaghetti"}}


QUERY: create table complex_student_tbl as select * from `/user/root/drill/complex_student.json`;

ERROR MESSAGE:
Query failed: org.apache.drill.exec.rpc.RpcException: Remote failure while running query.[error_id:
"e631faac-22de-41ab-a073-3a8354a1fda0"
endpoint {
  address: "perfnode154.perf.lab"
  user_port: 31010
  control_port: 31011
  data_port: 31012
}
error_type: 0
message: "Screen received stop request sent. < NullPointerException"
]
Error: exception while executing query (state=,code=0)

The Exception in drillbit.log is:
{code}
2014-07-23 17:28:05,254 [4121f743-b308-4af8-b356-cb7a3adc065e:frag:0:0] ERROR o.a.d.e.p.i.ScreenCreator$ScreenRoot
- Error 5cb4c5c6-1295-43bf-9fda-096afd97d940: Screen received stop request sent.
java.lang.NullPointerException: null
	at parquet.column.ParquetProperties.getValuesWriter(ParquetProperties.java:67) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.column.impl.ColumnWriterImpl.<init>(ColumnWriterImpl.java:82) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.column.impl.ColumnWriteStoreImpl.newMemColumn(ColumnWriteStoreImpl.java:63) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.column.impl.ColumnWriteStoreImpl.getColumnWriter(ColumnWriteStoreImpl.java:55)
~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.io.MessageColumnIO$MessageColumnIORecordConsumer.<init>(MessageColumnIO.java:124)
~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at parquet.io.MessageColumnIO.getRecordWriter(MessageColumnIO.java:315) ~[parquet-column-1.5.0-20140513.004024-1.jar:na]
	at org.apache.drill.exec.store.parquet.ParquetRecordWriter.newSchema(ParquetRecordWriter.java:130)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.store.parquet.ParquetRecordWriter.updateSchema(ParquetRecordWriter.java:102)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.WriterRecordBatch.setupNewSchema(WriterRecordBatch.java:158)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.WriterRecordBatch.innerNext(WriterRecordBatch.java:101)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:91) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:116)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:72) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:65) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:45)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:95)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:91) ~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.validate.IteratorValidatorBatchIterator.next(IteratorValidatorBatchIterator.java:116)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:58) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.ScreenCreator$ScreenRoot.innerNext(ScreenCreator.java:97)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:48) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:100) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at org.apache.drill.exec.work.WorkManager$RunnableWrapper.run(WorkManager.java:242) [drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) [na:1.7.0_60]
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) [na:1.7.0_60]
	at java.lang.Thread.run(Thread.java:745) [na:1.7.0_60]
{code}

> Unable to read or write nested/repeated data in PARQUET format
> --------------------------------------------------------------
>
>                 Key: DRILL-1058
>                 URL: https://issues.apache.org/jira/browse/DRILL-1058
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - Writer
>         Environment: CentOS release 6.5
>            Reporter: Amit Katti
>            Assignee: Parth Chandra
>
> I have a JSON file with nested data (schema present below):
> {
>    "rownum": 1,
>    "name": "fred ovid",
>    "age": 76,
>    "gpa": 1.55,
>    "studentnum": 692315658449,
>    "create_time": "2014-05-27 00:26:07",
>    "interests": [
>       "Reading",
>       "Mountain Biking",
>       "Hacking"
>    ]
> }
> I am able to read this JSON file successfully from Drill and access nested values. However,
> when I try to import this data and create a table in PARQUET format, the query fails with an error:
> QUERY: create table test as select * from `/user/root/sample-data/nested_student.json`;
> ERROR: Query failed: org.apache.drill.exec.rpc.RpcException: Remote failure while running
query.[error_id: "3ce3dc1e-d920-4262-ae2d-28bd2d034597"
> endpoint {
>   address: "perfnode154.perf.lab"
>   user_port: 31010
>   control_port: 31011
>   data_port: 31012
> }
> error_type: 0
> message: "Failure while running fragment. < ParquetEncodingException:[ error starting
field interests at 6 ] < ClassCastException:[ parquet.io.PrimitiveColumnIO cannot be cast
to parquet.io.GroupColumnIO ]"
> ]
> Error: exception while executing query (state=,code=0)
> {code}
> 2014-06-24 00:41:18,646 [b10db58d-8d4d-4d02-9fb5-a5081e5cb254:frag:0:0] ERROR o.a.d.e.w.f.AbstractStatusReporter
- Error 48602de2-8306-47d2-875f-8ad2cd2e964a: Failure while running fragment.
> java.lang.ClassCastException: parquet.io.PrimitiveColumnIO cannot be cast to parquet.io.GroupColumnIO
>         at parquet.io.MessageColumnIO$MessageColumnIORecordConsumer.startField(MessageColumnIO.java:171)
~[parquet-column-1.5.0-20140513.004024-1.jar:na]
>         at org.apache.drill.exec.store.ParquetOutputRecordWriter.addRepeatedVarCharHolder(ParquetOutputRecordWriter.java:761)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.store.EventBasedRecordWriter$RepeatedVarCharFieldWriter.writeField(EventBasedRecordWriter.java:1156)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.store.EventBasedRecordWriter.write(EventBasedRecordWriter.java:150)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.physical.impl.WriterRecordBatch.innerNext(WriterRecordBatch.java:111)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:91)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:72)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:65)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.record.AbstractSingleRecordBatch.innerNext(AbstractSingleRecordBatch.java:45)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.physical.impl.project.ProjectRecordBatch.innerNext(ProjectRecordBatch.java:94)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.record.AbstractRecordBatch.next(AbstractRecordBatch.java:91)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:56)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.physical.impl.ScreenCreator$ScreenRoot.innerNext(ScreenCreator.java:85)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.physical.impl.BaseRootExec.next(BaseRootExec.java:46)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
>         at org.apache.drill.exec.work.fragment.FragmentExecutor.run(FragmentExecutor.java:100)
~[drill-java-exec-1.0.0-m2-incubating-SNAPSHOT-rebuffed.jar:1.0.0-m2-incubating-SNAPSHOT]
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Mime
View raw message