asterixdb-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Wenhai (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (ASTERIXDB-1544) Omit the fuzzyjoin on inverted index
Date Mon, 25 Jul 2016 04:27:20 GMT

     [ https://issues.apache.org/jira/browse/ASTERIXDB-1544?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Wenhai updated ASTERIXDB-1544:
------------------------------
    Description: 
In the current master, we have NO test cases covering the fuzzy join on the (inverted) indexed
field. Once we trigger a fuzzy join "~=" on an indexed field, we always get an error with
the following log.
Schema
{noformat}
drop dataverse fuzzyjointest if exists;
create dataverse fuzzyjointest;
use dataverse fuzzyjointest;

create type DBLPType as open {
  tid: uuid,
  id: int64,
  dblpid: string?,
  title: string?,
  authors: string?,
  misc: string?
}

create type CSXType as closed {
  tid: uuid,
  id: int64,
  csxid: string?,
  title: string?,
  authors: string?,
  misc: string?
}

create dataset DBLP(DBLPType) primary key tid autogenerated;

create dataset CSX(CSXType) primary key tid autogenerated;

load dataset DBLP
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"))
pre-sorted;

load dataset CSX
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));

use dataverse fuzzyjointest;
drop index DBLP.title_index if exists;
create index title_index on DBLP(title) type keyword;
drop index DBLP.author_index if exists;
create index author_index on DBLP(authors) type keyword;
drop index CSX.csx_author_index if exists;
create index csx_author_index on CSX(authors) type keyword;
{noformat}

Query
{noformat}
use dataverse fuzzyjointest;
set simthreshold '.7f'
for $o in dataset('DBLP')
for $t in dataset('CSX')
where word-tokens($o.authors) ~= word-tokens($t.authors)
return {"cid": $t.id, "did": $o.id}
{noformat}

Plan
{noformat}
distribute result [%0->$$9]
-- DISTRIBUTE_RESULT  |PARTITIONED|
  exchange 
  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
    project ([$$9])
    -- STREAM_PROJECT  |PARTITIONED|
      assign [$$9] <- [function-call: asterix:closed-record-constructor, Args:[AString:
{cid}, %0->$$18, AString: {did}, %0->$$19]]
      -- ASSIGN  |PARTITIONED|
        project ([$$18, $$19])
        -- STREAM_PROJECT  |PARTITIONED|
          exchange 
          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
            join (function-call: algebricks:eq, Args:[%0->$$22, %0->$$12])
            -- HYBRID_HASH_JOIN [$$22][$$12]  |PARTITIONED|
              exchange 
              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                project ([$$19, $$22])
                -- STREAM_PROJECT  |PARTITIONED|
                  assign [$$19] <- [function-call: asterix:field-access-by-index, Args:[%0->$$0,
AInt32: {1}]]
                  -- ASSIGN  |PARTITIONED|
                    exchange 
                    -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                      data-scan []<-[$$22, $$0] <- fuzzyjointest:DBLP
                      -- DATASOURCE_SCAN  |PARTITIONED|
                        exchange 
                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                          empty-tuple-source
                          -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
              exchange 
              -- HASH_PARTITION_EXCHANGE [$$12]  |PARTITIONED|
                project ([$$18, $$12])
                -- STREAM_PROJECT  |PARTITIONED|
                  select (function-call: asterix:get-item, Args:[function-call: asterix:similarity-jaccard-check,
Args:[%0->$$14, function-call: asterix:word-tokens, Args:[%0->$$17], AFloat: {0.7}],
AInt32: {0}])
                  -- STREAM_SELECT  |PARTITIONED|
                    project ([$$17, $$18, $$12, $$14])
                    -- STREAM_PROJECT  |PARTITIONED|
                      assign [$$18, $$17] <- [function-call: asterix:field-access-by-index,
Args:[%0->$$1, AInt32: {1}], function-call: asterix:field-access-by-index, Args:[%0->$$1,
AInt32: {4}]]
                      -- ASSIGN  |PARTITIONED|
                        project ([$$1, $$12, $$14])
                        -- STREAM_PROJECT  |PARTITIONED|
                          exchange 
                          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                            unnest-map [$$13, $$1] <- function-call: asterix:index-search,
Args:[AString: {CSX}, AInt32: {0}, AString: {fuzzyjointest}, AString: {CSX}, ABoolean: {true},
ABoolean: {false}, AInt32: {1}, %0->$$26, AInt32: {1}, %0->$$26, TRUE, TRUE, TRUE]
                            -- BTREE_SEARCH  |PARTITIONED|
                              exchange 
                              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                order (ASC, %0->$$26) 
                                -- STABLE_SORT [$$26(ASC)]  |PARTITIONED|
                                  exchange 
                                  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                    unnest-map [$$26] <- function-call: asterix:index-search,
Args:[AString: {csx_author_index}, AInt32: {4}, AString: {fuzzyjointest}, AString: {CSX},
ABoolean: {true}, ABoolean: {true}, AInt32: {1}, AFloat: {0.7}, AInt32: {25}, AInt32: {1},
%0->$$14]
                                    -- LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH  |PARTITIONED|
                                      exchange 
                                      -- BROADCAST_EXCHANGE  |PARTITIONED|
                                        project ([$$12, $$14])
                                        -- STREAM_PROJECT  |PARTITIONED|
                                          assign [$$14] <- [function-call: asterix:word-tokens,
Args:[function-call: asterix:field-access-by-index, Args:[%0->$$25, AInt32: {4}]]]
                                          -- ASSIGN  |PARTITIONED|
                                            exchange 
                                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                              data-scan []<-[$$12, $$25] <- fuzzyjointest:DBLP
                                              -- DATASOURCE_SCAN  |PARTITIONED|
                                                exchange 
                                                -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
                                                  empty-tuple-source
                                                  -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
{noformat}

The error that occurs is similar to the one in issue-1487.
{noformat}
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
SEVERE: Job failed on account of:
HYR0002: null

org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
{noformat}

  was:
In the current master, we have NO test cases covering the fuzzy join on the (inverted) indexed
field. Once we trigger a fuzzy join "~=" on an indexed field, we always get an error with
the following log.
Schema
{noformat}
drop dataverse fuzzyjointest if exists;
create dataverse fuzzyjointest;
use dataverse fuzzyjointest;

create type DBLPType as open {
  tid: uuid,
  id: int64,
  dblpid: string?,
  title: string?,
  authors: string?,
  misc: string?
}

create type CSXType as closed {
  tid: uuid,
  id: int64,
  csxid: string?,
  title: string?,
  authors: string?,
  misc: string?
}

create dataset DBLP(DBLPType) primary key tid autogenerated;

create dataset CSX(CSXType) primary key tid autogenerated;

load dataset DBLP
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"))
pre-sorted;

load dataset CSX
using localfs
(("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));

use dataverse fuzzyjointest;
drop index DBLP.title_index if exists;
create index title_index on DBLP(title) type keyword;
drop index DBLP.author_index if exists;
create index author_index on DBLP(authors) type keyword;
drop index CSX.csx_author_index if exists;
create index csx_author_index on CSX(authors) type keyword;
{noformat}

Query
{noformat}
use dataverse fuzzyjointest;
set simthreshold '.7f'
for $o in dataset('DBLP')
for $t in dataset('CSX')
where word-tokens($o.authors) ~= word-tokens($t.authors)
return {"cid": $t.id, "did": $o.id}
{noformat}

The error that occurs is similar to the one in issue-1487.
{noformat}
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
SEVERE: Job failed on account of:
HYR0002: null

org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
HYR0002: null

	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
	... 3 more
Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
	... 4 more
Caused by: java.lang.NullPointerException
	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
	... 5 more
{noformat}


> Omit the fuzzyjoin on inverted index
> ------------------------------------
>
>                 Key: ASTERIXDB-1544
>                 URL: https://issues.apache.org/jira/browse/ASTERIXDB-1544
>             Project: Apache AsterixDB
>          Issue Type: Bug
>         Environment: MAC/linux
>            Reporter: Wenhai
>            Assignee: Wenhai
>            Priority: Critical
>
> In the current master, we have NO test cases covering the fuzzy join on the (inverted)
indexed field. Once we trigger a fuzzy join "~=" on an indexed field, we always get an
error with the following log.
> Schema
> {noformat}
> drop dataverse fuzzyjointest if exists;
> create dataverse fuzzyjointest;
> use dataverse fuzzyjointest;
> create type DBLPType as open {
>   tid: uuid,
>   id: int64,
>   dblpid: string?,
>   title: string?,
>   authors: string?,
>   misc: string?
> }
> create type CSXType as closed {
>   tid: uuid,
>   id: int64,
>   csxid: string?,
>   title: string?,
>   authors: string?,
>   misc: string?
> }
> create dataset DBLP(DBLPType) primary key tid autogenerated;
> create dataset CSX(CSXType) primary key tid autogenerated;
> load dataset DBLP
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/dblp-small/dblp-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"))
pre-sorted;
> load dataset CSX
> using localfs
> (("path"="127.0.0.1:///Users/michael/Research/asterixdb-src/asterixdb-fuzzy/asterixdb/asterixdb/asterix-app/data/pub-small/csx-small-multi-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
> use dataverse fuzzyjointest;
> drop index DBLP.title_index if exists;
> create index title_index on DBLP(title) type keyword;
> drop index DBLP.author_index if exists;
> create index author_index on DBLP(authors) type keyword;
> drop index CSX.csx_author_index if exists;
> create index csx_author_index on CSX(authors) type keyword;
> {noformat}
> Query
> {noformat}
> use dataverse fuzzyjointest;
> set simthreshold '.7f'
> for $o in dataset('DBLP')
> for $t in dataset('CSX')
> where word-tokens($o.authors) ~= word-tokens($t.authors)
> return {"cid": $t.id, "did": $o.id}
> {noformat}
> Plan
> {noformat}
> distribute result [%0->$$9]
> -- DISTRIBUTE_RESULT  |PARTITIONED|
>   exchange 
>   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>     project ([$$9])
>     -- STREAM_PROJECT  |PARTITIONED|
>       assign [$$9] <- [function-call: asterix:closed-record-constructor, Args:[AString:
{cid}, %0->$$18, AString: {did}, %0->$$19]]
>       -- ASSIGN  |PARTITIONED|
>         project ([$$18, $$19])
>         -- STREAM_PROJECT  |PARTITIONED|
>           exchange 
>           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>             join (function-call: algebricks:eq, Args:[%0->$$22, %0->$$12])
>             -- HYBRID_HASH_JOIN [$$22][$$12]  |PARTITIONED|
>               exchange 
>               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                 project ([$$19, $$22])
>                 -- STREAM_PROJECT  |PARTITIONED|
>                   assign [$$19] <- [function-call: asterix:field-access-by-index,
Args:[%0->$$0, AInt32: {1}]]
>                   -- ASSIGN  |PARTITIONED|
>                     exchange 
>                     -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                       data-scan []<-[$$22, $$0] <- fuzzyjointest:DBLP
>                       -- DATASOURCE_SCAN  |PARTITIONED|
>                         exchange 
>                         -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                           empty-tuple-source
>                           -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
>               exchange 
>               -- HASH_PARTITION_EXCHANGE [$$12]  |PARTITIONED|
>                 project ([$$18, $$12])
>                 -- STREAM_PROJECT  |PARTITIONED|
>                   select (function-call: asterix:get-item, Args:[function-call: asterix:similarity-jaccard-check,
Args:[%0->$$14, function-call: asterix:word-tokens, Args:[%0->$$17], AFloat: {0.7}],
AInt32: {0}])
>                   -- STREAM_SELECT  |PARTITIONED|
>                     project ([$$17, $$18, $$12, $$14])
>                     -- STREAM_PROJECT  |PARTITIONED|
>                       assign [$$18, $$17] <- [function-call: asterix:field-access-by-index,
Args:[%0->$$1, AInt32: {1}], function-call: asterix:field-access-by-index, Args:[%0->$$1,
AInt32: {4}]]
>                       -- ASSIGN  |PARTITIONED|
>                         project ([$$1, $$12, $$14])
>                         -- STREAM_PROJECT  |PARTITIONED|
>                           exchange 
>                           -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                             unnest-map [$$13, $$1] <- function-call: asterix:index-search,
Args:[AString: {CSX}, AInt32: {0}, AString: {fuzzyjointest}, AString: {CSX}, ABoolean: {true},
ABoolean: {false}, AInt32: {1}, %0->$$26, AInt32: {1}, %0->$$26, TRUE, TRUE, TRUE]
>                             -- BTREE_SEARCH  |PARTITIONED|
>                               exchange 
>                               -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                 order (ASC, %0->$$26) 
>                                 -- STABLE_SORT [$$26(ASC)]  |PARTITIONED|
>                                   exchange 
>                                   -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                     unnest-map [$$26] <- function-call: asterix:index-search,
Args:[AString: {csx_author_index}, AInt32: {4}, AString: {fuzzyjointest}, AString: {CSX},
ABoolean: {true}, ABoolean: {true}, AInt32: {1}, AFloat: {0.7}, AInt32: {25}, AInt32: {1},
%0->$$14]
>                                     -- LENGTH_PARTITIONED_INVERTED_INDEX_SEARCH  |PARTITIONED|
>                                       exchange 
>                                       -- BROADCAST_EXCHANGE  |PARTITIONED|
>                                         project ([$$12, $$14])
>                                         -- STREAM_PROJECT  |PARTITIONED|
>                                           assign [$$14] <- [function-call: asterix:word-tokens,
Args:[function-call: asterix:field-access-by-index, Args:[%0->$$25, AInt32: {4}]]]
>                                           -- ASSIGN  |PARTITIONED|
>                                             exchange 
>                                             -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                               data-scan []<-[$$12, $$25] <- fuzzyjointest:DBLP
>                                               -- DATASOURCE_SCAN  |PARTITIONED|
>                                                 exchange 
>                                                 -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
>                                                   empty-tuple-source
>                                                   -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
> {noformat}
> The error that occurs is similar to the one in issue-1487.
> {noformat}
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> 	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> 	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> 	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> 	... 4 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> 	... 5 more
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> 	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> 	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> 	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> 	... 4 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> 	... 5 more
> Jul 24, 2016 8:31:29 AM org.apache.asterix.api.http.servlet.APIServlet doPost
> SEVERE: Job failed on account of:
> HYR0002: null
> org.apache.hyracks.api.exceptions.HyracksException: Job failed on account of:
> HYR0002: null
> 	at org.apache.hyracks.control.cc.job.JobRun.waitForCompletion(JobRun.java:212)
> 	at org.apache.hyracks.control.cc.work.WaitForJobCompletionWork$1.run(WaitForJobCompletionWork.java:48)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:745)
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: HYR0002: null
> 	at org.apache.hyracks.control.common.utils.ExceptionUtils.setNodeIds(ExceptionUtils.java:62)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:319)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:365)
> 	at org.apache.hyracks.control.nc.Task.run(Task.java:297)
> 	... 3 more
> Caused by: org.apache.hyracks.api.exceptions.HyracksDataException: null
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:143)
> 	at org.apache.hyracks.control.nc.Task.pushFrames(Task.java:341)
> 	... 4 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.hyracks.storage.am.lsm.invertedindex.dataflow.LSMInvertedIndexSearchOperatorNodePushable.createSearchPredicate(LSMInvertedIndexSearchOperatorNodePushable.java:56)
> 	at org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePushable.open(IndexSearchOperatorNodePushable.java:131)
> 	... 5 more
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message