asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ima...@apache.org
Subject [39/58] [abbrv] [partial] incubator-asterixdb git commit: Added support of typed indexes over open fields & indexes over nested fields
Date Fri, 24 Apr 2015 18:43:05 GMT
http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-let.aql
new file mode 100644
index 0000000..4cd6fca
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-let.aql
@@ -0,0 +1,33 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on strings.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-let.adm";
+
+for $o in dataset('DBLP')
+let $ed := edit-distance-check($o.nested.authors, "Amihay Motro", 1)
+where $ed[0]
+return $o
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-substring.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-substring.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-substring.aql
new file mode 100644
index 0000000..4196d9c
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-substring.aql
@@ -0,0 +1,35 @@
+/*
+ * Description    : Tests whether an ngram_index index is applied to optimize a selection query using the similarity-edit-distance-check function on the substring of the field.
+ *                  Tests that the optimizer rule correctly drills through the substring function.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPNestedType as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested: DBLPNestedType
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-substring.adm";
+
+for $paper in dataset('DBLP')
+where edit-distance-check(substring($paper.nested.title, 0, 8), "datbase", 1)[0]
+return {
+  "id" : $paper.nested.id,
+  "title" : $paper.nested.title
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql
new file mode 100644
index 0000000..9d268c4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-edit-distance-check-word-tokens.aql
@@ -0,0 +1,37 @@
+/*
+ * Description    : Tests whether an ngram_index index is applied to optimize a selection query using the similarity-edit-distance-check function on individual word tokens.
+ *                  Tests that the optimizer rule correctly drills through the word-tokens function and existential query.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPNestedType as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested: DBLPNestedType
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-edit-distance-check-word-tokens.adm";
+
+for $paper in dataset('DBLP')
+for $word in word-tokens($paper.nested.title)
+where edit-distance-check($word, "Multmedia", 1)[0]
+distinct by $paper.nested.id
+return {
+  "id" : $paper.nested.id,
+  "title" : $paper.nested.title
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-let.aql
new file mode 100644
index 0000000..fd14b17
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-let.aql
@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+set import-private-functions 'true';
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-jaccard-check-let.adm";
+
+for $o in dataset('DBLP')
+let $jacc := similarity-jaccard-check(gram-tokens($o.nested.title, 3, false), gram-tokens("Transactions for Cooperative Environments", 3, false), 0.5f)
+where $jacc[0]
+return $o

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-multi-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-multi-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-multi-let.aql
new file mode 100644
index 0000000..a9fb66d
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ngram-jaccard-check-multi-let.aql
@@ -0,0 +1,37 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the similarity-jaccard-check function on 3-gram tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+set import-private-functions 'true';
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-complex_ngram-jaccard-check-multi-let.adm";
+
+// This test is complex because we have three assigns to drill into.
+for $paper in dataset('DBLP')
+let $paper_tokens := gram-tokens($paper.nested.title, 3, false)
+let $query_tokens := gram-tokens("Transactions for Cooperative Environments", 3, false)
+let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.5f)
+where $jacc[0]
+return {"Paper": $paper_tokens, "Query": $query_tokens }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let-panic.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let-panic.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let-panic.aql
new file mode 100644
index 0000000..21cb93e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let-panic.aql
@@ -0,0 +1,43 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should *not* be applied (see below).
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_olist-edit-distance-check-let-panic.adm";
+
+for $c in dataset('Customers')
+let $ed := edit-distance-check($c.nested.interests, ["computers", "wine", "walking"], 3)
+where $ed[0]
+order by $c.nested.cid
+return $c

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let.aql
new file mode 100644
index 0000000..eaec66a
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-edit-distance-check-let.aql
@@ -0,0 +1,45 @@
+/*
+ * Description    : Tests whether an ngram_index is applied to optimize a selection query using the edit-distance-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_olist-edit-distance-check-let.adm";
+
+for $c in dataset('Customers')
+let $ed := edit-distance-check($c.nested.interests, ["computers", "wine", "walking"], 1)
+where $ed[0]
+order by $c.nested.cid
+return $c

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-jaccard-check-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-jaccard-check-let.aql
new file mode 100644
index 0000000..3d023fa
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/olist-jaccard-check-let.aql
@@ -0,0 +1,41 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_olist-jaccard-check-let.adm";
+
+for $c in dataset('Customers')
+let $jacc := similarity-jaccard-check($c.nested.interests, ["databases", "computers", "wine"], 0.7f)
+where $jacc[0]
+return $c

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ulist-jaccard-check-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ulist-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ulist-jaccard-check-let.aql
new file mode 100644
index 0000000..d79eef3
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/ulist-jaccard-check-let.aql
@@ -0,0 +1,42 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on lists.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_ulist-jaccard-check-let.adm";
+
+for $c in dataset('Customers')
+let $jacc := similarity-jaccard-check($c.nested.interests, ["databases", "computers", "wine"], 0.7f)
+where $jacc[0]
+return $c

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-let.aql
new file mode 100644
index 0000000..ae8c060
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-let.aql
@@ -0,0 +1,34 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_word-jaccard-check-let.adm";
+
+for $o in dataset('DBLP')
+let $jacc := similarity-jaccard-check(word-tokens($o.nested.title), word-tokens("Transactions for Cooperative Environments"), 0.5f)
+where $jacc[0]
+return $o
+

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-multi-let.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-multi-let.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-multi-let.aql
new file mode 100644
index 0000000..d7833e4
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-complex/word-jaccard-check-multi-let.aql
@@ -0,0 +1,36 @@
+/*
+ * Description    : Tests whether a keyword index is applied to optimize a selection query using the similarity-jaccard-check function on word tokens.
+ *                  Tests that the optimizer rule correctly drills through the let clauses.
+ *                  The index should be applied.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+
+write output to nc1:"rttest/inverted-index-complex_word-jaccard-check-multi-let.adm";
+
+// This test is complex because we have three assigns to drill into.
+for $paper in dataset('DBLP')
+let $paper_tokens := word-tokens($paper.nested.title)
+let $query_tokens := word-tokens("Transactions for Cooperative Environments")
+let $jacc := similarity-jaccard-check($paper_tokens, $query_tokens, 0.8f)
+where $jacc[0]
+return {"Paper": $paper_tokens, "Query": $query_tokens }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql
new file mode 100644
index 0000000..a1d4bed
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-edit-distance-check-idx_01.aql
@@ -0,0 +1,54 @@
+/*
+ * Description  : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue        : 730, 741                
+ * Expected Res : Success
+ * Date         : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+	screen-name: string,
+	lang: string,
+	friends-count: int32,
+	statuses-count: int32,
+	name: string,
+	followers-count: int32
+}
+
+create type TweetMessageNestedType as closed {
+	tweetid: int64,
+        user: TwitterUserType,
+        sender-location: point,
+	send-time: datetime,
+        referred-topics: {{ string }},
+	message-text: string,
+	countA: int32,
+	countB: int32
+}
+
+create type TweetMessageType as closed {
+	nested: TweetMessageNestedType
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key nested.tweetid;
+
+create index msgNgramIx on TweetMessages(nested.message-text) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-edit-distance-check_idx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.nested.tweetid > int64("240")
+order by $t1.nested.tweetid
+return {
+    "tweet": {"id": $t1.nested.tweetid, "topics" : $t1.nested.message-text} ,            
+    "similar-tweets": for $t2 in dataset('TweetMessages')
+                      let $sim := edit-distance-check($t1.nested.message-text, $t2.nested.message-text, 7)
+		      where $sim[0] and
+                      $t2.nested.tweetid != $t1.nested.tweetid
+                      order by $t2.nested.tweetid
+                      return {"id": $t2.nested.tweetid, "topics" : $t2.nested.message-text}
+};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql
new file mode 100644
index 0000000..53d2f98
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/leftouterjoin-probe-pidx-with-join-jaccard-check-idx_01.aql
@@ -0,0 +1,54 @@
+/*
+ * Description  : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue        : 730, 741                
+ * Expected Res : Success
+ * Date         : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+	screen-name: string,
+	lang: string,
+	friends-count: int32,
+	statuses-count: int32,
+	name: string,
+	followers-count: int32
+}
+
+create type TweetMessageNestedType as closed {
+	tweetid: int64,
+        user: TwitterUserType,
+        sender-location: point,
+	send-time: datetime,
+        referred-topics: {{ string }},
+	message-text: string,
+	countA: int32,
+	countB: int32
+}
+
+create type TweetMessageType as closed {
+	nested: TweetMessageNestedType
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key nested.tweetid;
+
+create index topicKeywordIx on TweetMessages(nested.referred-topics) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_leftouterjoin-probe-pidx-with-join-jaccard-check_idx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+where $t1.nested.tweetid > int64("240")
+order by $t1.nested.tweetid
+return {
+    "tweet": {"id": $t1.nested.tweetid, "topics" : $t1.nested.referred-topics} ,            
+    "similar-tweets": for $t2 in dataset('TweetMessages')
+                      let $sim := similarity-jaccard-check($t1.nested.referred-topics, $t2.nested.referred-topics, 0.5f)
+		      where $sim[0] and
+                      $t2.nested.tweetid != $t1.nested.tweetid
+                      order by $t2.nested.tweetid
+                      return {"id": $t2.nested.tweetid, "topics" : $t2.nested.referred-topics}
+};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-check_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-check_01.aql
new file mode 100644
index 0000000..8201544
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-check_01.aql
@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-check function of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-check_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance-check($a.nested.authors, $b.nested.authors, 3)[0] and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-contains.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-contains.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-contains.aql
new file mode 100644
index 0000000..cac970e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-contains.aql
@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance-contains function of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance-contains.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance-contains($a.nested.authors, $b.nested.authors, 3)[0] and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-inline.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-inline.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-inline.aql
new file mode 100644
index 0000000..a2f6e99
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance-inline.aql
@@ -0,0 +1,35 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the edit-distance function of its authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ *                  We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
+ *                  We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join-noeqjoin_ngram-edit-distance-inline.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+let $ed := edit-distance($a.nested.authors, $b.nested.authors)
+where $ed < 3 and $a.nested.id < $b.nested.id
+return {"aauthors": $a.nested.authors, "bauthors": $b.nested.authors, "ed": $ed}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance_01.aql
new file mode 100644
index 0000000..d235cf7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-edit-distance_01.aql
@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the edit-distance function of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-edit-distance_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where edit-distance($a.nested.authors, $b.nested.authors) < 3 and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql
new file mode 100644
index 0000000..d703b0b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-edit-distance_01.aql
@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using edit distance of their authors.
+ *                  DBLP has a 3-gram index on authors, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on CSX(nested.authors) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-edit-distance_01.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where $a.nested.authors ~= $b.nested.authors and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..6da2548
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-fuzzyeq-jaccard_01.aql
@@ -0,0 +1,50 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+set import-private-functions 'true';
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on CSX(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where gram-tokens($a.nested.title, 3, false) ~= gram-tokens($b.nested.title, 3, false) and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-check_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-check_01.aql
new file mode 100644
index 0000000..194ea9e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-check_01.aql
@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+set import-private-functions 'true';
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on CSX(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard-check_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard-check(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false), 0.5f)[0]
+      and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-inline.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-inline.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-inline.aql
new file mode 100644
index 0000000..44e8238
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard-inline.aql
@@ -0,0 +1,36 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ *                  We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
+ *                  We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+set import-private-functions 'true';
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index ngram_index on DBLP(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join-noeqjoin_ngram-jaccard-inline.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+let $jacc := similarity-jaccard(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false))
+where $jacc >= 0.5f and $a.nested.id < $b.nested.id
+return {"atitle": $a.nested.title, "btitle": $b.nested.title, "jacc": $jacc}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard_01.aql
new file mode 100644
index 0000000..847058b
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ngram-jaccard_01.aql
@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' 3-gram tokens.
+ *                  DBLP has a 3-gram index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+set import-private-functions 'true';
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index ngram_index on CSX(nested.title) type ngram(3);
+
+write output to nc1:"rttest/inverted-index-join_ngram-jaccard_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(gram-tokens($a.nested.title, 3, false), gram-tokens($b.nested.title, 3, false)) >= 0.5f
+      and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-check_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-check_01.aql
new file mode 100644
index 0000000..fc13651
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-check_01.aql
@@ -0,0 +1,43 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance-check function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance-check_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance-check($a.nested.interests, $b.nested.interests, 3)[0] and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-inline.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-inline.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-inline.aql
new file mode 100644
index 0000000..3ce983d
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance-inline.aql
@@ -0,0 +1,46 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the edit-distance function of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ *                  We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
+ *                  We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join-noeqjoin_olist-edit-distance-inline.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+let $ed := edit-distance($a.nested.interests, $b.nested.interests)
+where $ed <= 2 and $a.nested.cid < $b.nested.cid
+return {"ainterests": $a.nested.interests, "binterests": $b.nested.interests, "ed": $ed}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance_01.aql
new file mode 100644
index 0000000..36b28f0
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-edit-distance_01.aql
@@ -0,0 +1,42 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the edit-distance function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-edit-distance_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where edit-distance($a.nested.interests, $b.nested.interests) <= 2 and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
new file mode 100644
index 0000000..873392e
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-edit-distance_01.aql
@@ -0,0 +1,44 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using edit distance of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'edit-distance';
+set simthreshold '3';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.nested.interests ~= $b.nested.interests and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..fc6fc91
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-fuzzyeq-jaccard_01.aql
@@ -0,0 +1,44 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using Jaccard of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.nested.interests /*+ indexnl */ ~= $b.nested.interests and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-check_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-check_01.aql
new file mode 100644
index 0000000..e4e1084
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-check_01.aql
@@ -0,0 +1,41 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard-check function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard-check_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where /*+ indexnl */ similarity-jaccard-check($a.nested.interests, $b.nested.interests, 0.7f)[0] and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-inline.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-inline.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-inline.aql
new file mode 100644
index 0000000..6cd2604
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard-inline.aql
@@ -0,0 +1,44 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ *                  We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
+ *                  We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join-noeqjoin_olist-jaccard-inline.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+let $jacc := /*+ indexnl */ similarity-jaccard($a.nested.interests, $b.nested.interests)
+where $jacc >= 0.7f and $a.nested.cid < $b.nested.cid
+return {"ainterests": $a.nested.interests, "binterests": $b.nested.interests, "jacc": $jacc }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard_01.aql
new file mode 100644
index 0000000..ef1ef34
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/olist-jaccard_01.aql
@@ -0,0 +1,41 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest lists.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: [string],
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_olist-jaccard_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where /*+ indexnl */ similarity-jaccard($a.nested.interests, $b.nested.interests) >= 0.7f and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..7f69984
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-fuzzyeq-jaccard_01.aql
@@ -0,0 +1,44 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on ~= using Jaccard of their interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.7f';
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where $a.nested.interests /*+ indexnl */ ~= $b.nested.interests and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-check_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-check_01.aql
new file mode 100644
index 0000000..2c11ac5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-check_01.aql
@@ -0,0 +1,41 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard-check function of their interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard-check_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where /*+ indexnl */ similarity-jaccard-check($a.nested.interests, $b.nested.interests, 0.7f)[0] and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-inline.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-inline.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-inline.aql
new file mode 100644
index 0000000..c23e4d7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard-inline.aql
@@ -0,0 +1,44 @@
+/*
+ * Description    : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ *                  We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
+ *                  We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join-noeqjoin_ulist-jaccard-inline.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers')
+let $jacc := /*+ indexnl */ similarity-jaccard($a.nested.interests, $b.nested.interests)
+where $jacc >= 0.7f and $a.nested.cid < $b.nested.cid
+return {"ainterests": $a.nested.interests, "binterests": $b.nested.interests, "jacc": $jacc}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard_01.aql
new file mode 100644
index 0000000..842235f
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/ulist-jaccard_01.aql
@@ -0,0 +1,41 @@
+/*
+ * Description    : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest sets.
+ *                  Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type AddressType as closed {
+  number: int32,
+  street: string,
+  city: string
+}
+
+create type CustomerTypetmp as closed {
+  cid: int32,
+  name: string,
+  age: int32?,
+  address: AddressType?,
+  interests: {{string}},
+  children: [ { name: string, age: int32? } ]
+}
+
+create type CustomerType as closed {
+  nested : CustomerTypetmp
+}
+
+create dataset Customers(CustomerType) primary key nested.cid;
+ 
+create dataset Customers2(CustomerType) primary key nested.cid;
+
+create index interests_index on Customers(nested.interests) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_ulist-jaccard_01.adm";
+
+for $a in dataset('Customers')
+for $b in dataset('Customers2')
+where /*+ indexnl */ similarity-jaccard($a.nested.interests, $b.nested.interests) >= 0.7f and $a.nested.cid < $b.nested.cid
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-fuzzyeq-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-fuzzyeq-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-fuzzyeq-jaccard_01.aql
new file mode 100644
index 0000000..8acb4c3
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-fuzzyeq-jaccard_01.aql
@@ -0,0 +1,49 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_01.adm";
+
+set simfunction 'jaccard';
+set simthreshold '0.5f';
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where word-tokens($a.nested.title) ~= word-tokens($b.nested.title) and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check-after-btree-access.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check-after-btree-access.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check-after-btree-access.aql
new file mode 100644
index 0000000..bf818ad
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check-after-btree-access.aql
@@ -0,0 +1,55 @@
+/*
+ * Description    : Fuzzy self joins a dataset, TweetMessages, based on the similarity-jaccard-check function of its text-messages' word tokens.
+ *                  TweetMessages has a keyword index on text-message and btree index on the primary key tweetid, and we expect the join to be
+ *					transformed into btree and inverted indexed nested-loop joins. We test whether the join condition can be transformed into
+ *					multiple indexed nested loop joins of various type of indexes.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+	screen-name: string,
+	lang: string,
+	friends-count: int32,
+	statuses-count: int32,
+	name: string,
+	followers-count: int32
+}
+
+create type TweetMessageNestedType as closed {
+	tweetid: int64,
+	user: TwitterUserType,
+	sender-location: point,
+	send-time: datetime,
+	referred-topics: {{ string }},
+	message-text: string,
+	countA: int32,
+	countB: int32
+}
+
+create type TweetMessageType as closed {
+	nested: TweetMessageNestedType
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key nested.tweetid;
+
+create index twmSndLocIx on TweetMessages(nested.sender-location) type rtree;
+create index msgCountAIx on TweetMessages(nested.countA) type btree;
+create index msgCountBIx on TweetMessages(nested.countB) type btree;
+create index msgTextIx on TweetMessages(nested.message-text) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard-check-after-btree-access.adm";
+
+for $t1 in dataset('TweetMessages')
+for $t2 in dataset('TweetMessages')
+let $sim := similarity-jaccard-check(word-tokens($t1.nested.message-text), word-tokens($t2.nested.message-text), 0.6f)
+where $sim[0] and $t1.nested.tweetid < int64("20") and $t2.nested.tweetid != $t1.nested.tweetid
+return {
+    "t1": $t1.nested.tweetid,
+    "t2": $t2.nested.tweetid,
+    "sim": $sim[1]
+}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check_01.aql
new file mode 100644
index 0000000..c01b729
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-check_01.aql
@@ -0,0 +1,47 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard-check_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard-check(word-tokens($a.nested.title), word-tokens($b.nested.title), 0.5f)[0]
+      and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-inline.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-inline.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-inline.aql
new file mode 100644
index 0000000..4fe9784
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard-inline.aql
@@ -0,0 +1,35 @@
+/*
+ * Description    : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ *                  We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index.
+ *                  We expect the top-level equi join introduced because of surrogate optimization to be removed, since it is not necessary.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join-noeqjoin_word-jaccard-inline.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('DBLP')
+let $jacc := similarity-jaccard(word-tokens($a.nested.title), word-tokens($b.nested.title))
+where $jacc >= 0.5f and $a.nested.id < $b.nested.id
+return {"atitle": $a.nested.title, "btitle": $b.nested.title, "jacc": $jacc}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard_01.aql
new file mode 100644
index 0000000..d65bdb6
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/inverted-index-join/word-jaccard_01.aql
@@ -0,0 +1,48 @@
+/*
+ * Description    : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens.
+ *                  DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type DBLPTypetmp as closed {
+  id: int32,
+  dblpid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type CSXTypetmp as closed {
+  id: int32,
+  csxid: string,
+  title: string,
+  authors: string,
+  misc: string
+}
+
+create type DBLPType as closed {
+  nested : DBLPTypetmp
+}
+
+create type CSXType as closed {
+  nested : CSXTypetmp
+}
+
+create dataset DBLP(DBLPType) primary key nested.id;
+
+create dataset CSX(CSXType) primary key nested.id;
+
+create index keyword_index on DBLP(nested.title) type keyword;
+
+write output to nc1:"rttest/inverted-index-join_word-jaccard_01.adm";
+
+for $a in dataset('DBLP')
+for $b in dataset('CSX')
+where similarity-jaccard(word-tokens($a.nested.title), word-tokens($b.nested.title)) >= 0.5f
+      and $a.nested.id < $b.nested.id
+return {"arec": $a, "brec": $b }
+

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql
new file mode 100644
index 0000000..19b66f5
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_01.aql
@@ -0,0 +1,57 @@
+/*
+ * Description  : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue        : 730, 741                
+ * Expected Res : Success
+ * Date         : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+	screen-name: string,
+	lang: string,
+	friends-count: int32,
+	statuses-count: int32,
+	name: string,
+	followers-count: int32
+}
+
+create type TweetMessageNestedType as open {
+	tweetid: int64,
+        user: TwitterUserType,
+        sender-location: point,
+	send-time: datetime,
+        referred-topics: {{ string }},
+	message-text: string,
+	countA: int32,
+	countB: int32
+}
+
+create type TweetMessageType as open {
+	nested: TweetMessageNestedType
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key nested.tweetid;
+
+create index twmSndLocIx on TweetMessages(nested.sender-location) type rtree;
+create index msgCountAIx on TweetMessages(nested.countA) type btree;
+create index msgCountBIx on TweetMessages(nested.countB) type btree;
+create index msgTextIx on TweetMessages(nested.message-text) type keyword;
+
+write output to nc1:"rttest/rtree-index-join_leftouterjoin-probe-pidx-with-join-rtree-sidx_01.adm";
+
+for $t1 in dataset('TweetMessages')
+let $n :=  create-circle($t1.nested.sender-location, 0.5)
+where $t1.nested.tweetid < int64("10")
+order by $t1.nested.tweetid
+return {
+"tweetid1": $t1.nested.tweetid,
+"loc1":$t1.nested.sender-location,
+"nearby-message": for $t2 in dataset('TweetMessages')
+                             where spatial-intersect($t2.nested.sender-location, $n)
+                             order by $t2.tweetid
+                             return {"tweetid2":$t2.nested.tweetid, "loc2":$t2.nested.sender-location}
+};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql
new file mode 100644
index 0000000..88f008c
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/leftouterjoin-probe-pidx-with-join-rtree-sidx_02.aql
@@ -0,0 +1,57 @@
+/*
+ * Description  : Test that left-outer-join may use two available indexes, one for primary index in prob subtree and another for secondary rtree index in index subtree.
+ * Issue        : 730, 741                
+ * Expected Res : Success
+ * Date         : 8th May 2014
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type TwitterUserType as closed {
+	screen-name: string,
+	lang: string,
+	friends-count: int32,
+	statuses-count: int32,
+	name: string,
+	followers-count: int32
+}
+
+create type TweetMessageNestedType as closed {
+	tweetid: int64,
+        user: TwitterUserType,
+        sender-location: point,
+	send-time: datetime,
+        referred-topics: {{ string }},
+	message-text: string,
+	countA: int32,
+	countB: int32
+}
+
+create type TweetMessageType as open {
+	nested: TweetMessageNestedType
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key nested.tweetid;
+
+create index twmSndLocIx on TweetMessages(nested.sender-location) type rtree;
+create index msgCountAIx on TweetMessages(nested.countA) type btree;
+create index msgCountBIx on TweetMessages(nested.countB) type btree;
+create index msgTextIx on TweetMessages(nested.message-text) type keyword;
+
+write output to nc1:"rttest/rtree-index-join_leftouterjoin-probe-pidx-with-join-rtree-sidx_02.adm";
+
+for $t1 in dataset('TweetMessages')
+let $n :=  create-circle($t1.nested.sender-location, 0.5)
+where $t1.nested.tweetid < int64("10")
+order by $t1.nested.tweetid
+return {
+"tweetid1": $t1.nested.tweetid,
+"loc1":$t1.nested.sender-location,
+"nearby-message": for $t2 in dataset('TweetMessages')
+                             where spatial-intersect($t2.nested.sender-location, $n) and $t1.nested.tweetid != $t2.nested.tweetid
+                             order by $t2.nested.tweetid
+                             return {"tweetid2":$t2.nested.tweetid, "loc2":$t2.nested.sender-location}
+};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_01.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_01.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_01.aql
new file mode 100644
index 0000000..653ab55
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_01.aql
@@ -0,0 +1,37 @@
+/*
+ * Description    : Joins two datasets on the intersection of their point attributes.
+ *                  The dataset 'MyData1' has an enforced open RTree index, and we expect the
+ *                  join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+  id: int32,
+  point: point,
+  kwds: string,
+  line1: line,
+  line2: line,
+  poly1: polygon,
+  poly2: polygon,
+  rec: rectangle
+}
+
+create type MyRecordNested as closed {
+  nested: MyRecord
+}
+
+create dataset MyData1(MyRecordNested) primary key nested.id;
+create dataset MyData2(MyRecord) primary key id;
+
+create index rtree_index on MyData1(nested.point) type rtree;
+
+write output to nc1:"rttest/index-join_rtree-spatial-intersect-point.adm";
+
+for $a in dataset('MyData1')
+for $b in dataset('MyData2')
+where spatial-intersect($a.nested.point, $b.point)
+return {"a": $a, "b": $b}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_02.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_02.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_02.aql
new file mode 100644
index 0000000..c5de5bb
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_02.aql
@@ -0,0 +1,37 @@
+/*
+ * Description    : Joins two datasets on the intersection of their point attributes.
+ *                  The dataset 'MyData2' has an enforced open RTree index, and we expect the
+ *                  join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+  id: int32,
+  point: point,
+  kwds: string,
+  line1: line,
+  line2: line,
+  poly1: polygon,
+  poly2: polygon,
+  rec: rectangle
+}
+
+create type MyRecordNested as closed {
+  nested: MyRecord
+}
+
+create dataset MyData1(MyRecordNested) primary key nested.id;
+create dataset MyData2(MyRecord) primary key id;
+
+create index rtree_index on MyData2(point) type rtree;
+
+write output to nc1:"rttest/rtree-index-join_spatial-intersect-point_02.adm";
+
+for $a in dataset('MyData1')
+for $b in dataset('MyData2')
+where spatial-intersect($a.nested.point, $b.point)
+return {"a": $a, "b": $b}

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/04b2b77a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_03.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_03.aql b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_03.aql
new file mode 100644
index 0000000..8ba72d7
--- /dev/null
+++ b/asterix-app/src/test/resources/optimizerts/queries/nested-index/rtree-index-join/spatial-intersect-point_03.aql
@@ -0,0 +1,35 @@
+/*
+ * Description    : Self-joins a dataset on the intersection of its point attribute.
+ *                  The dataset has an enforced open RTree index, and we expect the
+ *                  join to be transformed into an indexed nested-loop join.
+ * Success        : Yes
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+  id: int32,
+  point: point,
+  kwds: string,
+  line1: line,
+  line2: line,
+  poly1: polygon,
+  poly2: polygon,
+  rec: rectangle
+}
+
+create type MyRecordNested as closed {
+  nested: MyRecord
+}
+create dataset MyData(MyRecordNested) primary key nested.id;
+
+create index rtree_index on MyData(nested.point) type rtree;
+
+write output to nc1:"rttest/rtree-index-join_spatial-intersect-point_03.adm";
+
+for $a in dataset('MyData')
+for $b in dataset('MyData')
+where spatial-intersect($a.nested.point, $b.nested.point)
+return {"a": $a, "b": $b}


Mime
View raw message