From commits-return-5344-archive-asf-public=cust-asf.ponee.io@asterixdb.apache.org Fri Apr 13 22:46:51 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 9ACEE18077F for ; Fri, 13 Apr 2018 22:46:49 +0200 (CEST) Received: (qmail 15951 invoked by uid 500); 13 Apr 2018 20:46:48 -0000 Mailing-List: contact commits-help@asterixdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@asterixdb.apache.org Delivered-To: mailing list commits@asterixdb.apache.org Received: (qmail 15886 invoked by uid 99); 13 Apr 2018 20:46:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 13 Apr 2018 20:46:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 8FE8AF3329; Fri, 13 Apr 2018 20:46:47 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: xikui@apache.org To: commits@asterixdb.apache.org Date: Fri, 13 Apr 2018 20:46:52 -0000 Message-Id: In-Reply-To: <11fdf90c8fe14a1d9afd9b5687e7c395@git.apache.org> References: <11fdf90c8fe14a1d9afd9b5687e7c395@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [06/13] asterixdb git commit: [ASTERIXDB-2366][TEST] Optimizer tests cleanup for SQL++ http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql deleted file mode 
100644 index 8a6d743..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest sets. - * Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: {{string}}, - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create dataset Customers2(CustomerType) primary key cid; - -create index interests_index on Customers2(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_02.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers2') -where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.sqlpp new file mode 100644 index 0000000..287ef06 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_02.sqlpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy joins two datasets, Customer and Customer2, based on the similarity-jaccard function of their interest sets. + * Customers2 has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : {{string}}, + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create dataset Customers2(CustomerType) primary key cid; + +create index interests_index on Customers2 (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_02.adm"; +select element {'arec':a,'brec':b} +from Customers as a, + Customers2 as b +where (( /*+ indexnl */ test.`similarity-jaccard`(a.interests,b.interests) >= 0.700000f) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql deleted file mode 100644 index ef5269a..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.aql +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: {{string}}, - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_03.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -where /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) >= 0.7f and $a.cid < $b.cid -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.sqlpp new file mode 100644 index 0000000..bc7b1cd --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_03.sqlpp @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : {{string}}, + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_03.adm"; +select element {'arec':a,'brec':b} +from Customers as a, + Customers as b +where (( /*+ indexnl */ test.`similarity-jaccard`(a.interests,b.interests) >= 0.700000f) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.aql deleted file mode 100644 index 72a8dca..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.aql +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets. - * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type AddressType as closed { - number: int32, - street: string, - city: string -} - -create type CustomerType as closed { - cid: int32, - name: string, - age: int32?, - address: AddressType?, - interests: {{string}}, - children: [ { name: string, age: int32? 
} ] -} - -create dataset Customers(CustomerType) primary key cid; - -create index interests_index on Customers(interests) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_04.adm"; - -for $a in dataset('Customers') -for $b in dataset('Customers') -let $jacc := /*+ indexnl */ similarity-jaccard($a.interests, $b.interests) -where $jacc >= 0.7f and $a.cid < $b.cid -return {"arec": $a, "brec": $b, "jacc": $jacc } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.sqlpp new file mode 100644 index 0000000..77c26cf --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/ulist-jaccard_04.sqlpp @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* + * Description : Fuzzy self joins a dataset, Customers, based on the similarity-jaccard function of its interest sets. + * Customers has a keyword index on interests, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.AddressType as + closed { + number : integer, + street : string, + city : string +}; + +create type test.CustomerType as + closed { + cid : integer, + name : string, + age : integer?, + address : AddressType?, + interests : {{string}}, + children : [{ + name : string, + age : integer? + } +] +}; + +create dataset Customers(CustomerType) primary key cid; + +create index interests_index on Customers (interests) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_ulist-jaccard_04.adm"; +select element {'arec':a,'brec':b,'jacc':jacc} +from Customers as a, + Customers as b +with jacc as /*+ indexnl */ test.`similarity-jaccard`(a.interests,b.interests) +where ((jacc >= 0.700000f) and (a.cid < b.cid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql deleted file mode 100644 index 6b1feb6..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.aql +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license 
agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens. - * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index keyword_index on CSX(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_02.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.sqlpp ---------------------------------------------------------------------- 
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.sqlpp new file mode 100644 index 0000000..e2158e1 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_02.sqlpp @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on ~= using Jaccard of their titles' word tokens. + * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index keyword_index on CSX (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_02.adm"; +set `simfunction` `jaccard`; + +set `simthreshold` `0.5f`; + +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where ((test.`word-tokens`(a.title) ~= test.`word-tokens`(b.title)) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql deleted file mode 100644 index d305ba0..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.aql +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_03.adm"; - -set simfunction 'jaccard'; -set simthreshold '0.5f'; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where word-tokens($a.title) ~= word-tokens($b.title) and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.sqlpp new file mode 100644 index 0000000..c6c38b7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-fuzzyeq-jaccard_03.sqlpp @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on ~= using Jaccard of its titles' word tokens. + * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-fuzzyeq-jaccard_03.adm"; +set `simfunction` `jaccard`; + +set `simthreshold` `0.5f`; + +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where ((test.`word-tokens`(a.title) ~= test.`word-tokens`(b.title)) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql deleted file mode 100644 index 5586919..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.aql +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, TweetMessages, based on the similarity-jaccard-check function of its text-messages' word tokens. - * TweetMessages has a keyword index on text-message and btree index on the primary key tweetid, and we expect the join to be - * transformed into btree and inverted indexed nested-loop joins. We test whether the join condition can be transformed into - * multiple indexed nested loop joins of various type of indexes. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type TwitterUserType as closed { - screen-name: string, - lang: string, - friends-count: int32, - statuses-count: int32, - name: string, - followers-count: int32 -} - -create type TweetMessageType as closed { - tweetid: int64, - user: TwitterUserType, - sender-location: point, - send-time: datetime, - referred-topics: {{ string }}, - message-text: string, - countA: int32, - countB: int32 -} - -create dataset TweetMessages(TweetMessageType) -primary key tweetid; - -create index twmSndLocIx on TweetMessages(sender-location) type rtree; -create index msgCountAIx on TweetMessages(countA) type btree; -create index msgCountBIx on TweetMessages(countB) type btree; -create index msgTextIx on TweetMessages(message-text) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check-after-btree-access.adm"; - -for $t1 in dataset('TweetMessages') -for $t2 in dataset('TweetMessages') -let $sim := similarity-jaccard-check(word-tokens($t1.message-text), 
word-tokens($t2.message-text), 0.6f) -where $sim[0] and $t1.tweetid < int64("20") and $t2.tweetid != $t1.tweetid -return { - "t1": $t1.tweetid, - "t2": $t2.tweetid, - "sim": $sim[1] -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.sqlpp new file mode 100644 index 0000000..50d2442 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check-after-btree-access.sqlpp @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, TweetMessages, based on the similarity-jaccard-check function of its text-messages' word tokens. 
+ * TweetMessages has a keyword index on text-message and btree index on the primary key tweetid, and we expect the join to be + * transformed into btree and inverted indexed nested-loop joins. We test whether the join condition can be transformed into + * multiple indexed nested loop joins of various type of indexes. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.TwitterUserType as + closed { + `screen-name` : string, + lang : string, + `friends-count` : integer, + `statuses-count` : integer, + name : string, + `followers-count` : integer +}; + +create type test.TweetMessageType as + closed { + tweetid : bigint, + user : TwitterUserType, + `sender-location` : point, + `send-time` : datetime, + `referred-topics` : {{string}}, + `message-text` : string, + countA : integer, + countB : integer +}; + +create dataset TweetMessages(TweetMessageType) primary key tweetid; + +create index twmSndLocIx on TweetMessages (`sender-location`) type rtree; + +create index msgCountAIx on TweetMessages (countA) type btree; + +create index msgCountBIx on TweetMessages (countB) type btree; + +create index msgTextIx on TweetMessages (`message-text`) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check-after-btree-access.adm"; +select element {'t1':t1.tweetid,'t2':t2.tweetid,'sim':sim[1]} +from TweetMessages as t1, + TweetMessages as t2 +with sim as test.`similarity-jaccard-check`(test.`word-tokens`(t1.`message-text`),test.`word-tokens`(t2.`message-text`),0.600000f) +where (sim[0] and (t1.tweetid < test.bigint('20')) and (t2.tweetid != t1.tweetid)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql deleted file mode 100644 index 0a63d12..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.aql +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens. - * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index keyword_index on CSX(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.sqlpp new file mode 100644 index 0000000..0c2890a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_02.sqlpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard-check function of their titles' word tokens. + * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index keyword_index on CSX (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_02.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where (test.`similarity-jaccard-check`(test.`word-tokens`(a.title),test.`word-tokens`(b.title),0.500000f)[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql deleted file mode 100644 index 
5c97de1..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f)[0] - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.sqlpp new file mode 100644 index 0000000..fdf6235 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_03.sqlpp @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' word tokens. + * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_03.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where (test.`similarity-jaccard-check`(test.`word-tokens`(a.title),test.`word-tokens`(b.title),0.500000f)[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.aql deleted file mode 100644 index 7481ed3..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_04.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard-check(word-tokens($a.title), word-tokens($b.title), 0.5f) -where $jacc[0] and $a.id < $b.id -return {"arec": $a, "brec": $b, "jacc": $jacc[1] } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.sqlpp 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.sqlpp new file mode 100644 index 0000000..8eb16d7 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard-check_04.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard-check function of its titles' word tokens. + * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard-check_04.adm"; +select element {'arec':a,'brec':b,'jacc':jacc[1]} +from DBLP as a, + DBLP as b +with jacc as test.`similarity-jaccard-check`(test.`word-tokens`(a.title),test.`word-tokens`(b.title),0.500000f) +where (jacc[0] and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql deleted file mode 100644 index 1b123a6..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.aql +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens. - * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create type CSXType as closed { - id: int32, - csxid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create dataset CSX(CSXType) primary key id; - -create index keyword_index on CSX(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_02.adm"; - -for $a in dataset('DBLP') -for $b in dataset('CSX') -where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.sqlpp new file mode 100644 index 0000000..b943eec --- /dev/null +++ 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_02.sqlpp @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy joins two datasets, DBLP and CSX, based on the similarity-jaccard function of their titles' word tokens. + * CSX has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. 
+ * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create type test.CSXType as + closed { + id : integer, + csxid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create dataset CSX(CSXType) primary key id; + +create index keyword_index on CSX (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_02.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + CSX as b +where ((test.`similarity-jaccard`(test.`word-tokens`(a.title),test.`word-tokens`(b.title)) >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql deleted file mode 100644 index e0a2568..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.aql +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_03.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -where similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) >= 0.5f - and $a.id < $b.id -return {"arec": $a, "brec": $b } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.sqlpp new file mode 100644 index 0000000..37a119b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_03.sqlpp @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. + * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_03.adm"; +select element {'arec':a,'brec':b} +from DBLP as a, + DBLP as b +where ((test.`similarity-jaccard`(test.`word-tokens`(a.title),test.`word-tokens`(b.title)) >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.aql 
b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.aql deleted file mode 100644 index f3ca957..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.aql +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. - * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. - * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. 
- * Success : Yes - */ - -drop dataverse test if exists; -create dataverse test; -use dataverse test; - -create type DBLPType as closed { - id: int32, - dblpid: string, - title: string, - authors: string, - misc: string -} - -create dataset DBLP(DBLPType) primary key id; - -create index keyword_index on DBLP(title) type keyword; - -write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_04.adm"; - -for $a in dataset('DBLP') -for $b in dataset('DBLP') -let $jacc := similarity-jaccard(word-tokens($a.title), word-tokens($b.title)) -where $jacc >= 0.5f and $a.id < $b.id -return {"arec": $a, "brec": $b, "jacc": $jacc } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.sqlpp new file mode 100644 index 0000000..584522a --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/inverted-index-join/word-jaccard_04.sqlpp @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Description : Fuzzy self joins a dataset, DBLP, based on the similarity-jaccard function of its titles' word tokens. + * DBLP has a keyword index on title, and we expect the join to be transformed into an indexed nested-loop join. + * We test the inlining of variables that enable the select to be pushed into the join for subsequent optimization with an index. + * Success : Yes + */ + +drop dataverse test if exists; +create dataverse test; + +use test; + + +create type test.DBLPType as + closed { + id : integer, + dblpid : string, + title : string, + authors : string, + misc : string +}; + +create dataset DBLP(DBLPType) primary key id; + +create index keyword_index on DBLP (title) type keyword; + +write output to asterix_nc1:"rttest/inverted-index-join_word-jaccard_04.adm"; +select element {'arec':a,'brec':b,'jacc':jacc} +from DBLP as a, + DBLP as b +with jacc as test.`similarity-jaccard`(test.`word-tokens`(a.title),test.`word-tokens`(b.title)) +where ((jacc >= 0.500000f) and (a.id < b.id)) +; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.aql deleted file mode 100644 index 65029f3..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.aql +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -drop dataverse join-super-key_1 if exists; - -create dataverse join-super-key_1; - -use dataverse join-super-key_1; - -create type SupplierType as closed { - s_suppkey: int32, - s_name: string, - s_address: string, - s_nationkey: int32, - s_phone: string, - s_acctbal: double, - s_comment: string -} - -create type NationType as closed { - n_nationkey: int32, - n_name: string, - n_regionkey: int32, - n_comment: string -} - -create type LineItemType as closed { - l_orderkey: int32, - l_partkey: int32, - l_suppkey: int32, - l_linenumber: int32, - l_quantity: double, - l_extendedprice: double, - l_discount: double, - l_tax: double, - l_returnflag: string, - l_linestatus: string, - l_shipdate: string, - l_commitdate: string, - l_receiptdate: string, - l_shipinstruct: string, - l_shipmode: string, - l_comment: string -} - -create type PartType as closed { - p_partkey: int32, - p_name: string, - p_mfgr: string, - p_brand: string, - p_type: string, - p_size: int32, - p_container: string, - p_retailprice: double, - p_comment: string -} - -create type PartSuppType as closed { - ps_partkey: int32, - ps_suppkey: int32, - ps_availqty: int32, - ps_supplycost: double, - ps_comment: string -} - - -create nodegroup group1 if not exists on asterix_nc1, asterix_nc2; - -write output to asterix_nc1:"/tmp/join-super-key_01.adm"; - -create dataset LineItems(LineItemType) - primary key l_partkey, 
l_linenumber on group1; -create dataset PartSupp(PartSuppType) - primary key ps_partkey, ps_suppkey on group1; - - -for $li in dataset('LineItems') -for $ps in dataset('PartSupp') -where $li.l_partkey = $ps.ps_partkey and $li.l_suppkey = $ps.ps_suppkey and - $li.l_extendedprice = $ps.ps_supplycost -return { - "l_partkey": $li.l_partkey -} - - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.sqlpp new file mode 100644 index 0000000..0e7fc0b --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_01.sqlpp @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* Optimizer test query join-super-key_01: inner join of LineItems and PartSupp, with LineItems listed first in the from clause. */ +drop dataverse `join-super-key_1` if exists; +create dataverse `join-super-key_1`; + +use `join-super-key_1`; + +/* Closed record types. Only LineItemType and PartSuppType back the datasets created below; SupplierType, NationType and PartType are declared but not referenced again in this script. */ +create type `join-super-key_1`.SupplierType as + closed { + s_suppkey : integer, + s_name : string, + s_address : string, + s_nationkey : integer, + s_phone : string, + s_acctbal : double, + s_comment : string +}; + +create type `join-super-key_1`.NationType as + closed { + n_nationkey : integer, + n_name : string, + n_regionkey : integer, + n_comment : string +}; + +create type `join-super-key_1`.LineItemType as + closed { + l_orderkey : integer, + l_partkey : integer, + l_suppkey : integer, + l_linenumber : integer, + l_quantity : double, + l_extendedprice : double, + l_discount : double, + l_tax : double, + l_returnflag : string, + l_linestatus : string, + l_shipdate : string, + l_commitdate : string, + l_receiptdate : string, + l_shipinstruct : string, + l_shipmode : string, + l_comment : string +}; + +create type `join-super-key_1`.PartType as + closed { + p_partkey : integer, + p_name : string, + p_mfgr : string, + p_brand : string, + p_type : string, + p_size : integer, + p_container : string, + p_retailprice : double, + p_comment : string +}; + +create type `join-super-key_1`.PartSuppType as + closed { + ps_partkey : integer, + ps_suppkey : integer, + ps_availqty : integer, + ps_supplycost : double, + ps_comment : string +}; + +create nodegroup group1 if not exists on + asterix_nc1, + asterix_nc2 +; +write output to asterix_nc1:"/tmp/join-super-key_01.adm"; +create dataset LineItems(LineItemType) primary key l_partkey,l_linenumber on group1; + +create dataset PartSupp(PartSuppType) primary key ps_partkey,ps_suppkey on group1; +/* The equality predicates cover all of PartSupp's primary key (ps_partkey, ps_suppkey) plus ps_supplycost, but only l_partkey from LineItems' primary key. NOTE(review): presumably this is the "super key" case the test name refers to - confirm against the expected plan. */ +select element {'l_partkey':li.l_partkey} +from LineItems as li, + PartSupp as ps +where ((li.l_partkey = ps.ps_partkey) and (li.l_suppkey = ps.ps_suppkey) and (li.l_extendedprice = ps.ps_supplycost)) +;
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.aql deleted file mode 100644 index 831976f..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.aql +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -drop dataverse join-super-key_01 if exists; - -create dataverse join-super-key_01; - -use dataverse join-super-key_01; - -create type SupplierType as closed { - s_suppkey: int32, - s_name: string, - s_address: string, - s_nationkey: int32, - s_phone: string, - s_acctbal: double, - s_comment: string -} - -create type NationType as closed { - n_nationkey: int32, - n_name: string, - n_regionkey: int32, - n_comment: string -} - -create type LineItemType as closed { - l_orderkey: int32, - l_partkey: int32, - l_suppkey: int32, - l_linenumber: int32, - l_quantity: double, - l_extendedprice: double, - l_discount: double, - l_tax: double, - l_returnflag: string, - l_linestatus: string, - l_shipdate: string, - l_commitdate: string, - l_receiptdate: string, - l_shipinstruct: string, - l_shipmode: string, - l_comment: string -} - -create type PartType as closed { - p_partkey: int32, - p_name: string, - p_mfgr: string, - p_brand: string, - p_type: string, - p_size: int32, - p_container: string, - p_retailprice: double, - p_comment: string -} - -create type PartSuppType as closed { - ps_partkey: int32, - ps_suppkey: int32, - ps_availqty: int32, - ps_supplycost: double, - ps_comment: string -} - - -create nodegroup group1 if not exists on asterix_nc1, asterix_nc2; - -write output to asterix_nc1:"/tmp/join-super-key_01.adm"; - -create dataset LineItems(LineItemType) - primary key l_partkey, l_linenumber on group1; -create dataset PartSupp(PartSuppType) - primary key ps_partkey, ps_suppkey on group1; - - -for $ps in dataset('PartSupp') -for $li in dataset('LineItems') -where $li.l_partkey = $ps.ps_partkey and $li.l_suppkey = $ps.ps_suppkey and - $li.l_extendedprice = $ps.ps_supplycost -return { - "l_partkey": $li.l_partkey -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.sqlpp ---------------------------------------------------------------------- diff --git 
a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.sqlpp new file mode 100644 index 0000000..7247cdc --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/join-super-key_02.sqlpp @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* Optimizer test query join-super-key_02: inner join of PartSupp and LineItems, with PartSupp listed first in the from clause. NOTE(review): the dataverse is named `join-super-key_01` although this file is join-super-key_02.sqlpp - looks carried over from the 01 variant; confirm before renaming, since expected results may mention the dataverse name. */ +drop dataverse `join-super-key_01` if exists; +create dataverse `join-super-key_01`; + +use `join-super-key_01`; + +/* Closed record types. Only LineItemType and PartSuppType back the datasets created below; SupplierType, NationType and PartType are declared but not referenced again in this script. */ +create type `join-super-key_01`.SupplierType as + closed { + s_suppkey : integer, + s_name : string, + s_address : string, + s_nationkey : integer, + s_phone : string, + s_acctbal : double, + s_comment : string +}; + +create type `join-super-key_01`.NationType as + closed { + n_nationkey : integer, + n_name : string, + n_regionkey : integer, + n_comment : string +}; + +create type `join-super-key_01`.LineItemType as + closed { + l_orderkey : integer, + l_partkey : integer, + l_suppkey : integer, + l_linenumber : integer, + l_quantity : double, + l_extendedprice : double, + l_discount : double, + l_tax : double, + l_returnflag : string, + l_linestatus : string, + l_shipdate : string, + l_commitdate : string, + l_receiptdate : string, + l_shipinstruct : string, + l_shipmode : string, + l_comment : string +}; + +create type `join-super-key_01`.PartType as + closed { + p_partkey : integer, + p_name : string, + p_mfgr : string, + p_brand : string, + p_type : string, + p_size : integer, + p_container : string, + p_retailprice : double, + p_comment : string +}; + +create type `join-super-key_01`.PartSuppType as + closed { + ps_partkey : integer, + ps_suppkey : integer, + ps_availqty : integer, + ps_supplycost : double, + ps_comment : string +}; + +create nodegroup group1 if not exists on + asterix_nc1, + asterix_nc2 +; /* NOTE(review): the output path below says join-super-key_01.adm, the same path the 01 variant uses - likely a copy-paste leftover; confirm whether it should be _02. */ +write output to asterix_nc1:"/tmp/join-super-key_01.adm"; +create dataset LineItems(LineItemType) primary key l_partkey,l_linenumber on group1; + +create dataset PartSupp(PartSuppType) primary key ps_partkey,ps_suppkey on group1; +/* The equality predicates cover all of PartSupp's primary key (ps_partkey, ps_suppkey) plus ps_supplycost, but only l_partkey from LineItems' primary key. NOTE(review): presumably this is the "super key" case the test name refers to - confirm against the expected plan. */ +select element {'l_partkey':li.l_partkey} +from PartSupp as ps, + LineItems as li +where ((li.l_partkey = ps.ps_partkey) and (li.l_suppkey = ps.ps_suppkey) and (li.l_extendedprice = ps.ps_supplycost)) +;
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.aql deleted file mode 100644 index ad6833d..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.aql +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -drop dataverse loj-super-key_01 if exists; - -create dataverse loj-super-key_01; - -use dataverse loj-super-key_01; - -create type SupplierType as closed { - s_suppkey: int32, - s_name: string, - s_address: string, - s_nationkey: int32, - s_phone: string, - s_acctbal: double, - s_comment: string -} - -create type NationType as closed { - n_nationkey: int32, - n_name: string, - n_regionkey: int32, - n_comment: string -} - -create type LineItemType as closed { - l_orderkey: int32, - l_partkey: int32, - l_suppkey: int32, - l_linenumber: int32, - l_quantity: double, - l_extendedprice: double, - l_discount: double, - l_tax: double, - l_returnflag: string, - l_linestatus: string, - l_shipdate: string, - l_commitdate: string, - l_receiptdate: string, - l_shipinstruct: string, - l_shipmode: string, - l_comment: string -} - -create type PartType as closed { - p_partkey: int32, - p_name: string, - p_mfgr: string, - p_brand: string, - p_type: string, - p_size: int32, - p_container: string, - p_retailprice: double, - p_comment: string -} - -create type PartSuppType as closed { - ps_partkey: int32, - ps_suppkey: int32, - ps_availqty: int32, - ps_supplycost: double, - ps_comment: string -} - - -create nodegroup group1 if not exists on asterix_nc1, asterix_nc2; - -write output to asterix_nc1:"/tmp/loj-super-key_01.adm"; - -create dataset LineItems(LineItemType) - primary key l_partkey, l_linenumber on group1; -create dataset PartSupp(PartSuppType) - primary key ps_partkey, ps_suppkey on group1; - - -for $li in dataset('LineItems') -let $partsupp := - for $ps in dataset('PartSupp') - where $li.l_partkey = $ps.ps_partkey - and $li.l_suppkey = $ps.ps_suppkey - and $li.l_extendedprice = $ps.ps_supplycost - return $ps -return { - "li": $li, - "partsupp": $partsupp -} - - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.sqlpp 
---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.sqlpp new file mode 100644 index 0000000..38bd3e3 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_01.sqlpp @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* Optimizer test query loj-super-key_01: for each LineItems record, collects the matching PartSupp records through a correlated subquery. NOTE(review): "loj" presumably stands for left outer join - confirm against the expected plan. */ +drop dataverse `loj-super-key_01` if exists; +create dataverse `loj-super-key_01`; + +use `loj-super-key_01`; + +/* Closed record types. Only LineItemType and PartSuppType back the datasets created below; SupplierType, NationType and PartType are declared but not referenced again in this script. */ +create type `loj-super-key_01`.SupplierType as + closed { + s_suppkey : integer, + s_name : string, + s_address : string, + s_nationkey : integer, + s_phone : string, + s_acctbal : double, + s_comment : string +}; + +create type `loj-super-key_01`.NationType as + closed { + n_nationkey : integer, + n_name : string, + n_regionkey : integer, + n_comment : string +}; + +create type `loj-super-key_01`.LineItemType as + closed { + l_orderkey : integer, + l_partkey : integer, + l_suppkey : integer, + l_linenumber : integer, + l_quantity : double, + l_extendedprice : double, + l_discount : double, + l_tax : double, + l_returnflag : string, + l_linestatus : string, + l_shipdate : string, + l_commitdate : string, + l_receiptdate : string, + l_shipinstruct : string, + l_shipmode : string, + l_comment : string +}; + +create type `loj-super-key_01`.PartType as + closed { + p_partkey : integer, + p_name : string, + p_mfgr : string, + p_brand : string, + p_type : string, + p_size : integer, + p_container : string, + p_retailprice : double, + p_comment : string +}; + +create type `loj-super-key_01`.PartSuppType as + closed { + ps_partkey : integer, + ps_suppkey : integer, + ps_availqty : integer, + ps_supplycost : double, + ps_comment : string +}; + +create nodegroup group1 if not exists on + asterix_nc1, + asterix_nc2 +; +write output to asterix_nc1:"/tmp/loj-super-key_01.adm"; +create dataset LineItems(LineItemType) primary key l_partkey,l_linenumber on group1; + +create dataset PartSupp(PartSuppType) primary key ps_partkey,ps_suppkey on group1; +/* The outer query over LineItems has no filter of its own; partsupp is bound per li to the PartSupp records matching on ps_partkey, ps_suppkey (PartSupp's whole primary key) and ps_supplycost. */ +select element {'li':li,'partsupp':partsupp} +from LineItems as li +with partsupp as ( + select element ps + from PartSupp as ps + where ((li.l_partkey = ps.ps_partkey) and (li.l_suppkey = ps.ps_suppkey) and (li.l_extendedprice = ps.ps_supplycost)) + ) +;
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.aql ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.aql b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.aql deleted file mode 100644 index e0678e9..0000000 --- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.aql +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -drop dataverse loj-super-key_02 if exists; - -create dataverse loj-super-key_02; - -use dataverse loj-super-key_02; - -create type SupplierType as closed { - s_suppkey: int32, - s_name: string, - s_address: string, - s_nationkey: int32, - s_phone: string, - s_acctbal: double, - s_comment: string -} - -create type NationType as closed { - n_nationkey: int32, - n_name: string, - n_regionkey: int32, - n_comment: string -} - -create type LineItemType as closed { - l_orderkey: int32, - l_partkey: int32, - l_suppkey: int32, - l_linenumber: int32, - l_quantity: double, - l_extendedprice: double, - l_discount: double, - l_tax: double, - l_returnflag: string, - l_linestatus: string, - l_shipdate: string, - l_commitdate: string, - l_receiptdate: string, - l_shipinstruct: string, - l_shipmode: string, - l_comment: string -} - -create type PartType as closed { - p_partkey: int32, - p_name: string, - p_mfgr: string, - p_brand: string, - p_type: string, - p_size: int32, - p_container: string, - p_retailprice: double, - p_comment: string -} - -create type PartSuppType as closed { - ps_partkey: int32, - ps_suppkey: int32, - ps_availqty: int32, - ps_supplycost: double, - ps_comment: string -} - - -create nodegroup group1 if not exists on asterix_nc1, asterix_nc2; - -write output to asterix_nc1:"/tmp/loj-super-key_01.adm"; - -create dataset LineItems(LineItemType) - primary key l_partkey, l_linenumber on group1; -create dataset PartSupp(PartSuppType) - primary key ps_partkey, ps_suppkey on group1; - - -for $ps in dataset('PartSupp') -let $items := - for $li in dataset('LineItems') - where $li.l_partkey = $ps.ps_partkey - and $li.l_suppkey = $ps.ps_suppkey - and $li.l_extendedprice = $ps.ps_supplycost - return $li -return { - "partsupp": $ps, - "li": $items -} - - http://git-wip-us.apache.org/repos/asf/asterixdb/blob/33a656d1/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.sqlpp ---------------------------------------------------------------------- 
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.sqlpp new file mode 100644 index 0000000..9705785 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/loj-super-key_02.sqlpp @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +/* Optimizer test query loj-super-key_02: for each PartSupp record, collects the matching LineItems records through a correlated subquery. NOTE(review): "loj" presumably stands for left outer join - confirm against the expected plan. */ +drop dataverse `loj-super-key_02` if exists; +create dataverse `loj-super-key_02`; + +use `loj-super-key_02`; + +/* Closed record types. Only LineItemType and PartSuppType back the datasets created below; SupplierType, NationType and PartType are declared but not referenced again in this script. */ +create type `loj-super-key_02`.SupplierType as + closed { + s_suppkey : integer, + s_name : string, + s_address : string, + s_nationkey : integer, + s_phone : string, + s_acctbal : double, + s_comment : string +}; + +create type `loj-super-key_02`.NationType as + closed { + n_nationkey : integer, + n_name : string, + n_regionkey : integer, + n_comment : string +}; + +create type `loj-super-key_02`.LineItemType as + closed { + l_orderkey : integer, + l_partkey : integer, + l_suppkey : integer, + l_linenumber : integer, + l_quantity : double, + l_extendedprice : double, + l_discount : double, + l_tax : double, + l_returnflag : string, + l_linestatus : string, + l_shipdate : string, + l_commitdate : string, + l_receiptdate : string, + l_shipinstruct : string, + l_shipmode : string, + l_comment : string +}; + +create type `loj-super-key_02`.PartType as + closed { + p_partkey : integer, + p_name : string, + p_mfgr : string, + p_brand : string, + p_type : string, + p_size : integer, + p_container : string, + p_retailprice : double, + p_comment : string +}; + +create type `loj-super-key_02`.PartSuppType as + closed { + ps_partkey : integer, + ps_suppkey : integer, + ps_availqty : integer, + ps_supplycost : double, + ps_comment : string +}; + +create nodegroup group1 if not exists on + asterix_nc1, + asterix_nc2 +; /* NOTE(review): the output path below says loj-super-key_01.adm although this file and its dataverse are loj-super-key_02 - likely a copy-paste leftover; confirm whether it should be _02. */ +write output to asterix_nc1:"/tmp/loj-super-key_01.adm"; +create dataset LineItems(LineItemType) primary key l_partkey,l_linenumber on group1; + +create dataset PartSupp(PartSuppType) primary key ps_partkey,ps_suppkey on group1; +/* The outer query over PartSupp has no filter of its own; items is bound per ps to the LineItems records matching on l_partkey, l_suppkey and l_extendedprice, which covers only l_partkey of LineItems' primary key (l_partkey, l_linenumber). */ +select element {'partsupp':ps,'li':items} +from PartSupp as ps +with items as ( + select element li + from LineItems as li + where ((li.l_partkey = ps.ps_partkey) and (li.l_suppkey = ps.ps_suppkey) and (li.l_extendedprice = ps.ps_supplycost)) + ) +;