asterixdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wangs...@apache.org
Subject incubator-asterixdb git commit: ASTERIXDB-1233: Fixed the bulk-loading with an inverted index on an open-type field
Date Tue, 22 Mar 2016 04:19:13 GMT
Repository: incubator-asterixdb
Updated Branches:
  refs/heads/master 79821dfe6 -> 36fbc4160


ASTERIXDB-1233: Fixed the bulk-loading with an inverted index on an open-type field

Change-Id: If58f594c0a7b6f4bca45b13ceaef07b605d2fe22
Reviewed-on: https://asterix-gerrit.ics.uci.edu/740
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ildar Absalyamov <ildar.absalyamov@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/commit/36fbc416
Tree: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/tree/36fbc416
Diff: http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/diff/36fbc416

Branch: refs/heads/master
Commit: 36fbc41601f35061186e897f035a8e8dbf0ab238
Parents: 79821df
Author: Taewoo Kim <wangsaeu@yahoo.com>
Authored: Mon Mar 21 20:14:45 2016 -0700
Committer: Taewoo Kim <wangsaeu@yahoo.com>
Committed: Mon Mar 21 21:14:04 2016 -0700

----------------------------------------------------------------------
 .../load-with-index-open_02.1.ddl.aql           | 30 +++++++++++
 .../load-with-index-open_02.2.update.aql        | 23 +++++++++
 .../load-with-index-open_02.3.query.aql         | 24 +++++++++
 .../load-with-index-open_02.1.adm               |  1 +
 .../src/test/resources/runtimets/testsuite.xml  |  5 ++
 .../metadata/declared/AqlMetadataProvider.java  | 53 +++++++++++---------
 6 files changed, 111 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/36fbc416/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
new file mode 100644
index 0000000..f736ebc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse OpenTinySocial if exists;
+create dataverse OpenTinySocial;
+use dataverse OpenTinySocial;
+create type FacebookMessageType as
+{ message-id: int64 }
+
+create dataset FacebookMessages(FacebookMessageType)
+primary key message-id;
+create index fbAuthorIdx on FacebookMessages(author-id: int64) type btree enforced;
+create index fbSenderLocIndex on FacebookMessages(sender-location: point) type rtree enforced;
+create index fbMessageIdx on FacebookMessages(message: string) type keyword enforced;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/36fbc416/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
new file mode 100644
index 0000000..f7740dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse OpenTinySocial;
+
+load dataset FacebookMessages
+using localfs
+(("path"="asterix_nc1://data/tinysocial/fbm.adm"),("format"="adm")) pre-sorted;

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/36fbc416/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
new file mode 100644
index 0000000..d97df77
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse OpenTinySocial;
+
+count(
+for $c in dataset('FacebookMessages')
+return $c
+);

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/36fbc416/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
new file mode 100644
index 0000000..60d3b2f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
@@ -0,0 +1 @@
+15

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/36fbc416/asterix-app/src/test/resources/runtimets/testsuite.xml
----------------------------------------------------------------------
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 81480fd..115cf92 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -1764,6 +1764,11 @@
             </compilation-unit>
         </test-case>
         <test-case FilePath="dml">
+            <compilation-unit name="load-with-index-open_02">
+                <output-dir compare="Text">load-with-index-open_02</output-dir>
+            </compilation-unit>
+        </test-case>
+        <test-case FilePath="dml">
             <compilation-unit name="load-with-ngram-index-open">
                 <output-dir compare="Text">load-with-ngram-index-open</output-dir>
             </compilation-unit>

http://git-wip-us.apache.org/repos/asf/incubator-asterixdb/blob/36fbc416/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
----------------------------------------------------------------------
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
index 90bec64..5eba66c 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
@@ -589,7 +589,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
 
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDatasetDataScannerRuntime(
             JobSpecification jobSpec, IAType itemType, IAdapterFactory adapterFactory, IDataFormat format)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         if (itemType.getTypeTag() != ATypeTag.RECORD) {
             throw new AlgebricksException("Can only scan datasets of records.");
         }
@@ -676,7 +676,8 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
                 }
                 Pair<IBinaryComparatorFactory[], ITypeTraits[]> comparatorFactoriesAndTypeTraits = getComparatorFactoriesAndTypeTraitsOfSecondaryBTreeIndex(
                         secondaryIndex.getIndexType(), secondaryIndex.getKeyFieldNames(),
-                        secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType, dataset.getDatasetType());
+                        secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType,
+                        dataset.getDatasetType());
                 comparatorFactories = comparatorFactoriesAndTypeTraits.first;
                 typeTraits = comparatorFactoriesAndTypeTraits.second;
                 if (filterTypeTraits != null) {
@@ -799,21 +800,21 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
         }
 
         for (int j = 0; j < pidxKeyFieldCount; ++j, ++i) {
-           IAType keyType = null;
-           try {
-               switch (dsType) {
-                   case INTERNAL:
-                   keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j));
-                   break;
-               case EXTERNAL:
-                   keyType = IndexingConstants.getFieldType(j);
-                   break;
-               default:
-                   throw new AlgebricksException("Unknown Dataset Type");
-               }
-           } catch (AsterixException e) {
-               throw new AlgebricksException(e);
-           }
+            IAType keyType = null;
+            try {
+                switch (dsType) {
+                    case INTERNAL:
+                        keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j));
+                        break;
+                    case EXTERNAL:
+                        keyType = IndexingConstants.getFieldType(j);
+                        break;
+                    default:
+                        throw new AlgebricksException("Unknown Dataset Type");
+                }
+            } catch (AsterixException e) {
+                throw new AlgebricksException(e);
+            }
             comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType,
                     true);
             typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType);
@@ -1363,7 +1364,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable>
secondaryKeys,
             AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext
context,
             JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload)
-                    throws AlgebricksException {
+            throws AlgebricksException {
 
         // Sanity checks.
         if (primaryKeys.size() > 1) {
@@ -1467,6 +1468,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
                     dataset.getDatasetName(), indexName);
 
             List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames();
+            List<IAType> secondaryKeyTypeEntries = secondaryIndex.getKeyFieldTypes();
 
             int numTokenFields = (!isPartitioned) ? secondaryKeys.size() : secondaryKeys.size() + 1;
             ITypeTraits[] tokenTypeTraits = new ITypeTraits[numTokenFields];
@@ -1476,7 +1478,8 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             // return the derived type.
             // e.g. UNORDERED LIST -> return UNORDERED LIST type
             IAType secondaryKeyType = null;
-            Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(secondaryKeyExprs.get(0), recType);
+            Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypeEntries.get(0),
+                    secondaryKeyExprs.get(0), recType);
             secondaryKeyType = keyPairType.first;
             List<List<String>> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
             i = 0;
@@ -1552,7 +1555,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable>
primaryKeys,
             List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
             ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context,
JobSpecification spec)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         return getIndexInsertOrDeleteRuntime(IndexOperation.DELETE, dataSourceIndex, propagatedSchema,
inputSchemas,
                 typeEnv, primaryKeys, secondaryKeys, additionalNonKeyFields, filterExpr,
recordDesc, context, spec,
                 false);
@@ -1560,7 +1563,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
 
     private AsterixTupleFilterFactory createTupleFilterFactory(IOperatorSchema[] inputSchemas,
             IVariableTypeEnvironment typeEnv, ILogicalExpression filterExpr, JobGenContext
context)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         // No filtering condition.
         if (filterExpr == null) {
             return null;
@@ -2233,7 +2236,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             JobSpecification jobSpec, Dataset dataset, Index secondaryIndex, int[] ridIndexes,
boolean retainInput,
             IVariableTypeEnvironment typeEnv, List<LogicalVariable> outputVars, IOperatorSchema
opSchema,
             JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         try {
             // Get data type
             IAType itemType = null;
@@ -2485,7 +2488,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory
filterFactory,
             RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexType
indexType,
             List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         // Check the index is length-partitioned or not.
         boolean isPartitioned;
         if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
@@ -2710,7 +2713,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory
filterFactory,
             RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec,
             List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         try {
             Dataset dataset = MetadataManager.INSTANCE.getDataset(mdTxnCtx, dataverseName,
datasetName);
 
@@ -2864,7 +2867,7 @@ public class AqlMetadataProvider implements IMetadataProvider<AqlSourceId,
Strin
             List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory
filterFactory,
             RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec,
             List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         // we start with the btree
         Dataset dataset = findDataset(dataverseName, datasetName);
         if (dataset == null) {


Mime
View raw message