From: "Ildar Absalyamov (Code Review)"
Reply-To: ildar.absalyamov@gmail.com
Date: Sun, 6 Dec 2015 12:08:28 -0800
To: undisclosed-recipients:;
Subject: Change in asterixdb[master]: First version of LSM-based statistics
X-Gerrit-MessageType: newchange
X-Gerrit-Change-Id: I97a110c593a03fc98403557cbe0e77ca08fb9646
X-Gerrit-Commit: 841c1f58aaf85e4f97b3ac14a7d2b65ddfc13349

Ildar Absalyamov has uploaded a new change for review.

    https://asterix-gerrit.ics.uci.edu/539

Change subject: First version of LSM-based statistics
......................................................................
First version of LSM-based statistics

Change-Id: I97a110c593a03fc98403557cbe0e77ca08fb9646
---
M asterix-app/src/main/java/org/apache/asterix/file/DatasetOperations.java
M asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java
M asterix-app/src/main/java/org/apache/asterix/file/IndexOperations.java
M asterix-app/src/main/java/org/apache/asterix/file/SecondaryBTreeOperationsHelper.java
M asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java
M asterix-app/src/main/resources/asterix-build-configuration.xml
M asterix-common/src/main/java/org/apache/asterix/common/config/AsterixStorageProperties.java
M asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java
M asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java
M asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
A asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/AqlOrdinalPrimitiveValueProviderFactory.java
A asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DatePrimitiveValueProviderFactory.java
A asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DateTimePrimitiveValueProviderFactory.java
A asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DayTimeDurationPrimitiveValueProviderFactory.java
A asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/TimePrimitiveValueProviderFactory.java
A asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/YearMonthDurationPrimitiveValueProviderFactory.java
M asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlTypeTraitProvider.java
M asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/ExternalBTreeLocalResourceMetadata.java
M asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/LSMBTreeLocalResourceMetadata.java
19 files changed, 599 insertions(+), 291 deletions(-)

git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/39/539/1

diff --git a/asterix-app/src/main/java/org/apache/asterix/file/DatasetOperations.java b/asterix-app/src/main/java/org/apache/asterix/file/DatasetOperations.java
index 013e021..dd96a49 100644
--- a/asterix-app/src/main/java/org/apache/asterix/file/DatasetOperations.java
+++ b/asterix-app/src/main/java/org/apache/asterix/file/DatasetOperations.java
@@ -31,6 +31,7 @@
 import org.apache.asterix.common.exceptions.ACIDException;
 import org.apache.asterix.common.exceptions.AsterixException;
 import org.apache.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactory;
+import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlOrdinalPrimitiveValueProviderFactory;
 import org.apache.asterix.formats.base.IDataFormat;
 import org.apache.asterix.metadata.MetadataManager;
 import org.apache.asterix.metadata.declared.AqlMetadataProvider;
@@ -60,6 +61,7 @@
 import org.apache.hyracks.storage.am.lsm.btree.dataflow.LSMBTreeDataflowHelperFactory;
 import org.apache.hyracks.storage.am.lsm.common.api.ILSMMergePolicyFactory;
 import org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor;
+import org.apache.hyracks.storage.am.statistics.common.SynopsisType;
 import org.apache.hyracks.storage.common.file.ILocalResourceFactoryProvider;
 import org.apache.hyracks.storage.common.file.LocalResource;
@@
-68,8 +70,8 @@ private static Logger LOGGER = Logger.getLogger(DatasetOperations.class.getName()); public static JobSpecification createDropDatasetJobSpec(CompiledDatasetDropStatement datasetDropStmt, - AqlMetadataProvider metadataProvider) throws AlgebricksException, HyracksDataException, RemoteException, - ACIDException, AsterixException { + AqlMetadataProvider metadataProvider) + throws AlgebricksException, HyracksDataException, RemoteException, ACIDException, AsterixException { String dataverseName = null; if (datasetDropStmt.getDataverseName() != null) { @@ -119,12 +121,13 @@ IndexDropOperatorDescriptor primaryBtreeDrop = new IndexDropOperatorDescriptor(specPrimary, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - splitsAndConstraint.first, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - dataset.getDatasetId()), compactionInfo.first, compactionInfo.second, + splitsAndConstraint.first, + new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), + compactionInfo.first, compactionInfo.second, new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, - storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, - filterCmpFactories, btreeFields, filterFields, !temp)); + storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, + btreeFields, filterFields, !temp, false, null, null)); AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(specPrimary, primaryBtreeDrop, splitsAndConstraint.second); @@ -183,12 +186,14 @@ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, splitsAndConstraint.first, typeTraits, comparatorFactories, bloomFilterKeyFields, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), - compactionInfo.first, compactionInfo.second, new PrimaryIndexOperationTrackerProvider(dataset - .getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties - .getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, - btreeFields, filterFields, !temp), localResourceFactoryProvider, - NoOpOperationCallbackFactory.INSTANCE); + compactionInfo.first, compactionInfo.second, + new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, + btreeFields, filterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, typeTraits + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), + localResourceFactoryProvider, NoOpOperationCallbackFactory.INSTANCE); AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexCreateOp, splitsAndConstraint.second); spec.addRoot(indexCreateOp); @@ -238,16 +243,19 @@ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, splitsAndConstraint.first, typeTraits, comparatorFactories, blooFilterKeyFields, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), - compactionInfo.first, compactionInfo.second, new PrimaryIndexOperationTrackerProvider( - 
dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, - storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, - filterCmpFactories, btreeFields, filterFields, !temp), NoOpOperationCallbackFactory.INSTANCE); - AlgebricksPartitionConstraintHelper - .setPartitionConstraintInJobSpec(spec, compactOp, splitsAndConstraint.second); + compactionInfo.first, compactionInfo.second, + new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, + btreeFields, filterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), + NoOpOperationCallbackFactory.INSTANCE); + AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, + splitsAndConstraint.second); - AlgebricksPartitionConstraintHelper - .setPartitionConstraintInJobSpec(spec, compactOp, splitsAndConstraint.second); + AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, + splitsAndConstraint.second); spec.addRoot(compactOp); return spec; } diff --git a/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java b/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java index 1b7bd1e..13f6218 100644 --- a/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java +++ b/asterix-app/src/main/java/org/apache/asterix/file/ExternalIndexingOperations.java @@ -27,11 +27,6 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - import org.apache.asterix.common.api.ILocalResourceMetadata; import org.apache.asterix.common.config.AsterixStorageProperties; import org.apache.asterix.common.config.DatasetConfig.DatasetType; @@ -80,6 +75,10 @@ import org.apache.asterix.transaction.management.service.transaction.AsterixRuntimeComponentsProvider; import org.apache.asterix.translator.CompiledStatements.CompiledCreateIndexStatement; import org.apache.asterix.translator.CompiledStatements.CompiledIndexDropStatement; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint; import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraintHelper; import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException; @@ -107,6 +106,7 @@ public static final List> FILE_INDEX_FIELD_NAMES = new ArrayList>(); public static final ArrayList FILE_INDEX_FIELD_TYPES = new ArrayList(); + static { FILE_INDEX_FIELD_NAMES.add(new ArrayList(Arrays.asList(""))); FILE_INDEX_FIELD_TYPES.add(BuiltinType.ASTRING); @@ -128,8 +128,8 @@ public static boolean datasetUsesHiveAdapter(ExternalDatasetDetails ds) { String adapter = ds.getAdapter(); - return (adapter.equalsIgnoreCase("hive") || adapter - .equalsIgnoreCase("org.apache.asterix.external.dataset.adapter.HIVEAdapter")); + return (adapter.equalsIgnoreCase("hive") + || adapter.equalsIgnoreCase("org.apache.asterix.external.dataset.adapter.HIVEAdapter")); } public static 
boolean isValidIndexName(String datasetName, String indexName) { @@ -154,7 +154,8 @@ return IndexingConstants.getBuddyBtreeComparatorFactories(); } - public static ArrayList getSnapshotFromExternalFileSystem(Dataset dataset) throws AlgebricksException { + public static ArrayList getSnapshotFromExternalFileSystem(Dataset dataset) + throws AlgebricksException { ArrayList files = new ArrayList(); ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails(); try { @@ -176,9 +177,9 @@ if (fileStatuses[i].isDirectory()) { listSubFiles(dataset, fs, fileStatuses[i], files); } else { - files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), - nextFileNumber, fileStatuses[i].getPath().toUri().getPath(), new Date(fileStatuses[i] - .getModificationTime()), fileStatuses[i].getLen(), + files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), nextFileNumber, + fileStatuses[i].getPath().toUri().getPath(), + new Date(fileStatuses[i].getModificationTime()), fileStatuses[i].getLen(), ExternalFilePendingOp.PENDING_NO_OP)); } } @@ -223,7 +224,7 @@ public static JobSpecification buildFilesIndexReplicationJobSpec(Dataset dataset, ArrayList externalFilesSnapshot, AqlMetadataProvider metadataProvider, boolean createIndex) - throws MetadataException, AlgebricksException { + throws MetadataException, AlgebricksException { JobSpecification spec = JobSpecificationUtils.createJobSpecification(); IAsterixPropertiesProvider asterixPropertiesProvider = AsterixAppContextInfo.getInstance(); AsterixStorageProperties storageProperties = asterixPropertiesProvider.getStorageProperties(); @@ -232,21 +233,21 @@ ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first; Map mergePolicyFactoryProperties = compactionInfo.second; Pair secondarySplitsAndConstraint = metadataProvider - .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(), - dataset.getDatasetName(), getFilesIndexName(dataset.getDatasetName()), true); + .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(), dataset.getDatasetName(), + getFilesIndexName(dataset.getDatasetName()), true); IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first; FilesIndexDescription filesIndexDescription = new FilesIndexDescription(); ILocalResourceMetadata localResourceMetadata = new ExternalBTreeLocalResourceMetadata( - filesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS, - filesIndexDescription.FILES_INDEX_COMP_FACTORIES, new int[] { 0 }, false, dataset.getDatasetId(), - mergePolicyFactory, mergePolicyFactoryProperties); + filesIndexDescription.EXTERNAL_FILE_INDEX_TYPE_TRAITS, filesIndexDescription.FILES_INDEX_COMP_FACTORIES, + new int[] { 0 }, false, dataset.getDatasetId(), mergePolicyFactory, mergePolicyFactoryProperties); PersistentLocalResourceFactoryProvider localResourceFactoryProvider = new PersistentLocalResourceFactoryProvider( localResourceMetadata, LocalResource.ExternalBTreeResource); ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory( - mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true); + mergePolicyFactory, mergePolicyFactoryProperties, + new 
SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true, false, null, null); ExternalFilesIndexOperatorDescriptor externalFilesOp = new ExternalFilesIndexOperatorDescriptor(spec, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, secondaryFileSplitProvider, indexDataflowHelperFactory, localResourceFactoryProvider, @@ -260,7 +261,7 @@ /** * This method create an indexing operator that index records in HDFS - * + * * @param jobSpec * @param itemType * @param dataset @@ -297,7 +298,7 @@ * deleteedFiles should contain files that are no longer there in the file system * appendedFiles should have the new file information of existing files * The method should return false in case of zero delta - * + * * @param dataset * @param metadataFiles * @param addedFiles @@ -309,7 +310,7 @@ */ public static boolean isDatasetUptodate(Dataset dataset, List metadataFiles, List addedFiles, List deletedFiles, List appendedFiles) - throws MetadataException, AlgebricksException { + throws MetadataException, AlgebricksException { boolean uptodate = true; int newFileNumber = metadataFiles.get(metadataFiles.size() - 1).getFileNumber() + 1; @@ -340,9 +341,10 @@ } else { // Same file name, Different file mod date -> delete and add metadataFile.setPendingOp(ExternalFilePendingOp.PENDING_DROP_OP); - deletedFiles.add(new ExternalFile(metadataFile.getDataverseName(), metadataFile - .getDatasetName(), 0, metadataFile.getFileName(), metadataFile.getLastModefiedTime(), - metadataFile.getSize(), ExternalFilePendingOp.PENDING_DROP_OP)); + deletedFiles + .add(new ExternalFile(metadataFile.getDataverseName(), metadataFile.getDatasetName(), 0, + metadataFile.getFileName(), metadataFile.getLastModefiedTime(), + metadataFile.getSize(), ExternalFilePendingOp.PENDING_DROP_OP)); fileSystemFile.setPendingOp(ExternalFilePendingOp.PENDING_ADD_OP); fileSystemFile.setFileNumber(newFileNumber); addedFiles.add(fileSystemFile); @@ -382,8 +384,8 @@ if (metadataFile.getPendingOp() == ExternalFilePendingOp.PENDING_NO_OP) { metadataFile.setPendingOp(ExternalFilePendingOp.PENDING_DROP_OP); deletedFiles.add(new ExternalFile(metadataFile.getDataverseName(), metadataFile.getDatasetName(), - newFileNumber, metadataFile.getFileName(), metadataFile.getLastModefiedTime(), metadataFile - .getSize(), metadataFile.getPendingOp())); + newFileNumber, metadataFile.getFileName(), metadataFile.getLastModefiedTime(), + metadataFile.getSize(), metadataFile.getPendingOp())); newFileNumber++; uptodate = false; } @@ -421,13 +423,15 @@ metadataProvider.getMetadataTxnContext()); IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - splitsAndConstraint.first, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - dataset.getDatasetId()), compactionInfo.first, compactionInfo.second, + splitsAndConstraint.first, + new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), + compactionInfo.first, compactionInfo.second, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, - 
storageProperties.getBloomFilterFalsePositiveRate(), false, null, null, null, null, !temp)); - AlgebricksPartitionConstraintHelper - .setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second); + storageProperties.getBloomFilterFalsePositiveRate(), false, null, null, null, null, !temp, + false, null, null)); + AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop, + splitsAndConstraint.second); spec.addRoot(btreeDrop); return spec; @@ -443,9 +447,9 @@ else if (file.getPendingOp() == ExternalFilePendingOp.PENDING_APPEND_OP) { for (ExternalFile appendedFile : appendedFiles) { if (appendedFile.getFileName().equals(file.getFileName())) { - files.add(new ExternalFile(file.getDataverseName(), file.getDatasetName(), - file.getFileNumber(), file.getFileName(), file.getLastModefiedTime(), appendedFile - .getSize(), ExternalFilePendingOp.PENDING_NO_OP)); + files.add(new ExternalFile(file.getDataverseName(), file.getDatasetName(), file.getFileNumber(), + file.getFileName(), file.getLastModefiedTime(), appendedFile.getSize(), + ExternalFilePendingOp.PENDING_NO_OP)); } } } @@ -549,7 +553,7 @@ new SecondaryIndexOperationTrackerProvider(ds.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(ds), true); + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(ds), true, false, null, null); } private static ExternalBTreeWithBuddyDataflowHelperFactory getBTreeDataflowHelperFactory(Dataset ds, Index index, @@ -557,17 +561,16 @@ AsterixStorageProperties storageProperties, JobSpecification spec) { return new ExternalBTreeWithBuddyDataflowHelperFactory(mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider(ds.getDatasetId()), - AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), new int[] { index.getKeyFieldNames().size() }, - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(ds), true); + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(ds), true, false, null, null); } @SuppressWarnings("rawtypes") private static ExternalRTreeDataflowHelperFactory getRTreeDataflowHelperFactory(Dataset ds, Index index, ILSMMergePolicyFactory mergePolicyFactory, Map mergePolicyFactoryProperties, AsterixStorageProperties storageProperties, AqlMetadataProvider metadataProvider, JobSpecification spec) - throws AlgebricksException, AsterixException { + throws AlgebricksException, AsterixException { int numPrimaryKeys = getRIDSize(ds); List> secondaryKeyFields = index.getKeyFieldNames(); secondaryKeyFields.size(); @@ -594,8 +597,8 @@ .getSerializerDeserializer(nestedKeyType); secondaryRecFields[i] = keySerde; - secondaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory( - nestedKeyType, true); + secondaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE + .getBinaryComparatorFactory(nestedKeyType, true); secondaryTypeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(nestedKeyType); valueProviderFactories[i] = AqlPrimitiveValueProviderFactory.INSTANCE; } @@ -743,14 +746,15 @@ ILSMMergePolicyFactory mergePolicyFactory = compactionInfo.first; Map mergePolicyFactoryProperties = compactionInfo.second; Pair 
secondarySplitsAndConstraint = metadataProvider - .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(), - dataset.getDatasetName(), getFilesIndexName(dataset.getDatasetName()), true); + .splitProviderAndPartitionConstraintsForFilesIndex(dataset.getDataverseName(), dataset.getDatasetName(), + getFilesIndexName(dataset.getDatasetName()), true); IFileSplitProvider secondaryFileSplitProvider = secondarySplitsAndConstraint.first; ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory( - mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true); + mergePolicyFactory, mergePolicyFactoryProperties, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true, false, null, null); FilesIndexDescription filesIndexDescription = new FilesIndexDescription(); LSMTreeIndexCompactOperatorDescriptor compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, diff --git a/asterix-app/src/main/java/org/apache/asterix/file/IndexOperations.java b/asterix-app/src/main/java/org/apache/asterix/file/IndexOperations.java index c5870a6..ada369a 100644 --- a/asterix-app/src/main/java/org/apache/asterix/file/IndexOperations.java +++ b/asterix-app/src/main/java/org/apache/asterix/file/IndexOperations.java @@ -56,7 +56,7 @@ public static JobSpecification buildSecondaryIndexCreationJobSpec(CompiledCreateIndexStatement createIndexStmt, ARecordType recType, ARecordType enforcedType, AqlMetadataProvider metadataProvider) - throws AsterixException, AlgebricksException { + throws AsterixException, AlgebricksException { SecondaryIndexOperationsHelper secondaryIndexHelper = SecondaryIndexOperationsHelper .createIndexOperationsHelper(createIndexStmt.getIndexType(), createIndexStmt.getDataverseName(), createIndexStmt.getDatasetName(), createIndexStmt.getIndexName(), @@ -68,7 +68,7 @@ public static JobSpecification buildSecondaryIndexLoadingJobSpec(CompiledCreateIndexStatement createIndexStmt, ARecordType recType, ARecordType enforcedType, AqlMetadataProvider metadataProvider) - throws AsterixException, AlgebricksException { + throws AsterixException, AlgebricksException { SecondaryIndexOperationsHelper secondaryIndexHelper = SecondaryIndexOperationsHelper .createIndexOperationsHelper(createIndexStmt.getIndexType(), createIndexStmt.getDataverseName(), createIndexStmt.getDatasetName(), createIndexStmt.getIndexName(), @@ -109,13 +109,15 @@ // The index drop operation should be persistent regardless of temp datasets or permanent dataset. 
IndexDropOperatorDescriptor btreeDrop = new IndexDropOperatorDescriptor(spec, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - splitsAndConstraint.first, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - dataset.getDatasetId()), compactionInfo.first, compactionInfo.second, + splitsAndConstraint.first, + new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), + compactionInfo.first, compactionInfo.second, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, - storageProperties.getBloomFilterFalsePositiveRate(), false, null, null, null, null, !temp)); - AlgebricksPartitionConstraintHelper - .setPartitionConstraintInJobSpec(spec, btreeDrop, splitsAndConstraint.second); + storageProperties.getBloomFilterFalsePositiveRate(), false, null, null, null, null, !temp, + false, null, null)); + AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, btreeDrop, + splitsAndConstraint.second); spec.addRoot(btreeDrop); return spec; @@ -123,7 +125,7 @@ public static JobSpecification buildSecondaryIndexCompactJobSpec(CompiledIndexCompactStatement indexCompactStmt, ARecordType recType, ARecordType enforcedType, AqlMetadataProvider metadataProvider, Dataset dataset) - throws AsterixException, AlgebricksException { + throws AsterixException, AlgebricksException { SecondaryIndexOperationsHelper secondaryIndexHelper = SecondaryIndexOperationsHelper .createIndexOperationsHelper(indexCompactStmt.getIndexType(), indexCompactStmt.getDataverseName(), indexCompactStmt.getDatasetName(), indexCompactStmt.getIndexName(), diff --git a/asterix-app/src/main/java/org/apache/asterix/file/SecondaryBTreeOperationsHelper.java b/asterix-app/src/main/java/org/apache/asterix/file/SecondaryBTreeOperationsHelper.java index 7c14e5d..09dd4f5 100644 --- a/asterix-app/src/main/java/org/apache/asterix/file/SecondaryBTreeOperationsHelper.java +++ b/asterix-app/src/main/java/org/apache/asterix/file/SecondaryBTreeOperationsHelper.java @@ -30,6 +30,7 @@ import org.apache.asterix.common.exceptions.AsterixException; import org.apache.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactory; import org.apache.asterix.common.ioopcallbacks.LSMBTreeWithBuddyIOOperationCallbackFactory; +import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlOrdinalPrimitiveValueProviderFactory; import org.apache.asterix.metadata.declared.AqlMetadataProvider; import org.apache.asterix.metadata.entities.Index; import org.apache.asterix.metadata.external.IndexingConstants; @@ -71,6 +72,7 @@ import org.apache.hyracks.storage.am.lsm.btree.dataflow.ExternalBTreeWithBuddyDataflowHelperFactory; import org.apache.hyracks.storage.am.lsm.btree.dataflow.LSMBTreeDataflowHelperFactory; import org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor; +import org.apache.hyracks.storage.am.statistics.common.SynopsisType; import org.apache.hyracks.storage.common.file.ILocalResourceFactoryProvider; import org.apache.hyracks.storage.common.file.LocalResource; @@ -96,12 +98,14 @@ secondaryBTreeFields, secondaryFilterFields); localResourceFactoryProvider = new PersistentLocalResourceFactoryProvider(localResourceMetadata, LocalResource.LSMBTreeResource); - indexDataflowHelperFactory = new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - dataset.getDatasetId()), 
mergePolicyFactory, mergePolicyFactoryProperties, - new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + indexDataflowHelperFactory = new LSMBTreeDataflowHelperFactory( + new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), mergePolicyFactory, + mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), false, filterTypeTraits, filterCmpFactories, - secondaryBTreeFields, secondaryFilterFields, !dataset.getDatasetDetails().isTemp()); + secondaryBTreeFields, secondaryFilterFields, !dataset.getDatasetDetails().isTemp(), + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, secondaryTypeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE); } else { // External dataset local resource and dataflow helper int[] buddyBreeFields = new int[] { numSecondaryKeys }; @@ -115,7 +119,9 @@ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), buddyBreeFields, - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true); + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, secondaryTypeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE); } TreeIndexCreateOperatorDescriptor secondaryIndexCreateOp = new TreeIndexCreateOperatorDescriptor(spec, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, @@ -163,11 +169,14 @@ // Create secondary BTree bulk load op. AbstractTreeIndexOperatorDescriptor secondaryBulkLoadOp; ExternalBTreeWithBuddyDataflowHelperFactory dataflowHelperFactory = new ExternalBTreeWithBuddyDataflowHelperFactory( - mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, + mergePolicyFactory, mergePolicyFactoryProperties, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), new int[] { numSecondaryKeys }, - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true); + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, secondaryTypeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE); IOperatorDescriptor root; if (externalFiles != null) { // Transaction load @@ -222,19 +231,21 @@ AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties(); boolean temp = dataset.getDatasetDetails().isTemp(); // Create secondary BTree bulk load op. 
- TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp( - spec, - numSecondaryKeys, + TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(spec, numSecondaryKeys, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties - .getBloomFilterFalsePositiveRate(), false, filterTypeTraits, filterCmpFactories, - secondaryBTreeFields, secondaryFilterFields, !temp), GlobalConfig.DEFAULT_TREE_FILL_FACTOR); + LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), false, filterTypeTraits, + filterCmpFactories, secondaryBTreeFields, secondaryFilterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, secondaryTypeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), + GlobalConfig.DEFAULT_TREE_FILL_FACTOR); AlgebricksMetaOperatorDescriptor metaOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 0, - new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, new RecordDescriptor[] { secondaryRecDesc }); + new IPushRuntimeFactory[] { new SinkRuntimeFactory() }, + new RecordDescriptor[] { secondaryRecDesc }); // Connect the operators. spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0); spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, asterixAssignOp, 0); @@ -268,13 +279,16 @@ compactOp = new LSMTreeIndexCompactOperatorDescriptor(spec, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, secondaryFileSplitProvider, secondaryTypeTraits, - secondaryComparatorFactories, secondaryBloomFilterKeyFields, new LSMBTreeDataflowHelperFactory( - new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), mergePolicyFactory, - mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, + secondaryComparatorFactories, secondaryBloomFilterKeyFields, + new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), + mergePolicyFactory, mergePolicyFactoryProperties, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), false, filterTypeTraits, - filterCmpFactories, secondaryBTreeFields, secondaryFilterFields, !temp), + filterCmpFactories, secondaryBTreeFields, secondaryFilterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, secondaryTypeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), NoOpOperationCallbackFactory.INSTANCE); } else { // External dataset @@ -285,9 +299,11 @@ new ExternalBTreeWithBuddyDataflowHelperFactory(mergePolicyFactory, mergePolicyFactoryProperties, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, storageProperties - .getBloomFilterFalsePositiveRate(), new int[] { numSecondaryKeys }, - ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true), + LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, + 
storageProperties.getBloomFilterFalsePositiveRate(), new int[] { numSecondaryKeys }, + ExternalDatasetsRegistry.INSTANCE.getDatasetVersion(dataset), true, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, secondaryTypeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), NoOpOperationCallbackFactory.INSTANCE); } AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, compactOp, @@ -301,7 +317,7 @@ @SuppressWarnings("rawtypes") protected void setSecondaryRecDescAndComparators(IndexType indexType, List> secondaryKeyFields, List secondaryKeyTypes, int gramLength, AqlMetadataProvider metadataProvider) - throws AlgebricksException, AsterixException { + throws AlgebricksException, AsterixException { secondaryFieldAccessEvalFactories = new ICopyEvaluatorFactory[numSecondaryKeys + numFilterFields]; secondaryComparatorFactories = new IBinaryComparatorFactory[numSecondaryKeys + numPrimaryKeys]; secondaryBloomFilterKeyFields = new int[numSecondaryKeys]; diff --git a/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java b/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java index 0cc137b..4486751 100644 --- a/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java +++ b/asterix-app/src/main/java/org/apache/asterix/file/SecondaryIndexOperationsHelper.java @@ -152,10 +152,10 @@ } public static SecondaryIndexOperationsHelper createIndexOperationsHelper(IndexType indexType, String dataverseName, - String datasetName, String indexName, List> secondaryKeyFields, - List secondaryKeyTypes, boolean isEnforced, int gramLength, AqlMetadataProvider metadataProvider, + String datasetName, String indexName, List> secondaryKeyFields, List secondaryKeyTypes, + boolean isEnforced, int gramLength, AqlMetadataProvider metadataProvider, PhysicalOptimizationConfig physOptConf, ARecordType recType, ARecordType enforcedType) - throws AsterixException, AlgebricksException { + throws AsterixException, AlgebricksException { IAsterixPropertiesProvider asterixPropertiesProvider = AsterixAppContextInfo.getInstance(); SecondaryIndexOperationsHelper indexOperationsHelper = null; switch (indexType) { @@ -286,8 +286,8 @@ throw new AlgebricksException(e); } primaryRecFields[i] = serdeProvider.getSerializerDeserializer(keyType); - primaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory( - keyType, true); + primaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE + .getBinaryComparatorFactory(keyType, true); primaryTypeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType); primaryBloomFilterKeyFields[i] = i; } @@ -300,8 +300,8 @@ List> secondaryKeyFields, List secondaryKeyTypes, int gramLength, AqlMetadataProvider metadataProvider) throws AlgebricksException, AsterixException; - protected AbstractOperatorDescriptor createDummyKeyProviderOp(JobSpecification spec) throws AsterixException, - AlgebricksException { + protected AbstractOperatorDescriptor createDummyKeyProviderOp(JobSpecification spec) + throws AsterixException, AlgebricksException { // Build dummy tuple containing one field with a dummy value inside. 
ArrayTupleBuilder tb = new ArrayTupleBuilder(1); DataOutput dos = tb.getDataOutput(); @@ -345,12 +345,12 @@ primaryFileSplitProvider, primaryRecDesc.getTypeTraits(), primaryComparatorFactories, primaryBloomFilterKeyFields, lowKeyFields, highKeyFields, true, true, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), - mergePolicyFactory, mergePolicyFactoryProperties, new PrimaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties - .getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, - primaryBTreeFields, primaryFilterFields, !temp), false, false, null, - searchCallbackFactory, null, null); + mergePolicyFactory, mergePolicyFactoryProperties, + new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, + primaryBTreeFields, primaryFilterFields, !temp, false, null, null), + false, false, null, searchCallbackFactory, null, null); AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, primarySearchOp, primaryPartitionConstraint); @@ -433,7 +433,7 @@ protected TreeIndexBulkLoadOperatorDescriptor createTreeIndexBulkLoadOp(JobSpecification spec, int numSecondaryKeyFields, IIndexDataflowHelperFactory dataflowHelperFactory, float fillFactor) - throws MetadataException, AlgebricksException { + throws MetadataException, AlgebricksException { int[] fieldPermutation = new int[numSecondaryKeyFields + numPrimaryKeys + numFilterFields]; for (int i = 0; i < fieldPermutation.length; i++) { fieldPermutation[i] = i; @@ -541,7 +541,7 @@ protected ExternalIndexBulkModifyOperatorDescriptor createExternalIndexBulkModifyOp(JobSpecification spec, int numSecondaryKeyFields, IIndexDataflowHelperFactory dataflowHelperFactory, float fillFactor) - throws MetadataException, AlgebricksException { + throws MetadataException, AlgebricksException { int[] fieldPermutation = new int[numSecondaryKeyFields + numPrimaryKeys]; for (int i = 0; i < numSecondaryKeyFields + numPrimaryKeys; i++) { fieldPermutation[i] = i; diff --git a/asterix-app/src/main/resources/asterix-build-configuration.xml b/asterix-app/src/main/resources/asterix-build-configuration.xml index 8d0b7f3..d758aed 100644 --- a/asterix-app/src/main/resources/asterix-build-configuration.xml +++ b/asterix-app/src/main/resources/asterix-build-configuration.xml @@ -92,6 +92,13 @@ (Default = 8) + + storage.statistics.synopsis.type + Wavelet + The type of synopsis used for collecting dataset statistics. 
+ (Default = None) + + plot.activate false diff --git a/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixStorageProperties.java b/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixStorageProperties.java index 7511145..55d1934 100644 --- a/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixStorageProperties.java +++ b/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixStorageProperties.java @@ -18,6 +18,8 @@ */ package org.apache.asterix.common.config; +import org.apache.hyracks.storage.am.statistics.common.SynopsisType; + public class AsterixStorageProperties extends AbstractAsterixProperties { private static final String STORAGE_BUFFERCACHE_PAGESIZE_KEY = "storage.buffercache.pagesize"; @@ -46,6 +48,9 @@ private static final String STORAGE_LSM_BLOOMFILTER_FALSEPOSITIVERATE_KEY = "storage.lsm.bloomfilter.falsepositiverate"; private static double STORAGE_LSM_BLOOMFILTER_FALSEPOSITIVERATE_DEFAULT = 0.01; + + private static final String STORAGE_STATISTICS_SYNOPSIS_TYPE_KEY = "storage.statistics.synopsis.type"; + private static String STORAGE_STATISTICS_SYNOPSIS_TYPE_DEFAULT = SynopsisType.None.name(); public AsterixStorageProperties(AsterixPropertiesAccessor accessor) { super(accessor); @@ -81,10 +86,9 @@ } public int getMetadataMemoryComponentNumPages() { - return accessor - .getProperty(STORAGE_METADATA_MEMORYCOMPONENT_NUMPAGES_KEY, - STORAGE_METADATA_MEMORYCOMPONENT_NUMPAGES_DEFAULT, - PropertyInterpreters.getIntegerPropertyInterpreter()); + return accessor.getProperty(STORAGE_METADATA_MEMORYCOMPONENT_NUMPAGES_KEY, + STORAGE_METADATA_MEMORYCOMPONENT_NUMPAGES_DEFAULT, + PropertyInterpreters.getIntegerPropertyInterpreter()); } public int getMemoryComponentsNum() { @@ -101,4 +105,9 @@ return accessor.getProperty(STORAGE_LSM_BLOOMFILTER_FALSEPOSITIVERATE_KEY, STORAGE_LSM_BLOOMFILTER_FALSEPOSITIVERATE_DEFAULT, PropertyInterpreters.getDoublePropertyInterpreter()); } + + public SynopsisType getStatisticsSynopsisType() { + return SynopsisType.valueOf(accessor.getProperty(STORAGE_STATISTICS_SYNOPSIS_TYPE_KEY, + STORAGE_STATISTICS_SYNOPSIS_TYPE_DEFAULT, PropertyInterpreters.getStringPropertyInterpreter())); + } } \ No newline at end of file diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java index 505e1a9..a72e5a9 100644 --- a/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java +++ b/asterix-external-data/src/main/java/org/apache/asterix/external/indexing/dataflow/HDFSLookupAdapterFactory.java @@ -119,7 +119,8 @@ public static Pair buildExternalDataLookupRuntime( JobSpecification jobSpec, Dataset dataset, Index secondaryIndex, int[] ridIndexes, boolean retainInput, IVariableTypeEnvironment typeEnv, List outputVars, IOperatorSchema opSchema, - JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull) throws AlgebricksException { + JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull) + throws AlgebricksException { // Get data type IAType itemType = null; @@ -149,11 +150,12 @@ boolean temp = dataset.getDatasetDetails().isTemp(); // Create the file index data flow helper ExternalBTreeDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeDataflowHelperFactory( - compactionInfo.first, compactionInfo.second, new 
SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, metadataProvider.getStorageProperties() - .getBloomFilterFalsePositiveRate(), ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion( - dataset, metadataProvider), !temp); + compactionInfo.first, compactionInfo.second, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + metadataProvider.getStorageProperties().getBloomFilterFalsePositiveRate(), + ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, metadataProvider), !temp, false, + null, null); // Create the out record descriptor, appContext and fileSplitProvider for the files index RecordDescriptor outRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context); @@ -172,9 +174,9 @@ // Create the operator ExternalLoopkupOperatorDiscriptor op = new ExternalLoopkupOperatorDiscriptor(jobSpec, adapterFactory, outRecDesc, indexDataflowHelperFactory, retainInput, appContext.getIndexLifecycleManagerProvider(), - appContext.getStorageManagerInterface(), spPc.first, dataset.getDatasetId(), metadataProvider - .getStorageProperties().getBloomFilterFalsePositiveRate(), searchOpCallbackFactory, retainNull, - context.getNullWriterFactory()); + appContext.getStorageManagerInterface(), spPc.first, dataset.getDatasetId(), + metadataProvider.getStorageProperties().getBloomFilterFalsePositiveRate(), searchOpCallbackFactory, + retainNull, context.getNullWriterFactory()); // Return value return new Pair(op, spPc.second); diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java index c068657..b5a0273 100644 --- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java +++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/bootstrap/MetadataBootstrap.java @@ -402,7 +402,7 @@ .createMergePolicy(GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES, dataLifecycleManager), opTracker, runtimeContext.getLSMIOScheduler(), LSMBTreeIOOperationCallbackFactory.INSTANCE.createIOOperationCallback(), index.isPrimaryIndex(), - null, null, null, null, true); + null, null, null, null, true, false, null); lsmBtree.create(); resourceID = runtimeContext.getResourceIdFactory().createId(); ILocalResourceMetadata localResourceMetadata = new LSMBTreeLocalResourceMetadata(typeTraits, @@ -426,7 +426,7 @@ GlobalConfig.DEFAULT_COMPACTION_POLICY_PROPERTIES, dataLifecycleManager), opTracker, runtimeContext.getLSMIOScheduler(), LSMBTreeIOOperationCallbackFactory.INSTANCE.createIOOperationCallback(), index.isPrimaryIndex(), - null, null, null, null, true); + null, null, null, null, true, false, null); dataLifecycleManager.register(path, lsmBtree); } } @@ -444,7 +444,7 @@ } public static void startDDLRecovery() throws RemoteException, ACIDException, MetadataException { - //#. clean up any record which has pendingAdd/DelOp flag + //#. clean up any record which has pendingAdd/DelOp flag // as traversing all records from DATAVERSE_DATASET to DATASET_DATASET, and then to INDEX_DATASET. 
String dataverseName = null; String datasetName = null; diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java index d31dfcf..bc0c2a0 100644 --- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java +++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java @@ -55,6 +55,7 @@ import org.apache.asterix.common.parse.IParseFileSplitsDecl; import org.apache.asterix.common.transactions.IRecoveryManager.ResourceType; import org.apache.asterix.common.transactions.JobId; +import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlOrdinalPrimitiveValueProviderFactory; import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlPrimitiveValueProviderFactory; import org.apache.asterix.formats.base.IDataFormat; import org.apache.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider; @@ -180,6 +181,7 @@ import org.apache.hyracks.storage.am.lsm.rtree.dataflow.LSMRTreeDataflowHelperFactory; import org.apache.hyracks.storage.am.rtree.dataflow.RTreeSearchOperatorDescriptor; import org.apache.hyracks.storage.am.rtree.frames.RTreePolicyType; +import org.apache.hyracks.storage.am.statistics.common.SynopsisType; public class AqlMetadataProvider implements IMetadataProvider { public static final String TEMP_DATASETS_STORAGE_FOLDER = "temp"; @@ -349,8 +351,8 @@ // querying an external dataset Dataset dataset = ((DatasetDataSource) dataSource).getDataset(); String itemTypeName = dataset.getItemTypeName(); - IAType itemType = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, dataset.getDataverseName(), - itemTypeName).getDatatype(); + IAType itemType = MetadataManager.INSTANCE + .getDatatype(mdTxnCtx, dataset.getDataverseName(), itemTypeName).getDatatype(); ExternalDatasetDetails edd = (ExternalDatasetDetails) dataset.getDatasetDetails(); IAdapterFactory adapterFactory = getConfiguredAdapterFactory(dataset, edd.getAdapter(), edd.getProperties(), itemType, false, null); @@ -395,8 +397,8 @@ .getSerializerDeserializer(feedOutputType); RecordDescriptor feedDesc = new RecordDescriptor(new ISerializerDeserializer[] { payloadSerde }); - FeedPolicy feedPolicy = (FeedPolicy) ((AqlDataSource) dataSource).getProperties().get( - BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY); + FeedPolicy feedPolicy = (FeedPolicy) ((AqlDataSource) dataSource).getProperties() + .get(BuiltinFeedPolicies.CONFIG_FEED_POLICY_KEY); if (feedPolicy == null) { throw new AlgebricksException("Feed not configured with a policy"); } @@ -404,8 +406,8 @@ FeedConnectionId feedConnectionId = new FeedConnectionId(feedDataSource.getId().getDataverseName(), feedDataSource.getId().getDatasourceName(), feedDataSource.getTargetDataset()); feedCollector = new FeedCollectOperatorDescriptor(jobSpec, feedConnectionId, - feedDataSource.getSourceFeedId(), (ARecordType) feedOutputType, feedDesc, - feedPolicy.getProperties(), feedDataSource.getLocation()); + feedDataSource.getSourceFeedId(), feedOutputType, feedDesc, feedPolicy.getProperties(), + feedDataSource.getLocation()); return new Pair(feedCollector, determineLocationConstraint(feedDataSource)); @@ -435,8 +437,8 @@ if (activity.getDataverseName().equals(feedDataSource.getSourceFeedId().getDataverse()) && activity.getFeedName() .equals(feedDataSource.getSourceFeedId().getFeedName())) { - locations = activity.getFeedActivityDetails().get( - 
FeedActivityDetails.COMPUTE_LOCATIONS); + locations = activity.getFeedActivityDetails() + .get(FeedActivityDetails.COMPUTE_LOCATIONS); locationArray = locations.split(","); break; } @@ -490,8 +492,8 @@ private Pair buildLoadableDatasetScan(JobSpecification jobSpec, LoadableDataSource alds, IAdapterFactory adapterFactory, RecordDescriptor rDesc, boolean isPKAutoGenerated, List> primaryKeys, ARecordType recType, int pkIndex) throws AlgebricksException { - if (!(adapterFactory.getSupportedOperations().equals(SupportedOperation.READ) || adapterFactory - .getSupportedOperations().equals(SupportedOperation.READ_WRITE))) { + if (!(adapterFactory.getSupportedOperations().equals(SupportedOperation.READ) + || adapterFactory.getSupportedOperations().equals(SupportedOperation.READ_WRITE))) { throw new AlgebricksException(" External dataset adapter does not support read operation"); } ExternalDataScanOperatorDescriptor dataScanner = new ExternalDataScanOperatorDescriptor(jobSpec, rDesc, @@ -597,13 +599,13 @@ public Pair buildExternalDatasetDataScannerRuntime( JobSpecification jobSpec, IAType itemType, IAdapterFactory adapterFactory, IDataFormat format) - throws AlgebricksException { + throws AlgebricksException { if (itemType.getTypeTag() != ATypeTag.RECORD) { throw new AlgebricksException("Can only scan datasets of records."); } - if (!(adapterFactory.getSupportedOperations().equals(SupportedOperation.READ) || adapterFactory - .getSupportedOperations().equals(SupportedOperation.READ_WRITE))) { + if (!(adapterFactory.getSupportedOperations().equals(SupportedOperation.READ) + || adapterFactory.getSupportedOperations().equals(SupportedOperation.READ_WRITE))) { throw new AlgebricksException(" External dataset adapter does not support read operation"); } @@ -659,8 +661,8 @@ case EXTERNAL: String libraryName = primaryFeed.getAdaptorName().trim() .split(FeedConstants.NamingConstants.LIBRARY_NAME_SEPARATOR)[0]; - feedIngestor = new FeedIntakeOperatorDescriptor(jobSpec, primaryFeed, libraryName, adapterFactory - .getClass().getName(), factoryOutput.second, policyAccessor); + feedIngestor = new FeedIntakeOperatorDescriptor(jobSpec, primaryFeed, libraryName, + adapterFactory.getClass().getName(), factoryOutput.second, policyAccessor); break; } @@ -691,11 +693,11 @@ IBinaryComparatorFactory[] comparatorFactories; String itemTypeName = dataset.getItemTypeName(); - ARecordType itemType = (ARecordType) MetadataManager.INSTANCE.getDatatype(mdTxnCtx, - dataset.getDataverseName(), itemTypeName).getDatatype(); + ARecordType itemType = (ARecordType) MetadataManager.INSTANCE + .getDatatype(mdTxnCtx, dataset.getDataverseName(), itemTypeName).getDatatype(); ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, itemType); - IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, itemType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + itemType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = null; int[] btreeFields = null; @@ -768,34 +770,39 @@ txnSubsystemProvider, ResourceType.LSM_BTREE); } } - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); AsterixRuntimeComponentsProvider rtcProvider = AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER; BTreeSearchOperatorDescriptor btreeSearchOp; if 
(dataset.getDatasetType() == DatasetType.INTERNAL) { btreeSearchOp = new BTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), spPc.first, typeTraits, comparatorFactories, bloomFilterKeyFields, lowKeyFields, highKeyFields, - lowKeyInclusive, highKeyInclusive, new LSMBTreeDataflowHelperFactory( - new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), compactionInfo.first, - compactionInfo.second, isSecondary ? new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()) : new PrimaryIndexOperationTrackerProvider( - dataset.getDatasetId()), rtcProvider, - LSMBTreeIOOperationCallbackFactory.INSTANCE, + lowKeyInclusive, highKeyInclusive, + new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), + compactionInfo.first, compactionInfo.second, + isSecondary ? new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()) + : new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), + rtcProvider, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), !isSecondary, filterTypeTraits, - filterCmpFactories, btreeFields, filterFields, !temp), retainInput, retainNull, - context.getNullWriterFactory(), searchCallbackFactory, minFilterFieldIndexes, - maxFilterFieldIndexes); + filterCmpFactories, btreeFields, filterFields, !temp, + getStorageProperties().getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), + retainInput, retainNull, context.getNullWriterFactory(), searchCallbackFactory, + minFilterFieldIndexes, maxFilterFieldIndexes); } else { // External dataset <- use the btree with buddy btree-> // Be Careful of Key Start Index ? 
int[] buddyBreeFields = new int[] { numSecondaryKeys }; ExternalBTreeWithBuddyDataflowHelperFactory indexDataflowHelperFactory = new ExternalBTreeWithBuddyDataflowHelperFactory( - compactionInfo.first, compactionInfo.second, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, getStorageProperties() - .getBloomFilterFalsePositiveRate(), buddyBreeFields, - ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this), !temp); + compactionInfo.first, compactionInfo.second, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, + LSMBTreeWithBuddyIOOperationCallbackFactory.INSTANCE, + getStorageProperties().getBloomFilterFalsePositiveRate(), buddyBreeFields, + ExternalDatasetsRegistry.INSTANCE.getAndLockDatasetVersion(dataset, this), !temp, + getStorageProperties().getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE); btreeSearchOp = new ExternalBTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, rtcProvider, rtcProvider, spPc.first, typeTraits, comparatorFactories, bloomFilterKeyFields, lowKeyFields, highKeyFields, lowKeyInclusive, highKeyInclusive, indexDataflowHelperFactory, retainInput, @@ -858,17 +865,15 @@ Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(), dataset.getDatasetName(), indexName); if (secondaryIndex == null) { - throw new AlgebricksException("Code generation error: no index " + indexName + " for dataset " - + dataset.getDatasetName()); + throw new AlgebricksException( + "Code generation error: no index " + indexName + " for dataset " + dataset.getDatasetName()); } List> secondaryKeyFields = secondaryIndex.getKeyFieldNames(); List secondaryKeyTypes = secondaryIndex.getKeyFieldTypes(); int numSecondaryKeys = secondaryKeyFields.size(); if (numSecondaryKeys != 1) { - throw new AlgebricksException( - "Cannot use " - + numSecondaryKeys - + " fields as a key for the R-tree index. There can be only one field as a key for the R-tree index."); + throw new AlgebricksException("Cannot use " + numSecondaryKeys + + " fields as a key for the R-tree index. There can be only one field as a key for the R-tree index."); } Pair keyTypePair = Index.getNonNullableOpenFieldType(secondaryKeyTypes.get(0), secondaryKeyFields.get(0), recType); @@ -905,8 +910,8 @@ } ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, recType); - IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, recType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + recType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = null; int[] rtreeFields = null; if (filterTypeTraits != null) { @@ -919,8 +924,8 @@ } IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(keyType.getTypeTag()); - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); ISearchOperationCallbackFactory searchCallbackFactory = temp ? 
NoOpOperationCallbackFactory.INSTANCE : new SecondaryIndexSearchOperationCallbackFactory(); @@ -928,17 +933,19 @@ if (dataset.getDatasetType() == DatasetType.INTERNAL) { rtreeSearchOp = new RTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), - spPc.first, typeTraits, comparatorFactories, keyFields, new LSMRTreeDataflowHelperFactory( - valueProviderFactories, RTreePolicyType.RTREE, primaryComparatorFactories, + spPc.first, typeTraits, comparatorFactories, keyFields, + new LSMRTreeDataflowHelperFactory(valueProviderFactories, RTreePolicyType.RTREE, + primaryComparatorFactories, new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), compactionInfo.first, - compactionInfo.second, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMRTreeIOOperationCallbackFactory.INSTANCE, proposeLinearizer( - nestedKeyType.getTypeTag(), comparatorFactories.length), + compactionInfo.second, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, + LSMRTreeIOOperationCallbackFactory.INSTANCE, + proposeLinearizer(nestedKeyType.getTypeTag(), comparatorFactories.length), storageProperties.getBloomFilterFalsePositiveRate(), rtreeFields, btreeFields, - filterTypeTraits, filterCmpFactories, filterFields, !temp), retainInput, retainNull, - context.getNullWriterFactory(), searchCallbackFactory, minFilterFieldIndexes, - maxFilterFieldIndexes); + filterTypeTraits, filterCmpFactories, filterFields, !temp), + retainInput, retainNull, context.getNullWriterFactory(), searchCallbackFactory, + minFilterFieldIndexes, maxFilterFieldIndexes); } else { // External Dataset @@ -954,8 +961,8 @@ // Create the operator rtreeSearchOp = new ExternalRTreeSearchOperatorDescriptor(jobSpec, outputRecDesc, appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), - spPc.first, typeTraits, comparatorFactories, keyFields, indexDataflowHelperFactory, - retainInput, retainNull, context.getNullWriterFactory(), searchCallbackFactory); + spPc.first, typeTraits, comparatorFactories, keyFields, indexDataflowHelperFactory, retainInput, + retainNull, context.getNullWriterFactory(), searchCallbackFactory); } return new Pair(rtreeSearchOp, spPc.second); @@ -1034,8 +1041,8 @@ } String tName = dataset.getItemTypeName(); IAType itemType = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, aqlId.getDataverseName(), tName).getDatatype(); - AqlDataSourceType datasourceType = dataset.getDatasetType().equals(DatasetType.EXTERNAL) ? AqlDataSourceType.EXTERNAL_DATASET - : AqlDataSourceType.INTERNAL_DATASET; + AqlDataSourceType datasourceType = dataset.getDatasetType().equals(DatasetType.EXTERNAL) + ? 
AqlDataSourceType.EXTERNAL_DATASET : AqlDataSourceType.INTERNAL_DATASET; return new DatasetDataSource(aqlId, aqlId.getDataverseName(), aqlId.getDatasourceName(), itemType, datasourceType); } @@ -1102,8 +1109,8 @@ String indexName = primaryIndex.getIndexName(); String itemTypeName = dataset.getItemTypeName(); - ARecordType itemType = (ARecordType) MetadataManager.INSTANCE.getDatatype(mdTxnCtx, - dataset.getDataverseName(), itemTypeName).getDatatype(); + ARecordType itemType = (ARecordType) MetadataManager.INSTANCE + .getDatatype(mdTxnCtx, dataset.getDataverseName(), itemTypeName).getDatatype(); ITypeTraits[] typeTraits = DatasetUtils.computeTupleTypeTraits(dataset, itemType); IBinaryComparatorFactory[] comparatorFactories = DatasetUtils.computeKeysBinaryComparatorFactories(dataset, itemType, context.getBinaryComparatorFactoryProvider()); @@ -1115,8 +1122,8 @@ long numElementsHint = getCardinalityPerPartitionHint(dataset); ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, itemType); - IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, itemType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + itemType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = DatasetUtils.createFilterFields(dataset); int[] btreeFields = DatasetUtils.createBTreeFieldsWhenThereisAFilter(dataset); @@ -1125,18 +1132,21 @@ // right callback // (ex. what's the expected behavior when there is an error during // bulkload?) - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec, null, appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), splitsAndConstraint.first, typeTraits, comparatorFactories, bloomFilterKeyFields, fieldPermutation, GlobalConfig.DEFAULT_TREE_FILL_FACTOR, false, numElementsHint, true, new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), - compactionInfo.first, compactionInfo.second, new PrimaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, + compactionInfo.first, compactionInfo.second, + new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, - filterCmpFactories, btreeFields, filterFields, !temp)); + filterCmpFactories, btreeFields, filterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE)); return new Pair(btreeBulkLoad, splitsAndConstraint.second); } catch (MetadataException me) { @@ -1148,13 +1158,13 @@ IDataSource dataSource, IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List keys, LogicalVariable payload, List additionalNonKeyFields, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, boolean bulkload) - throws AlgebricksException { + throws AlgebricksException { String datasetName = dataSource.getId().getDatasourceName(); Dataset dataset = findDataset(dataSource.getId().getDataverseName(), datasetName); if (dataset == 
null) { - throw new AlgebricksException("Unknown dataset " + datasetName + " in dataverse " - + dataSource.getId().getDataverseName()); + throw new AlgebricksException( + "Unknown dataset " + datasetName + " in dataverse " + dataSource.getId().getDataverseName()); } boolean temp = dataset.getDatasetDetails().isTemp(); isTemporaryDatasetWriteJob = isTemporaryDatasetWriteJob && temp; @@ -1183,8 +1193,8 @@ String indexName = primaryIndex.getIndexName(); String itemTypeName = dataset.getItemTypeName(); - ARecordType itemType = (ARecordType) MetadataManager.INSTANCE.getDatatype(mdTxnCtx, - dataSource.getId().getDataverseName(), itemTypeName).getDatatype(); + ARecordType itemType = (ARecordType) MetadataManager.INSTANCE + .getDatatype(mdTxnCtx, dataSource.getId().getDataverseName(), itemTypeName).getDatatype(); ITypeTraits[] typeTraits = DatasetUtils.computeTupleTypeTraits(dataset, itemType); @@ -1203,24 +1213,28 @@ } ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, itemType); - IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, itemType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + itemType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = DatasetUtils.createFilterFields(dataset); int[] btreeFields = DatasetUtils.createBTreeFieldsWhenThereisAFilter(dataset); TransactionSubsystemProvider txnSubsystemProvider = new TransactionSubsystemProvider(); - IModificationOperationCallbackFactory modificationCallbackFactory = temp ? new TempDatasetPrimaryIndexModificationOperationCallbackFactory( - jobId, datasetId, primaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_BTREE) + IModificationOperationCallbackFactory modificationCallbackFactory = temp + ? 
new TempDatasetPrimaryIndexModificationOperationCallbackFactory(jobId, datasetId, + primaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_BTREE) : new PrimaryIndexModificationOperationCallbackFactory(jobId, datasetId, primaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_BTREE); - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); - IIndexDataflowHelperFactory idfh = new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - datasetId), compactionInfo.first, compactionInfo.second, new PrimaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), - true, filterTypeTraits, filterCmpFactories, btreeFields, filterFields, !temp); + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); + IIndexDataflowHelperFactory idfh = new LSMBTreeDataflowHelperFactory( + new AsterixVirtualBufferCacheProvider(datasetId), compactionInfo.first, compactionInfo.second, + new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, + storageProperties.getBloomFilterFalsePositiveRate(), true, filterTypeTraits, filterCmpFactories, + btreeFields, filterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE); IOperatorDescriptor op; if (bulkload) { long numElementsHint = getCardinalityPerPartitionHint(dataset); @@ -1246,7 +1260,7 @@ IDataSource dataSource, IOperatorSchema propagatedSchema, IVariableTypeEnvironment typeEnv, List keys, LogicalVariable payload, List additionalNonKeyFields, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, boolean bulkload) - throws AlgebricksException { + throws AlgebricksException { return getInsertOrDeleteRuntime(IndexOperation.INSERT, dataSource, propagatedSchema, typeEnv, keys, payload, additionalNonKeyFields, recordDesc, context, spec, bulkload); } @@ -1284,14 +1298,14 @@ AsterixTupleFilterFactory filterFactory = createTupleFilterFactory(inputSchemas, typeEnv, filterExpr, context); switch (secondaryIndex.getIndexType()) { case BTREE: { - return getBTreeDmlRuntime(dataverseName, datasetName, indexName, propagatedSchema, typeEnv, - primaryKeys, secondaryKeys, additionalNonKeyFields, filterFactory, recordDesc, context, spec, - indexOp, bulkload); + return getBTreeDmlRuntime(dataverseName, datasetName, indexName, propagatedSchema, typeEnv, primaryKeys, + secondaryKeys, additionalNonKeyFields, filterFactory, recordDesc, context, spec, indexOp, + bulkload); } case RTREE: { - return getRTreeDmlRuntime(dataverseName, datasetName, indexName, propagatedSchema, typeEnv, - primaryKeys, secondaryKeys, additionalNonKeyFields, filterFactory, recordDesc, context, spec, - indexOp, bulkload); + return getRTreeDmlRuntime(dataverseName, datasetName, indexName, propagatedSchema, typeEnv, primaryKeys, + secondaryKeys, additionalNonKeyFields, filterFactory, recordDesc, context, spec, indexOp, + bulkload); } case SINGLE_PARTITION_WORD_INVIX: case SINGLE_PARTITION_NGRAM_INVIX: @@ -1302,8 +1316,8 @@ indexOp, secondaryIndex.getIndexType(), bulkload); } default: { - throw new AlgebricksException("Insert and delete not implemented for index type: " - + secondaryIndex.getIndexType()); + throw new AlgebricksException( + "Insert and delete not 
implemented for index type: " + secondaryIndex.getIndexType()); } } } @@ -1374,7 +1388,7 @@ IVariableTypeEnvironment typeEnv, List primaryKeys, List secondaryKeys, AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload) - throws AlgebricksException { + throws AlgebricksException { // Sanity checks. if (primaryKeys.size() > 1) { @@ -1549,7 +1563,8 @@ tokenizerOp = new BinaryTokenizerOperatorDescriptor(spec, tokenKeyPairRecDesc, tokenizerFactory, docField, keyFields, isPartitioned, true); - return new Pair(tokenizerOp, splitsAndConstraint.second); + return new Pair(tokenizerOp, + splitsAndConstraint.second); } catch (MetadataException e) { throw new AlgebricksException(e); @@ -1564,7 +1579,7 @@ IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List primaryKeys, List secondaryKeys, List additionalNonKeyFields, ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec) - throws AlgebricksException { + throws AlgebricksException { return getIndexInsertOrDeleteRuntime(IndexOperation.DELETE, dataSourceIndex, propagatedSchema, inputSchemas, typeEnv, primaryKeys, secondaryKeys, additionalNonKeyFields, filterExpr, recordDesc, context, spec, false); @@ -1572,7 +1587,7 @@ private AsterixTupleFilterFactory createTupleFilterFactory(IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, ILogicalExpression filterExpr, JobGenContext context) - throws AlgebricksException { + throws AlgebricksException { // No filtering condition. if (filterExpr == null) { return null; @@ -1641,8 +1656,8 @@ dataset.getDatasetName(), indexName); ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, recType); - IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, recType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + recType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = null; int[] btreeFields = null; if (filterTypeTraits != null) { @@ -1662,15 +1677,15 @@ Pair keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypes.get(i), secondaryKeyNames.get(i), recType); IAType keyType = keyPairType.first; - comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory( - keyType, true); + comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType, + true); typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType); } List> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset); for (List partitioningKey : partitioningKeys) { IAType keyType = recType.getSubFieldType(partitioningKey); - comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory( - keyType, true); + comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType, + true); typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType); ++i; } @@ -1683,19 +1698,23 @@ JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId(); int datasetId = dataset.getDatasetId(); TransactionSubsystemProvider txnSubsystemProvider = new TransactionSubsystemProvider(); - IModificationOperationCallbackFactory modificationCallbackFactory = temp ? 
new TempDatasetSecondaryIndexModificationOperationCallbackFactory( - jobId, datasetId, modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, - ResourceType.LSM_BTREE) : new SecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, - modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_BTREE); + IModificationOperationCallbackFactory modificationCallbackFactory = temp + ? new TempDatasetSecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, + modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_BTREE) + : new SecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, + modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, + ResourceType.LSM_BTREE); - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); - IIndexDataflowHelperFactory idfh = new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - datasetId), compactionInfo.first, compactionInfo.second, + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); + IIndexDataflowHelperFactory idfh = new LSMBTreeDataflowHelperFactory( + new AsterixVirtualBufferCacheProvider(datasetId), compactionInfo.first, compactionInfo.second, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), false, filterTypeTraits, filterCmpFactories, - btreeFields, filterFields, !temp); + btreeFields, filterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE); IOperatorDescriptor op; if (bulkload) { long numElementsHint = getCardinalityPerPartitionHint(dataset); @@ -1707,14 +1726,17 @@ op = new AsterixLSMTreeInsertDeleteOperatorDescriptor(spec, recordDesc, appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), splitsAndConstraint.first, typeTraits, comparatorFactories, bloomFilterKeyFields, - fieldPermutation, indexOp, new LSMBTreeDataflowHelperFactory( - new AsterixVirtualBufferCacheProvider(datasetId), compactionInfo.first, - compactionInfo.second, new SecondaryIndexOperationTrackerProvider( - dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, + fieldPermutation, indexOp, + new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(datasetId), + compactionInfo.first, compactionInfo.second, + new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMBTreeIOOperationCallbackFactory.INSTANCE, storageProperties.getBloomFilterFalsePositiveRate(), false, filterTypeTraits, - filterCmpFactories, btreeFields, filterFields, !temp), filterFactory, - modificationCallbackFactory, false, indexName); + filterCmpFactories, btreeFields, filterFields, !temp, + storageProperties.getStatisticsSynopsisType() != SynopsisType.None, typeTraits, + AqlOrdinalPrimitiveValueProviderFactory.INSTANCE), + filterFactory, modificationCallbackFactory, false, indexName); } return new Pair(op, splitsAndConstraint.second); } catch (MetadataException e) { @@ -1853,8 +1875,8 @@ secondaryKeyType.getTypeTag(), indexType, secondaryIndex.getGramLength()); ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, recType); - IBinaryComparatorFactory[] filterCmpFactories = 
DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, recType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + recType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = null; int[] invertedIndexFields = null; @@ -1884,18 +1906,20 @@ JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId(); int datasetId = dataset.getDatasetId(); TransactionSubsystemProvider txnSubsystemProvider = new TransactionSubsystemProvider(); - IModificationOperationCallbackFactory modificationCallbackFactory = temp ? new TempDatasetSecondaryIndexModificationOperationCallbackFactory( - jobId, datasetId, modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, - ResourceType.LSM_INVERTED_INDEX) : new SecondaryIndexModificationOperationCallbackFactory(jobId, - datasetId, modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, - ResourceType.LSM_INVERTED_INDEX); + IModificationOperationCallbackFactory modificationCallbackFactory = temp + ? new TempDatasetSecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, + modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, + ResourceType.LSM_INVERTED_INDEX) + : new SecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, + modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, + ResourceType.LSM_INVERTED_INDEX); - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); IIndexDataflowHelperFactory indexDataFlowFactory; if (!isPartitioned) { - indexDataFlowFactory = new LSMInvertedIndexDataflowHelperFactory(new AsterixVirtualBufferCacheProvider( - datasetId), compactionInfo.first, compactionInfo.second, + indexDataFlowFactory = new LSMInvertedIndexDataflowHelperFactory( + new AsterixVirtualBufferCacheProvider(datasetId), compactionInfo.first, compactionInfo.second, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMInvertedIndexIOOperationCallbackFactory.INSTANCE, @@ -1992,8 +2016,8 @@ IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(spatialType.getTypeTag()); IPrimitiveValueProviderFactory[] valueProviderFactories = new IPrimitiveValueProviderFactory[numSecondaryKeys]; for (i = 0; i < numSecondaryKeys; i++) { - comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory( - nestedKeyType, true); + comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE + .getBinaryComparatorFactory(nestedKeyType, true); typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(nestedKeyType); valueProviderFactories[i] = AqlPrimitiveValueProviderFactory.INSTANCE; } @@ -2015,8 +2039,8 @@ } ITypeTraits[] filterTypeTraits = DatasetUtils.computeFilterTypeTraits(dataset, recType); - IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories( - dataset, recType, context.getBinaryComparatorFactoryProvider()); + IBinaryComparatorFactory[] filterCmpFactories = DatasetUtils.computeFilterBinaryComparatorFactories(dataset, + recType, context.getBinaryComparatorFactoryProvider()); int[] filterFields = null; int[] rtreeFields = null; if (filterTypeTraits != null) { @@ -2032,17 +2056,19 @@ JobId jobId = ((JobEventListenerFactory) spec.getJobletEventListenerFactory()).getJobId(); int 
datasetId = dataset.getDatasetId(); TransactionSubsystemProvider txnSubsystemProvider = new TransactionSubsystemProvider(); - IModificationOperationCallbackFactory modificationCallbackFactory = temp ? new TempDatasetSecondaryIndexModificationOperationCallbackFactory( - jobId, datasetId, modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, - ResourceType.LSM_RTREE) : new SecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, - modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_RTREE); + IModificationOperationCallbackFactory modificationCallbackFactory = temp + ? new TempDatasetSecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, + modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, ResourceType.LSM_RTREE) + : new SecondaryIndexModificationOperationCallbackFactory(jobId, datasetId, + modificationCallbackPrimaryKeyFields, txnSubsystemProvider, indexOp, + ResourceType.LSM_RTREE); - Pair> compactionInfo = DatasetUtils.getMergePolicyFactory( - dataset, mdTxnCtx); + Pair> compactionInfo = DatasetUtils + .getMergePolicyFactory(dataset, mdTxnCtx); IIndexDataflowHelperFactory idfh = new LSMRTreeDataflowHelperFactory(valueProviderFactories, - RTreePolicyType.RTREE, primaryComparatorFactories, new AsterixVirtualBufferCacheProvider( - dataset.getDatasetId()), compactionInfo.first, compactionInfo.second, - new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), + RTreePolicyType.RTREE, primaryComparatorFactories, + new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), compactionInfo.first, + compactionInfo.second, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, LSMRTreeIOOperationCallbackFactory.INSTANCE, proposeLinearizer(nestedKeyType.getTypeTag(), comparatorFactories.length), storageProperties.getBloomFilterFalsePositiveRate(), rtreeFields, btreeFields, filterTypeTraits, @@ -2059,15 +2085,16 @@ appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), splitsAndConstraint.first, typeTraits, comparatorFactories, null, fieldPermutation, indexOp, new LSMRTreeDataflowHelperFactory(valueProviderFactories, RTreePolicyType.RTREE, - primaryComparatorFactories, new AsterixVirtualBufferCacheProvider(dataset - .getDatasetId()), compactionInfo.first, compactionInfo.second, + primaryComparatorFactories, + new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), compactionInfo.first, + compactionInfo.second, new SecondaryIndexOperationTrackerProvider(dataset.getDatasetId()), AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, - LSMRTreeIOOperationCallbackFactory.INSTANCE, proposeLinearizer( - nestedKeyType.getTypeTag(), comparatorFactories.length), storageProperties - .getBloomFilterFalsePositiveRate(), rtreeFields, btreeFields, filterTypeTraits, - filterCmpFactories, filterFields, !temp), filterFactory, - modificationCallbackFactory, false, indexName); + LSMRTreeIOOperationCallbackFactory.INSTANCE, + proposeLinearizer(nestedKeyType.getTypeTag(), comparatorFactories.length), + storageProperties.getBloomFilterFalsePositiveRate(), rtreeFields, btreeFields, + filterTypeTraits, filterCmpFactories, filterFields, !temp), + filterFactory, modificationCallbackFactory, false, indexName); } return new Pair(op, splitsAndConstraint.second); } catch (MetadataException | IOException e) { @@ -2257,8 +2284,8 @@ try { type = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, dataverse, typeName); } catch 
(MetadataException e) { - throw new AlgebricksException("Metadata exception while looking up type '" + typeName + "' in dataverse '" - + dataverse + "'", e); + throw new AlgebricksException( + "Metadata exception while looking up type '" + typeName + "' in dataverse '" + dataverse + "'", e); } if (type == null) { throw new AlgebricksException("Type name '" + typeName + "' unknown in dataverse '" + dataverse + "'"); @@ -2368,16 +2395,16 @@ String[] ioDevices = AsterixClusterProperties.INSTANCE.getIODevices(nd); if (create) { for (int j = 0; j < nodeStores.length; j++) { - File f = new File(ioDevices[0] + File.separator + nodeStores[j] + File.separator - + relPathFile); + File f = new File( + ioDevices[0] + File.separator + nodeStores[j] + File.separator + relPathFile); splitArray.add(new FileSplit(nd, new FileReference(f), 0)); } } else { int numIODevices = AsterixClusterProperties.INSTANCE.getNumberOfIODevices(nd); for (int j = 0; j < nodeStores.length; j++) { for (int k = 0; k < numIODevices; k++) { - File f = new File(ioDevices[0] + File.separator + nodeStores[j] + File.separator - + relPathFile); + File f = new File( + ioDevices[0] + File.separator + nodeStores[j] + File.separator + relPathFile); splitArray.add(new FileSplit(nd, new FileReference(f), 0)); } } diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/AqlOrdinalPrimitiveValueProviderFactory.java b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/AqlOrdinalPrimitiveValueProviderFactory.java new file mode 100644 index 0000000..88f40f0 --- /dev/null +++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/AqlOrdinalPrimitiveValueProviderFactory.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.dataflow.data.nontagged.valueproviders; + +import org.apache.asterix.om.types.ATypeTag; +import org.apache.asterix.om.types.EnumDeserializer; +import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProviderFactory; +import org.apache.hyracks.storage.am.rtree.impls.BytePrimitiveValueProviderFactory; +import org.apache.hyracks.storage.am.rtree.impls.IntegerPrimitiveValueProviderFactory; +import org.apache.hyracks.storage.am.rtree.impls.LongPrimitiveValueProviderFactory; +import org.apache.hyracks.storage.am.rtree.impls.ShortPrimitiveValueProviderFactory; + +public class AqlOrdinalPrimitiveValueProviderFactory implements IOrdinalPrimitiveValueProviderFactory { + + private static final long serialVersionUID = 1L; + + public static final AqlOrdinalPrimitiveValueProviderFactory INSTANCE = new AqlOrdinalPrimitiveValueProviderFactory(); + + private AqlOrdinalPrimitiveValueProviderFactory() { + } + + @Override + public IOrdinalPrimitiveValueProvider createOrdinalPrimitiveValueProvider() { + return new IOrdinalPrimitiveValueProvider() { + final IOrdinalPrimitiveValueProvider byteProvider = BytePrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider shortProvider = ShortPrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider intProvider = IntegerPrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider longProvider = LongPrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider dateTimeProvider = DateTimePrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider dateProvider = DatePrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider timeProvider = TimePrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider dayTimeDurationProvider = DayTimeDurationPrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + final IOrdinalPrimitiveValueProvider yearMonthDurationProvider = YearMonthDurationPrimitiveValueProviderFactory.INSTANCE + .createOrdinalPrimitiveValueProvider(); + + @Override + public long getOrdinalValue(byte[] bytes, int offset) { + + ATypeTag tag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]); + switch (tag) { + case INT8: { + return byteProvider.getOrdinalValue(bytes, offset + 1); + } + case INT16: { + return shortProvider.getOrdinalValue(bytes, offset + 1); + } + case INT32: { + return intProvider.getOrdinalValue(bytes, offset + 1); + } + case INT64: { + return longProvider.getOrdinalValue(bytes, offset + 1); + } + case DATETIME: { + return dateTimeProvider.getOrdinalValue(bytes, offset + 1); + } + case DATE: { + return dateProvider.getOrdinalValue(bytes, offset + 1); + } + case TIME: { + return timeProvider.getOrdinalValue(bytes, offset + 1); + } + case DAYTIMEDURATION: { + return dayTimeDurationProvider.getOrdinalValue(bytes, offset + 1); + } + case YEARMONTHDURATION: { + return yearMonthDurationProvider.getOrdinalValue(bytes, offset + 1); + } + default: { + throw new NotImplementedException("Value provider 
for type " + tag + " is not implemented"); + } + } + } + }; + } +} diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DatePrimitiveValueProviderFactory.java b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DatePrimitiveValueProviderFactory.java new file mode 100644 index 0000000..5fd43b1 --- /dev/null +++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DatePrimitiveValueProviderFactory.java @@ -0,0 +1,26 @@ +package org.apache.asterix.dataflow.data.nontagged.valueproviders; + +import org.apache.asterix.dataflow.data.nontagged.serde.ADateSerializerDeserializer; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProviderFactory; + +public class DatePrimitiveValueProviderFactory implements IOrdinalPrimitiveValueProviderFactory { + + private static final long serialVersionUID = 1L; + + public static final DatePrimitiveValueProviderFactory INSTANCE = new DatePrimitiveValueProviderFactory(); + + private DatePrimitiveValueProviderFactory() { + } + + @Override + public IOrdinalPrimitiveValueProvider createOrdinalPrimitiveValueProvider() { + return new IOrdinalPrimitiveValueProvider() { + @Override + public long getOrdinalValue(byte[] bytes, int offset) { + return ADateSerializerDeserializer.getChronon(bytes, offset); + } + }; + } + +} diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DateTimePrimitiveValueProviderFactory.java b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DateTimePrimitiveValueProviderFactory.java new file mode 100644 index 0000000..b546320 --- /dev/null +++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DateTimePrimitiveValueProviderFactory.java @@ -0,0 +1,26 @@ +package org.apache.asterix.dataflow.data.nontagged.valueproviders; + +import org.apache.asterix.dataflow.data.nontagged.serde.ADateTimeSerializerDeserializer; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProviderFactory; + +public class DateTimePrimitiveValueProviderFactory implements IOrdinalPrimitiveValueProviderFactory { + + private static final long serialVersionUID = 1L; + + public static final DateTimePrimitiveValueProviderFactory INSTANCE = new DateTimePrimitiveValueProviderFactory(); + + private DateTimePrimitiveValueProviderFactory() { + } + + @Override + public IOrdinalPrimitiveValueProvider createOrdinalPrimitiveValueProvider() { + return new IOrdinalPrimitiveValueProvider() { + @Override + public long getOrdinalValue(byte[] bytes, int offset) { + return ADateTimeSerializerDeserializer.getChronon(bytes, offset); + } + }; + } + +} diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DayTimeDurationPrimitiveValueProviderFactory.java b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DayTimeDurationPrimitiveValueProviderFactory.java new file mode 100644 index 0000000..8bab8b7 --- /dev/null +++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/DayTimeDurationPrimitiveValueProviderFactory.java @@ -0,0 +1,26 @@ +package org.apache.asterix.dataflow.data.nontagged.valueproviders; + +import 
org.apache.asterix.dataflow.data.nontagged.serde.ADayTimeDurationSerializerDeserializer; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProviderFactory; + +public class DayTimeDurationPrimitiveValueProviderFactory implements IOrdinalPrimitiveValueProviderFactory { + + private static final long serialVersionUID = 1L; + + public static final DayTimeDurationPrimitiveValueProviderFactory INSTANCE = new DayTimeDurationPrimitiveValueProviderFactory(); + + private DayTimeDurationPrimitiveValueProviderFactory() { + } + + @Override + public IOrdinalPrimitiveValueProvider createOrdinalPrimitiveValueProvider() { + return new IOrdinalPrimitiveValueProvider() { + @Override + public long getOrdinalValue(byte[] bytes, int offset) { + return ADayTimeDurationSerializerDeserializer.getDayTime(bytes, offset); + } + }; + } + +} diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/TimePrimitiveValueProviderFactory.java b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/TimePrimitiveValueProviderFactory.java new file mode 100644 index 0000000..3684e8d --- /dev/null +++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/TimePrimitiveValueProviderFactory.java @@ -0,0 +1,26 @@ +package org.apache.asterix.dataflow.data.nontagged.valueproviders; + +import org.apache.asterix.dataflow.data.nontagged.serde.ATimeSerializerDeserializer; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProviderFactory; + +public class TimePrimitiveValueProviderFactory implements IOrdinalPrimitiveValueProviderFactory { + + private static final long serialVersionUID = 1L; + + public static final TimePrimitiveValueProviderFactory INSTANCE = new TimePrimitiveValueProviderFactory(); + + private TimePrimitiveValueProviderFactory() { + } + + @Override + public IOrdinalPrimitiveValueProvider createOrdinalPrimitiveValueProvider() { + return new IOrdinalPrimitiveValueProvider() { + @Override + public long getOrdinalValue(byte[] bytes, int offset) { + return ATimeSerializerDeserializer.getChronon(bytes, offset); + } + }; + } + +} diff --git a/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/YearMonthDurationPrimitiveValueProviderFactory.java b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/YearMonthDurationPrimitiveValueProviderFactory.java new file mode 100644 index 0000000..ced7c97 --- /dev/null +++ b/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/valueproviders/YearMonthDurationPrimitiveValueProviderFactory.java @@ -0,0 +1,26 @@ +package org.apache.asterix.dataflow.data.nontagged.valueproviders; + +import org.apache.asterix.dataflow.data.nontagged.serde.AYearMonthDurationSerializerDeserializer; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider; +import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProviderFactory; + +public class YearMonthDurationPrimitiveValueProviderFactory implements IOrdinalPrimitiveValueProviderFactory { + + private static final long serialVersionUID = 1L; + + public static final YearMonthDurationPrimitiveValueProviderFactory INSTANCE = new YearMonthDurationPrimitiveValueProviderFactory(); + + private YearMonthDurationPrimitiveValueProviderFactory() { + } + + @Override + 
public IOrdinalPrimitiveValueProvider createOrdinalPrimitiveValueProvider() { + return new IOrdinalPrimitiveValueProvider() { + @Override + public long getOrdinalValue(byte[] bytes, int offset) { + return AYearMonthDurationSerializerDeserializer.getYearMonth(bytes, offset); + } + }; + } + +} diff --git a/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlTypeTraitProvider.java b/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlTypeTraitProvider.java index 129f14b..ae8d91d 100644 --- a/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlTypeTraitProvider.java +++ b/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlTypeTraitProvider.java @@ -31,6 +31,7 @@ private static final ITypeTraits TWOBYTETYPETRAIT = new TypeTrait(2 + 1); private static final ITypeTraits FOURBYTETYPETRAIT = new TypeTrait(4 + 1); private static final ITypeTraits EIGHTBYTETYPETRAIT = new TypeTrait(8 + 1); + private static final ITypeTraits TWELVEBYTETYPETRAIT = new TypeTrait(12 + 1); private static final ITypeTraits SIXTEENBYTETYPETRAIT = new TypeTrait(16 + 1); private static final ITypeTraits SEVENTEENBYTETYPETRAIT = new TypeTrait(17 + 1); private static final ITypeTraits THIRTYTWOBYTETYPETRAIT = new TypeTrait(32 + 1); @@ -60,8 +61,9 @@ case INT64: case DOUBLE: case DATETIME: - case DURATION: return EIGHTBYTETYPETRAIT; + case DURATION: + return TWELVEBYTETYPETRAIT; case POINT: case UUID: return SIXTEENBYTETYPETRAIT; diff --git a/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/ExternalBTreeLocalResourceMetadata.java b/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/ExternalBTreeLocalResourceMetadata.java index a75428f..ad05709 100644 --- a/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/ExternalBTreeLocalResourceMetadata.java +++ b/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/ExternalBTreeLocalResourceMetadata.java @@ -55,7 +55,7 @@ new BaseOperationTracker(datasetID, runtimeContextProvider.getDatasetLifecycleManager().getDatasetInfo(datasetID)), runtimeContextProvider.getLSMIOScheduler(), - LSMBTreeIOOperationCallbackFactory.INSTANCE.createIOOperationCallback(), -1, true); + LSMBTreeIOOperationCallbackFactory.INSTANCE.createIOOperationCallback(), -1, true, false, null); return lsmBTree; } } diff --git a/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/LSMBTreeLocalResourceMetadata.java b/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/LSMBTreeLocalResourceMetadata.java index 88e95dd..376c3b1 100644 --- a/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/LSMBTreeLocalResourceMetadata.java +++ b/asterix-transactions/src/main/java/org/apache/asterix/transaction/management/resource/LSMBTreeLocalResourceMetadata.java @@ -75,7 +75,7 @@ runtimeContextProvider.getDatasetLifecycleManager().getDatasetInfo(datasetID)), runtimeContextProvider.getLSMIOScheduler(), LSMBTreeIOOperationCallbackFactory.INSTANCE.createIOOperationCallback(), isPrimary, filterTypeTraits, - filterCmpFactories, btreeFields, filterFields, true); + filterCmpFactories, btreeFields, filterFields, true, false, null); return lsmBTree; } -- To view, visit https://asterix-gerrit.ics.uci.edu/539 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I97a110c593a03fc98403557cbe0e77ca08fb9646 
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Ildar Absalyamov
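
Editor's note on how the pieces in this patch fit together: the change threads a new flag, getStorageProperties().getStatisticsSynopsisType() != SynopsisType.None, into each LSMBTreeDataflowHelperFactory so statistics collection can be switched on from the storage configuration, and the new *PrimitiveValueProviderFactory classes map a tag-prefixed AsterixDB key field to a single long ordinal that a synopsis can bucket. The sketch below is a minimal illustration of that consumption path under stated assumptions, not code from the patch: the synopsis implementation under org.apache.hyracks.storage.am.statistics is not part of this diff, and the class EquiWidthHistogramSketch and its methods addKey/bucketOf are hypothetical names used only for the example. Only IOrdinalPrimitiveValueProvider.getOrdinalValue(byte[], int) and AqlOrdinalPrimitiveValueProviderFactory.INSTANCE, which do appear in the diff, are taken as given.

    package org.apache.asterix.example;

    import org.apache.asterix.dataflow.data.nontagged.valueproviders.AqlOrdinalPrimitiveValueProviderFactory;
    import org.apache.hyracks.storage.am.common.api.IOrdinalPrimitiveValueProvider;

    // Illustrative only: a toy equi-width histogram fed by the ordinal value
    // providers added in this change. Names not shown in the patch
    // (EquiWidthHistogramSketch, addKey, bucketOf) are hypothetical.
    public class EquiWidthHistogramSketch {

        private final IOrdinalPrimitiveValueProvider ordinalProvider =
                AqlOrdinalPrimitiveValueProviderFactory.INSTANCE.createOrdinalPrimitiveValueProvider();

        private final long min;   // assumed domain minimum (must be < max)
        private final long max;   // assumed domain maximum
        private final long[] bucketCounts;

        public EquiWidthHistogramSketch(long min, long max, int numBuckets) {
            this.min = min;
            this.max = max;
            this.bucketCounts = new long[numBuckets];
        }

        // 'keyBytes' is a tag-prefixed key field, i.e. the layout the
        // Aql ordinal provider expects: the ATypeTag byte sits at 'offset'.
        public void addKey(byte[] keyBytes, int offset) {
            long ordinal = ordinalProvider.getOrdinalValue(keyBytes, offset);
            bucketCounts[bucketOf(ordinal)]++;
        }

        private int bucketOf(long ordinal) {
            if (ordinal <= min) {
                return 0;
            }
            if (ordinal >= max) {
                return bucketCounts.length - 1;
            }
            double width = (double) (max - min) / bucketCounts.length;
            return (int) ((ordinal - min) / width);
        }

        public long[] getBucketCounts() {
            return bucketCounts;
        }
    }

Under these assumptions, a statistics collector could push every indexed key it sees during load or flush through addKey and persist getBucketCounts() alongside the LSM component, which is the general shape of the LSM-based statistics this change introduces.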