carbondata-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xuchuan...@apache.org
Subject carbondata git commit: [CARBONDATA-2747][Lucene] Fix Lucene datamap choosing and DataMapDistributable building
Date Fri, 20 Jul 2018 07:05:02 GMT
Repository: carbondata
Updated Branches:
  refs/heads/master 4a37e05ca -> 46f0c8517


[CARBONDATA-2747][Lucene] Fix Lucene datamap choosing and DataMapDistributable building

1. Choose the Lucene datamap that indexes the column referenced in the query
2. Build DataMapDistributable only for the target datamap

This closes #2519


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/46f0c851
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/46f0c851
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/46f0c851

Branch: refs/heads/master
Commit: 46f0c8517d4e79a402ff6dc8a077f3d3955f39b5
Parents: 4a37e05
Author: Manhua <kevinjmh@qq.com>
Authored: Wed Jul 18 10:14:40 2018 +0800
Committer: xuchuanyin <xuchuanyin@hust.edu.cn>
Committed: Fri Jul 20 15:04:25 2018 +0800

----------------------------------------------------------------------
 .../carbondata/core/datamap/DataMapChooser.java | 14 +++++++----
 .../bloom/BloomCoarseGrainDataMapFactory.java   |  1 -
 .../lucene/LuceneDataMapFactoryBase.java        | 25 +++++++++++---------
 .../lucene/LuceneFineGrainDataMapSuite.scala    |  9 +++++--
 4 files changed, 30 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
index cf5dffd..68696cf 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java
@@ -34,6 +34,7 @@ import org.apache.carbondata.core.datamap.status.DataMapStatusManager;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
 import org.apache.carbondata.core.scan.expression.ColumnExpression;
 import org.apache.carbondata.core.scan.expression.Expression;
+import org.apache.carbondata.core.scan.expression.MatchExpression;
 import org.apache.carbondata.core.scan.expression.logical.AndExpression;
 import org.apache.carbondata.core.scan.expression.logical.OrExpression;
 import org.apache.carbondata.core.scan.filter.intf.ExpressionType;
@@ -269,6 +270,14 @@ public class DataMapChooser {
       List<ColumnExpression> columnExpressions) {
     if (expression instanceof ColumnExpression) {
       columnExpressions.add((ColumnExpression) expression);
+    } else if (expression instanceof MatchExpression) {
+      // this is a special case for lucene
+      // build a fake ColumnExpression to filter datamaps which contain target column
+      // a Lucene query string is alike "column:query term"
+      String[] queryItems = expression.getString().split(":", 2);
+      if (queryItems.length == 2) {
+        columnExpressions.add(new ColumnExpression(queryItems[0], null));
+      }
     } else if (expression != null) {
       List<Expression> children = expression.getChildren();
       if (children != null && children.size() > 0) {
@@ -303,11 +312,6 @@ public class DataMapChooser {
    */
   private boolean contains(DataMapMeta mapMeta, List<ColumnExpression> columnExpressions,
       Set<ExpressionType> expressionTypes) {
-    if (mapMeta.getOptimizedOperation().contains(ExpressionType.TEXT_MATCH) &&
-        expressionTypes.contains(ExpressionType.TEXT_MATCH)) {
-      // TODO: fix it with right logic
-      return true;
-    }
     if (mapMeta.getIndexedColumns().size() == 0 || columnExpressions.size() == 0) {
       return false;
     }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
----------------------------------------------------------------------
diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
index 4b5bc7c..652e1fc 100644
--- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
+++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
@@ -278,7 +278,6 @@ public class BloomCoarseGrainDataMapFactory extends DataMapFactory<CoarseGrainDa
     }
     if (dataMaps.size() > 0) {
       for (TableDataMap dataMap : dataMaps) {
-        // different from lucene, bloom only get corresponding directory of current datamap
         if (dataMap.getDataMapSchema().getDataMapName().equals(this.dataMapName)) {
           List<CarbonFile> indexFiles;
           String dmPath = CarbonTablePath.getDataMapStorePath(tablePath, segmentId,

http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
----------------------------------------------------------------------
diff --git a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
index cd225f1..3179584 100644
--- a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
+++ b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java
@@ -310,17 +310,20 @@ abstract class LuceneDataMapFactoryBase<T extends DataMap> extends DataMapFactor
     }
     if (dataMaps.size() > 0) {
       for (TableDataMap dataMap : dataMaps) {
-        List<CarbonFile> indexFiles;
-        String dmPath = CarbonTablePath
-            .getDataMapStorePath(tablePath, segmentId, dataMap.getDataMapSchema().getDataMapName());
-        FileFactory.FileType fileType = FileFactory.getFileType(dmPath);
-        final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType);
-        indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() {
-          @Override public boolean accept(CarbonFile file) {
-            return file.isDirectory();
-          }
-        }));
-        indexDirs.addAll(indexFiles);
+        if (dataMap.getDataMapSchema().getDataMapName().equals(this.dataMapName)) {
+          List<CarbonFile> indexFiles;
+          String dmPath = CarbonTablePath.getDataMapStorePath(tablePath, segmentId,
+              dataMap.getDataMapSchema().getDataMapName());
+          FileFactory.FileType fileType = FileFactory.getFileType(dmPath);
+          final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType);
+          indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() {
+            @Override
+            public boolean accept(CarbonFile file) {
+              return file.isDirectory();
+            }
+          }));
+          indexDirs.addAll(indexFiles);
+        }
       }
     }
     return indexDirs.toArray(new CarbonFile[0]);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
index aebbde4..b56701b 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
@@ -666,13 +666,13 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
       """.stripMargin)
     sql(
       s"""
-         | CREATE DATAMAP dm2 ON TABLE datamap_test5
+         | CREATE DATAMAP dm_city ON TABLE datamap_test5
          | USING 'lucene'
          | DMProperties('INDEX_COLUMNS'='city')
       """.stripMargin)
     sql(
       s"""
-         | CREATE DATAMAP dm1 ON TABLE datamap_test5
+         | CREATE DATAMAP dm_name ON TABLE datamap_test5
          | USING 'lucene'
          | DMProperties('INDEX_COLUMNS'='Name')
       """.stripMargin)
@@ -681,6 +681,11 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
       sql(s"select * from datamap_test5 where name='n10'"))
     checkAnswer(sql("SELECT * FROM datamap_test5 WHERE TEXT_MATCH('city:c020')"),
       sql(s"SELECT * FROM datamap_test5 WHERE city='c020'"))
+
+    var explainString = sql("explain select * from datamap_test5 where TEXT_MATCH('name:n10')").collect()
+    assert(explainString(0).getString(0).contains(
+      "pruned by FG DataMap\n    - name: dm_name\n    - provider: lucene"))
+
     sql("DROP TABLE IF EXISTS datamap_test5")
   }
 


Mime
View raw message