drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j...@apache.org
Subject [2/3] drill git commit: DRILL-1673: Fix for error with flatten function when used with nested lists.
Date Mon, 29 Jun 2015 06:32:45 GMT
DRILL-1673: Fix for error with flatten function when used with nested lists.

Added a small reference implementation of flatten for generating baselines

Remove now-unneeded code in EmptyValuePopulator, as we are guaranteed to have the first offset
initialized to 0.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/233faf26
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/233faf26
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/233faf26

Branch: refs/heads/master
Commit: 233faf2674c9ea2a9e416d01091bbf3658406f69
Parents: e659f01
Author: Jason Altekruse <altekrusejason@gmail.com>
Authored: Wed Jun 17 14:12:05 2015 -0700
Committer: Jason Altekruse <altekrusejason@gmail.com>
Committed: Sun Jun 28 22:28:27 2015 -0700

----------------------------------------------------------------------
 .../vector/complex/EmptyValuePopulator.java     |   2 +-
 .../exec/vector/complex/RepeatedListVector.java |   5 +-
 .../java/org/apache/drill/DrillTestWrapper.java |   2 +-
 .../exec/physical/impl/flatten/TestFlatten.java | 180 ++++++++++++++++++-
 .../complex_transaction_example_data.json       | 100 +++++++++++
 .../src/test/resources/store/json/1673.json     |   1 +
 6 files changed, 284 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/233faf26/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/EmptyValuePopulator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/EmptyValuePopulator.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/EmptyValuePopulator.java
index 8c61a60..fa1de66 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/EmptyValuePopulator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/EmptyValuePopulator.java
@@ -43,7 +43,7 @@ public class EmptyValuePopulator {
     final UInt4Vector.Accessor accessor = offsets.getAccessor();
     final UInt4Vector.Mutator mutator = offsets.getMutator();
     final int lastSet = Math.max(accessor.getValueCount() - 1, 0);
-    final int previousEnd = accessor.get(lastSet);
+    final int previousEnd = accessor.get(lastSet);//0 ? 0 : accessor.get(lastSet);
     for (int i = lastSet; i < lastIndex; i++) {
       mutator.setSafe(i + 1, previousEnd);
     }

http://git-wip-us.apache.org/repos/asf/drill/blob/233faf26/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
index f538399..85e4d1d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/RepeatedListVector.java
@@ -153,7 +153,10 @@ public class RepeatedListVector extends AbstractContainerVector
 
       @Override
       public void splitAndTransfer(int startIndex, int length) {
-        throw new UnsupportedOperationException("Repeated list does not support split &
transfer operation");
+        target.allocateNew();
+        for (int i = 0; i < length; i++) {
+          copyValueSafe(startIndex + i, i);
+        }
       }
 
       @Override

http://git-wip-us.apache.org/repos/asf/drill/blob/233faf26/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
index d4e7ed6..77d2a54 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/DrillTestWrapper.java
@@ -538,7 +538,7 @@ public class DrillTestWrapper {
         break;
       }
       if (!found) {
-        throw new Exception("Did not find expected record in result set: " + printRecord(expectedRecord));
+        throw new Exception(String.format("After matching %d records, did not find expected
record in result set: %s", counter, printRecord(expectedRecord)));
       } else {
         actualRecords.remove(i);
         counter++;

http://git-wip-us.apache.org/repos/asf/drill/blob/233faf26/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlatten.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlatten.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlatten.java
index 6f5a303..5756e97 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlatten.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/flatten/TestFlatten.java
@@ -17,13 +17,22 @@
  ******************************************************************************/
 package org.apache.drill.exec.physical.impl.flatten;
 
+import static org.apache.drill.TestBuilder.listOf;
+import static org.apache.drill.TestBuilder.mapOf;
 import static org.junit.Assert.assertEquals;
 
+import com.google.common.collect.Lists;
 import org.apache.drill.BaseTestQuery;
+import org.apache.drill.TestBuilder;
 import org.apache.drill.common.util.FileUtils;
-import org.apache.drill.exec.proto.UserBitShared;
+import org.apache.drill.exec.fn.interp.TestConstantFolding;
+import org.apache.drill.exec.util.JsonStringHashMap;
 import org.junit.Ignore;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import java.util.List;
 
 public class TestFlatten extends BaseTestQuery {
 
@@ -36,6 +45,8 @@ public class TestFlatten extends BaseTestQuery {
    */
   public static boolean RUN_ADVANCED_TESTS = false;
 
+  @Rule
+  public TemporaryFolder folder = new TemporaryFolder();
 
   @Test
   public void testFlattenFailure() throws Exception {
@@ -44,6 +55,141 @@ public class TestFlatten extends BaseTestQuery {
   }
 
   @Test
+  public void testFlatten_Drill2162_complex() throws Exception {
+    String path = folder.getRoot().toPath().toString();
+
+    String jsonRecords = BaseTestQuery.getFile("flatten/complex_transaction_example_data.json");
+    int numCopies = 700;
+    new TestConstantFolding.SmallFileCreator(folder)
+        .setRecord(jsonRecords)
+        .createFiles(1, numCopies, "json");
+
+    List<JsonStringHashMap<String,Object>> data = Lists.newArrayList(
+        mapOf("uid", 1l,
+            "lst_lst_0", listOf(1l, 2l, 3l, 4l, 5l),
+            "lst_lst_1", listOf(2l, 3l, 4l, 5l, 6l),
+            "lst_lst", listOf(
+            listOf(1l, 2l, 3l, 4l, 5l),
+            listOf(2l, 3l, 4l, 5l, 6l))
+        ),
+        mapOf("uid", 2l,
+            "lst_lst_0", listOf(1l, 2l, 3l, 4l, 5l),
+            "lst_lst_1", listOf(2l, 3l, 4l, 5l, 6l),
+            "lst_lst", listOf(
+            listOf(1l, 2l, 3l, 4l, 5l),
+            listOf(2l, 3l, 4l, 5l, 6l))
+        )
+    );
+
+    List<JsonStringHashMap<String, Object>> result = flatten(flatten(flatten(data,
"lst_lst_1"), "lst_lst_0"), "lst_lst");
+
+    TestBuilder builder = testBuilder()
+        .sqlQuery("select uid, flatten(d.lst_lst[1]) lst1, flatten(d.lst_lst[0]) lst0, flatten(d.lst_lst)
lst from " +
+                  "dfs.`" + path + "/bigfile/bigfile.json` d")
+        .unOrdered()
+        .baselineColumns("uid", "lst1", "lst0", "lst");
+    for (int i = 0; i < numCopies; i++) {
+      for (JsonStringHashMap<String, Object> record : result) {
+        builder.baselineValues(record.get("uid"), record.get("lst_lst_1"), record.get("lst_lst_0"),
record.get("lst_lst"));
+      }
+    }
+    builder.go();
+  };
+
+  @Test
+  public void testFlattenReferenceImpl() throws Exception {
+    List<JsonStringHashMap<String,Object>> data = Lists.newArrayList(
+        mapOf("a",1,
+              "b",2,
+              "list_col", listOf(10,9),
+              "nested_list_col",listOf(
+                  listOf(100,99),
+                  listOf(1000,999)
+            )));
+    List<JsonStringHashMap<String, Object>> result = flatten(flatten(flatten(data,
"list_col"), "nested_list_col"), "nested_list_col");
+     List<JsonStringHashMap<String, Object>> expectedResult = Lists.newArrayList(
+        mapOf("nested_list_col", 100,  "list_col", 10,"a", 1, "b",2),
+        mapOf("nested_list_col", 99,   "list_col", 10,"a", 1, "b",2),
+        mapOf("nested_list_col", 1000, "list_col", 10,"a", 1, "b",2),
+        mapOf("nested_list_col", 999,  "list_col", 10,"a", 1, "b",2),
+        mapOf("nested_list_col", 100,  "list_col", 9, "a", 1, "b",2),
+        mapOf("nested_list_col", 99,   "list_col", 9, "a", 1, "b",2),
+        mapOf("nested_list_col", 1000, "list_col", 9, "a", 1, "b",2),
+        mapOf("nested_list_col", 999,  "list_col", 9, "a", 1, "b",2)
+    );
+    int i = 0;
+    for (JsonStringHashMap record : result) {
+      assertEquals(record, expectedResult.get(i));
+      i++;
+    }
+  }
+
+  private List<JsonStringHashMap<String, Object>> flatten(
+      List<JsonStringHashMap<String,Object>> incomingRecords,
+      String colToFlatten) {
+    return flatten(incomingRecords, colToFlatten, colToFlatten);
+  }
+
+  private List<JsonStringHashMap<String, Object>> flatten(
+      List<JsonStringHashMap<String,Object>> incomingRecords,
+      String colToFlatten,
+      String flattenedDataColName) {
+    List<JsonStringHashMap<String,Object>> output = Lists.newArrayList();
+    for (JsonStringHashMap<String, Object> incomingRecord : incomingRecords) {
+      List dataToFlatten = (List) incomingRecord.get(colToFlatten);
+      for (int i = 0; i < dataToFlatten.size(); i++) {
+        final JsonStringHashMap newRecord = new JsonStringHashMap();
+        newRecord.put(flattenedDataColName, dataToFlatten.get(i));
+        for (String s : incomingRecord.keySet()) {
+          if (s.equals(colToFlatten)) {
+            continue;
+          }
+          newRecord.put(s, incomingRecord.get(s));
+        }
+        output.add(newRecord);
+      }
+    }
+    return output;
+  }
+
+  @Test
+  public void testFlatten_Drill2162_simple() throws Exception {
+    String path = folder.getRoot().toPath().toString();
+
+    List<Long> inputList = Lists.newArrayList();
+    String jsonRecord = "{ \"int_list\" : [";
+    final int listSize = 30;
+    for (int i = 1; i < listSize; i++ ) {
+      jsonRecord += i + ", ";
+      inputList.add((long) i);
+    }
+    jsonRecord += listSize + "] }";
+    inputList.add((long) listSize);
+    int numRecords = 3000;
+    new TestConstantFolding.SmallFileCreator(folder)
+        .setRecord(jsonRecord)
+        .createFiles(1, numRecords, "json");
+
+    List<JsonStringHashMap<String,Object>> data = Lists.newArrayList(
+        mapOf("int_list", inputList)
+    );
+
+    List<JsonStringHashMap<String, Object>> result = flatten(data, "int_list");
+
+    TestBuilder builder = testBuilder()
+        .sqlQuery("select flatten(int_list) as int_list from dfs.`" + path + "/bigfile/bigfile.json`")
+        .unOrdered()
+        .baselineColumns("int_list");
+
+    for (int i = 0; i < numRecords; i++) {
+      for (JsonStringHashMap<String, Object> record : result) {
+        builder.baselineValues(record.get("int_list"));
+      }
+    }
+    builder.go();
+  };
+
+  @Test
   public void drill1671() throws Exception{
     int rowCount = testSql("select * from (select count(*) as cnt from (select id, flatten(evnts1),
flatten(evnts2), flatten(evnts3), flatten(evnts4), flatten(evnts5), flatten(evnts6), flatten(evnts7),
flatten(evnts8), flatten(evnts9), flatten(evnts10), flatten(evnts11) from cp.`/flatten/many-arrays-50.json`)x
)y where cnt = 2048");
     assertEquals(rowCount, 1);
@@ -55,6 +201,36 @@ public class TestFlatten extends BaseTestQuery {
     test("select * from cp.`/flatten/empty-rm.json`");
   }
 
+  @Test // repeated list within a repeated map
+  public void drill1673() throws Exception {
+    String path = folder.getRoot().toPath().toString();
+
+    String jsonRecords = BaseTestQuery.getFile("store/json/1673.json");
+    int numCopies = 25000;
+    new TestConstantFolding.SmallFileCreator(folder)
+        .setRecord(jsonRecords)
+        .createFiles(1, numCopies, "json");
+
+    TestBuilder builder = testBuilder()
+        .sqlQuery("select t.fixed_column as fixed_column, " +
+                  "flatten(t.list_column) as list_col " +
+                  "from dfs.`" + path + "/bigfile/bigfile.json` as t")
+        .baselineColumns("fixed_column", "list_col")
+        .unOrdered();
+    Object map1 = mapOf("id1", "1",
+                        "name", "zhu",
+                        "num", listOf(listOf(1l, 2l, 3l)));
+    Object map2 = mapOf("id1", "2",
+                      "name", "hao",
+                      "num", listOf(listOf(4l, 5l, 6l)));
+    for (int i = 0; i < numCopies; i++) {
+      builder.baselineValues("abc", map1);
+      builder.baselineValues("abc", map2);
+    }
+
+    builder.go();
+  }
+
   @Test
   public void drill1653() throws Exception{
     int rowCount = testSql("select * from (select sum(t.flat.`value`) as sm from (select
id, flatten(kvgen(m)) as flat from cp.`/flatten/missing-map.json`)t) where sm = 10 ");
@@ -88,8 +264,6 @@ public class TestFlatten extends BaseTestQuery {
         "on transaction_info.max_event_time = event_info.event.event_time;");
   }
 
-
-
   @Test
   public void testKVGenFlatten1() throws Exception {
     // works - TODO and verify results

http://git-wip-us.apache.org/repos/asf/drill/blob/233faf26/exec/java-exec/src/test/resources/flatten/complex_transaction_example_data.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/flatten/complex_transaction_example_data.json
b/exec/java-exec/src/test/resources/flatten/complex_transaction_example_data.json
new file mode 100644
index 0000000..039ce18
--- /dev/null
+++ b/exec/java-exec/src/test/resources/flatten/complex_transaction_example_data.json
@@ -0,0 +1,100 @@
+{
+  "uid" : 1,
+  "uid_str" : "01",
+  "type" : "web",
+  "max_trans_amount" : 1000,
+  "events" : [
+        { "evnt_id":"e1", "campaign_id":"c1", "event_name":"e1_name", "event_time":1000000,
"type" : "cmpgn1"},
+        { "evnt_id":"e2", "campaign_id":"c1", "event_name":"e2_name", "event_time":2000000,
"type" : "cmpgn4"},
+        { "evnt_id":"e3", "campaign_id":"c1", "event_name":"e3_name", "event_time":3000000,
"type" : "cmpgn1"},
+        { "evnt_id":"e4", "campaign_id":"c1", "event_name":"e4_name", "event_time":4000000,
"type" : "cmpgn1"},
+        { "evnt_id":"e5", "campaign_id":"c2", "event_name":"e5_name", "event_time":5000000,
"type" : "cmpgn3"},
+        { "evnt_id":"e6", "campaign_id":"c1", "event_name":"e6_name", "event_time":6000000,
"type" : "cmpgn9"},
+        { "evnt_id":"e7", "campaign_id":"c1", "event_name":"e7_name", "event_time":7000000,
"type" : "cmpgn3"},
+        { "evnt_id":"e8", "campaign_id":"c2", "event_name":"e8_name", "event_time":8000000,
"type" : "cmpgn2"},
+        { "evnt_id":"e9", "campaign_id":"c2", "event_name":"e9_name", "event_time":9000000,
"type" : "cmpgn4"}
+  ],
+  "transactions" : [
+       { "trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"},
+       { "trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"}
+  ],
+  "map":{"rm": [
+    {"mapid":"m1","mapvalue":{"col1":1,"col2":[0,1,2,3,4,5]},"rptd": [{ "a": "foo"},{"b":"boo"}]},
+    {"mapid":"m2","mapvalue":{"col1":0,"col2":[]},"rptd": [{ "a": "bar"},{"c":1},{"d":4.5}]}
+  ]},
+  "sub": [{"z1":"hello"},{"z2":1}],
+  "features": [
+        {
+            "type": "Feature",
+            "properties": {
+                "mag": 6.9,
+                "time": 1405954481000,
+                "updated": 1405983436259
+            },
+            "geometry": {
+                "type": "Point",
+                "coordinates": "100,90"
+            },
+            "id": "usb000ruzk",
+            "location": {
+                "zip": "95134",
+                "street": "zanker",
+                "bldgs": {
+                    "bldg1" : "HQ1",
+                    "bldg2" : "HQ2"
+                }
+            }
+        }
+    ],
+  "lst_lst" : [[1,2,3,4,5],[2,3,4,5,6]]
+}
+{
+  "uid" : 2,
+  "uid_str" : "02",
+  "type" : "store",
+  "max_trans_amount" : 2001,
+  "events" : [
+        { "evnt_id":"e1", "campaign_id":"c1", "event_name":"e1_name", "event_time":1000000,
"type" : "cmpgn9"},
+        { "evnt_id":"e2", "campaign_id":"c1", "event_name":"e2_name", "event_time":2000000,
"type" : "cmpgn4"},
+        { "evnt_id":"e3", "campaign_id":"c1", "event_name":"e3_name", "event_time":3000000,
"type" : "cmpgn1"},
+        { "evnt_id":"e4", "campaign_id":"c1", "event_name":"e4_name", "event_time":4000000,
"type" : "cmpgn1"},
+        { "evnt_id":"e5", "campaign_id":"c2", "event_name":"e5_name", "event_time":5000000,
"type" : "cmpgn2"},
+        { "evnt_id":"e6", "campaign_id":"c1", "event_name":"e6_name", "event_time":6000000,
"type" : "cmpgn9"},
+        { "evnt_id":"e7", "campaign_id":"c1", "event_name":"e7_name", "event_time":7000000,
"type" : "cmpgn3"},
+        { "evnt_id":"e8", "campaign_id":"c2", "event_name":"e8_name", "event_time":8000000,
"type" : "cmpgn2"},
+        { "evnt_id":"e9", "campaign_id":"c2", "event_name":"e9_name", "event_time":9000000,
"type" : "cmpgn4"}
+  ],
+  "transactions" : [
+       { "trans_id":"t1", "amount":100, "trans_time":7777777, "type":"sports"},
+       { "trans_id":"t2", "amount":1000, "trans_time":8888888, "type":"groceries"}
+  ],
+  "map":{"rm": [
+    {"mapid":"m0","mapvalue":{"col1":1,"col2":[0,1,2,3,4,5]},"rptd": [{ "a": "foo1"},{"b":"boo"}]},
+    {"mapid":"m1","mapvalue":{"col1":0,"col2":[]},"rptd": [{ "a": "bar"},{"c":-1},{"d":4.5}]}
+  ]},
+  "sub": [{"z1":"hello"},{"z2":10}],
+  "features": [
+        {
+            "type": "cmpgn9",
+            "properties": {
+                "mag": 6.9,
+                "time": 1405954481000,
+                "updated": 1405983436259
+            },
+            "geometry": {
+                "type": "Point",
+                "coordinates": "100,90"
+            },
+            "id": "usb000ruzk",
+            "location": {
+                "zip": "95134",
+                "street": "zanker",
+                "bldgs": {
+                    "bldg1" : "HQ1",
+                    "bldg2" : "HQ2"
+                }
+            }
+        }
+    ],
+  "lst_lst" : [[1,2,3,4,5],[2,3,4,5,6]]
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/233faf26/exec/java-exec/src/test/resources/store/json/1673.json
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/store/json/1673.json b/exec/java-exec/src/test/resources/store/json/1673.json
new file mode 100644
index 0000000..b887d40
--- /dev/null
+++ b/exec/java-exec/src/test/resources/store/json/1673.json
@@ -0,0 +1 @@
+{"fixed_column":"abc", "list_column":[{"id1":"1","name":"zhu", "num": [[1,2,3]]}, {"id1":"2","name":"hao",
"num": [[4,5,6]]} ]}
\ No newline at end of file


Mime
View raw message