tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hyun...@apache.org
Subject git commit: TAJO-185: Implement split_part function. (hyunsik)
Date Mon, 23 Sep 2013 04:21:59 GMT
Updated Branches:
  refs/heads/master 7b0dec6a7 -> 7a2061bb9


TAJO-185: Implement split_part function. (hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/7a2061bb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/7a2061bb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/7a2061bb

Branch: refs/heads/master
Commit: 7a2061bb924a41077f8377b3392133a13f728455
Parents: 7b0dec6
Author: Hyunsik Choi <hyunsik@apache.org>
Authored: Mon Sep 23 11:29:33 2013 +0900
Committer: Hyunsik Choi <hyunsik@apache.org>
Committed: Mon Sep 23 13:20:29 2013 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../tajo/engine/function/string/SplitPart.java  | 55 ++++++++++++++++++++
 .../apache/tajo/engine/query/ResultSetImpl.java | 44 +++-------------
 .../java/org/apache/tajo/master/TajoMaster.java |  7 +++
 .../engine/function/TestBuiltinFunctions.java   | 44 ++++++++++++++++
 .../java/org/apache/tajo/storage/LazyTuple.java |  2 +
 .../org/apache/tajo/storage/TestLazyTuple.java  | 12 ++++-
 .../org/apache/tajo/storage/TestStorages.java   |  3 +-
 8 files changed, 128 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 123ee14..dea65e4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,8 @@ Release 0.2.0 - unreleased
 
   NEW FEATURES
 
+    TAJO-185: Implement split_part function. (hyunsik)
+
     TAJO-193: Add string pattern matching operators. (hyunsik)
 
     TAJO-101: HiveQL converter. (jaehwa)

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java
new file mode 100644
index 0000000..bbaa441
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.function.GeneralFunction;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.datum.TextDatum;
+import org.apache.tajo.storage.Tuple;
+
+/**
+ * Function definition
+ *
+ * text split_part(string text, delimiter text, field int)
+ */
+public class SplitPart extends GeneralFunction<TextDatum> {
+  public SplitPart() {
+    super(new Column[] {
+        new Column("text", TajoDataTypes.Type.TEXT),
+        new Column("delimiter", TajoDataTypes.Type.TEXT),
+        new Column("field", TajoDataTypes.Type.INT4),
+    });
+  }
+
+  @Override
+  public Datum eval(Tuple params) {
+    String [] split = StringUtils.split(params.get(0).asChars(), params.get(1).asChars());
+    int idx = params.get(2).asInt4();
+    if (split.length > idx) {
+      return DatumFactory.createText(split[idx]);
+    } else {
+      return NullDatum.get();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
index 6b49558..639b43f 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
@@ -20,13 +20,14 @@ package org.apache.tajo.engine.query;
 
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.tajo.QueryId;
 import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.catalog.TableDesc;
 import org.apache.tajo.catalog.TableMeta;
-import org.apache.tajo.catalog.TableMetaImpl;
-import org.apache.tajo.catalog.proto.CatalogProtos.TableProto;
 import org.apache.tajo.client.TajoClient;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.datum.NullDatum;
@@ -35,9 +36,7 @@ import org.apache.tajo.storage.Fragment;
 import org.apache.tajo.storage.MergeScanner;
 import org.apache.tajo.storage.Scanner;
 import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.util.FileUtil;
 
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
@@ -49,7 +48,6 @@ import java.util.*;
 
 public class ResultSetImpl implements ResultSet {
   private final String cursorName = "tajo";
-  private Configuration conf;
   private FileSystem fs;
   private Scanner scanner;
   private TableDesc desc;
@@ -70,13 +68,12 @@ public class ResultSetImpl implements ResultSet {
   public ResultSetImpl(TajoClient tajoClient, QueryId queryId, Configuration conf, TableDesc desc) throws IOException {
     this.tajoClient = tajoClient;
     this.queryId = queryId;
-    this.conf = conf;
     this.desc = desc;
     this.schema = desc.getMeta().getSchema();
     if(desc != null) {
       fs = desc.getPath().getFileSystem(conf);
       this.totalRow = desc.getMeta().getStat() != null ? desc.getMeta().getStat().getNumRows() : 0;
-      Collection<Fragment> frags = getFragmentsNG(desc.getMeta(), desc.getPath());
+      Collection<Fragment> frags = getFragments(desc.getMeta(), desc.getPath());
       scanner = new MergeScanner(conf, desc.getMeta(), frags);
     }
     init();
@@ -87,19 +84,6 @@ public class ResultSetImpl implements ResultSet {
     curRow = 0;
   }
 
-  private TableMeta getMeta(Configuration conf, Path tablePath)
-      throws IOException {
-    Path tableMetaPath = new Path(tablePath, ".meta");
-    if (!fs.exists(tableMetaPath)) {
-      throw new FileNotFoundException(".meta file not found in "
-          + tablePath.toString());
-    }
-    FSDataInputStream in = fs.open(tableMetaPath);
-    TableProto tableProto = (TableProto) FileUtil.loadProto(in,
-        TableProto.getDefaultInstance());
-    return new TableMetaImpl(tableProto);
-  }
-
   class FileNameComparator implements Comparator<FileStatus> {
 
     @Override
@@ -108,7 +92,7 @@ public class ResultSetImpl implements ResultSet {
     }
   }
 
-  private Collection<Fragment> getFragmentsNG(TableMeta meta, Path tablePath)
+  private Collection<Fragment> getFragments(TableMeta meta, Path tablePath)
       throws IOException {
     List<Fragment> fraglist = Lists.newArrayList();
     FileStatus[] files = fs.listStatus(tablePath, new PathFilter() {
@@ -129,22 +113,6 @@ public class ResultSetImpl implements ResultSet {
     return fraglist;
   }
 
-  private Fragment[] getFragments(TableMeta meta, Path tablePath)
-      throws IOException {
-    List<Fragment> fraglist = Lists.newArrayList();
-    FileStatus[] files = fs.listStatus(tablePath);
-    Arrays.sort(files, new FileNameComparator());
-
-    String tbname = tablePath.getName();
-    for (int i = 0; i < files.length; i++) {
-      if (files[i].getLen() == 0) {
-        continue;
-      }
-      fraglist.add(new Fragment(tbname + "_" + i, files[i].getPath(), meta, 0l, files[i].getLen()));
-    }
-    return fraglist.toArray(new Fragment[fraglist.size()]);
-  }
-
   /*
    * (non-Javadoc)
    * 

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
index 0635156..1296ea4 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
@@ -38,12 +38,14 @@ import org.apache.hadoop.yarn.service.Service;
 import org.apache.hadoop.yarn.util.RackResolver;
 import org.apache.tajo.catalog.*;
 import org.apache.tajo.catalog.proto.CatalogProtos.FunctionType;
+import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.common.TajoDataTypes.Type;
 import org.apache.tajo.conf.TajoConf;
 import org.apache.tajo.conf.TajoConf.ConfVars;
 import org.apache.tajo.engine.function.Country;
 import org.apache.tajo.engine.function.InCountry;
 import org.apache.tajo.engine.function.builtin.*;
+import org.apache.tajo.engine.function.string.SplitPart;
 import org.apache.tajo.master.querymaster.QueryJobManager;
 import org.apache.tajo.master.rm.WorkerResourceManager;
 import org.apache.tajo.master.rm.YarnTajoResourceManager;
@@ -300,6 +302,11 @@ public class TajoMaster extends CompositeService {
             CatalogUtil.newDataTypesWithoutLen(Type.INT4),
             CatalogUtil.newDataTypesWithoutLen(Type.INT4)));
 
+    sqlFuncs.add(
+        new FunctionDesc("split_part", SplitPart.class, FunctionType.GENERAL,
+            CatalogUtil.newDataTypesWithoutLen(Type.TEXT),
+            CatalogUtil.newDataTypesWithoutLen(Type.TEXT, Type.TEXT, Type.INT4)));
+
     return sqlFuncs;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
index a5a89f7..0384d51 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
@@ -19,6 +19,7 @@
 package org.apache.tajo.engine.function;
 
 import com.google.common.collect.Maps;
+import org.apache.tajo.client.ResultSetUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -31,6 +32,7 @@ import java.sql.ResultSet;
 import java.util.Map;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 @Category(IntegrationTest.class)
@@ -132,4 +134,46 @@ public class TestBuiltinFunctions {
       res.close();
     }
   }
+
+  @Test
+  public void testSplitPart() throws Exception {
+    ResultSet res = tpch.execute("select split_part(l_shipinstruct, ' ', 0) from lineitem");
+
+    String [] result ={
+      "DELIVER",
+      "TAKE",
+      "TAKE",
+      "NONE",
+      "TAKE"
+    };
+
+    for (int i = 0; i < result.length; i++) {
+      assertTrue(res.next());
+      assertEquals(result[i], res.getString(1));
+    }
+    assertFalse(res.next());
+
+    res.close();
+  }
+
+  @Test
+  public void testSplitPartNested() throws Exception {
+    ResultSet res = tpch.execute("select split_part(split_part(l_shipinstruct, ' ', 0), 'A', 1) from lineitem");
+
+    String [] result ={
+        "",
+        "KE",
+        "KE",
+        "",
+        "KE"
+    };
+
+    for (int i = 0; i < result.length; i++) {
+      assertTrue(res.next());
+      assertEquals(result[i], res.getString(1));
+    }
+    assertFalse(res.next());
+
+    res.close();
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java b/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
index c2b511c..c364ae2 100644
--- a/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
+++ b/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
@@ -113,6 +113,8 @@ public class LazyTuple implements Tuple {
     else if (textBytes.length > fieldId && (textBytes[fieldId] != null)) {
       values[fieldId] = createByTextBytes(schema.getColumn(fieldId).getDataType().getType(), textBytes[fieldId]);
       textBytes[fieldId] = null;
+    } else {
+      values[fieldId] = NullDatum.get();
     }
     return values[fieldId];
   }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
index 3ca0789..9dd17e2 100644
--- a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
+++ b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
@@ -124,6 +124,14 @@ public class TestLazyTuple {
     assertTrue(t1.getFloat(11).asFloat4() == 0.76f);
   }
 
+  public VTuple createVTuple(int size) {
+    VTuple vtuple = new VTuple(size);
+    for (int i = 0; i < size; i++) {
+      vtuple.put(i, NullDatum.get());
+    }
+    return vtuple;
+  }
+
   @Test
   public void testEquals() {
     int colNum = schema.getColumnNum();
@@ -140,7 +148,7 @@ public class TestLazyTuple {
 
     assertEquals(t1, t2);
 
-    Tuple t3 = new VTuple(colNum);
+    Tuple t3 = createVTuple(colNum);
     t3.put(0, DatumFactory.createInt4(1));
     t3.put(1, DatumFactory.createInt4(2));
     t3.put(3, DatumFactory.createInt4(2));
@@ -169,7 +177,7 @@ public class TestLazyTuple {
 
     assertEquals(t1.hashCode(), t2.hashCode());
 
-    Tuple t3 = new VTuple(colNum);
+    Tuple t3 = createVTuple(colNum);
     t3.put(0, DatumFactory.createInt4(1));
     t3.put(1, DatumFactory.createInt4(2));
     t3.put(3, DatumFactory.createInt4(2));

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
index 364600c..7589996 100644
--- a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -44,6 +44,7 @@ import java.util.Collection;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 @RunWith(Parameterized.class)
 public class TestStorages {
@@ -170,7 +171,7 @@ public class TestStorages {
     Tuple tuple;
     while ((tuple = scanner.next()) != null) {
       if (storeType == StoreType.RCFILE || storeType == StoreType.TREVNI || storeType == StoreType.CSV) {
-        assertNull(tuple.get(0));
+        assertTrue(tuple.get(0) == null || tuple.get(0) instanceof NullDatum);
       }
       assertEquals(DatumFactory.createInt8(tupleCnt + 2), tuple.getLong(1));
       assertEquals(DatumFactory.createFloat4(tupleCnt + 3), tuple.getFloat(2));


Mime
View raw message