incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [4/4] git commit: Adding support in BlurSerDe for multi valued columns and non-standard blur types.
Date Thu, 22 Jan 2015 03:40:55 GMT
Adding support in BlurSerDe for multi valued columns and non-standard blur types.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/8873f455
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/8873f455
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/8873f455

Branch: refs/heads/master
Commit: 8873f455e9d5e68d782f50796a10fba0a1960aeb
Parents: 71b5c5e
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Wed Jan 21 22:40:40 2015 -0500
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Wed Jan 21 22:40:40 2015 -0500

----------------------------------------------------------------------
 .../blur/hive/BlurObjectInspectorGenerator.java |  18 ++-
 .../org/apache/blur/hive/BlurSerializer.java    |  67 ++++++-----
 .../org/apache/blur/hive/BlurSerDeTest.java     | 113 ++++++++++++++-----
 3 files changed, 142 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/8873f455/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
----------------------------------------------------------------------
diff --git a/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java b/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
index c915531..30cb9bf 100644
--- a/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
+++ b/blur-hive/src/main/java/org/apache/blur/hive/BlurObjectInspectorGenerator.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -94,11 +95,15 @@ public class BlurObjectInspectorGenerator {
       return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
     case STRUCT:
       StructTypeInfo sti = (StructTypeInfo) ti;
-      ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
+      List<ObjectInspector> ois = new ArrayList<ObjectInspector>(sti.getAllStructFieldTypeInfos().size());
       for (TypeInfo typeInfo : sti.getAllStructFieldTypeInfos()) {
         ois.add(createObjectInspectorWorker(typeInfo));
       }
       return ObjectInspectorFactory.getStandardStructObjectInspector(sti.getAllStructFieldNames(), ois);
+    case LIST:
+      ListTypeInfo lti = (ListTypeInfo) ti;
+      TypeInfo listElementTypeInfo = lti.getListElementTypeInfo();
+      return ObjectInspectorFactory.getStandardListObjectInspector(createObjectInspectorWorker(listElementTypeInfo));
     default:
       throw new SerDeException("No Hive categories matched for [" + ti + "]");
     }
@@ -106,6 +111,14 @@ public class BlurObjectInspectorGenerator {
 
   private TypeInfo getTypeInfo(ColumnDefinition columnDefinition) throws SerDeException {
     String fieldType = columnDefinition.getFieldType();
+    TypeInfo typeInfo = getTypeInfo(fieldType);
+    if (columnDefinition.isMultiValueField()) {
+      return TypeInfoFactory.getListTypeInfo(typeInfo);
+    }
+    return typeInfo;
+  }
+
+  private TypeInfo getTypeInfo(String fieldType) {
     if (fieldType.equals(TEXT) || fieldType.equals(STRING) || fieldType.equals(STORED)) {
       return TypeInfoFactory.stringTypeInfo;
     } else if (fieldType.equals(LONG)) {
@@ -124,7 +137,8 @@ public class BlurObjectInspectorGenerator {
           (TypeInfo) TypeInfoFactory.floatTypeInfo);
       return TypeInfoFactory.getStructTypeInfo(Arrays.asList(LATITUDE, LONGITUDE), typeInfos);
     }
-    throw new SerDeException("Blur Field Type [" + fieldType + "] is not supported.");
+    // Return string for anything that is not a built in type.
+    return TypeInfoFactory.stringTypeInfo;
   }
 
   public ObjectInspector getObjectInspector() {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/8873f455/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
----------------------------------------------------------------------
diff --git a/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java b/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
index 5cd5b68..b7685b0 100644
--- a/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
+++ b/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
@@ -27,6 +27,7 @@ import java.util.Set;
 import org.apache.blur.mapreduce.lib.BlurRecord;
 import org.apache.blur.thrift.generated.ColumnDefinition;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -82,38 +83,50 @@ public class BlurSerializer {
     }
 
     for (int i = 0; i < size; i++) {
+      String columnName = columnNames.get(i);
       StructField structFieldRef = outputFieldRefs.get(i);
-      Object structFieldData = structFieldsDataAsList.get(i);
-      if (structFieldData == null) {
-        continue;
-      }
       ObjectInspector fieldOI = structFieldRef.getFieldObjectInspector();
-      String columnName = columnNames.get(i);
-      if (fieldOI instanceof PrimitiveObjectInspector) {
-        PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector) fieldOI;
-        Object primitiveJavaObject = primitiveObjectInspector.getPrimitiveJavaObject(structFieldData);
-        String strValue = toString(columnName, primitiveJavaObject);
-        if (columnName.equals(BlurObjectInspectorGenerator.ROWID)) {
-          blurRecord.setRowId(strValue);
-        } else if (columnName.equals(BlurObjectInspectorGenerator.RECORDID)) {
-          blurRecord.setRecordId(strValue);
-        } else {
-          blurRecord.addColumn(columnName, strValue);
-        }
-      } else if (fieldOI instanceof StructObjectInspector) {
-        StructObjectInspector structObjectInspector = (StructObjectInspector) fieldOI;
-        Map<String, StructField> allStructFieldRefs = toMap(structObjectInspector.getAllStructFieldRefs());
-        StructField latStructField = allStructFieldRefs.get(BlurObjectInspectorGenerator.LATITUDE);
-        StructField longStructField = allStructFieldRefs.get(BlurObjectInspectorGenerator.LONGITUDE);
-        Object latStructFieldData = structObjectInspector.getStructFieldData(structFieldData, latStructField);
-        Object longStructFieldData = structObjectInspector.getStructFieldData(structFieldData, longStructField);
-        blurRecord.addColumn(columnName, toLatLong(latStructFieldData, longStructFieldData));
+      Object structFieldData = structFieldsDataAsList.get(i);
+      add(blurRecord, columnName, fieldOI, structFieldData);
+    }
+    return blurRecord;
+  }
+
+  private void add(BlurRecord blurRecord, String columnName, ObjectInspector objectInspector, Object data)
+      throws SerDeException {
+    if (data == null) {
+      return;
+    }
+    if (objectInspector instanceof PrimitiveObjectInspector) {
+      PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector) objectInspector;
+      Object primitiveJavaObject = primitiveObjectInspector.getPrimitiveJavaObject(data);
+      String strValue = toString(columnName, primitiveJavaObject);
+      if (columnName.equals(BlurObjectInspectorGenerator.ROWID)) {
+        blurRecord.setRowId(strValue);
+      } else if (columnName.equals(BlurObjectInspectorGenerator.RECORDID)) {
+        blurRecord.setRecordId(strValue);
       } else {
-        throw new SerDeException("ObjectInspector [" + fieldOI + "] of type ["
-            + (fieldOI != null ? fieldOI.getClass() : null) + "] not supported.");
+        blurRecord.addColumn(columnName, strValue);
       }
+    } else if (objectInspector instanceof StructObjectInspector) {
+      StructObjectInspector structObjectInspector = (StructObjectInspector) objectInspector;
+      Map<String, StructField> allStructFieldRefs = toMap(structObjectInspector.getAllStructFieldRefs());
+      StructField latStructField = allStructFieldRefs.get(BlurObjectInspectorGenerator.LATITUDE);
+      StructField longStructField = allStructFieldRefs.get(BlurObjectInspectorGenerator.LONGITUDE);
+      Object latStructFieldData = structObjectInspector.getStructFieldData(data, latStructField);
+      Object longStructFieldData = structObjectInspector.getStructFieldData(data, longStructField);
+      blurRecord.addColumn(columnName, toLatLong(latStructFieldData, longStructFieldData));
+    } else if (objectInspector instanceof ListObjectInspector) {
+      ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
+      List<?> list = listObjectInspector.getList(data);
+      ObjectInspector listElementObjectInspector = listObjectInspector.getListElementObjectInspector();
+      for (Object obj : list) {
+        add(blurRecord, columnName, listElementObjectInspector, obj);
+      }
+    } else {
+      throw new SerDeException("ObjectInspector [" + objectInspector + "] of type ["
+          + (objectInspector != null ? objectInspector.getClass() : null) + "] not supported.");
     }
-    return blurRecord;
   }
 
   private String toLatLong(Object latStructFieldData, Object longStructFieldData) throws SerDeException {

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/8873f455/blur-hive/src/test/java/org/apache/blur/hive/BlurSerDeTest.java
----------------------------------------------------------------------
diff --git a/blur-hive/src/test/java/org/apache/blur/hive/BlurSerDeTest.java b/blur-hive/src/test/java/org/apache/blur/hive/BlurSerDeTest.java
index ad9f92e..cf70bab 100644
--- a/blur-hive/src/test/java/org/apache/blur/hive/BlurSerDeTest.java
+++ b/blur-hive/src/test/java/org/apache/blur/hive/BlurSerDeTest.java
@@ -22,6 +22,7 @@ import java.io.File;
 import java.io.IOException;
 import java.sql.Date;
 import java.text.SimpleDateFormat;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -98,26 +99,48 @@ public class BlurSerDeTest {
       tableDescriptor.setShardCount(1);
       tableDescriptor.setTableUri(miniCluster.getFileSystemUri().toString() + "/blur/tables/test");
       client.createTable(tableDescriptor);
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "string-col", null, false, "string", null, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "text-col", null, false, "text", null, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "stored-col", null, false, "stored", null, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "double-col", null, false, "double", null, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "float-col", null, false, "float", null, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "long-col", null, false, "long", null, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "int-col", null, false, "int", null, false));
+
       Map<String, String> props = new HashMap<String, String>();
       props.put("dateFormat", YYYYMMDD);
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "date-col", null, false, "date", props, false));
-      client.addColumnDefinition(TEST, new ColumnDefinition("fam0", "geo-col", null, false, "geo-pointvector", null,
-          false));
 
+      client.addColumnDefinition(TEST, cd(false, "fam0", "string-col-single", "string"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "text-col-single", "text"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "stored-col-single", "stored"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "double-col-single", "double"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "float-col-single", "float"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "long-col-single", "long"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "int-col-single", "int"));
+      client.addColumnDefinition(TEST, cd(false, "fam0", "date-col-single", "date", props));
+
+      client.addColumnDefinition(TEST, cd(false, "fam0", "geo-col-single", "geo-pointvector"));
+
+      client.addColumnDefinition(TEST, cd(true, "fam0", "string-col-multi", "string"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "text-col-multi", "text"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "stored-col-multi", "stored"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "double-col-multi", "double"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "float-col-multi", "float"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "long-col-multi", "long"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "int-col-multi", "int"));
+      client.addColumnDefinition(TEST, cd(true, "fam0", "date-col-multi", "date", props));
     }
   }
 
+  private ColumnDefinition cd(boolean multiValue, String family, String columnName, String type) {
+    return cd(multiValue, family, columnName, type, null);
+  }
+
+  private ColumnDefinition cd(boolean multiValue, String family, String columnName, String type,
+      Map<String, String> props) {
+    ColumnDefinition columnDefinition = new ColumnDefinition(family, columnName, null, false, type, props, false);
+    columnDefinition.setMultiValueField(multiValue);
+    return columnDefinition;
+  }
+
   @Test
   public void test1() throws SerDeException {
     long now = System.currentTimeMillis();
     Date date = new Date(now);
+    SimpleDateFormat simpleDateFormat = new SimpleDateFormat(YYYYMMDD);
     BlurSerDe blurSerDe = new BlurSerDe();
 
     Configuration conf = new Configuration();
@@ -129,43 +152,79 @@ public class BlurSerDeTest {
     blurSerDe.initialize(conf, tbl);
 
     ObjectInspector objectInspector = blurSerDe.getObjectInspector();
-    Object[] row = new Object[11];
+    Object[] row = new Object[19];
     int c = 0;
     row[c++] = "rowid";
     row[c++] = "recordid";
+    row[c++] = new Object[] { date, date };
     row[c++] = date;
+    row[c++] = new Object[] { 1234.5678, 4321.5678 };
     row[c++] = 1234.5678;
+    row[c++] = new Object[] { 1234.567f, 4321.567f };
     row[c++] = 1234.567f;
-    row[c++] = new Object[] { 1.0f, 2.0 };
+    row[c++] = new Object[] { 1.0f, 2.0f };
+    row[c++] = new Object[] { 12345678, 87654321 };
     row[c++] = 12345678;
+    row[c++] = new Object[] { 12345678l, 87654321l };
     row[c++] = 12345678l;
+    row[c++] = new Object[] { "stored input1", "stored input2" };
     row[c++] = "stored input";
+    row[c++] = new Object[] { "string input1", "string input2" };
     row[c++] = "string input";
+    row[c++] = new Object[] { "text input1", "text input2" };
     row[c++] = "text input";
 
     BlurRecord blurRecord = (BlurRecord) blurSerDe.serialize(row, objectInspector);
     assertEquals("rowid", blurRecord.getRowId());
     assertEquals("recordid", blurRecord.getRecordId());
 
-    Map<String, String> columns = toMap(blurRecord.getColumns());
-    assertEquals("string input", columns.get("string-col"));
-    assertEquals("text input", columns.get("text-col"));
-    assertEquals("stored input", columns.get("stored-col"));
-    assertEquals("1234.5678", columns.get("double-col"));
-    assertEquals("1234.567", columns.get("float-col"));
-    assertEquals("12345678", columns.get("long-col"));
-    assertEquals("12345678", columns.get("int-col"));
-    assertEquals("1.0,2.0", columns.get("geo-col"));
-    SimpleDateFormat simpleDateFormat = new SimpleDateFormat(YYYYMMDD);
-    assertEquals(simpleDateFormat.format(date), columns.get("date-col"));
+    Map<String, List<String>> columns = toMap(blurRecord.getColumns());
+
+    assertEquals(list("string input"), columns.get("string-col-single"));
+    assertEquals(list("string input1", "string input2"), columns.get("string-col-multi"));
+
+    assertEquals(list("text input"), columns.get("text-col-single"));
+    assertEquals(list("text input1", "text input2"), columns.get("text-col-multi"));
+
+    assertEquals(list("stored input"), columns.get("stored-col-single"));
+    assertEquals(list("stored input1", "stored input2"), columns.get("stored-col-multi"));
+
+    assertEquals(list("1234.5678"), columns.get("double-col-single"));
+    assertEquals(list("1234.5678", "4321.5678"), columns.get("double-col-multi"));
+
+    assertEquals(list("1234.567"), columns.get("float-col-single"));
+    assertEquals(list("1234.567", "4321.567"), columns.get("float-col-multi"));
+
+    assertEquals(list("12345678"), columns.get("long-col-single"));
+    assertEquals(list("12345678", "87654321"), columns.get("long-col-multi"));
+
+    assertEquals(list("12345678"), columns.get("int-col-single"));
+    assertEquals(list("12345678", "87654321"), columns.get("int-col-multi"));
+
+    assertEquals(list(simpleDateFormat.format(date)), columns.get("date-col-single"));
+    assertEquals(list(simpleDateFormat.format(date), simpleDateFormat.format(date)), columns.get("date-col-multi"));
+
+    assertEquals(list("1.0,2.0"), columns.get("geo-col-single"));
   }
 
-  private Map<String, String> toMap(List<BlurColumn> columns) {
-    Map<String, String> map = new HashMap<String, String>();
+  private List<String> list(String... sarray) {
+    List<String> list = new ArrayList<String>();
+    for (String s : sarray) {
+      list.add(s);
+    }
+    return list;
+  }
+
+  private Map<String, List<String>> toMap(List<BlurColumn> columns) {
+    Map<String, List<String>> map = new HashMap<String, List<String>>();
     for (BlurColumn blurColumn : columns) {
-      map.put(blurColumn.getName(), blurColumn.getValue());
+      String name = blurColumn.getName();
+      List<String> list = map.get(name);
+      if (list == null) {
+        map.put(name, list = new ArrayList<String>());
+      }
+      list.add(blurColumn.getValue());
     }
     return map;
   }
-
 }


Mime
View raw message