tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From blrun...@apache.org
Subject tajo git commit: TAJO-2164: SequenceFile print wrong values with TextSerializerDeserializer.
Date Wed, 06 Jul 2016 15:22:59 GMT
Repository: tajo
Updated Branches:
  refs/heads/branch-0.11.4 926385f66 -> f02f0e393


TAJO-2164: SequenceFile print wrong values with TextSerializerDeserializer.


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f02f0e39
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f02f0e39
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f02f0e39

Branch: refs/heads/branch-0.11.4
Commit: f02f0e3933f55cf048c0faac448fec3591781143
Parents: 926385f
Author: JaeHwa Jung <blrunner@apache.org>
Authored: Thu Jul 7 00:21:54 2016 +0900
Committer: JaeHwa Jung <blrunner@apache.org>
Committed: Thu Jul 7 00:21:54 2016 +0900

----------------------------------------------------------------------
 CHANGES                                         |   3 +
 .../org/apache/tajo/catalog/CatalogUtil.java    |   2 +-
 .../tajo/catalog/store/HiveCatalogStore.java    |  14 +-
 .../catalog/store/TestHiveCatalogStore.java     |   6 +
 .../planner/physical/PhysicalPlanUtil.java      |   9 +-
 .../sequencefile/SequenceFileAppender.java      |  24 +++-
 .../sequencefile/SequenceFileScanner.java       |  39 ++++--
 .../apache/tajo/storage/text/TextLineSerDe.java |   8 +-
 .../org/apache/tajo/storage/TestStorages.java   | 136 +++++++++++++++++++
 9 files changed, 211 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 745f190..d517499 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,9 @@ Release 0.11.4 - unreleased
 
   BUG FIXES
 
+    TAJO-2164: SequenceFile print wrong values with TextSerializerDeserializer.
+    (jaehwa)
+
     TAJO-2168: NullPointerException occurs when a simple query contains a python udf. 
     (jihoon)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java
index 1473825..c6230cf 100644
--- a/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java
+++ b/tajo-catalog/tajo-catalog-common/src/main/java/org/apache/tajo/catalog/CatalogUtil.java
@@ -1002,7 +1002,7 @@ public class CatalogUtil {
       options.set(StorageConstants.RCFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
     } else if (dataFormat.equalsIgnoreCase("SEQUENCEFILE")) {
       options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
-      options.set(StorageConstants.SEQUENCEFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+      options.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
     } else if (dataFormat.equalsIgnoreCase("PARQUET")) {
       options.set(BLOCK_SIZE, StorageConstants.PARQUET_DEFAULT_BLOCK_SIZE);
       options.set(PAGE_SIZE, StorageConstants.PARQUET_DEFAULT_PAGE_SIZE);

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
index de01063..571b103 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/main/java/org/apache/tajo/catalog/store/HiveCatalogStore.java
@@ -210,8 +210,8 @@ public class HiveCatalogStore extends CatalogConstants implements CatalogStore {
           }
 
         } else if (BuiltinStorages.SEQUENCE_FILE.equals(dataFormat)) {
-          options.set(StorageConstants.SEQUENCEFILE_DELIMITER, StringEscapeUtils.escapeJava(fieldDelimiter));
-          options.set(StorageConstants.SEQUENCEFILE_NULL, StringEscapeUtils.escapeJava(nullFormat));
+          options.set(StorageConstants.TEXT_DELIMITER, StringEscapeUtils.escapeJava(fieldDelimiter));
+          options.set(StorageConstants.TEXT_NULL, StringEscapeUtils.escapeJava(nullFormat));
           String serde = properties.getProperty(serdeConstants.SERIALIZATION_LIB);
           if (LazyBinarySerDe.class.getName().equals(serde)) {
             options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_BINARY_SERDE);
@@ -537,7 +537,7 @@ public class HiveCatalogStore extends CatalogConstants implements CatalogStore {
         if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
           sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
 
-          String fieldDelimiter = tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_DELIMITER,
+          String fieldDelimiter = tableDesc.getMeta().getOption(StorageConstants.TEXT_DELIMITER,
               StorageConstants.DEFAULT_FIELD_DELIMITER);
 
           // User can use an unicode for filed delimiter such as \u0001, \001.
@@ -549,15 +549,15 @@ public class HiveCatalogStore extends CatalogConstants implements CatalogStore {
               StringEscapeUtils.unescapeJava(fieldDelimiter));
           sd.getSerdeInfo().putToParameters(serdeConstants.FIELD_DELIM,
               StringEscapeUtils.unescapeJava(fieldDelimiter));
-          table.getParameters().remove(StorageConstants.SEQUENCEFILE_DELIMITER);
+          table.getParameters().remove(StorageConstants.TEXT_DELIMITER);
         } else {
           sd.getSerdeInfo().setSerializationLib(LazyBinarySerDe.class.getName());
         }
 
-        if (tableDesc.getMeta().containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
+        if (tableDesc.getMeta().containsOption(StorageConstants.TEXT_NULL)) {
           table.putToParameters(serdeConstants.SERIALIZATION_NULL_FORMAT,
-              StringEscapeUtils.unescapeJava(tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_NULL)));
-          table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
+              StringEscapeUtils.unescapeJava(tableDesc.getMeta().getOption(StorageConstants.TEXT_NULL)));
+          table.getParameters().remove(StorageConstants.TEXT_NULL);
         }
       } else if (tableDesc.getMeta().getDataFormat().equalsIgnoreCase(BuiltinStorages.PARQUET)) {
         StorageFormatDescriptor descriptor = storageFormatFactory.get(IOConstants.PARQUET);

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
----------------------------------------------------------------------
diff --git a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
index 8cadb3a..3e796c5 100644
--- a/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
+++ b/tajo-catalog/tajo-catalog-drivers/tajo-hive/src/test/java/org/apache/tajo/catalog/store/TestHiveCatalogStore.java
@@ -36,6 +36,7 @@ import org.apache.tajo.catalog.proto.CatalogProtos;
 import org.apache.tajo.catalog.proto.CatalogProtos.PartitionKeyProto;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.conf.TajoConf;
+import org.apache.tajo.datum.NullDatum;
 import org.apache.tajo.storage.StorageConstants;
 import org.apache.tajo.util.CommonTestingUtil;
 import org.apache.tajo.util.KeyValueSet;
@@ -448,6 +449,8 @@ public class TestHiveCatalogStore {
   public void testTableUsingSequenceFileWithTextSerde() throws Exception {
     KeyValueSet options = new KeyValueSet();
     options.set(StorageConstants.SEQUENCEFILE_SERDE, StorageConstants.DEFAULT_TEXT_SERDE);
+    options.set(StorageConstants.TEXT_DELIMITER, "\u0001");
+    options.set(StorageConstants.TEXT_NULL, NullDatum.DEFAULT_TEXT);
     TableMeta meta = new TableMeta(BuiltinStorages.SEQUENCE_FILE, options);
 
     org.apache.tajo.catalog.Schema schema = new org.apache.tajo.catalog.Schema();
@@ -474,6 +477,9 @@ public class TestHiveCatalogStore {
     }
 
     assertEquals(StorageConstants.DEFAULT_TEXT_SERDE, table1.getMeta().getOption(StorageConstants.SEQUENCEFILE_SERDE));
+    assertEquals("\u0001", StringEscapeUtils.unescapeJava(table1.getMeta().getOption(StorageConstants
+      .TEXT_DELIMITER)));
+    assertEquals(NullDatum.DEFAULT_TEXT, table1.getMeta().getOption(StorageConstants.TEXT_NULL));
     store.dropTable(DB_NAME, REGION);
   }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
index e27ba5a..fef79d2 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/PhysicalPlanUtil.java
@@ -215,7 +215,7 @@ public class PhysicalPlanUtil {
     }  else if (dataFormat.equalsIgnoreCase(BuiltinStorages.RCFILE)) {
       meta.putOption(StorageConstants.RCFILE_NULL, nullChar);
     } else if (dataFormat.equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
-      meta.putOption(StorageConstants.SEQUENCEFILE_NULL, nullChar);
+      meta.putOption(StorageConstants.TEXT_NULL, nullChar);
     }
   }
 
@@ -232,7 +232,12 @@ public class PhysicalPlanUtil {
     } else if (dataFormat.equalsIgnoreCase(BuiltinStorages.RCFILE)) {
       return meta.containsOption(StorageConstants.RCFILE_NULL);
     } else if (dataFormat.equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
-      return meta.containsOption(StorageConstants.SEQUENCEFILE_NULL);
+      if (!meta.containsOption(StorageConstants.TEXT_NULL)
+        && meta.containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
+        return meta.containsOption(StorageConstants.SEQUENCEFILE_NULL);
+      } else {
+        return meta.containsOption(StorageConstants.TEXT_NULL);
+      }
     } else {
       return false;
     }

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
index 8e0a88c..e799217 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
@@ -83,17 +83,33 @@ public class SequenceFileAppender extends FileAppender {
 
     this.fs = path.getFileSystem(conf);
 
-    this.delimiter = StringEscapeUtils.unescapeJava(this.meta.getOption(StorageConstants.SEQUENCEFILE_DELIMITER,
+    // Set value of non-deprecated key for backward compatibility.
+    if (!meta.containsOption(StorageConstants.TEXT_DELIMITER)
+      && meta.containsOption(StorageConstants.SEQUENCEFILE_DELIMITER)) {
+      this.delimiter = StringEscapeUtils.unescapeJava(meta.getOption(StorageConstants.SEQUENCEFILE_DELIMITER,
         StorageConstants.DEFAULT_FIELD_DELIMITER)).charAt(0);
-    this.columnNum = schema.size();
-    String nullCharacters = StringEscapeUtils.unescapeJava(this.meta.getOption(StorageConstants.SEQUENCEFILE_NULL,
-        NullDatum.DEFAULT_TEXT));
+    } else {
+      this.delimiter = StringEscapeUtils.unescapeJava(meta.getOption(StorageConstants.TEXT_DELIMITER,
+        StorageConstants.DEFAULT_FIELD_DELIMITER)).charAt(0);
+    }
+
+    String nullCharacters;
+    if (!meta.containsOption(StorageConstants.TEXT_NULL) && meta.containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
+      nullCharacters = StringEscapeUtils.unescapeJava(meta.getOption(StorageConstants.SEQUENCEFILE_NULL,
+      NullDatum.DEFAULT_TEXT));
+    } else {
+      nullCharacters = StringEscapeUtils.unescapeJava(meta.getOption(StorageConstants.TEXT_NULL,
+      NullDatum.DEFAULT_TEXT));
+    }
+
     if (StringUtils.isEmpty(nullCharacters)) {
       nullChars = NullDatum.get().asTextBytes();
     } else {
       nullChars = nullCharacters.getBytes();
     }
 
+    this.columnNum = schema.size();
+
     if(this.meta.containsOption(StorageConstants.COMPRESSION_CODEC)) {
       String codecName = this.meta.getOption(StorageConstants.COMPRESSION_CODEC);
       codecFactory = new CompressionCodecFactory(conf);

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java
index 1c72987..a763dbd 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileScanner.java
@@ -32,6 +32,7 @@ import org.apache.tajo.catalog.TableMeta;
 import org.apache.tajo.conf.TajoConf;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.exception.TajoInternalError;
 import org.apache.tajo.exception.TajoRuntimeException;
 import org.apache.tajo.exception.UnsupportedException;
 import org.apache.tajo.plan.expr.EvalNode;
@@ -96,17 +97,35 @@ public class SequenceFileScanner extends FileScanner {
 
     reader = new SequenceFile.Reader(fs, fragment.getPath(), conf);
 
-    String nullCharacters = StringEscapeUtils.unescapeJava(this.meta.getOption(StorageConstants.SEQUENCEFILE_NULL,
-        NullDatum.DEFAULT_TEXT));
+    // Set value of non-deprecated key for backward compatibility.
+    TableMeta tableMeta;
+    try {
+      tableMeta = (TableMeta) meta.clone();
+
+      if (!tableMeta.containsOption(StorageConstants.TEXT_DELIMITER)) {
+        tableMeta.getOption(StorageConstants.TEXT_DELIMITER, tableMeta.getOption(StorageConstants
+          .SEQUENCEFILE_DELIMITER));
+      }
+
+      if (!tableMeta.containsOption(StorageConstants.TEXT_NULL) && tableMeta.containsOption(StorageConstants
+        .SEQUENCEFILE_NULL)) {
+        tableMeta.putOption(StorageConstants.TEXT_NULL, tableMeta.getOption(StorageConstants.SEQUENCEFILE_NULL));
+      }
+    } catch (CloneNotSupportedException e) {
+      throw new TajoInternalError(e);
+    }
+
+    String delim  = tableMeta.getOption(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    this.delimiter = StringEscapeUtils.unescapeJava(delim).charAt(0);
+
+    String nullCharacters = StringEscapeUtils.unescapeJava(tableMeta.getOption(StorageConstants.TEXT_NULL, NullDatum
+      .DEFAULT_TEXT));
     if (StringUtils.isEmpty(nullCharacters)) {
       nullChars = NullDatum.get().asTextBytes();
     } else {
       nullChars = nullCharacters.getBytes();
     }
 
-    String delim  = meta.getOption(StorageConstants.SEQUENCEFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
-    this.delimiter = StringEscapeUtils.unescapeJava(delim).charAt(0);
-
     this.start = fragment.getStartKey();
     this.end = start + fragment.getLength();
 
@@ -120,9 +139,6 @@ public class SequenceFileScanner extends FileScanner {
     }
 
     outTuple = new VTuple(targets.length);
-    deserializer = DelimitedTextFile.getLineSerde(meta).createDeserializer(schema, meta, targets);
-    deserializer.init();
-
     fieldIsNull = new boolean[schema.getRootColumns().size()];
     fieldStart = new int[schema.getRootColumns().size()];
     fieldLength = new int[schema.getRootColumns().size()];
@@ -130,12 +146,17 @@ public class SequenceFileScanner extends FileScanner {
     prepareProjection(targets);
 
     try {
-      String serdeClass = this.meta.getOption(StorageConstants.SEQUENCEFILE_SERDE, TextSerializerDeserializer.class.getName());
+      String serdeClass = tableMeta.getOption(StorageConstants.SEQUENCEFILE_SERDE, TextSerializerDeserializer.class
+        .getName());
+
       serde = (SerializerDeserializer) Class.forName(serdeClass).newInstance();
       serde.init(schema);
 
       if (serde instanceof BinarySerializerDeserializer) {
         hasBinarySerDe = true;
+      } else {
+        deserializer = DelimitedTextFile.getLineSerde(tableMeta).createDeserializer(schema, tableMeta, targets);
+        deserializer.init();
       }
 
       Class<? extends Writable> keyClass = (Class<? extends Writable>)Class.forName(reader.getKeyClassName());

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextLineSerDe.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextLineSerDe.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextLineSerDe.java
index 9688143..88f1de7 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextLineSerDe.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/text/TextLineSerDe.java
@@ -43,13 +43,7 @@ public abstract class TextLineSerDe {
   public abstract TextLineSerializer createSerializer(Schema schema, TableMeta meta);
 
   public static ByteBuf getNullChars(TableMeta meta) {
-    byte[] nullCharByteArray;
-    if (meta.getDataFormat().equals(BuiltinStorages.SEQUENCE_FILE)) {
-      nullCharByteArray = getNullCharsAsBytes(meta, StorageConstants.SEQUENCEFILE_NULL, "\\");
-    } else {
-      nullCharByteArray = getNullCharsAsBytes(meta);
-    }
-
+    byte[] nullCharByteArray = getNullCharsAsBytes(meta);
     ByteBuf nullChars = BufferPool.directBuffer(nullCharByteArray.length, nullCharByteArray.length);
     nullChars.writeBytes(nullCharByteArray);
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/f02f0e39/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
index 9d82783..f596884 100644
--- a/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-storage/tajo-storage-hdfs/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -859,6 +859,142 @@ public class TestStorages {
   }
 
   @Test
+  public void testSequenceFileTextSerializeDeserializeWithDeprecatedProperties() throws IOException {
+    if(!dataFormat.equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) return;
+
+    Schema schema = new Schema();
+    schema.addColumn("col1", Type.BOOLEAN);
+    schema.addColumn("col2", CatalogUtil.newDataTypeWithLen(Type.CHAR, 7));
+    schema.addColumn("col3", Type.INT2);
+    schema.addColumn("col4", Type.INT4);
+    schema.addColumn("col5", Type.INT8);
+    schema.addColumn("col6", Type.FLOAT4);
+    schema.addColumn("col7", Type.FLOAT8);
+    schema.addColumn("col8", Type.TEXT);
+    schema.addColumn("col9", Type.BLOB);
+    schema.addColumn("col10", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName()));
+
+    TableMeta meta = CatalogUtil.newTableMeta(dataFormat, conf);
+    meta.putOption(StorageConstants.SEQUENCEFILE_SERDE, TextSerializerDeserializer.class.getName());
+    meta.putOption(StorageConstants.SEQUENCEFILE_DELIMITER, "\u0001");
+    meta.putOption(StorageConstants.SEQUENCEFILE_NULL, "\\");
+
+    Path tablePath = new Path(testDir, "testSequenceFileTextSerializeDeserializeWithDeprecatedProperties.data");
+    FileTablespace sm = TablespaceManager.getLocalFs();
+    Appender appender = sm.getAppender(meta, schema, tablePath);
+    appender.enableStats();
+    appender.init();
+
+    QueryId queryid = new QueryId("12345", 5);
+    ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
+
+    VTuple tuple = new VTuple(new Datum[] {
+      DatumFactory.createBool(true),
+      DatumFactory.createChar("jinho"),
+      DatumFactory.createInt2((short) 17),
+      DatumFactory.createInt4(59),
+      DatumFactory.createInt8(23l),
+      DatumFactory.createFloat4(77.9f),
+      DatumFactory.createFloat8(271.9f),
+      DatumFactory.createText("jinho"),
+      DatumFactory.createBlob("hyunsik babo".getBytes()),
+      factory.createDatum(queryid.getProto())
+    });
+    appender.addTuple(tuple);
+    appender.flush();
+    appender.close();
+
+    FileStatus status = fs.getFileStatus(tablePath);
+    assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen());
+
+    FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());
+    Scanner scanner =  TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
+    scanner.init();
+
+    assertTrue(scanner instanceof SequenceFileScanner);
+    Writable key = ((SequenceFileScanner) scanner).getKey();
+    assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName());
+
+    Tuple retrieved;
+    while ((retrieved=scanner.next()) != null) {
+      for (int i = 0; i < tuple.size(); i++) {
+        assertEquals(tuple.get(i), retrieved.asDatum(i));
+      }
+    }
+    scanner.close();
+    assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue());
+    assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue());
+  }
+
+  @Test
+  public void testSequenceFileTextSerializeDeserializeWithNonDeprecatedProperties() throws IOException {
+    if(!dataFormat.equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) return;
+
+    Schema schema = new Schema();
+    schema.addColumn("col1", Type.BOOLEAN);
+    schema.addColumn("col2", CatalogUtil.newDataTypeWithLen(Type.CHAR, 7));
+    schema.addColumn("col3", Type.INT2);
+    schema.addColumn("col4", Type.INT4);
+    schema.addColumn("col5", Type.INT8);
+    schema.addColumn("col6", Type.FLOAT4);
+    schema.addColumn("col7", Type.FLOAT8);
+    schema.addColumn("col8", Type.TEXT);
+    schema.addColumn("col9", Type.BLOB);
+    schema.addColumn("col10", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName()));
+
+    TableMeta meta = CatalogUtil.newTableMeta(dataFormat, conf);
+    meta.putOption(StorageConstants.SEQUENCEFILE_SERDE, TextSerializerDeserializer.class.getName());
+    meta.putOption(StorageConstants.TEXT_DELIMITER, "\u0001");
+    meta.putOption(StorageConstants.TEXT_NULL, "\\");
+
+    Path tablePath = new Path(testDir, "testSequenceFileTextSerializeDeserializeWithNonDeprecatedProperties.data");
+    FileTablespace sm = TablespaceManager.getLocalFs();
+    Appender appender = sm.getAppender(meta, schema, tablePath);
+    appender.enableStats();
+    appender.init();
+
+    QueryId queryid = new QueryId("12345", 5);
+    ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName());
+
+    VTuple tuple = new VTuple(new Datum[] {
+      DatumFactory.createBool(true),
+      DatumFactory.createChar("jinho"),
+      DatumFactory.createInt2((short) 17),
+      DatumFactory.createInt4(59),
+      DatumFactory.createInt8(23l),
+      DatumFactory.createFloat4(77.9f),
+      DatumFactory.createFloat8(271.9f),
+      DatumFactory.createText("jinho"),
+      DatumFactory.createBlob("hyunsik babo".getBytes()),
+      factory.createDatum(queryid.getProto())
+    });
+    appender.addTuple(tuple);
+    appender.flush();
+    appender.close();
+
+    FileStatus status = fs.getFileStatus(tablePath);
+    assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen());
+
+    FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen());
+    Scanner scanner =  TablespaceManager.getLocalFs().getScanner(meta, schema, fragment, null);
+    scanner.init();
+
+    assertTrue(scanner instanceof SequenceFileScanner);
+    Writable key = ((SequenceFileScanner) scanner).getKey();
+    assertEquals(key.getClass().getCanonicalName(), LongWritable.class.getCanonicalName());
+
+    Tuple retrieved;
+    while ((retrieved=scanner.next()) != null) {
+      for (int i = 0; i < tuple.size(); i++) {
+        assertEquals(tuple.get(i), retrieved.asDatum(i));
+      }
+    }
+    scanner.close();
+    assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue());
+    assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue());
+  }
+
+  @Test
   public void testSequenceFileBinarySerializeDeserialize() throws IOException {
     if(!dataFormat.equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) return;
 


Mime
View raw message