orc-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omal...@apache.org
Subject orc git commit: ORC-202. Add writer implementation enum to record which software wrote the file.
Date Sun, 18 Jun 2017 02:44:11 GMT
Repository: orc
Updated Branches:
  refs/heads/master ded204a4a -> 7dd7dafa8


ORC-202. Add writer implementation enum to record which software wrote the
file.

Fixes #132

Signed-off-by: Owen O'Malley <omalley@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/7dd7dafa
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/7dd7dafa
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/7dd7dafa

Branch: refs/heads/master
Commit: 7dd7dafa8085109b71c43e3092f3b0eda8046991
Parents: ded204a
Author: Owen O'Malley <omalley@apache.org>
Authored: Fri Jun 16 08:50:53 2017 -0700
Committer: Owen O'Malley <omalley@apache.org>
Committed: Sat Jun 17 19:43:43 2017 -0700

----------------------------------------------------------------------
 .../src/java/org/apache/orc/FileMetadata.java   |   2 +
 java/core/src/java/org/apache/orc/OrcFile.java  | 118 ++++++++++++++-----
 .../src/java/org/apache/orc/impl/OrcTail.java   |   6 +-
 .../java/org/apache/orc/impl/ReaderImpl.java    |   4 +-
 .../java/org/apache/orc/impl/WriterImpl.java    |   1 +
 .../test/org/apache/orc/TestVectorOrcFile.java  |  47 +++++++-
 .../resources/orc-file-dump-bloomfilter.out     |   2 +-
 .../resources/orc-file-dump-bloomfilter2.out    |   2 +-
 .../orc-file-dump-dictionary-threshold.out      |   2 +-
 .../tools/src/test/resources/orc-file-dump.json |   2 +-
 java/tools/src/test/resources/orc-file-dump.out |   2 +-
 .../src/test/resources/orc-file-has-null.out    |   2 +-
 proto/orc_proto.proto                           |  40 +++++--
 13 files changed, 180 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/FileMetadata.java b/java/core/src/java/org/apache/orc/FileMetadata.java
index 807e696..347035b 100644
--- a/java/core/src/java/org/apache/orc/FileMetadata.java
+++ b/java/core/src/java/org/apache/orc/FileMetadata.java
@@ -50,6 +50,8 @@ public interface FileMetadata {
 
   int getMetadataSize();
 
+  int getWriterImplementation();
+
   int getWriterVersionNum();
 
   List<OrcProto.Type> getTypes();

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 11281cb..d1fa3fb 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -30,7 +30,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.orc.MemoryManager;
 import org.apache.orc.impl.MemoryManagerImpl;
 import org.apache.orc.impl.OrcTail;
 import org.apache.orc.impl.ReaderImpl;
@@ -108,47 +107,91 @@ public class OrcFile {
     }
   }
 
+  public enum WriterImplementation {
+    ORC_JAVA(0), // ORC Java writer
+    ORC_CPP(1),  // ORC C++ writer
+    PRESTO(2),   // Presto writer
+    UNKNOWN(Integer.MAX_VALUE);
+
+    private final int id;
+
+    WriterImplementation(int id) {
+      this.id = id;
+    }
+
+    public int getId() {
+      return id;
+    }
+
+    public static WriterImplementation from(int id) {
+      WriterImplementation[] values = values();
+      if (id >= 0 && id < values.length - 1) {
+        return values[id];
+      }
+      return UNKNOWN;
+    }
+  }
+
   /**
    * Records the version of the writer in terms of which bugs have been fixed.
-   * For bugs in the writer, but the old readers already read the new data
-   * correctly, bump this version instead of the Version.
+   * When you fix bugs in the writer (or make substantial changes) that don't
+   * change the file format, add a new version here instead of Version.
+   *
+   * The ids are assigned sequentially from 6 per a WriterImplementation so that
+   * readers that predate ORC-202 treat the other writers correctly.
    */
   public enum WriterVersion {
-    ORIGINAL(0),
-    HIVE_8732(1), // corrupted stripe/file maximum column statistics
-    HIVE_4243(2), // use real column names from Hive tables
-    HIVE_12055(3), // vectorized writer
-    HIVE_13083(4), // decimal writer updating present stream wrongly
-    ORC_101(5),    // bloom filters use utf8
-    ORC_135(6), // timestamp stats use utc
+    // Java ORC Writer
+    ORIGINAL(WriterImplementation.ORC_JAVA, 0),
+    HIVE_8732(WriterImplementation.ORC_JAVA, 1), // fixed stripe/file maximum
+                                                 // statistics & string statistics
+                                                 // use utf8 for min/max
+    HIVE_4243(WriterImplementation.ORC_JAVA, 2), // use real column names from
+                                                 // Hive tables
+    HIVE_12055(WriterImplementation.ORC_JAVA, 3), // vectorized writer
+    HIVE_13083(WriterImplementation.ORC_JAVA, 4), // decimals write present stream correctly
+    ORC_101(WriterImplementation.ORC_JAVA, 5),   // bloom filters use utf8
+    ORC_135(WriterImplementation.ORC_JAVA, 6),   // timestamp stats use utc
+
+    // C++ ORC Writer
+    ORC_CPP_ORIGINAL(WriterImplementation.ORC_CPP, 6),
+
+    // Presto Writer
+    PRESTO_ORIGINAL(WriterImplementation.PRESTO, 6),
 
     // Don't use any magic numbers here except for the below:
-    FUTURE(Integer.MAX_VALUE); // a version from a future writer
+    FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from a future writer
 
     private final int id;
+    private final WriterImplementation writer;
+
+    public WriterImplementation getWriterImplementation() {
+      return writer;
+    }
 
     public int getId() {
       return id;
     }
 
-    WriterVersion(int id) {
+    WriterVersion(WriterImplementation writer, int id) {
+      this.writer = writer;
       this.id = id;
     }
 
-    private static final WriterVersion[] values;
+    private static final WriterVersion[][] values =
+        new WriterVersion[WriterImplementation.values().length][];
+
     static {
-      // Assumes few non-negative values close to zero.
-      int max = Integer.MIN_VALUE;
-      for (WriterVersion v : WriterVersion.values()) {
-        if (v.id < 0) throw new AssertionError();
-        if (v.id > max && FUTURE.id != v.id) {
-          max = v.id;
-        }
-      }
-      values = new WriterVersion[max + 1];
-      for (WriterVersion v : WriterVersion.values()) {
-        if (v.id < values.length) {
-          values[v.id] = v;
+      for(WriterVersion v: WriterVersion.values()) {
+        WriterImplementation writer = v.writer;
+        if (writer != WriterImplementation.UNKNOWN) {
+          if (values[writer.id] == null) {
+            values[writer.id] = new WriterVersion[WriterVersion.values().length];
+          }
+          if (values[writer.id][v.id] != null) {
+            throw new IllegalArgumentException("Duplicate WriterVersion id " + v);
+          }
+          values[writer.id][v.id] = v;
         }
       }
     }
@@ -156,18 +199,33 @@ public class OrcFile {
     /**
      * Convert the integer from OrcProto.PostScript.writerVersion
      * to the enumeration with unknown versions being mapped to FUTURE.
+     * @param writer the writer implementation
      * @param val the serialized writer version
      * @return the corresponding enumeration value
      */
-    public static WriterVersion from(int val) {
-      if (val >= values.length) {
+    public static WriterVersion from(WriterImplementation writer, int val) {
+      if (writer == WriterImplementation.UNKNOWN) {
+        return FUTURE;
+      }
+      if (writer != WriterImplementation.ORC_JAVA && val < 6) {
+        throw new IllegalArgumentException("ORC File with illegval version " +
+            val + " for writer " + writer);
+      }
+      WriterVersion[] versions = values[writer.id];
+      if (val < 0 || versions.length < val) {
         return FUTURE;
       }
-      return values[val];
+      WriterVersion result = versions[val];
+      return result == null ? FUTURE : result;
     }
 
-    public boolean includes(WriterVersion other) {
-      return id >= other.id;
+    /**
+     * Does this file include the given fix or come from a different writer?
+     * @param fix the required fix
+     * @return true if the required fix is present
+     */
+    public boolean includes(WriterVersion fix) {
+      return writer != fix.writer || id >= fix.id;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/impl/OrcTail.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/OrcTail.java b/java/core/src/java/org/apache/orc/impl/OrcTail.java
index f2f80a5..3c78874 100644
--- a/java/core/src/java/org/apache/orc/impl/OrcTail.java
+++ b/java/core/src/java/org/apache/orc/impl/OrcTail.java
@@ -70,8 +70,10 @@ public final class OrcTail {
 
   public OrcFile.WriterVersion getWriterVersion() {
     OrcProto.PostScript ps = fileTail.getPostscript();
-    return (ps.hasWriterVersion()
-        ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+    OrcProto.Footer footer = fileTail.getFooter();
+    OrcFile.WriterImplementation writer =
+        OrcFile.WriterImplementation.from(footer.getWriter());
+    return OrcFile.WriterVersion.from(writer, ps.getWriterVersion());
   }
 
   public List<StripeInformation> getStripes() {

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index ad1bc1e..7702ac6 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -361,8 +361,10 @@ public class ReaderImpl implements Reader {
       this.metadataSize = fileMetadata.getMetadataSize();
       this.stripeStats = fileMetadata.getStripeStats();
       this.versionList = fileMetadata.getVersionList();
+      OrcFile.WriterImplementation writer =
+          OrcFile.WriterImplementation.from(fileMetadata.getWriterImplementation());
       this.writerVersion =
-          OrcFile.WriterVersion.from(fileMetadata.getWriterVersionNum());
+          OrcFile.WriterVersion.from(writer, fileMetadata.getWriterVersionNum());
       this.types = fileMetadata.getTypes();
       this.rowIndexStride = fileMetadata.getRowIndexStride();
       this.contentLength = fileMetadata.getContentLength();

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index a5d65dd..90eaf4f 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -492,6 +492,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
       builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
         .setName(entry.getKey()).setValue(entry.getValue()));
     }
+    builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
     physicalWriter.writeFileFooter(builder);
     return writePostScript();
   }

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index f975b73..9373216 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3039,9 +3039,50 @@ public class TestVectorOrcFile {
 
   @Test
   public void testWriterVersion() throws Exception {
-    assertEquals(OrcFile.WriterVersion.FUTURE, OrcFile.WriterVersion.from(99));
-    assertEquals(OrcFile.WriterVersion.ORIGINAL, OrcFile.WriterVersion.from(0));
-    assertEquals(OrcFile.WriterVersion.HIVE_4243, OrcFile.WriterVersion.from(2));
+    // test writer implementation serialization
+    assertEquals(OrcFile.WriterImplementation.ORC_JAVA,
+        OrcFile.WriterImplementation.from(0));
+    assertEquals(OrcFile.WriterImplementation.ORC_CPP,
+        OrcFile.WriterImplementation.from(1));
+    assertEquals(OrcFile.WriterImplementation.PRESTO,
+        OrcFile.WriterImplementation.from(2));
+    assertEquals(OrcFile.WriterImplementation.UNKNOWN,
+        OrcFile.WriterImplementation.from(99));
+
+    // test writer version serialization
+    assertEquals(OrcFile.WriterVersion.FUTURE,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, 99));
+    assertEquals(OrcFile.WriterVersion.ORIGINAL,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, 0));
+    assertEquals(OrcFile.WriterVersion.HIVE_4243,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, 2));
+    assertEquals(OrcFile.WriterVersion.FUTURE,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_CPP, 99));
+    assertEquals(OrcFile.WriterVersion.ORC_CPP_ORIGINAL,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_CPP, 6));
+    assertEquals(OrcFile.WriterVersion.PRESTO_ORIGINAL,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6));
+    assertEquals(OrcFile.WriterVersion.FUTURE,
+        OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0));
+
+    // test compatibility
+    assertTrue(OrcFile.WriterVersion.FUTURE.includes(
+        OrcFile.WriterVersion.ORC_CPP_ORIGINAL));
+    assertTrue(OrcFile.WriterVersion.FUTURE.includes(
+        OrcFile.WriterVersion.HIVE_8732));
+    assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+        OrcFile.WriterVersion.HIVE_4243));
+    assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+        OrcFile.WriterVersion.HIVE_12055));
+    assertTrue(!OrcFile.WriterVersion.HIVE_4243.includes(
+        OrcFile.WriterVersion.HIVE_12055));
+    assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+        OrcFile.WriterVersion.PRESTO_ORIGINAL));
+  }
+
+  @Test(expected=IllegalArgumentException.class)
+  public void testBadPrestoVersion() {
+    OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 0);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index dcf29f7..5775500 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -172,7 +172,7 @@ Stripes:
       Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
       Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
 
-File length: 272450 bytes
+File length: 272452 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 ________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
index 4ea04b5..8afddae 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -187,7 +187,7 @@ Stripes:
       Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
       Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
 
-File length: 332511 bytes
+File length: 332513 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 ________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
index 78e0258..5989250 100644
--- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -183,7 +183,7 @@ Stripes:
     Row group indices for column 3:
       Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0
 
-File length: 2217611 bytes
+File length: 2217614 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 ________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 3914f82..81c96df 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -1348,7 +1348,7 @@
       }]
     }
   ],
-  "fileLength": 272434,
+  "fileLength": 272436,
   "paddingLength": 0,
   "paddingRatio": 0,
   "status": "OK"

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump.out b/java/tools/src/test/resources/orc-file-dump.out
index 51105f0..c8cf7ad 100644
--- a/java/tools/src/test/resources/orc-file-dump.out
+++ b/java/tools/src/test/resources/orc-file-dump.out
@@ -188,7 +188,7 @@ Stripes:
     Row group indices for column 3:
       Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
 
-File length: 270923 bytes
+File length: 270925 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 ________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-has-null.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-has-null.out b/java/tools/src/test/resources/orc-file-has-null.out
index a42a62d..d7e78f7 100644
--- a/java/tools/src/test/resources/orc-file-has-null.out
+++ b/java/tools/src/test/resources/orc-file-has-null.out
@@ -105,7 +105,7 @@ Stripes:
       Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
       Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
 
-File length: 1823 bytes
+File length: 1825 bytes
 Padding length: 0 bytes
 Padding ratio: 0%
 ________________________________________________________________________________________________________________________

http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index ee2ca45..fef207c 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -200,6 +200,12 @@ message Footer {
   optional uint64 numberOfRows = 6;
   repeated ColumnStatistics statistics = 7;
   optional uint32 rowIndexStride = 8;
+
+  // Each implementation that writes ORC files should register for a code
+  // 0 = ORC Java
+  // 1 = ORC C++
+  // 2 = Presto
+  optional uint32 writer = 9;
 }
 
 enum CompressionKind {
@@ -221,15 +227,33 @@ message PostScript {
   //   [0, 12] = Hive 0.12
   repeated uint32 version = 4 [packed = true];
   optional uint64 metadataLength = 5;
-  // Version of the writer:
-  //   0 (or missing) = original
-  //   1 = HIVE-8732 fixed
-  //   2 = HIVE-4243 fixed
-  //   3 = HIVE-12055 fixed
-  //   4 = HIVE-13083 fixed
-  //   5 = ORC-101 fixed
-  //   6 = ORC-135 fixed
+
+  // The version of the writer that wrote the file. This number is
+  // updated when we make fixes or large changes to the writer so that
+  // readers can detect whether a given bug is present in the data.
+  //
+  // Only the Java ORC writer may use values under 6 (or missing) so that
+  // readers that predate ORC-202 treat the new writers correctly. Each
+  // writer should assign their own sequence of versions starting from 6.
+  //
+  // Version of the ORC Java writer:
+  //   0 = original
+  //   1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
+  //                        string statistics use utf8 for min/max)
+  //   2 = HIVE-4243 fixed (use real column names from Hive tables)
+  //   3 = HIVE-12055 fixed (vectorized writer implementation)
+  //   4 = HIVE-13083 fixed (decimals write present stream correctly)
+  //   5 = ORC-101 fixed (bloom filters use utf8 consistently)
+  //   6 = ORC-135 fixed (timestamp statistics use utc)
+  //
+  // Version of the ORC C++ writer:
+  //   6 = original
+  //
+  // Version of the Presto writer:
+  //   6 = original
+  //
   optional uint32 writerVersion = 6;
+
   // Leave this last in the record
   optional string magic = 8000;
 }


Mime
View raw message