hive-commits mailing list archives

From: hashut...@apache.org
Subject: svn commit: r1512893 - in /hive/trunk: common/src/java/org/apache/hadoop/hive/conf/ conf/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/org/apache/hadoop/hive/ql/io/orc/ ql/src/test/queries/clientpositive/ ql/src/test/resources/ ql/src/test...
Date: Sun, 11 Aug 2013 08:45:20 GMT
Author: hashutosh
Date: Sun Aug 11 08:45:19 2013
New Revision: 1512893

URL: http://svn.apache.org/r1512893
Log:
HIVE-4324 : ORC Turn off dictionary encoding when number of distinct keys is greater than
threshold (Kevin Wilfong & Owen Omalley via Ashutosh Chauhan)
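
For readers skimming the diff below, the core of the change is a per-stripe decision in the string writer: at the end of each stripe it compares the number of distinct keys in its dictionary to the number of non-null rows written, and falls back to direct encoding when that ratio exceeds hive.exec.orc.dictionary.key.size.threshold (default 0.8). The following is a minimal standalone sketch of that rule; the class and method names are illustrative only, and the real decision is made in WriterImpl.StringTreeWriter.writeStripe further down.

// Illustrative sketch of the per-stripe encoding decision; not Hive API.
public final class DictionaryThresholdSketch {
  enum Encoding { DICTIONARY, DIRECT }

  // distinctKeys: distinct string keys seen in the stripe
  // nonNullRows:  non-null rows written in the stripe
  // threshold:    value of hive.exec.orc.dictionary.key.size.threshold (default 0.8)
  static Encoding chooseStringEncoding(int distinctKeys, int nonNullRows, float threshold) {
    boolean useDictionary =
        nonNullRows > 0 && (float) distinctKeys / nonNullRows <= threshold;
    return useDictionary ? Encoding.DICTIONARY : Encoding.DIRECT;
  }

  public static void main(String[] args) {
    System.out.println(chooseStringEncoding(500, 1000, 0.8f));  // DICTIONARY
    System.out.println(chooseStringEncoding(900, 1000, 0.8f));  // DIRECT
  }
}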

Added:
    hive/trunk/ql/src/test/queries/clientpositive/orc_dictionary_threshold.q
    hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
    hive/trunk/ql/src/test/results/clientpositive/orc_dictionary_threshold.q.out
Modified:
    hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/trunk/conf/hive-default.xml.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java

Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sun Aug 11 08:45:19 2013
@@ -503,6 +503,8 @@ public class HiveConf extends Configurat
     // Maximum fraction of heap that can be used by ORC file writers
     HIVE_ORC_FILE_MEMORY_POOL("hive.exec.orc.memory.pool", 0.5f), // 50%
 
+    HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD("hive.exec.orc.dictionary.key.size.threshold", 0.8f),
+
     HIVESKEWJOIN("hive.optimize.skewjoin", false),
     HIVECONVERTJOIN("hive.auto.convert.join", true),
     HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true),
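
The new entry is read back through plain Configuration.getFloat, using the ConfVar's varname and defaultFloatVal as key and fallback, rather than through a dedicated HiveConf accessor (see the StringTreeWriter constructor in WriterImpl.java below). A hedged sketch of that lookup, with an illustrative class name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;

// Illustrative lookup mirroring how WriterImpl resolves the new setting.
public class ThresholdLookupSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    float threshold = conf.getFloat(
        HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
        HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.defaultFloatVal);
    System.out.println("dictionary key size threshold = " + threshold);
  }
}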

Modified: hive/trunk/conf/hive-default.xml.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-default.xml.template?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/conf/hive-default.xml.template (original)
+++ hive/trunk/conf/hive-default.xml.template Sun Aug 11 08:45:19 2013
@@ -1701,6 +1701,15 @@
 </property>
 
 <property>
+  <name>hive.exec.orc.dictionary.key.size.threshold</name>
+  <value>0.8</value>
+  <description>
+    If the number of keys in a dictionary is greater than this fraction of the total number of
+    non-null rows, turn off dictionary encoding.  Use 1 to always use dictionary encoding.
+  </description>
+</property>
+
+<property>
   <name>hive.multi.insert.move.tasks.share.dependencies</name>
   <value>false</value>
   <description>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sun Aug 11 08:45:19 2013
@@ -97,7 +97,7 @@ public final class OrcFile {
                                     CompressionKind compress,
                                     int bufferSize,
                                     int rowIndexStride) throws IOException {
-    return new WriterImpl(fs, path, inspector, stripeSize, compress,
+    return new WriterImpl(fs, path, conf, inspector, stripeSize, compress,
       bufferSize, rowIndexStride, getMemoryManager(conf));
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OutStream.java Sun Aug 11 08:45:19 2013
@@ -76,11 +76,7 @@ class OutStream extends PositionedOutput
   }
 
   public void clear() throws IOException {
-    uncompressedBytes = 0;
-    compressedBytes = 0;
-    compressed = null;
-    overflow = null;
-    current = null;
+    flush();
     suppress = false;
   }
 
@@ -246,7 +242,10 @@ class OutStream extends PositionedOutput
       receiver.output(compressed);
       compressed = null;
     }
-    clear();
+    uncompressedBytes = 0;
+    compressedBytes = 0;
+    overflow = null;
+    current = null;
   }
 
   @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sun Aug 11 08:45:19 2013
@@ -27,6 +27,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -713,18 +714,21 @@ class RecordReaderImpl implements Record
     }
   }
 
+  /**
+   * A tree reader that will read string columns. At the start of the
+   * stripe, it creates an internal reader based on whether a direct or
+   * dictionary encoding was used.
+   */
   private static class StringTreeReader extends TreeReader {
-    private DynamicByteArray dictionaryBuffer = null;
-    private int dictionarySize;
-    private int[] dictionaryOffsets;
-    private RunLengthIntegerReader reader;
+    private TreeReader reader;
 
     StringTreeReader(Path path, int columnId) {
       super(path, columnId);
     }
 
     void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
-      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY &&
+          encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
         throw new IOException("Unknown encoding " + encoding + " in column " +
             columnId + " of " + path);
       }
@@ -734,10 +738,138 @@ class RecordReaderImpl implements Record
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encodings
                     ) throws IOException {
+      // For each stripe, checks the encoding and initializes the appropriate
+      // reader
+      switch (encodings.get(columnId).getKind()) {
+        case DIRECT:
+          reader = new StringDirectTreeReader(path, columnId);
+          break;
+        case DICTIONARY:
+          reader = new StringDictionaryTreeReader(path, columnId);
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported encoding " +
+              encodings.get(columnId).getKind());
+      }
+      reader.startStripe(streams, encodings);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      reader.seek(index);
+    }
+
+    @Override
+    Object next(Object previous) throws IOException {
+      return reader.next(previous);
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      reader.skipRows(items);
+    }
+  }
+
+  /**
+   * A reader for string columns that are direct encoded in the current
+   * stripe.
+   */
+  private static class StringDirectTreeReader extends TreeReader {
+    private InStream stream;
+    private RunLengthIntegerReader lengths;
+
+    StringDirectTreeReader(Path path, int columnId) {
+      super(path, columnId);
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      // PASS
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+                     List<OrcProto.ColumnEncoding> encodings
+                    ) throws IOException {
+      super.startStripe(streams, encodings);
+      StreamName name = new StreamName(columnId,
+          OrcProto.Stream.Kind.DATA);
+      stream = streams.get(name);
+      lengths = new RunLengthIntegerReader(streams.get(new
+          StreamName(columnId, OrcProto.Stream.Kind.LENGTH)),
+          false);
+    }
+
+    @Override
+    void seek(PositionProvider[] index) throws IOException {
+      super.seek(index);
+      stream.seek(index[columnId]);
+      lengths.seek(index[columnId]);
+    }
+
+    @Override
+    Object next(Object previous) throws IOException {
+      super.next(previous);
+      Text result = null;
+      if (valuePresent) {
+        if (previous == null) {
+          result = new Text();
+        } else {
+          result = (Text) previous;
+        }
+        int len = (int) lengths.next();
+        int offset = 0;
+        byte[] bytes = new byte[len];
+        while (len > 0) {
+          int written = stream.read(bytes, offset, len);
+          if (written < 0) {
+            throw new EOFException("Can't finish byte read from " + stream);
+          }
+          len -= written;
+          offset += written;
+        }
+        result.set(bytes);
+      }
+      return result;
+    }
+
+    @Override
+    void skipRows(long items) throws IOException {
+      items = countNonNulls(items);
+      long lengthToSkip = 0;
+      for(int i=0; i < items; ++i) {
+        lengthToSkip += lengths.next();
+      }
+      stream.skip(lengthToSkip);
+    }
+  }
+
+  /**
+   * A reader for string columns that are dictionary encoded in the current
+   * stripe.
+   */
+  private static class StringDictionaryTreeReader extends TreeReader {
+    private DynamicByteArray dictionaryBuffer;
+    private int[] dictionaryOffsets;
+    private RunLengthIntegerReader reader;
+
+    StringDictionaryTreeReader(Path path, int columnId) {
+      super(path, columnId);
+    }
+
+    @Override
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      // PASS
+    }
+
+    @Override
+    void startStripe(Map<StreamName, InStream> streams,
+                     List<OrcProto.ColumnEncoding> encodings
+                    ) throws IOException {
       super.startStripe(streams, encodings);
 
       // read the dictionary blob
-      dictionarySize = encodings.get(columnId).getDictionarySize();
+      int dictionarySize = encodings.get(columnId).getDictionarySize();
       StreamName name = new StreamName(columnId,
           OrcProto.Stream.Kind.DICTIONARY_DATA);
       InStream in = streams.get(name);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/StringRedBlackTree.java Sun Aug 11 08:45:19 2013
@@ -17,11 +17,11 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
-import org.apache.hadoop.io.Text;
-
 import java.io.IOException;
 import java.io.OutputStream;
 
+import org.apache.hadoop.io.Text;
+
 /**
  * A red-black tree that stores strings. The strings are stored as UTF-8 bytes
  * and an offset for each entry.
@@ -147,7 +147,7 @@ class StringRedBlackTree extends RedBlac
 
   /**
    * Visit all of the nodes in the tree in sorted order.
-   * @param visitor the action to be applied to each ndoe
+   * @param visitor the action to be applied to each node
    * @throws IOException
    */
   public void visit(Visitor visitor) throws IOException {
@@ -163,6 +163,17 @@ class StringRedBlackTree extends RedBlac
     keyOffsets.clear();
   }
 
+  public void getText(Text result, int originalPosition) {
+    int offset = keyOffsets.get(originalPosition);
+    int length;
+    if (originalPosition + 1 == keyOffsets.size()) {
+      length = byteArray.size() - offset;
+    } else {
+      length = keyOffsets.get(originalPosition + 1) - offset;
+    }
+    byteArray.setText(result, offset, length);
+  }
+
   /**
    * Get the size of the character data in the table.
    * @return the bytes used by the table

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sun Aug 11 08:45:19 2013
@@ -27,11 +27,16 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.RowIndexEntry;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -55,9 +60,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
-
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
+import org.apache.hadoop.io.Text;
 
 /**
  * An ORC file writer. The file is divided into stripes, which is the natural
@@ -111,8 +114,11 @@ class WriterImpl implements Writer, Memo
   private final boolean buildIndex;
   private final MemoryManager memoryManager;
 
+  private final Configuration conf;
+
   WriterImpl(FileSystem fs,
              Path path,
+             Configuration conf,
              ObjectInspector inspector,
              long stripeSize,
              CompressionKind compress,
@@ -121,6 +127,7 @@ class WriterImpl implements Writer, Memo
              MemoryManager memoryManager) throws IOException {
     this.fs = fs;
     this.path = path;
+    this.conf = conf;
     this.stripeSize = stripeSize;
     this.compress = compress;
     this.bufferSize = bufferSize;
@@ -344,6 +351,14 @@ class WriterImpl implements Writer, Memo
     public boolean isCompressed() {
       return codec != null;
     }
+
+    /**
+     * Get the writer's configuration.
+     * @return configuration
+     */
+    public Configuration getConfiguration() {
+      return conf;
+    }
   }
 
   /**
@@ -760,16 +775,22 @@ class WriterImpl implements Writer, Memo
 
   private static class StringTreeWriter extends TreeWriter {
     private static final int INITIAL_DICTIONARY_SIZE = 4096;
-    private final PositionedOutputStream stringOutput;
+    private final OutStream stringOutput;
     private final RunLengthIntegerWriter lengthOutput;
     private final RunLengthIntegerWriter rowOutput;
     private final StringRedBlackTree dictionary =
         new StringRedBlackTree(INITIAL_DICTIONARY_SIZE);
     private final DynamicIntArray rows = new DynamicIntArray();
+    private final PositionedOutputStream directStreamOutput;
+    private final RunLengthIntegerWriter directLengthOutput;
     private final List<OrcProto.RowIndexEntry> savedRowIndex =
         new ArrayList<OrcProto.RowIndexEntry>();
     private final boolean buildIndex;
     private final List<Long> rowIndexValueCount = new ArrayList<Long>();
+    // If the number of keys in a dictionary is greater than this fraction of
+    //the total number of non-null rows, turn off dictionary encoding
+    private final float dictionaryKeySizeThreshold;
+    private boolean useDictionaryEncoding = true;
 
     StringTreeWriter(int columnId,
                      ObjectInspector inspector,
@@ -785,6 +806,14 @@ class WriterImpl implements Writer, Memo
       recordPosition(rowIndexPosition);
       rowIndexValueCount.add(0L);
       buildIndex = writer.buildIndex();
+      directStreamOutput = writer.createStream(id, OrcProto.Stream.Kind.DATA);
+      directLengthOutput =
+        new RunLengthIntegerWriter(writer.createStream
+                                    (id, OrcProto.Stream.Kind.LENGTH), false);
+      dictionaryKeySizeThreshold = writer.getConfiguration().getFloat(
+        HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname,
+        HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.
+          defaultFloatVal);
     }
 
     @Override
@@ -801,22 +830,36 @@ class WriterImpl implements Writer, Memo
     @Override
     void writeStripe(OrcProto.StripeFooter.Builder builder,
                      int requiredIndexEntries) throws IOException {
-      // Traverse the red-black tree writing out the bytes and lengths; and
-      // creating the map from the original order to the final sorted order.
+      // Set the flag indicating whether or not to use dictionary encoding
+      // based on whether or not the fraction of distinct keys over number of
+      // non-null rows is less than the configured threshold
+      useDictionaryEncoding = rows.size() > 0 &&
+        (float)(dictionary.size()) / rows.size() <=
+          dictionaryKeySizeThreshold;
       final int[] dumpOrder = new int[dictionary.size()];
-      dictionary.visit(new StringRedBlackTree.Visitor() {
-        private int currentId = 0;
-        @Override
-        public void visit(StringRedBlackTree.VisitorContext context
-                         ) throws IOException {
-          context.writeBytes(stringOutput);
-          lengthOutput.write(context.getLength());
-          dumpOrder[context.getOriginalPosition()] = currentId++;
-        }
-      });
+
+      if (useDictionaryEncoding) {
+        // Write the dictionary by traversing the red-black tree writing out
+        // the bytes and lengths; and creating the map from the original order
+        // to the final sorted order.
+        dictionary.visit(new StringRedBlackTree.Visitor() {
+          private int currentId = 0;
+          @Override
+          public void visit(StringRedBlackTree.VisitorContext context
+                           ) throws IOException {
+            context.writeBytes(stringOutput);
+            lengthOutput.write(context.getLength());
+            dumpOrder[context.getOriginalPosition()] = currentId++;
+          }
+        });
+      } else {
+        // for direct encoding, we don't want the dictionary data stream
+        stringOutput.suppress();
+      }
       int length = rows.size();
       int rowIndexEntry = 0;
       OrcProto.RowIndex.Builder rowIndex = getRowIndex();
+      Text text = new Text();
       // write the values translated into the dump order.
       for(int i = 0; i <= length; ++i) {
         // now that we are writing out the row values, we can finalize the
@@ -826,12 +869,24 @@ class WriterImpl implements Writer, Memo
               rowIndexEntry < savedRowIndex.size()) {
             OrcProto.RowIndexEntry.Builder base =
                 savedRowIndex.get(rowIndexEntry++).toBuilder();
-            rowOutput.getPosition(new RowIndexPositionRecorder(base));
+            if (useDictionaryEncoding) {
+              rowOutput.getPosition(new RowIndexPositionRecorder(base));
+            } else {
+              PositionRecorder posn = new RowIndexPositionRecorder(base);
+              directStreamOutput.getPosition(posn);
+              directLengthOutput.getPosition(posn);
+            }
             rowIndex.addEntry(base.build());
           }
         }
         if (i != length) {
-          rowOutput.write(dumpOrder[rows.get(i)]);
+          if (useDictionaryEncoding) {
+            rowOutput.write(dumpOrder[rows.get(i)]);
+          } else {
+            dictionary.getText(text, rows.get(i));
+            directStreamOutput.write(text.getBytes(), 0, text.getLength());
+            directLengthOutput.write(text.getLength());
+          }
         }
       }
       // we need to build the rowindex before calling super, since it
@@ -840,6 +895,8 @@ class WriterImpl implements Writer, Memo
       stringOutput.flush();
       lengthOutput.flush();
       rowOutput.flush();
+      directStreamOutput.flush();
+      directLengthOutput.flush();
       // reset all of the fields to be ready for the next stripe.
       dictionary.clear();
       rows.clear();
@@ -849,11 +906,27 @@ class WriterImpl implements Writer, Memo
       rowIndexValueCount.add(0L);
     }
 
+    // Calls getPosition on the row output stream if dictionary encoding is used, and the direct
+    // output stream if direct encoding is used
+    private void recordOutputPosition(OrcProto.RowIndexEntry.Builder base) throws IOException {
+      if (useDictionaryEncoding) {
+        rowOutput.getPosition(new RowIndexPositionRecorder(base));
+      } else {
+        directStreamOutput.getPosition(new RowIndexPositionRecorder(base));
+      }
+    }
+
     @Override
     OrcProto.ColumnEncoding getEncoding() {
-      return OrcProto.ColumnEncoding.newBuilder().setKind(
-          OrcProto.ColumnEncoding.Kind.DICTIONARY).
-          setDictionarySize(dictionary.size()).build();
+      // Returns the encoding used for the last call to writeStripe
+      if (useDictionaryEncoding) {
+        return OrcProto.ColumnEncoding.newBuilder().setKind(
+            OrcProto.ColumnEncoding.Kind.DICTIONARY).
+            setDictionarySize(dictionary.size()).build();
+      } else {
+        return OrcProto.ColumnEncoding.newBuilder().setKind(
+            OrcProto.ColumnEncoding.Kind.DIRECT).build();
+      }
     }
 
     /**

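Because the writer now carries the job Configuration, callers can steer the string encoding per file through the conf handed to OrcFile.createWriter. Below is a hedged sketch modelled on the testDictionaryThreshold test added further down; the caller supplies the filesystem, path and inspector exactly as that test does, and the helper name is illustrative, not part of this commit:

// Illustrative helper: lower the threshold so that any stripe where at least
// half of the rows are distinct is direct encoded (as in TestFileDump below).
static Writer createWriterWithLowThreshold(FileSystem fs, Path path,
    ObjectInspector inspector) throws IOException {
  Configuration conf = new Configuration();
  conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);
  return OrcFile.createWriter(fs, path, conf, inspector,
      100000, CompressionKind.ZLIB, 10000, 10000);
}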
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFileDump.java Sun Aug 11 08:45:19 2013
@@ -18,15 +18,8 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TestName;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -35,8 +28,14 @@ import java.io.FileReader;
 import java.io.PrintStream;
 import java.util.Random;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.junit.Before;
+import org.junit.Test;
 
 public class TestFileDump {
 
@@ -69,9 +68,6 @@ public class TestFileDump {
     }
   }
 
-  private static final String outputFilename =
-      File.separator + "orc-file-dump.out";
-
   private static void checkOutput(String expected,
                                   String actual) throws Exception {
     BufferedReader eStream =
@@ -114,8 +110,62 @@ public class TestFileDump {
     }
     writer.close();
     PrintStream origOut = System.out;
-    FileOutputStream myOut = new FileOutputStream(workDir + File.separator +
-        "orc-file-dump.out");
+    String outputFilename = File.separator + "orc-file-dump.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + outputFilename);
+
+    // replace stdout and run command
+    System.setOut(new PrintStream(myOut));
+    FileDump.main(new String[]{testFilePath.toString()});
+    System.out.flush();
+    System.setOut(origOut);
+
+
+    checkOutput(resourceDir + outputFilename, workDir + outputFilename);
+  }
+
+  // Test that if the fraction of rows that have distinct strings is greater than the configured
+  // threshold dictionary encoding is turned off.  If dictionary encoding is turned off the length
+  // of the dictionary stream for the column will be 0 in the ORC file dump.
+  @Test
+  public void testDictionaryThreshold() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory.getReflectionObjectInspector
+          (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Configuration conf = new Configuration();
+    conf.setFloat(HiveConf.ConfVars.HIVE_ORC_DICTIONARY_KEY_SIZE_THRESHOLD.varname, 0.49f);
+    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
+        100000, CompressionKind.ZLIB, 10000, 10000);
+    Random r1 = new Random(1);
+    String[] words = new String[]{"It", "was", "the", "best", "of", "times,",
+        "it", "was", "the", "worst", "of", "times,", "it", "was", "the", "age",
+        "of", "wisdom,", "it", "was", "the", "age", "of", "foolishness,", "it",
+        "was", "the", "epoch", "of", "belief,", "it", "was", "the", "epoch",
+        "of", "incredulity,", "it", "was", "the", "season", "of", "Light,",
+        "it", "was", "the", "season", "of", "Darkness,", "it", "was", "the",
+        "spring", "of", "hope,", "it", "was", "the", "winter", "of", "despair,",
+        "we", "had", "everything", "before", "us,", "we", "had", "nothing",
+        "before", "us,", "we", "were", "all", "going", "direct", "to",
+        "Heaven,", "we", "were", "all", "going", "direct", "the", "other",
+        "way"};
+    int nextInt = 0;
+    for(int i=0; i < 21000; ++i) {
+      // Write out the same string twice, this guarantees the fraction of rows with
+      // distinct strings is 0.5
+      if (i % 2 == 0) {
+        nextInt = r1.nextInt(words.length);
+        // Append the value of i to the word, this guarantees when an index or word is repeated
+        // the actual string is unique.
+        words[nextInt] += "-" + i;
+      }
+      writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
+          words[nextInt]));
+    }
+    writer.close();
+    PrintStream origOut = System.out;
+    String outputFilename = File.separator + "orc-file-dump-dictionary-threshold.out";
+    FileOutputStream myOut = new FileOutputStream(workDir + outputFilename);
 
     // replace stdout and run command
     System.setOut(new PrintStream(myOut));

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java?rev=1512893&r1=1512892&r2=1512893&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFile.java Sun Aug 11 08:45:19 2013
@@ -1028,7 +1028,7 @@ public class TestOrcFile {
               ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
     }
     MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
-    Writer writer = new WriterImpl(fs, testFilePath, inspector,
+    Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
         50000, CompressionKind.NONE, 100, 0, memory);
     assertEquals(testFilePath, memory.path);
     for(int i=0; i < 2500; ++i) {

Added: hive/trunk/ql/src/test/queries/clientpositive/orc_dictionary_threshold.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_dictionary_threshold.q?rev=1512893&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/orc_dictionary_threshold.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/orc_dictionary_threshold.q Sun Aug 11 08:45:19 2013
@@ -0,0 +1,59 @@
+set hive.exec.orc.dictionary.key.size.threshold=-1;
+
+-- Set the threshold to -1 to guarantee dictionary encoding is turned off
+-- Tests that the data can be read back correctly when a string column is stored
+-- without dictionary encoding
+
+CREATE TABLE test_orc (key STRING)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat';
+
+INSERT OVERWRITE TABLE test_orc SELECT key FROM src limit 10;
+
+-- Test reading the column back
+
+SELECT * FROM test_orc; 
+
+ALTER TABLE test_orc SET SERDEPROPERTIES ('orc.stripe.size' = '1');
+
+CREATE TABLE src_thousand(key STRING) STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/kv1kv2.cogroup.txt' 
+     INTO TABLE src_thousand;
+
+set hive.exec.orc.dictionary.key.size.threshold=0.5;
+
+-- Add data to the table in such a way that alternate stripes encode the column
+-- differently.  Setting orc.stripe.size = 1 guarantees the stripes each have 
+-- 5000 rows.  The first stripe will have 5 * 630 distinct rows and thus be
+-- above the cutoff of 50% and will be direct encoded. The second stripe
+-- will have 5 * 1 distinct rows and thus be under the cutoff and will be
+-- dictionary encoded. The final stripe will have 630 out of 1000 and be 
+-- direct encoded.
+
+INSERT OVERWRITE TABLE test_orc
+SELECT key FROM (
+SELECT CONCAT("a", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("b", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("c", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("d", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("e", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("f", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("g", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("h", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("i", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("j", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("k", key) AS key FROM src_thousand
+) a ORDER BY key LIMIT 11000;
+
+SELECT SUM(HASH(key)) FROM test_orc;

Added: hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out?rev=1512893&view=auto
==============================================================================
--- hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out (added)
+++ hive/trunk/ql/src/test/resources/orc-file-dump-dictionary-threshold.out Sun Aug 11 08:45:19 2013
@@ -0,0 +1,78 @@
+Structure for TestFileDump.testDump.orc
+Rows: 21000
+Compression: ZLIB
+Compression size: 10000
+Type: struct<i:int,l:bigint,s:string>
+
+Statistics:
+  Column 0: count: 21000
+  Column 1: count: 21000 min: -2147390285 max: 2147453086 sum: 109128518326
+  Column 2: count: 21000 min: -9222731174895935707 max: 9222919052987871506
+  Column 3: count: 21000 min: Darkness,-230 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936
+
+Stripes:
+  Stripe: offset: 3 data: 107035 rows: 4000 tail: 65 index: 217
+    Stream: column 0 section ROW_INDEX start: 3 length 10
+    Stream: column 1 section ROW_INDEX start: 13 length 36
+    Stream: column 2 section ROW_INDEX start: 49 length 39
+    Stream: column 3 section ROW_INDEX start: 88 length 132
+    Stream: column 1 section DATA start: 220 length 18043
+    Stream: column 2 section DATA start: 18263 length 34740
+    Stream: column 3 section DATA start: 53003 length 50887
+    Stream: column 3 section LENGTH start: 103890 length 3365
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT
+    Encoding column 3: DIRECT
+  Stripe: offset: 107320 data: 289727 rows: 5000 tail: 65 index: 349
+    Stream: column 0 section ROW_INDEX start: 107320 length 10
+    Stream: column 1 section ROW_INDEX start: 107330 length 36
+    Stream: column 2 section ROW_INDEX start: 107366 length 39
+    Stream: column 3 section ROW_INDEX start: 107405 length 264
+    Stream: column 1 section DATA start: 107669 length 22581
+    Stream: column 2 section DATA start: 130250 length 43426
+    Stream: column 3 section DATA start: 173676 length 219588
+    Stream: column 3 section LENGTH start: 393264 length 4132
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT
+    Encoding column 3: DIRECT
+  Stripe: offset: 397461 data: 496162 rows: 5000 tail: 66 index: 536
+    Stream: column 0 section ROW_INDEX start: 397461 length 10
+    Stream: column 1 section ROW_INDEX start: 397471 length 36
+    Stream: column 2 section ROW_INDEX start: 397507 length 39
+    Stream: column 3 section ROW_INDEX start: 397546 length 451
+    Stream: column 1 section DATA start: 397997 length 22605
+    Stream: column 2 section DATA start: 420602 length 43444
+    Stream: column 3 section DATA start: 464046 length 425862
+    Stream: column 3 section LENGTH start: 889908 length 4251
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT
+    Encoding column 3: DIRECT
+  Stripe: offset: 894225 data: 711982 rows: 5000 tail: 65 index: 677
+    Stream: column 0 section ROW_INDEX start: 894225 length 10
+    Stream: column 1 section ROW_INDEX start: 894235 length 36
+    Stream: column 2 section ROW_INDEX start: 894271 length 39
+    Stream: column 3 section ROW_INDEX start: 894310 length 592
+    Stream: column 1 section DATA start: 894902 length 22591
+    Stream: column 2 section DATA start: 917493 length 43414
+    Stream: column 3 section DATA start: 960907 length 641580
+    Stream: column 3 section LENGTH start: 1602487 length 4397
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT
+    Encoding column 3: DIRECT
+  Stripe: offset: 1606949 data: 350645 rows: 2000 tail: 66 index: 786
+    Stream: column 0 section ROW_INDEX start: 1606949 length 10
+    Stream: column 1 section ROW_INDEX start: 1606959 length 36
+    Stream: column 2 section ROW_INDEX start: 1606995 length 39
+    Stream: column 3 section ROW_INDEX start: 1607034 length 701
+    Stream: column 1 section DATA start: 1607735 length 9027
+    Stream: column 2 section DATA start: 1616762 length 17375
+    Stream: column 3 section DATA start: 1634137 length 322259
+    Stream: column 3 section LENGTH start: 1956396 length 1984
+    Encoding column 0: DIRECT
+    Encoding column 1: DIRECT
+    Encoding column 2: DIRECT
+    Encoding column 3: DIRECT

Added: hive/trunk/ql/src/test/results/clientpositive/orc_dictionary_threshold.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_dictionary_threshold.q.out?rev=1512893&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/orc_dictionary_threshold.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/orc_dictionary_threshold.q.out Sun Aug 11 08:45:19 2013
@@ -0,0 +1,158 @@
+PREHOOK: query: -- Set the threshold to -1 to guarantee dictionary encoding is turned off
+-- Tests that the data can be read back correctly when a string column is stored
+-- without dictionary encoding
+
+CREATE TABLE test_orc (key STRING)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Set the threshold to -1 to guarantee dictionary encoding is turned off
+-- Tests that the data can be read back correctly when a string column is stored
+-- without dictionary encoding
+
+CREATE TABLE test_orc (key STRING)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_orc
+PREHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT key FROM src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_orc
+POSTHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT key FROM src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_orc
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- Test reading the column back
+
+SELECT * FROM test_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: query: -- Test reading the column back
+
+SELECT * FROM test_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+238
+86
+311
+27
+165
+409
+255
+278
+98
+484
+PREHOOK: query: ALTER TABLE test_orc SET SERDEPROPERTIES ('orc.stripe.size' = '1')
+PREHOOK: type: ALTERTABLE_SERDEPROPERTIES
+PREHOOK: Input: default@test_orc
+PREHOOK: Output: default@test_orc
+POSTHOOK: query: ALTER TABLE test_orc SET SERDEPROPERTIES ('orc.stripe.size' = '1')
+POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES
+POSTHOOK: Input: default@test_orc
+POSTHOOK: Output: default@test_orc
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: CREATE TABLE src_thousand(key STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE src_thousand(key STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@src_thousand
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1kv2.cogroup.txt' 
+     INTO TABLE src_thousand
+PREHOOK: type: LOAD
+PREHOOK: Output: default@src_thousand
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv1kv2.cogroup.txt' 
+     INTO TABLE src_thousand
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@src_thousand
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- Add data to the table in such a way that alternate stripes encode the column
+-- differently.  Setting orc.stripe.size = 1 guarantees the stripes each have 
+-- 5000 rows.  The first stripe will have 5 * 630 distinct rows and thus be
+-- above the cutoff of 50% and will be direct encoded. The second stripe
+-- will have 5 * 1 distinct rows and thus be under the cutoff and will be
+-- dictionary encoded. The final stripe will have 630 out of 1000 and be 
+-- direct encoded.
+
+INSERT OVERWRITE TABLE test_orc
+SELECT key FROM (
+SELECT CONCAT("a", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("b", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("c", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("d", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("e", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("f", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("g", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("h", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("i", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("j", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("k", key) AS key FROM src_thousand
+) a ORDER BY key LIMIT 11000
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_thousand
+PREHOOK: Output: default@test_orc
+POSTHOOK: query: -- Add data to the table in such a way that alternate stripes encode the column
+-- differently.  Setting orc.stripe.size = 1 guarantees the stripes each have 
+-- 5000 rows.  The first stripe will have 5 * 630 distinct rows and thus be
+-- above the cutoff of 50% and will be direct encoded. The second stripe
+-- will have 5 * 1 distinct rows and thus be under the cutoff and will be
+-- dictionary encoded. The final stripe will have 630 out of 1000 and be 
+-- direct encoded.
+
+INSERT OVERWRITE TABLE test_orc
+SELECT key FROM (
+SELECT CONCAT("a", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("b", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("c", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("d", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("e", key) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("f", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("g", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("h", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("i", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("j", 1) AS key FROM src_thousand
+UNION ALL
+SELECT CONCAT("k", key) AS key FROM src_thousand
+) a ORDER BY key LIMIT 11000
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_thousand
+POSTHOOK: Output: default@test_orc
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_orc.key EXPRESSION [(src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT SUM(HASH(key)) FROM test_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(key)) FROM test_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_orc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: test_orc.key EXPRESSION [(src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), (src_thousand)src_thousand.FieldSchema(name:key, type:string, comment:null), ]
+1082202951192


