hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject svn commit: r1650707 - in /hive/branches/llap: llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/ llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/ ql/src/java/org/apache/ha...
Date Sat, 10 Jan 2015 01:19:32 GMT
Author: prasanthj
Date: Sat Jan 10 01:19:32 2015
New Revision: 1650707

URL: http://svn.apache.org/r1650707
Log:
experimental version of orc metadata cache

Added:
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java
Removed:
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcMetadataCache.java
Modified:
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java

Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java (original)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataProducer.java Sat Jan 10 01:19:32 2015
@@ -20,10 +20,7 @@ package org.apache.hadoop.hive.llap.io.e
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -38,19 +35,21 @@ import org.apache.hadoop.hive.llap.io.ap
 import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;
 import org.apache.hadoop.hive.llap.io.api.orc.OrcBatchKey;
 import org.apache.hadoop.hive.llap.io.api.orc.OrcCacheKey;
+import org.apache.hadoop.hive.llap.io.metadata.OrcMetadataCache;
 import org.apache.hadoop.hive.ql.io.orc.OrcFile;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
 import org.apache.hadoop.hive.ql.io.orc.Reader;
 import org.apache.hadoop.hive.ql.io.orc.RecordReader;
 import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
-import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
 
 public class OrcEncodedDataProducer implements EncodedDataProducer<OrcBatchKey> {
   private FileSystem cachedFs = null;
-  private final OrcMetadataCache metadataCache = new OrcMetadataCache();
+  private Configuration conf;
+  private OrcMetadataCache metadataCache;
   private final Allocator allocator;
   private final Cache<OrcCacheKey> cache;
 
@@ -101,14 +100,8 @@ public class OrcEncodedDataProducer impl
       orcReader = null;
       if (stripes == null || types == null) {
         orcReader = createOrcReader(split);
-        if (stripes == null) {
-          stripes = orcReader.getStripes();
-          metadataCache.cacheStripes(internedFilePath, stripes);
-        }
-        if (types == null) {
-          types = orcReader.getTypes();
-          metadataCache.cacheTypes(internedFilePath, types);
-        }
+        stripes = metadataCache.getStripes(internedFilePath);
+        types = metadataCache.getTypes(internedFilePath);
       }
 
       if (columnIds == null) {
@@ -298,10 +291,12 @@ public class OrcEncodedDataProducer impl
   private Reader createOrcReader(FileSplit fileSplit) throws IOException {
     FileSystem fs = cachedFs;
     Path path = fileSplit.getPath();
-    Configuration conf = new Configuration();
     if ("pfile".equals(path.toUri().getScheme())) {
       fs = path.getFileSystem(conf); // Cannot use cached FS due to hive tests' proxy FS.
     }
+    if (metadataCache == null) {
+      metadataCache = new OrcMetadataCache(cachedFs, path, conf);
+    }
     return OrcFile.createReader(path, OrcFile.readerOptions(conf).filesystem(fs));
   }
 
@@ -315,6 +310,8 @@ public class OrcEncodedDataProducer impl
     this.cachedFs = FileSystem.get(conf);
     this.cache = cache;
     this.allocator = allocator;
+    this.conf = conf;
+    this.metadataCache = null;
   }
 
   @Override

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/CompressionBuffer.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,119 @@
+/**
+ *   Copyright 2014 Prasanth Jayachandran
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.metadata;
+
+/**
+ *
+ */
+public class CompressionBuffer {
+  // stripe position within file
+  private int stripePos;
+
+  // row group position within stripe
+  private int rowGroupPos;
+
+  // start offset of compression buffer corresponding to above row group
+  private long startOffset;
+
+  // length of compression buffer (compressed or uncompressed length)
+  private long length;
+
+  // offset within compression buffer where the row group begins
+  private int uncompressedOffset;
+
+  // if uncompressedOffset is in a middle of integer encoding runs (RLE, Delta etc.), consume
+  // these many values to reach beginning of the row group
+  private int consume;
+
+  // For run length byte encoding, record the number of bits within current byte to consume to
+  // reach beginning of the row group. This is required for IS_PRESENT stream.
+  private int consumeBits;
+
+  // if last row group is set to true, it means the above row group spans compression buffer
+  // boundary. Length will span two compression buffers.
+  private boolean lastRowGroup;
+
+  private CompressionBuffer(int sp, int rgp, long s, long len, int u, int c, int cb, boolean last) {
+    this.stripePos = sp;
+    this.rowGroupPos = rgp;
+    this.startOffset = s;
+    this.length = len;
+    this.uncompressedOffset = u;
+    this.consume = c;
+    this.consumeBits = cb;
+    this.lastRowGroup = last;
+  }
+
+  private static class Builder {
+    private int stripePos;
+    private int rowGroupPos;
+    private long startOffset;
+    private long length;
+    private int offsetWithinBuffer;
+    private int consume;
+    private int consumeBits;
+    private boolean lastRowGroup;
+
+    public Builder setStripePosition(int stripePos) {
+      this.stripePos = stripePos;
+      return this;
+    }
+
+    public Builder setRowGroupPosition(int rowGroupPos) {
+      this.rowGroupPos = rowGroupPos;
+      return this;
+    }
+
+    public Builder setStartOffset(long startOffset) {
+      this.startOffset = startOffset;
+      return this;
+    }
+
+    public Builder setLength(long length) {
+      this.length = length;
+      return this;
+    }
+
+    public Builder setOffsetWithInBuffer(int offsetWithInBuffer) {
+      this.offsetWithinBuffer = offsetWithInBuffer;
+      return this;
+    }
+
+    public Builder consumeRuns(int consume) {
+      this.consume = consume;
+      return this;
+    }
+
+    public Builder consumeBits(int consumeBits) {
+      this.consumeBits = consumeBits;
+      return this;
+    }
+
+    public Builder setLastRowGroup(boolean lastRowGroup) {
+      this.lastRowGroup = lastRowGroup;
+      return this;
+    }
+
+    public CompressionBuffer build() {
+      return new CompressionBuffer(stripePos, rowGroupPos, startOffset, length, offsetWithinBuffer,
+          consume, consumeBits, lastRowGroup);
+    }
+  }
+
+  public static Builder builder() {
+    return new Builder();
+  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadata.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.io.metadata;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+
+public class OrcMetadata {
+  private CompressionKind compressionKind;
+  private int compressionBufferSize;
+  private List<OrcProto.Type> types;
+  private List<StripeInformation> stripes;
+  private Map<Integer, List<OrcProto.ColumnEncoding>> stripeToColEncodings;
+  private Map<Integer, OrcProto.RowIndex[]> stripeToRowIndexEntries;
+
+  public Map<Integer, List<OrcProto.ColumnEncoding>> getStripeToColEncodings() {
+    return stripeToColEncodings;
+  }
+
+  public void setStripeToColEncodings(
+      Map<Integer, List<OrcProto.ColumnEncoding>> stripeToColEncodings) {
+    this.stripeToColEncodings = stripeToColEncodings;
+  }
+
+  public Map<Integer, OrcProto.RowIndex[]> getStripeToRowIndexEntries() {
+    return stripeToRowIndexEntries;
+  }
+
+  public void setStripeToRowIndexEntries(
+      Map<Integer, OrcProto.RowIndex[]> stripeToRowIndexEntries) {
+    this.stripeToRowIndexEntries = stripeToRowIndexEntries;
+  }
+
+  public List<StripeInformation> getStripes() {
+    return stripes;
+  }
+
+  public void setStripes(List<StripeInformation> stripes) {
+    this.stripes = stripes;
+  }
+
+  public CompressionKind getCompressionKind() {
+    return compressionKind;
+  }
+
+  public void setCompressionKind(CompressionKind compressionKind) {
+    this.compressionKind = compressionKind;
+  }
+
+  public int getCompressionBufferSize() {
+    return compressionBufferSize;
+  }
+
+  public void setCompressionBufferSize(int compressionBufferSize) {
+    this.compressionBufferSize = compressionBufferSize;
+  }
+
+  public List<OrcProto.Type> getTypes() {
+    return types;
+  }
+
+  public void setTypes(List<OrcProto.Type> types) {
+    this.types = types;
+  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.io.metadata;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+/**
+ * ORC-specific metadata cache.
+ */
+public class OrcMetadataCache {
+  private static final int DEFAULT_CACHE_ACCESS_CONCURRENCY = 10;
+  private static final int DEFAULT_MAX_CACHE_ENTRIES = 100;
+  private static Cache<String, OrcMetadata> METADATA;
+
+  static {
+    METADATA = CacheBuilder.newBuilder()
+        .concurrencyLevel(DEFAULT_CACHE_ACCESS_CONCURRENCY)
+        .maximumSize(DEFAULT_MAX_CACHE_ENTRIES)
+        .build();
+  }
+
+  private Path path;
+  private OrcMetadataLoader loader;
+
+  public OrcMetadataCache(FileSystem fs, Path path, Configuration conf) {
+    this.path = path;
+    this.loader = new OrcMetadataLoader(fs, path, conf);
+  }
+
+  public CompressionKind getCompression(String pathString) throws IOException {
+    try {
+      return METADATA.get(pathString, loader).getCompressionKind();
+    } catch (ExecutionException e) {
+      throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+    }
+  }
+
+  public int getCompressionBufferSize(String pathString) throws IOException {
+    try {
+      return METADATA.get(pathString, loader).getCompressionBufferSize();
+    } catch (ExecutionException e) {
+      throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+    }
+  }
+
+  public List<OrcProto.Type> getTypes(String pathString) throws IOException {
+    try {
+      return METADATA.get(pathString, loader).getTypes();
+    } catch (ExecutionException e) {
+      throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+    }
+  }
+
+  public List<StripeInformation> getStripes(String pathString) throws IOException {
+    try {
+      return METADATA.get(pathString, loader).getStripes();
+    } catch (ExecutionException e) {
+      throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+    }
+  }
+
+  //  public boolean[] getIncludedRowGroups(String pathString, SearchArgument sarg, int stripeIdx) throws IOException {
+  //    try {
+  //      return METADATA.get(pathString, loader).getStripeToRowIndexEntries();
+  //    } catch (ExecutionException e) {
+  //      throw new IOException("Unable to load orc metadata for " + path.toString(), e);
+  //    }
+  //  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataLoader.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.llap.io.metadata;
+
+import static org.apache.hadoop.hive.ql.io.orc.OrcFile.readerOptions;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.llap.io.orc.OrcFile;
+import org.apache.hadoop.hive.llap.io.orc.Reader;
+import org.apache.hadoop.hive.llap.io.orc.RecordReader;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.StripeInformation;
+
+public class OrcMetadataLoader implements Callable<OrcMetadata> {
+  private FileSystem fs;
+  private Path path;
+  private Configuration conf;
+
+  public OrcMetadataLoader(FileSystem fs, Path path, Configuration conf) {
+    this.fs = fs;
+    this.path = path;
+    this.conf = conf;
+  }
+
+  @Override
+  public OrcMetadata call() throws Exception {
+    Reader reader = OrcFile.createLLAPReader(path, readerOptions(conf).filesystem(fs));
+    OrcMetadata orcMetadata = new OrcMetadata();
+    orcMetadata.setCompressionKind(reader.getCompression());
+    orcMetadata.setCompressionBufferSize(reader.getCompressionSize());
+    List<StripeInformation> stripes = reader.getStripes();
+    orcMetadata.setStripes(stripes);
+    Map<Integer, List<OrcProto.ColumnEncoding>> stripeColEnc = new HashMap<Integer, List<OrcProto.ColumnEncoding>>();
+    Map<Integer, OrcProto.RowIndex[]> stripeRowIndices = new HashMap<Integer, OrcProto.RowIndex[]>();
+    RecordReader rows = reader.rows();
+    for (int i = 0; i < stripes.size(); i++) {
+      stripeColEnc.put(i, rows.getColumnEncodings(i));
+      stripeRowIndices.put(i, rows.getRowIndexEntries(i));
+    }
+    orcMetadata.setStripeToColEncodings(stripeColEnc);
+    orcMetadata.setStripeToRowIndexEntries(stripeRowIndices);
+    return orcMetadata;
+  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPReaderImpl.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,57 @@
+/**
+ *   Copyright 2014 Prasanth Jayachandran
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.llap.io.metadata.CompressionBuffer;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcFile;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+import org.apache.hadoop.hive.ql.io.orc.ReaderImpl;
+
+/**
+ *
+ */
+public class LLAPReaderImpl extends ReaderImpl implements Reader {
+
+  public LLAPReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
+    super(path, options);
+  }
+
+  @Override
+  public RecordReader rows() throws IOException {
+    Reader.Options options = new Options();
+    boolean[] include = options.getInclude();
+    // if included columns is null, then include all columns
+    if (include == null) {
+      include = new boolean[footer.getTypesCount()];
+      Arrays.fill(include, true);
+      options.include(include);
+    }
+    return new LLAPRecordReaderImpl(this.getStripes(), fileSystem, path,
+        options, footer.getTypesList(), codec, bufferSize,
+        footer.getRowIndexStride(), conf);
+  }
+
+  @Override
+  public RecordReader rows(CompressionBuffer buffer, CompressionKind kind,
+      OrcProto.ColumnEncoding encoding) throws IOException {
+    return null;
+  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/LLAPRecordReaderImpl.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.llap.Consumer;
+import org.apache.hadoop.hive.llap.io.api.EncodedColumn;
+import org.apache.hadoop.hive.llap.io.api.cache.Allocator;
+import org.apache.hadoop.hive.llap.io.api.orc.OrcBatchKey;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.orc.*;
+import org.apache.hadoop.hive.ql.io.orc.Reader;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+
+/**
+ *
+ */
+public class LLAPRecordReaderImpl extends RecordReaderImpl implements RecordReader {
+  LLAPRecordReaderImpl(List<StripeInformation> stripes,
+      FileSystem fileSystem, Path path,
+      Reader.Options options,
+      List<OrcProto.Type> types, CompressionCodec codec,
+      int bufferSize, long strideRate, Configuration conf) throws IOException {
+    super(stripes, fileSystem, path, options, types, codec, bufferSize, strideRate, conf);
+  }
+
+  @Override
+  public OrcProto.RowIndex[] getRowIndexEntries(int stripeIdx) throws IOException {
+    return readRowIndex(stripeIdx);
+  }
+
+  @Override
+  public List<OrcProto.ColumnEncoding> getColumnEncodings(int stripeIdx) throws IOException {
+    StripeInformation si = stripes.get(stripeIdx);
+    OrcProto.StripeFooter sf = readStripeFooter(si);
+    return sf.getColumnsList();
+  }
+
+  @Override
+  public boolean[] getIncludedRowGroups(int stripeIdx) throws IOException {
+    currentStripe = stripeIdx;
+    return pickRowGroups();
+  }
+
+  @Override
+  public boolean hasNext() throws IOException {
+    return false;
+  }
+
+  @Override
+  public Object next(Object previous) throws IOException {
+    return null;
+  }
+
+  @Override
+  public VectorizedRowBatch nextBatch(VectorizedRowBatch previousBatch) throws IOException {
+    return null;
+  }
+
+  @Override
+  public long getRowNumber() {
+    return 0;
+  }
+
+  @Override
+  public float getProgress() {
+    return 0;
+  }
+
+  @Override
+  public void close() throws IOException {
+
+  }
+
+  @Override
+  public void seekToRow(long rowCount) throws IOException {
+
+  }
+
+  @Override
+  public void readEncodedColumns(long[][] colRgs, int rgCount, SearchArgument sarg,
+      Consumer<EncodedColumn<OrcBatchKey>> consumer, Allocator allocator) {
+
+  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/OrcFile.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,31 @@
+/**
+ *   Copyright 2014 Prasanth Jayachandran
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+
+/**
+ *
+ */
+public class OrcFile {
+
+  public static Reader createLLAPReader(Path path,
+      org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions options) throws IOException {
+    return new LLAPReaderImpl(path, options);
+  }
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/Reader.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.llap.io.metadata.CompressionBuffer;
+import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+
+/**
+ *
+ */
+public interface Reader extends org.apache.hadoop.hive.ql.io.orc.Reader {
+
+  public RecordReader rows() throws IOException;
+
+  /**
+   * Read rows out of given compression buffer.
+   *
+   * @param buffer   - compression buffer
+   * @param kind     - compression kind
+   * @param encoding - column encoding
+   * @return - record reader to read rows out of it
+   * @throws IOException
+   */
+  public RecordReader rows(CompressionBuffer buffer, CompressionKind kind,
+      OrcProto.ColumnEncoding encoding) throws IOException;
+}

Added: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java?rev=1650707&view=auto
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java (added)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/orc/RecordReader.java Sat Jan 10 01:19:32 2015
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.llap.io.orc;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.io.orc.OrcProto;
+
+/**
+ * Extension of the ORC RecordReader exposing per-stripe row indexes, encodings and row groups.
+ */
+public interface RecordReader extends org.apache.hadoop.hive.ql.io.orc.RecordReader {
+  /**
+   * Return all row index entries for the specified stripe index.
+   *
+   * @param stripeIdx - stripe index within orc file
+   * @return - all row index entries
+   */
+  OrcProto.RowIndex[] getRowIndexEntries(int stripeIdx) throws IOException;
+
+  /**
+   * Return column encodings of all columns for the specified stripe index.
+   *
+   * @param stripeIdx - stripe index within orc file
+   * @return - column encodings of all columns
+   */
+  List<OrcProto.ColumnEncoding> getColumnEncodings(int stripeIdx) throws IOException;
+
+  /**
+   * Return the row groups that satisfy the SARG condition for the specified stripe index.
+   *
+   * @param stripeIdx - stripe index within orc file
+   * @return - row groups qualifying the SARG
+   */
+  boolean[] getIncludedRowGroups(int stripeIdx) throws IOException;
+}

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
(original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/CompressionCodec.java
Sat Jan 10 01:19:32 2015
@@ -23,7 +23,7 @@ import java.util.EnumSet;
 
 import javax.annotation.Nullable;
 
-interface CompressionCodec {
+public interface CompressionCodec {
 
   public enum Modifier {
     /* speed/compression tradeoffs */

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jan 10
01:19:32 2015
@@ -18,6 +18,14 @@
 
 package org.apache.hadoop.hive.ql.io.orc;
 
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_PADDING;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BLOCK_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_BUFFER_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_COMPRESS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_ROW_INDEX_STRIDE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_DEFAULT_STRIPE_SIZE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_ORC_WRITE_FORMAT;
+
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
@@ -26,8 +34,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 
-import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.*;
-
 /**
  * Contains factory methods to read or write ORC files.
  */
@@ -184,7 +190,7 @@ public final class OrcFile {
     private ReaderImpl.FileMetaInfo fileMetaInfo;
     private long maxLength = Long.MAX_VALUE;
 
-    ReaderOptions(Configuration conf) {
+    public ReaderOptions(Configuration conf) {
       this.conf = conf;
     }
     ReaderOptions fileMetaInfo(ReaderImpl.FileMetaInfo info) {

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Sat Jan
10 01:19:32 2015
@@ -44,23 +44,23 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 import com.google.protobuf.CodedInputStream;
 
-final class ReaderImpl implements Reader {
+public class ReaderImpl implements Reader {
 
   private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
 
   private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
 
-  private final FileSystem fileSystem;
-  private final Path path;
-  private final CompressionKind compressionKind;
-  private final CompressionCodec codec;
-  private final int bufferSize;
+  protected final FileSystem fileSystem;
+  protected final Path path;
+  protected final CompressionKind compressionKind;
+  protected final CompressionCodec codec;
+  protected final int bufferSize;
   private OrcProto.Metadata metadata = null;
   private final int metadataSize;
-  private final OrcProto.Footer footer;
+  protected final OrcProto.Footer footer;
   private final ObjectInspector inspector;
   private long deserializedSize = -1;
-  private final Configuration conf;
+  protected final Configuration conf;
   private final List<Integer> versionList;
   private final OrcFile.WriterVersion writerVersion;
 
@@ -295,7 +295,7 @@ final class ReaderImpl implements Reader
    * @param options options for reading
    * @throws IOException
    */
-  ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
+  public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException {
     FileSystem fs = options.getFilesystem();
     if (fs == null) {
       fs = path.getFileSystem(options.getConfiguration());

Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1650707&r1=1650706&r2=1650707&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
(original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
Sat Jan 10 01:19:32 2015
@@ -55,7 +55,6 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
-import org.apache.hadoop.hive.ql.io.orc.LlapUtils.PresentStreamReadResult;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
@@ -81,14 +80,14 @@ import org.apache.hadoop.io.Text;
 
 import com.google.common.collect.ComparisonChain;
 
-class RecordReaderImpl implements RecordReader {
+public class RecordReaderImpl implements RecordReader {
 
   private static final Log LOG = LogFactory.getLog(RecordReaderImpl.class);
   private static final boolean isLogTraceEnabled = LOG.isTraceEnabled();
 
   private final FSDataInputStream file;
   private final long firstRow;
-  private final List<StripeInformation> stripes =
+  protected final List<StripeInformation> stripes =
     new ArrayList<StripeInformation>();
   private OrcProto.StripeFooter stripeFooter;
   private final long totalRowCount;
@@ -98,7 +97,7 @@ class RecordReaderImpl implements Record
   private final boolean[] included;
   private final long rowIndexStride;
   private long rowInStripe = 0;
-  private int currentStripe = -1;
+  protected int currentStripe = -1;
   private long rowBaseInStripe = 0;
   private long rowCountInStripe = 0;
   private final Map<StreamName, InStream> streams =
@@ -236,16 +235,16 @@ class RecordReaderImpl implements Record
     return result;
   }
 
-  RecordReaderImpl(List<StripeInformation> stripes,
-                   FileSystem fileSystem,
-                   Path path,
-                   Reader.Options options,
-                   List<OrcProto.Type> types,
-                   CompressionCodec codec,
-                   int bufferSize,
-                   long strideRate,
-                   Configuration conf
-                  ) throws IOException {
+  protected RecordReaderImpl(List<StripeInformation> stripes,
+      FileSystem fileSystem,
+      Path path,
+      Reader.Options options,
+      List<OrcProto.Type> types,
+      CompressionCodec codec,
+      int bufferSize,
+      long strideRate,
+      Configuration conf
+  ) throws IOException {
     this.file = fileSystem.open(path);
     this.codec = codec;
     this.types = types;
@@ -2271,7 +2270,7 @@ class RecordReaderImpl implements Record
     }
   }
 
-  OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
+  protected OrcProto.StripeFooter readStripeFooter(StripeInformation stripe
                                          ) throws IOException {
     long offset = stripe.getOffset() + stripe.getIndexLength() +
         stripe.getDataLength();
@@ -2568,7 +2567,7 @@ class RecordReaderImpl implements Record
    *    row groups must be read.
    * @throws IOException
    */
-  private boolean[] pickRowGroups() throws IOException {
+  protected boolean[] pickRowGroups() throws IOException {
     // if we don't have a sarg or indexes, we read everything
     if (sarg == null || rowIndexStride == 0) {
       return null;
@@ -3236,7 +3235,7 @@ class RecordReaderImpl implements Record
     throw new IllegalArgumentException("Seek after the end of reader range");
   }
 
-  OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
+  protected OrcProto.RowIndex[] readRowIndex(int stripeIndex) throws IOException {
     long offset = stripes.get(stripeIndex).getOffset();
     OrcProto.StripeFooter stripeFooter;
     OrcProto.RowIndex[] indexes;



Mime
View raw message