orc-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From asandryh <...@git.apache.org>
Subject [GitHub] orc pull request: ORC-8. Reimplement file-metadata to use the read...
Date Tue, 19 Jan 2016 18:42:41 GMT
Github user asandryh commented on a diff in the pull request:

    https://github.com/apache/orc/pull/15#discussion_r50154368
  
    --- Diff: tools/src/FileMetadata.cc ---
    @@ -16,168 +16,162 @@
      * limitations under the License.
      */
     
    +#include <getopt.h>
     #include <iostream>
     #include <fstream>
     #include <vector>
     #include <string>
     #include <sstream>
    -#include <iomanip>
     
    -#include "wrap/orc-proto-wrapper.hh"
     #include "orc/OrcFile.hh"
     
    -using namespace orc::proto;
    -
    -uint64_t getTotalPaddingSize(const Footer& footer) {
    -  uint64_t paddedBytes = 0;
    -  StripeInformation stripe;
    -  for (int stripeIx=1; stripeIx<footer.stripes_size(); stripeIx++) {
    -      stripe = footer.stripes(stripeIx-1);
    -      uint64_t prevStripeOffset = stripe.offset();
    -      uint64_t prevStripeLen = stripe.datalength() + stripe.indexlength() +
    -        stripe.footerlength();
    -      paddedBytes += footer.stripes(stripeIx).offset() -
    -        (prevStripeOffset + prevStripeLen);
    -  };
    -  return paddedBytes;
    +void printStripeInformation(std::ostream& out,
    +                            uint64_t index,
    +                            uint64_t columns,
    +                            std::unique_ptr<orc::StripeInformation> stripe,
    +                            bool verbose) {
    +  out << "    { \"stripe\": " << index
    +      << ", \"rows\": " << stripe->getNumberOfRows() << ",\n";
    +  out << "      \"offset\": " << stripe->getOffset()
    +      << ", \"length\": " << stripe->getLength() << ",\n";
    +  out << "      \"index\": " << stripe->getIndexLength()
    +      << ", \"data\": " << stripe->getDataLength()
    +      << ", \"footer\": " << stripe->getFooterLength();
    +  if (verbose) {
    +    out << ",\n      \"encodings\": [\n";
    +    for(uint64_t col=0; col < columns; ++col) {
    +      if (col != 0) {
    +        out << ",\n";
    +      }
    +      orc::ColumnEncodingKind encoding = stripe->getColumnEncoding(col);
    +      out << "         { \"column\": " << col
    +          << ", \"encoding\": \""
    +          << columnEncodingKindToString(encoding) << "\"";
    +      if (encoding == orc::ColumnEncodingKind_DICTIONARY ||
    +          encoding == orc::ColumnEncodingKind_DICTIONARY_V2) {
    +        out << ", \"count\": " << stripe->getDictionarySize(col);
    +      }
    +      out << " }";
    +    }
    +    out << "\n      ],\n";
    +    out << "      \"streams\": [\n";
    +    for(uint64_t str = 0; str < stripe->getNumberOfStreams(); ++str) {
    +      if (str != 0) {
    +        out << ",\n";
    +      }
    +      ORC_UNIQUE_PTR<orc::StreamInformation> stream =
    +        stripe->getStreamInformation(str);
    +      out << "        { \"id\": " << str
    +          << ", \"column\": " << stream->getColumnId()
    +          << ", \"kind\": \"" << streamKindToString(stream->getKind())
    +          << "\",\n";
    +      out << "          \"offset\": " << stream->getOffset()
    --- End diff --
    
    This may be nit-picking, but I find the output more readable if lines 67-68 are replaced
with:
    << ", \"offset\": " << stream->getOffset()


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message