parquet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From b...@apache.org
Subject incubator-parquet-mr git commit: PARQUET-157: Divide by zero fix
Date Fri, 30 Jan 2015 01:32:57 GMT
Repository: incubator-parquet-mr
Updated Branches:
  refs/heads/master b4380f200 -> 32a9c6d42


PARQUET-157: Divide by zero fix

There is a divide-by-zero error in the logging code inside InternalParquetRecordReader. I've
been running with this fixed for a while, but every time I revert I hit the problem again. I
can't believe no one else has had this problem. I submitted a Jira ticket a few weeks ago
but didn't hear anything on the list, so here's the fix.

This also avoids constructing log messages inside the checkRead method of
InternalParquetRecordReader when INFO logging is disabled.

Also added a .gitignore entry to clean up a build artifact.

Author: Jim Carroll <jim@dontcallme.com>

Closes #102 from jimfcarroll/divide-by-zero-fix and squashes the following commits:

423200c [Jim Carroll] Filter out parquet-scrooge build artifact from git.
22337f3 [Jim Carroll] PARQUET-157: Fix a divide by zero error when Parquet runs quickly. Also
avoid compiling log statements in some cases where it's unnecessary.


Project: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/commit/32a9c6d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/tree/32a9c6d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/diff/32a9c6d4

Branch: refs/heads/master
Commit: 32a9c6d42a3a48314d3f9fe2956bfc8bf49ac5d5
Parents: b4380f2
Author: Jim Carroll <jim@dontcallme.com>
Authored: Thu Jan 29 17:32:54 2015 -0800
Committer: Ryan Blue <blue@apache.org>
Committed: Thu Jan 29 17:32:54 2015 -0800

----------------------------------------------------------------------
 .gitignore                                      |  1 +
 .../hadoop/InternalParquetRecordReader.java     | 21 +++++++++++---------
 2 files changed, 13 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/32a9c6d4/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 18748a7..cd3c066 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,5 +13,6 @@ target
 *.orig
 *.rej
 dependency-reduced-pom.xml
+parquet-scrooge/.cache
 .idea/*
 target/

http://git-wip-us.apache.org/repos/asf/incubator-parquet-mr/blob/32a9c6d4/parquet-hadoop/src/main/java/parquet/hadoop/InternalParquetRecordReader.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/parquet/hadoop/InternalParquetRecordReader.java
b/parquet-hadoop/src/main/java/parquet/hadoop/InternalParquetRecordReader.java
index 9a0d5d0..ca601a8 100644
--- a/parquet-hadoop/src/main/java/parquet/hadoop/InternalParquetRecordReader.java
+++ b/parquet-hadoop/src/main/java/parquet/hadoop/InternalParquetRecordReader.java
@@ -102,13 +102,16 @@ class InternalParquetRecordReader<T> {
   private void checkRead() throws IOException {
     if (current == totalCountLoadedSoFar) {
       if (current != 0) {
-        long timeAssembling = System.currentTimeMillis() - startedAssemblingCurrentBlockAt;
-        totalTimeSpentProcessingRecords += timeAssembling;
-        LOG.info("Assembled and processed " + totalCountLoadedSoFar + " records from " +
columnCount + " columns in " + totalTimeSpentProcessingRecords + " ms: "+((float)totalCountLoadedSoFar
/ totalTimeSpentProcessingRecords) + " rec/ms, " + ((float)totalCountLoadedSoFar * columnCount
/ totalTimeSpentProcessingRecords) + " cell/ms");
-        long totalTime = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
-        long percentReading = 100 * totalTimeSpentReadingBytes / totalTime;
-        long percentProcessing = 100 * totalTimeSpentProcessingRecords / totalTime;
-        LOG.info("time spent so far " + percentReading + "% reading ("+totalTimeSpentReadingBytes+"
ms) and " + percentProcessing + "% processing ("+totalTimeSpentProcessingRecords+" ms)");
+        totalTimeSpentProcessingRecords += (System.currentTimeMillis() - startedAssemblingCurrentBlockAt);
+        if (Log.INFO) {
+            LOG.info("Assembled and processed " + totalCountLoadedSoFar + " records from
" + columnCount + " columns in " + totalTimeSpentProcessingRecords + " ms: "+((float)totalCountLoadedSoFar
/ totalTimeSpentProcessingRecords) + " rec/ms, " + ((float)totalCountLoadedSoFar * columnCount
/ totalTimeSpentProcessingRecords) + " cell/ms");
+            final long totalTime = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
+            if (totalTime != 0) {
+                final long percentReading = 100 * totalTimeSpentReadingBytes / totalTime;
+                final long percentProcessing = 100 * totalTimeSpentProcessingRecords / totalTime;
+                LOG.info("time spent so far " + percentReading + "% reading ("+totalTimeSpentReadingBytes+"
ms) and " + percentProcessing + "% processing ("+totalTimeSpentProcessingRecords+" ms)");
+            }
+        }
       }
 
       LOG.info("at row " + current + ". reading next block");
@@ -120,7 +123,7 @@ class InternalParquetRecordReader<T> {
       long timeSpentReading = System.currentTimeMillis() - t0;
       totalTimeSpentReadingBytes += timeSpentReading;
       BenchmarkCounter.incrementTime(timeSpentReading);
-      LOG.info("block read in memory in " + timeSpentReading + " ms. row count = " + pages.getRowCount());
+      if (Log.INFO) LOG.info("block read in memory in " + timeSpentReading + " ms. row count
= " + pages.getRowCount());
       if (Log.DEBUG) LOG.debug("initializing Record assembly with requested schema " + requestedSchema);
       MessageColumnIO columnIO = columnIOFactory.getColumnIO(requestedSchema, fileSchema,
strictTypeChecking);
       recordReader = columnIO.getRecordReader(pages, recordConverter, filter);
@@ -217,4 +220,4 @@ class InternalParquetRecordReader<T> {
     }
     return true;
   }
-}
\ No newline at end of file
+}


Mime
View raw message