crunch-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mkw...@apache.org
Subject crunch git commit: CRUNCH-513 recursively browsing children directories to determine the path size
Date Thu, 07 May 2015 02:12:28 GMT
Repository: crunch
Updated Branches:
  refs/heads/master 87318ca7e -> 504a6194e


CRUNCH-513 recursively browsing children directories to determine the path size


Project: http://git-wip-us.apache.org/repos/asf/crunch/repo
Commit: http://git-wip-us.apache.org/repos/asf/crunch/commit/504a6194
Tree: http://git-wip-us.apache.org/repos/asf/crunch/tree/504a6194
Diff: http://git-wip-us.apache.org/repos/asf/crunch/diff/504a6194

Branch: refs/heads/master
Commit: 504a6194edb5e066e248e78f83e4519bb6e82c58
Parents: 87318ca
Author: Andy Nelson <andy.nelson@cerner.com>
Authored: Tue Apr 28 15:49:32 2015 -0500
Committer: Micah Whitacre <micah.whitacre@cerner.com>
Committed: Wed May 6 20:58:04 2015 -0500

----------------------------------------------------------------------
 .../apache/crunch/io/SourceTargetHelper.java    |  2 +-
 .../apache/crunch/io/hbase/HFileSourceIT.java   | 22 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/crunch/blob/504a6194/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
----------------------------------------------------------------------
diff --git a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
index 7b9bea0..8fb7065 100644
--- a/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
+++ b/crunch-core/src/main/java/org/apache/crunch/io/SourceTargetHelper.java
@@ -43,7 +43,7 @@ public class SourceTargetHelper {
     for (FileStatus status : stati) {
       if (status.isDir()) {
         for (FileStatus st : fs.listStatus(status.getPath())) {
-          size += st.getLen();
+          size += getPathSize(fs, st.getPath());
         }
       } else {
         size += status.getLen();

http://git-wip-us.apache.org/repos/asf/crunch/blob/504a6194/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java
----------------------------------------------------------------------
diff --git a/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java b/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java
index e82102b..6f418a5 100644
--- a/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java
+++ b/crunch-hbase/src/it/java/org/apache/crunch/io/hbase/HFileSourceIT.java
@@ -26,11 +26,13 @@ import org.apache.crunch.MapFn;
 import org.apache.crunch.PCollection;
 import org.apache.crunch.Pipeline;
 import org.apache.crunch.PipelineResult;
+import org.apache.crunch.Source;
 import org.apache.crunch.impl.mr.MRPipeline;
 import org.apache.crunch.io.To;
 import org.apache.crunch.test.TemporaryPath;
 import org.apache.crunch.test.TemporaryPaths;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.KeyValue;
@@ -266,6 +268,26 @@ public class HFileSourceIT implements Serializable {
     assertArrayEquals(VALUE3, results.get(0).getValue(FAMILY1, QUALIFIER3));
   }
 
+  @Test
+  public void testHFileSize() throws IOException {
+    Path inputPath = tmpDir.getPath("in");
+    List<KeyValue> kvs = ImmutableList.of(
+        new KeyValue(ROW1, FAMILY1, QUALIFIER1, 1, VALUE1),
+        new KeyValue(ROW1, FAMILY1, QUALIFIER2, 2, VALUE2),
+        new KeyValue(ROW1, FAMILY1, QUALIFIER2, 3, VALUE3));
+    writeKeyValuesToHFile(inputPath, kvs);
+
+    FileSystem fs = FileSystem.get(conf);
+    FileStatus[] fileStatuses = fs.listStatus(inputPath.getParent());
+    long size = 0;
+    for(FileStatus s: fileStatuses){
+      size += s.getLen();
+    }
+
+    Source<KeyValue> hfile = FromHBase.hfile(inputPath);
+    assertTrue(hfile.getSize(conf) >= size);
+  }
+
   private List<Result> doTestScanHFiles(List<KeyValue> kvs, Scan scan) throws IOException {
     Path inputPath = tmpDir.getPath("in");
     writeKeyValuesToHFile(inputPath, kvs);


Mime
View raw message