hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r758232 - in /hadoop/core/trunk: ./ src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/ src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/
Date Wed, 25 Mar 2009 11:44:14 GMT
Author: cutting
Date: Wed Mar 25 11:44:09 2009
New Revision: 758232

URL: http://svn.apache.org/viewvc?rev=758232&view=rev
Log:
HADOOP-5491.  In contrib/index, better control memory usage.  Contributed by Ning Li.

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
    hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
    hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
    hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Mar 25 11:44:09 2009
@@ -181,6 +181,9 @@
     HADOOP-4788. Set Fair scheduler to assign both a map and a reduce on each
     heartbeat by default. (matei)
 
+    HADOOP-5491.  In contrib/index, better control memory usage.
+    (Ning Li via cutting)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java (original)
+++ hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java Wed Mar 25 11:44:09 2009
@@ -40,6 +40,8 @@
   static final Log LOG = LogFactory.getLog(IndexUpdateCombiner.class);
 
   IndexUpdateConfiguration iconf;
+  long maxSizeInBytes;
+  long nearMaxSizeInBytes;
 
   /* (non-Javadoc)
    * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
@@ -48,17 +50,47 @@
       OutputCollector<Shard, IntermediateForm> output, Reporter reporter)
       throws IOException {
 
-    LOG.info("Construct a form writer for " + key);
-    IntermediateForm form = new IntermediateForm();
-    form.configure(iconf);
+    String message = key.toString();
+    IntermediateForm form = null;
+
     while (values.hasNext()) {
       IntermediateForm singleDocForm = values.next();
-      form.process(singleDocForm);
+      long formSize = form == null ? 0 : form.totalSizeInBytes();
+      long singleDocFormSize = singleDocForm.totalSizeInBytes();
+
+      if (form != null && formSize + singleDocFormSize > maxSizeInBytes) {
+        closeForm(form, message);
+        output.collect(key, form);
+        form = null;
+      }
+
+      if (form == null && singleDocFormSize >= nearMaxSizeInBytes) {
+        output.collect(key, singleDocForm);
+      } else {
+        if (form == null) {
+          form = createForm(message);
+        }
+        form.process(singleDocForm);
+      }
     }
-    form.closeWriter();
-    LOG.info("Closed the form writer for " + key + ", form = " + form);
 
-    output.collect(key, form);
+    if (form != null) {
+      closeForm(form, message);
+      output.collect(key, form);
+    }
+  }
+
+  private IntermediateForm createForm(String message) throws IOException {
+    LOG.info("Construct a form writer for " + message);
+    IntermediateForm form = new IntermediateForm();
+    form.configure(iconf);
+    return form;
+  }
+
+  private void closeForm(IntermediateForm form, String message)
+      throws IOException {
+    form.closeWriter();
+    LOG.info("Closed the form writer for " + message + ", form = " + form);
   }
 
   /* (non-Javadoc)
@@ -66,6 +98,8 @@
    */
   public void configure(JobConf job) {
     iconf = new IndexUpdateConfiguration(job);
+    maxSizeInBytes = iconf.getMaxRAMSizeInBytes();
+    nearMaxSizeInBytes = maxSizeInBytes - (maxSizeInBytes >>> 3); // 7/8 of max
   }
 
   /* (non-Javadoc)

Modified: hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java (original)
+++ hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java Wed Mar 25 11:44:09 2009
@@ -235,4 +235,20 @@
     conf.setBoolean("sea.use.compound.file", useCompoundFile);
   }
 
+  /**
+   * Get the max ram index size in bytes. The default is 50M.
+   * @return the max ram index size in bytes
+   */
+  public long getMaxRAMSizeInBytes() {
+    return conf.getLong("sea.max.ramsize.bytes", 50L << 20);
+  }
+
+  /**
+   * Set the max ram index size in bytes.
+   * @param b  the max ram index size in bytes
+   */
+  public void setMaxRAMSizeInBytes(long b) {
+    conf.setLong("sea.max.ramsize.bytes", b);
+  }
+
 }

Modified: hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java (original)
+++ hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java Wed Mar 25 11:44:09 2009
@@ -151,6 +151,19 @@
     }
   }
 
+  /**
+   * The total size of files in the directory and ram used by the index writer.
+   * It does not include memory used by the delete list.
+   * @return the total size in bytes
+   */
+  public long totalSizeInBytes() throws IOException {
+    long size = dir.sizeInBytes();
+    if (writer != null) {
+      size += writer.ramSizeInBytes();
+    }
+    return size;
+  }
+
   /* (non-Javadoc)
    * @see java.lang.Object#toString()
    */

Modified: hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java (original)
+++ hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java Wed Mar 25 11:44:09 2009
@@ -151,6 +151,7 @@
     iconf.setIndexMaxFieldLength(2);
     iconf.setIndexUseCompoundFile(true);
     iconf.setIndexMaxNumSegments(1);
+    iconf.setMaxRAMSizeInBytes(20480);
 
     long versionNumber = -1;
     long generation = -1;



Mime
View raw message