chukwa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From asrab...@apache.org
Subject svn commit: r799718 - in /hadoop/chukwa/trunk: ./ src/java/org/apache/hadoop/chukwa/extraction/archive/ src/test/org/apache/hadoop/chukwa/extraction/archive/
Date Fri, 31 Jul 2009 19:46:18 GMT
Author: asrabkin
Date: Fri Jul 31 19:46:17 2009
New Revision: 799718

URL: http://svn.apache.org/viewvc?rev=799718&view=rev
Log:
CHUKWA-362.  Archiver group-by-cluster conf name shouldn't be hardcoded

Modified:
    hadoop/chukwa/trunk/CHANGES.txt
    hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveBuilder.java
    hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypeOutputFormat.java
    hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypePartitioner.java
    hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/SinkArchiver.java
    hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/extraction/archive/TestArchive.java

Modified: hadoop/chukwa/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/CHANGES.txt?rev=799718&r1=799717&r2=799718&view=diff
==============================================================================
--- hadoop/chukwa/trunk/CHANGES.txt (original)
+++ hadoop/chukwa/trunk/CHANGES.txt Fri Jul 31 19:46:17 2009
@@ -44,6 +44,8 @@
 
   IMPROVEMENTS
 
+    CHUKWA-362.  Archiver group-by-cluster conf name shouldn't be hardcoded. (asrabkin)
+
     CHUKWA-355.  Remove obsolete datacollection.protocol package. (asrabkin)
 
     CHUKWA-270. ChukwaAgentController no longer uses stdout. (Rushin Barot via asrabkin)

Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveBuilder.java?rev=799718&r1=799717&r2=799718&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveBuilder.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveBuilder.java Fri Jul 31 19:46:17 2009
@@ -68,6 +68,12 @@
         throws IOException {
       ChunkImpl i = vals.next();
       out.collect(key, i);
+      int dups = 0;
+      while(vals.hasNext()) {
+        vals.next();
+        dups ++;
+      }
+      r.incrCounter("app", "duplicate chunks", dups);
     }
   
   }

Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypeOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypeOutputFormat.java?rev=799718&r1=799717&r2=799718&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypeOutputFormat.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypeOutputFormat.java Fri Jul 31 19:46:17 2009
@@ -33,6 +33,8 @@
 
 public class ChukwaArchiveDataTypeOutputFormat extends
     MultipleSequenceFileOutputFormat<ChukwaArchiveKey, ChunkImpl> {
+  
+  static final String GROUP_BY_CLUSTER_OPTION_NAME = "archive.groupByClusterName";
   static Logger log = Logger.getLogger(ChukwaArchiveDataTypeOutputFormat.class);
   SimpleDateFormat sdf = new SimpleDateFormat("yyyy_MM_dd");
   boolean useClusterID;
@@ -41,8 +43,8 @@
       JobConf job, String name, Progressable arg3) 
   throws java.io.IOException{
 
-    log.info("archive.addClusterName is " + job.get("archive.groupByClusterName"));
-    useClusterID = "true".equals(job.get("archive.groupByClusterName"));
+    log.info(GROUP_BY_CLUSTER_OPTION_NAME + " is " + job.get(GROUP_BY_CLUSTER_OPTION_NAME));
+    useClusterID = "true".equals(job.get(GROUP_BY_CLUSTER_OPTION_NAME));
 
     return super.getRecordWriter(fs, job, name, arg3);
   }

Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypePartitioner.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypePartitioner.java?rev=799718&r1=799717&r2=799718&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypePartitioner.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/ChukwaArchiveDataTypePartitioner.java Fri Jul 31 19:46:17 2009
@@ -32,7 +32,8 @@
 
   boolean useClusterID = false;
   public void configure(JobConf conf) {
-    useClusterID = "true".equals(conf.get("archive.groupByClusterName"));
+    useClusterID = "true".equals(conf.get(ChukwaArchiveDataTypeOutputFormat.
+        GROUP_BY_CLUSTER_OPTION_NAME));
   }
 
   public int getPartition(ChukwaArchiveKey key, ChunkImpl chunk,
@@ -43,10 +44,11 @@
       return ((chunk.getDataType() + "_" + clusterID + "_" + sdf.format(key.getTimePartition()))
           .hashCode() & Integer.MAX_VALUE)
           % numReduceTasks;
-    } else
+    } else {
       return ((chunk.getDataType() + "_" + sdf.format(key.getTimePartition()))
         .hashCode() & Integer.MAX_VALUE)
         % numReduceTasks;
+    }
   }
 
 }

Modified: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/SinkArchiver.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/SinkArchiver.java?rev=799718&r1=799717&r2=799718&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/SinkArchiver.java (original)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/extraction/archive/SinkArchiver.java Fri Jul 31 19:46:17 2009
@@ -52,6 +52,8 @@
   public static void main(String[] args) {
     try {
       Configuration conf = new ChukwaConfiguration();
+      if(conf.get(ChukwaArchiveDataTypeOutputFormat.GROUP_BY_CLUSTER_OPTION_NAME) == null)
+        conf.set(ChukwaArchiveDataTypeOutputFormat.GROUP_BY_CLUSTER_OPTION_NAME, "true");
       FileSystem fs = FileSystem.get(conf);
       SinkArchiver archiver = new SinkArchiver();
       archiver.exec(fs, conf);    

Modified: hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/extraction/archive/TestArchive.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/extraction/archive/TestArchive.java?rev=799718&r1=799717&r2=799718&view=diff
==============================================================================
--- hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/extraction/archive/TestArchive.java (original)
+++ hadoop/chukwa/trunk/src/test/org/apache/hadoop/chukwa/extraction/archive/TestArchive.java Fri Jul 31 19:46:17 2009
@@ -123,7 +123,7 @@
     conf.setInt("io.sort.factor", 5);
     conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
     conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);
-    conf.set("archive.groupByClusterName", "true");
+    conf.set(ChukwaArchiveDataTypeOutputFormat.GROUP_BY_CLUSTER_OPTION_NAME, "true");
     
     System.setProperty("hadoop.log.dir", System.getProperty(
         "test.build.data", "/tmp"));



Mime
View raw message