chukwa-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ey...@apache.org
Subject svn commit: r767743 - in /hadoop/chukwa: branches/chukwa-0.1/CHANGES.txt branches/chukwa-0.1/src/java/org/apache/hadoop/chukwa/util/WatchDog.java trunk/CHANGES.txt trunk/src/java/org/apache/hadoop/chukwa/util/WatchDog.java
Date Thu, 23 Apr 2009 00:39:58 GMT
Author: eyang
Date: Thu Apr 23 00:39:57 2009
New Revision: 767743

URL: http://svn.apache.org/viewvc?rev=767743&view=rev
Log:
CHUKWA-168. Added watchdog for database. (Eric Yang)

Added:
    hadoop/chukwa/branches/chukwa-0.1/src/java/org/apache/hadoop/chukwa/util/WatchDog.java
    hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/WatchDog.java
Modified:
    hadoop/chukwa/branches/chukwa-0.1/CHANGES.txt
    hadoop/chukwa/trunk/CHANGES.txt

Modified: hadoop/chukwa/branches/chukwa-0.1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/chukwa/branches/chukwa-0.1/CHANGES.txt?rev=767743&r1=767742&r2=767743&view=diff
==============================================================================
--- hadoop/chukwa/branches/chukwa-0.1/CHANGES.txt (original)
+++ hadoop/chukwa/branches/chukwa-0.1/CHANGES.txt Thu Apr 23 00:39:57 2009
@@ -36,6 +36,16 @@
 
   IMPROVEMENTS
 
+    CHUKWA-176. Rearrange the parameter order for aggregator.sh. (Eric Yang)
+
+    CHUKWA-174. Added test cases to test database partitioning, database aggregation, and data loading. (Eric Yang)
+
+    CHUKWA-173. Parameterize configuration, and enable substitution at build time. (Jerome Boulon via Eric Yang)
+
+    CHUKWA-131. Added additional Mapred job/task metrics.  (Eric Yang)
+
+    CHUKWA-145. Tuned hadoop parameters for demux. (Jerome Boulon via Eric Yang)
+
     CHUKWA-163. Updated reference to mdl.xml file. (Terence Kwan via Eric Yang)
 
     CHUKWA-157. Added javadoc target, api-xml, api-report, and change log 2 html. (Eric Yang)
@@ -97,11 +107,21 @@
 
   BUG FIXES
 
-    CHUKWA-156. Test Macro testcase changed to use timestamp check for the generated macros. (Eric Yang)
+    CHUKWA-168. Added watchdog for database. (Eric Yang)
+
+    CHUKWA-175.  Removed error message for shutting down data processors. (Eric Yang)
+
+    CHUKWA-155.  Store final job status only (Cheng Zhang via Eric Yang)
+
+    CHUKWA-164.  Use year corresponding to the sender time stamp. (Cheng Zhang via Eric Yang)
+
+    CHUKWA-166.  Handle null parameter case for XSSFilter. (Terence Kwan via Eric Yang)
+
+    CHUKWA-156.  Test Macro testcase changed to use timestamp check for the generated macros. (Eric Yang)
 
     CHUKWA-154.  Handle adaptor exception, close file pointers on failure condition.  (Jerome Boulon via Eric Yang)
 
-    CHUKWA-139. Rewrite collector bail out code.  (Cheng Zhang via Eric Yang)
+    CHUKWA-139.  Rewrite collector bail out code.  (Cheng Zhang via Eric Yang)
 
     CHUKWA-119.  Removed dependency of ChukwaAgent from ChunkImpl for preventing multiple
                  MetricsContext to be initialized in the same VM. (Jerome Boulon via Eric Yang)

Added: hadoop/chukwa/branches/chukwa-0.1/src/java/org/apache/hadoop/chukwa/util/WatchDog.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/branches/chukwa-0.1/src/java/org/apache/hadoop/chukwa/util/WatchDog.java?rev=767743&view=auto
==============================================================================
--- hadoop/chukwa/branches/chukwa-0.1/src/java/org/apache/hadoop/chukwa/util/WatchDog.java (added)
+++ hadoop/chukwa/branches/chukwa-0.1/src/java/org/apache/hadoop/chukwa/util/WatchDog.java Thu Apr 23 00:39:57 2009
@@ -0,0 +1,88 @@
+package org.apache.hadoop.chukwa.util;
+
+import java.sql.ResultSet;
+import java.util.Calendar;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.chukwa.util.DatabaseWriter;
+import org.apache.hadoop.chukwa.database.DatabaseConfig;
+public class WatchDog {
+    private static Log log = LogFactory.getLog(WatchDog.class);
+    private String cluster=null;
+    
+    public WatchDog(String cluster){
+        this.cluster=cluster;
+    }
+    
+                            
+    public void run(){
+       long updates = 0;
+       boolean error = false;
+       try{
+           // SQL query to monitor database
+           DatabaseConfig dbc = new DatabaseConfig();
+           log.info("cluster:"+cluster);
+           DatabaseWriter db = new DatabaseWriter(cluster);
+           Calendar c = Calendar.getInstance();
+           long now = c.getTimeInMillis();
+           String[] tableName = dbc.findTableName("system_metrics", now, now);
+           String query = "select unix_TIMESTAMP(now()) - unix_timestamp(max(timestamp))
as delay from "+tableName[0]+" ;";
+           ResultSet rs = db.query(query);
+           while(rs.next()) {
+               long delay = rs.getLong(1);
+               if(delay>600) {
+                   log.error("Chukwa: "+cluster+": No new data for the past 30 minutes for
system metrics");                   
+                   error=true;
+               }
+           }
+           query = "select count(*) as UpdatesPerHr from "+tableName[0]+" where Timestamp
> date_sub(now(), interval 60 minute) ;";
+           rs = db.query(query);
+           while(rs.next()) {
+               updates = rs.getLong(1);
+               if(updates==0) {
+                   log.error("Chukwa: "+cluster+": No system metrics data received for the
past 60 minutes");                   
+                   error=true;
+               }
+           }
+           String[] hodTableNames = dbc.findTableName("HodJob", now, now);
+           query = "select count(*) as UpdatesPerHr from "+hodTableNames[0]+" where StartTime
> date_sub(now(), interval 60 minute) ;";
+           rs = db.query(query);           
+           while(rs.next()) {
+               long updatesHod = rs.getLong(1);
+               if(updatesHod==0) {
+                   log.error("Chukwa: "+cluster+": No hod job data received for the past
60 minutes");
+               }
+           }
+           String[] mrTableNames = dbc.findTableName("mr_job", now, now);
+           query = "select count(*) as UpdatesPerHr from "+mrTableNames+" where FINISH_TIME
> date_sub(now(), interval 1440 minute) ;";
+           rs = db.query(query);                      
+           while(rs.next()) {
+               long updatesMR = rs.getLong(1);
+               if(updatesMR==0) {
+                   log.error("MDL: no map reduce job data received for the past day.");
+                   error=true;
+               }
+           }
+           db.close();
+       }catch (Exception ex){
+           log.error("Unexpected error:"+ex.getStackTrace().toString());
+           System.exit(1);
+       }
+       if(!error) {
+           log.info("MDL: Status OK");
+       }
+           
+    }
+    
+    
+     public static void main(String[] args) {
+         String cluster = System.getProperty("CLUSTER");
+         if(cluster!=null) {
+             WatchDog wd = new WatchDog(cluster);
+             wd.run();
+         } else {
+             log.error("Chukwa: jdbc.conf is not configured");
+         }
+     }        
+}

Modified: hadoop/chukwa/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/CHANGES.txt?rev=767743&r1=767742&r2=767743&view=diff
==============================================================================
--- hadoop/chukwa/trunk/CHANGES.txt (original)
+++ hadoop/chukwa/trunk/CHANGES.txt Thu Apr 23 00:39:57 2009
@@ -38,6 +38,14 @@
 
   IMPROVEMENTS
 
+    CHUKWA-176. Rearrange the parameter order for aggregator.sh. (Eric Yang)
+
+    CHUKWA-173. Parameterize configuration, and enable substitution at build time. (Jerome Boulon via Eric Yang)
+
+    CHUKWA-131. Added additional Mapred job/task metrics.  (Eric Yang)
+
+    CHUKWA-145. Tuned hadoop parameters for demux. (Jerome Boulon via Eric Yang)
+
     CHUKWA-163. Updated reference to mdl.xml file. (Terence Kwan via Eric Yang)
 
     CHUKWA-157. Added javadoc target, api-xml, api-report, and change log 2 html. (Eric Yang)
@@ -46,9 +54,9 @@
 
     CHUKWA-138. Updated Chukwa Admin Guide. (Corinne Chandel via Eric Yang)
 
-    CHUKWA-128.  Added tools to compute aggregation in database. (Eric Yang)
+    CHUKWA-128. Added tools to compute aggregation in database. (Eric Yang)
 
-    CHUKWA-112.  Updated README file.  (Corinne Chandel via Eric Yang)
+    CHUKWA-112. Updated README file.  (Corinne Chandel via Eric Yang)
 
     CHUKWA-134. Add release audit target. (Giridharan Kesavan via Eric Yang)
 
@@ -105,6 +113,16 @@
 
   BUG FIXES
 
+    CHUKWA-168. Added watchdog for database. (Eric Yang)
+
+    CHUKWA-175.  Removed error message for shutting down data processors. (Eric Yang)
+
+    CHUKWA-155.  Store final job status only (Cheng Zhang via Eric Yang)
+
+    CHUKWA-164.  Use year corresponding to the sender time stamp. (Cheng Zhang via Eric Yang)
+
+    CHUKWA-166.  Handle null parameter case for XSSFilter. (Terence Kwan via Eric Yang)
+
     CHUKWA-156. Test Macro testcase changed to use timestamp check for the generated macros. (Eric Yang)
 
     CHUKWA-154.  Handle adaptor exception, close file pointers on failure condition.  (Jerome Boulon via Eric Yang)

Added: hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/WatchDog.java
URL: http://svn.apache.org/viewvc/hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/WatchDog.java?rev=767743&view=auto
==============================================================================
--- hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/WatchDog.java (added)
+++ hadoop/chukwa/trunk/src/java/org/apache/hadoop/chukwa/util/WatchDog.java Thu Apr 23 00:39:57 2009
@@ -0,0 +1,88 @@
+package org.apache.hadoop.chukwa.util;
+
+import java.sql.ResultSet;
+import java.util.Calendar;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.Log;
+import org.apache.hadoop.chukwa.util.DatabaseWriter;
+import org.apache.hadoop.chukwa.database.DatabaseConfig;
+public class WatchDog {
+    private static Log log = LogFactory.getLog(WatchDog.class);
+    private String cluster=null;
+    
+    public WatchDog(String cluster){
+        this.cluster=cluster;
+    }
+    
+                            
+    public void run(){
+       long updates = 0;
+       boolean error = false;
+       try{
+           // SQL query to monitor database
+           DatabaseConfig dbc = new DatabaseConfig();
+           log.info("cluster:"+cluster);
+           DatabaseWriter db = new DatabaseWriter(cluster);
+           Calendar c = Calendar.getInstance();
+           long now = c.getTimeInMillis();
+           String[] tableName = dbc.findTableName("system_metrics", now, now);
+           String query = "select unix_TIMESTAMP(now()) - unix_timestamp(max(timestamp))
as delay from "+tableName[0]+" ;";
+           ResultSet rs = db.query(query);
+           while(rs.next()) {
+               long delay = rs.getLong(1);
+               if(delay>600) {
+                   log.error("Chukwa: "+cluster+": No new data for the past 30 minutes for
system metrics");                   
+                   error=true;
+               }
+           }
+           query = "select count(*) as UpdatesPerHr from "+tableName[0]+" where Timestamp
> date_sub(now(), interval 60 minute) ;";
+           rs = db.query(query);
+           while(rs.next()) {
+               updates = rs.getLong(1);
+               if(updates==0) {
+                   log.error("Chukwa: "+cluster+": No system metrics data received for the
past 60 minutes");                   
+                   error=true;
+               }
+           }
+           String[] hodTableNames = dbc.findTableName("HodJob", now, now);
+           query = "select count(*) as UpdatesPerHr from "+hodTableNames[0]+" where StartTime
> date_sub(now(), interval 60 minute) ;";
+           rs = db.query(query);           
+           while(rs.next()) {
+               long updatesHod = rs.getLong(1);
+               if(updatesHod==0) {
+                   log.error("Chukwa: "+cluster+": No hod job data received for the past
60 minutes");
+               }
+           }
+           String[] mrTableNames = dbc.findTableName("mr_job", now, now);
+           query = "select count(*) as UpdatesPerHr from "+mrTableNames+" where FINISH_TIME
> date_sub(now(), interval 1440 minute) ;";
+           rs = db.query(query);                      
+           while(rs.next()) {
+               long updatesMR = rs.getLong(1);
+               if(updatesMR==0) {
+                   log.error("MDL: no map reduce job data received for the past day.");
+                   error=true;
+               }
+           }
+           db.close();
+       }catch (Exception ex){
+           log.error("Unexpected error:"+ex.getStackTrace().toString());
+           System.exit(1);
+       }
+       if(!error) {
+           log.info("MDL: Status OK");
+       }
+           
+    }
+    
+    
+     public static void main(String[] args) {
+         String cluster = System.getProperty("CLUSTER");
+         if(cluster!=null) {
+             WatchDog wd = new WatchDog(cluster);
+             wd.run();
+         } else {
+             log.error("Chukwa: jdbc.conf is not configured");
+         }
+     }        
+}



Mime
View raw message