lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cpoersc...@apache.org
Subject [2/2] lucene-solr:branch_7x: SOLR-12454: Tweak Overseer leadership transition related logging for easier troubleshooting.
Date Tue, 17 Jul 2018 19:38:11 GMT
SOLR-12454: Tweak Overseer leadership transition related logging for easier troubleshooting.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/df08f564
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/df08f564
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/df08f564

Branch: refs/heads/branch_7x
Commit: df08f56420738aa38b8db011ef956b3b488b86c5
Parents: bb57871
Author: Christine Poerschke <cpoerschke@apache.org>
Authored: Tue Jul 17 17:17:27 2018 +0100
Committer: Christine Poerschke <cpoerschke@apache.org>
Committed: Tue Jul 17 20:17:48 2018 +0100

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 ++
 .../java/org/apache/solr/cloud/Overseer.java    | 21 ++++++++++++--------
 .../solr/cloud/OverseerTaskProcessor.java       |  8 ++++++--
 3 files changed, 21 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/df08f564/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a115752..2956fc5 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -157,6 +157,8 @@ Other Changes
 
 * SOLR-12464: Reduce Overseer.close() logging (for non-Overseer leaders) (Christine Poerschke)
 
+* SOLR-12454: Tweak Overseer leadership transition related logging for easier troubleshooting.
(Christine Poerschke)
+
 ==================  7.4.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/df08f564/solr/core/src/java/org/apache/solr/cloud/Overseer.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index 0a991ff..8c7e964 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -132,7 +132,7 @@ public class Overseer implements SolrCloseable {
         isLeader = amILeader();  // not a no, not a yes, try ask again
       }
 
-      log.debug("Starting to work on the main queue");
+      log.info("Starting to work on the main queue : {}", LeaderElector.getNodeName(myId));
       try {
         ZkStateWriter zkStateWriter = null;
         ClusterState clusterState = null;
@@ -302,12 +302,12 @@ public class Overseer implements SolrCloseable {
         return;//shutting down no need to go further
       }
       org.apache.zookeeper.data.Stat stat = new org.apache.zookeeper.data.Stat();
-      String path = OVERSEER_ELECT + "/leader";
+      final String path = OVERSEER_ELECT + "/leader";
       byte[] data;
       try {
         data = zkClient.getData(path, null, stat, true);
       } catch (Exception e) {
-        log.error("could not read the data" ,e);
+        log.error("could not read the "+path+" data" ,e);
         return;
       }
       try {
@@ -315,16 +315,17 @@ public class Overseer implements SolrCloseable {
         String id = (String) m.get(ID);
         if(overseerCollectionConfigSetProcessor.getId().equals(id)){
           try {
-            log.warn("I'm exiting, but I'm still the leader");
+            log.warn("I (id={}) am exiting, but I'm still the leader",
+                overseerCollectionConfigSetProcessor.getId());
             zkClient.delete(path,stat.getVersion(),true);
           } catch (KeeperException.BadVersionException e) {
             //no problem ignore it some other Overseer has already taken over
           } catch (Exception e) {
-            log.error("Could not delete my leader node ", e);
+            log.error("Could not delete my leader node "+path, e);
           }
 
         } else{
-          log.debug("somebody else has already taken up the overseer position");
+          log.info("somebody else (id={}) has already taken up the overseer position", id);
         }
       } finally {
         //if I am not shutting down, Then I need to rejoin election
@@ -413,10 +414,12 @@ public class Overseer implements SolrCloseable {
     private LeaderStatus amILeader() {
       Timer.Context timerContext = stats.time("am_i_leader");
       boolean success = true;
+      String propsId = null;
       try {
         ZkNodeProps props = ZkNodeProps.load(zkClient.getData(
             OVERSEER_ELECT + "/leader", null, null, true));
-        if (myId.equals(props.getStr(ID))) {
+        propsId = props.getStr(ID);
+        if (myId.equals(propsId)) {
           return LeaderStatus.YES;
         }
       } catch (KeeperException e) {
@@ -426,6 +429,8 @@ public class Overseer implements SolrCloseable {
           return LeaderStatus.DONT_KNOW;
         } else if (e.code() != KeeperException.Code.SESSIONEXPIRED) {
           log.warn("", e);
+        } else {
+          log.debug("", e);
         }
       } catch (InterruptedException e) {
         success = false;
@@ -438,7 +443,7 @@ public class Overseer implements SolrCloseable {
           stats.error("am_i_leader");
         }
       }
-      log.info("According to ZK I (id=" + myId + ") am no longer a leader.");
+      log.info("According to ZK I (id={}) am no longer a leader. propsId={}", myId, propsId);
       return LeaderStatus.NO;
     }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/df08f564/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
index 86e3564..01afc43 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerTaskProcessor.java
@@ -388,10 +388,12 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
     String statsName = "collection_am_i_leader";
     Timer.Context timerContext = stats.time(statsName);
     boolean success = true;
+    String propsId = null;
     try {
       ZkNodeProps props = ZkNodeProps.load(zkStateReader.getZkClient().getData(
           Overseer.OVERSEER_ELECT + "/leader", null, null, true));
-      if (myId.equals(props.getStr(ID))) {
+      propsId = props.getStr(ID);
+      if (myId.equals(propsId)) {
         return LeaderStatus.YES;
       }
     } catch (KeeperException e) {
@@ -401,6 +403,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
         return LeaderStatus.DONT_KNOW;
       } else if (e.code() != KeeperException.Code.SESSIONEXPIRED) {
         log.warn("", e);
+      } else {
+        log.debug("", e);
       }
     } catch (InterruptedException e) {
       success = false;
@@ -413,7 +417,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
         stats.error(statsName);
       }
     }
-    log.info("According to ZK I (id=" + myId + ") am no longer a leader.");
+    log.info("According to ZK I (id={}) am no longer a leader. propsId={}", myId, propsId);
     return LeaderStatus.NO;
   }
 


Mime
View raw message