trafodion-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lium...@apache.org
Subject [1/2] trafodion git commit: TRAFODION-2885 dcs server cant be restart while switching master
Date Sun, 14 Jan 2018 07:51:11 GMT
Repository: trafodion
Updated Branches:
  refs/heads/master 0c049d784 -> 27897caaf


TRAFODION-2885 dcs server cant be restart while switching master


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/4fe1e198
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/4fe1e198
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/4fe1e198

Branch: refs/heads/master
Commit: 4fe1e198185daa7f5132901bc5a16d6e14b2a7c4
Parents: 33325b6
Author: aven <shengchen.ma@esgyn.cn>
Authored: Fri Jan 5 16:29:49 2018 +0800
Committer: aven <shengchen.ma@esgyn.cn>
Committed: Fri Jan 5 16:29:49 2018 +0800

----------------------------------------------------------------------
 .../org/trafodion/dcs/master/ServerManager.java | 58 +++++++++++++++++---
 1 file changed, 51 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/4fe1e198/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
----------------------------------------------------------------------
diff --git a/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java b/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
index 8594c36..37c7963 100644
--- a/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
+++ b/dcs/src/main/java/org/trafodion/dcs/master/ServerManager.java
@@ -23,14 +23,12 @@ under the License.
 package org.trafodion.dcs.master;
 
 import java.net.InetAddress;
-
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.BufferedReader;
 import java.io.FileReader;
 import java.io.FileNotFoundException;
-
 import java.util.Scanner;
 import java.util.Collections;
 import java.util.Iterator;
@@ -47,17 +45,13 @@ import java.util.Date;
 import java.util.Comparator;
 import java.util.Map;
 import java.util.HashMap;
-
 import java.text.DateFormat;
 
 import org.apache.zookeeper.*;
 import org.apache.zookeeper.data.Stat;
-
 import org.apache.hadoop.conf.Configuration;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-
 import org.trafodion.dcs.master.RunningServer;
 import org.trafodion.dcs.master.RegisteredServer;
 import org.trafodion.dcs.master.Metrics;
@@ -66,7 +60,6 @@ import org.trafodion.dcs.script.ScriptContext;
 import org.trafodion.dcs.Constants;
 import org.trafodion.dcs.zookeeper.ZkClient;
 import org.trafodion.dcs.util.*;
-
 import org.codehaus.jettison.json.JSONArray;
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
@@ -314,6 +307,7 @@ public class ServerManager implements Callable {
             getServersFile();
             createServersPortMap();
             getZkRunning();
+            getUnwathedServers();
             getZkRegistered();
 
             while (true) {
@@ -506,6 +500,56 @@ public class ServerManager implements Callable {
         }
     }
 
+    private void getUnwathedServers() {
+        // In some situation, if DCS Server does not have znode info in zookeeper
+        // when DCS Master is starting, then server will never be watched by zookeeper,
+        // and if it downs, it will never be restarted.
+
+        // configuredServers
+        // hostName + ":" + lineNum + ":" + serverCount
+        // runningServers
+        // hostName + ":" + instance + ":" + infoPort + ":" + serverStartTimestamp
+        // eg : gy26.esgyncn.local:3:24413:1515056285028
+        // RestartHandler need to know hostName, instanceNum(lineNum), serverStartTimestamp(for if condition)
+        if (runningServers.size() == configuredServers.size()) {
+            if (LOG.isDebugEnabled()) {
+                LOG.debug("all dcs servers have started, no need to add watchers");
+            }
+            return;
+        }
+
+        boolean found = false;
+        for (String configured : configuredServers) {
+            Scanner configuredScn = new Scanner(configured);
+            configuredScn.useDelimiter(":");
+            String hostName = configuredScn.next();
+            int instance = Integer.parseInt(configuredScn.next());
+            int serverCount = Integer.parseInt(configuredScn.next());
+            configuredScn.close();
+            for (String running : runningServers) {
+                Scanner runningScn = new Scanner(running);
+                runningScn.useDelimiter(":");
+                String runningHostName = runningScn.next();
+
+                runningScn.close();
+                if (runningHostName.equals(hostName)) {
+                    found = true;
+                    break;
+                }
+            }
+            if (found) {
+                found = false;
+                continue;
+            } else {
+                LOG.error("DcsServer [" + hostName + ":" + instance + "] does not started when starting DcsMaster [" + master.getServerName() + "] add to restart queue.");
+                // add to the restart handler
+                String simulatePath = hostName + ":" + instance + ":0:" + System.currentTimeMillis();
+                RestartHandler handler = new RestartHandler(simulatePath, serverCount);
+                restartQueue.add(handler);
+            }
+        }
+    }
+
     private synchronized void restartServer(String znodePath) throws Exception {
         String child = znodePath.replace(parentZnode
                 + Constants.DEFAULT_ZOOKEEPER_ZNODE_SERVERS_RUNNING + "/", "");


Mime
View raw message