helix-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zzh...@apache.org
Subject git commit: HELIX-59: controller not cleaning dead external view generated from old sessions
Date Thu, 21 Mar 2013 21:50:52 GMT
Updated Branches:
  refs/heads/master dc554a083 -> 9eecbc319


HELIX-59: controller not cleaning dead external view generated from old sessions


Project: http://git-wip-us.apache.org/repos/asf/incubator-helix/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-helix/commit/9eecbc31
Tree: http://git-wip-us.apache.org/repos/asf/incubator-helix/tree/9eecbc31
Diff: http://git-wip-us.apache.org/repos/asf/incubator-helix/diff/9eecbc31

Branch: refs/heads/master
Commit: 9eecbc319181843cb7eee13c3cef345763c23006
Parents: dc554a0
Author: zzhang <zzhang5@uci.edu>
Authored: Thu Mar 21 14:50:45 2013 -0700
Committer: zzhang <zzhang5@uci.edu>
Committed: Thu Mar 21 14:50:45 2013 -0700

----------------------------------------------------------------------
 .../java/org/apache/helix/HelixDataAccessor.java   |   17 +-
 .../stages/ExternalViewComputeStage.java           |   25 ++-
 .../test/java/org/apache/helix/ZkTestHelper.java   |   42 +++++-
 .../helix/integration/TestCleanupExternalView.java |  124 +++++++++++++++
 4 files changed, 189 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/9eecbc31/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
index b465dca..946787a 100644
--- a/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
+++ b/helix-core/src/main/java/org/apache/helix/HelixDataAccessor.java
@@ -27,7 +27,7 @@ import org.I0Itec.zkclient.DataUpdater;
 /**
  * Interface used to interact with Helix Data Types like IdealState, Config,
  * LiveInstance, Message, ExternalView etc PropertyKey represent the HelixData
- * type. See {@link Builder} to get more information on building a propertyKey.
+ * type. See {@link PropertyKey.Builder} to get more information on building a propertyKey.
  * 
  * 
  */
@@ -96,7 +96,7 @@ public interface HelixDataAccessor
    * Return the child names for a property. PropertyKey needs to refer to a
    * collection like instances, resources. PropertyKey.isLeaf must be false
    * 
-   * @param type
+   * @param key
    * @return SubPropertyNames
    */
   List<String> getChildNames(PropertyKey key);
@@ -105,7 +105,7 @@ public interface HelixDataAccessor
    * Get the child values for a property. PropertyKey needs to refer to just one
    * level above the non leaf. PropertyKey.isCollection must be true.
    * 
-   * @param type
+   * @param key
    * @return subPropertyValues
    */
   <T extends HelixProperty> List<T> getChildValues(PropertyKey key);
@@ -123,7 +123,7 @@ public interface HelixDataAccessor
   /**
    * Adds multiple children to a parent.
    * 
-   * @param key
+   * @param keys
    * @param children
    * @return
    */
@@ -133,16 +133,17 @@ public interface HelixDataAccessor
   /**
    * Sets multiple children under one parent
    * 
-   * @param externalViews
-   * @param views
+   * @param keys
+   * @param children
    */
   <T extends HelixProperty> boolean[] setChildren(List<PropertyKey> keys, List<T> children);
   
   /**
    * Updates multiple children under one parent
+   * TODO: change to use property-keys instead of paths
    * 
-   * @param externalViews
-   * @param views
+   * @param paths
+   * @param updaters
    */
   <T extends HelixProperty> boolean[] updateChildren(List<String> paths,
       List<DataUpdater<ZNRecord>> updaters,

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/9eecbc31/helix-core/src/main/java/org/apache/helix/controller/stages/ExternalViewComputeStage.java
----------------------------------------------------------------------
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/ExternalViewComputeStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/ExternalViewComputeStage.java
index c8f423a..69a676a 100644
--- a/helix-core/src/main/java/org/apache/helix/controller/stages/ExternalViewComputeStage.java
+++ b/helix-core/src/main/java/org/apache/helix/controller/stages/ExternalViewComputeStage.java
@@ -69,6 +69,7 @@ public class ExternalViewComputeStage extends AbstractBaseStage
     }
 
     HelixDataAccessor dataAccessor = manager.getHelixDataAccessor();
+    PropertyKey.Builder keyBuilder = dataAccessor.keyBuilder();
 
     CurrentStateOutput currentStateOutput =
         event.getAttribute(AttributeName.CURRENT_STATE.toString());
@@ -76,6 +77,9 @@ public class ExternalViewComputeStage extends AbstractBaseStage
     List<ExternalView> newExtViews = new ArrayList<ExternalView>();
     List<PropertyKey> keys = new ArrayList<PropertyKey>();
 
+    Map<String, ExternalView> curExtViews =
+          dataAccessor.getChildValuesMap(keyBuilder.externalViews());
+
     for (String resourceName : resourceMap.keySet())
     {
       ExternalView view = new ExternalView(resourceName);
@@ -123,19 +127,14 @@ public class ExternalViewComputeStage extends AbstractBaseStage
                                                   cache._idealStateMap.get(view.getResourceName()));
         }
       }
-      // compare the new external view with current one, set only on different
-      Map<String, ExternalView> curExtViews =
-          dataAccessor.getChildValuesMap(manager.getHelixDataAccessor()
-                                                .keyBuilder()
-                                                .externalViews());
 
+      // compare the new external view with current one, set only on different
       ExternalView curExtView = curExtViews.get(resourceName);
       if (curExtView == null || !curExtView.getRecord().equals(view.getRecord()))
       {
-        keys.add(manager.getHelixDataAccessor().keyBuilder().externalView(resourceName));
+        keys.add(keyBuilder.externalView(resourceName));
         newExtViews.add(view);
-        // dataAccessor.setProperty(PropertyType.EXTERNALVIEW, view, resourceName);
-        
+
         // For SCHEDULER_TASK_RESOURCE resource group (helix task queue), we need to find out which task
         // partitions are finished (COMPLETED or ERROR), update the status update of the original scheduler
         // message, and then remove the partitions from the ideal state
@@ -147,12 +146,20 @@ public class ExternalViewComputeStage extends AbstractBaseStage
     }
     // TODO: consider not setting the externalview of SCHEDULER_TASK_QUEUE at all. 
     // Are there any entity that will be interested in its change?
-    
+
+    // add/update external-views
     if (newExtViews.size() > 0)
     {
       dataAccessor.setChildren(keys, newExtViews);
     }
 
+    // remove dead external-views
+    for (String resourceName : curExtViews.keySet()) {
+        if (!resourceMap.keySet().contains(resourceName)) {
+            dataAccessor.removeProperty(keyBuilder.externalView(resourceName));
+        }
+    }
+
     long endTime = System.currentTimeMillis();
     log.info("END ExternalViewComputeStage.process(). took: " + (endTime - startTime)
         + " ms");

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/9eecbc31/helix-core/src/test/java/org/apache/helix/ZkTestHelper.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/ZkTestHelper.java b/helix-core/src/test/java/org/apache/helix/ZkTestHelper.java
index 9f074e9..a5b5681 100644
--- a/helix-core/src/test/java/org/apache/helix/ZkTestHelper.java
+++ b/helix-core/src/test/java/org/apache/helix/ZkTestHelper.java
@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.CountDownLatch;
 
 import org.I0Itec.zkclient.IZkStateListener;
@@ -317,7 +318,6 @@ public class ZkTestHelper
   /**
    * return a map from session-id to a set of zk-path that the session has watches on
    * 
-   * @param listenerMap
    * @return
    */
   public static Map<String, Set<String>> getListenersBySession(String zkAddr) throws Exception {
@@ -336,5 +336,43 @@ public class ZkTestHelper
 
 	  return listenerMapBySession;
   }
-  
+    static java.lang.reflect.Field getField(Class clazz, String fieldName) throws NoSuchFieldException {
+        try {
+            return clazz.getDeclaredField(fieldName);
+        } catch (NoSuchFieldException e) {
+            Class superClass = clazz.getSuperclass();
+            if (superClass == null) {
+                throw e;
+            } else {
+                return getField(superClass, fieldName);
+            }
+        }
+    }
+
+    public static boolean tryWaitZkEventsCleaned(ZkClient zkclient) throws Exception {
+        java.lang.reflect.Field field = getField(zkclient.getClass(), "_eventThread");
+        field.setAccessible(true);
+        Object eventThread = field.get(zkclient);
+        // System.out.println("field: " + eventThread);
+
+        java.lang.reflect.Field field2 = getField(eventThread.getClass(), "_events");
+        field2.setAccessible(true);
+        BlockingQueue queue = (BlockingQueue) field2.get(eventThread);
+        // System.out.println("field2: " + queue + ", " + queue.size());
+
+
+        if (queue == null) {
+            LOG.error("fail to get event-queue from zkclient. skip waiting");
+            return false;
+        }
+
+        for (int i = 0; i < 20; i++) {
+            if (queue.size() == 0) {
+                return true;
+            }
+            Thread.sleep(100);
+            System.out.println("pending zk-events in queue: " + queue);
+        }
+        return false;
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-helix/blob/9eecbc31/helix-core/src/test/java/org/apache/helix/integration/TestCleanupExternalView.java
----------------------------------------------------------------------
diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestCleanupExternalView.java b/helix-core/src/test/java/org/apache/helix/integration/TestCleanupExternalView.java
new file mode 100644
index 0000000..5f1a7b6
--- /dev/null
+++ b/helix-core/src/test/java/org/apache/helix/integration/TestCleanupExternalView.java
@@ -0,0 +1,124 @@
+package org.apache.helix.integration;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import org.apache.helix.*;
+import org.apache.helix.manager.zk.ZKHelixAdmin;
+import org.apache.helix.manager.zk.ZKHelixDataAccessor;
+import org.apache.helix.manager.zk.ZkBaseDataAccessor;
+import org.apache.helix.mock.controller.ClusterController;
+import org.apache.helix.mock.participant.MockParticipant;
+import org.apache.helix.model.ExternalView;
+import org.apache.helix.model.LiveInstance;
+import org.apache.helix.tools.ClusterStateVerifier;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.Date;
+
+/**
+ *
+ * Test clean external-view - if current-state is remove externally, controller should remove the orphan external-view
+ */
+public class TestCleanupExternalView extends ZkUnitTestBase {
+    @Test
+    public void test() throws Exception {
+        // Logger.getRootLogger().setLevel(Level.INFO);
+        String className = TestHelper.getTestClassName();
+        String methodName = TestHelper.getTestMethodName();
+        String clusterName = className + "_" + methodName;
+        int n = 2;
+
+        System.out.println("START " + clusterName + " at "
+                + new Date(System.currentTimeMillis()));
+
+        TestHelper.setupCluster(clusterName,
+                ZK_ADDR,
+                12918, // participant port
+                "localhost", // participant name prefix
+                "TestDB", // resource name prefix
+                1, // resources
+                2, // partitions per resource
+                n, // number of nodes
+                2, // replicas
+                "MasterSlave",
+                true); // do rebalance
+
+        ClusterController controller =
+                new ClusterController(clusterName, "controller_0", ZK_ADDR);
+        controller.syncStart();
+
+        // start participants
+        MockParticipant[] participants = new MockParticipant[n];
+        for (int i = 0; i < n; i++)
+        {
+            String instanceName = "localhost_" + (12918 + i);
+
+            participants[i] = new MockParticipant(clusterName, instanceName, ZK_ADDR, null);
+            participants[i].syncStart();
+        }
+
+        boolean result =
+                ClusterStateVerifier.verifyByZkCallback(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR,
+                        clusterName));
+        Assert.assertTrue(result);
+
+        // disable controller
+        ZKHelixAdmin admin = new ZKHelixAdmin(_gZkClient);
+        admin.enableCluster(clusterName, false);
+        // wait all pending zk-events being processed, otherwise remove current-state will cause controller send O->S message
+        ZkTestHelper.tryWaitZkEventsCleaned(controller.getManager().getZkClient());
+        // System.out.println("paused controller");
+
+        // drop resource
+        admin.dropResource(clusterName, "TestDB0");
+
+        // delete current-state manually, controller shall remove external-view when cluster is enabled again
+        ZKHelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
+        PropertyKey.Builder keyBuilder = accessor.keyBuilder();
+
+        // System.out.println("remove current-state");
+        LiveInstance liveInstance = accessor.getProperty(keyBuilder.liveInstance("localhost_12918"));
+        accessor.removeProperty(keyBuilder.currentState("localhost_12918", liveInstance.getSessionId(), "TestDB0"));
+        liveInstance = accessor.getProperty(keyBuilder.liveInstance("localhost_12919"));
+        accessor.removeProperty(keyBuilder.currentState("localhost_12919", liveInstance.getSessionId(), "TestDB0"));
+
+        // re-enable controller shall remove orphan external-view
+        // System.out.println("re-enabling controller");
+        admin.enableCluster(clusterName, true);
+
+        ExternalView externalView = null;
+        for (int i = 0; i < 10; i++) {
+            Thread.sleep(100);
+            externalView = accessor.getProperty(keyBuilder.externalView("TestDB0"));
+            // System.out.println("externalView: " + externalView);
+            if (externalView == null) {
+                break;
+            }
+        }
+
+        Assert.assertNull(externalView, "external-view for TestDB0 should be removed, but was: " + externalView);
+
+        System.out.println("END " + clusterName + " at "
+                + new Date(System.currentTimeMillis()));
+
+    }
+
+}


Mime
View raw message