accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ktur...@apache.org
Subject svn commit: r1443790 - in /accumulo/trunk: fate/src/main/java/org/apache/accumulo/fate/zookeeper/ server/src/main/java/org/apache/accumulo/server/gc/ server/src/main/java/org/apache/accumulo/server/master/ server/src/main/java/org/apache/accumulo/serve...
Date Thu, 07 Feb 2013 23:42:57 GMT
Author: kturner
Date: Thu Feb  7 23:42:57 2013
New Revision: 1443790

URL: http://svn.apache.org/r1443790
Log:
ACCUMULO-954 made ZooLock report when it is no longer able to monitor the lock node and
therefore does not know the status of the lock

Modified:
    accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooLock.java
    accumulo/trunk/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java
    accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/Master.java
    accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/TServerLockWatcher.java
    accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
    accumulo/trunk/server/src/main/java/org/apache/accumulo/server/zookeeper/ZooLock.java
    accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java
    accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/ZombieTServer.java
    accumulo/trunk/test/src/test/java/org/apache/accumulo/fate/zookeeper/ZooLockTest.java

Modified: accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooLock.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooLock.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooLock.java (original)
+++ accumulo/trunk/fate/src/main/java/org/apache/accumulo/fate/zookeeper/ZooLock.java Thu Feb  7 23:42:57 2013
@@ -38,11 +38,18 @@ public class ZooLock implements Watcher 
   public static final String LOCK_PREFIX = "zlock-";
   
   public enum LockLossReason {
-    LOCK_DELETED, SESSION_EXPIRED, UNKNOWN
+    LOCK_DELETED, SESSION_EXPIRED
   }
   
   public interface LockWatcher {
     void lostLock(LockLossReason reason);
+    
+    /**
+     * lost the ability to monitor the lock node, and its status is unknown
+     * 
+     * @param e
+     */
+    void unableToMonitorLockNode(Throwable e);
   }
   
   public interface AsyncLockWatcher extends LockWatcher {
@@ -98,6 +105,11 @@ public class ZooLock implements Watcher 
       lw.lostLock(reason);
     }
     
+    @Override
+    public void unableToMonitorLockNode(Throwable e) {
+      lw.unableToMonitorLockNode(e);
+    }
+    
   }
   
   public synchronized boolean tryLock(LockWatcher lw, byte data[]) throws KeeperException, InterruptedException {
@@ -133,8 +145,9 @@ public class ZooLock implements Watcher 
     Collections.sort(children);
     
     if (children.get(0).equals(myLock)) {
-      if (!watchingParent)
-        throw new RuntimeException("Can not acquire lock, no longer watching parent");
+      if (!watchingParent) {
+        throw new IllegalStateException("Can not acquire lock, no longer watching parent : " + path);
+      }
       this.lockWatcher = lw;
       this.lock = myLock;
       asyncLock = null;
@@ -222,8 +235,8 @@ public class ZooLock implements Watcher 
               lostLock(LockLossReason.LOCK_DELETED);
             } else if (asyncLock != null && event.getType() == EventType.NodeDeleted && event.getPath().equals(path + "/" + asyncLock)) {
               failedToAcquireLock();
-            } else if(event.getState() != KeeperState.Expired) {
-              log.warn("Unexpected event watching lock node "+event+" "+asyncLockPath);
+            } else if (event.getState() != KeeperState.Expired && (lock != null || asyncLock != null)) {
+              log.debug("Unexpected event watching lock node "+event+" "+asyncLockPath);
               try {
                 Stat stat2 = zooKeeper.getStatus(asyncLockPath, this);
                 if(stat2 == null){
@@ -232,20 +245,9 @@ public class ZooLock implements Watcher 
                   else if(asyncLock != null)
                     failedToAcquireLock();
                 }
-              } catch (Exception e) {
+              } catch (Throwable e) {
+                lockWatcher.unableToMonitorLockNode(e);
                 log.error("Failed to stat lock node " + asyncLockPath, e);
-
-                try {
-                  // not sure what happened... try to clean lock node up....
-                  zooKeeper.delete(asyncLockPath, -1);
-                } catch (Throwable e2) {
-                  log.debug("Failed to clean up lock node " + asyncLockPath, e2);
-                }
-                
-                if(lock != null)
-                  lostLock(LockLossReason.UNKNOWN);
-                else if(asyncLock != null)
-                  failedToAcquireLock();
               }
             }
            
@@ -348,10 +350,9 @@ public class ZooLock implements Watcher 
         zooKeeper.getStatus(path, this);
         watchingParent = true;
       } catch (Exception ex) {
-        log.warn("Error resetting watch on ZooLock", ex);
-        
-        if (lock != null) {
-          lostLock(LockLossReason.UNKNOWN);
+        if (lock != null || asyncLock != null) {
+          lockWatcher.unableToMonitorLockNode(ex);
+          log.error("Error resetting watch on ZooLock " + lock == null ? asyncLock : lock + " " + event, ex);
         }
       }
        

Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java (original)
+++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/gc/SimpleGarbageCollector.java Thu Feb  7 23:42:57 2013
@@ -32,12 +32,6 @@ import java.util.concurrent.ExecutorServ
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 
-import org.apache.accumulo.trace.instrument.CountSampler;
-import org.apache.accumulo.trace.instrument.Sampler;
-import org.apache.accumulo.trace.instrument.Span;
-import org.apache.accumulo.trace.instrument.Trace;
-import org.apache.accumulo.trace.instrument.thrift.TraceWrap;
-import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.cli.Help;
 import org.apache.accumulo.core.client.AccumuloException;
@@ -64,6 +58,7 @@ import org.apache.accumulo.core.gc.thrif
 import org.apache.accumulo.core.gc.thrift.GCStatus;
 import org.apache.accumulo.core.gc.thrift.GcCycleStats;
 import org.apache.accumulo.core.master.state.tables.TableState;
+import org.apache.accumulo.core.security.SecurityUtil;
 import org.apache.accumulo.core.security.thrift.Credentials;
 import org.apache.accumulo.core.util.CachedConfiguration;
 import org.apache.accumulo.core.util.NamingThreadFactory;
@@ -79,13 +74,18 @@ import org.apache.accumulo.server.client
 import org.apache.accumulo.server.conf.ServerConfiguration;
 import org.apache.accumulo.server.master.state.tables.TableManager;
 import org.apache.accumulo.server.security.SecurityConstants;
-import org.apache.accumulo.core.security.SecurityUtil;
 import org.apache.accumulo.server.trace.TraceFileSystem;
 import org.apache.accumulo.server.util.Halt;
 import org.apache.accumulo.server.util.OfflineMetadataScanner;
 import org.apache.accumulo.server.util.TServerUtils;
 import org.apache.accumulo.server.util.TabletIterator;
 import org.apache.accumulo.server.zookeeper.ZooLock;
+import org.apache.accumulo.trace.instrument.CountSampler;
+import org.apache.accumulo.trace.instrument.Sampler;
+import org.apache.accumulo.trace.instrument.Span;
+import org.apache.accumulo.trace.instrument.Trace;
+import org.apache.accumulo.trace.instrument.thrift.TraceWrap;
+import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -373,6 +373,18 @@ public class SimpleGarbageCollector impl
       public void lostLock(LockLossReason reason) {
         Halt.halt("GC lock in zookeeper lost (reason = " + reason + "), exiting!");
       }
+      
+      @Override
+      public void unableToMonitorLockNode(final Throwable e) {
+        Halt.halt(-1, new Runnable() {
+          
+          @Override
+          public void run() {
+            log.fatal("No longer able to monitor lock node ", e);
+          }
+        });
+        
+      }
     };
     
     while (true) {

Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/Master.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/Master.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/Master.java (original)
+++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/Master.java Thu Feb  7 23:42:57 2013
@@ -37,8 +37,6 @@ import java.util.TreeSet;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.accumulo.trace.instrument.thrift.TraceWrap;
-import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
@@ -80,8 +78,8 @@ import org.apache.accumulo.core.master.t
 import org.apache.accumulo.core.master.thrift.TabletServerStatus;
 import org.apache.accumulo.core.master.thrift.TabletSplit;
 import org.apache.accumulo.core.security.SecurityUtil;
-import org.apache.accumulo.core.security.thrift.SecurityErrorCode;
 import org.apache.accumulo.core.security.thrift.Credentials;
+import org.apache.accumulo.core.security.thrift.SecurityErrorCode;
 import org.apache.accumulo.core.security.thrift.ThriftSecurityException;
 import org.apache.accumulo.core.util.ByteBufferUtil;
 import org.apache.accumulo.core.util.CachedConfiguration;
@@ -157,6 +155,8 @@ import org.apache.accumulo.server.util.t
 import org.apache.accumulo.server.zookeeper.ZooLock;
 import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
 import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader;
+import org.apache.accumulo.trace.instrument.thrift.TraceWrap;
+import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -2211,6 +2211,17 @@ public class Master implements LiveTServ
       public void lostLock(LockLossReason reason) {
         Halt.halt("Master lock in zookeeper lost (reason = " + reason + "), exiting!", -1);
       }
+      
+      @Override
+      public void unableToMonitorLockNode(final Throwable e) {
+        Halt.halt(-1, new Runnable() {
+          @Override
+          public void run() {
+            log.fatal("No longer able to monitor master lock node", e);
+          }
+        });
+        
+      }
     };
     long current = System.currentTimeMillis();
     final long waitTime = getSystemConfiguration().getTimeInMillis(Property.INSTANCE_ZK_TIMEOUT);

Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/TServerLockWatcher.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/TServerLockWatcher.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/TServerLockWatcher.java (original)
+++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/master/TServerLockWatcher.java Thu Feb  7 23:42:57 2013
@@ -40,4 +40,10 @@ class TServerLockWatcher implements Asyn
   @Override
   public void lostLock(LockLossReason reason) {}
   
+  @Override
+  public void unableToMonitorLockNode(Throwable e) {
+    // TODO Auto-generated method stub
+    
+  }
+  
 }

Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java (original)
+++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java Thu Feb  7 23:42:57 2013
@@ -64,10 +64,6 @@ import java.util.concurrent.atomic.Atomi
 import javax.management.ObjectName;
 import javax.management.StandardMBean;
 
-import org.apache.accumulo.trace.instrument.Span;
-import org.apache.accumulo.trace.instrument.Trace;
-import org.apache.accumulo.trace.instrument.thrift.TraceWrap;
-import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
@@ -107,8 +103,8 @@ import org.apache.accumulo.core.master.t
 import org.apache.accumulo.core.master.thrift.TabletServerStatus;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.security.SecurityUtil;
-import org.apache.accumulo.core.security.thrift.SecurityErrorCode;
 import org.apache.accumulo.core.security.thrift.Credentials;
+import org.apache.accumulo.core.security.thrift.SecurityErrorCode;
 import org.apache.accumulo.core.security.thrift.ThriftSecurityException;
 import org.apache.accumulo.core.tabletserver.thrift.ActiveCompaction;
 import org.apache.accumulo.core.tabletserver.thrift.ActiveScan;
@@ -202,6 +198,10 @@ import org.apache.accumulo.start.Platfor
 import org.apache.accumulo.start.classloader.vfs.AccumuloVFSClassLoader;
 import org.apache.accumulo.start.classloader.vfs.ContextManager;
 import org.apache.accumulo.start.classloader.vfs.ContextManager.ContextConfig;
+import org.apache.accumulo.trace.instrument.Span;
+import org.apache.accumulo.trace.instrument.Trace;
+import org.apache.accumulo.trace.instrument.thrift.TraceWrap;
+import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.commons.collections.map.LRUMap;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -2703,6 +2703,17 @@ public class TabletServer extends Abstra
             }
           });
         }
+        
+        @Override
+        public void unableToMonitorLockNode(final Throwable e) {
+          Halt.halt(0, new Runnable() {
+            @Override
+            public void run() {
+              log.fatal("Lost ability to monitor tablet server lock, exiting.", e);
+            }
+          });
+          
+        }
       };
       
       byte[] lockContent = new ServerServices(getClientAddressString(), Service.TSERV_CLIENT).toString().getBytes();

Modified: accumulo/trunk/server/src/main/java/org/apache/accumulo/server/zookeeper/ZooLock.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/server/src/main/java/org/apache/accumulo/server/zookeeper/ZooLock.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/server/src/main/java/org/apache/accumulo/server/zookeeper/ZooLock.java (original)
+++ accumulo/trunk/server/src/main/java/org/apache/accumulo/server/zookeeper/ZooLock.java Thu Feb  7 23:42:57 2013
@@ -31,34 +31,4 @@ public class ZooLock extends org.apache.
   public static boolean deleteLock(String path, String lockData) throws InterruptedException, KeeperException {
     return deleteLock(ZooReaderWriter.getInstance(), path, lockData);
   }
-  
-  public static void main(String[] args) throws Exception {
-    String node = "/test/lock1";
-    ZooLock zl = new ZooLock(node);
-    
-    zl.lockAsync(new AsyncLockWatcher() {
-      
-      @Override
-      public void acquiredLock() {
-        System.out.println("I got the lock");
-      }
-      
-      @Override
-      public void lostLock(LockLossReason reason) {
-        System.out.println("OMG I lost my lock, reason = " + reason);
-        
-      }
-      
-      @Override
-      public void failedToAcquireLock(Exception e) {
-        System.out.println("Failed to acquire lock  ");
-        e.printStackTrace();
-      }
-      
-    }, new byte[0]);
-    
-    while (true) {
-      Thread.sleep(1000);
-    }
-  }
 }

Modified: accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java (original)
+++ accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/SplitRecoveryTest.java Thu Feb  7 23:42:57 2013
@@ -87,6 +87,11 @@ public class SplitRecoveryTest extends F
         System.exit(-1);
         
       }
+      
+      @Override
+      public void unableToMonitorLockNode(Throwable e) {
+        System.exit(-1);
+      }
     }, "foo".getBytes());
     
     if (!gotLock) {

Modified: accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/ZombieTServer.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/ZombieTServer.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/ZombieTServer.java (original)
+++ accumulo/trunk/test/src/main/java/org/apache/accumulo/test/functional/ZombieTServer.java Thu Feb  7 23:42:57 2013
@@ -21,8 +21,6 @@ import java.net.InetSocketAddress;
 import java.util.HashMap;
 import java.util.Random;
 
-import org.apache.accumulo.trace.instrument.Tracer;
-import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.accumulo.core.Constants;
 import org.apache.accumulo.core.client.Instance;
 import org.apache.accumulo.core.master.thrift.TableInfo;
@@ -45,6 +43,8 @@ import org.apache.accumulo.server.util.T
 import org.apache.accumulo.server.zookeeper.TransactionWatcher;
 import org.apache.accumulo.server.zookeeper.ZooLock;
 import org.apache.accumulo.server.zookeeper.ZooReaderWriter;
+import org.apache.accumulo.trace.instrument.Tracer;
+import org.apache.accumulo.trace.thrift.TInfo;
 import org.apache.log4j.Logger;
 import org.apache.thrift.TException;
 
@@ -119,6 +119,16 @@ public class ZombieTServer {
           System.exit(1);
         }
       }
+      
+      @Override
+      public void unableToMonitorLockNode(Throwable e) {
+        try {
+          tch.halt(Tracer.traceInfo(), null, null);
+        } catch (Exception ex) {
+          log.error(ex, ex);
+          System.exit(1);
+        }
+      }
     };
     
     byte[] lockContent = new ServerServices(addressString, Service.TSERV_CLIENT).toString().getBytes();

Modified: accumulo/trunk/test/src/test/java/org/apache/accumulo/fate/zookeeper/ZooLockTest.java
URL: http://svn.apache.org/viewvc/accumulo/trunk/test/src/test/java/org/apache/accumulo/fate/zookeeper/ZooLockTest.java?rev=1443790&r1=1443789&r2=1443790&view=diff
==============================================================================
--- accumulo/trunk/test/src/test/java/org/apache/accumulo/fate/zookeeper/ZooLockTest.java (original)
+++ accumulo/trunk/test/src/test/java/org/apache/accumulo/fate/zookeeper/ZooLockTest.java Thu Feb  7 23:42:57 2013
@@ -76,6 +76,12 @@ public class ZooLockTest {
         this.wait();
       }
     }
+    
+    @Override
+    public synchronized void unableToMonitorLockNode(Throwable e) {
+      changes++;
+      this.notifyAll();
+    }
   }
 
   @BeforeClass
@@ -123,6 +129,8 @@ public class ZooLockTest {
     Assert.assertTrue(zl.isLocked());
     Assert.assertNull(lw.exception);
     Assert.assertNull(lw.reason);
+    
+    zl.unlock();
   }
   
   @Test(timeout = 10000)
@@ -244,6 +252,8 @@ public class ZooLockTest {
     Assert.assertTrue(zl3.isLocked());
     Assert.assertNull(lw3.exception);
     Assert.assertNull(lw3.reason);
+    
+    zl3.unlock();
 
   }
   
@@ -285,11 +295,11 @@ public class ZooLockTest {
 
     Assert.assertEquals(LockLossReason.LOCK_DELETED, lw.reason);
     Assert.assertNull(lw.exception);
+
   }
 
   @Test(timeout = 10000)
   public void testTryLock() throws Exception {
-    
     String parent = "/zltest-" + this.hashCode() + "-l" + pdCount++;
     
     ZooLock zl = new ZooLock(accumulo.getZooKeepers(), 1000, "digest", "secret".getBytes(), parent);
@@ -311,9 +321,13 @@ public class ZooLockTest {
     Assert.assertTrue(ret);
     
     // make sure still watching parent even though a lot of events occurred for the parent
-    Field field = zl.getClass().getDeclaredField("watchingParent");
-    field.setAccessible(true);
-    Assert.assertTrue((Boolean) field.get(zl));
+    synchronized (zl) {
+      Field field = zl.getClass().getDeclaredField("watchingParent");
+      field.setAccessible(true);
+      Assert.assertTrue((Boolean) field.get(zl));
+    }
+    
+    zl.unlock();
   }
 
   @AfterClass



Mime
View raw message