hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bus...@apache.org
Subject hbase git commit: HBASE-14274 Deadlock in region metrics on shutdown: MetricsRegionSourceImpl vs MetricsRegionAggregateSourceImpl
Date Fri, 21 Aug 2015 06:03:16 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-1.2 1fa33e897 -> 909e2fe50


HBASE-14274 Deadlock in region metrics on shutdown: MetricsRegionSourceImpl vs MetricsRegionAggregateSourceImpl

Signed-off-by: stack <stack@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/909e2fe5
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/909e2fe5
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/909e2fe5

Branch: refs/heads/branch-1.2
Commit: 909e2fe504169f978989e4c1c3778291b3da2418
Parents: 1fa33e8
Author: Elliott Clark <eclark@apache.org>
Authored: Thu Aug 20 13:32:39 2015 -0700
Committer: Sean Busbey <busbey@cloudera.com>
Committed: Fri Aug 21 00:46:41 2015 -0500

----------------------------------------------------------------------
 .../MetricsRegionAggregateSourceImpl.java       | 53 +++++-------------
 .../regionserver/MetricsRegionSourceImpl.java   | 56 +++++++++++---------
 2 files changed, 44 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/909e2fe5/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionAggregateSourceImpl.java
----------------------------------------------------------------------
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionAggregateSourceImpl.java
b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionAggregateSourceImpl.java
index fdb3e83..009fa9c 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionAggregateSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionAggregateSourceImpl.java
@@ -18,10 +18,10 @@
 
 package org.apache.hadoop.hbase.regionserver;
 
-import java.util.HashSet;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
@@ -34,12 +34,11 @@ import org.apache.hadoop.metrics2.lib.MetricsExecutorImpl;
 @InterfaceAudience.Private
 public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
     implements MetricsRegionAggregateSource {
-  // lock to guard against concurrent access to regionSources
-  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+
   private final MetricsExecutorImpl executor = new MetricsExecutorImpl();
 
-  private final HashSet<MetricsRegionSource> regionSources =
-      new HashSet<MetricsRegionSource>();
+  private final Set<MetricsRegionSource> regionSources =
+      Collections.newSetFromMap(new ConcurrentHashMap<MetricsRegionSource, Boolean>());
 
   public MetricsRegionAggregateSourceImpl() {
     this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT);
@@ -62,36 +61,18 @@ public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
 
   @Override
   public void register(MetricsRegionSource source) {
-    Lock l = lock.writeLock();
-    l.lock();
-    try {
-      regionSources.add(source);
-      clearCache();
-    } finally {
-      l.unlock();
-    }
+    regionSources.add(source);
+    clearCache();
   }
 
   @Override
   public void deregister(MetricsRegionSource toRemove) {
-    Lock l = lock.writeLock();
-    l.lock();
-    try {
-      regionSources.remove(toRemove);
-      clearCache();
-    } finally {
-      l.unlock();
-    }
+    regionSources.remove(toRemove);
+    clearCache();
   }
 
   private synchronized void clearCache() {
     JmxCacheBuster.clearJmxCache(true);
-    executor.getExecutor().schedule(new Runnable() {
-      @Override
-      public void run() {
-        JmxCacheBuster.clearJmxCache();
-      }
-    }, 5, TimeUnit.MINUTES);
   }
 
   /**
@@ -107,18 +88,12 @@ public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
     MetricsRecordBuilder mrb = collector.addRecord(metricsName);
 
     if (regionSources != null) {
-      Lock l = lock.readLock();
-      l.lock();
-      try {
-        for (MetricsRegionSource regionMetricSource : regionSources) {
-          if (regionMetricSource instanceof MetricsRegionSourceImpl) {
-            ((MetricsRegionSourceImpl) regionMetricSource).snapshot(mrb, all);
-          }
+      for (MetricsRegionSource regionMetricSource : regionSources) {
+        if (regionMetricSource instanceof MetricsRegionSourceImpl) {
+          ((MetricsRegionSourceImpl) regionMetricSource).snapshot(mrb, all);
         }
-        mrb.addGauge(Interns.info(NUM_REGIONS, NUMBER_OF_REGIONS_DESC), regionSources.size());
-      } finally {
-        l.unlock();
       }
+      mrb.addGauge(Interns.info(NUM_REGIONS, NUMBER_OF_REGIONS_DESC), regionSources.size());
       metricsRegistry.snapshot(mrb, all);
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/909e2fe5/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
----------------------------------------------------------------------
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
index 7290c56..5b3e6c4 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionSourceImpl.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -39,12 +40,7 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
 
   private static final Log LOG = LogFactory.getLog(MetricsRegionSourceImpl.class);
 
-  private boolean closed = false;
-
-  // lock to ensure that lock and pushing metrics can't race.
-  // When changing or acting on the closed boolean this lock must be held.
-  // The write lock must be held when changing closed.
-  private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(false);
+  private AtomicBoolean closed = new AtomicBoolean(false);
 
   // Non-final so that we can null out the wrapper
   // This is just paranoia. We really really don't want to
@@ -109,16 +105,20 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource
{
 
   @Override
   public void close() {
-    Lock lock = readWriteLock.writeLock();
-    lock.lock();
-    try {
-      if (closed) {
-        return;
-      }
+    boolean wasClosed = closed.getAndSet(false);
+
+    // Has someone else already closed this for us?
+    if (wasClosed) {
+      return;
+    }
 
-      closed = true;
-      agg.deregister(this);
+    // Before removing the metrics remove this region from the aggregate region bean.
+    // This should mean that it's unlikely that snapshot and close happen at the same time.
+    agg.deregister(this);
 
+    // While it's un-likely that snapshot and close happen at the same time it's still possible.
+    // So grab the lock to ensure that all calls to snapshot are done before we remove the
metrics
+    synchronized (this) {
       if (LOG.isTraceEnabled()) {
         LOG.trace("Removing region Metrics: " + regionWrapper.getRegionName());
       }
@@ -133,10 +133,6 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
       registry.removeHistogramMetrics(regionScanNextKey);
 
       regionWrapper = null;
-
-      JmxCacheBuster.clearJmxCache();
-    } finally {
-      lock.unlock();
     }
   }
 
@@ -186,13 +182,23 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource
{
   }
 
   void snapshot(MetricsRecordBuilder mrb, boolean ignored) {
-    Lock lock = readWriteLock.readLock();
 
-    // Grab the read lock.
-    // This ensures that
-    lock.lock();
-    try {
-      if (closed) {
+    // If there is a close that started be double extra sure
+    // that we're not getting any locks and not putting data
+    // into the metrics that should be removed. So early out
+    // before even getting the lock.
+    if (closed.get()) {
+      return;
+    }
+
+    // Grab the read
+    // This ensures that removes of the metrics
+    // can't happen while we are putting them back in.
+    synchronized (this) {
+
+      // It's possible that a close happened between checking
+      // the closed variable and getting the lock.
+      if (closed.get()) {
         return;
       }
 
@@ -254,8 +260,6 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
             MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "99th percentile:
"), ds
             .getPercentile(99d) / 1000);
       }
-    } finally {
-      lock.unlock();
     }
   }
 


Mime
View raw message