accumulo-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [accumulo] keith-turner commented on a change in pull request #1391: Include additional details in FATE metrics reported.
Date Wed, 06 Nov 2019 14:58:54 GMT
keith-turner commented on a change in pull request #1391: Include additional details in FATE
metrics reported.
URL: https://github.com/apache/accumulo/pull/1391#discussion_r343140669
 
 

 ##########
 File path: server/master/src/main/java/org/apache/accumulo/master/metrics/fate/FateMetrics.java
 ##########
 @@ -16,61 +16,234 @@
  */
 package org.apache.accumulo.master.metrics.fate;
 
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.fate.AdminUtil;
+import org.apache.accumulo.fate.ReadOnlyTStore;
+import org.apache.accumulo.fate.ZooStore;
+import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
 import org.apache.accumulo.master.metrics.MasterMetrics;
 import org.apache.accumulo.server.ServerContext;
 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class FateMetrics extends MasterMetrics {
 
+  private static final Logger log = LoggerFactory.getLogger(FateMetrics.class);
+
   // limit calls to update fate counters to guard against hammering zookeeper.
   private static final long DEFAULT_MIN_REFRESH_DELAY = TimeUnit.SECONDS.toMillis(10);
+  private long minimumRefreshDelay;
 
-  private volatile long minimumRefreshDelay;
-
-  private final ServerContext context;
+  private static final String FATE_TX_STATE_METRIC_PREFIX = "FateTxState_";
+  private static final String FATE_OP_TYPE_METRIC_PREFIX = "FateTxOpType_";
 
   private final MutableGaugeLong currentFateOps;
   private final MutableGaugeLong zkChildFateOpsTotal;
   private final MutableGaugeLong zkConnectionErrorsTotal;
 
+  private final Map<String,MutableGaugeLong> fateTypeCounts = new TreeMap<>();
+  private final Map<String,MutableGaugeLong> fateOpCounts = new TreeMap<>();
+
   private final AtomicReference<FateMetricValues> metricValues;
 
   private volatile long lastUpdate = 0;
 
+  private final IZooReaderWriter zooReaderWriter;
+  private final ReadOnlyTStore<FateMetrics> zooStore;
+  private final String fateRootPath;
+
   public FateMetrics(final ServerContext context, final long minimumRefreshDelay) {
     super("Fate", "Fate Metrics", "fate");
 
-    this.context = context;
+    zooReaderWriter = context.getZooReaderWriter();
+    fateRootPath = context.getZooKeeperRoot() + Constants.ZFATE;
+
+    try {
+
+      zooStore = new ZooStore<>(fateRootPath, zooReaderWriter);
+
+    } catch (KeeperException ex) {
+      throw new IllegalStateException(
+          "FATE Metrics - Failed to create zoo store - metrics unavailable", ex);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException(
+          "FATE Metrics - Interrupt received while initializing zoo store");
+    }
 
     this.minimumRefreshDelay = Math.max(DEFAULT_MIN_REFRESH_DELAY, minimumRefreshDelay);
 
-    metricValues = new AtomicReference<>(FateMetricValues.updateFromZookeeper(context,
null));
+    metricValues = new AtomicReference<>(updateFromZookeeper());
 
     MetricsRegistry registry = super.getRegistry();
+
     currentFateOps = registry.newGauge("currentFateOps", "Current number of FATE Ops", 0L);
     zkChildFateOpsTotal = registry.newGauge("totalFateOps", "Total FATE Ops", 0L);
     zkConnectionErrorsTotal =
         registry.newGauge("totalZkConnErrors", "Total ZK Connection Errors", 0L);
 
+    for (ReadOnlyTStore.TStatus t : ReadOnlyTStore.TStatus.values()) {
+      MutableGaugeLong g = registry.newGauge(FATE_TX_STATE_METRIC_PREFIX + t.name().toUpperCase(),
+          "Transaction count for " + t.name() + " transactions", 0L);
+      fateTypeCounts.put(t.name(), g);
+    }
+  }
+
+  /**
+   * For testing only: allow refresh delay to be set to any value, over riding the enforced
minimum.
+   *
+   * @param minimumRefreshDelay
+   *          set new min refresh value, in seconds.
+   */
+  void overrideRefresh(final long minimumRefreshDelay) {
+    long delay = Math.max(0, minimumRefreshDelay);
+    this.minimumRefreshDelay = TimeUnit.SECONDS.toMillis(delay);
   }
 
   @Override
   protected void prepareMetrics() {
-    FateMetricValues fateMetrics = metricValues.get();
+
     long now = System.currentTimeMillis();
+
     if ((lastUpdate + minimumRefreshDelay) < now) {
-      fateMetrics = FateMetricValues.updateFromZookeeper(context, fateMetrics);
-      metricValues.set(fateMetrics);
+      metricValues.set(updateFromZookeeper());
+
       lastUpdate = now;
-      // update individual gauges that are reported.
-      currentFateOps.set(fateMetrics.getCurrentFateOps());
-      zkChildFateOpsTotal.set(fateMetrics.getZkFateChildOpsTotal());
-      zkConnectionErrorsTotal.set(fateMetrics.getZkConnectionErrors());
+
+      recordValues();
     }
   }
 
 Review comment:
   It may be useful to add a comment about why the locking is there.  Even if the comment
just says the locking may not be needed and was added as a precaution.  Later, people may
encounter this lock and think it's absolutely necessary for some reason they cannot discern.

   
   Adding locks in a plugin that is called by an external system, when the plugin also calls
back out to that external system, does have a chance of causing deadlock.  I doubt that is the
case here, but I can't be sure w/o fully understanding the threading and locking model of the
external system.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message