accumulo-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [accumulo] EdColeman commented on a change in pull request #1391: Include additional details in FATE metrics reported.
Date Wed, 06 Nov 2019 18:19:39 GMT
EdColeman commented on a change in pull request #1391: Include additional details in FATE metrics
reported.
URL: https://github.com/apache/accumulo/pull/1391#discussion_r343252046
 
 

 ##########
 File path: server/master/src/main/java/org/apache/accumulo/master/metrics/fate/FateMetrics.java
 ##########
 @@ -16,61 +16,251 @@
  */
 package org.apache.accumulo.master.metrics.fate;
 
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
 
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.fate.AdminUtil;
+import org.apache.accumulo.fate.ReadOnlyTStore;
+import org.apache.accumulo.fate.ZooStore;
+import org.apache.accumulo.fate.zookeeper.IZooReaderWriter;
 import org.apache.accumulo.master.metrics.MasterMetrics;
 import org.apache.accumulo.server.ServerContext;
 import org.apache.hadoop.metrics2.lib.MetricsRegistry;
 import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class FateMetrics extends MasterMetrics {
 
+  private static final Logger log = LoggerFactory.getLogger(FateMetrics.class);
+
   // limit calls to update fate counters to guard against hammering zookeeper.
   private static final long DEFAULT_MIN_REFRESH_DELAY = TimeUnit.SECONDS.toMillis(10);
+  private long minimumRefreshDelay;
 
-  private volatile long minimumRefreshDelay;
-
-  private final ServerContext context;
+  private static final String FATE_TX_STATE_METRIC_PREFIX = "FateTxState_";
+  private static final String FATE_OP_TYPE_METRIC_PREFIX = "FateTxOpType_";
 
   private final MutableGaugeLong currentFateOps;
   private final MutableGaugeLong zkChildFateOpsTotal;
   private final MutableGaugeLong zkConnectionErrorsTotal;
 
-  private final AtomicReference<FateMetricValues> metricValues;
+  private final Map<String,MutableGaugeLong> fateTypeCounts = new TreeMap<>();
+  private final Map<String,MutableGaugeLong> fateOpCounts = new TreeMap<>();
 
+  /*
+   * lock should be used to guard read and write access to metricValues and the lastUpdate
+   * timestamp.
+   */
+  private final Lock metricsValuesLock = new ReentrantLock();
+  private FateMetricValues metricValues;
   private volatile long lastUpdate = 0;
 
+  private final IZooReaderWriter zooReaderWriter;
+  private final ReadOnlyTStore<FateMetrics> zooStore;
+  private final String fateRootPath;
+
   public FateMetrics(final ServerContext context, final long minimumRefreshDelay) {
     super("Fate", "Fate Metrics", "fate");
 
-    this.context = context;
+    zooReaderWriter = context.getZooReaderWriter();
+    fateRootPath = context.getZooKeeperRoot() + Constants.ZFATE;
+
+    try {
+
+      zooStore = new ZooStore<>(fateRootPath, zooReaderWriter);
+
+    } catch (KeeperException ex) {
+      throw new IllegalStateException(
+          "FATE Metrics - Failed to create zoo store - metrics unavailable", ex);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException(
+          "FATE Metrics - Interrupt received while initializing zoo store");
+    }
 
     this.minimumRefreshDelay = Math.max(DEFAULT_MIN_REFRESH_DELAY, minimumRefreshDelay);
 
-    metricValues = new AtomicReference<>(FateMetricValues.updateFromZookeeper(context,
null));
+    metricsValuesLock.lock();
+    try {
+      metricValues = updateFromZookeeper();
+    } finally {
+      metricsValuesLock.unlock();
+    }
 
     MetricsRegistry registry = super.getRegistry();
+
     currentFateOps = registry.newGauge("currentFateOps", "Current number of FATE Ops", 0L);
     zkChildFateOpsTotal = registry.newGauge("totalFateOps", "Total FATE Ops", 0L);
     zkConnectionErrorsTotal =
         registry.newGauge("totalZkConnErrors", "Total ZK Connection Errors", 0L);
 
+    for (ReadOnlyTStore.TStatus t : ReadOnlyTStore.TStatus.values()) {
+      MutableGaugeLong g = registry.newGauge(FATE_TX_STATE_METRIC_PREFIX + t.name().toUpperCase(),
+          "Transaction count for " + t.name() + " transactions", 0L);
+      fateTypeCounts.put(t.name(), g);
+    }
+  }
+
+  /**
+   * For testing only: allow refresh delay to be set to any value, over riding the enforced
minimum.
+   *
+   * @param minimumRefreshDelay
+   *          set new min refresh value, in seconds.
+   */
+  void overrideRefresh(final long minimumRefreshDelay) {
+    long delay = Math.max(0, minimumRefreshDelay);
+    this.minimumRefreshDelay = TimeUnit.SECONDS.toMillis(delay);
   }
 
   @Override
   protected void prepareMetrics() {
-    FateMetricValues fateMetrics = metricValues.get();
-    long now = System.currentTimeMillis();
-    if ((lastUpdate + minimumRefreshDelay) < now) {
-      fateMetrics = FateMetricValues.updateFromZookeeper(context, fateMetrics);
-      metricValues.set(fateMetrics);
-      lastUpdate = now;
-      // update individual gauges that are reported.
-      currentFateOps.set(fateMetrics.getCurrentFateOps());
-      zkChildFateOpsTotal.set(fateMetrics.getZkFateChildOpsTotal());
-      zkConnectionErrorsTotal.set(fateMetrics.getZkConnectionErrors());
+
+    metricsValuesLock.lock();
+    try {
+
+      long now = System.currentTimeMillis();
+
+      if ((lastUpdate + minimumRefreshDelay) < now) {
+        metricValues = updateFromZookeeper();
+        lastUpdate = now;
+      }
+
+      recordValues();
 
 Review comment:
   The underlying Hadoop metrics classes track whether a value has changed - this had the
effect of always publishing the metrics.  I am running tests now to see whether updating only
when a value has changed impacts reporting.  Basically, for the use case that I'm familiar with,
the metric value should always be published.  The collector has the option of always collecting,
but I'm not sure what the underlying system is requesting.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message