accumulo-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] ivakegg closed pull request #368: ACCUMULO-4777: fix log messages
Date Thu, 01 Jan 1970 00:00:00 GMT
ivakegg closed pull request #368: ACCUMULO-4777: fix log messages
URL: https://github.com/apache/accumulo/pull/368
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/fate/src/main/java/org/apache/accumulo/fate/zookeeper/Retry.java b/fate/src/main/java/org/apache/accumulo/fate/zookeeper/Retry.java
index 1f55d72fe6..d710e2cb8b 100644
--- a/fate/src/main/java/org/apache/accumulo/fate/zookeeper/Retry.java
+++ b/fate/src/main/java/org/apache/accumulo/fate/zookeeper/Retry.java
@@ -19,6 +19,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Encapsulates the retrying implementation for some operation. Provides bounded retry attempts
with a bounded, linear backoff.
  */
@@ -30,6 +32,9 @@
   private long maxRetries, maxWait, waitIncrement;
   private long retriesDone, currentWait;
 
+  private long logInterval;
+  private long lastRetryLog;
+
   /**
    * @param maxRetries
    *          Maximum times to retry or MAX_RETRY_DISABLED if no maximum
@@ -39,13 +44,18 @@
    *          The maximum wait (ms)
    * @param waitIncrement
    *          The amount of time (ms) to increment next wait time by
+   * @param logInterval
+   *          The amount of time (ms) between logging retries
    */
-  public Retry(long maxRetries, long startWait, long waitIncrement, long maxWait) {
+
+  public Retry(long maxRetries, long startWait, long waitIncrement, long maxWait, long logInterval)
{
     this.maxRetries = maxRetries;
     this.maxWait = maxWait;
     this.waitIncrement = waitIncrement;
     this.retriesDone = 0l;
     this.currentWait = startWait;
+    this.logInterval = logInterval;
+    this.lastRetryLog = -1;
   }
 
   /**
@@ -55,47 +65,57 @@ public Retry(long maxRetries, long startWait, long waitIncrement, long
maxWait)
    *          The maximum wait (ms)
    * @param waitIncrement
    *          The amount of time (ms) to increment next wait time by
+   * @param logInterval
+   *          The amount of time (ms) between logging retries
    */
-  public Retry(long startWait, long waitIncrement, long maxWait) {
-    this(MAX_RETRY_DISABLED, startWait, waitIncrement, maxWait);
+  public Retry(long startWait, long waitIncrement, long maxWait, long logInterval) {
+    this(MAX_RETRY_DISABLED, startWait, waitIncrement, maxWait, logInterval);
   }
 
   // Visible for testing
+  @VisibleForTesting
   long getMaxRetries() {
     return maxRetries;
   }
 
   // Visible for testing
+  @VisibleForTesting
   long getCurrentWait() {
     return currentWait;
   }
 
   // Visible for testing
+  @VisibleForTesting
   long getWaitIncrement() {
     return waitIncrement;
   }
 
   // Visible for testing
+  @VisibleForTesting
   long getMaxWait() {
     return maxWait;
   }
 
   // Visible for testing
+  @VisibleForTesting
   void setMaxRetries(long maxRetries) {
     this.maxRetries = maxRetries;
   }
 
   // Visible for testing
+  @VisibleForTesting
   void setStartWait(long startWait) {
     this.currentWait = startWait;
   }
 
   // Visible for testing
+  @VisibleForTesting
   void setWaitIncrement(long waitIncrement) {
     this.waitIncrement = waitIncrement;
   }
 
   // Visible for testing
+  @VisibleForTesting
   void setMaxWait(long maxWait) {
     this.maxWait = maxWait;
   }
@@ -104,6 +124,15 @@ public boolean isMaxRetryDisabled() {
     return maxRetries < 0;
   }
 
+  // Visible for testing
+  void setLogInterval(long logInterval) {
+    this.logInterval = logInterval;
+  }
+
+  public long getLogInterval() {
+    return logInterval;
+  }
+
   public boolean canRetry() {
     return isMaxRetryDisabled() || (retriesDone < maxRetries);
   }
@@ -125,7 +154,9 @@ public long retriesCompleted() {
   }
 
   public void waitForNextAttempt() throws InterruptedException {
-    log.debug("Sleeping for " + currentWait + "ms before retrying operation");
+    if (log.isDebugEnabled()) {
+      log.debug("Sleeping for " + currentWait + "ms before retrying operation");
+    }
     sleep(currentWait);
     currentWait = Math.min(maxWait, currentWait + waitIncrement);
   }
@@ -133,4 +164,25 @@ public void waitForNextAttempt() throws InterruptedException {
   protected void sleep(long wait) throws InterruptedException {
     Thread.sleep(wait);
   }
+
+  public void logRetry(Logger log, String message, Throwable t) {
+    // log the first time, and then after every logInterval
+    if (lastRetryLog < 0 || (System.currentTimeMillis() - lastRetryLog) > logInterval)
{
+      log.warn(getMessage(message), t);
+      lastRetryLog = System.currentTimeMillis();
+    }
+  }
+
+  public void logRetry(Logger log, String message) {
+    // log the first time, and then after every logInterval
+    if (lastRetryLog < 0 || (System.currentTimeMillis() - lastRetryLog) > logInterval)
{
+      log.warn(getMessage(message));
+      lastRetryLog = System.currentTimeMillis();
+    }
+  }
+
+  private String getMessage(String message) {
+    return message + ", retrying attempt " + (retriesDone + 1) + " (suppressing retry messages
for " + logInterval + "ms)";
+  }
+
 }
diff --git a/fate/src/main/java/org/apache/accumulo/fate/zookeeper/RetryFactory.java b/fate/src/main/java/org/apache/accumulo/fate/zookeeper/RetryFactory.java
index aa6da20fa8..e56eddc8f5 100644
--- a/fate/src/main/java/org/apache/accumulo/fate/zookeeper/RetryFactory.java
+++ b/fate/src/main/java/org/apache/accumulo/fate/zookeeper/RetryFactory.java
@@ -21,9 +21,12 @@
  */
 public class RetryFactory {
   public static final long DEFAULT_MAX_RETRIES = 10l, DEFAULT_START_WAIT = 250l, DEFAULT_WAIT_INCREMENT
= 250l, DEFAULT_MAX_WAIT = 5000l;
-  public static final RetryFactory DEFAULT_INSTANCE = new RetryFactory(DEFAULT_MAX_RETRIES,
DEFAULT_START_WAIT, DEFAULT_WAIT_INCREMENT, DEFAULT_MAX_WAIT);
+  // default logging interval: 3 minutes
+  public static final long DEFAULT_LOG_INTERVAL = 3 * 60 * 1000;
+  public static final RetryFactory DEFAULT_INSTANCE = new RetryFactory(DEFAULT_MAX_RETRIES,
DEFAULT_START_WAIT, DEFAULT_WAIT_INCREMENT, DEFAULT_MAX_WAIT,
+      DEFAULT_LOG_INTERVAL);
 
-  private final long maxRetries, startWait, maxWait, waitIncrement;
+  private final long maxRetries, startWait, maxWait, waitIncrement, logInterval;
 
   /**
    * Create a retry factor for retries with a limit
@@ -36,12 +39,15 @@
    *          The amount of ms to increment the wait on subsequent retries
    * @param maxWait
    *          The max amount of wait time between retries
+   * @param logInterval
+   *          The amount of time (ms) between logging retries
    */
-  public RetryFactory(long maxRetries, long startWait, long waitIncrement, long maxWait)
{
+  public RetryFactory(long maxRetries, long startWait, long waitIncrement, long maxWait,
long logInterval) {
     this.maxRetries = maxRetries;
     this.startWait = startWait;
     this.maxWait = maxWait;
     this.waitIncrement = waitIncrement;
+    this.logInterval = logInterval;
   }
 
   /**
@@ -54,14 +60,15 @@ public RetryFactory(long maxRetries, long startWait, long waitIncrement,
long ma
    * @param maxWait
    *          The max amount of wait time between retries
    */
-  public RetryFactory(long startWait, long waitIncrement, long maxWait) {
+  public RetryFactory(long startWait, long waitIncrement, long maxWait, long logInterval)
{
     this.maxRetries = Retry.MAX_RETRY_DISABLED;
     this.startWait = startWait;
     this.maxWait = maxWait;
     this.waitIncrement = waitIncrement;
+    this.logInterval = logInterval;
   }
 
   public Retry create() {
-    return new Retry(maxRetries, startWait, waitIncrement, maxWait);
+    return new Retry(maxRetries, startWait, waitIncrement, maxWait, logInterval);
   }
 }
diff --git a/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryFactoryTest.java b/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryFactoryTest.java
index 9ba19a4e67..cc5d4bc647 100644
--- a/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryFactoryTest.java
+++ b/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryFactoryTest.java
@@ -26,26 +26,28 @@
 
   @Test
   public void properArgumentsInRetry() {
-    long maxRetries = 10, startWait = 50l, maxWait = 5000l, waitIncrement = 500l;
-    RetryFactory factory = new RetryFactory(maxRetries, startWait, waitIncrement, maxWait);
+    long maxRetries = 10, startWait = 50l, maxWait = 5000l, waitIncrement = 500l, logInterval
= 10000l;
+    RetryFactory factory = new RetryFactory(maxRetries, startWait, waitIncrement, maxWait,
logInterval);
     Retry retry = factory.create();
 
     Assert.assertEquals(maxRetries, retry.getMaxRetries());
     Assert.assertEquals(startWait, retry.getCurrentWait());
     Assert.assertEquals(maxWait, retry.getMaxWait());
     Assert.assertEquals(waitIncrement, retry.getWaitIncrement());
+    Assert.assertEquals(logInterval, retry.getLogInterval());
   }
 
   @Test
   public void properArgumentsInUnlimitedRetry() {
-    long startWait = 50l, maxWait = 5000l, waitIncrement = 500l;
-    RetryFactory factory = new RetryFactory(startWait, waitIncrement, maxWait);
+    long startWait = 50l, maxWait = 5000l, waitIncrement = 500l, logInterval = 10000l;
+    RetryFactory factory = new RetryFactory(startWait, waitIncrement, maxWait, logInterval);
     Retry retry = factory.create();
 
     Assert.assertEquals(Retry.MAX_RETRY_DISABLED, retry.getMaxRetries());
     Assert.assertEquals(startWait, retry.getCurrentWait());
     Assert.assertEquals(maxWait, retry.getMaxWait());
     Assert.assertEquals(waitIncrement, retry.getWaitIncrement());
+    Assert.assertEquals(logInterval, retry.getLogInterval());
   }
 
 }
diff --git a/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryTest.java b/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryTest.java
index 6bbd1ffb80..fddef47ea6 100644
--- a/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryTest.java
+++ b/fate/src/test/java/org/apache/accumulo/fate/zookeeper/RetryTest.java
@@ -20,6 +20,7 @@
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.slf4j.Logger;
 
 /**
  *
@@ -27,15 +28,15 @@
 public class RetryTest {
 
   private Retry retry;
-  long initialWait = 1000l, waitIncrement = 1000l, maxRetries = 5;
+  long initialWait = 1000l, waitIncrement = 1000l, maxRetries = 5, logInterval = 1000l;
   private Retry unlimitedRetry1;
   private Retry unlimitedRetry2;
 
   @Before
   public void setup() {
-    retry = new Retry(maxRetries, initialWait, waitIncrement, maxRetries * 1000l);
-    unlimitedRetry1 = new Retry(initialWait, waitIncrement, maxRetries * 1000l);
-    unlimitedRetry2 = new Retry(-10, initialWait, waitIncrement, maxRetries * 1000l);
+    retry = new Retry(maxRetries, initialWait, waitIncrement, maxRetries * 1000l, logInterval);
+    unlimitedRetry1 = new Retry(initialWait, waitIncrement, maxRetries * 1000l, logInterval);
+    unlimitedRetry2 = new Retry(-10, initialWait, waitIncrement, maxRetries * 1000l, logInterval);
   }
 
   @Test
@@ -147,4 +148,27 @@ public void testUnlimitedRetry() {
       unlimitedRetry2.useRetry();
     }
   }
+
+  @Test
+  public void testLogging() {
+    Logger testLogger = EasyMock.createMock(Logger.class);
+    testLogger.warn(EasyMock.anyObject(String.class));
+    EasyMock.expectLastCall().times(4, 6);
+    EasyMock.replay(testLogger);
+
+    // we want to do this for 5 second and observe the log messages
+    long start = System.currentTimeMillis();
+    long end = System.currentTimeMillis();
+    int i = 0;
+    for (; (end - start < 5000l) && (i < Integer.MAX_VALUE); i++) {
+      unlimitedRetry1.logRetry(testLogger, "failure message");
+      unlimitedRetry1.useRetry();
+      end = System.currentTimeMillis();
+    }
+
+    // now observe what log messages we got which should be around 5 +- 1
+    EasyMock.verify(testLogger);
+    Assert.assertTrue(i > 10);
+
+  }
 }
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
index f20e145522..7809e2e2b1 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java
@@ -355,9 +355,10 @@ public void run() {
     // Tolerate `toleratedWalCreationFailures` failures, waiting `walFailureRetryIncrement`
milliseconds after the first failure,
     // incrementing the next wait period by the same value, for a maximum of `walFailureRetryMax`
retries.
     final RetryFactory walCreationRetryFactory = new RetryFactory(toleratedWalCreationFailures,
walFailureRetryIncrement, walFailureRetryIncrement,
-        walFailureRetryMax);
+        walFailureRetryMax, RetryFactory.DEFAULT_LOG_INTERVAL);
     // Tolerate infinite failures for the write, however backing off the same as for creation
failures.
-    final RetryFactory walWritingRetryFactory = new RetryFactory(walFailureRetryIncrement,
walFailureRetryIncrement, walFailureRetryMax);
+    final RetryFactory walWritingRetryFactory = new RetryFactory(walFailureRetryIncrement,
walFailureRetryIncrement, walFailureRetryMax,
+        RetryFactory.DEFAULT_LOG_INTERVAL);
 
     logger = new TabletServerLogger(this, walogMaxSize, syncCounter, flushCounter, walCreationRetryFactory,
walWritingRetryFactory, walogMaxAge);
     this.resourceManager = new TabletServerResourceManager(this, fs);
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
index 1487199cae..3843210b3a 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
@@ -338,9 +338,10 @@ private void write(final Collection<CommitSession> sessions, boolean
mincFinish,
           success = (currentLogSet == logSetId.get());
         }
       } catch (DfsLogger.LogClosedException ex) {
-        log.debug("Logs closed while writing, retrying attempt " + writeRetry.retriesCompleted());
+        writeRetry.logRetry(log, "Logs closed while writing");
       } catch (Exception t) {
-        log.warn("Failed to write to WAL, retrying attempt " + writeRetry.retriesCompleted(),
t);
+        if (writeRetry.retriesCompleted() != 0)
+          writeRetry.logRetry(log, "Failed to write to WAL", t);
 
         try {
           // Backoff


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message