accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ktur...@apache.org
Subject [accumulo] branch 1.9 updated: Fix WAL race condition between zookeeper and metadata table (#539)
Date Fri, 29 Jun 2018 16:45:11 GMT
This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 1.9
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/1.9 by this push:
     new fa20939  Fix WAL race condition between zookeeper and metadata table (#539)
fa20939 is described below

commit fa209394241f2eacf01bfaba1f82fe45a20237d5
Author: Keith Turner <keith@deenlo.com>
AuthorDate: Fri Jun 29 12:45:07 2018 -0400

    Fix WAL race condition between zookeeper and metadata table (#539)
---
 .../org/apache/accumulo/tserver/tablet/Tablet.java  | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
index 1e0ffcc..508e87e 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
@@ -2458,11 +2458,22 @@ public class Tablet implements TabletCommitter {
 
   private Set<DfsLogger> currentLogs = new HashSet<>();
 
-  public synchronized void removeInUseLogs(Set<DfsLogger> candidates) {
-    // remove logs related to minor compacting data
-    candidates.removeAll(otherLogs);
-    // remove logs related to tablets in memory data
-    candidates.removeAll(currentLogs);
+  public void removeInUseLogs(Set<DfsLogger> candidates) {
+    // This lock is held while clearing otherLogs and adding a minc file to metadata table. Not
+    // holding this lock leads to a small chance of data loss if tserver dies between clearing
+    // otherLogs and adding file to metadata table AND this method was called in the time between.
+    logLock.lock();
+    try {
+      // acquire locks in same order as other places in code to avoid deadlock
+      synchronized (this) {
+        // remove logs related to minor compacting data
+        candidates.removeAll(otherLogs);
+        // remove logs related to tablets in memory data
+        candidates.removeAll(currentLogs);
+      }
+    } finally {
+      logLock.unlock();
+    }
   }
 
   Set<String> beginClearingUnusedLogs() {


Mime
View raw message