zookeeper-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From iv...@apache.org
Subject svn commit: r1575246 - in /zookeeper/bookkeeper/trunk: CHANGES.txt bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java
Date Fri, 07 Mar 2014 12:30:54 GMT
Author: ivank
Date: Fri Mar  7 12:30:54 2014
New Revision: 1575246

URL: http://svn.apache.org/r1575246
Log:
BOOKKEEPER-715: bookie: delay dropping journal cached pages (sijie via ivank)

Modified:
    zookeeper/bookkeeper/trunk/CHANGES.txt
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java

Modified: zookeeper/bookkeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/CHANGES.txt?rev=1575246&r1=1575245&r2=1575246&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/CHANGES.txt (original)
+++ zookeeper/bookkeeper/trunk/CHANGES.txt Fri Mar  7 12:30:54 2014
@@ -170,6 +170,8 @@ Trunk (unreleased changes)
 
         BOOKKEEPER-717: journal should look forward to group time-out entries (sijie via ivank)
 
+        BOOKKEEPER-715: bookie: delay dropping journal cached pages (sijie via ivank)
+
       hedwig-server:
 
         BOOKKEEPER-601: readahead cache size isn't updated correctly (sijie via fpj)

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java?rev=1575246&r1=1575245&r2=1575246&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java (original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java Fri Mar  7 12:30:54 2014
@@ -45,6 +45,7 @@ class JournalChannel implements Closeabl
     private final static Logger LOG = LoggerFactory.getLogger(JournalChannel.class);
 
     final RandomAccessFile randomAccessFile;
+    final int fd;
     final FileChannel fc;
     final BufferedChannel bc;
     final int formatVersion;
@@ -53,6 +54,7 @@ class JournalChannel implements Closeabl
     final byte[] MAGIC_WORD = "BKLG".getBytes(UTF_8);
 
     private final static int START_OF_FILE = -12345;
+    private static long CACHE_DROP_LAG_BYTES = 8 * 1024 * 1024;
 
     int HEADER_SIZE = 8; // 4byte magic word, 4 byte version
     int MIN_COMPAT_JOURNAL_FORMAT_VERSION = 1;
@@ -62,8 +64,8 @@ class JournalChannel implements Closeabl
     private boolean fRemoveFromPageCache;
     public final static ByteBuffer zeros = ByteBuffer.allocate(512);
 
-    // The position of the file channel's last force write.
-    private long lastForceWritePosition = 0;
+    // The position of the file channel's last drop position
+    private long lastDropPosition = 0L;
 
     // Mostly used by tests
     JournalChannel(File journalDirectory, long logId) throws IOException {
@@ -159,10 +161,8 @@ class JournalChannel implements Closeabl
             } catch (IOException e) {
                 LOG.error("Bookie journal file can seek to position :", e);
             }
-
-            // Anything we read has been force written
-            lastForceWritePosition = fc.position();
         }
+        this.fd = NativeIO.getSysFileDescriptor(randomAccessFile.getFD());
     }
 
     int getFormatVersion() {
@@ -198,14 +198,25 @@ class JournalChannel implements Closeabl
             LOG.debug("Journal ForceWrite");
         }
         long newForceWritePosition = bc.forceWrite(forceMetadata);
-        if (newForceWritePosition > lastForceWritePosition) {
-            if (fRemoveFromPageCache) {
-                NativeIO.bestEffortRemoveFromPageCache(randomAccessFile.getFD(),
-                    lastForceWritePosition, (int)(newForceWritePosition - lastForceWritePosition));
-            }
-            synchronized (this) {
-                lastForceWritePosition = newForceWritePosition;
+        //
+        // For POSIX_FADV_DONTNEED, we want to drop from the beginning
+        // of the file to a position prior to the current position.
+        //
+        // The CACHE_DROP_LAG_BYTES is to prevent dropping a page that will
+        // be appended again, which would introduce random seeking on journal
+        // device.
+        //
+        // <======== drop ==========>
+        //                           <-----------LAG------------>
+        // +------------------------+---------------------------O
+        // lastDropPosition     newDropPos             lastForceWritePosition
+        //
+        if (fRemoveFromPageCache) {
+            long newDropPos = newForceWritePosition - CACHE_DROP_LAG_BYTES;
+            if (lastDropPosition < newDropPos) {
+                NativeIO.bestEffortRemoveFromPageCache(fd, lastDropPosition, newDropPos - lastDropPosition);
             }
+            this.lastDropPosition = newDropPos;
         }
     }
 }

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java?rev=1575246&r1=1575245&r2=1575246&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java (original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java Fri Mar  7 12:30:54 2014
@@ -30,12 +30,11 @@ public final class NativeIO {
 
     private static final int POSIX_FADV_DONTNEED = 4; /* fadvise.h */
 
-    private static boolean initializationAttempted = false;
     private static boolean initialized = false;
+    private static boolean fadvisePossible = true;
 
-    private static void onDemandInitialization() {
+    static {
         try {
-            initializationAttempted = true;
             Native.register("c");
             initialized = true;
         } catch (NoClassDefFoundError e) {
@@ -48,7 +47,7 @@ public final class NativeIO {
     }
 
     // fadvice
-    public static native int posix_fadvise(int fd, long offset, int len, int flag) throws LastErrorException;
+    public static native int posix_fadvise(int fd, long offset, long len, int flag) throws LastErrorException;
 
     private NativeIO() {}
 
@@ -72,8 +71,7 @@ public final class NativeIO {
      * @param descriptor - FileDescriptor object to get fd from
      * @return file descriptor, -1 or error
      */
-    private static int getSysFileDescriptor(FileDescriptor descriptor) {
-        // field would not be null due to 'assert false' in getFieldByReflection
+    public static int getSysFileDescriptor(FileDescriptor descriptor) {
         Field field = getFieldByReflection(descriptor.getClass(), "fd");
         try {
             return field.getInt(descriptor);
@@ -88,44 +86,33 @@ public final class NativeIO {
      * Remove pages from the file system page cache when they wont
      * be accessed again
      *
-     * @param fileDescriptor     The file descriptor of the source file.
+     * @param fd     The file descriptor of the source file.
      * @param offset The offset within the file.
      * @param len    The length to be flushed.
      *
      * @throws nothing => Best effort
      */
 
-    public static void bestEffortRemoveFromPageCache(FileDescriptor fileDescriptor, long offset, int len) {
-        if (!initializationAttempted) {
-            onDemandInitialization();
-        }
-
-        if (!initialized) {
-            return;
-        }
-
-        int sysFileDesc = getSysFileDescriptor(fileDescriptor);
-
-        if (sysFileDesc < 0) {
+    public static void bestEffortRemoveFromPageCache(int fd, long offset, long len) {
+        if (!initialized || !fadvisePossible || fd < 0) {
             return;
         }
-
         try {
-            if (System.getProperty("os.name").toLowerCase().contains("linux")) {
-                posix_fadvise(sysFileDesc, offset, len, POSIX_FADV_DONTNEED);
-            } else {
-                LOG.debug("posix_fadvise skipped on file descriptor {}, offset {}", fileDescriptor, offset);
-            }
-        } catch (UnsatisfiedLinkError e) {
+            posix_fadvise(fd, offset, len, POSIX_FADV_DONTNEED);
+        } catch (UnsupportedOperationException uoe) {
+            LOG.warn("posix_fadvise is not supported : ", uoe);
+            fadvisePossible = false;
+        } catch (UnsatisfiedLinkError ule) {
             // if JNA is unavailable just skipping Direct I/O
             // instance of this class will act like normal RandomAccessFile
-            LOG.warn("Unsatisfied Link error: posix_fadvise failed on file descriptor {}, offset {}",
-                fileDescriptor, offset);
+            LOG.warn("Unsatisfied Link error: posix_fadvise failed on file descriptor {}, offset {} : ",
+                    new Object[] { fd, offset, ule });
+            fadvisePossible = false;
         } catch (Exception e) {
             // This is best effort anyway so lets just log that there was an
             // exception and forget
-            LOG.warn("Unknown exception: posix_fadvise failed on file descriptor {}, offset {}",
-                fileDescriptor, offset);
+            LOG.warn("Unknown exception: posix_fadvise failed on file descriptor {}, offset {} : ",
+                    new Object[] { fd, offset, e });
         }
     }
 



Mime
View raw message