Author: ivank
Date: Fri Mar 7 12:30:54 2014
New Revision: 1575246
URL: http://svn.apache.org/r1575246
Log:
BOOKKEEPER-715: bookie: delay dropping journal cached pages (sijie via ivank)
Modified:
zookeeper/bookkeeper/trunk/CHANGES.txt
zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java
zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java
Modified: zookeeper/bookkeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/CHANGES.txt?rev=1575246&r1=1575245&r2=1575246&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/CHANGES.txt (original)
+++ zookeeper/bookkeeper/trunk/CHANGES.txt Fri Mar 7 12:30:54 2014
@@ -170,6 +170,8 @@ Trunk (unreleased changes)
BOOKKEEPER-717: journal should look forward to group time-out entries (sijie via
ivank)
+ BOOKKEEPER-715: bookie: delay dropping journal cached pages (sijie via ivank)
+
hedwig-server:
BOOKKEEPER-601: readahead cache size isn't updated correctly (sijie via fpj)
Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java?rev=1575246&r1=1575245&r2=1575246&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java (original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/JournalChannel.java Fri Mar 7 12:30:54 2014
@@ -45,6 +45,7 @@ class JournalChannel implements Closeabl
private final static Logger LOG = LoggerFactory.getLogger(JournalChannel.class);
final RandomAccessFile randomAccessFile;
+ final int fd;
final FileChannel fc;
final BufferedChannel bc;
final int formatVersion;
@@ -53,6 +54,7 @@ class JournalChannel implements Closeabl
final byte[] MAGIC_WORD = "BKLG".getBytes(UTF_8);
private final static int START_OF_FILE = -12345;
+ private static long CACHE_DROP_LAG_BYTES = 8 * 1024 * 1024;
int HEADER_SIZE = 8; // 4byte magic word, 4 byte version
int MIN_COMPAT_JOURNAL_FORMAT_VERSION = 1;
@@ -62,8 +64,8 @@ class JournalChannel implements Closeabl
private boolean fRemoveFromPageCache;
public final static ByteBuffer zeros = ByteBuffer.allocate(512);
- // The position of the file channel's last force write.
- private long lastForceWritePosition = 0;
+ // The position of the file channel's last drop position
+ private long lastDropPosition = 0L;
// Mostly used by tests
JournalChannel(File journalDirectory, long logId) throws IOException {
@@ -159,10 +161,8 @@ class JournalChannel implements Closeabl
} catch (IOException e) {
LOG.error("Bookie journal file can seek to position :", e);
}
-
- // Anything we read has been force written
- lastForceWritePosition = fc.position();
}
+ this.fd = NativeIO.getSysFileDescriptor(randomAccessFile.getFD());
}
int getFormatVersion() {
@@ -198,14 +198,25 @@ class JournalChannel implements Closeabl
LOG.debug("Journal ForceWrite");
}
long newForceWritePosition = bc.forceWrite(forceMetadata);
- if (newForceWritePosition > lastForceWritePosition) {
- if (fRemoveFromPageCache) {
- NativeIO.bestEffortRemoveFromPageCache(randomAccessFile.getFD(),
- lastForceWritePosition, (int)(newForceWritePosition - lastForceWritePosition));
- }
- synchronized (this) {
- lastForceWritePosition = newForceWritePosition;
+ //
+ // For POSIX_FADV_DONTNEED, we want to drop from the beginning
+ // of the file to a position prior to the current position.
+ //
+ // The CACHE_DROP_LAG_BYTES is to prevent dropping a page that will
+ // be appended again, which would introduce random seeking on journal
+ // device.
+ //
+ // <======== drop ==========>
+ //                           <-----------LAG------------>
+ // +------------------------+---------------------------O
+ // lastDropPosition    newDropPos            lastForceWritePosition
+ //
+ if (fRemoveFromPageCache) {
+ long newDropPos = newForceWritePosition - CACHE_DROP_LAG_BYTES;
+ if (lastDropPosition < newDropPos) {
+ NativeIO.bestEffortRemoveFromPageCache(fd, lastDropPosition, newDropPos - lastDropPosition);
}
+ this.lastDropPosition = newDropPos;
}
}
}
Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java?rev=1575246&r1=1575245&r2=1575246&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java (original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/NativeIO.java Fri Mar 7 12:30:54 2014
@@ -30,12 +30,11 @@ public final class NativeIO {
private static final int POSIX_FADV_DONTNEED = 4; /* fadvise.h */
- private static boolean initializationAttempted = false;
private static boolean initialized = false;
+ private static boolean fadvisePossible = true;
- private static void onDemandInitialization() {
+ static {
try {
- initializationAttempted = true;
Native.register("c");
initialized = true;
} catch (NoClassDefFoundError e) {
@@ -48,7 +47,7 @@ public final class NativeIO {
}
// fadvice
- public static native int posix_fadvise(int fd, long offset, int len, int flag) throws LastErrorException;
+ public static native int posix_fadvise(int fd, long offset, long len, int flag) throws LastErrorException;
private NativeIO() {}
@@ -72,8 +71,7 @@ public final class NativeIO {
* @param descriptor - FileDescriptor object to get fd from
* @return file descriptor, -1 or error
*/
- private static int getSysFileDescriptor(FileDescriptor descriptor) {
- // field would not be null due to 'assert false' in getFieldByReflection
+ public static int getSysFileDescriptor(FileDescriptor descriptor) {
Field field = getFieldByReflection(descriptor.getClass(), "fd");
try {
return field.getInt(descriptor);
@@ -88,44 +86,33 @@ public final class NativeIO {
* Remove pages from the file system page cache when they wont
* be accessed again
*
- * @param fileDescriptor The file descriptor of the source file.
+ * @param fd The file descriptor of the source file.
* @param offset The offset within the file.
* @param len The length to be flushed.
*
* @throws nothing => Best effort
*/
- public static void bestEffortRemoveFromPageCache(FileDescriptor fileDescriptor, long offset, int len) {
- if (!initializationAttempted) {
- onDemandInitialization();
- }
-
- if (!initialized) {
- return;
- }
-
- int sysFileDesc = getSysFileDescriptor(fileDescriptor);
-
- if (sysFileDesc < 0) {
+ public static void bestEffortRemoveFromPageCache(int fd, long offset, long len) {
+ if (!initialized || !fadvisePossible || fd < 0) {
return;
}
-
try {
- if (System.getProperty("os.name").toLowerCase().contains("linux")) {
- posix_fadvise(sysFileDesc, offset, len, POSIX_FADV_DONTNEED);
- } else {
- LOG.debug("posix_fadvise skipped on file descriptor {}, offset {}", fileDescriptor, offset);
- }
- } catch (UnsatisfiedLinkError e) {
+ posix_fadvise(fd, offset, len, POSIX_FADV_DONTNEED);
+ } catch (UnsupportedOperationException uoe) {
+ LOG.warn("posix_fadvise is not supported : ", uoe);
+ fadvisePossible = false;
+ } catch (UnsatisfiedLinkError ule) {
// if JNA is unavailable just skipping Direct I/O
// instance of this class will act like normal RandomAccessFile
- LOG.warn("Unsatisfied Link error: posix_fadvise failed on file descriptor {}, offset {}",
- fileDescriptor, offset);
+ LOG.warn("Unsatisfied Link error: posix_fadvise failed on file descriptor {}, offset {} : ",
+ new Object[] { fd, offset, ule });
+ fadvisePossible = false;
} catch (Exception e) {
// This is best effort anyway so lets just log that there was an
// exception and forget
- LOG.warn("Unknown exception: posix_fadvise failed on file descriptor {}, offset {}",
- fileDescriptor, offset);
+ LOG.warn("Unknown exception: posix_fadvise failed on file descriptor {}, offset {} : ",
+ new Object[] { fd, offset, e });
}
}
|