hadoop-zookeeper-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maha...@apache.org
Subject svn commit: r685624 [3/3] - in /hadoop/zookeeper/trunk/src/contrib: ./ zkfuse/ zkfuse/src/
Date Wed, 13 Aug 2008 17:59:00 GMT
Added: hadoop/zookeeper/trunk/src/contrib/zkfuse/src/zkfuse.cc
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/trunk/src/contrib/zkfuse/src/zkfuse.cc?rev=685624&view=auto
==============================================================================
--- hadoop/zookeeper/trunk/src/contrib/zkfuse/src/zkfuse.cc (added)
+++ hadoop/zookeeper/trunk/src/contrib/zkfuse/src/zkfuse.cc Wed Aug 13 10:58:59 2008
@@ -0,0 +1,4492 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+extern "C" {
+#include <fuse.h>
+#include <ulockmgr.h>
+}
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/time.h>
+#ifdef HAVE_SETXATTR
+#include <sys/xattr.h>
+#endif
+
+#include <getopt.h>
+
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <boost/shared_ptr.hpp>
+#include <boost/utility.hpp>
+#include <boost/weak_ptr.hpp>
+
+#include "log.h"
+#include "mutex.h"
+#include "zkadapter.h"
+
+/* NOTE(review): presumably enables a workaround for a ZooKeeper problem
+   with children watches on the root node; the code guarded by this macro
+   is not visible in this part of the file - confirm before removing. */
+#define ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+
+/**
+   Typedef for ZooKeeperAdapter::Data.
+*/
+typedef std::string Data;
+/**
+   Typedef for ZooKeeperAdapter::NodeNames.
+
+   std::vector is spelled out in full so that this typedef does not depend
+   on a using-directive being in effect at this point.
+*/
+typedef std::vector<std::string> NodeNames;
+
+/* Maximum size for the data part of a ZooKeeper node (used to initialize
+   ZkFuseFile::maxDataFileSize). The definition deliberately has no
+   trailing semicolon so that the macro can be used inside expressions. */
+#define MAX_DATA_SIZE 1024
+
+/* Defines the logger LOG with the name "zkfuse".
+   NOTE(review): DEFINE_LOGGER is presumably provided by log.h - confirm. */
+DEFINE_LOGGER(LOG, "zkfuse");
+
+/**
+  Convert milliseconds to whole seconds (the remainder is discarded).
+ */
+inline uint64_t millisecsToSecs(uint64_t millisecs)
+{
+    const uint64_t millisecsPerSec = 1000;
+    return millisecs / millisecsPerSec;
+}
+/**
+  Convert seconds to milliseconds.
+ */
+inline uint64_t secsToMillisecs(uint64_t secs)
+{
+    const uint64_t millisecsPerSec = 1000;
+    return secs * millisecsPerSec;
+}
+/**
+  Convert nanoseconds to whole milliseconds (the remainder is discarded).
+
+  There are 1,000,000 nanoseconds in one millisecond, so the value has to
+  be divided - not multiplied - by 1000000.
+ */
+inline
+uint64_t nanosecsToMillisecs(uint64_t nanosecs)
+{
+    return nanosecs / 1000000;
+}
+/**
+  Convert a struct timespec to milliseconds by adding the converted
+  tv_sec part and the converted tv_nsec part.
+ */
+inline uint64_t timespecToMillisecs(const struct timespec & ts)
+{
+    const uint64_t fromSecs = secsToMillisecs(ts.tv_sec);
+    const uint64_t fromNanosecs = nanosecsToMillisecs(ts.tv_nsec);
+    return fromSecs + fromNanosecs;
+}
+
+/**
+   Typedef for a boost::shared_ptr to a ZooKeeperAdapter.
+*/
+typedef boost::shared_ptr<ZooKeeperAdapter> ZooKeeperAdapterSharedPtr;
+
+/**
+ * ZkFuseCommon - holds immutable configuration objects.
+ *
+ * No locks are required to access these objects.
+ * A ZkFuseCommon instance is considered to be a data object and may be copied.
+ */
+class ZkFuseCommon
+{
+  private:
+    /* NOTE: the declaration order below is also the initialization order;
+       _dirMetadataName and _regMetadataName are built from
+       _metadataNamePrefix and therefore must be declared after it. */
+
+    /** The ZooKeeperAdapter instance to be used. */
+    ZooKeeperAdapterSharedPtr _zkAdapter;
+    /** Path to the ZooKeeper root node. */
+    std::string _rootPathName;
+    /** Name used to access the data "file" when the ZK node has children. */
+    std::string _dataFileName;
+    /**
+      Suffix added to a path component to force that component to be
+      interpreted as a directory. This is usually only required for the
+      last component. For example, ZkFuse may consider a leaf node a
+      regular file, e.g. /a/b/c/leaf. The suffix can be used to create a
+      child under this node, e.g.
+      mkdir /a/b/c/leaf{forceDirSuffix}/new_leaf.
+     */
+    std::string _forceDirSuffix;
+    /** Prefix common to all metadata nodes created by ZkFuse. */
+    std::string _metadataNamePrefix;
+    /**
+      Path component name that identifies a directory metadata node.
+      A directory metadata node is currently empty. ZkFuse creates it as
+      a child when mkdir is used, which prevents ZkFuse from interpreting
+      the new node as a regular file.
+     */
+    std::string _dirMetadataName;
+    /**
+      Path component name that identifies a regular file metadata node.
+      Such a node holds the metadata required to implement Posix regular
+      file semantics, such as setting mtime.
+     */
+    std::string _regMetadataName;
+    /** Number of not-in-use nodes to cache. */
+    unsigned _cacheSize;
+    /** All nodes are assumed to be owned by this userid. */
+    const uid_t _uid;
+    /** All nodes are assumed to be owned by this groupid. */
+    const gid_t _gid;
+    /** Blocksize used to calculate the number of blocks reported by stat. */
+    const unsigned _blkSize;
+
+  public:
+    /**
+      Constructor - installs the default configuration: root path "/",
+      metadata prefix ".zkfuse.", a cache of 256 nodes, the effective
+      uid/gid of the process as owner, and a block size of 8192.
+     */
+    ZkFuseCommon()
+      : _zkAdapter(),
+        _rootPathName("/"),
+        _dataFileName(),
+        _forceDirSuffix(),
+        _metadataNamePrefix(".zkfuse."),
+        _dirMetadataName(_metadataNamePrefix + "dir"),
+        _regMetadataName(_metadataNamePrefix + "file"),
+        _cacheSize(256),
+        _uid(geteuid()),
+        _gid(getegid()),
+        _blkSize(8192)
+    {
+    }
+
+    /**
+      Get the root path name. Always "/".
+      \see _rootPathName
+     */
+    const std::string & getRootPathName() const { return _rootPathName; }
+
+    /**
+      Get dataFileName - the name of the synthesized file used to access
+      ZooKeeper node data.
+      \see _dataFileName
+     */
+    const std::string & getDataFileName() const { return _dataFileName; }
+
+    /**
+      Set dataFileName.
+      \see getDataFileName
+      \see _dataFileName
+     */
+    void setDataFileName(const std::string & dataFileName)
+    {
+        _dataFileName = dataFileName;
+    }
+
+    /**
+      Get metadataNamePrefix - the common prefix of all metadata ZooKeeper
+      nodes created by ZkFuse.
+      \see _metadataNamePrefix
+     */
+    const std::string & getMetadataNamePrefix() const
+    {
+        return _metadataNamePrefix;
+    }
+
+    /**
+      Get forceDirSuffix - the suffix added to a path component to force
+      the path component to be treated like a directory.
+      \see _forceDirSuffix
+     */
+    const std::string & getForceDirSuffix() const { return _forceDirSuffix; }
+
+    /**
+      Set forceDirSuffix.
+      \see getForceDirSuffix
+      \see _forceDirSuffix
+     */
+    void setForceDirSuffix(const std::string & forceDirSuffix)
+    {
+        _forceDirSuffix = forceDirSuffix;
+    }
+
+    /**
+      Get dirMetadataName - the path component name of all directory
+      metadata ZooKeeper nodes.
+      \see _dirMetadataName
+     */
+    const std::string & getDirMetadataName() const { return _dirMetadataName; }
+
+    /**
+      Get regMetadataName - the path component name of all regular file
+      metadata ZooKeeper nodes.
+      \see _regMetadataName
+     */
+    const std::string & getRegMetadataName() const { return _regMetadataName; }
+
+    /**
+      Get the number of not-in-use ZkFuseFile instances to cache.
+      \see _cacheSize
+     */
+    unsigned getCacheSize() const { return _cacheSize; }
+
+    /**
+      Set the cache size.
+      \see getCacheSize
+      \see _cacheSize
+     */
+    void setCacheSize(unsigned v) { _cacheSize = v; }
+
+    /**
+      Get userid.
+      \see _uid
+     */
+    uid_t getUid() const { return _uid; }
+
+    /**
+      Get groupid.
+      \see _gid
+     */
+    gid_t getGid() const { return _gid; }
+
+    /**
+      Get block size.
+      \see _blkSize
+     */
+    unsigned getBlkSize() const { return _blkSize; }
+
+    /**
+      Get the ZooKeeperAdapter.
+      \see _zkAdapter
+     */
+    const ZooKeeperAdapterSharedPtr & getZkAdapter() const
+    {
+        return _zkAdapter;
+    }
+
+    /**
+      Set the ZooKeeperAdapter.
+      \see _zkAdapter
+     */
+    void setZkAdapter(const ZooKeeperAdapterSharedPtr & zkAdapter)
+    {
+        _zkAdapter = zkAdapter;
+    }
+};
+
+/**
+  ZkFuseNameType - identifies the type of the ZkFuse path.
+ */
+enum ZkFuseNameType {
+    /**
+      The ZkFuse path is not synthesized.
+      ZkFuse should apply its default rules to determine the Posix
+      representation of the path.
+     */
+    ZkFuseNameDefaultType = 0,
+    /**
+      The ZkFuse path is synthesized and identifies the data part of a
+      ZooKeeper node, i.e. Posix regular file semantics is expected.
+     */
+    ZkFuseNameRegType = 1,
+    /**
+      The ZkFuse path is synthesized and identifies the children part of a
+      ZooKeeper node, i.e. Posix directory semantics is expected.
+     */
+    ZkFuseNameDirType = 2
+};
+
+class ZkFuseFile;
+
+typedef ZkFuseFile * ZkFuseFilePtr;
+
+class ZkFuseHandleManagerFactory;
+
+/**
+  ZkFuseHandleManager - keeps track of all the ZkFuseFile instances 
+  allocated by a ZkFuseHandleManager instance and provides them
+  with a handle that can be used by FUSE. 
+
+  It maps a ZooKeeper path to a handle and a handle to a ZkFuseFile instance.
+  It also implements the methods that take path names as arguments, such
+  as open, mknod, rmdir, and rename.
+
+  Memory management
+  - References ZkFuseFile instances using regular pointers
+    Smart pointer is not used because reference counts are needed to
+    determine how many times a node is opened as a regular file or
+    directory. This also avoids circular smart pointer references.
+  - Each ZkFuseFile instance holds a reference to its ZkFuseHandleManager
+    using a boost::shared_ptr. This ensures that the ZkFuseHandleManager
+    instance that has the handle for the ZkFuseFile instance does not
+    get garbage collected while the ZkFuseFile instance exists.
+
+  Concurrency control
+  - Except for the immutable ZkFuseCommon, all other member variables
+    are protected by _mutex.
+  - A method in this class can hold _mutex when it directly or
+    indirectly invokes ZkFuseFile methods. A ZkFuseFile method that holds
+    a ZkFuseFile instance _mutex cannot invoke a ZkFuseHandleManager
+    method that acquires the ZkFuseHandleManager instance's _mutex.
+    Otherwise, this may cause a dead lock.
+  - Methods with names that begin with "_" do not acquire _mutex. 
+    They are usually called by public methods that acquire and hold _mutex.
+ */
+class ZkFuseHandleManager : boost::noncopyable
+{
+  private:
+    /**
+      Typedef of handle, which is an int.
+     */
+    typedef int Handle;
+    /**
+      Typedef of std::map used to map path to handle.
+     */
+    typedef std::map<std::string, Handle> Map;
+    /**
+      Typedef of std::vector used to map handle to ZkFuseFile instances.
+     */
+    typedef std::vector<ZkFuseFilePtr> Files;
+    /**
+      Typedef of std::vector used to hold unused handles.
+     */
+    typedef std::vector<Handle> FreeList;
+    /**
+      Typedef of boost::weak_ptr to the ZkFuseHandleManager instance.
+     */
+    typedef boost::weak_ptr<ZkFuseHandleManager> WeakPtr;
+
+    /* Only ZkFuseHandleManagerFactory can create instances of this class */
+    friend class ZkFuseHandleManagerFactory;
+
+    /**
+      Contains common configuration.
+      Immutable so that it can be accessed without locks.
+     */
+    const ZkFuseCommon _common;
+    /**
+      Maps a path name to a Handle.
+     */
+    Map _map;
+    /**
+      Maps a handle to a ZkFuseFile instance.
+      Also holds pointers to all known ZkFuseFile instances.
+      An element may point to an allocated ZkFuseFile instance or be NULL.
+
+      An allocated ZkFuseFile instance may be in one of the following states:
+      - in-use
+        Currently open, i.e. the ZkFuseFile instance's reference count 
+        is greater than 0.
+      - in-cache
+        Not currently open, i.e. the ZkFuseFile instance's 
+        reference count is 0.
+     */
+    Files _files;
+    /**
+      List of free'ed handles.
+     */
+    FreeList _freeList;
+    /**
+      Mutex used to protect this instance.
+     */
+    mutable zkfuse::Mutex _mutex;
+    /**
+      Count of number of in-use entries.
+      It is used to calculate the number of cached nodes.
+      Number of cached nodes is (_files.size() - _numInUse).
+     */
+    unsigned _numInUse;
+    /**
+      WeakPtr to myself.
+     */
+    WeakPtr _thisWeakPtr;
+   
+    /**
+      Obtain a handle for the given path.
+      - If path is not known, then allocate a new handle and increment
+        _numInUse, and set newFile to true. The allocated 
+        ZkFuseFile instance's reference count should be 1.
+      - If path is known, increase the corresponding 
+        ZkFuseFile instance's reference count.
+
+      \return the allocated handle.
+      \param path the path to lookup.
+      \param newFile indicates whether a new handle has been allocated.
+     */
+    Handle allocate(const std::string & path, bool & newFile);
+
+    /**
+      Constructor.
+
+      \param common the immutable common configuration.
+      \param reserve number of elements to pre-allocate for 
+                     _files and _freeList.
+     */
+    ZkFuseHandleManager(
+            const ZkFuseCommon & common, 
+            const unsigned reserve) 
+      : _common(common),
+        _files(), 
+        _freeList(), 
+        _mutex(),
+        _numInUse(0)
+    {
+        _files.reserve(reserve);
+        /* Handle 0 is never allocated: occupy slot 0 with a NULL entry.
+           resize() value-initializes the new element, i.e. to NULL.
+           The slot must not be written through operator[] before it
+           exists: reserve() only changes the capacity, and indexing an
+           empty vector is undefined behavior. */
+        _files.resize(1); 
+        _freeList.reserve(reserve);
+    }
+
+  public:
+    /** 
+      Typedef for boost::shared_ptr for this ZkFuseHandleManager class.
+     */
+    typedef boost::shared_ptr<ZkFuseHandleManager> SharedPtr;
+
+    /**
+      Destructor.
+     */
+    ~ZkFuseHandleManager()
+    {
+    }
+    /** 
+      Get the ZkFuseFile instance for a handle.
+
+      \return the ZkFuseFile instance identified by the handle, or NULL
+              if the handle does not index into _files (or the slot
+              itself holds NULL).
+      \param handle get ZkFuseFile instance for this handle.
+     */
+    ZkFuseFilePtr getFile(Handle handle) const
+    {
+        AutoLock lock(_mutex);
+        /* Reject handles outside of _files instead of indexing out of
+           bounds. */
+        if (handle < 0 ||
+            static_cast<Files::size_type>(handle) >= _files.size()) {
+            return NULL;
+        }
+        return _files[handle];
+    }
+    /**
+      Get the immutable common configuration.
+
+      \return the common configuration instance.
+     */
+    const ZkFuseCommon & getCommon() const
+    {
+        return _common;
+    }
+    /**
+      Deallocate a previously allocated handle.
+      This decrements the reference count of the corresponding
+      ZkFuseFile instance. If the reference count becomes zero,
+      decrement _numInUse. It may also cause the ZkFuseFile instance
+      to be reclaimed if there are too many cached ZkFuseFile instances.
+
+      The ZkFuseFile instance should be reclaimed if the number of
+      unused ZkFuseFile instances exceeds the configured cache size, i.e.
+      (_files.size() - _numInUse) > _common.getCacheSize()
+      and the ZkFuseFile instance has a reference count of zero.
+
+      Reclaiming a ZkFuseFile instance involves removing the ZkFuseFile
+      instance's path to handle mapping from _map and the handle to the 
+      ZkFuseFile instance mapping from _files, adding the handle to 
+      the _freeList, and finally deleting the ZkFuseFile instance.
+
+      \param handle the handle that should be deallocated.
+     */
+    void deallocate(Handle handle);
+    /**
+      Handles ZooKeeper session events.
+      It invokes the known ZkFuseFile instances to let them know
+      that their watches will no longer be valid. 
+     */
+    void eventReceived(const ZKWatcherEvent & event);
+    /**
+      Get data from the specified ZooKeeper path.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the ZooKeeper node.
+      \param data return data read.
+     */
+    int getData(const std::string & path, Data & data);
+    /**
+      Set data into the specified ZooKeeper path.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the ZooKeeper node.
+      \param data the data to be written.
+      \param exists set to true if this path exists.
+      \param doFlush set to true if new data should be flushed to ZooKeeper.
+     */
+    int setData(const std::string & path,
+                const Data & data,
+                bool exists,
+                bool doFlush);
+    /**
+      Create a ZooKeeper node to represent a ZkFuse file or directory.
+
+      \return handle if successful, otherwise return negative errno.
+      \param path to create.
+      \param mode should be either S_IFDIR for directory or 
+                  S_IFREG for regular file.
+      \param mayExist if set and the ZooKeeper node already exists, return
+                      valid handle instead of -EEXIST.
+      \param created returns whether a new ZooKeeper node had been created.
+     */
+    int mknod(const std::string & path, 
+              mode_t mode, 
+              bool mayExist, 
+              bool & created);
+    /**
+      Open a ZooKeeper node.  
+
+      The justCreated argument is used to differentiate if the _deleted flag 
+      of the ZkFuseFile instance is to be trusted  (i.e. the path 
+      does not exist in ZooKeeper.) The _deleted flag is trusted 
+      if the ZkFuseFile instance is known to exist in ZooKeeper after
+      invoking ZooKeeper with the path. 
+      
+      If justCreated is true, then the ZkFuseFile instance was just created. 
+      The ZkFuseFile constructor sets the _deleted flag to true because 
+      path is not known to exist and hence should not be accessed. 
+      The justCreated flag will force the ZkFuseFile instance to invoke 
+      ZooKeeper to determine if the path exists.
+
+      \return handle if successful, otherwise return negative errno.
+      \param path the path to open.
+      \param justCreated indicates if this is newly created ZkFuseFile instance.
+     */
+    int open(const std::string & path, bool justCreated);
+    /**
+      Remove a ZkFuse directory.
+
+      If force is not set, then the ZooKeeper node will be removed only
+      if it has no data and no child nodes except ZkFuse metadata nodes.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path to remove.
+      \param force force removal, i.e. bypass checks.
+      */
+    int rmdir(const char * path, bool force = false);
+    /**
+      Make a ZkFuse directory.
+
+      ZkFuse represents a ZooKeeper node with no data and no children 
+      as a regular file. In order to differentiate a newly created
+      directory from an empty regular file, mkdir will create a directory
+      metadata node as a child of the directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the directory to create.
+      \param mode create directory with this mode 
+                  (mode currently not implemented).
+     */
+    int mkdir(const char * path, mode_t mode);
+    /**
+      Remove a ZkFuse regular file.
+
+      A file is the abstraction for the data part of a ZooKeeper node.
+      - If ZkFuse represents a ZooKeeper node as a directory, the data part
+        of the node is represented by synthesizing a name for this file. This
+        synthesized name is visible through readdir if the ZooKeeper node's
+        data is not empty. Removing such a file is done by truncating 
+        the ZooKeeper node's data to 0 length.
+      - If ZkFuse represents a ZooKeeper node as a file, then removing the
+        file is done by removing the ZooKeeper node (and its metadata).
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path the path of the file to remove.
+     */
+    int unlink(const char * path);
+    /**
+      Get attributes of a ZkFuse regular file or directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param path get attributes for this path
+      \param stbuf store attributes here.
+     */
+    int getattr(const char * path, struct stat & stbuf);
+    /**
+      Rename a ZkFuse regular file.
+
+      It creates a new ZooKeeper node at toPath, copies data and file
+      metadata from the ZooKeeper node at fromPath to the new node, 
+      and deletes the current ZooKeeper node. The current ZooKeeper 
+      node is not deleted if the new ZooKeeper node cannot be created 
+      or the data copy fails.
+
+      It cannot be used to rename a directory.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param fromPath the current path.
+      \param toPath rename to this path.
+     */
+    int rename(const char * fromPath, const char * toPath);
+    /**
+      Add a child ZooKeeper path to the children information cache
+      of the ZkFuseFile instance that caches the parent ZooKeeper node.
+
+      This is used to add a child path after a new ZooKeeper node has
+      been created to the children information cache of the parent
+      ZooKeeper node. This is needed because waiting for the children
+      changed event to update the cache may result in inconsistent local
+      views of the changes.
+      \see removeChildFromParent
+
+      \param childPath the path of the child ZooKeeper node.
+     */
+    void addChildToParent(const std::string & childPath) const;
+    /**
+      Remove a child ZooKeeper path from the children information cache
+      of the ZkFuseFile instance that caches the parent ZooKeeper node.
+      
+      For example, this should happen whenever a path is deleted.
+      This child information cache of the parent will eventually be 
+      invalidated by watches. However, the delivery of the children 
+      change event may come after the next access and thus provide 
+      the client with an inconsistent view. One example is that 
+      client deletes the last file in a directory, but the children
+      changed event is not delivered before the client invokes rmdir
+      to remove the parent. In this case, the rmdir fails because 
+      the cached children information of the parent indicates the 
+      "directory" is not empty.
+
+      \param childPath the path of the child ZooKeeper node.
+     */
+    void removeChildFromParent(const std::string & childPath) const;
+    /**
+      Return the path for the parent of the specified ZooKeeper path.
+
+      \return the parent path.
+      \param childPath the child path.
+     */
+    std::string getParentPath(const std::string & childPath) const;
+    /**
+      Return the ZooKeeper path from a ZkFuse path.
+
+      The ZkFuse path may be a synthesized path. For example, a synthesized
+      path is required to access the data part of a ZooKeeper node's 
+      data when ZkFuse represents the ZooKeeper node as directory. 
+      A synthesized path is also required to create a child ZooKeeper node
+      under a ZooKeeper node that is represented by a regular file.
+
+      \return the ZooKeeper path for path.
+      \param path the ZkFuse path, which may be a synthesized path.
+      \param nameType indicate whether the ZkFuse path is synthesized and
+                      whether the synthesized ZkFuse path identifies a
+                      directory or a regular file.
+     */
+    std::string getZkPath(const char * path, ZkFuseNameType & nameType) const;
+};
+
+/**
+  ZkFuseHandleManagerFactory - factory for ZkFuseHandleManager.
+  
+  This is the only way to create a ZkFuseHandleManager instance. 
+  It makes sure that _thisWeakPtr of the instance is initialized 
+  after the instance is created.
+ */
+class ZkFuseHandleManagerFactory
+{
+  public:
+    /**
+      Create an instance of ZkFuseHandleManager and point its
+      _thisWeakPtr at the shared pointer that owns it.
+      
+      \return the created ZkFuseHandleManager instance.
+      \param common the common configuration.
+      \param reserve initially reserve space for this number of handles.
+     */
+    static ZkFuseHandleManager::SharedPtr create(
+       const ZkFuseCommon & common, 
+       unsigned reserve = 1000)
+    {
+        ZkFuseHandleManager::SharedPtr newManager;
+        newManager.reset(new ZkFuseHandleManager(common, reserve));
+        /* The weak pointer can only be set once the owning shared
+           pointer exists. */
+        newManager->_thisWeakPtr = newManager;
+        return newManager;
+    }
+};
+
+/**
+  ZkFuseAutoHandle - automatically closes handle.
+
+  It holds an opened handle and automatically closes this handle
+  when it is destroyed. This enables code that open a handle
+  to be exception safe.
+ */
+class ZkFuseAutoHandle
+{
+  private:
+    /**
+      Typedef for Handle which is an int.
+     */
+    typedef int Handle;
+    /**
+      The ZkFuseHandleManager instance that allocated the handle.
+     */
+    ZkFuseHandleManager::SharedPtr _manager;
+    /**
+      The handle to close when this instance is destroyed.
+      Valid handles are equal to or greater than 0. A negative value
+      indicates an error condition, usually a negative errno.
+     */
+    Handle _handle;
+    /**
+      The ZkFuseFile instance with this handle, or NULL if the handle is
+      negative. It is cached as a performance optimization so that
+      _manager->getFile(_handle) is only called once, when the handle
+      is initialized.
+     */
+    ZkFuseFilePtr _file;
+
+    /**
+      Look up and remember the ZkFuseFile instance for _handle.
+     */
+    void _initFile()
+    {
+        if (_handle < 0) {
+            /* Error value, there is no file to look up. */
+            _file = NULL;
+            return;
+        }
+        _file = _manager->getFile(_handle);
+    }
+
+  public:
+    /**
+      Constructor - takes a previously opened handle.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param handle the handle.
+     */
+    ZkFuseAutoHandle(
+        const ZkFuseHandleManager::SharedPtr & manager, 
+        int handle)
+      : _manager(manager),
+        _handle(handle),
+        _file()
+    {
+        _initFile();
+    }
+    /**
+      Constructor - open path and remember handle.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param path open this path and remember its handle in this instance.
+     */
+    ZkFuseAutoHandle( 
+        const ZkFuseHandleManager::SharedPtr & manager, 
+        const std::string & path)
+      : _manager(manager),
+        _handle(-1),
+        _file()
+    {
+        _handle = _manager->open(path, false);
+        _initFile();
+    }
+    /**
+      Constructor - create path and remember handle.
+
+      The creation mode indicates whether the path identifies a regular file
+      or a directory.
+
+      \param manager the ZkFuseHandleManager instance who allocated the handle.
+      \param path create this path and remember its handle in this instance.
+      \param mode the creation mode for the path, should be either
+                  S_IFDIR or S_IFREG.
+      \param mayExist if set and the path already exists, 
+                      then the ZkFuseAutoHandle will hold the handle
+                      for the path instead of -EEXIST.
+                      If not set and the path already exists, then the
+                      handle will be -EEXIST.
+     */
+    ZkFuseAutoHandle( 
+        const ZkFuseHandleManager::SharedPtr & manager, 
+        const std::string & path,
+        mode_t mode,
+        bool mayExist)
+      : _manager(manager),
+        _handle(-1),
+        _file()
+    {
+        /* Out-parameter required by mknod; its value is not used here. */
+        bool nodeCreated;
+        _handle = _manager->mknod(path, mode, mayExist, nodeCreated);
+        _initFile();
+    }
+    /**
+      Destructor - closes the handle.
+     */
+    ~ZkFuseAutoHandle()
+    {
+        reset();
+    }
+    /**
+      Get the handle.
+      \see _handle
+     */
+    int get() const { return _handle; }
+    /**
+      Get the ZkFuseFile instance of the handle.
+      \see _file
+     */
+    ZkFuseFilePtr getFile() const { return _file; }
+    /**
+      Forget the handle, don't close the handle.
+     */
+    void release() 
+    {
+        _file = NULL;
+        _handle = -1;
+    }
+    /**
+      Change the remembered handle.
+
+      It will close the current handle (if valid).
+     */
+    void reset(int handle = -1);
+};
+
+/**
+  ZkFuseStat - C++ wrapper for ZooKeeper Stat.
+
+  This wrapper provides ZooKeeper Stat with a constructor that
+  initializes the instance variables of Stat.
+ */
+class ZkFuseStat : public Stat
+{
+  public:
+    /**
+      Constructor - clear instance variables.
+      \see clear
+     */
+    ZkFuseStat() { clear(); }
+    /**
+      Destructor - do nothing.
+     */
+    ~ZkFuseStat() { }
+    /**
+      Clear instance variables: czxid, mzxid, ctime, mtime, version,
+      cversion and aversion are set to 0. No other member is touched.
+     */
+    void clear()
+    {
+        czxid = mzxid = 0;
+        ctime = mtime = 0;
+        version = cversion = aversion = 0;
+    }
+};
+
+/**
+  ZkFuseFile - an instance encapsulates the runtime state of an allocated
+  ZooKeeper node.
+
+  Memory management
+  - Referenced by the ZkFuseHandleManager that created this instance.
+  - Uses boost::shared_ptr to reference the ZkFuseHandleManager that 
+    created this instance. This makes sure that this ZkFuseHandleManager
+    instance cannot be deleted when it has allocated ZkFuseFile instances.
+  - A ZkFuseFile instance deletes itself if it can be reclaimed.
+    It can be reclaimed if it has no watches, its reference count is zero,
+    and the ZkFuseHandleManager instance would have more than the 
+    configured number of cached ZkFuseFile instances. 
+  - A ZkFuseFile instance cannot be deleted if it has active watches on
+    its ZooKeeper node. When one of its watches fires, the ZkFuseFile
+    instance must exist because one of its methods will be invoked 
+    to process the event. If the ZkFuseFile instance has been deleted,
+    the method will access previously freed memory.
+
+  Concurrency control
+  - _mutex protects the instance variables of an instance.
+  - Callers should assume that a public method will acquire _mutex. 
+  - Methods of this class may not hold _mutex while invoking an
+    ZkFuseHandleManager instance.
+  - Methods with names that begin with "_" do not acquire _mutex. 
+    They are usually called by public methods that acquire and hold _mutex.
+*/
+class ZkFuseFile : boost::noncopyable
+{
+  public:
+    /**
+      Maximum size for the data part of a ZooKeeper node.
+     */
+    static const unsigned maxDataFileSize = MAX_DATA_SIZE;
+
+  private:
+    /**
+      Mode returned by getattr for a ZkFuse directory.
+     */
+    static const mode_t dirMode = (S_IFDIR | 0777);
+    /**
+      Mode returned by getattr for a ZkFuse regular file.
+     */
+    static const mode_t regMode = (S_IFREG | 0777);
+
+    /**
+      References the ZkFuseHandleManager that created this instance.
+     */
+    ZkFuseHandleManager::SharedPtr _manager;
+    /**
+      Handle for this instance.
+     */
+    const int _handle;
+    /**
+      Path of the ZooKeeper node represented by this instance.
+     */
+    const std::string _path;
+    /**
+      Mutex that protects the instance variables of this instance.
+     */
+    mutable zkfuse::Mutex _mutex;
+    /**
+      Reference count for this instance, i.e. the number of opens 
+      minus the number of closes.
+     */
+    int _refCount;
+    /**
+      Indicates whether the ZooKeeper node is considered deleted, i.e.
+      it does not exist or is not known to exist.
+      This flag allows caching of a deleted ZooKeeper node to avoid
+      repeated ZooKeeper lookups for a non-existent path, and avoid
+      using cached information. 
+      
+      Its value is true if 
+      - the node is verified to not exist (by calling ZooKeeper), or
+      - its existence is unknown because ZooKeeper has not been
+        invoked to verify its path's existence.
+     */
+    bool _deleted;
+    /**
+      Number of directory opens minus the number of directory closes.
+     */
+    int _openDirCount;
+    /**
+      Indicates whether cached children information is valid.
+      
+      It is true if the cached children information is valid.
+     */
+    bool _initializedChildren;
+    /**
+      Indicates whether there is an outstanding children watch.
+
+      It is true if it has an outstanding children watch.
+     */
+    bool _hasChildrenListener;
+    /**
+      Cached children information. 
+
+      The cache is valid if _initializedChildren is true.
+     */
+    NodeNames _children;
+
+    /**
+      Indicates whether the cached data is valid.
+
+      It is true if the cached data and ZooKeeper Stat are valid.
+     */
+    bool _initializedData;
+    /**
+      Indicates whether there is an outstanding data watch.
+
+      It is true if it has an outstanding data watch.
+     */
+    bool _hasDataListener;
+    /**
+      Indicates whether the cached data (_activeData) has been modified.
+
+      It is true if the cached data has been modified.
+     */
+    bool _dirtyData;
+    /**
+      Currently active data.
+
+      To maintain atomicity of updates and emulate Posix semantics, 
+      when a ZkFuse file remains open, the same data will be accessed
+      by the file's clients. The data will be flushed to ZooKeeper when
+      the flush method is called. The flush method may be called
+      explicitly by a client or implicitly when the ZkFuse file is no 
+      longer currently open.
+
+      _activeData and _activeStat stores the data and ZooKeeper Stat
+      that will be accessed by the file's clients.
+
+      If there are changes when the ZkFuse file is open, new data is
+      cached as latest data (by _latestData and _latestStat).
+     */
+    Data _activeData;
+    /**
+      Currently active ZooKeeper Stat.
+      \see _activeData
+     */
+    ZkFuseStat _activeStat;
+    /**
+      Latest data.
+      This is either the same as _activeData or it is newer. It is newer
+      if it has been updated by an event triggered by a data watch.
+     */
+    Data _latestData;
+    /**
+      Latest ZooKeeper Stat.
+      This is either the same as _activeStat or it is newer. It is newer
+      if it has been updated by an event triggered by a data watch.
+     */
+    ZkFuseStat _latestStat;
+
+    /**
+      Get userid.
+
+      \return the userid.
+     */
+    uid_t _getUid() const
+    {
+        /* The userid is read from the common object owned by the manager. */
+        const uid_t uid = _manager->getCommon().getUid();
+        return uid;
+    }
+    /**
+      Get groupid.
+
+      \return the groupid.
+     */
+    gid_t _getGid() const
+    {
+        /* The groupid is read from the common object owned by the manager. */
+        const gid_t gid = _manager->getCommon().getGid();
+        return gid;
+    }
+    /** 
+      Get block size.
+
+      \return the block size.
+     */
+    unsigned _getBlkSize() const
+    {
+        /* The block size is read from the common object owned by the manager. */
+        const unsigned blkSize = _manager->getCommon().getBlkSize();
+        return blkSize;
+    }
+    /**
+      Get number of children, include metadata children in the count.
+
+      \return the number of children including metadata children.
+     */
+    unsigned _numChildrenIncludeMeta() const
+    {
+        /* Every cached child is counted, metadata children included. */
+        const unsigned total = _children.size();
+        LOG_DEBUG(LOG, "numChildrenIncludeMeta() returns %u", total);
+        return total;
+    }
+    /**
+      Get number of children, exclude metadata children in the count.
+
+      \return the number of children excluding metadata children.
+     */
+    unsigned _numChildrenExcludeMeta() const
+    {
+        /* Walk the cached children and count those that are not metadata. */
+        unsigned nonMeta = 0;
+        NodeNames::const_iterator child = _children.begin();
+        const NodeNames::const_iterator last = _children.end();
+        while (child != last) {
+            if (_isMeta(*child) == false) {
+                ++nonMeta;
+            }
+            ++child;
+        }
+        LOG_DEBUG(LOG, "numChildrenExcludeMeta() returns %u", nonMeta);
+        return nonMeta;
+    }
+    /**
+      Whether the ZooKeeper node has children, include metadata
+      children.
+
+      \return true if it has children including metadata children.
+     */
+    bool _hasChildrenIncludeMeta() const
+    { 
+        /* True as soon as there is at least one cached child of any kind. */
+        const unsigned total = _numChildrenIncludeMeta();
+        return total > 0;
+    }
+    /**
+      Whether the ZooKeeper node has children, exclude metadata
+      children.
+
+      \return true if it has children excluding metadata children.
+     */
+    bool _hasChildrenExcludeMeta() const
+    {
+        /* True as soon as there is at least one non-metadata cached child. */
+        const unsigned nonMeta = _numChildrenExcludeMeta();
+        return nonMeta > 0;
+    }
+    /**
+      Whether the ZooKeeper node has data.
+
+      \return true if _activeData is not empty.
+     */
+    bool _hasData() const
+    {
+        /* The node has data exactly when the cached active data is non-empty. */
+        return !_activeData.empty();
+    }
+    /**
+      Whether the ZooKeeper node has child with the specified path.
+
+      \return true if the ZooKeeper node has a child with the specified path.
+      \param childPath the path of the child.
+     */
+    bool _hasChildPath(const std::string & childPath) const
+    {
+        /* Linear search of the cached children for an exact path match. */
+        const NodeNames::const_iterator last = _children.end();
+        const bool found =
+            (std::find(_children.begin(), last, childPath) != last);
+        LOG_DEBUG(LOG, "hasChild(childPath %s) returns %d", 
+                  childPath.c_str(), found);
+        return found;
+    }
+    /**
+      Whether the given path component is a ZkFuse synthesized path
+      component.
+
+      A ZkFuse synthesized path component will begin with 
+      the metadataNamePrefix obtained from the common configuration.
+      \see _metadataNamePrefix
+
+      \return true if the path component is a ZkFuse synthesized path
+                   component.
+      \param childName the path component to check if it is synthesized by
+                       ZkFuse.
+     */
+    bool _isMeta(const std::string & childName) const
+    {
+        const std::string & metaPrefix = 
+            _manager->getCommon().getMetadataNamePrefix();
+        /* Position in childName where the last path component starts:
+         * skip this node's path plus the "/" separator, or only the
+         * leading "/" when this node's path has length 1 (root dir).
+         */
+        const unsigned componentStart = 
+            (_path.length() > 1) ? _path.length() + 1 : 1;
+        const unsigned requiredLength = componentStart + metaPrefix.length();
+        /* The length check runs first so compare() is never called with
+         * an out-of-range position.
+         */
+        const bool meta =
+            childName.length() >= requiredLength &&
+            childName.compare(componentStart, metaPrefix.length(), 
+                              metaPrefix) == 0;
+        LOG_DEBUG(LOG, "isMeta(childName %s) returns %d", 
+                  childName.c_str(), meta);
+        return meta;
+    }
+    /**
+      Build a path for a specific child of the ZooKeeper node.
+ 
+      This is done by appending "/" (unless the ZooKeeper node
+      is the root node) and the name of the child.
+
+      \return the path for the specified child of the ZooKeeper node.
+      \param name the name of the child.
+     */
+    std::string _getChildPath(const std::string & name) const
+    {
+        /* Path construction is delegated to buildChildPath. */
+        std::string childPath(buildChildPath(_path, name));
+        return childPath;
+    }
+    /**
+      Whether the ZooKeeper node has a regular file metadata child node.
+
+      \return true if the ZooKeeper node has a regular file metadata child
+                   node.
+     */
+    bool _hasRegMetadata() const
+    {
+        /* Build the path of the regular file metadata child and look it
+         * up in the cached children.
+         */
+        const std::string metaPath = 
+            _getChildPath(_manager->getCommon().getRegMetadataName());
+        const bool present = _hasChildPath(metaPath);
+        LOG_DEBUG(LOG, "hasRegMetadata() returns %d", present);
+        return present;
+    }
+    /**
+      Whether the ZooKeeper node has a directory metadata child node.
+
+      \return true if the ZooKeeper node has a directory metadata child
+                   node.
+     */
+    bool _hasDirMetadata() const
+    {
+        /* Build the path of the directory metadata child and look it
+         * up in the cached children.
+         */
+        const std::string metaPath = 
+            _getChildPath(_manager->getCommon().getDirMetadataName());
+        const bool present = _hasChildPath(metaPath);
+        LOG_DEBUG(LOG, "hasDirMetadata() returns %d", present);
+        return present;
+    }
+    /** 
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse regular
+      file.
+     
+      It should be a ZkFuse regular file if it has no children or its 
+      only child is its regular file metadata child node.
+
+      \return true if the Zookeeper node should be presented as a ZkFuse
+                   regular file.
+     */
+    bool _isReg() const
+    {
+        const unsigned total = _numChildrenIncludeMeta();
+        /* No children at all: regular file. */
+        bool regular = (total == 0);
+        /* Exactly one child: regular file only if that child is the
+         * regular file metadata node.
+         */
+        if (!regular && total == 1) {
+            regular = _hasRegMetadata();
+        }
+        LOG_DEBUG(LOG, "isReg() returns %d", regular);
+        return regular;
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse directory.
+     
+      It should be a ZkFuse directory if it should not be presented as
+      a ZkFuse regular file.
+      \see _isReg
+
+      \return true if the Zookeeper node should be presented as a ZkFuse
+                   directory.
+     */
+    bool _isDir() const 
+    {
+        /* A node is a directory exactly when it is not a regular file. */
+        const bool regular = _isReg();
+        return regular == false;
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse regular
+      file by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   regular file.
+      \param nameType specifies the ZkFuseNameType.
+      \param doLock whether _mutex should be acquired, it should be true
+                    if the caller did not acquire _mutex.
+     */
+    bool _isRegNameType(ZkFuseNameType nameType, bool doLock = false) const
+    {
+        bool regular;
+        if (nameType == ZkFuseNameRegType) {
+            /* The name type forces a regular file presentation. */
+            regular = true;
+        } else if (nameType == ZkFuseNameDirType) {
+            /* The name type forces a directory presentation. */
+            regular = false;
+        } else if (doLock) {
+            /* Default presentation, caller does not hold _mutex. */
+            AutoLock lock(_mutex);
+            regular = _isReg();
+        } else {
+            /* Default presentation, caller already holds _mutex. */
+            regular = _isReg();
+        }
+        LOG_DEBUG(LOG, "isRegNameType(nameType %d) returns %d", 
+                  int(nameType), regular);
+        return regular;
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse 
+      directory by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   directory.
+      \param nameType specifies the ZkFuseNameType.
+      \param doLock whether _mutex should be acquired, it should be true
+                    if the caller did not acquire _mutex.
+     */
+    bool _isDirNameType(ZkFuseNameType nameType, bool doLock = false) const
+    {
+        bool directory;
+        if (nameType == ZkFuseNameRegType) {
+            /* The name type forces a regular file presentation. */
+            directory = false;
+        } else if (nameType == ZkFuseNameDirType) {
+            /* The name type forces a directory presentation. */
+            directory = true;
+        } else if (doLock) {
+            /* Default presentation, caller does not hold _mutex. */
+            AutoLock lock(_mutex);
+            directory = _isDir();
+        } else {
+            /* Default presentation, caller already holds _mutex. */
+            directory = _isDir();
+        }
+        LOG_DEBUG(LOG, "isDirNameType(nameType %d) returns %d", 
+                  int(nameType), directory);
+        return directory;
+    }
+    /**
+      ZkFuse regular file metadata.
+     */
+    struct Metadata {
+        /** Version of the ZooKeeper node data this metadata is good for. */
+        uint32_t version;
+        /** Access time in milliseconds. */
+        uint64_t atime;
+        /** Modified time in milliseconds. */
+        uint64_t mtime;
+
+        /**
+          Constructor, sets every attribute to zero.
+         */
+        Metadata() : version(0), atime(0), mtime(0) {}
+    };
+    /**
+      Encode Metadata into Data so that it can be stored in a metadata
+      ZooKeeper node.
+
+      Each Metadata attribute is encoded as "<key>: <value>" on single line
+      terminated by newline.
+
+      \param meta the input Metadata.
+      \param data the output Data after encoding.
+     */
+    void _encodeMetadata(const Metadata & meta, Data & data) const
+    {
+        LOG_DEBUG(LOG, "encodeMetadata()");
+        /* One "<key>: <value>" record per line, in a fixed order. */
+        std::ostringstream encoded;
+        encoded << "version: " << meta.version << endl;
+        encoded << "atime: " << meta.atime << endl;
+        encoded << "mtime: " << meta.mtime << endl;
+        data = encoded.str();
+    }
+    /**
+      Decode Data from a metadata child ZooKeeper node into Metadata. 
+
+      Data is a stream of "<key>: <value>" records separated by newline.
+
+      \param data the input Data.
+      \param meta the output Metadata after decoding.
+     */
+    void _decodeMetadata(const Data & data, Metadata & meta) const
+    {
+        LOG_DEBUG(LOG, "decodeMetadata(data %s)", data.c_str());
+        std::istringstream iss(data);
+        char key[128];
+        char value[1024];
+        /* Loop while the stream is good() rather than while !eof().
+         * A malformed record (empty key, or a value line that does not
+         * fit in the buffer) puts the stream in the fail state without
+         * reaching EOF; every later read is then a no-op and a loop that
+         * only tests eof() would never terminate.
+         */
+        while (iss.good()) {
+            key[0] = 0;
+            value[0] = 0;
+            /* Key is everything up to, but not including, the next space. */
+            iss.get(key, sizeof(key), ' ');
+            if (!iss.good()) {
+                /* End of input, or no key could be read. */
+                break;
+            }
+            /* Skip the separator, then take the rest of the line as value. */
+            iss.ignore(32, ' ');
+            iss.getline(value, sizeof(value));
+            LOG_DEBUG(LOG, "key %s value %s", key, value);
+            if (strcmp(key, "version:") == 0) {
+                unsigned long long v = strtoull(value, NULL, 0);
+                LOG_DEBUG(LOG, "version: %llu", v);
+                meta.version = v;
+            }
+            else if (strcmp(key, "atime:") == 0) {
+                unsigned long long v = strtoull(value, NULL, 0);
+                LOG_DEBUG(LOG, "atime: %llu", v);
+                meta.atime = v;
+            }
+            else if (strcmp(key, "mtime:") == 0) {
+                unsigned long long v = strtoull(value, NULL, 0);
+                LOG_DEBUG(LOG, "mtime: %llu", v);
+                meta.mtime = v;
+            }
+            else {
+                LOG_WARN(LOG, "decodeMetadata: path %s unknown key %s %s\n",
+                         _path.c_str(), key, value);
+            }
+        }
+        /* Failure without EOF means decoding stopped on malformed input. */
+        if (iss.fail() && !iss.eof()) {
+            LOG_WARN(LOG, "decodeMetadata: path %s malformed metadata",
+                     _path.c_str());
+        }
+        LOG_DEBUG(LOG, "decodeMetadata done");
+    }
+    /**
+      Flush data to the ZooKeeper node.
+
+      If cached active data has been modified, flush it to the ZooKeeper node.
+      Returns -EIO if the data cannot be written because the cached active
+      data is not the expected version, i.e. ZooKeeper returns ZBADVERSION.
+      -EIO may also indicate a more general failure, such as unable to 
+      communicate with ZooKeeper.
+
+      \return 0 if successful, otherwise negative errno.
+     */
+    int _flush()
+    {
+        LOG_DEBUG(LOG, "flush() path %s", _path.c_str());
+
+        int res = 0;
+        try {
+            if (!_dirtyData) {
+                /* Nothing was modified, so there is nothing to write. */
+                LOG_DEBUG(LOG, "not dirty");
+            } 
+            else {
+                LOG_DEBUG(LOG, "is dirty, active version %d",
+                          _activeStat.version);
+                /* Write conditionally on the cached version. */
+                _manager->getCommon().getZkAdapter()->
+                    setNodeData(_path, _activeData, _activeStat.version);
+                /* assumes version always increments by one if successful */
+                _activeStat.version++;
+                _dirtyData = false;
+                _deleted = false;
+            }
+        } catch (const ZooKeeperException & e) {
+            /* Both failure kinds map to -EIO; only the log text differs. */
+            const bool badVersion = (e.getZKErrorCode() == ZBADVERSION);
+            if (badVersion) {
+                LOG_ERROR(LOG, "flush %s bad version, was %d",
+                          _path.c_str(), _activeStat.version);
+            } 
+            else {
+                LOG_ERROR(LOG, "flush %s exception %s", 
+                          _path.c_str(), e.what());
+            }
+            res = -EIO;
+        }
+
+        LOG_DEBUG(LOG, "flush returns %d", res);
+        return res;
+    }
+    /**
+      Truncate or expand the size of the cached active data.
+
+      This method only changes the size of the cached active data. 
+      This change is committed to ZooKeeper when the cached data 
+      is written to the ZooKeeper node by flush().
+
+      Return -EFBIG if the requested size exceeds the maximum.
+
+      \return 0 if successful, otherwise negative errno.
+      \param size the requested size.
+     */
+    int _truncate(off_t size) 
+    {
+        /* off_t is a signed type, so it is logged as a long long. */
+        LOG_DEBUG(LOG, "truncate(size %lld) path %s", 
+                  static_cast<long long>(size), _path.c_str());
+        
+        int res = 0;
+
+        if (!_isInitialized()) {
+            /* The cached data must be valid before it can be resized. */
+            LOG_DEBUG(LOG, "not initialized");
+            res = -EIO;
+        }
+        else if (size < 0) {
+            /* Reject negative sizes explicitly instead of letting them
+             * wrap around in the unsigned comparisons below.
+             */
+            LOG_DEBUG(LOG, "negative size");
+            res = -EINVAL;
+        }
+        else {
+            /* Compare in the signed domain; size is known to be >= 0. */
+            const off_t currentSize = static_cast<off_t>(_activeData.size());
+            if (size > currentSize) {
+                if (size > static_cast<off_t>(maxDataFileSize)) {
+                    LOG_DEBUG(LOG, "size > maxDataFileSize");
+                    res = -EFBIG;
+                } else {
+                    LOG_DEBUG(LOG, "increase to size");
+                    /* Grow to exactly the requested size, padding the
+                     * new tail with zero bytes.
+                     */
+                    _activeData.resize(static_cast<size_t>(size), '\0');
+                    _dirtyData = true;
+                    res = 0;
+                }
+            }
+            else if (size < currentSize) {
+                LOG_DEBUG(LOG, "decrease to size");
+                _activeData.resize(static_cast<size_t>(size));
+                _dirtyData = true;
+                res = 0;
+            }
+            else {
+                LOG_DEBUG(LOG, "do nothing, same size");
+            }
+        }
+
+        LOG_DEBUG(LOG, "truncate returns %d", res);
+        return res;
+    }
+    /**
+      Remove a ZkFuse directory.
+
+      If force is true, then the ZooKeeper node and its descendants
+      will be deleted.
+
+      If force is false, then this method implements the semantics
+      of removing a ZkFuse directory. It will delete the ZooKeeper node
+      only if the ZooKeeper node have no data and no non-metadata 
+      children.
+      - Return -ENOTDIR if the ZooKeeper node is not considered
+        to be a directory (after taking into consideration the specified
+        ZkFuseNameType). 
+      - Return -ENOTEMPTY if the ZooKeeper node has data or it has 
+        non-metadata children.
+      - Return -ENOENT if the ZooKeeper cannot be deleted, usually this
+        is because it does not exist.
+
+      \return 0 if successful, otherwise negative errno.
+      \param nameType the ZkFuseNameType of the path used to specify the
+                      directory to be removed. It influences whether ZkFuse
+                      considers the ZooKeeper node to be a regular file or
+                      directory. \see ZkFuseNameType
+      \param force    set to true to bypass ZkFuse rmdir semantic check.
+     */
+    int _rmdir(ZkFuseNameType nameType, bool force)
+    {
+        LOG_DEBUG(LOG, "rmdir(nameType %d, force %d) path %s", 
+                  int(nameType), force, _path.c_str());
+
+        int res = 0;
+        try {
+            /* The rmdir semantic checks only apply when not forced. */
+            if (!force) {
+                if (!_isDirNameType(nameType)) {
+                    LOG_DEBUG(LOG, "failed because not directory");
+                    res = -ENOTDIR;
+                } 
+                else if (_hasData()) {
+                    /* rmdir cannot occur if there non-empty "data file" */
+                    LOG_DEBUG(LOG, "failed because node has data");
+                    res = -ENOTEMPTY;
+                } 
+                else if (_hasChildrenExcludeMeta()) {
+                    /* rmdir cannot occur if there are "subdirs" */
+                    LOG_DEBUG(LOG, "failed because node has children");
+                    res = -ENOTEMPTY;
+                } 
+            }
+            /* All checks passed (or were bypassed): delete the node. */
+            if (res == 0) {
+                LOG_DEBUG(LOG, "delete node");
+                const bool removed = _manager->getCommon().getZkAdapter()->
+                     deleteNode(_path, true);
+                if (!removed) {
+                    /* TODO: differentiate delete error conditions,
+                     * e.g. access permission, not exists, ... ?
+                     */
+                    LOG_DEBUG(LOG, "delete failed");
+                    res = -ENOENT;
+                } else {
+                    /* Record the deletion and drop the children cache. */
+                    _deleted = true;
+                    _clearChildren();
+                }
+            }
+        } catch (const std::exception & e) {
+            LOG_ERROR(LOG, "rmdir %s exception %s", _path.c_str(), e.what());
+            res = -EIO;
+        }
+
+        LOG_DEBUG(LOG, "rmdir returns %d", res);
+        return res;
+    }
+    /**
+      Remove a ZkFuse regular file.
+
+      This method implements the semantics of removing a ZkFuse regular file.
+      - If the ZkFuse regular file represents the data part of the 
+        ZooKeeper node which is presented as a ZkFuse directory, 
+        the regular file is virtually deleted by truncating the
+        ZooKeeper node's data. Readdir will not synthesize a regular 
+        file entry for the data part of a ZooKeeper node if 
+        the ZooKeeper node has no data.
+      - If the ZkFuse regular file represents the data part of the 
+        ZooKeeper node which is presented as a ZkFuse regular file,
+        the ZooKeeper node and its decendants are deleted.
+
+      Returns -EISDIR if the ZkFuse regular file cannot be deleted
+      because ZkFuse consider it to be a directory.
+
+      \return 0 if successful, otherwise negative errno.
+      \param nameType the ZkFuseNameType of the path used to specify the
+                      regular file to be removed. It influences whether ZkFuse
+                      considers the ZooKeeper node to be a regular file or
+                      directory. \see ZkFuseNameType
+    */
+    int _unlink(ZkFuseNameType nameType) 
+    {
+        LOG_DEBUG(LOG, "unlink(nameType %d) path %s", 
+                  int(nameType), _path.c_str());
+
+        int res;
+        if (nameType == ZkFuseNameDirType) {
+            /* The path explicitly names a directory. */
+            res = -EISDIR;
+        }
+        else if (nameType == ZkFuseNameRegType) {
+            /* Data part of a directory node: only drop the data.
+             * Data part of a regular file node: delete the whole node.
+             */
+            res = _isDir() ? _truncate(0) : _rmdir(nameType, true);
+        }
+        else {
+            /* Default name type: only regular files can be unlinked. */
+            res = _isReg() ? _rmdir(nameType, true) : -EISDIR;
+        }
+
+        LOG_DEBUG(LOG, "unlink returns %d", res);
+        return res;
+    }
+    /**
+      Whether cached children and data are valid.
+
+      \return true if cached children and data are valid.
+     */
+    bool _isInitialized() const
+    {
+        /* Both the children cache and the data cache must be valid. */
+        if (!_initializedChildren) {
+            return false;
+        }
+        return _initializedData;
+    }
+    /**
+      Clear and invalidate cached children information.
+     */
+    void _clearChildren()
+    {
+        /* Drop the cached children and mark the cache as invalid. */
+        _children.clear();
+        _initializedChildren = false;
+    }
+    /**
+      Clear and invalidate cached data.
+     */
+    void _clearData() 
+    {
+        /* Drop both the active and the latest copies of data and Stat. */
+        _activeData.clear();
+        _latestData.clear();
+        _activeStat.clear();
+        _latestStat.clear();
+        /* Nothing is cached any more, so nothing is valid or dirty. */
+        _dirtyData = false;
+        _initializedData = false;
+    }
+    /**
+      Whether the ZkFuseFile instance is a zombie.
+      
+      It is a zombie if it is not currently open, i.e. its reference count
+      is 0.
+     */
+    bool _isZombie() const 
+    {
+        /* Zombie means there are no outstanding opens. */
+        const bool unreferenced = (0 == _refCount);
+        return unreferenced;
+    }
+    /**
+      Whether the ZkFuseFile instance is currently opened as a regular file
+      only once.
+      
+      It is used to determine when the cached data can be replaced with
+      the latest data. \see _activeData.
+      
+      \return true if its currently opened as a regular file only once.
+     */
+    bool _isOnlyRegOpen() const
+    {
+        /* Opens that are not directory opens are regular file opens. */
+        const int regOpenCount = _refCount - _openDirCount;
+        return regOpenCount == 1;
+    }
+    /**
+      Get attributes without accessing metadata.
+      
+      The atime and mtime returned does not take into consideration
+      overrides present in a metadata file.
+
+      \return 0 if successful, otherwise negative errno.
+      \param stbuf return attributes here.
+      \param nameType specifies the ZkFuseNameType of the ZkFuse path used
+                      to get attributes. It influences whether the directory
+                      or regular file attributes are returned.
+     */
+    int _getattrNoMetaAccess(struct stat & stbuf, ZkFuseNameType nameType) const
+    {
+        /* A node flagged as deleted has no attributes to report. */
+        if (_deleted) {
+            LOG_DEBUG(LOG, "deleted");
+            return -ENOENT;
+        } 
+        /* The attributes are derived from the caches, which must be valid. */
+        if (!_isInitialized()) {
+            LOG_DEBUG(LOG, "not initialized");
+            return -EIO;
+        }
+        assert(_isInitialized());
+
+        if (_isRegNameType(nameType)) {
+            LOG_DEBUG(LOG, "regular");
+            stbuf.st_mode = regMode;
+            stbuf.st_nlink = 1;
+            stbuf.st_size = _activeData.size();
+        } else {
+            LOG_DEBUG(LOG, "directory");
+            stbuf.st_mode = dirMode;
+            /* One link per cached child, plus one when the node has data. */
+            stbuf.st_nlink = 
+                _children.size() + (_activeData.empty() ? 0 : 1);
+            stbuf.st_size = stbuf.st_nlink;
+        }
+        stbuf.st_uid = _getUid();
+        stbuf.st_gid = _getGid();
+        /* IMPORTANT:
+         * Convert from millisecs to secs before assigning to st_atime,
+         * st_mtime, and st_ctime. Otherwise truncating from 64-bit to
+         * 32-bit would lose the most significant 32-bits before the
+         * conversion to secs.
+         */
+        /* st_atime is filled from the Stat mtime, same as st_mtime. */
+        stbuf.st_atime = millisecsToSecs(_activeStat.mtime);
+        stbuf.st_mtime = millisecsToSecs(_activeStat.mtime);
+        stbuf.st_ctime = millisecsToSecs(_activeStat.ctime);
+        stbuf.st_blksize = _getBlkSize();
+        /* Number of blocks is the size rounded up to whole blocks. */
+        stbuf.st_blocks = 
+            (stbuf.st_size + stbuf.st_blksize - 1) / stbuf.st_blksize;
+        return 0;
+    }
+    /**
+      Get the context that should be registered with the data and
+      children watches.
+
+      The context is intended to be a pointer to the ZkFuseFile instance
+      cast to the desired ContextType. NOTE: the current implementation
+      returns a NULL context instead.
+
+      \return the context.
+     */
+    ZooKeeperAdapter::ContextType _getZkContext() const
+    {
+        /* NOTE(review): this returns a NULL context, while the listeners
+         * in this class assert that the event context is non-zero and
+         * cast it to a ZkFuseFile pointer - confirm this is intended.
+         */
+        const ZooKeeperAdapter::ContextType context =
+            (ZooKeeperAdapter::ContextType) NULL;
+        return context;
+    }
+
+    /**
+      DataListener - listener that listens for ZooKeeper data events
+      and calls dataEventReceived on the ZkFuseFile instance 
+      identified by the event context.
+      \see dataEventReceived
+     */
+    class DataListener : public ZKEventListener {
+      public:
+        /**
+          Forward a data event to the ZkFuseFile instance that the event
+          context points to. The context must not be zero.
+         */
+        virtual void eventReceived(const ZKEventSource & source,
+                                   const ZKWatcherEvent & event)
+        {
+            assert(event.getContext() != 0);
+            static_cast<ZkFuseFile *>(event.getContext())->
+                dataEventReceived(event);
+        }
+    };
+    
+    /**
+      ChildrenListener - listener that listens for ZooKeeper children events
+      and calls childrenEventReceived on the ZkFuseFile instance 
+      identified by the event context.
+      \see childrenEventReceived
+     */
+    class ChildrenListener : public ZKEventListener {
+      public:
+        /**
+          Forward a children event to the ZkFuseFile instance that the
+          event context points to. The context must not be zero.
+         */
+        virtual void eventReceived(const ZKEventSource & source,
+                                   const ZKWatcherEvent & event)
+        {
+            assert(event.getContext() != 0);
+            static_cast<ZkFuseFile *>(event.getContext())->
+                childrenEventReceived(event);
+        }
+    };
+    
+    /**
+      Globally shared DataListener. 
+     */
+    static DataListener _dataListener;
+    /**
+      Globally shared ChildrenListener. 
+     */
+    static ChildrenListener _childrenListener;
+
+  public:
+    /**
+      Constructor.
+
+      Sets reference count to one, i.e. it has been constructed because
+      a client is trying to open the path. \see _refCount.
+      Sets deleted to true. \see _deleted.
+      Sets number of currently directory opens to zero. \see _openDirCount.
+      Invalidates the caches for children information and data. 
+
+      \param manager the ZkFuseHandleManager instance who is creating this 
+                     ZkFuseFile instance.
+      \param handle  the handle assigned by the ZkFuseHandleManager instance
+                     for this ZkFuseFile instance.
+      \param path    the ZooKeeper path represented by this ZkFuseFile instance.
+     */
+    ZkFuseFile(const ZkFuseHandleManager::SharedPtr & manager,
+               const int handle,
+               const std::string & path)
+      : _manager(manager), _handle(handle), _path(path), _mutex(),
+        /* constructed on behalf of one opener */
+        _refCount(1),
+        /* existence not yet verified with ZooKeeper */
+        _deleted(true),
+        /* children cache starts empty and invalid */
+        _openDirCount(0),
+        _initializedChildren(false),
+        _hasChildrenListener(false),
+        _children(),
+        /* data cache starts empty, invalid, and clean */
+        _initializedData(false),
+        _hasDataListener(false),
+        _dirtyData(false), 
+        _activeData(), _activeStat(),
+        _latestData(), _latestStat()
+    {
+        LOG_DEBUG(LOG, "constructor() path %s", _path.c_str());
+    }
+    /**
+      Destructor.
+     */
+    ~ZkFuseFile()
+    {
+        LOG_DEBUG(LOG, "destructor() path %s", _path.c_str());
+
+        /* An instance may only be destroyed once nobody has it open. */
+        assert(_isZombie());
+        /* Release both caches. */
+        _clearChildren();
+        _clearData();
+    }
+    /**
+      Whether the ZooKeeper node represented by this ZkFuseFile instance
+      has been deleted.
+      \see _deleted
+
+      \return true if it is deleted.
+     */
+    bool isDeleted() const 
+    { 
+        /* Read the flag while holding _mutex. */
+        AutoLock lock(_mutex);
+        const bool deleted = _deleted;
+        return deleted;
+    }
+    /**
+      Return the path of the ZooKeeper node represented by this ZkFuseFile
+      instance.
+      \see _path.
+
+      \return the ZooKeeper node's path.
+     */
+    const string & getPath() const 
+    {
+        /* _path is const, so it is returned without acquiring _mutex. */
+        return this->_path;
+    }
+    /**
+      Add a childPath to the children information cache.
+      
+      \return 0 if successful, otherwise return negative errno.
+      \param childPath the ZooKeeper path of the child.
+     */
+    int addChild(const std::string & childPath) 
+    {
+        LOG_DEBUG(LOG, "addChild(childPath %s) path %s", 
+                  childPath.c_str(), _path.c_str());
+
+        int res = 0;
+        {
+            AutoLock lock(_mutex);
+            /* Only a valid children cache is updated; otherwise this
+             * is a no-op that reports success.
+             */
+            if (_initializedChildren) {
+                const bool known =
+                    (std::find(_children.begin(), _children.end(), childPath)
+                     != _children.end());
+                if (known) {
+                    LOG_DEBUG(LOG, "child found");
+                    res = -EEXIST;
+                } 
+                else {
+                    LOG_DEBUG(LOG, "child not found, adding child path");
+                    _children.push_back(childPath);
+                }
+            }
+        }
+        
+        LOG_DEBUG(LOG, "addChild returns %d", res);
+        return res;
+    }
+    /**
+      Remove a childPath from the children information cache.
+      
+      \return 0 if successful, otherwise return negative errno.
+      \param childPath the ZooKeeper path of the child.
+     */
+    int removeChild(const std::string & childPath) 
+    {
+        LOG_DEBUG(LOG, "removeChild(childPath %s) path %s", 
+                  childPath.c_str(), _path.c_str());
+
+        int res = 0;
+        {
+            AutoLock lock(_mutex);
+            /* Only a valid children cache is updated; otherwise this
+             * is a no-op that reports success.
+             */
+            if (_initializedChildren) {
+                const NodeNames::iterator pos = 
+                    std::find(_children.begin(), _children.end(), childPath);
+                if (pos == _children.end()) {
+                    LOG_DEBUG(LOG, "child not found");
+                    res = -ENOENT;
+                } 
+                else {
+                    LOG_DEBUG(LOG, "child found");
+                    _children.erase(pos);
+                }
+            }
+        }
+        
+        LOG_DEBUG(LOG, "removeChild returns %d", res);
+        return res;
+    }
+    /**
+      Invalidate the cached children information and cached data.
+      \see _clearChildren
+      \see _clearData
+
+      \param clearChildren set to true to invalidate children information cache.
+      \param clearData set to true to invalidate data cache.
+     */
+    void clear(bool clearChildren = true, bool clearData = true)
+    {
+        LOG_DEBUG(LOG, "clear(clearChildren %d, clearData %d) path %s", 
+                  clearChildren, clearData, _path.c_str());
+
+        /* Both invalidations run under a single acquisition of _mutex;
+         * the lock is taken even when neither flag is set.
+         */
+        AutoLock guard(_mutex);
+        if (clearChildren) {
+            _clearChildren();
+        }
+        if (clearData) {
+            _clearData();
+        }
+    }
+    /** 
+      Whether reference count is zero.
+      \see _refCount
+
+      \return true if reference count is zero.
+     */
+    bool isZombie() const 
+    {
+        /* Sample the reference count while holding _mutex. */
+        AutoLock guard(_mutex);
+        const bool unreferenced = (0 == _refCount);
+        return unreferenced;
+    }
+    /**
+      Increment the reference count of the ZkFuseFile instance.
+
+      This method may be called by a ZkFuseFileManager instance while
+      holding the ZkFuseFileManager's _mutex. To avoid deadlocks, 
+      this method must never invoke a ZkFuseFileManager instance 
+      directly or indirectly while holding the ZkFuseFile instance's
+      _mutex.
+      \see _refCount
+
+      \return the post-increment reference count.
+      \param count value to increment the reference count by.
+     */
+    int incRefCount(int count = 1)
+    {
+        LOG_DEBUG(LOG, "incRefCount(count %d) path %s", count, _path.c_str());
+
+        int updated = 0;
+        {
+            /* Adjust and snapshot the counter under _mutex; the result is
+             * logged after the lock has been released.
+             */
+            AutoLock guard(_mutex);
+            _refCount += count;
+            assert(_refCount >= 0);
+            updated = _refCount;
+        }
+
+        LOG_DEBUG(LOG, "incRefCount returns %d", updated); 
+        return updated;
+    }
+    /**
+      Decrement the reference count of the ZkFuseFile instance.
+
+      This method may be called by a ZkFuseFileManager instance while
+      holding the ZkFuseFileManager's _mutex. To avoid deadlocks, 
+      this method must never invoke a ZkFuseFileManager instance 
+      directly or indirectly while holding the ZkFuseFile instance's
+      _mutex.
+      \see _refCount
+
+      \return the post-decrement reference count.
+      \param count value to decrement the reference count by.
+     */
+    int decRefCount(int count = 1)
+    {
+        /* A decrement is an increment by the negated amount. */
+        const int delta = -count;
+        return incRefCount(delta);
+    }
+    /**
+      Increment the count of the number of times the ZkFuseFile instance
+      has been opened as a directory.
+      
+      This count is incremented by opendir and decremented by releasedir.
+      \see _openDirCount.
+
+      \return the post-increment count.
+      \param count the value to increment the count by.
+     */
+    int incOpenDirCount(int count = 1)
+    {
+        LOG_DEBUG(LOG, "incOpenDirCount(count %d) path %s", 
+                  count, _path.c_str());
+
+        int updated = 0;
+        {
+            AutoLock guard(_mutex);
+            _openDirCount += count;
+            /* The directory-open count must stay non-negative and can
+             * never exceed the overall reference count.
+             */
+            assert(_openDirCount >= 0);
+            assert(_openDirCount <= _refCount);
+            updated = _openDirCount;
+        }
+
+        LOG_DEBUG(LOG, "incOpenDirCount returns %d", updated); 
+        return updated;
+    }
+    /**
+      Decrement the count of the number of times the ZkFuseFile instance
+      has been opened as a directory.
+      
+      This count is incremented by opendir and decremented by releasedir.
+      \see _openDirCount.
+
+      \return the post-decrement count.
+      \param count the value to decrement the count by.
+     */
+    int decOpenDirCount(int count = 1)
+    {
+        /* A decrement is an increment by the negated amount. */
+        const int delta = -count;
+        return incOpenDirCount(delta);
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse 
+      directory by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+      \see _isDirNameType
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   directory.
+      \param nameType specifies the ZkFuseNameType.
+     */
+    bool isDirNameType(ZkFuseNameType nameType) const
+    {   /* Forwards nameType to _isDirNameType() with the second argument
+         * fixed to true; no lock is acquired in this wrapper itself.
+         * NOTE(review): the meaning of that flag is defined by
+         * _isDirNameType, which is not visible here - confirm. */
+        return _isDirNameType(nameType, true);
+    }
+    /**
+      Whether ZkFuse should present the ZooKeeper node as a ZkFuse 
+      regular file by taking into account the specified ZkFuseNameType.
+
+      The ZkFuseNameType may override the default ZkFuse presentation of
+      a ZooKeeper node. 
+      \see _isRegNameType
+
+      \return true if ZkFuse should present the ZooKeeper node as a ZkFuse
+                   regular file.
+      \param nameType specifies the ZkFuseNameType.
+     */
+    bool isRegNameType(ZkFuseNameType nameType) const
+    {   /* Forwards nameType to _isRegNameType() with the second argument
+         * fixed to true; no lock is acquired in this wrapper itself.
+         * NOTE(review): the meaning of that flag is defined by
+         * _isRegNameType, which is not visible here - confirm. */
+        return _isRegNameType(nameType, true);
+    }
+    /**
+      Get the active data.
+      \see _activeData
+
+      \param data return data here.
+     */
+    void getData(Data & data) const
+    {
+        /* The copy into the caller's object is made while holding _mutex. */
+        AutoLock guard(_mutex);
+        data = _activeData;
+    }
+    /**
+      Set the active data.
+      \see _activeData
+
+      Return -EFBIG if the data to be written is bigger than the maximum
+      permitted size (and no data is written).
+
+      \return 0 if successful, otherwise return negative errno.
+      \param data set to this data.
+      \param doFlush whether to flush the data to the ZooKeeper node.
+     */
+    int setData(const Data & data, bool doFlush)
+    {
+        LOG_DEBUG(LOG, "setData(doFlush %d) path %s", doFlush, _path.c_str());
+
+        /* Oversized payloads are rejected before the lock is taken and
+         * before any cached state is touched.
+         */
+        const bool tooBig = data.size() > maxDataFileSize;
+        int status = tooBig ? -EFBIG : 0;
+
+        if (!tooBig) {
+            AutoLock guard(_mutex);
+            _activeData = data;
+            _dirtyData = true;
+            if (doFlush) {
+                /* The flush result becomes the overall result. */
+                status = _flush();
+            }
+        }
+
+        LOG_DEBUG(LOG, "setData() returns %d", status);
+        return status;
+    }
+    /**
+      Update the children information and the data caches as needed.
+
+      This method is invoked when a ZkFuse regular file or directory 
+      implemented by this ZkFuseFile instance is opened, e.g.
+      using open or opendir. It attempts to:
+      - make sure that the cache has valid children information
+      - register for watches for changes if no previous watches have
+        been registered.
+
+      The newFile flag indicates if the ZkFuseFile instance has just
+      been constructed and that ZooKeeper has not been contacted to
+      determine if the ZooKeeper path for this file really exists.
+      When a ZkFuseFile instance is created, the _deleted flag is set to
+      true because it is safer to assume that the ZooKeeper node does
+      not exist. The newFile flag causes the _deleted flag to be
+      ignored and ZooKeeper to be contacted to update the caches.
+
+      If the newFile flag is false, then the ZkFuseFile instance is
+      currently open and has been opened before. Hence, these previous
+      opens should have contacted ZooKeeper and would likely have learned
+      from ZooKeeper whether the ZooKeeper path exists. Therefore, 
+      the _deleted flag should be trustworthy, i.e. it has accurate 
+      information on whether the ZooKeeper path actually exists.
+
+      \return 0 if successful, otherwise return negative errno.
+      \param newFile set to true if the ZkFuseFile instance is newly created.
+     */
+    int update(bool newFile)
+    {
+        LOG_DEBUG(LOG, "update(newFile %d) path %s", newFile, _path.c_str());
+
+        int res = 0;
+        {
+            AutoLock lock(_mutex); // held across the ZooKeeper calls below
+
+            /* At this point, cannot be zombie.
+             */
+            assert(!_isZombie());
+            if (!newFile && _deleted) {
+                /* Deleted file, don't bother to update caches */
+                LOG_DEBUG(LOG, "deleted, not new file"); 
+                res = -ENOENT;
+            }
+            else {
+                try {
+                    LOG_DEBUG(LOG, "initialized children %d, data %d",
+                              _initializedChildren, _initializedData);
+                    LOG_DEBUG(LOG, "has children watch %d, data watch %d",
+                              _hasChildrenListener, _hasDataListener);
+                    /*
+                     * Children handling starts here.
+                     * If there is no children listener,
+                     *    then one must be established.
+                     * If there is no cached children information, 
+                     *    then it must be fetched. 
+                     * It just happens that the same ZooKeeper API 
+                     * is used for both.
+                     */
+                    if (_initializedChildren == false ||
+                        _hasChildrenListener == false
+#ifdef ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+                        /* HACK for root node because changes to children
+                         * on a root node do not cause children watches to
+                         * fire.
+                         */
+                        || _path.length() == 1
+#endif // ZOOKEEPER_ROOT_CHILDREN_WATCH_BUG
+                    ) {
+                        LOG_DEBUG(LOG, "update children");
+                        NodeNames children;
+                        _manager->getCommon().getZkAdapter()->
+                          getNodeChildren( children, _path, 
+                                          &_childrenListener, _getZkContext());
+                        _hasChildrenListener = true;
+                        LOG_DEBUG(LOG, "update children done"); 
+                        _children.swap(children);
+                        _initializedChildren = true;
+                        /* Since getNodeChildren is successful, the
+                         * path must exist */
+                        _deleted = false;
+                    }
+                    else {
+                        /* Children information is fresh since 
+                         * it is initialized and has been 
+                         * kept up to date by the listener.
+                         */
+                    }
+                    /*
+                     * Data handling starts here.
+                     */
+                    assert(newFile == false || _isOnlyRegOpen());
+                    if (!_isOnlyRegOpen()) {
+                        /* If is already currently opened by someone,
+                         * then don't update data with latest from ZooKeeper,
+                         * use current active data (which may be initialized 
+                         * or not).
+                         * \see _activeData
+                         */
+                        LOG_DEBUG(LOG, "node currently in-use, no data update");
+                    } 
+                    else {
+                        /* If not opened/reopened by someone else, 
+                         *    then perform more comprehensive checks to
+                         *    make sure data and listener are set up
+                         *    correctly.
+                         * If there is no data listener,
+                         *    then one must be established.
+                         * If there is no cached data, 
+                         *    then it must be fetched.
+                         * It just happens that the same ZooKeeper API 
+                         * is used for both.  
+                         */
+                        LOG_DEBUG(LOG, "node first use or reuse");
+                        if (_initializedData == false ||
+                            _hasDataListener == false) {
+                            /* Don't have any data for now or need to register
+                             * for callback */
+                            LOG_DEBUG(LOG, "update data");
+                            _latestData = 
+                                _manager->getCommon().getZkAdapter()->
+                                getNodeData(_path, &_dataListener, 
+                                            _getZkContext(), 
+                                            &_latestStat);
+                            _hasDataListener = true;
+                            LOG_DEBUG(LOG, 
+                                      "update data done, latest version %d",
+                                      _latestStat.version);
+                            /* Since getNodeData is successful, the
+                             * path must exist. */
+                            _deleted = false;
+                        } 
+                        else {
+                            /* Data is fresh since it is initialized and
+                             * has been kept up to date by the listener.
+                             */
+                        }
+                        /* Update active data to the same as the most 
+                         * recently acquired data.
+                         */
+                        _activeData = _latestData;
+                        _activeStat = _latestStat;
+                        _initializedData = true;
+                        _dirtyData = false;
+                        LOG_DEBUG(LOG, "update set active version %d",
+                                  _activeStat.version);
+                    } 
+                    res = 0;
+                } catch (const ZooKeeperException & e) {
+                    /* May have ZNONODE exception if path does not exist. */
+                    if (e.getZKErrorCode() == ZNONODE) {
+                        LOG_DEBUG(LOG, "update %s exception %s", 
+                                  _path.c_str(), e.what());
+                        /* Path does not exist, set _deleted, 
+                         * clear children information cache 
+                         */
+                        _deleted = true;
+                        _clearChildren();
+                        res = -ENOENT;
+                    } else {
+                        /* Any other ZooKeeper failure is reported as an
+                         * I/O error; the caches are left as they were at
+                         * the point of failure.
+                         */
+                        LOG_ERROR(LOG, "update %s exception %s", 
+                                  _path.c_str(), e.what());
+                        res = -EIO;
+                    }
+                }
+            }
+        }
+    
+        LOG_DEBUG(LOG, "update returns %d", res);
+        return res;
+    }
+    /**
+      Process a data event.
+
+      This method may:
+      - Invalidate the data cache.
+      - Invoke ZooKeeper to update the data cache and register a new
+        data watch so that the cache can be kept in-sync with the
+        ZooKeeper node's data.
+
+      This method does not change the active data. Active data will be
+      changed to a later version by update() at the appropriate time.
+      \see update.
+     */
+    void dataEventReceived(const ZKWatcherEvent & event) 
+    {
+        bool reclaim = false; // never modified below; only printed by the final log
+        int eventType = event.getType();
+        int eventState = event.getState();
+
+        /*
+          IMPORTANT: 
+          
+          Do not mark ZkFuseFile instance as deleted when a DELETED_EVENT 
+          is received without checking with ZooKeeper. An example of 
+          problematic sequence would be:
+
+          1. Create node.
+          2. Set data and watch.
+          3. Delete node.
+          4. Create node.
+          5. Deleted event received.
+
+          It is a bug to mark the ZkFuseFile instance as deleted after 
+          step 5 because the node exists.
+          
+          Therefore, this method should always contact ZooKeeper to keep the
+          data cache (and deleted status) up-to-date if necessary.
+         */
+        LOG_DEBUG(LOG, "dataEventReceived() path %s, type %d, state %d",
+                  _path.c_str(), eventType, eventState);
+        {
+            AutoLock lock(_mutex); // held across the ZooKeeper call below
+
+            _hasDataListener = false;
+            /* If zombie, then invalidate cached data.
+             * This clears _initializedData and eliminates 
+             * the need to get the latest data from ZooKeeper and
+             * re-register the data watch. 
+             */
+            if (_isZombie() && _initializedData) {
+                LOG_DEBUG(LOG, "invalidate data");
+                _clearData();
+            }
+            else if ((_refCount - _openDirCount) > 0) {
+                /* Don't invalidate cached data because clients of currently
+                 * open files don't expect the data to change from under them.
+                 * If data acted upon by these clients has become stale,
+                 * then the clients will get an error when ZkFuse attempts to
+                 * flush dirty data. The clients will not get error 
+                 * notification if they don't modify the stale data.
+                 *
+                 * If data cache is cleared here, then the following code 
+                 * to update data cache and re-register data watch will not 
+                 * be executed and may result in the cached data being
+                 * out-of-sync with ZooKeeper.
+                 */
+                LOG_WARN(LOG, 
+                         "%s data has changed while in-use, "
+                         "type %d, state %d, refCount %d",
+                         _path.c_str(), eventType, eventState, _refCount);
+            }
+            /* If cache was valid and still connected
+             * then get the latest data from ZooKeeper 
+             * and re-register data watch. This is required to keep 
+             * the data cache in-sync with ZooKeeper.
+             * Only _latestData/_latestStat are refreshed here; the
+             * active data is not touched by this method.
+             */ 
+            if (_initializedData && 
+                eventState == CONNECTED_STATE 
+               ) {
+                try {
+                    LOG_DEBUG(LOG, "register data watcher");
+                    _latestData = 
+                        _manager->getCommon().getZkAdapter()->
+                        getNodeData(_path, &_dataListener, _getZkContext(), 
+                                    &_latestStat);
+                    _hasDataListener = true;
+                    LOG_DEBUG(LOG, 
+                              "get data done, version %u, cversion %u done",
+                              _latestStat.version, _latestStat.cversion);
+                    _deleted = false;
+                } catch (const ZooKeeperException & e) {
+                    if (e.getZKErrorCode() == ZNONODE) { // node is gone: mark deleted, drop children cache
+                        _deleted = true;
+                        _clearChildren();
+                    }
+                    LOG_ERROR(LOG, "dataEventReceived %s exception %s", 
+                              _path.c_str(), e.what());
+                }
+            }
+        }
+        LOG_DEBUG(LOG, "dataEventReceived return %d", reclaim);
+    }
+    /**
+      Process a children event.
+
+      This method may:
+      - Invalidate the children information cache.
+      - Invoke ZooKeeper to update the children cache and register a new
+        children watch so that the cache can be kept in-sync with the
+        ZooKeeper node's children information.
+     */
+    void childrenEventReceived(const ZKWatcherEvent & event) 
+    {
+        /* Never set in this method; kept only because the final log
+         * statement prints it.
+         */
+        bool reclaim = false;
+        int eventType = event.getType();
+        int eventState = event.getState();
+
+        LOG_DEBUG(LOG, "childrenEventReceived() path %s, type %d, state %d",
+                  _path.c_str(), eventType, eventState);
+        {
+            AutoLock lock(_mutex);
+
+            /* No children watch is considered registered until one is
+             * re-established by a successful getNodeChildren() below.
+             */
+            _hasChildrenListener = false;
+            /* If zombie or disconnected, then invalidate cached children 
+             * information. This clears _initializedChildren and eliminates 
+             * the need to get the latest children information and
+             * re-register the children watch.
+             */
+            if (_initializedChildren && 
+                (_isZombie() || eventState != CONNECTED_STATE)) {
+                LOG_DEBUG(LOG, "invalidate children");
+                _clearChildren();
+            }
+            else if (_initializedChildren) {
+                /* Do not clear the children information cache here:
+                 * the refresh below only runs while _initializedChildren
+                 * is still set, so clearing now would skip it and leave
+                 * the cache out-of-sync with ZooKeeper.
+                 *
+                 * If the refresh fails, the cache is invalidated in the
+                 * exception handler and, on the next open, update() will
+                 * fetch the children information again because
+                 * _hasChildrenListener is false.
+                 */
+                LOG_WARN(LOG, 
+                         "%s children has changed while in-use, "
+                         "type %d, state %d, refCount %d",
+                         _path.c_str(), eventType, eventState, _refCount);
+            }
+            /* If children cache was valid and still connected, 
+             * then get the latest children information from ZooKeeper 
+             * and re-register children watch. This is required to 
+             * keep the children information cache in-sync with ZooKeeper.
+             */ 
+            if (_initializedChildren && 
+                eventState == CONNECTED_STATE 
+               ) {
+                try {
+                    LOG_DEBUG(LOG, "update children");
+                    NodeNames children;
+                    _manager->getCommon().getZkAdapter()->
+                      getNodeChildren(children, _path, 
+                                      &_childrenListener, _getZkContext());
+                    _hasChildrenListener = true;
+                    LOG_DEBUG(LOG, "update children done");
+                    _children.swap(children);
+                    /* Since getNodeChildren is successful, the path
+                     * must exist. */
+                    _deleted = false;
+                } catch (const ZooKeeperException & e) {
+                    if (e.getZKErrorCode() == ZNONODE) {
+                        _deleted = true;
+                    }
+                    LOG_ERROR(LOG, "childrenEventReceived %s exception %s", 
+                              _path.c_str(), e.what());
+                    /* Invalidate through _clearChildren() for every
+                     * failure, not just ZNONODE. Merely emptying
+                     * _children would leave _initializedChildren set, so
+                     * an empty list would be presented as valid cached
+                     * children information and addChild()/removeChild()
+                     * would keep operating on it.
+                     */
+                    _clearChildren();
+                }
+            }
+        }
+        LOG_DEBUG(LOG, "childrenEventReceived returns %d", reclaim);
+    }
+    /**
+      Truncate or expand the size of the cached active data.
+
+      This method only changes the size of the cached active data. 
+      This change is committed to ZooKeeper when the cached data 
+      is written to the ZooKeeper node by flush().
+
+      Return -EFBIG if the requested size exceeds the maximum.
+
+      \return 0 if successful, otherwise negative errno.
+      \param size the requested size.
+     */
+    int truncate(off_t size) 
+    {
+        /* Run _truncate() while holding _mutex and pass its result
+         * straight through.
+         */
+        AutoLock guard(_mutex); 
+        return _truncate(size);
+    }
+    /**
+      Copy range of active data into specified output buffer.
+
+      \return if successful, return number of bytes copied, otherwise
+              return negative errno.
+      \param buf  address of the output buffer.
+      \param size size of the output buffer and desired number of bytes to copy.
+      \param offset offset into active data to start copying from.
+     */
+    int read(char *buf, size_t size, off_t offset) const
+    {
+        /* off_t may be wider than size_t (e.g. 64-bit file offsets on a
+         * 32-bit build), so off_t values are logged as long long instead
+         * of with %zu, which would also misalign the following %s argument.
+         */
+        LOG_DEBUG(LOG, "read(size %zu, off_t %lld) path %s", 
+                  size, static_cast<long long>(offset), _path.c_str());
+
+        int res = 0;
+
+        {
+            AutoLock lock(_mutex);
+            if (!_initializedData) {
+                LOG_DEBUG(LOG, "not initialized");
+                res = -EIO;
+            }
+            else if (offset < 0) {
+                /* A negative offset would address memory before the start
+                 * of the active data.
+                 */
+                LOG_DEBUG(LOG, "negative offset");
+                res = -EINVAL;
+            }
+            else {
+                const off_t fileSize = _activeData.size();
+                if (offset > fileSize) {
+                    /* Reading past the end yields zero bytes, not an error. */
+                    LOG_DEBUG(LOG, "offset > fileSize %lld", 
+                              static_cast<long long>(fileSize));
+                    res = 0;
+                } 
+                else {
+                    /* Here 0 <= offset <= fileSize, so the remainder is
+                     * non-negative. Comparing size against the remainder
+                     * avoids computing offset + size, which mixes signed
+                     * and unsigned operands and could wrap.
+                     */
+                    const size_t available = 
+                        static_cast<size_t>(fileSize - offset);
+                    if (size > available) {
+                        size = available;
+                        LOG_DEBUG(LOG, 
+                                  "reducing read size to %zu for fileSize %lld",
+                                  size, static_cast<long long>(fileSize));
+                    }
+                    copy(_activeData.begin() + offset,
+                         _activeData.begin() + offset + size,
+                         buf);
+                    res = static_cast<int>(size);
+                }
+            }
+        }
+
+        LOG_DEBUG(LOG, "read returns %d", res);
+        return res; 
+    }
+    /**
+      Copy buffer content to active data.
+
+      \return if successful, return number of bytes copied, otherwise
+              return negative errno.
+      \param buf  address of the buffer.
+      \param size size of the input buffer and desired number of bytes to copy.
+      \param offset offset into active data to start copying to.
+     */
+    int write(const char *buf, size_t size, off_t offset)
+    {
+        LOG_DEBUG(LOG, "write(size %zu, off_t %zu) path %s", 
+                  size, offset, _path.c_str());
+
+        int res = 0;
+
+        {
+            AutoLock lock(_mutex);
+            if (!_initializedData) {
+                LOG_DEBUG(LOG, "not initialized");
+                res = -EIO;
+            }

[... 1878 lines stripped ...]


Mime
View raw message