sentry-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ak...@apache.org
Subject sentry git commit: SENTRY-1811: Optimize data structures used in HDFS sync (Misha Dmitriev, reviewed by Alex Kolbasov)
Date Fri, 23 Jun 2017 23:05:58 GMT
Repository: sentry
Updated Branches:
  refs/heads/master 28f9610a8 -> 8d53da5fb


SENTRY-1811: Optimize data structures used in HDFS sync (Misha Dmitriev, reviewed by Alex
Kolbasov)


Project: http://git-wip-us.apache.org/repos/asf/sentry/repo
Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/8d53da5f
Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/8d53da5f
Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/8d53da5f

Branch: refs/heads/master
Commit: 8d53da5fbd70236728c69e3f2faada1cdc73cbb5
Parents: 28f9610
Author: Alexander Kolbasov <akolb@cloudera.com>
Authored: Fri Jun 23 16:05:40 2017 -0700
Committer: Alexander Kolbasov <akolb@cloudera.com>
Committed: Fri Jun 23 16:05:40 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/sentry/hdfs/HMSPaths.java   | 109 +++++++++++++------
 .../org/apache/sentry/hdfs/HMSPathsDumper.java  |  15 ++-
 .../org/apache/sentry/hdfs/TestHMSPaths.java    |  16 +--
 3 files changed, 91 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sentry/blob/8d53da5f/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
index 18c921d..409d9f3 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
@@ -133,12 +133,13 @@ public class HMSPaths implements AuthzPaths {
     private String pathElement;
 
     // A set of authorizable objects associated with this entry. Authorizable
-    // object should be case insensitive.
-    private Set<String> authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+    // object should be case insensitive. The set is allocated lazily to avoid
+    // wasting memory due to empty sets.
+    private Set<String> authzObjs;
 
-    // Path of child element to the path entry mapping.
-    // e.g. 'b' -> '/a/b'
-    private final Map<String, Entry> children = new HashMap<>();
+    // Path of child element to the path entry mapping, e.g. 'b' -> '/a/b'
+    // This is allocated lazily to avoid wasting memory due to empty maps.
+    private Map<String, Entry> children;
 
     /**
      * Construct an Entry with one authzObj.
@@ -151,7 +152,7 @@ public class HMSPaths implements AuthzPaths {
     Entry(Entry parent, String pathElement, EntryType type, String authzObj) {
       this.parent = parent;
       this.type = type;
-      this.pathElement = pathElement;
+      this.pathElement = pathElement.intern();
       addAuthzObj(authzObj);
     }
 
@@ -165,33 +166,65 @@ public class HMSPaths implements AuthzPaths {
     Entry(Entry parent, String pathElement, EntryType type, Set<String> authzObjs)
{
       this.parent = parent;
       this.type = type;
-      this.pathElement = pathElement;
+      this.pathElement = pathElement.intern();
       addAuthzObjs(authzObjs);
     }
 
-    // Get all the mapping of the children element to
-    // the path entries.
-    Map<String, Entry> getChildren() {
-      return children;
+    Entry getChild(String pathElement) {
+      if (children == null) {
+        return null;
+      }
+      return children.get(pathElement);
+    }
+
+    void putChild(String pathElement, Entry entry) {
+      if (children == null) {
+        // We allocate this map lazily and with small initial capacity to avoid
+        // memory waste due to empty and underpopulated maps.
+        children = new HashMap<>(2);
+      }
+      children.put(pathElement.intern(), entry);
+    }
+
+    Entry removeChild(String pathElement) {
+      return children.remove(pathElement);
+    }
+
+    boolean hasChildren() { return children != null && !children.isEmpty(); }
+
+    int numChildren() { return children == null ? 0 : children.size(); }
+
+    Collection<Entry> childrenValues() {
+      return children != null ? children.values() : Collections.<Entry>emptyList();
     }
 
     void clearAuthzObjs() {
-      authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+      authzObjs = null;
     }
 
     void removeAuthzObj(String authzObj) {
-      authzObjs.remove(authzObj);
+      if (authzObjs != null) {
+        authzObjs.remove(authzObj);
+      }
     }
 
     void addAuthzObj(String authzObj) {
       if (authzObj != null) {
-        authzObjs.add(authzObj);
+        if (authzObjs == null) {
+          authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+        }
+        authzObjs.add(authzObj.intern());
       }
     }
 
     void addAuthzObjs(Set<String> authzObjs) {
       if (authzObjs != null) {
-        this.authzObjs.addAll(authzObjs);
+        if (this.authzObjs == null) {
+          this.authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+        }
+        for (String authzObj : authzObjs) {
+          this.authzObjs.add(authzObj.intern());
+        }
       }
     }
 
@@ -205,7 +238,7 @@ public class HMSPaths implements AuthzPaths {
 
     public String toString() {
       return String.format("Entry[fullPath: %s, type: %s, authObject: %s]",
-          getFullPath(), type, Joiner.on(",").join(authzObjs));
+          getFullPath(), type, authzObjs != null ? Joiner.on(",").join(authzObjs) : "");
     }
 
     /**
@@ -223,11 +256,11 @@ public class HMSPaths implements AuthzPaths {
       // The loop is resilient to 0 or 1 element list.
       for (int i = 0; i < pathElements.size() - 1; i++) {
         String elem = pathElements.get(i);
-        Entry child = parent.getChildren().get(elem);
+        Entry child = parent.getChild(elem);
 
         if (child == null) {
           child = new Entry(parent, elem, EntryType.DIR, (String) null);
-          parent.getChildren().put(elem, child);
+          parent.putChild(elem, child);
         }
 
         parent = child;
@@ -251,7 +284,7 @@ public class HMSPaths implements AuthzPaths {
       Entry entryParent = createParent(pathElements);
 
       String lastPathElement = pathElements.get(pathElements.size() - 1);
-      Entry child = entryParent.getChildren().get(lastPathElement);
+      Entry child = entryParent.getChild(lastPathElement);
 
       // Create the child entry if not found. If found and the entry is
       // already a prefix or authzObj type, then only add the authzObj.
@@ -259,7 +292,7 @@ public class HMSPaths implements AuthzPaths {
       // and add the authzObj.
       if (child == null) {
         child = new Entry(entryParent, lastPathElement, type, authzObj);
-        entryParent.getChildren().put(lastPathElement, child);
+        entryParent.putChild(lastPathElement, child);
       } else if (type == EntryType.AUTHZ_OBJECT &&
           (child.getType() == EntryType.PREFIX || child.getType() == EntryType.AUTHZ_OBJECT))
{
         child.addAuthzObj(authzObj);
@@ -313,7 +346,7 @@ public class HMSPaths implements AuthzPaths {
      */
     private void deleteFromParent() {
       if (getParent() != null) {
-        getParent().getChildren().remove(getPathElement());
+        getParent().removeChild(getPathElement());
         getParent().deleteIfDangling();
         parent = null;
       }
@@ -321,13 +354,15 @@ public class HMSPaths implements AuthzPaths {
 
     public void deleteAuthzObject(String authzObj) {
       if (getParent() != null) {
-        if (getChildren().isEmpty()) {
+        if (!hasChildren()) {
 
           // Remove the authzObj on the path entry. If the path
           // entry no longer maps to any authzObj, removes the
           // entry recursively.
-          authzObjs.remove(authzObj);
-          if (authzObjs.isEmpty()) {
+          if (authzObjs != null) {
+            authzObjs.remove(authzObj);
+          }
+          if (authzObjs == null || authzObjs.isEmpty()) {
             deleteFromParent();
           }
         } else {
@@ -337,7 +372,9 @@ public class HMSPaths implements AuthzPaths {
           // the path entry.
           if (getType() == EntryType.AUTHZ_OBJECT) {
             setType(EntryType.DIR);
-            authzObjs.remove(authzObj);
+            if (authzObjs != null) {
+              authzObjs.remove(authzObj);
+            }
           }
         }
       }
@@ -353,7 +390,7 @@ public class HMSPaths implements AuthzPaths {
     private void moveTo(Entry newParent, String pathElem) {
       Preconditions.checkNotNull(newParent);
       Preconditions.checkArgument(!pathElem.isEmpty());
-      if (newParent.children.containsKey(pathElem)) {
+      if (newParent.getChild(pathElem) != null) {
         LOG.warn(String.format(
             "Attempt to move %s to %s: entry with the same name %s already exists",
             this.getFullPath(), newParent.getFullPath(), pathElem));
@@ -361,13 +398,13 @@ public class HMSPaths implements AuthzPaths {
       }
       deleteFromParent();
       parent = newParent;
-      parent.children.put(pathElem, this);
-      pathElement = pathElem;
+      parent.putChild(pathElem, this);
+      pathElement = pathElem.intern();
     }
 
     public void delete() {
       if (getParent() != null) {
-        if (getChildren().isEmpty()) {
+        if (!hasChildren()) {
           deleteFromParent();
         } else {
           // if the entry was for an authz object and has children, we
@@ -381,7 +418,7 @@ public class HMSPaths implements AuthzPaths {
     }
 
     private void deleteIfDangling() {
-      if (getChildren().isEmpty() && getType().isRemoveIfDangling()) {
+      if (!hasChildren() && getType().isRemoveIfDangling()) {
         delete();
       }
     }
@@ -398,12 +435,14 @@ public class HMSPaths implements AuthzPaths {
       return pathElement;
     }
 
+    /**
+     * @return the set of auth objects. The returned set should be used only
+     * for querying, not for any modifications.
+     */
     public Set<String> getAuthzObjs() {
-      return authzObjs;
+      return authzObjs != null ? authzObjs : Collections.<String>emptySet();
     }
 
-
-
     public Entry findPrefixEntry(List<String> pathElements) {
       Preconditions.checkArgument(pathElements != null,
           "pathElements cannot be NULL");
@@ -416,7 +455,7 @@ public class HMSPaths implements AuthzPaths {
       if (index == pathElements.size()) {
         prefixEntry = null;
       } else {
-        Entry child = getChildren().get(pathElements.get(index));
+        Entry child = getChild(pathElements.get(index));
         if (child != null) {
           if (child.getType() == EntryType.PREFIX) {
             prefixEntry = child;
@@ -443,7 +482,7 @@ public class HMSPaths implements AuthzPaths {
           found = this;
         }
       } else {
-        Entry child = getChildren().get(pathElements[index]);
+        Entry child = getChild(pathElements[index]);
         if (child != null) {
           if (index == pathElements.length - 1) {
             found = (child.getAuthzObjs().size() != 0) ? child : lastAuthObj;

http://git-wip-us.apache.org/repos/asf/sentry/blob/8d53da5f/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
index 7dfb5fa..480a29d 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
@@ -17,6 +17,7 @@
  */
 package org.apache.sentry.hdfs;
 
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -58,9 +59,9 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
 
   private void cloneToTPathEntry(Entry parent, TPathEntry tParent,
       AtomicInteger counter, Map<Integer, TPathEntry> idMap) {
-    for (Entry child : parent.getChildren().values()) {
+    for (Entry child : parent.childrenValues()) {
       Tuple childTuple = createTPathEntry(child, counter, idMap);
-      tParent.getChildren().add(childTuple.id);
+      tParent.addToChildren(childTuple.id);
       cloneToTPathEntry(child, childTuple.entry, counter, idMap);
     }
   }
@@ -68,9 +69,11 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
   private Tuple createTPathEntry(Entry entry, AtomicInteger idCounter,
       Map<Integer, TPathEntry> idMap) {
     int myId = idCounter.incrementAndGet();
+    Set<Integer> children = entry.hasChildren() ?
+        new HashSet<Integer>(entry.numChildren()) : Collections.<Integer>emptySet();
     TPathEntry tEntry = new TPathEntry(entry.getType().getByte(),
-        entry.getPathElement(), new HashSet<Integer>());
-    if (entry.getAuthzObjs().size() != 0) {
+        entry.getPathElement(), children);
+    if (!entry.getAuthzObjs().isEmpty()) {
       tEntry.setAuthzObjs(entry.getAuthzObjs());
     }
     idMap.put(myId, tEntry);
@@ -99,7 +102,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
       Entry child = null;
       boolean isChildPrefix = hasCrossedPrefix;
       if (!hasCrossedPrefix) {
-        child = parent.getChildren().get(tChild.getPathElement());
+        child = parent.getChild(tChild.getPathElement());
         // If we havn't reached a prefix entry yet, then child should
         // already exists.. else it is not part of the prefix
         if (child == null) {
@@ -126,7 +129,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
           paths.add(child);
         }
       }
-      parent.getChildren().put(child.getPathElement(), child);
+      parent.putChild(child.getPathElement(), child);
       cloneToEntry(tChild, child, idMap, authzObjToPath, isChildPrefix);
     }
   }

http://git-wip-us.apache.org/repos/asf/sentry/blob/8d53da5f/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
index f31ec21..a0d7bdc 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
@@ -65,7 +65,7 @@ public class TestHMSPaths {
     Assert.assertEquals(HMSPaths.EntryType.DIR, root.getType());
     Assert.assertTrue(root.getAuthzObjs().size() == 0);
     Assert.assertEquals(Path.SEPARATOR, root.getFullPath());
-    Assert.assertTrue(root.getChildren().isEmpty());
+    Assert.assertFalse(root.hasChildren());
     root.delete();
     try {
       root.find(null, true);
@@ -122,14 +122,14 @@ public class TestHMSPaths {
     HMSPaths.Entry entry = root.createPrefix(Lists.newArrayList("a"));
     entry.toString();
     
-    Assert.assertEquals(1, root.getChildren().size());
+    Assert.assertEquals(1, root.childrenValues().size());
 
     Assert.assertEquals(root, entry.getParent());
     Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType());
     Assert.assertEquals("a", entry.getPathElement());
     Assert.assertEquals(0, entry.getAuthzObjs().size());
     Assert.assertEquals(Path.SEPARATOR + "a", entry.getFullPath());
-    Assert.assertTrue(entry.getChildren().isEmpty());
+    Assert.assertFalse(entry.hasChildren());
 
     Assert.assertEquals(entry, root.findPrefixEntry(Lists.newArrayList("a")));
     Assert.assertEquals(entry, root.findPrefixEntry(Lists.newArrayList("a", "b")));
@@ -154,7 +154,7 @@ public class TestHMSPaths {
     }
 
     entry.delete();
-    Assert.assertTrue(root.getChildren().isEmpty());
+    Assert.assertFalse(root.hasChildren());
   }
 
   @Test
@@ -163,7 +163,7 @@ public class TestHMSPaths {
     HMSPaths.Entry entry = root.createPrefix(Lists.newArrayList("a", "b"));
     entry.toString();
 
-    Assert.assertEquals(1, root.getChildren().size());
+    Assert.assertEquals(1, root.childrenValues().size());
 
     Assert.assertEquals(root, entry.getParent().getParent());
     Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType());
@@ -176,8 +176,8 @@ public class TestHMSPaths {
     Assert.assertEquals(Path.SEPARATOR + "a" + Path.SEPARATOR + "b",
         entry.getFullPath());
     Assert.assertEquals(Path.SEPARATOR + "a", entry.getParent().getFullPath());
-    Assert.assertTrue(entry.getChildren().isEmpty());
-    Assert.assertEquals(1, entry.getParent().getChildren().size());
+    Assert.assertFalse(entry.hasChildren());
+    Assert.assertEquals(1, entry.getParent().childrenValues().size());
 
     Assert.assertEquals(entry, root.findPrefixEntry(Lists.newArrayList("a", "b")));
     Assert.assertNull(root.findPrefixEntry(Lists.newArrayList("a")));
@@ -199,7 +199,7 @@ public class TestHMSPaths {
     }
 
     entry.delete();
-    Assert.assertTrue(root.getChildren().isEmpty());
+    Assert.assertFalse(root.hasChildren());
   }
 
   @Test


Mime
View raw message