sentry-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ak...@apache.org
Subject sentry git commit: SENTRY-1892: Reduce memory consumption of HMSPath$Entry and TPathEntry (Misha Dmitriev, reviewed by Alex Kolbasov and Vadim Spector)
Date Thu, 24 Aug 2017 00:11:15 GMT
Repository: sentry
Updated Branches:
  refs/heads/master a41a1447b -> 201808de0


SENTRY-1892: Reduce memory consumption of HMSPath$Entry and TPathEntry (Misha Dmitriev, reviewed
by Alex Kolbasov and Vadim Spector)


Project: http://git-wip-us.apache.org/repos/asf/sentry/repo
Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/201808de
Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/201808de
Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/201808de

Branch: refs/heads/master
Commit: 201808de059b9cda38a34c3aefb4253b402b891d
Parents: a41a144
Author: Alexander Kolbasov <akolb1@gmail.com>
Authored: Wed Aug 23 17:02:58 2017 -0700
Committer: Alexander Kolbasov <akolb1@gmail.com>
Committed: Wed Aug 23 17:02:58 2017 -0700

----------------------------------------------------------------------
 .../sentry/hdfs/service/thrift/TPathEntry.java  |  76 ++++++-------
 .../java/org/apache/sentry/hdfs/HMSPaths.java   | 106 ++++++++++++++-----
 .../org/apache/sentry/hdfs/HMSPathsDumper.java  |  28 +++--
 .../main/resources/sentry_hdfs_service.thrift   |   4 +-
 .../org/apache/sentry/hdfs/TestHMSPaths.java    |  13 +++
 5 files changed, 157 insertions(+), 70 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
b/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
index 2a89368..210d219 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/gen/thrift/gen-javabean/org/apache/sentry/hdfs/service/thrift/TPathEntry.java
@@ -34,14 +34,14 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)", date = "2017-04-26")
+@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)", date = "2017-08-21")
 public class TPathEntry implements org.apache.thrift.TBase<TPathEntry, TPathEntry._Fields>,
java.io.Serializable, Cloneable, Comparable<TPathEntry> {
   private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPathEntry");
 
   private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type",
org.apache.thrift.protocol.TType.BYTE, (short)1);
   private static final org.apache.thrift.protocol.TField PATH_ELEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("pathElement",
org.apache.thrift.protocol.TType.STRING, (short)2);
-  private static final org.apache.thrift.protocol.TField CHILDREN_FIELD_DESC = new org.apache.thrift.protocol.TField("children",
org.apache.thrift.protocol.TType.SET, (short)4);
-  private static final org.apache.thrift.protocol.TField AUTHZ_OBJS_FIELD_DESC = new org.apache.thrift.protocol.TField("authzObjs",
org.apache.thrift.protocol.TType.SET, (short)5);
+  private static final org.apache.thrift.protocol.TField CHILDREN_FIELD_DESC = new org.apache.thrift.protocol.TField("children",
org.apache.thrift.protocol.TType.LIST, (short)4);
+  private static final org.apache.thrift.protocol.TField AUTHZ_OBJS_FIELD_DESC = new org.apache.thrift.protocol.TField("authzObjs",
org.apache.thrift.protocol.TType.LIST, (short)5);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes =
new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -51,8 +51,8 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
 
   private byte type; // required
   private String pathElement; // required
-  private Set<Integer> children; // required
-  private Set<String> authzObjs; // optional
+  private List<Integer> children; // required
+  private List<String> authzObjs; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding
and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
@@ -133,10 +133,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
     tmpMap.put(_Fields.PATH_ELEMENT, new org.apache.thrift.meta_data.FieldMetaData("pathElement",
org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)));
     tmpMap.put(_Fields.CHILDREN, new org.apache.thrift.meta_data.FieldMetaData("children",
org.apache.thrift.TFieldRequirementType.REQUIRED, 
-        new org.apache.thrift.meta_data.SetMetaData(org.apache.thrift.protocol.TType.SET,

+        new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST,

             new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))));
     tmpMap.put(_Fields.AUTHZ_OBJS, new org.apache.thrift.meta_data.FieldMetaData("authzObjs",
org.apache.thrift.TFieldRequirementType.OPTIONAL, 
-        new org.apache.thrift.meta_data.SetMetaData(org.apache.thrift.protocol.TType.SET,

+        new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST,

             new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPathEntry.class, metaDataMap);
@@ -148,7 +148,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
   public TPathEntry(
     byte type,
     String pathElement,
-    Set<Integer> children)
+    List<Integer> children)
   {
     this();
     this.type = type;
@@ -167,11 +167,11 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
       this.pathElement = other.pathElement;
     }
     if (other.isSetChildren()) {
-      Set<Integer> __this__children = new HashSet<Integer>(other.children);
+      List<Integer> __this__children = new ArrayList<Integer>(other.children);
       this.children = __this__children;
     }
     if (other.isSetAuthzObjs()) {
-      Set<String> __this__authzObjs = new HashSet<String>(other.authzObjs);
+      List<String> __this__authzObjs = new ArrayList<String>(other.authzObjs);
       this.authzObjs = __this__authzObjs;
     }
   }
@@ -244,16 +244,16 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
 
   public void addToChildren(int elem) {
     if (this.children == null) {
-      this.children = new HashSet<Integer>();
+      this.children = new ArrayList<Integer>();
     }
     this.children.add(elem);
   }
 
-  public Set<Integer> getChildren() {
+  public List<Integer> getChildren() {
     return this.children;
   }
 
-  public void setChildren(Set<Integer> children) {
+  public void setChildren(List<Integer> children) {
     this.children = children;
   }
 
@@ -282,16 +282,16 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
 
   public void addToAuthzObjs(String elem) {
     if (this.authzObjs == null) {
-      this.authzObjs = new HashSet<String>();
+      this.authzObjs = new ArrayList<String>();
     }
     this.authzObjs.add(elem);
   }
 
-  public Set<String> getAuthzObjs() {
+  public List<String> getAuthzObjs() {
     return this.authzObjs;
   }
 
-  public void setAuthzObjs(Set<String> authzObjs) {
+  public void setAuthzObjs(List<String> authzObjs) {
     this.authzObjs = authzObjs;
   }
 
@@ -332,7 +332,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
       if (value == null) {
         unsetChildren();
       } else {
-        setChildren((Set<Integer>)value);
+        setChildren((List<Integer>)value);
       }
       break;
 
@@ -340,7 +340,7 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
       if (value == null) {
         unsetAuthzObjs();
       } else {
-        setAuthzObjs((Set<String>)value);
+        setAuthzObjs((List<String>)value);
       }
       break;
 
@@ -634,17 +634,17 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
             }
             break;
           case 4: // CHILDREN
-            if (schemeField.type == org.apache.thrift.protocol.TType.SET) {
+            if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
               {
-                org.apache.thrift.protocol.TSet _set32 = iprot.readSetBegin();
-                struct.children = new HashSet<Integer>(2*_set32.size);
+                org.apache.thrift.protocol.TList _list32 = iprot.readListBegin();
+                struct.children = new ArrayList<Integer>(_list32.size);
                 int _elem33;
-                for (int _i34 = 0; _i34 < _set32.size; ++_i34)
+                for (int _i34 = 0; _i34 < _list32.size; ++_i34)
                 {
                   _elem33 = iprot.readI32();
                   struct.children.add(_elem33);
                 }
-                iprot.readSetEnd();
+                iprot.readListEnd();
               }
               struct.setChildrenIsSet(true);
             } else { 
@@ -652,17 +652,17 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
             }
             break;
           case 5: // AUTHZ_OBJS
-            if (schemeField.type == org.apache.thrift.protocol.TType.SET) {
+            if (schemeField.type == org.apache.thrift.protocol.TType.LIST) {
               {
-                org.apache.thrift.protocol.TSet _set35 = iprot.readSetBegin();
-                struct.authzObjs = new HashSet<String>(2*_set35.size);
+                org.apache.thrift.protocol.TList _list35 = iprot.readListBegin();
+                struct.authzObjs = new ArrayList<String>(_list35.size);
                 String _elem36;
-                for (int _i37 = 0; _i37 < _set35.size; ++_i37)
+                for (int _i37 = 0; _i37 < _list35.size; ++_i37)
                 {
                   _elem36 = iprot.readString();
                   struct.authzObjs.add(_elem36);
                 }
-                iprot.readSetEnd();
+                iprot.readListEnd();
               }
               struct.setAuthzObjsIsSet(true);
             } else { 
@@ -693,12 +693,12 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
       if (struct.children != null) {
         oprot.writeFieldBegin(CHILDREN_FIELD_DESC);
         {
-          oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.I32,
struct.children.size()));
+          oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32,
struct.children.size()));
           for (int _iter38 : struct.children)
           {
             oprot.writeI32(_iter38);
           }
-          oprot.writeSetEnd();
+          oprot.writeListEnd();
         }
         oprot.writeFieldEnd();
       }
@@ -706,12 +706,12 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
         if (struct.isSetAuthzObjs()) {
           oprot.writeFieldBegin(AUTHZ_OBJS_FIELD_DESC);
           {
-            oprot.writeSetBegin(new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRING,
struct.authzObjs.size()));
+            oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING,
struct.authzObjs.size()));
             for (String _iter39 : struct.authzObjs)
             {
               oprot.writeString(_iter39);
             }
-            oprot.writeSetEnd();
+            oprot.writeListEnd();
           }
           oprot.writeFieldEnd();
         }
@@ -766,10 +766,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
       struct.pathElement = iprot.readString();
       struct.setPathElementIsSet(true);
       {
-        org.apache.thrift.protocol.TSet _set42 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.I32,
iprot.readI32());
-        struct.children = new HashSet<Integer>(2*_set42.size);
+        org.apache.thrift.protocol.TList _list42 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32,
iprot.readI32());
+        struct.children = new ArrayList<Integer>(_list42.size);
         int _elem43;
-        for (int _i44 = 0; _i44 < _set42.size; ++_i44)
+        for (int _i44 = 0; _i44 < _list42.size; ++_i44)
         {
           _elem43 = iprot.readI32();
           struct.children.add(_elem43);
@@ -779,10 +779,10 @@ public class TPathEntry implements org.apache.thrift.TBase<TPathEntry,
TPathEntr
       BitSet incoming = iprot.readBitSet(1);
       if (incoming.get(0)) {
         {
-          org.apache.thrift.protocol.TSet _set45 = new org.apache.thrift.protocol.TSet(org.apache.thrift.protocol.TType.STRING,
iprot.readI32());
-          struct.authzObjs = new HashSet<String>(2*_set45.size);
+          org.apache.thrift.protocol.TList _list45 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING,
iprot.readI32());
+          struct.authzObjs = new ArrayList<String>(_list45.size);
           String _elem46;
-          for (int _i47 = 0; _i47 < _set45.size; ++_i47)
+          for (int _i47 = 0; _i47 < _list45.size; ++_i47)
           {
             _elem46 = iprot.readString();
             struct.authzObjs.add(_elem46);

http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
index 6e71561..5a98263 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPaths.java
@@ -132,10 +132,10 @@ public class HMSPaths implements AuthzPaths {
     private EntryType type;
     private String pathElement;
 
-    // A set of authorizable objects associated with this entry. Authorizable
-    // object should be case insensitive. The set is allocated lazily to avoid
-    // wasting memory due to empty sets.
-    private Set<String> authzObjs;
+    // A set (or single object when set size is 1) of authorizable objects associated
+    // with this entry. Authorizable object should be case insensitive. The set is
+    // allocated lazily to avoid wasting memory due to empty sets.
+    private Object authzObjs;
 
     // Path of child element to the path entry mapping, e.g. 'b' -> '/a/b'
     // This is allocated lazily to avoid wasting memory due to empty maps.
@@ -161,9 +161,9 @@ public class HMSPaths implements AuthzPaths {
      * @param parent the parent node. If not specified, this entry is a root entry.
      * @param pathElement the path element of this entry on the tree.
      * @param type entry type.
-     * @param authzObjs a set of authz objects.
+     * @param authzObjs a collection of authz objects.
      */
-    Entry(Entry parent, String pathElement, EntryType type, Set<String> authzObjs)
{
+    Entry(Entry parent, String pathElement, EntryType type, Collection<String> authzObjs)
{
       this.parent = parent;
       this.type = type;
       this.pathElement = pathElement.intern();
@@ -204,26 +204,42 @@ public class HMSPaths implements AuthzPaths {
 
     void removeAuthzObj(String authzObj) {
       if (authzObjs != null) {
-        authzObjs.remove(authzObj);
+        if (authzObjs instanceof Set) {
+          Set<String> authzObjsSet = (Set<String>) authzObjs;
+          authzObjsSet.remove(authzObj);
+          if (authzObjsSet.size() == 1) {
+            authzObjs = authzObjsSet.iterator().next();
+          }
+        } else if (authzObjs.equals(authzObj)){
+          authzObjs = null;
+        }
       }
     }
 
     void addAuthzObj(String authzObj) {
       if (authzObj != null) {
         if (authzObjs == null) {
-          authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+          authzObjs = authzObj;
+        } else {
+          Set<String> authzObjsSet;
+          if (authzObjs instanceof String) {
+            if (authzObjs.equals(authzObj)) {
+              return;
+            } else {
+              authzObjs = authzObjsSet = newTreeSetWithElement((String) authzObjs);
+            }
+          } else {
+            authzObjsSet = (Set) authzObjs;
+          }
+          authzObjsSet.add(authzObj.intern());
         }
-        authzObjs.add(authzObj.intern());
       }
     }
 
-    void addAuthzObjs(Set<String> authzObjs) {
+    void addAuthzObjs(Collection<String> authzObjs) {
       if (authzObjs != null) {
-        if (this.authzObjs == null) {
-          this.authzObjs = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
-        }
         for (String authzObj : authzObjs) {
-          this.authzObjs.add(authzObj.intern());
+          addAuthzObj(authzObj.intern());
         }
       }
     }
@@ -238,7 +254,17 @@ public class HMSPaths implements AuthzPaths {
 
     public String toString() {
       return String.format("Entry[fullPath: %s, type: %s, authObject: %s]",
-          getFullPath(), type, authzObjs != null ? Joiner.on(",").join(authzObjs) : "");
+          getFullPath(), type, authzObjsToString());
+    }
+
+    private String authzObjsToString() {
+      if (authzObjs == null) {
+        return "";
+      } else if (authzObjs instanceof String) {
+        return (String) authzObjs;
+      } else {
+        return Joiner.on(",").join((Set) authzObjs);
+      }
     }
 
     @Override
@@ -420,9 +446,9 @@ public class HMSPaths implements AuthzPaths {
           // entry no longer maps to any authzObj, removes the
           // entry recursively.
           if (authzObjs != null) {
-            authzObjs.remove(authzObj);
+            removeAuthzObj(authzObj);
           }
-          if (authzObjs == null || authzObjs.isEmpty()) {
+          if (authzObjs == null) {
             deleteFromParent();
           }
         } else {
@@ -433,7 +459,7 @@ public class HMSPaths implements AuthzPaths {
           if (getType() == EntryType.AUTHZ_OBJECT) {
             setType(EntryType.DIR);
             if (authzObjs != null) {
-              authzObjs.remove(authzObj);
+              removeAuthzObj(authzObj);
             }
           }
         }
@@ -497,10 +523,42 @@ public class HMSPaths implements AuthzPaths {
 
     /**
      * @return the set of auth objects. The returned set should be used only
-     * for querying, not for any modifications.
+     * for querying, not for any modifications. If you just want to find out
+     * the set size or whether it's empty, use the specialized getAuthzObjsSize()
+     * and isAuthzObjsEmpty() methods that performs better.
      */
-    public Set<String> getAuthzObjs() {
-      return authzObjs != null ? authzObjs : Collections.<String>emptySet();
+    Set<String> getAuthzObjs() {
+      if (authzObjs != null) {
+        if (authzObjs instanceof Set) {
+          return (Set<String>) authzObjs;
+        } else {
+          return newTreeSetWithElement((String) authzObjs);
+        }
+      } else {
+        return Collections.<String>emptySet();
+      }
+    }
+
+    int getAuthzObjsSize() {
+      if (authzObjs != null) {
+        if (authzObjs instanceof Set) {
+          return ((Set<String>) authzObjs).size();
+        } else {
+          return 1;
+        }
+      } else {
+        return 0;
+      }
+    }
+
+    boolean isAuthzObjsEmpty() {
+      return authzObjs == null;
+    }
+
+    private Set<String> newTreeSetWithElement(String el) {
+      Set<String> result = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
+      result.add(el);
+      return result;
     }
 
     public Entry findPrefixEntry(List<String> pathElements) {
@@ -538,17 +596,17 @@ public class HMSPaths implements AuthzPaths {
         boolean isPartialMatchOk, Entry lastAuthObj) {
       Entry found = null;
       if (index == pathElements.length) {
-        if (isPartialMatchOk && getAuthzObjs().size() != 0) {
+        if (isPartialMatchOk && !isAuthzObjsEmpty()) {
           found = this;
         }
       } else {
         Entry child = getChild(pathElements[index]);
         if (child != null) {
           if (index == pathElements.length - 1) {
-            found = (child.getAuthzObjs().size() != 0) ? child : lastAuthObj;
+            found = (!child.isAuthzObjsEmpty()) ? child : lastAuthObj;
           } else {
             found = child.find(pathElements, index + 1, isPartialMatchOk,
-                (child.getAuthzObjs().size() != 0) ? child : lastAuthObj);
+                (!child.isAuthzObjsEmpty()) ? child : lastAuthObj);
           }
         } else {
           if (isPartialMatchOk) {

http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
index bf6c3de..1267093 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/java/org/apache/sentry/hdfs/HMSPathsDumper.java
@@ -17,6 +17,7 @@
  */
 package org.apache.sentry.hdfs;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
@@ -30,11 +31,15 @@ import org.apache.sentry.hdfs.HMSPaths.Entry;
 import org.apache.sentry.hdfs.HMSPaths.EntryType;
 import org.apache.sentry.hdfs.service.thrift.TPathEntry;
 import org.apache.sentry.hdfs.service.thrift.TPathsDump;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths> {
 
   private final HMSPaths hmsPaths;
 
+  private static final Logger LOG = LoggerFactory.getLogger(HMSPathsDumper.class);
+
   static class Tuple {
     private final TPathEntry entry;
     private final int id;
@@ -65,9 +70,16 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
         hmsPaths.getRootEntry(), tRootTuple.entry, counter, idMap, dups);
     TPathsDump dump = new TPathsDump(tRootTuple.id, idMap);
 
+    String stringDupMsg = "";
     if (minimizeSize) {
-      dump.setDupStringValues(Arrays.asList(dups.getDupStringValues()));
+      String[] dupStringValues = dups.getDupStringValues();
+      dump.setDupStringValues(Arrays.asList(dupStringValues));
+      stringDupMsg = String.format(" %d total path strings, %d duplicate strings found, "
+
+          "compacted to %d unique strings.", dups.nTotalStrings, dups.nDupStrings,
+          dupStringValues.length);
     }
+    LOG.info("Paths Dump created." + stringDupMsg);
+
     return dump;
   }
 
@@ -83,14 +95,14 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
   private Tuple createTPathEntry(Entry entry, AtomicInteger idCounter,
       Map<Integer, TPathEntry> idMap, DupDetector dups) {
     int myId = idCounter.incrementAndGet();
-    Set<Integer> children = entry.hasChildren() ?
-        new HashSet<Integer>(entry.numChildren()) : Collections.<Integer>emptySet();
+    List<Integer> children = entry.hasChildren() ?
+        new ArrayList<Integer>(entry.numChildren()) : Collections.<Integer>emptyList();
     String pathElement = entry.getPathElement();
     String sameOrReplacementId =
         dups != null ? dups.getReplacementString(pathElement) : pathElement;
     TPathEntry tEntry = new TPathEntry(entry.getType().getByte(), sameOrReplacementId, children);
-    if (!entry.getAuthzObjs().isEmpty()) {
-      tEntry.setAuthzObjs(entry.getAuthzObjs());
+    if (!entry.isAuthzObjsEmpty()) {
+      tEntry.setAuthzObjs(new ArrayList<>(entry.getAuthzObjs()));
     }
     idMap.put(myId, tEntry);
     return new Tuple(tEntry, myId);
@@ -142,7 +154,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
         child = new Entry(parent, tChildPathElement,
             EntryType.fromByte(tChild.getType()), tChild.getAuthzObjs());
       }
-      if (child.getAuthzObjs().size() != 0) {
+      if (!child.isAuthzObjsEmpty()) {
         for (String authzObj: child.getAuthzObjs()) {
           Set<Entry> paths = authzObjToPath.get(authzObj);
           if (paths == null) {
@@ -200,6 +212,8 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
     // Size of the auxiliary string array - essentially the number of duplicate
     // strings that we detected and encoded.
     private int auxArraySize;
+    // For statistics/debugging
+    int nTotalStrings, nDupStrings;
 
     /**
      * Finds duplicate strings in the tree of Entry objects with the given root,
@@ -215,6 +229,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
         if (keys[i] != null) {
           if (values[i] >= MIN_NUM_DUPLICATES) {
             values[i] = auxArraySize++;
+            nDupStrings += values[i];
           } else {  // No duplication for this string
             keys[i] = null;
             values[i] = -1;  // Just to mark invalid slots
@@ -258,6 +273,7 @@ public class HMSPathsDumper implements AuthzPathsDumper<HMSPaths>
{
     }
 
     private void inspectEntry(Entry entry) {
+      nTotalStrings++;
       String pathElement = entry.getPathElement();
       if (pathElement.length() > AVG_ID_LENGTH) {
         // In the serialized data, it doesn't make sense to replace string origS

http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
b/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
index 22dea02..465b421 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
+++ b/sentry-hdfs/sentry-hdfs-common/src/main/resources/sentry_hdfs_service.thrift
@@ -49,10 +49,10 @@ struct TPathEntry {
 2: required string pathElement;
 
 # The child tuple id of the Path Entry.
-4: required set<i32> children;
+4: required list<i32> children;
 
 # A set of authzObjs associated with the Path Entry.
-5: optional set<string> authzObjs;
+5: optional list<string> authzObjs;
 }
 
 struct TPathsDump {

http://git-wip-us.apache.org/repos/asf/sentry/blob/201808de/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
index a0d7bdc..20ed97c 100644
--- a/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
+++ b/sentry-hdfs/sentry-hdfs-common/src/test/java/org/apache/sentry/hdfs/TestHMSPaths.java
@@ -63,6 +63,9 @@ public class TestHMSPaths {
     root.toString();
     Assert.assertNull(root.getParent());
     Assert.assertEquals(HMSPaths.EntryType.DIR, root.getType());
+    Assert.assertTrue(root.getAuthzObjsSize() == 0);
+    Assert.assertTrue(root.isAuthzObjsEmpty());
+    // getAuthzObjs().size() is not recommended, but let's check that it works correctly
     Assert.assertTrue(root.getAuthzObjs().size() == 0);
     Assert.assertEquals(Path.SEPARATOR, root.getFullPath());
     Assert.assertFalse(root.hasChildren());
@@ -127,6 +130,9 @@ public class TestHMSPaths {
     Assert.assertEquals(root, entry.getParent());
     Assert.assertEquals(HMSPaths.EntryType.PREFIX, entry.getType());
     Assert.assertEquals("a", entry.getPathElement());
+    Assert.assertEquals(0, entry.getAuthzObjsSize());
+    Assert.assertTrue(entry.isAuthzObjsEmpty());
+    // getAuthzObjs().size() is not recommended, but let's check that it works correctly
     Assert.assertEquals(0, entry.getAuthzObjs().size());
     Assert.assertEquals(Path.SEPARATOR + "a", entry.getFullPath());
     Assert.assertFalse(entry.hasChildren());
@@ -171,6 +177,10 @@ public class TestHMSPaths {
         entry.getParent().getType());
     Assert.assertEquals("b", entry.getPathElement());
     Assert.assertEquals("a", entry.getParent().getPathElement());
+    Assert.assertTrue(entry.getAuthzObjsSize() == 0);
+    Assert.assertTrue(entry.isAuthzObjsEmpty());
+    Assert.assertTrue(entry.getParent().getAuthzObjsSize() == 0);
+    // getAuthzObjs().size() is not recommended, but let's check that it works correctly
     Assert.assertTrue(entry.getAuthzObjs().size() == 0);
     Assert.assertTrue(entry.getParent().getAuthzObjs().size() == 0);
     Assert.assertEquals(Path.SEPARATOR + "a" + Path.SEPARATOR + "b",
@@ -296,6 +306,9 @@ public class TestHMSPaths {
     Assert.assertNull(root.find(new String[]{"a", "b", "t", "p1"},
         true));
     Assert.assertEquals(HMSPaths.EntryType.DIR, entry.getType());
+    Assert.assertEquals(entry.getAuthzObjsSize(), 0);
+    Assert.assertTrue(entry.isAuthzObjsEmpty());
+    // getAuthzObjs().size() is not recommended, but let's check that it works correctly
     Assert.assertEquals(entry.getAuthzObjs().size(), 0);
 
     Assert.assertNull(root.find(new String[]{"a", "b", "t", "p1"}, false));


Mime
View raw message