accumulo-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From keith-turner <...@git.apache.org>
Subject [GitHub] accumulo pull request #275: ACCUMULO-4667 Reworked the LocalityGroupIterator...
Date Fri, 30 Jun 2017 15:57:55 GMT
Github user keith-turner commented on a diff in the pull request:

    https://github.com/apache/accumulo/pull/275#discussion_r125073040
  
    --- Diff: core/src/main/java/org/apache/accumulo/core/iterators/system/LocalityGroupIterator.java
---
    @@ -97,75 +133,117 @@ public static final int seek(HeapIterator hiter, LocalityGroup[]
groups, Set<Byt
         else
           cfSet = Collections.emptySet();
     
    -    for (LocalityGroup lgr : groups) {
    -      // when include is set to true it means this locality groups contains
    -      // wanted column families
    -      boolean include = false;
    +    // determine the set of groups to use
    +    Set<LocalityGroup> groupsToUse = new HashSet<LocalityGroup>();
     
    -      if (cfSet.size() == 0) {
    -        include = !inclusive;
    -      } else if (lgr.isDefaultLocalityGroup && lgr.columnFamilies == null) {
    -        // do not know what column families are in the default locality group,
    -        // only know what column families are not in it
    +    // if no column families specified, then include all groups unless !inclusive
    +    if (cfSet.size() == 0) {
    +      if (!inclusive) {
    +        groupsToUse.addAll(Arrays.asList(groups.groups));
    +      }
    +    } else {
     
    +      // do not know what column families are in the default locality group,
    +      // only know what column families are not in it
    +      if (groups.defaultGroup != null) {
             if (inclusive) {
    -          if (!nonDefaultColumnFamilies.containsAll(cfSet)) {
    +          if (!groups.groupByCf.keySet().containsAll(cfSet)) {
                 // default LG may contain wanted and unwanted column families
    -            include = true;
    +            groupsToUse.add(groups.defaultGroup);
               }// else - everything wanted is in other locality groups, so nothing to do
             } else {
    -          // must include, if all excluded column families are in other locality groups
    -          // then there are not unwanted column families in default LG
    -          include = true;
    +          // must include the default group as it may include cfs not in our cfSet
    +          groupsToUse.add(groups.defaultGroup);
    +        }
    +      }
    +
    +      /*
    +       * Need to consider the following cases for inclusive and exclusive (lgcf:locality
group column family set, cf:column family set) lgcf and cf are disjoint
    +       * lgcf and cf are the same cf contains lgcf lgcf contains cf lgccf and cf intersect
but neither is a subset of the other
    +       */
    +      if (!inclusive) {
    +        for (Entry<ByteSequence,LocalityGroup> entry : groups.groupByCf.entrySet())
{
    +          if (!cfSet.contains(entry.getKey())) {
    +            groupsToUse.add(entry.getValue());
    +          }
    +        }
    +      } else if (groups.groupByCf.size() <= cfSet.size()) {
    +        for (Entry<ByteSequence,LocalityGroup> entry : groups.groupByCf.entrySet())
{
    +          if (cfSet.contains(entry.getKey())) {
    +            groupsToUse.add(entry.getValue());
    +          }
             }
           } else {
    -        /*
    -         * Need to consider the following cases for inclusive and exclusive (lgcf:locality
group column family set, cf:column family set) lgcf and cf are
    -         * disjoint lgcf and cf are the same cf contains lgcf lgcf contains cf lgccf
and cf intersect but neither is a subset of the other
    -         */
    -
    -        for (Entry<ByteSequence,MutableLong> entry : lgr.columnFamilies.entrySet())
    -          if (entry.getValue().longValue() > 0)
    -            if (cfSet.contains(entry.getKey())) {
    -              if (inclusive)
    -                include = true;
    -            } else if (!inclusive) {
    -              include = true;
    -            }
    +        for (ByteSequence cf : cfSet) {
    +          LocalityGroup group = groups.groupByCf.get(cf);
    +          if (group != null) {
    +            groupsToUse.add(group);
    +          }
    +        }
           }
    +    }
     
    -      if (include) {
    -        lgr.getIterator().seek(range, EMPTY_CF_SET, false);
    -        hiter.addSource(lgr.getIterator());
    -        numLGSeeked++;
    -      }// every column family is excluded, zero count, or not present
    +    for (LocalityGroup lgr : groupsToUse) {
    +      lgr.getIterator().seek(range, EMPTY_CF_SET, false);
    +      hiter.addSource(lgr.getIterator());
    +      numLGSeeked++;
    +    }
    +
    +    if (used != null) {
    +      used.addAll(groupsToUse);
         }
     
         return numLGSeeked;
       }
     
       @Override
       public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean
inclusive) throws IOException {
    -    seek(this, groups, nonDefaultColumnFamilies, range, columnFamilies, inclusive);
    +    Set<ByteSequence> cfSet;
    +    if (columnFamilies.size() > 0)
    +      if (columnFamilies instanceof Set<?>) {
    +        cfSet = (Set<ByteSequence>) columnFamilies;
    +      } else {
    +        cfSet = new HashSet<>();
    +        cfSet.addAll(columnFamilies);
    +      }
    +    else
    +      cfSet = Collections.emptySet();
    +
    +    if (lastUsed != null && cfSet.equals(lastColumnFamilies) && inclusive
== lastInclusive) {
    +      clear();
    +      for (LocalityGroup lgr : lastUsed) {
    +        lgr.getIterator().seek(range, EMPTY_CF_SET, false);
    +        addSource(lgr.getIterator());
    +      }
    +    } else {
    +      if (lastUsed == null) {
    +        lastUsed = new ArrayList<LocalityGroup>(lgContext.groups.length);
    +      } else {
    +        lastUsed.clear();
    +      }
    +      lastColumnFamilies = cfSet;
    --- End diff --
    
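    Need to be careful with this: it could be a reference to a set the user passed in.  The user could
call `seek(setA)`, then modify `setA`, and then call `seek(setA)` again.  Because `lastColumnFamilies`
would then be the same object as the new `cfSet`, the `cfSet.equals(lastColumnFamilies)` check would
still pass even though the contents changed.  In this use case it seems like the wrong locality groups
could be used.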


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message