lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject lucene-solr git commit: LUCENE-5735: remove NumberRangePrefixTreeStrategy.calcFacets from 6x for now
Date Fri, 04 Mar 2016 15:50:59 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 1cbf22569 -> 0b15fd863


LUCENE-5735: remove NumberRangePrefixTreeStrategy.calcFacets from 6x for now


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/0b15fd86
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/0b15fd86
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/0b15fd86

Branch: refs/heads/branch_6x
Commit: 0b15fd86364dbafe08abc2939d54749e69bca23f
Parents: 1cbf225
Author: David Smiley <dsmiley@apache.org>
Authored: Fri Mar 4 10:50:53 2016 -0500
Committer: David Smiley <dsmiley@apache.org>
Committed: Fri Mar 4 10:50:53 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 -
 .../prefix/NumberRangePrefixTreeStrategy.java   | 141 +---------
 .../spatial/prefix/NumberRangeFacetsTest.java   | 275 -------------------
 3 files changed, 2 insertions(+), 418 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b15fd86/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fd01a92..71a715b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -25,10 +25,6 @@ New Features
   input tokens. Useful for normalizing short text in clustering/linking 
   tasks. (Mark Harwood, Adrien Grand)
 
-* LUCENE-5735: NumberRangePrefixTreeStrategy now includes interval/range faceting
-  for counting ranges that align with the underlying terms as defined by the
-  NumberRangePrefixTree (e.g. familiar date units like days).  (David Smiley)
-
 * LUCENE-6711: Use CollectionStatistics.docCount() for IDF and average field
   length computations, to avoid skew from documents that don't have the field.
   (Ahmet Arslan via Robert Muir)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b15fd86/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
index c727c0d..b310de3 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
@@ -16,22 +16,13 @@
  */
 package org.apache.lucene.spatial.prefix;
 
-import java.io.IOException;
-import java.util.Arrays;
 import java.util.Iterator;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
 
-import org.locationtech.spatial4j.shape.Point;
-import org.locationtech.spatial4j.shape.Shape;
-import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.spatial.prefix.tree.Cell;
 import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
-import org.apache.lucene.util.Bits;
-
-import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Shape;
 
 /** A PrefixTree based on Number/Date ranges. This isn't very "spatial" on the surface (to
the user) but
  * it's implemented using spatial so that's why it's here extending a SpatialStrategy. When
using this class, you will
@@ -68,132 +59,4 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy
{
     throw new UnsupportedOperationException();
   }
 
-  /** Calculates facets between {@code start} and {@code end} to a detail level one greater
than that provided by the
-   * arguments. For example providing March to October of 2014 would return facets to the
day level of those months.
-   * This is just a convenience method.
-   * @see #calcFacets(IndexReaderContext, Bits, Shape, int)
-   */
-  public Facets calcFacets(IndexReaderContext context, Bits topAcceptDocs, UnitNRShape start,
UnitNRShape end)
-      throws IOException {
-    Shape facetRange = getGrid().toRangeShape(start, end);
-    int detailLevel = Math.max(start.getLevel(), end.getLevel()) + 1;
-    return calcFacets(context, topAcceptDocs, facetRange, detailLevel);
-  }
-
-  /**
-   * Calculates facets (aggregated counts) given a range shape (start-end span) and a level,
which specifies the detail.
-   * To get the level of an existing shape, say a Calendar, call
-   * {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree#toUnitShape(Object)}
then call
-   * {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape#getLevel()}.
-   * Facet computation is implemented by navigating the underlying indexed terms efficiently.
-   */
-  public Facets calcFacets(IndexReaderContext context, Bits topAcceptDocs, Shape facetRange,
final int level)
-      throws IOException {
-    final Facets facets = new Facets(level);
-    PrefixTreeFacetCounter.compute(this, context, topAcceptDocs, facetRange, level,
-        new PrefixTreeFacetCounter.FacetVisitor() {
-          Facets.FacetParentVal parentFacet;
-          UnitNRShape parentShape;
-
-          @Override
-          public void visit(Cell cell, int count) {
-            if (cell.getLevel() < level - 1) {//some ancestor of parent facet level, direct
or distant
-              parentFacet = null;//reset
-              parentShape = null;//reset
-              facets.topLeaves += count;
-            } else if (cell.getLevel() == level - 1) {//parent
-              //set up FacetParentVal
-              setupParent((UnitNRShape) cell.getShape());
-              parentFacet.parentLeaves += count;
-            } else {//at facet level
-              UnitNRShape unitShape = (UnitNRShape) cell.getShape();
-              UnitNRShape unitShapeParent = unitShape.getShapeAtLevel(unitShape.getLevel()
- 1);
-              if (parentFacet == null || !parentShape.equals(unitShapeParent)) {
-                setupParent(unitShapeParent);
-              }
-              //lazy init childCounts
-              if (parentFacet.childCounts == null) {
-                parentFacet.childCounts = new int[parentFacet.childCountsLen];
-              }
-              parentFacet.childCounts[unitShape.getValAtLevel(cell.getLevel())] += count;
-            }
-          }
-
-          private void setupParent(UnitNRShape unitShape) {
-            parentShape = unitShape.clone();
-            //Look for existing parentFacet (from previous segment), or create anew if needed
-            parentFacet = facets.parents.get(parentShape);
-            if (parentFacet == null) {//didn't find one; make a new one
-              parentFacet = new Facets.FacetParentVal();
-              parentFacet.childCountsLen = getGrid().getNumSubCells(parentShape);
-              facets.parents.put(parentShape, parentFacet);
-            }
-          }
-        });
-    return facets;
-  }
-
-  /** Facet response information */
-  public static class Facets {
-    //TODO consider a variable-level structure -- more general purpose.
-
-    public Facets(int detailLevel) {
-      this.detailLevel = detailLevel;
-    }
-
-    /** The bottom-most detail-level counted, as requested. */
-    public final int detailLevel;
-
-    /**
-     * The count of documents with ranges that completely spanned the parents of the detail
level. In more technical
-     * terms, this is the count of leaf cells 2 up and higher from the bottom. Usually you
only care about counts at
-     * detailLevel, and so you will add this number to all other counts below, including
to omitted/implied children
-     * counts of 0. If there are no indexed ranges (just instances, i.e. fully specified
dates) then this value will
-     * always be 0.
-     */
-    public int topLeaves;
-
-    /** Holds all the {@link FacetParentVal} instances in order of the key. This is sparse;
there won't be an
-     * instance if it's count and children are all 0. The keys are {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape}
shapes, which can be
-     * converted back to the original Object (i.e. a Calendar) via
-     * {@link NumberRangePrefixTree#toObject(org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape)}.
*/
-    public final SortedMap<UnitNRShape,FacetParentVal> parents = new TreeMap<>();
-
-    /** Holds a block of detailLevel counts aggregated to their parent level. */
-    public static class FacetParentVal {
-
-      /** The count of ranges that span all of the childCounts.  In more technical terms,
this is the number of leaf
-       * cells found at this parent.  Treat this like {@link Facets#topLeaves}. */
-      public int parentLeaves;
-
-      /** The length of {@link #childCounts}. If childCounts is not null then this is childCounts.length,
otherwise it
-       * says how long it would have been if it weren't null. */
-      public int childCountsLen;
-
-      /** The detail level counts. It will be null if there are none, and thus they are assumed
0. Most apps, when
-       * presenting the information, will add {@link #topLeaves} and {@link #parentLeaves}
to each count. */
-      public int[] childCounts;
-      //assert childCountsLen == childCounts.length
-    }
-
-    @Override
-    public String toString() {
-      StringBuilder buf = new StringBuilder(2048);
-      buf.append("Facets: level=" + detailLevel + " topLeaves=" + topLeaves + " parentCount="
+ parents.size());
-      for (Map.Entry<UnitNRShape, FacetParentVal> entry : parents.entrySet()) {
-        buf.append('\n');
-        if (buf.length() > 1000) {
-          buf.append("...");
-          break;
-        }
-        final FacetParentVal pVal = entry.getValue();
-        buf.append(' ').append(entry.getKey()+" leafCount=" + pVal.parentLeaves);
-        if (pVal.childCounts != null) {
-          buf.append(' ').append(Arrays.toString(pVal.childCounts));
-        }
-      }
-      return buf.toString();
-    }
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/0b15fd86/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java
----------------------------------------------------------------------
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java
b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java
deleted file mode 100644
index 514c18e..0000000
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.spatial.prefix;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.List;
-
-import com.carrotsearch.randomizedtesting.annotations.Repeat;
-import org.locationtech.spatial4j.shape.Shape;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queries.TermsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.SimpleCollector;
-import org.apache.lucene.spatial.StrategyTestCase;
-import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeStrategy.Facets;
-import org.apache.lucene.spatial.prefix.tree.Cell;
-import org.apache.lucene.spatial.prefix.tree.CellIterator;
-import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
-import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
-import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;
-import org.junit.Before;
-import org.junit.Test;
-
-import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
-import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
-
-public class NumberRangeFacetsTest extends StrategyTestCase {
-
-  DateRangePrefixTree tree;
-
-  int randomCalWindowField;
-  long randomCalWindowMs;
-
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    tree = DateRangePrefixTree.INSTANCE;
-    strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
-    Calendar tmpCal = tree.newCal();
-    randomCalWindowField = randomIntBetween(1, Calendar.ZONE_OFFSET - 1);//we're not allowed
to add zone offset
-    tmpCal.add(randomCalWindowField, 2_000);
-    randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis());
-  }
-
-  @Repeat(iterations = 20)
-  @Test
-  public void test() throws IOException {
-    //generate test data
-    List<Shape> indexedShapes = new ArrayList<>();
-    final int numIndexedShapes = random().nextInt(15);
-    for (int i = 0; i < numIndexedShapes; i++) {
-      indexedShapes.add(randomShape());
-    }
-
-    //Main index loop:
-    for (int i = 0; i < indexedShapes.size(); i++) {
-      Shape shape = indexedShapes.get(i);
-      adoc(""+i, shape);
-
-      if (random().nextInt(10) == 0)
-        commit();//intermediate commit, produces extra segments
-    }
-
-    //delete some documents randomly
-    for (int id = 0; id < indexedShapes.size(); id++) {
-      if (random().nextInt(10) == 0) {
-        deleteDoc(""+id);
-        indexedShapes.set(id, null);
-      }
-    }
-
-    commit();
-
-    //Main query loop:
-    for (int queryIdx = 0; queryIdx < 10; queryIdx++) {
-      preQueryHavoc();
-
-      // We need to have a facet range window to do the facets between (a start time &
end time). We randomly
-      // pick a date, decide the level we want to facet on, and then pick a right end time
that is up to 2 thousand
-      // values later.
-      int calFieldFacet = randomCalWindowField - 1;
-      if (calFieldFacet > 1 && rarely()) {
-        calFieldFacet--;
-      }
-      final Calendar leftCal = randomCalendar();
-      leftCal.add(calFieldFacet, -1 * randomInt(1000));
-      Calendar rightCal = (Calendar) leftCal.clone();
-      rightCal.add(calFieldFacet, randomInt(2000));
-      // Pick facet detail level based on cal field.
-      int detailLevel = tree.getTreeLevelForCalendarField(calFieldFacet);
-      if (detailLevel < 0) {//no exact match
-        detailLevel = -1 * detailLevel;
-      }
-
-      //Randomly pick a filter/acceptDocs
-      Bits topAcceptDocs = null;
-      List<Integer> acceptFieldIds = new ArrayList<>();
-      if (usually()) {
-        //get all possible IDs into a list, random shuffle it, then randomly choose how many
of the first we use to
-        // replace the list.
-        for (int i = 0; i < indexedShapes.size(); i++) {
-          if (indexedShapes.get(i) == null) { // we deleted this one
-            continue;
-          }
-          acceptFieldIds.add(i);
-        }
-        Collections.shuffle(acceptFieldIds, random());
-        acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
-        if (!acceptFieldIds.isEmpty()) {
-          List<Term> terms = new ArrayList<>();
-          for (Integer acceptDocId : acceptFieldIds) {
-            terms.add(new Term("id", acceptDocId.toString()));
-          }
-
-          topAcceptDocs = searchForDocBits(new TermsQuery(terms));
-        }
-      }
-
-      //Lets do it!
-      NumberRangePrefixTree.NRShape facetRange = tree.toRangeShape(tree.toShape(leftCal),
tree.toShape(rightCal));
-      Facets facets = ((NumberRangePrefixTreeStrategy) strategy)
-          .calcFacets(indexSearcher.getTopReaderContext(), topAcceptDocs, facetRange, detailLevel);
-
-      //System.out.println("Q: " + queryIdx + " " + facets);
-
-      //Verify results. We do it by looping over indexed shapes and reducing the facet counts.
-      Shape facetShapeRounded = facetRange.roundToLevel(detailLevel);
-      for (int indexedShapeId = 0; indexedShapeId < indexedShapes.size(); indexedShapeId++)
{
-        if (topAcceptDocs != null && !acceptFieldIds.contains(indexedShapeId)) {
-          continue;// this doc was filtered out via acceptDocs
-        }
-        Shape indexedShape = indexedShapes.get(indexedShapeId);
-        if (indexedShape == null) {//was deleted
-          continue;
-        }
-        Shape indexedShapeRounded = ((NumberRangePrefixTree.NRShape) indexedShape).roundToLevel(detailLevel);
-        if (!indexedShapeRounded.relate(facetShapeRounded).intersects()) { // no intersection
at all
-          continue;
-        }
-        // walk the cells
-        final CellIterator cellIterator = tree.getTreeCellIterator(indexedShape, detailLevel);
-        while (cellIterator.hasNext()) {
-          Cell cell = cellIterator.next();
-          if (!cell.getShape().relate(facetShapeRounded).intersects()) {
-            cellIterator.remove();//no intersection; prune
-            continue;
-          }
-          assert cell.getLevel() <= detailLevel;
-
-          if (cell.getLevel() == detailLevel) {
-            //count it
-            UnitNRShape shape = (UnitNRShape) cell.getShape();
-            final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);//get
parent
-            final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
-            assertNotNull(facetParentVal);
-            int index = shape.getValAtLevel(shape.getLevel());
-            assertNotNull(facetParentVal.childCounts);
-            assert facetParentVal.childCounts[index] > 0;
-            facetParentVal.childCounts[index]--;
-
-          } else if (cell.isLeaf()) {
-            //count it, and remove/prune.
-            if (cell.getLevel() < detailLevel - 1) {
-              assert facets.topLeaves > 0;
-              facets.topLeaves--;
-            } else {
-              UnitNRShape shape = (UnitNRShape) cell.getShape();
-              final UnitNRShape parentShape = shape.getShapeAtLevel(detailLevel - 1);//get
parent
-              final Facets.FacetParentVal facetParentVal = facets.parents.get(parentShape);
-              assertNotNull(facetParentVal);
-              assert facetParentVal.parentLeaves > 0;
-              facetParentVal.parentLeaves--;
-            }
-
-            cellIterator.remove();
-          }
-        }
-      }
-      // At this point; all counts should be down to zero.
-      assertTrue(facets.topLeaves == 0);
-      for (Facets.FacetParentVal facetParentVal : facets.parents.values()) {
-        assertTrue(facetParentVal.parentLeaves == 0);
-        if (facetParentVal.childCounts != null) {
-          for (int childCount : facetParentVal.childCounts) {
-            assertTrue(childCount == 0);
-          }
-        }
-      }
-
-    }
-  }
-
-  private Bits searchForDocBits(Query query) throws IOException {
-    FixedBitSet bitSet = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
-    indexSearcher.search(query,
-        new SimpleCollector() {
-          int leafDocBase;
-          @Override
-          public void collect(int doc) throws IOException {
-            bitSet.set(leafDocBase + doc);
-          }
-
-          @Override
-          protected void doSetNextReader(LeafReaderContext context) throws IOException {
-            leafDocBase = context.docBase;
-          }
-
-          @Override
-          public boolean needsScores() {
-            return false;
-          }
-        });
-    return bitSet;
-  }
-
-  private void preQueryHavoc() {
-    if (strategy instanceof RecursivePrefixTreeStrategy) {
-      RecursivePrefixTreeStrategy rpts = (RecursivePrefixTreeStrategy) strategy;
-      int scanLevel = randomInt(rpts.getGrid().getMaxLevels());
-      rpts.setPrefixGridScanLevel(scanLevel);
-    }
-  }
-
-  protected Shape randomShape() {
-    Calendar cal1 = randomCalendar();
-    UnitNRShape s1 = tree.toShape(cal1);
-    if (rarely()) {
-      return s1;
-    }
-    try {
-      Calendar cal2 = randomCalendar();
-      UnitNRShape s2 = tree.toShape(cal2);
-      if (cal1.compareTo(cal2) < 0) {
-        return tree.toRangeShape(s1, s2);
-      } else {
-        return tree.toRangeShape(s2, s1);
-      }
-    } catch (IllegalArgumentException e) {
-      assert e.getMessage().startsWith("Differing precision");
-      return s1;
-    }
-  }
-
-  private Calendar randomCalendar() {
-    Calendar cal = tree.newCal();
-    cal.setTimeInMillis(random().nextLong() % randomCalWindowMs);
-    try {
-      tree.clearFieldsAfter(cal, random().nextInt(Calendar.FIELD_COUNT+1)-1);
-    } catch (AssertionError e) {
-      if (!e.getMessage().equals("Calendar underflow"))
-        throw e;
-    }
-    return cal;
-  }
-}


Mime
View raw message