lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sh...@apache.org
Subject svn commit: r1487465 - in /lucene/dev/trunk/lucene: ./ facet/src/java/org/apache/lucene/facet/search/ facet/src/test/org/apache/lucene/facet/search/
Date Wed, 29 May 2013 12:47:06 GMT
Author: shaie
Date: Wed May 29 12:47:05 2013
New Revision: 1487465

URL: http://svn.apache.org/r1487465
Log:
LUCENE-5022: Add FacetResult.mergeHierarchies

Added:
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
  (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1487465&r1=1487464&r2=1487465&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Wed May 29 12:47:05 2013
@@ -161,6 +161,10 @@ New Features
 * LUCENE-4975: Added a new Replicator module which can replicate index 
   revisions between server and client. (Shai Erera, Mike McCandless)
 
+* LUCENE-5022: Added FacetResult.mergeHierarchies to merge multiple
+  FacetResult of the same dimension into a single one with the reconstructed
+  hierarchy. (Shai Erera)
+  
 Build
 
 * LUCENE-4987: Upgrade randomized testing to version 2.0.10: 

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java?rev=1487465&r1=1487464&r2=1487465&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
(original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
Wed May 29 12:47:05 2013
@@ -1,5 +1,16 @@
 package org.apache.lucene.facet.search;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.CollectionUtil;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +35,140 @@ package org.apache.lucene.facet.search;
  */
 public class FacetResult {
   
+  private static FacetResultNode addIfNotExist(Map<CategoryPath, FacetResultNode> nodes,
FacetResultNode node) {
+    FacetResultNode n = nodes.get(node.label);
+    if (n == null) {
+      nodes.put(node.label, node);
+      n = node;
+    }
+    return n;
+  }
+
+  /**
+   * A utility for merging multiple {@link FacetResult} of the same
+   * (hierarchical) dimension into a single {@link FacetResult}, to reconstruct
+   * the hierarchy. The results are merged according to the following rules:
+   * <ul>
+   * <li>If two results share the same dimension (first component in their
+   * {@link CategoryPath}), they are merged.
+   * <li>If a result is missing ancestors in the other results, e.g. A/B/C but
+   * no corresponding A or A/B, these nodes are 'filled' with their label,
+   * ordinal and value (obtained from the respective {@link FacetArrays}).
+   * <li>If a result does not share a dimension with other results, it is
+   * returned as is.
+   * </ul>
+   * <p>
+   * <b>NOTE:</b> the returned results are not guaranteed to be in the same
+   * order of the input ones.
+   * 
+   * @param results
+   *          the results to merge
+   * @param taxoReader
+   *          the {@link TaxonomyReader} to use when creating missing ancestor
+   *          nodes
+   * @param dimArrays
+   *          a mapping from a dimension to the respective {@link FacetArrays}
+   *          from which to pull the nodes values
+   */
+  public static List<FacetResult> mergeHierarchies(List<FacetResult> results,
TaxonomyReader taxoReader,
+      Map<String, FacetArrays> dimArrays) throws IOException {
+    final Map<String, List<FacetResult>> dims = new HashMap<>();
+    for (FacetResult fr : results) {
+      String dim = fr.getFacetRequest().categoryPath.components[0];
+      List<FacetResult> frs = dims.get(dim);
+      if (frs == null) {
+        frs = new ArrayList<>();
+        dims.put(dim, frs);
+      }
+      frs.add(fr);
+    }
+
+    final List<FacetResult> res = new ArrayList<>();
+    for (List<FacetResult> frs : dims.values()) {
+      FacetResult mergedResult = frs.get(0);
+      if (frs.size() > 1) {
+        CollectionUtil.introSort(frs, new Comparator<FacetResult>() {
+          @Override
+          public int compare(FacetResult fr1, FacetResult fr2) {
+            return fr1.getFacetRequest().categoryPath.compareTo(fr2.getFacetRequest().categoryPath);
+          }
+        });
+        Map<CategoryPath, FacetResultNode> mergedNodes = new HashMap<>();
+        FacetArrays arrays = dimArrays != null ? dimArrays.get(frs.get(0).getFacetRequest().categoryPath.components[0])
: null;
+        for (FacetResult fr : frs) {
+          FacetResultNode frn = fr.getFacetResultNode();
+          FacetResultNode merged = mergedNodes.get(frn.label);
+          if (merged == null) {
+            CategoryPath parent = frn.label.subpath(frn.label.length - 1);
+            FacetResultNode childNode = frn;
+            FacetResultNode parentNode = null;
+            while (parent.length > 0 && (parentNode = mergedNodes.get(parent))
== null) {
+              int parentOrd = taxoReader.getOrdinal(parent);
+              double parentValue = arrays != null ? fr.getFacetRequest().getValueOf(arrays,
parentOrd) : -1;
+              parentNode = new FacetResultNode(parentOrd, parentValue);
+              parentNode.label = parent;
+              parentNode.subResults = new ArrayList<>();
+              parentNode.subResults.add(childNode);
+              mergedNodes.put(parent, parentNode);
+              childNode = parentNode;
+              parent = parent.subpath(parent.length - 1);
+            }
+
+            // at least one parent was added, so link the final (existing)
+            // parent with the child
+            if (parent.length > 0) {
+              if (!(parentNode.subResults instanceof ArrayList)) {
+                parentNode.subResults = new ArrayList<>(parentNode.subResults);
+              }
+              parentNode.subResults.add(childNode);
+            }
+
+            // for missing FRNs, add new ones with label and value=-1
+            // first time encountered this label, add it and all its children to
+            // the map.
+            mergedNodes.put(frn.label, frn);
+            for (FacetResultNode child : frn.subResults) {
+              addIfNotExist(mergedNodes, child);
+            }
+          } else {
+            if (!(merged.subResults instanceof ArrayList)) {
+              merged.subResults = new ArrayList<>(merged.subResults);
+            }
+            for (FacetResultNode sub : frn.subResults) {
+              // make sure sub wasn't already added
+              sub = addIfNotExist(mergedNodes, sub);
+              if (!merged.subResults.contains(sub)) {
+                merged.subResults.add(sub);
+              }
+            }
+          }
+        }
+        
+        // find the 'first' node to put on the FacetResult root
+        CategoryPath min = null;
+        for (CategoryPath cp : mergedNodes.keySet()) {
+          if (min == null || cp.compareTo(min) < 0) {
+            min = cp;
+          }
+        }
+        FacetRequest dummy = new FacetRequest(min, frs.get(0).getFacetRequest().numResults)
{
+          @Override
+          public double getValueOf(FacetArrays arrays, int idx) {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+          
+          @Override
+          public FacetArraysSource getFacetArraysSource() {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+        };
+        mergedResult = new FacetResult(dummy, mergedNodes.get(min), -1);
+      }
+      res.add(mergedResult);
+    }
+    return res;
+  }
+
   private final FacetRequest facetRequest;
   private final FacetResultNode rootNode;
   private final int numValidDescendants;

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java?rev=1487465&r1=1487464&r2=1487465&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java
(original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java
Wed May 29 12:47:05 2013
@@ -23,7 +23,7 @@ import org.junit.Test;
  */
 
 public class FacetRequestTest extends FacetTestCase {
-
+  
   @Test(expected=IllegalArgumentException.class)
   public void testIllegalNumResults() throws Exception {
     assertNotNull(new CountFacetRequest(new CategoryPath("a", "b"), 0));
@@ -33,7 +33,7 @@ public class FacetRequestTest extends Fa
   public void testIllegalCategoryPath() throws Exception {
     assertNotNull(new CountFacetRequest(null, 1));
   }
-
+  
   @Test
   public void testHashAndEquals() {
     CountFacetRequest fr1 = new CountFacetRequest(new CategoryPath("a"), 8);

Added: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java?rev=1487465&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
(added)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
Wed May 29 12:47:05 2013
@@ -0,0 +1,204 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.FacetTestCase;
+import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.IOUtils;
+import org.junit.Test;
+
+public class FacetResultTest extends FacetTestCase {
+  
+  private Document newDocument(FacetFields facetFields, String... categories) throws IOException
{
+    Document doc = new Document();
+    List<CategoryPath> cats = new ArrayList<CategoryPath>();
+    for (String cat : categories) {
+      cats.add(new CategoryPath(cat, '/'));
+    }
+    facetFields.addFields(doc, cats);
+    return doc;
+  }
+  
+  private void initIndex(Directory indexDir, Directory taxoDir) throws IOException {
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+    FacetFields facetFields = new FacetFields(taxoWriter);
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/March/12", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/March/23", "A/2"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/April/17", "A/3"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/May/18", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2011/January/1", "A/3"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2011/February/12", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2011/February/18", "A/4"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2012/August/15", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2012/July/5", "A/2"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2013/September/13", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2013/September/25", "A/4"));
+    IOUtils.close(indexWriter, taxoWriter);
+  }
+  
+  private void searchIndex(TaxonomyReader taxoReader, IndexSearcher searcher, boolean fillMissingCounts,
String[] exp,
+      String[][] drillDowns, int[] numResults) throws IOException {
+    CategoryPath[][] cps = new CategoryPath[drillDowns.length][];
+    for (int i = 0; i < cps.length; i++) {
+      cps[i] = new CategoryPath[drillDowns[i].length];
+      for (int j = 0; j < cps[i].length; j++) {
+        cps[i][j] = new CategoryPath(drillDowns[i][j], '/');
+      }
+    }
+    DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
+    for (CategoryPath[] cats : cps) {
+      ddq.add(cats);
+    }
+    
+    List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
+    for (CategoryPath[] cats : cps) {
+      for (int i = 0; i < cats.length; i++) {
+        CategoryPath cp = cats[i];
+        int numres = numResults == null ? 2 : numResults[i];
+        // for each drill-down, add itself as well as its parent as requests, so
+        // we get the drill-sideways
+        facetRequests.add(new CountFacetRequest(cp, numres));
+        CountFacetRequest parent = new CountFacetRequest(cp.subpath(cp.length - 1), numres);
+        if (!facetRequests.contains(parent) && parent.categoryPath.length > 0)
{
+          facetRequests.add(parent);
+        }
+      }
+    }
+    
+    FacetSearchParams fsp = new FacetSearchParams(facetRequests);
+    final DrillSideways ds;
+    final Map<String,FacetArrays> dimArrays;
+    if (fillMissingCounts) {
+      dimArrays = new HashMap<String,FacetArrays>();
+      ds = new DrillSideways(searcher, taxoReader) {
+        @Override
+        protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams
fsp) throws IOException {
+          FacetsAccumulator fa = super.getDrillSidewaysAccumulator(dim, fsp);
+          dimArrays.put(dim, fa.facetArrays);
+          return fa;
+        }
+      };
+    } else {
+      ds = new DrillSideways(searcher, taxoReader);
+      dimArrays = null;
+    }
+    
+    final DrillSidewaysResult sidewaysRes = ds.search(null, ddq, 5, fsp);
+    List<FacetResult> facetResults = FacetResult.mergeHierarchies(sidewaysRes.facetResults,
taxoReader, dimArrays);
+    CollectionUtil.introSort(facetResults, new Comparator<FacetResult>() {
+      @Override
+      public int compare(FacetResult o1, FacetResult o2) {
+        return o1.getFacetRequest().categoryPath.compareTo(o2.getFacetRequest().categoryPath);
+      }
+    });
+    assertEquals(exp.length, facetResults.size()); // A + single one for date
+    for (int i = 0; i < facetResults.size(); i++) {
+      assertEquals(exp[i], FacetTestUtils.toSimpleString(facetResults.get(i)));
+    }
+  }
+  
+  @Test
+  public void testMergeHierarchies() throws Exception {
+    Directory indexDir = new RAMDirectory(), taxoDir = new RAMDirectory();
+    initIndex(indexDir, taxoDir);
+    
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+    
+    String[] exp = new String[] { "Date (0)\n  2010 (4)\n  2011 (3)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date"
} }, null);
+    
+    // two dimensions
+    exp = new String[] { "A (0)\n  1 (5)\n  4 (2)\n", "Date (0)\n  2010 (4)\n  2011 (3)\n"
};
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date"
}, new String[] { "A" } }, null);
+    
+    // both parent and child are OR'd
+    exp = new String[] { "Date (-1)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n
   May (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010/March",
"Date/2010/March/23" }}, null);
+    
+    // both parent and child are OR'd (fill counts)
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n
   May (1)\n" };
+    searchIndex(taxoReader, searcher, true, exp, new String[][] { new String[] { "Date/2010/March",
"Date/2010/March/23" }}, null);
+    
+    // same DD twice
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n  2011 (3)\n"
};
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010",
"Date/2010" }}, null);
+    
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n  2011 (3)\n"
};
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010"
}}, null);
+    
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n  2011 (3)\n
   February (2)\n    January (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010",
"Date/2011" }}, null);
+    
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n
   May (1)\n  2011 (3)\n    February (2)\n    January (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010/March",
"Date/2011" }}, null);
+    
+    // Date/2010/April not in top-2 of Date/2010
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n
   May (1)\n    April (1)\n      17 (1)\n  2011 (3)\n    February (2)\n    January (1)\n"
};
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010/March",
"Date/2010/April", "Date/2011" }}, null);
+    
+    // missing ancestors
+    exp = new String[] { "Date (-1)\n  2010 (4)\n    March (2)\n    May (1)\n    April (1)\n
     17 (1)\n  2011 (-1)\n    January (1)\n      1 (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2011/January/1",
"Date/2010/April" }}, null);
+    
+    // missing ancestors (fill counts)
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n    April (1)\n
     17 (1)\n  2011 (3)\n    January (1)\n      1 (1)\n" };
+    searchIndex(taxoReader, searcher, true, exp, new String[][] { new String[] { "Date/2011/January/1",
"Date/2010/April" }}, null);
+    
+    // non-hierarchical dimension with both parent and child
+    exp = new String[] { "A (0)\n  1 (5)\n  4 (2)\n  3 (2)\n" };
+    searchIndex(taxoReader, searcher, INFOSTREAM, exp, new String[][] { new String[] { "A",
"A/3" }}, null);
+    
+    // non-hierarchical dimension with same request but different numResults
+    exp = new String[] { "A (0)\n  1 (5)\n  4 (2)\n  3 (2)\n  2 (2)\n" };
+    searchIndex(taxoReader, searcher, INFOSTREAM, exp, new String[][] { new String[] { "A",
"A" }}, new int[] { 2, 4 });
+    
+    IOUtils.close(indexReader, taxoReader);
+    
+    IOUtils.close(indexDir, taxoDir);
+  }
+  
+}



Mime
View raw message