lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sh...@apache.org
Subject svn commit: r1141060 [6/21] - in /lucene/dev/branches/branch_3x: dev-tools/eclipse/ dev-tools/maven/lucene/contrib/facet/ lucene/contrib/ lucene/contrib/facet/ lucene/contrib/facet/src/ lucene/contrib/facet/src/examples/ lucene/contrib/facet/src/exampl...
Date Wed, 29 Jun 2011 11:53:19 GMT
Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,797 @@
+package org.apache.lucene.facet.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.util.PriorityQueue;
+
+import org.apache.lucene.facet.search.params.FacetRequest;
+import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.search.results.MutableFacetResultNode;
+import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
+import org.apache.lucene.util.collections.IntIterator;
+import org.apache.lucene.util.collections.IntToObjectMap;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Generates {@link FacetResult} from the count arrays aggregated for a particular 
+ * {@link FacetRequest}.
+ * The generated {@link FacetResult} is a subtree of the taxonomy tree.
+ * Its root node, {@link FacetResult#getFacetResultNode()}, 
+ * is the facet specified by {@link FacetRequest#getCategoryPath()},
+ * and the enumerated children, {@link FacetResultNode#getSubResults()}, of each node in that 
+ * {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) among its children
+ * in the taxonomy.
+ * Top in the sense {@link FacetRequest#getSortBy()},
+ * which can be by the values aggregated in the count arrays, or by ordinal numbers;
+ * also specified is the sort order, {@link FacetRequest#getSortOrder()}, 
+ * ascending or descending, of these values or ordinals before their top K are selected. 
+ * The depth (number of levels excluding the root) of the 
+ * {@link FacetResult} tree is specified by {@link FacetRequest#getDepth()}.  
+ * <p>
+ * Because the number of selected children of each node is restricted,
+ * and not the overall number of nodes in the {@link FacetResult}, facets not selected 
+ * into {@link FacetResult} might have better values, or ordinals, (typically,
+ * higher counts), than facets that are selected into the {@link FacetResult}.
+ * <p>
+ * The generated {@link FacetResult} also provides 
+ * {@link FacetResult#getNumValidDescendants()}, which returns the total number of facets 
+ * that are descendants of the root node, no deeper than {@link FacetRequest#getDepth()}, and
+ * which have a valid value. The root node itself is not counted here. 
+ * Valid value is determined by the {@link FacetResultsHandler}. 
+ * {@link TopKInEachNodeHandler} defines valid as != 0. 
+ * <p>
+ * <b>NOTE:</b> this code relies on the assumption that {@link TaxonomyReader#INVALID_ORDINAL} == -1, a smaller
+ * value than any valid ordinal.
+ * 
+ * @lucene.experimental
+ */
+public class TopKInEachNodeHandler extends FacetResultsHandler {
+
+  /**
+   * Construct a handler generating results for the given request over the
+   * given taxonomy; simply delegates both to {@link FacetResultsHandler}.
+   *
+   * @param taxonomyReader taxonomy against which facets were aggregated
+   * @param facetRequest the request for which results are generated
+   */
+  public TopKInEachNodeHandler(TaxonomyReader taxonomyReader,
+                                FacetRequest facetRequest) {
+    super(taxonomyReader, facetRequest);
+  }
+
+  /**
+   * Recursively explore all facets that can be potentially included in the
+   * {@link FacetResult} to be generated, and that belong to the given
+   * partition, so that values can be examined and collected. For each such
+   * node, gather its top K ({@link FacetRequest#getNumResults()}) children
+   * among its children that are encountered in the given particular partition
+   * (aka current counting list).
+   * 
+   * @return {@link IntermediateFacetResult} consisting of
+   *         {@link IntToObjectMap} that maps potential
+   *         {@link FacetResult} nodes to their top K children encountered in
+   *         the current partition. Note that the mapped potential tree nodes
+   *         need not belong to the given partition, only the top K children
+   *         mapped to. The aim is to identify nodes that are certainly excluded
+   *         from the {@link FacetResult} to be eventually (after going through
+   *         all the partitions) returned by this handler, because they have K
+   *         better siblings, already identified in this partition. For the
+   *         identified excluded nodes, we only count number of their
+   *         descendants in the subtree (to be included in
+   *         {@link FacetResult#getNumValidDescendants()}), but not bother with
+   *         selecting top K in these generations, which, by definition, are,
+   *         too, excluded from the FacetResult tree.
+   * @param arrays the already filled in count array, potentially only covering
+   *        one partition: the ordinals ranging from <code>offset</code> to
+   *        <code>offset</code> + the length of the count arrays within
+   *        <code>arrays</code> (exclusive)
+   * @param offset the first (smallest) ordinal of the given partition
+   * @throws IOException in case
+   *         {@link TaxonomyReader#getOrdinal(org.apache.lucene.facet.taxonomy.CategoryPath)}
+   *         does.
+   * @see FacetResultsHandler#fetchPartitionResult(FacetArrays, int)
+   */
+  @Override
+  public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException {
+
+    // get the root of the result tree to be returned, and the depth of that result tree
+    // (depth means number of node levels excluding the root). 
+    int rootNode = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
+    if (rootNode == TaxonomyReader.INVALID_ORDINAL) {
+      return null;
+    }
+
+    int K = Math.min(facetRequest.getNumResults(),taxonomyReader.getSize()); // number of best results in each node
+
+    // this will grow into the returned IntermediateFacetResult
+    IntToObjectMap<AACO> AACOsOfOnePartition = new IntToObjectMap<AACO>();
+
+    int partitionSize = arrays.getArraysLength(); // all partitions, except, possibly, the last,
+    // have the same length. Hence modulo is OK.
+
+    int depth = facetRequest.getDepth();
+
+    if (depth == 0) {
+      // Need to only have root node.
+      IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
+          facetRequest, AACOsOfOnePartition);
+      if (isSelfPartition(rootNode, arrays, offset)) {
+        tempFRWH.isRootNodeIncluded = true;
+        tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
+      }
+      return tempFRWH;
+    }
+
+    // cap depth: the stacks below are sized depth+2 (guard against extreme requests)
+    if (depth > Short.MAX_VALUE - 3) {
+      depth = Short.MAX_VALUE -3;
+    }
+
+    int endOffset = offset + partitionSize; // one past the largest ordinal in the partition
+    ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
+    int[] youngestChild = childrenArray.getYoungestChildArray();
+    int[] olderSibling = childrenArray.getOlderSiblingArray();
+    int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0, 
+    // in the tree. These include those selected as top K in each node, and all the others that
+    // were not. Not including rootNode
+
+    // the following priority queue will be used again and again for each node recursed into
+    // to select its best K children among its children encountered in the given partition
+    PriorityQueue<AggregatedCategory> pq = 
+      new AggregatedCategoryHeap(K, this.getSuitableACComparator());
+
+    // reusables will feed the priority queue in each use 
+    AggregatedCategory [] reusables = new AggregatedCategory[2+K];
+    for (int i = 0; i < reusables.length; i++) {
+      reusables[i] = new AggregatedCategory(1,0);
+    }
+
+    /*
+     * The returned map is built by a recursive visit of potential tree nodes. Nodes 
+     * determined to be excluded from the FacetResult are not recursively explored as others,
+     * they are only recursed in order to count the number of their descendants.
+     * Also, nodes such that neither they nor any of their descendants can be mapped to facets
+     * encountered in this partition are, too, explored no further. These are facets whose ordinal 
+     * numbers are greater than the ordinals of the given partition. (recall that the Taxonomy
+     * maintains that a parent ordinal is smaller than any of its descendants' ordinals).  
+     * So, when scanning over all children of a potential tree node n: (1) all children with ordinal number
+     * greater than those in the given partition are skipped over, (2) among the children of n residing
+     * in this partition, the best K children are selected (using pq) for usual further recursion 
+     * and the rest (those rejected out from the pq) are only recursed for counting total number
+     * of descendants, and (3) all the children of ordinal numbers smaller than the given partition 
+     * are further explored in the usual way, since these may lead to descendants residing in this partition.
+     * 
+     * ordinalStack drives the recursive descent. 
+     * Top of stack holds the current node which we recurse from.
+     * ordinalStack[0] holds the root of the facetRequest, and
+     * it is always maintained that parent(ordinalStack[i]) = ordinalStack[i-1]. 
+     * localDepth points to the current top of ordinalStack.
+     * Only top of ordinalStack can be TaxonomyReader.INVALID_ORDINAL, and this if and only if
+     * the element below it explored all its relevant children.
+     */
+    int[] ordinalStack = new int[depth+2]; // for 0 and for invalid on top
+    ordinalStack[0] = rootNode;
+    int localDepth = 0;
+
+    /* 
+     * bestSignlingsStack[i] maintains the best K children of ordinalStack[i-1], namely,
+     * the best K siblings of ordinalStack[i], best K among those residing in the given partition.
+     * Note that the residents of ordinalStack need not belong
+     * to the current partition, only the residents of bestSignlingsStack.
+     * When exploring the children of ordinalStack[i-1] that reside in the current partition
+     * (after the top K of them have been determined and stored into bestSignlingsStack[i]),
+     * siblingExplored[i] points into bestSignlingsStack[i], to the child now explored, hence
+     * residing in ordinalStack[i], and firstToTheLeftOfPartition[i] holds the largest ordinal of
+     * a sibling smaller than the ordinals in the partition.  
+     * When siblingExplored[i] == max int, the top K siblings of ordinalStack[i] among those siblings
+     * that reside in this partition have not been determined yet. 
+     * if siblingExplored[i] < 0, the node in ordinalStack[i] is to the left of partition 
+     * (i.e. of a smaller ordinal than the current partition) 
+     * (step (3) above is executed for the children of ordinalStack[i-1])   
+     */
+    int[][] bestSignlingsStack = new int[depth+2][];
+    int[] siblingExplored = new int[depth+2];
+    int[] firstToTheLeftOfPartition = new int [depth+2];
+
+    int tosOrdinal; // top of stack element, the ordinal at the top of stack
+
+    /*
+     * to start the loop, complete the data structures for root node: 
+     * push its youngest child to ordinalStack; make a note in siblingExplored[] that the children
+     * of rootNode, which reside in the current partition have not been read yet to select the top
+     * K of them.  Also, mark rootNode as if, related to its parent, rootNode belongs to the children
+     * of ordinal numbers smaller than those of the current partition (this will ease on end condition -- 
+     * we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that 
+     * it went that down)
+     */
+    ordinalStack[++localDepth] = youngestChild[rootNode];
+    siblingExplored[localDepth] = Integer.MAX_VALUE;  // we have not verified position wrt current partition
+    siblingExplored[0] = -1; // as if rootNode resides to the left of current position
+
+    /*
+     * now the whole recursion: loop as long as stack is not empty of elements descendants of 
+     * facetRequest's root.
+     */
+
+    while (localDepth > 0) {
+      tosOrdinal = ordinalStack[localDepth];
+      if (tosOrdinal == TaxonomyReader.INVALID_ORDINAL) {
+        // the brotherhood that has been occupying the top of stack is all exhausted.  
+        // Hence, element below tos, namely, father of tos, has all its children, 
+        // and itself, all explored. 
+        localDepth--;
+        // replace this father, now on top of stack, by this father's sibling:
+        // this parent's ordinal can not be greater than current partition, as otherwise
+        // its child, now just removed, would not have been pushed on it.
+        // so the father is either inside the partition, or smaller ordinal
+        if (siblingExplored[localDepth] < 0 ) {
+          ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
+          continue;
+        } 
+        // at this point, siblingExplored[localDepth] between 0 and number of bestSiblings
+        // it can not be max int
+        siblingExplored[localDepth]--;
+        if (siblingExplored[localDepth] == -1 ) {
+          //siblings residing in the partition have been all processed, we now move
+          // to those of ordinal numbers smaller than the partition
+          ordinalStack[localDepth] = firstToTheLeftOfPartition[localDepth];
+        } else {
+          // still explore siblings residing in the partition
+          // just move to the next one
+          ordinalStack[localDepth] = bestSignlingsStack[localDepth][siblingExplored[localDepth]];
+        }
+        continue;
+      } // endof tosOrdinal is invalid, and hence removed, and its parent was replaced by this 
+      // parent's sibling
+
+      // now try to push a kid, but first look at tos whether it 'deserves' its kids explored:
+      // it is not to the right of current partition, and we know whether to only count or to 
+      // select best K siblings.
+      if (siblingExplored[localDepth] == Integer.MAX_VALUE) {
+        //tosOrdinal was not examined yet for its position relative to current partition
+        // and the best K of current partition, among its siblings, have not been determined yet
+        while (tosOrdinal >= endOffset) {
+          tosOrdinal = olderSibling[tosOrdinal];
+        }
+        // now it is inside. Run it and all its siblings inside the partition through a heap
+        // and in doing so, count them, find best K, and sum into residue
+        double residue = 0f;  // the sum of all the siblings from this partition that do not make 
+        // it to top K
+        pq.clear();
+
+        //reusables are consumed as from a stack. The stack starts full and returns full.
+        int tosReuslables = reusables.length -1;  
+
+        while (tosOrdinal >= offset) { // while tosOrdinal belongs to the given partition; here, too, we use the fact
+          // that TaxonomyReader.INVALID_ORDINAL == -1 < offset
+          double value = facetRequest.getValueOf(arrays, tosOrdinal % partitionSize);
+          if (value != 0) { // the value of tosOrdinal is not 0, it is to be considered.  
+            totalNumOfDescendantsConsidered++;
+
+            // consume one reusable, and push to the priority queue
+            AggregatedCategory ac = reusables[tosReuslables--];  
+            ac.ordinal = tosOrdinal;
+            ac.value = value; 
+            ac = pq.insertWithOverflow(ac);
+            if (null != ac) {
+              // non-null return means the heap was full and something was evicted
+              residue += ac.value;
+              // TODO (Facet): could it be that we need to do something
+              // else, not add, depending on the aggregator?
+
+              /* when a facet is excluded from top K, because already in this partition it has
+               * K better siblings, it is only recursed for count only.
+               */ 
+              // update totalNumOfDescendants by the now excluded node and all its descendants
+              totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap
+              // and now return it and all its descendants. These will never make it to FacetResult
+              totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild, 
+                  olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth);
+              reusables[++tosReuslables] = ac;
+            }
+          }
+          tosOrdinal = olderSibling[tosOrdinal];  
+        }
+        // now pq has best K children of ordinals that belong to the given partition.   
+        // Populate a new AACO with them.
+        // tosOrdinal is now first sibling smaller than partition, make a note of that
+        firstToTheLeftOfPartition[localDepth] = tosOrdinal;
+        int aaci = pq.size();
+        int[] ords = new int[aaci];
+        double [] vals = new double [aaci];
+        while (aaci > 0) {
+          AggregatedCategory ac = pq.pop();
+          ords[--aaci] = ac.ordinal;
+          vals[aaci] = ac.value;
+          reusables[++tosReuslables] = ac;
+        }
+        // if more than 0 ordinals, add this AACO to the map to be returned, 
+        // and add ords to sibling stack, and make a note in siblingExplored that these are to 
+        // be visited now
+        if (ords.length > 0) {
+          AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
+          bestSignlingsStack[localDepth] = ords;
+          siblingExplored[localDepth] = ords.length-1;
+          ordinalStack[localDepth] = ords[ords.length-1];
+        } else {
+          // no sibling ordinals of tosOrdinal in current partition, move to the left of it
+          // tosOrdinal is already there (to the left of partition).
+          // make a note of it in siblingExplored
+          ordinalStack[localDepth] = tosOrdinal;
+          siblingExplored[localDepth] = -1;
+        }
+        continue;
+      } // endof we did not check the position of a valid ordinal wrt partition
+
+      // now tosOrdinal is a valid ordinal, inside partition or to the left of it, we need 
+      // to push its kids on top of it, if not too deep. 
+      // Make a note that we did not check them yet
+      if (localDepth >= depth) { 
+        // localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
+        ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;
+        continue;
+      }
+      ordinalStack[++localDepth] = youngestChild[tosOrdinal];
+      siblingExplored[localDepth] = Integer.MAX_VALUE;
+    } // endof loop while stack is not empty
+
+    // now generate a TempFacetResult from AACOsOfOnePartition, and consider self.
+    IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
+        facetRequest, AACOsOfOnePartition);
+    if (isSelfPartition(rootNode, arrays, offset)) {
+      tempFRWH.isRootNodeIncluded = true;
+      tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
+    }
+    tempFRWH.totalNumOfFacetsConsidered = totalNumOfDescendantsConsidered;
+    return tempFRWH;
+
+  }
+
+  /**
+   * Recursively count <code>ordinal</code>, whose depth is <code>currentDepth</code>, 
+   * and all its descendants down to <code>maxDepth</code> (including), 
+   * descendants whose value in the count arrays, <code>arrays</code>, is != 0. 
+   * The count arrays only includes the current partition, from <code>offset</code>, to (exclusive) 
+   * <code>endOffset</code>.
+   * It is assumed that <code>ordinal</code> < <code>endOffset</code>, 
+   * otherwise, not <code>ordinal</code>, and none of its descendants, reside in
+   * the current partition. <code>ordinal</code> < <code>offset</code> is allowed, 
+   * as ordinal's descendants might be >= <code>offset</code>.
+   * 
+   * @param ordinal a facet ordinal. 
+   * @param youngestChild mapping a given ordinal to its youngest child in the taxonomy (of largest ordinal number),
+   * or to -1 if has no children.  
+   * @param olderSibling  mapping a given ordinal to its older sibling, or to -1
+   * @param arrays  values for the ordinals in the given partition
+   * @param offset  the first (smallest) ordinal in the given partition
+   * @param partitionSize  number of ordinals in the given partition
+   * @param endOffset one larger than the largest ordinal that belong to this partition
+   * @param currentDepth the depth of ordinal in the TaxonomyTree (relative to the root node of the facetRequest)
+   * @param maxDepth maximal depth of descendants to be considered here (measured relative to rootnode of the 
+   * facetRequest).
+   * 
+   * @return the number of nodes, from ordinal down its descendants, of depth <= maxDepth,
+   * which reside in the current partition, and whose value != 0
+   */
+  private int countOnly(int ordinal, int[] youngestChild, int[] olderSibling,
+                        FacetArrays arrays, int partitionSize, int offset,
+                        int endOffset, int currentDepth, int maxDepth) {
+    // Count ordinal itself when it falls inside the partition and carries a value.
+    int count = (ordinal >= offset
+        && 0 != facetRequest.getValueOf(arrays, ordinal % partitionSize)) ? 1 : 0;
+
+    // Children would exceed the requested result-tree depth; stop here.
+    if (currentDepth >= maxDepth) {
+      return count;
+    }
+
+    // Skip children at or beyond the partition end; by the taxonomy invariant
+    // (a parent's ordinal is smaller than its descendants'), their whole
+    // subtrees lie beyond it as well.
+    int child = youngestChild[ordinal];
+    while (child >= endOffset) {
+      child = olderSibling[child];
+    }
+
+    // Recurse into the remaining children; the sibling chain terminates with
+    // TaxonomyReader.INVALID_ORDINAL (== -1, smaller than any legal ordinal).
+    for (; child > TaxonomyReader.INVALID_ORDINAL; child = olderSibling[child]) {
+      count += countOnly(child, youngestChild, olderSibling, arrays,
+          partitionSize, offset, endOffset, currentDepth + 1, maxDepth);
+    }
+    return count;
+  }
+
+  /**
+   * Merge several partitions' {@link IntermediateFacetResult}-s into one of the
+   * same format
+   * 
+   * @see FacetResultsHandler#mergeResults(IntermediateFacetResult...)
+   */
+  @Override
+  public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults)
+  throws ClassCastException, IllegalArgumentException {
+
+    if (tmpResults.length == 0) {
+      return null;
+    }
+
+    int i=0;
+    // skip over null tmpResults
+    for (; (i < tmpResults.length)&&(tmpResults[i] == null); i++) {}
+    if (i == tmpResults.length) {
+      // all inputs are null
+      return null;
+    }
+
+    // i points to the first non-null input 
+    int K = this.facetRequest.getNumResults(); // number of best results in each node
+    IntermediateFacetResultWithHash tmpToReturn = (IntermediateFacetResultWithHash)tmpResults[i++];
+
+    // now loop over the rest of tmpResults and merge each into tmpToReturn
+    for ( ; i < tmpResults.length; i++) {
+      IntermediateFacetResultWithHash tfr = (IntermediateFacetResultWithHash)tmpResults[i];
+      tmpToReturn.totalNumOfFacetsConsidered += tfr.totalNumOfFacetsConsidered;
+      if (tfr.isRootNodeIncluded) {
+        tmpToReturn.isRootNodeIncluded = true;
+        tmpToReturn.rootNodeValue = tfr.rootNodeValue;
+      }
+      // now merge the HashMap of tfr into this of tmpToReturn
+      IntToObjectMap<AACO> tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs;
+      IntToObjectMap<AACO> tfrMapToACCOs = tfr.mapToAACOs;
+      IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator();
+      //iterate over all ordinals in tfr that are maps to their children (and the residue over 
+      // non included children)
+      while (tfrIntIterator.hasNext()) {
+        int tfrkey = tfrIntIterator.next();
+        AACO tmpToReturnAACO = null;
+        if (null == (tmpToReturnAACO = tmpToReturnMapToACCOs.get(tfrkey))) {
+          // if tmpToReturn does not have any kids of tfrkey, map all the kids
+          // from tfr to it as one package, along with their residue
+          tmpToReturnMapToACCOs.put(tfrkey, tfrMapToACCOs.get(tfrkey));
+        } else {
+          // merge the best K children of tfrkey as appear in tmpToReturn and in tfr
+          AACO tfrAACO = tfrMapToACCOs.get(tfrkey);
+          int resLength = tfrAACO.ordinals.length + tmpToReturnAACO.ordinals.length;
+          if (K < resLength) {
+            resLength = K;
+          }
+          int[] resOrds = new int [resLength];
+          double[] resVals = new double [resLength];
+          double resResidue = tmpToReturnAACO.residue + tfrAACO.residue;
+          int indexIntoTmpToReturn = 0;
+          int indexIntoTFR = 0;
+          ACComparator merger = getSuitableACComparator(); // by facet Request
+          // classic two-way merge: both AACO lists are already in result order
+          for (int indexIntoRes = 0; indexIntoRes < resLength; indexIntoRes++) {
+            if (indexIntoTmpToReturn >= tmpToReturnAACO.ordinals.length) {
+              //tmpToReturnAACO (former result to return) ran out of indices
+              // it is all merged into resOrds and resVal
+              resOrds[indexIntoRes] = tfrAACO.ordinals[indexIntoTFR];
+              resVals[indexIntoRes] = tfrAACO.values[indexIntoTFR];
+              indexIntoTFR++;
+              continue;
+            }
+            if (indexIntoTFR >= tfrAACO.ordinals.length) {
+              // tfr ran out of indices
+              resOrds[indexIntoRes] = tmpToReturnAACO.ordinals[indexIntoTmpToReturn];
+              resVals[indexIntoRes] = tmpToReturnAACO.values[indexIntoTmpToReturn];
+              indexIntoTmpToReturn++;
+              continue;
+            }
+            // select which goes now to res: next (ord, value) from tmpToReturn or from tfr:
+            if (merger.leftGoesNow(  tmpToReturnAACO.ordinals[indexIntoTmpToReturn], 
+                tmpToReturnAACO.values[indexIntoTmpToReturn], 
+                tfrAACO.ordinals[indexIntoTFR], 
+                tfrAACO.values[indexIntoTFR])) {
+              resOrds[indexIntoRes] = tmpToReturnAACO.ordinals[indexIntoTmpToReturn];
+              resVals[indexIntoRes] = tmpToReturnAACO.values[indexIntoTmpToReturn];
+              indexIntoTmpToReturn++;
+            } else {
+              resOrds[indexIntoRes] = tfrAACO.ordinals[indexIntoTFR];
+              resVals[indexIntoRes] = tfrAACO.values[indexIntoTFR];
+              indexIntoTFR++;
+            }
+          } // end of merge of best kids of tfrkey that appear in tmpToReturn and its kids that appear in tfr
+          // altogether yielding no more than the best K kids for tfrkey, now to appear in the new shape of 
+          // tmpToReturn
+
+          // whatever did not make it into the merged top K is folded into the residue
+          while (indexIntoTmpToReturn < tmpToReturnAACO.ordinals.length) {
+            resResidue += tmpToReturnAACO.values[indexIntoTmpToReturn++];
+          }
+          while (indexIntoTFR < tfrAACO.ordinals.length) {
+            resResidue += tfrAACO.values[indexIntoTFR++];
+          }
+          //update the list of best kids of tfrkey as appear in tmpToReturn
+          tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals, resResidue));
+        } // endof need to merge both AACO -- children and residue for same ordinal
+
+      } // endof loop over all ordinals in tfr 
+    } // endof loop over all temporary facet results to merge
+
+    return tmpToReturn;
+  }
+
+  /**
+   * A bounded heap of {@link AggregatedCategory}, ordered by the supplied
+   * {@link ACComparator} so that entries are popped in reverse of the
+   * requested result order.
+   */
+  private static class AggregatedCategoryHeap extends PriorityQueue<AggregatedCategory> {
+
+    private ACComparator comparator;
+
+    public AggregatedCategoryHeap(int size, ACComparator comparator) {
+      this.comparator = comparator;
+      initialize(size);
+    }
+
+    @Override
+    protected boolean lessThan(AggregatedCategory a, AggregatedCategory b) {
+      // a is "less" when b precedes a in the order expressed by the facet request
+      return comparator.leftGoesNow(b.ordinal, b.value, a.ordinal, a.value);
+    }
+
+  }
+
+  /**
+   * A bounded heap of {@link FacetResultNode}, ordered by the supplied
+   * {@link ACComparator}; the counterpart of {@link AggregatedCategoryHeap}
+   * for already-materialized result nodes.
+   */
+  private static class ResultNodeHeap extends PriorityQueue<FacetResultNode> {
+
+    private ACComparator comparator;
+
+    public ResultNodeHeap(int size, ACComparator comparator) {
+      this.comparator = comparator;
+      initialize(size);
+    }
+
+    @Override
+    protected boolean lessThan(FacetResultNode a, FacetResultNode b) {
+      // a is "less" when b precedes a in the order expressed by the facet request
+      return comparator.leftGoesNow(b.getOrdinal(), b.getValue(), a.getOrdinal(), a.getValue());
+    }
+
+  }
+
+  /**
+   * Picks the {@link ACComparator} that realizes the order expressed in the
+   * {@link FacetRequest}: sort field per {@link FacetRequest#getSortBy()},
+   * direction per {@link FacetRequest#getSortOrder()}.
+   *
+   * @return the matching comparator, or null for an unrecognized sort-by value
+   */
+  private ACComparator getSuitableACComparator() {
+    boolean ascending = (facetRequest.getSortOrder() == SortOrder.ASCENDING);
+    switch (facetRequest.getSortBy()) {
+      case VALUE:
+        return ascending ? new AscValueACComparator() : new DescValueACComparator();
+      case ORDINAL:
+        return ascending ? new AscOrdACComparator() : new DescOrdACComparator();
+      default:
+        return null; // no comparator defined for other sort-by kinds
+    }
+  }
+
+  /**
+   * A comparator of two Aggregated Categories according to the order
+   * (ascending / descending) and item (ordinal or value) specified in the 
+   * FacetRequest for the FacetResult to be generated
+   */
+
+  private static abstract class ACComparator {
+    ACComparator() { }
+    /** Returns true when (ord1, val1) should precede (ord2, val2) in the requested result order. */
+    protected abstract boolean leftGoesNow (int ord1, double val1, int ord2, double val2); 
+  }
+
+  /** Orders by ascending aggregated value: the smaller value goes first. */
+  private static final class AscValueACComparator extends ACComparator {
+
+    AscValueACComparator() { }
+
+    @Override
+    protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
+      // equivalent to val1 < val2, operands flipped
+      return val2 > val1;
+    }
+  }
+
+  /** Orders by descending aggregated value: the larger value goes first. */
+  private static final class DescValueACComparator extends ACComparator {
+
+    DescValueACComparator() { }
+
+    @Override
+    protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
+      // equivalent to val1 > val2, operands flipped
+      return val2 < val1;
+    }
+  }
+
+  /** Orders by ascending ordinal: the smaller ordinal goes first. */
+  private static final class AscOrdACComparator extends ACComparator {
+
+    AscOrdACComparator() { }
+
+    @Override
+    protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
+      // equivalent to ord1 < ord2, operands flipped
+      return ord2 > ord1;
+    }
+  }
+
+  /** Orders by descending ordinal: the larger ordinal goes first. */
+  private static final class DescOrdACComparator extends ACComparator {
+
+    DescOrdACComparator() { }
+
+    @Override
+    protected boolean leftGoesNow (int ord1, double val1, int ord2, double val2) {
+      // equivalent to ord1 > ord2, operands flipped
+      return ord2 < ord1;
+    }
+  }
+
+  /**
+   * Intermediate result to hold counts from one or more partitions processed
+   * thus far. Its main field, constructor parameter <i>mapToAACOs</i>, is a map
+   * from ordinals to AACOs. The AACOs mapped to contain ordinals and values
+   * encountered in the count arrays of the partitions processed thus far. The
+   * ordinals mapped from are their parents, and they may be not contained in
+   * the partitions processed thus far. All nodes belong to the taxonomy subtree
+   * defined at the facet request, constructor parameter <i>facetReq</i>, by its
+   * root and depth.
+   */
+  public static class IntermediateFacetResultWithHash implements IntermediateFacetResult {
+    protected IntToObjectMap<AACO> mapToAACOs;
+    FacetRequest facetRequest;
+    boolean isRootNodeIncluded; // among the ordinals in the partitions 
+    // processed thus far
+    double rootNodeValue; // the value of it, in case encountered.
+    int totalNumOfFacetsConsidered; // total number of facets 
+    // which belong to facetRequest subtree and have value != 0,
+    // and have been encountered thus far in the partitions processed. 
+    // root node of result tree is not included in this count.
+
+    /**
+     * Create an empty intermediate result: no root node seen yet, no facets
+     * counted; only the parent-to-children map and the request are attached.
+     */
+    public IntermediateFacetResultWithHash(FacetRequest facetReq,
+                                    IntToObjectMap<AACO> mapToAACOs) {
+      this.mapToAACOs = mapToAACOs;
+      this.facetRequest = facetReq;
+      this.isRootNodeIncluded = false;
+      this.rootNodeValue = 0.0;
+      this.totalNumOfFacetsConsidered = 0;
+    }
+
+    /** The facet request for which this intermediate result was accumulated. */
+    public FacetRequest getFacetRequest() {
+      return this.facetRequest;
+    }
+  } // endof FacetResultWithHash
+
+  /**
+   * One entry of a filled-up count array: a category ordinal paired with the
+   * value aggregated for it (typically, that value is the ordinal's count).
+   */
+  private static final class AggregatedCategory {
+    int ordinal;
+    double value;
+    AggregatedCategory(int ord, double val) {
+      ordinal = ord;
+      value = val;
+    }
+  }
+
+  /**
+   * Holds an array of {@link AggregatedCategory}, stored for compactness as a
+   * pair of parallel arrays (<i>ordinals</i> / <i>values</i>) rather than one
+   * array of pairs. The entries are siblings -- potential nodes of the
+   * {@link FacetResult} tree (descendants of the root node, no deeper than the
+   * requested depth). At most K ( = {@link FacetRequest#getNumResults()})
+   * siblings are kept, and <i>residue</i> accumulates the values of the
+   * siblings that did not make the top K.
+   */
+  private static final class AACO {
+    int [] ordinals; // ordinals of the best K children, sorted from best to least
+    double [] values; // the respective values for these children
+    double residue; // sum of values of all other children, that did not get into top K
+    AACO(int[] topOrdinals, double[] topValues, double residueSum) {
+      ordinals = topOrdinals;
+      values = topValues;
+      residue = residueSum;
+    }
+  }
+
+  /**
+   * Recursively label the first facetRequest.getNumLabel() sub results 
+   * of the root of a given {@link FacetResult}, or of an already labeled node in it.
+   * I.e., a node is labeled only if it is the root or all its ancestors are labeled.
+   * (Doc comment placed before the annotation so the javadoc tool picks it up;
+   * it was previously between {@code @Override} and the method, where javadoc
+   * ignores it.)
+   * @param facetResult result whose nodes are to be labeled; null is a no-op
+   * @throws IOException on error reading category paths from the taxonomy
+   */
+  @Override
+  public void labelResult(FacetResult facetResult) throws IOException {
+    if (facetResult == null) {
+      return; // any result to label?
+    }
+    FacetResultNode rootNode = facetResult.getFacetResultNode();
+    recursivelyLabel(rootNode, facetRequest.getNumLabel());
+  }
+
+  /**
+   * Attach a label (category path) to {@code node}, then to the first
+   * {@code numToLabel} of its children, recursively.
+   * @param node subtree root to label; null is a no-op
+   * @param numToLabel maximal number of children labeled at each node; a
+   *        value &lt;= 0 labels the node itself but none of its children
+   * @throws IOException on error reading the category path from the taxonomy
+   */
+  private void recursivelyLabel(FacetResultNode node, int numToLabel) throws IOException {
+    if (node == null) {
+      return;
+    }
+    node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
+    if (null == node.getSubResults()) {
+      return;  // if node has no children -- done 
+    }
+
+    // otherwise, label the first numToLabel of these children, and recursively -- their children.
+    int numLabeled = 0;
+    for (FacetResultNode frn : node.getSubResults()) { 
+      // check the quota BEFORE labeling: previously the check came after the
+      // recursive call, so numToLabel == 0 still labeled one child.
+      if (numLabeled++ >= numToLabel) {
+        return;
+      }
+      recursivelyLabel(frn, numToLabel);
+    }
+  }
+
+  /**
+   * Verifies that the children of each node are sorted by the order the
+   * facetRequest specifies. The values in these nodes may have changed due to
+   * a re-count (for example, following accumulation by Sampling), so the
+   * children of every node are tested and re-ordered here if necessary.
+   */
+  @Override
+  public FacetResult rearrangeFacetResult(FacetResult facetResult) {
+    PriorityQueue<FacetResultNode> heap =
+      new ResultNodeHeap(this.facetRequest.getNumResults(), this.getSuitableACComparator());
+    MutableFacetResultNode root = (MutableFacetResultNode) facetResult.getFacetResultNode(); // safe cast
+    rearrangeChilrenOfNode(root, heap);
+    return facetResult;
+  }
+
+  /**
+   * Re-sort the children of {@code node} via {@code nodesHeap} (cleared on
+   * entry and reused across recursive calls), then recurse into each child.
+   * Popped nodes are prepended to the list, reversing the heap's pop order.
+   */
+  private void rearrangeChilrenOfNode(FacetResultNode node,
+                                      PriorityQueue<FacetResultNode> nodesHeap) {
+    nodesHeap.clear(); // just to be safe
+    for (FacetResultNode child : node.getSubResults()) {
+      nodesHeap.add(child);
+    }
+    ArrayList<FacetResultNode> sorted = new ArrayList<FacetResultNode>(nodesHeap.size());
+    while (nodesHeap.size() > 0) {
+      sorted.add(0, nodesHeap.pop()); // prepend: final list is reverse pop order
+    }
+    ((MutableFacetResultNode) node).setSubResults(sorted);
+    for (FacetResultNode child : node.getSubResults()) {
+      rearrangeChilrenOfNode(child, nodesHeap);
+    }
+  }
+
+  /**
+   * Render the accumulated intermediate result into a {@link FacetResult}
+   * tree rooted at the requested category. Returns null when there is no
+   * intermediate result or the requested category is not in the taxonomy.
+   */
+  @Override
+  public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException {
+    IntermediateFacetResultWithHash hashedResult = (IntermediateFacetResultWithHash) tmpResult;
+    int rootOrdinal = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
+    if (hashedResult == null || rootOrdinal == TaxonomyReader.INVALID_ORDINAL) {
+      return null;
+    }
+    // NaN marks "root value never encountered in any processed partition"
+    double rootValue = hashedResult.isRootNodeIncluded ? hashedResult.rootNodeValue : Double.NaN;
+    MutableFacetResultNode root = generateNode(rootOrdinal, rootValue, hashedResult.mapToAACOs);
+    return new FacetResult(hashedResult.facetRequest, root, hashedResult.totalNumOfFacetsConsidered);
+  }
+
+  /**
+   * Build the result node for {@code ordinal} carrying value {@code val},
+   * and recursively its sub results, from the children recorded in
+   * {@code mapToAACOs}. An ordinal absent from the map yields a leaf node.
+   */
+  private MutableFacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
+    MutableFacetResultNode node = new MutableFacetResultNode(ordinal, val);
+    AACO aaco = mapToAACOs.get(ordinal);
+    if (aaco == null) {
+      return node; // no children recorded for this ordinal
+    }
+    List<FacetResultNode> children = new ArrayList<FacetResultNode>();
+    for (int i = 0; i < aaco.ordinals.length; i++) {
+      children.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));
+    }
+    node.setSubResults(children);
+    node.setResidue(aaco.residue);
+    return node;
+  }
+
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,188 @@
+package org.apache.lucene.facet.search;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.LockObtainFailedException;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.aggregator.Aggregator;
+import org.apache.lucene.facet.search.aggregator.CountingAggregator;
+import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.search.cache.CategoryListData;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.util.PartitionsUtils;
+import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Maintain Total Facet Counts per partition, for given parameters:
+ * <ul> 
+ *  <li>Index reader of an index</li>
+ *  <li>Taxonomy index reader</li>
+ *  <li>Facet indexing params (and particularly the category list params)</li>
+ * </ul>
+ * The total facet counts are maintained as an array of arrays of integers, 
+ * where a separate array is kept for each partition.
+ * 
+ * @lucene.experimental
+ */
+public class TotalFacetCounts {
+  
+  /** total facet counts per partition: totalCounts[partition][ordinal%partitionLength] */
+  private int[][] totalCounts = null;
+  
+  private final TaxonomyReader taxonomy;
+  private final FacetIndexingParams facetIndexingParams;
+
+  // monotonically increasing stamp handed to each new instance, letting
+  // tests tell a freshly created instance from a cached one
+  private final static AtomicInteger atomicGen4Test = new AtomicInteger(1);
+  /** Creation type for test purposes */
+  enum CreationType { Computed, Loaded } // for testing
+  final int gen4test;
+  final CreationType createType4test;
+  
+  /** 
+   * Construct with counts that were already computed or loaded.
+   * @param taxonomy taxonomy these counts refer to
+   * @param facetIndexingParams facet indexing params (determine partition size)
+   * @param counts the per-partition count arrays
+   * @param createType4Test whether counts were Computed or Loaded (testing aid)
+   */
+  private TotalFacetCounts (TaxonomyReader taxonomy, FacetIndexingParams facetIndexingParams,
+      int[][] counts, CreationType createType4Test) throws IOException, LockObtainFailedException {
+    this.taxonomy = taxonomy;
+    this.facetIndexingParams = facetIndexingParams;
+    this.totalCounts = counts;
+    this.createType4test = createType4Test;
+    this.gen4test = atomicGen4Test.incrementAndGet();
+  }
+
+  /**
+   * Fill a partition's array with the TotalCountsArray values.
+   * @param partitionArray array to fill
+   * @param partition number of required partition 
+   */
+  public void fillTotalCountsForPartition(int[] partitionArray, int partition) {
+    int partitionSize = partitionArray.length;
+    int[] countArray = totalCounts[partition];
+    if (countArray == null) {
+      // partition was never counted: materialize (and keep) an all-zeros array
+      countArray = new int[partitionSize];
+      totalCounts[partition] = countArray;
+    }
+    // copy no more than both arrays accommodate; when the stored array is
+    // shorter, the tail of partitionArray is left untouched
+    int length = Math.min(partitionSize, countArray.length);
+    System.arraycopy(countArray, 0, partitionArray, 0, length);
+  }
+  
+  /**
+   * Return the total count of an input category
+   * @param ordinal ordinal of category whose total count is required 
+   */
+  public int getTotalCount(int ordinal) {
+    int partition = PartitionsUtils.partitionNumber(facetIndexingParams,ordinal);
+    int offset = ordinal % PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
+    return totalCounts[partition][offset];
+  }
+  
+  // Read counts from a file written by storeToFile. On-disk format:
+  // numPartitions, then for each partition its length (-1 encodes a null
+  // array) followed by that many int counts.
+  static TotalFacetCounts loadFromFile(File inputFile, TaxonomyReader taxonomy, 
+      FacetIndexingParams facetIndexingParams) throws IOException {
+    DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
+    try {
+      int[][] counts = new int[dis.readInt()][];
+      for (int i=0; i<counts.length; i++) {
+        int size = dis.readInt();
+        if (size<0) {
+          counts[i] = null;
+        } else {
+          counts[i] = new int[size];
+          for (int j=0; j<size; j++) {
+            counts[i][j] = dis.readInt();
+          }
+        }
+      }
+      return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Loaded);
+    } finally {
+      dis.close();
+    }
+  }
+
+  // Write counts in the format documented at loadFromFile.
+  static void storeToFile(File outputFile, TotalFacetCounts tfc) throws IOException {
+    DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
+    try {
+      dos.writeInt(tfc.totalCounts.length);
+      for (int[] counts : tfc.totalCounts) {
+        if (counts == null) {
+          dos.writeInt(-1); // -1 marks a null partition array
+        } else {
+          dos.writeInt(counts.length);
+          for (int i : counts) {
+            dos.writeInt(i);
+          }
+        }
+      }
+    } finally {
+      dos.close();
+    }
+  }
+
+  // Count all documents of indexReader into per-partition total count arrays.
+  // Complement counting is explicitly disabled for this full count (see
+  // setComplementThreshold below).
+  static TotalFacetCounts compute(final IndexReader indexReader,
+      final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams,
+      final CategoryListCache clCache) throws IOException {
+    int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
+    // note: all partition arrays are allocated eagerly here
+    final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize()  /(float) partitionSize)][partitionSize];
+    FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams); 
+    FacetsAccumulator fe = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
+      @Override
+      protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
+          FacetArrays facetArrays, int partition) throws IOException {
+        
+        // a single aggregator counts into this partition's array, fed by the
+        // iterators of all category lists
+        Aggregator aggregator = new CountingAggregator(counts[partition]);
+        HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
+        for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
+          final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
+          map.put(cli, aggregator);
+        }
+        return map;
+      }
+    };
+    fe.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
+    fe.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader));
+    return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
+  }
+  
+  // Prefer the cached category list data when available, else create a fresh
+  // iterator from the category list params.
+  // NOTE(review): method name has a typo ("clIteraor", presumably meant
+  // "clIterator"); kept as-is so existing callers keep compiling.
+  static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp, 
+      IndexReader indexReader, int partition) throws IOException {
+    if (clCache != null) {
+      CategoryListData cld = clCache.get(clp);
+      if (cld != null) {
+        return cld.iterator(partition);
+      }
+    }
+    return clp.createCategoryListIterator(indexReader, partition);
+  }
+}
\ No newline at end of file

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,285 @@
+package org.apache.lucene.facet.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.cache.CategoryListCache;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Manage an LRU cache for {@link TotalFacetCounts} per index, taxonomy, and
+ * facet indexing params.
+ * 
+ * @lucene.experimental
+ */
+public final class TotalFacetCountsCache {
+  
+  /**
+   * Default size of in memory cache for computed total facet counts.
+   * Set to 2 for the case when an application reopened a reader and 
+   * the original one is still in use (Otherwise there will be 
+   * switching again and again between the two.) 
+   */
+  public static final int DEFAULT_CACHE_SIZE = 2; 
+
+  private static final TotalFacetCountsCache singleton = new TotalFacetCountsCache();
+  
+  /**
+   * Get the single instance of this cache
+   */
+  public static TotalFacetCountsCache getSingleton() {
+    return singleton;
+  }
+  
+  /**
+   * In-memory cache of TFCs.
+   * <ul>  
+   * <li>Its size is kept within limits through {@link #trimCache()}.
+   * <li>An LRU eviction policy is applied, by maintaining active keys in {@link #lruKeys}. 
+   * <li>After each addition to the cache, trimCache is called, to remove entries least recently used.
+   * </ul>  
+   * @see #markRecentlyUsed(TFCKey)
+   */
+  private ConcurrentHashMap<TFCKey,TotalFacetCounts> cache = new ConcurrentHashMap<TFCKey,TotalFacetCounts>();
+  
+  /**
+   * A queue of active keys for applying LRU policy on eviction from the {@link #cache}.
+   * @see #markRecentlyUsed(TFCKey)
+   */
+  private ConcurrentLinkedQueue<TFCKey> lruKeys = new ConcurrentLinkedQueue<TFCKey>();
+  
+  private int maxCacheSize = DEFAULT_CACHE_SIZE; 
+  
+  /** private constructor for singleton pattern */ 
+  private TotalFacetCountsCache() {
+  }
+  
+  /**
+   * Get the total facet counts for a reader/taxonomy pair and facet indexing parameters.
+   * If not in cache, computed here and added to the cache for later use.
+   * @param indexReader the documents index
+   * @param taxonomy the taxonomy index
+   * @param facetIndexingParams facet indexing parameters
+   * @param clCache category list cache for faster computation, can be null 
+   * @return the total facet counts.
+   */
+  public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
+      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
+    // create the key
+    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
+    // it is important that this call is not synchronized, so that available TFC 
+    // would not wait for one that needs to be computed.  
+    TotalFacetCounts tfc = cache.get(key);
+    if (tfc != null) {
+      markRecentlyUsed(key); 
+      return tfc;
+    }
+    return computeAndCache(key, clCache);
+  }
+
+  /**
+   * Mark key as recently used.
+   * <p>
+   * <b>Implementation notes: Synchronization considerations and the interaction between lruKeys and cache:</b>
+   * <ol>
+   *  <li>A concurrent {@link LinkedHashMap} would have made this class much simpler.
+   *      But unfortunately, Java does not provide one.
+   *      Instead, we combine two concurrent objects:
+   *  <ul>
+   *   <li>{@link ConcurrentHashMap} for the cached TFCs.
+   *   <li>{@link ConcurrentLinkedQueue} for active keys
+   *  </ul>
+   *  <li>Both {@link #lruKeys} and {@link #cache} are concurrently safe.
+   *  <li>Checks for a cached item through getTotalCounts() are not synchronized.
+   *      Therefore, the case that a needed TFC is in the cache is very fast:
+   *      it does not wait for the computation of other TFCs.
+   *  <li>computeAndCache() is synchronized, and, has a (double) check of the required
+   *       TFC, to avoid computing the same TFC twice. 
+   *  <li>A race condition in this method (markRecentlyUsed) might result in two copies 
+   *      of the same 'key' in lruKeys, but this is handled by the loop in trimCache(), 
+   *      where an attempt to remove the same key twice is a no-op.
+   * </ol>
+   */
+  private void markRecentlyUsed(TFCKey key) {
+    lruKeys.remove(key);  
+    lruKeys.add(key);
+  }
+
+  private synchronized void trimCache() {
+    // loop until cache is of desired size.
+    while (cache.size()>maxCacheSize ) { 
+      TFCKey key = lruKeys.poll();
+      if (key==null) { //defensive
+        // it is defensive since lruKeys presumably covers the cache keys 
+        key = cache.keys().nextElement(); 
+      }
+      // remove this element. Note that an attempt to remove with the same key again is a no-op,
+      // which gracefully handles the possible race in markRecentlyUsed(). 
+      cache.remove(key);
+    }
+  }
+  
+  /**
+   * compute TFC and cache it, after verifying it was not just added - for this
+   * matter this method is synchronized, which is not too bad, because there is
+   * lots of work done in the computations.
+   */
+  private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
+    TotalFacetCounts tfc = cache.get(key); 
+    if (tfc == null) {
+      tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
+      lruKeys.add(key);
+      cache.put(key,tfc);
+      trimCache();
+    }
+    return tfc;
+  }
+
+  /**
+   * Load {@link TotalFacetCounts} matching input parameters from the provided outputFile 
+   * and add them into the cache for the provided indexReader, taxonomy, and facetIndexingParams.
+   * If a {@link TotalFacetCounts} for these parameters already exists in the cache, it will be
+   * replaced by the loaded one.
+   * @param inputFile file from which to read the data 
+   * @param indexReader the documents index
+   * @param taxonomy the taxonomy index
+   * @param facetIndexingParams the facet indexing parameters
+   * @throws IOException on error
+   * @see #store(File, IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
+   */
+  public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
+      FacetIndexingParams facetIndexingParams) throws IOException {
+    if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
+      // typo fixed in message: "Exepecting" -> "Expecting"
+      throw new IllegalArgumentException("Expecting an existing readable file: "+inputFile);
+    }
+    TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
+    TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
+    cache.put(key,tfc);
+    trimCache();
+    markRecentlyUsed(key);
+  }
+  
+  /**
+   * Store the {@link TotalFacetCounts} matching input parameters into the provided outputFile,
+   * making them available for a later call to {@link #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)}.
+   * If these {@link TotalFacetCounts} are available in the cache, they are used. But if they are
+   * not in the cache, this call will first compute them (which will also add them to the cache). 
+   * @param outputFile file to store in.
+   * @param indexReader the documents index
+   * @param taxonomy the taxonomy index
+   * @param facetIndexingParams the facet indexing parameters
+   * @param clCache category list cache for faster computation, can be null
+   * @throws IOException on error
+   * @see #load(File, IndexReader, TaxonomyReader, FacetIndexingParams)
+   * @see #getTotalCounts(IndexReader, TaxonomyReader, FacetIndexingParams, CategoryListCache)
+   */
+  public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
+      FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
+    File parentFile = outputFile.getParentFile();
+    if (
+        ( outputFile.exists() && (!outputFile.isFile()      || !outputFile.canWrite())) ||
+        (!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
+      ) {
+      // typo fixed in message: "Exepecting" -> "Expecting"
+      throw new IllegalArgumentException("Expecting a writable file: "+outputFile);
+    }
+    TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
+    TotalFacetCounts.storeToFile(outputFile, tfc);  
+  }
+  
+  /**
+   * Cache key: identity of the index reader and taxonomy reader, plus the
+   * category list params and the reader's deletion count (so a reader that
+   * was used for deletions maps to a fresh entry).
+   */
+  private static class TFCKey {
+    final IndexReader indexReader;
+    final TaxonomyReader taxonomy;
+    private final Iterable<CategoryListParams> clps;
+    private final int hashCode;
+    private final int nDels; // needed when a reader used for faceted search was just used for deletion. 
+    final FacetIndexingParams facetIndexingParams;
+    
+    public TFCKey(IndexReader indexReader, TaxonomyReader taxonomy,
+        FacetIndexingParams facetIndexingParams) {
+      this.indexReader = indexReader;
+      this.taxonomy = taxonomy;
+      this.facetIndexingParams = facetIndexingParams;
+      this.clps = facetIndexingParams.getAllCategoryListParams();
+      this.nDels = indexReader.numDeletedDocs();
+      hashCode = indexReader.hashCode() ^ taxonomy.hashCode();
+    }
+    
+    @Override
+    public int hashCode() {
+      return hashCode;
+    }
+    
+    @Override
+    public boolean equals(Object other) {
+      if (!(other instanceof TFCKey)) {
+        return false; // guards against null and foreign types; the previous
+        // unconditional cast would have thrown ClassCastException
+      }
+      TFCKey o = (TFCKey) other; 
+      if (indexReader != o.indexReader || taxonomy != o.taxonomy || nDels != o.nDels) {
+        return false;
+      }
+      // category list params must match element-wise, and both sequences
+      // must be of the same length
+      Iterator<CategoryListParams> it1 = clps.iterator();
+      Iterator<CategoryListParams> it2 = o.clps.iterator();
+      while (it1.hasNext() && it2.hasNext()) {
+        if (!it1.next().equals(it2.next())) {
+          return false;
+        }
+      }
+      return it1.hasNext() == it2.hasNext();
+    }
+  }
+
+  /**
+   * Clear the cache.
+   */
+  public synchronized void clear() {
+    cache.clear();
+    lruKeys.clear();
+  }
+  
+  /**
+   * @return the maximal cache size
+   */
+  public int getCacheSize() {
+    return maxCacheSize;
+  }
+
+  /**
+   * Set the number of TotalFacetCounts arrays that will remain in memory cache.
+   * <p>
+   * If new size is smaller than current size, the cache is appropriately trimmed.
+   * <p>
+   * Minimal size is 1, so passing zero or negative size would result in size of 1.
+   * @param size new size to set
+   */
+  public void setCacheSize(int size) {
+    if (size < 1) size = 1;
+    int origSize = maxCacheSize;
+    maxCacheSize = size;
+    if (maxCacheSize < origSize) { // need to trim only if the cache was reduced
+      trimCache();
+    }
+  }
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,51 @@
+package org.apache.lucene.facet.search.aggregator;
+
+import java.io.IOException;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An Aggregator is the analogue of Lucene's Collector (see
+ * {@link org.apache.lucene.search.Collector}), for processing the categories
+ * belonging to a certain document. The Aggregator is responsible for doing
+ * whatever it wishes with the categories it is fed, e.g., counting the number
+ * of times that each category appears, or performing some computation on their
+ * association values.
+ * <P>
+ * Much of the function of an Aggregator implementation is not described by this
+ * interface. This includes the constructor and getter methods to retrieve the
+ * results of the aggregation.
+ * 
+ * @lucene.experimental
+ */
+public interface Aggregator {
+
+  /**
+   * Specify the document (and its score in the search) that the following
+   * {@link #aggregate(int)} calls will pertain to.
+   * @param docid id of the document about to be aggregated
+   * @param score the document's score in the search
+   * @throws IOException on error accessing the underlying data
+   */
+  void setNextDoc(int docid, float score) throws IOException;
+
+  /**
+   * Collect (and do whatever an implementation deems appropriate) the
+   * category given by its ordinal. This category belongs to a document
+   * given earlier by {@link #setNextDoc(int, float)}.
+   * @param ordinal ordinal of the category to aggregate
+   */
+  void aggregate(int ordinal);
+
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ComplementCountingAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ComplementCountingAggregator.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ComplementCountingAggregator.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ComplementCountingAggregator.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,37 @@
+package org.apache.lucene.facet.search.aggregator;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link CountingAggregator} used during complement counting.
+ * 
+ * @lucene.experimental
+ */
+public class ComplementCountingAggregator extends CountingAggregator {
+
+  /**
+   * @param counterArray the counter array, used by reference; during
+   *          complement counting it is expected to already hold the total
+   *          counts from which this aggregator subtracts.
+   */
+  public ComplementCountingAggregator(int[] counterArray) {
+    super(counterArray);
+  }
+
+  /**
+   * Decrements (rather than increments) the count for the given ordinal,
+   * as complement counting subtracts the non-matching documents from the
+   * pre-computed totals.
+   */
+  @Override
+  public void aggregate(int ordinal) {
+    // A zero count here would go negative, i.e. the totals were not
+    // pre-computed correctly for this ordinal.
+    assert counterArray[ordinal]!=0:"complement aggregation: count is about to become negative for ordinal "+ordinal;
+    --counterArray[ordinal];
+  }
+
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,59 @@
+package org.apache.lucene.facet.search.aggregator;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A CountingAggregator updates a counter array with the size of the whole
+ * taxonomy, counting the number of times each category appears in the given set
+ * of documents.
+ * 
+ * @lucene.experimental
+ */
+public class CountingAggregator implements Aggregator {
+
+  // Counter array, one slot per category ordinal; shared by reference with
+  // the caller that supplied it.
+  protected int[] counterArray;
+
+  /** Increments the count of the given category ordinal by one. */
+  public void aggregate(int ordinal) {
+    ++counterArray[ordinal];
+  }
+
+  public void setNextDoc(int docid, float score) {
+    // There's nothing for us to do here since we only increment the count by 1
+    // in this aggregator.
+  }
+
+  /**
+   * @param counterArray the array into which counts are accumulated; it is
+   *          used by reference (not copied).
+   */
+  public CountingAggregator(int[] counterArray) {
+    this.counterArray = counterArray;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || obj.getClass() != this.getClass()) {
+      return false;
+    }
+    CountingAggregator that = (CountingAggregator) obj;
+    // Intentional reference comparison: two aggregators are equal only if
+    // they update the very same counter array.
+    return that.counterArray == this.counterArray;
+  }
+
+  @Override
+  public int hashCode() {
+    // Arrays inherit Object's identity hashCode, which is consistent with
+    // the identity-based equals above.
+    int hashCode = counterArray == null ? 0 : counterArray.hashCode();
+
+    return hashCode;
+  }
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,58 @@
+package org.apache.lucene.facet.search.aggregator;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An {@link Aggregator} which updates the weight of a category according to the
+ * scores of the documents it was found in.
+ * 
+ * @lucene.experimental
+ */
+public class ScoringAggregator implements Aggregator {
+
+  // Per-ordinal accumulated scores, shared by reference with the caller.
+  private final float[] scoreArray;
+  // Score of the current document, captured in setNextDoc().
+  private float score;
+  // Identity hash of the score array, cached at construction time; consistent
+  // with the identity-based equals below.
+  private final int hashCode;
+  
+  /**
+   * @param counterArray the array into which document scores are summed per
+   *          category ordinal; used by reference (not copied).
+   */
+  public ScoringAggregator(float[] counterArray) {
+    this.scoreArray = counterArray;
+    this.hashCode = scoreArray == null ? 0 : scoreArray.hashCode();
+  }
+
+  /** Adds the current document's score to the given ordinal's weight. */
+  public void aggregate(int ordinal) {
+    scoreArray[ordinal] += score;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || obj.getClass() != this.getClass()) {
+      return false;
+    }
+    ScoringAggregator that = (ScoringAggregator) obj;
+    // Intentional reference comparison: equal only when sharing the array.
+    return that.scoreArray == this.scoreArray;
+  }
+
+  @Override
+  public int hashCode() {
+    return hashCode;
+  }
+
+  public void setNextDoc(int docid, float score) {
+    this.score = score;
+  }
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationFloatSumAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationFloatSumAggregator.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationFloatSumAggregator.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationFloatSumAggregator.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,74 @@
+package org.apache.lucene.facet.search.aggregator.association;
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.enhancements.association.AssociationsPayloadIterator;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.search.aggregator.Aggregator;
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An {@link Aggregator} which updates the weight of a category by summing the
+ * weights of the float association it finds for every document.
+ * 
+ * @lucene.experimental
+ */
+public class AssociationFloatSumAggregator implements Aggregator {
+
+  // Field whose payloads carry the associations.
+  protected final String field;
+  // Per-ordinal accumulated sums, shared by reference with the caller.
+  protected final float[] sumArray;
+  // Iterator over the per-document association payloads of 'field'.
+  protected final AssociationsPayloadIterator associationsPayloadIterator;
+
+  /** Uses the default category list term's field. */
+  public AssociationFloatSumAggregator(IndexReader reader, float[] sumArray) throws IOException {
+    this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
+  }
+  
+  /**
+   * @param field the field whose payloads carry the associations
+   * @param reader the index to read associations from
+   * @param sumArray array into which float associations are summed per
+   *          ordinal; used by reference (not copied)
+   */
+  public AssociationFloatSumAggregator(String field, IndexReader reader, float[] sumArray) throws IOException {
+    this.field = field;
+    associationsPayloadIterator = new AssociationsPayloadIterator(reader, field);
+    this.sumArray = sumArray;
+  }
+
+  public void aggregate(int ordinal) {
+    long association = associationsPayloadIterator.getAssociation(ordinal);
+    if (association != AssociationsPayloadIterator.NO_ASSOCIATION) {
+      // The float value is stored as raw int bits in the (long) association;
+      // decode the low 32 bits back into a float before summing.
+      sumArray[ordinal] += Float.intBitsToFloat((int) association);
+    }
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || obj.getClass() != this.getClass()) {
+      return false;
+    }
+    AssociationFloatSumAggregator that = (AssociationFloatSumAggregator) obj;
+    // Array compared by reference, consistent with the field-based hashCode.
+    return that.field.equals(field) && that.sumArray == sumArray;
+  }
+
+  @Override
+  public int hashCode() {
+    return field.hashCode();
+  }
+
+  public void setNextDoc(int docid, float score) throws IOException {
+    // Position the payload iterator on this document; the score is unused.
+    associationsPayloadIterator.setNextDoc(docid);
+  }
+  
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationIntSumAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationIntSumAggregator.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationIntSumAggregator.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationIntSumAggregator.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,74 @@
+package org.apache.lucene.facet.search.aggregator.association;
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.enhancements.association.AssociationsPayloadIterator;
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.search.aggregator.Aggregator;
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * An {@link Aggregator} which updates the weight of a category by summing the
+ * weights of the integer association it finds for every document.
+ * 
+ * @lucene.experimental
+ */
+public class AssociationIntSumAggregator implements Aggregator {
+
+  // Field whose payloads carry the associations.
+  protected final String field;
+  // Per-ordinal accumulated sums, shared by reference with the caller.
+  protected final int[] sumArray;
+  // Iterator over the per-document association payloads of 'field'.
+  protected final AssociationsPayloadIterator associationsPayloadIterator;
+
+  /** Uses the default category list term's field. */
+  public AssociationIntSumAggregator(IndexReader reader, int[] sumArray) throws IOException {
+    this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
+  }
+  
+  /**
+   * @param field the field whose payloads carry the associations
+   * @param reader the index to read associations from
+   * @param sumArray array into which integer associations are summed per
+   *          ordinal; used by reference (not copied)
+   */
+  public AssociationIntSumAggregator(String field, IndexReader reader, int[] sumArray) throws IOException {
+    this.field = field;
+    associationsPayloadIterator = new AssociationsPayloadIterator(reader, field);
+    this.sumArray = sumArray;
+  }
+
+  public void aggregate(int ordinal) {
+    long association = associationsPayloadIterator.getAssociation(ordinal);
+    if (association != AssociationsPayloadIterator.NO_ASSOCIATION) {
+      // Implicit narrowing long -> int addition; the association value is
+      // added as-is to the running sum for this ordinal.
+      sumArray[ordinal] += association;
+    }
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj == null || obj.getClass() != this.getClass()) {
+      return false;
+    }
+    AssociationIntSumAggregator that = (AssociationIntSumAggregator) obj;
+    // Array compared by reference, consistent with the field-based hashCode.
+    return that.field.equals(field) && that.sumArray == sumArray;
+  }
+
+  @Override
+  public int hashCode() {
+    return field.hashCode();
+  }
+
+  public void setNextDoc(int docid, float score) throws IOException {
+    // Position the payload iterator on this document; the score is unused.
+    associationsPayloadIterator.setNextDoc(docid);
+  }
+  
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/package.html?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/package.html (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/aggregator/package.html Wed Jun 29 11:53:10 2011
@@ -0,0 +1,12 @@
+<html>
+  <head>
+    <title>Aggregating Facets during Faceted Search</title>
+  </head>
+  <body>
+    <h1>Aggregating Facets during Faceted Search</h1>
+    
+    A facets aggregator is the parallel of Lucene's Collector.
+    While a Collector collects matching documents,
+    an aggregator aggregates the facets of a matching document.
+  </body>
+</html>
\ No newline at end of file

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,61 @@
+package org.apache.lucene.facet.search.cache;
+
+import java.io.IOException;
+import java.util.HashMap;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Cache for {@link CategoryListData}, per {@link CategoryListParams}. 
+ * 
+ * @lucene.experimental
+ */
+public class CategoryListCache {
+
+  // Backing map; plain unsynchronized HashMap — NOTE(review): no
+  // synchronization is visible here, so concurrent register/get would need
+  // external locking; confirm intended usage is single-threaded setup.
+  private HashMap<CategoryListParams, CategoryListData> 
+    cldMap = new HashMap<CategoryListParams,CategoryListData>();
+
+  /**
+   * Fetch the cached {@link CategoryListData} for a given {@link CategoryListParams}.
+   * Returns null when nothing was registered for these params.
+   */
+  public CategoryListData get(CategoryListParams clp) {
+    return cldMap.get(clp);
+  }
+  
+  /**
+   * Register a pre-computed {@link CategoryListData}.
+   * Silently replaces any entry previously registered for the same params.
+   */
+  public void register(CategoryListParams clp, CategoryListData clData) {
+    cldMap.put(clp,clData);
+  }
+  
+  /**
+   * Load and register {@link CategoryListData}.
+   * Convenience for computing the data from the index and registering it.
+   */
+  public void loadAndRegister(CategoryListParams clp, 
+      IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams) throws IOException {
+    CategoryListData clData = new CategoryListData(reader, taxo, iparams, clp);
+    register(clp,clData);
+  }
+}

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java Wed Jun 29 11:53:10 2011
@@ -0,0 +1,135 @@
+package org.apache.lucene.facet.search.cache;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.search.CategoryListIterator;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.collections.IntArray;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Category list data maintained in RAM.
+ * <p>
+ * Speeds up facets accumulation when more RAM is available.
+ * <p>
+ * Note that this will consume more memory: one int (4 bytes) for each category
+ * of each document.
+ * <p>
+ * Note: at the moment this class is insensitive to updates of the index, and,
+ * in particular, does not make use of Lucene's ability to refresh a single
+ * segment.
+ * <p>
+ * See {@link CategoryListCache#register(CategoryListParams, CategoryListData)}
+ * and
+ * {@link CategoryListCache#loadAndRegister(CategoryListParams, IndexReader, TaxonomyReader, FacetIndexingParams)}.
+ * 
+ * @lucene.experimental
+ */
+public class CategoryListData {
+  
+  // TODO (Facet): experiment with different orders - p-d-c vs. current d-p-c.
+  // Layout is [doc][partition][category-position] (d-p-c).
+  private transient volatile int[][][] docPartitionCategories;  
+  
+  /**
+   * Empty constructor for extensions with modified computation of the data.
+   */
+  protected CategoryListData() {
+  }
+  
+  /**
+   * Compute category list data for caching for faster iteration.
+   * Reads every document's category list for every partition into RAM.
+   */
+  CategoryListData(IndexReader reader, TaxonomyReader taxo, 
+      FacetIndexingParams iparams, CategoryListParams clp) throws IOException {
+  
+    final int maxDoc = reader.maxDoc();
+    int[][][]dpf  = new int[maxDoc][][];
+    // Number of taxonomy partitions, rounded up to cover all ordinals.
+    int numPartitions = (int)Math.ceil(taxo.getSize()/(double)iparams.getPartitionSize());
+    // Scratch buffer reused across documents to collect category ordinals.
+    IntArray docCategories = new IntArray(); 
+    for (int part=0; part<numPartitions; part++) {
+      CategoryListIterator cli = clp.createCategoryListIterator(reader, part);
+      if (cli.init()) {
+        for (int doc=0; doc<maxDoc; doc++) {
+          if (cli.skipTo(doc)) {
+            docCategories.clear(false);
+            // Lazily allocate the per-partition arrays of this document.
+            if (dpf[doc]==null) {
+              dpf[doc] = new int[numPartitions][];
+            }
+            long category;
+            // Values above Integer.MAX_VALUE signal end-of-categories.
+            while ((category = cli.nextCategory()) <= Integer.MAX_VALUE) {
+              docCategories.addToArray((int)category);
+            }
+            // Copy the scratch buffer into a right-sized array for this
+            // document/partition.
+            final int size = docCategories.size();
+            dpf[doc][part] = new int[size];
+            for (int i=0; i<size; i++) {
+              dpf[doc][part][i] = docCategories.get(i);
+            }
+          }
+        }
+      }
+    }
+    docPartitionCategories = dpf;
+  }
+  
+  /**
+   * Iterate on the category list data for the specified partition.
+   */
+  public CategoryListIterator iterator(int partition) throws IOException {
+    return new RAMCategoryListIterator(partition, docPartitionCategories);
+  }
+
+  /**
+   * Internal: category list iterator over uncompressed category info in RAM
+   */
+  private static class RAMCategoryListIterator implements CategoryListIterator {
+    private final int part;
+    private final int[][][] dpc;
+    // -1 until the first successful skipTo().
+    private int currDoc = -1;
+    private int nextCategoryIndex = -1;  
+    
+    RAMCategoryListIterator(int part, int[][][] docPartitionCategories) {
+      this.part = part;
+      dpc = docPartitionCategories;
+    }
+
+    public boolean init() throws IOException {
+      return dpc!=null && dpc.length>part;
+    }
+
+    // NOTE(review): assumes a successful skipTo(doc) preceded this call;
+    // with currDoc still -1 the array access below would throw
+    // ArrayIndexOutOfBoundsException — confirm callers honor this contract.
+    public long nextCategory() throws IOException {
+      if (nextCategoryIndex >= dpc[currDoc][part].length) {
+        // End-of-categories sentinel: first value above Integer.MAX_VALUE.
+        return 1L+Integer.MAX_VALUE;
+      }
+      return dpc[currDoc][part][nextCategoryIndex++]; 
+    }
+
+    public boolean skipTo(int docId) throws IOException {
+      // Only documents that actually have categories in this partition are
+      // accepted; iterator state is left untouched otherwise.
+      final boolean res = dpc.length>docId && dpc[docId]!=null && dpc[docId][part]!=null;
+      if (res) {
+        currDoc = docId;
+        nextCategoryIndex = 0;
+      }
+      return res;
+    }
+  }
+}
\ No newline at end of file

Added: lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/package.html?rev=1141060&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/package.html (added)
+++ lucene/dev/branches/branch_3x/lucene/contrib/facet/src/java/org/apache/lucene/facet/search/package.html Wed Jun 29 11:53:10 2011
@@ -0,0 +1,42 @@
+<html>
+  <head>
+    <title>Faceted Search API</title>
+  </head>
+  <body>
+    <h1>Faceted Search API</h1>
+    
+    The faceted search API has several interfaces: simple, top-level ones adequate for most users,
+    and advanced, more elaborate ones for expert users.
+    
+    <p>
+    
+    We now describe the simpler interfaces.
+    There are mainly 3 interfaces for faceted search:
+    <ol>
+      <li>{@link org.apache.lucene.facet.search.params.FacetRequest Facets Request}
+          defines requirements:
+          <ul> 
+              <li>which facets are required, e.g. depth</li>
+              <li>what is computed for each facet - e.g. count, score.</li>
+          </ul>
+      </li>
+      <li>{@link org.apache.lucene.facet.search.FacetsAccumulator Facets Accumulator}
+          Controls how facets are extracted, with variations of:
+          <ul> 
+              <li>default (partitioned, like all extractors). </li>
+              <li>sampled - inspects only a fraction of the documents.</li>
+          </ul>
+      </li>
+      <li>{@link org.apache.lucene.facet.search.FacetResultsHandler Facet Results Handler}
+          Controls how results are further processed and merged (also between partitions):
+          <ul> 
+              <li>Top K.</li>
+              <li>Tree.</li>
+              <li>Tree with top K at each level</li>
+              <li>...</li>
+          </ul>
+      </li>
+    </ol>
+
+  </body>
+</html>
\ No newline at end of file



Mime
View raw message