lucene-solr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r813874 - in /lucene/solr/trunk: ./ src/java/org/apache/solr/handler/component/ src/java/org/apache/solr/request/ src/test/org/apache/solr/handler/component/ src/test/test-files/solr/conf/
Date Fri, 11 Sep 2009 15:21:08 GMT
Author: gsingers
Date: Fri Sep 11 15:21:05 2009
New Revision: 813874

URL: http://svn.apache.org/viewvc?rev=813874&view=rev
Log:
SOLR-1380: Added support for multi-valued fields to StatsComponent

Added:
    lucene/solr/trunk/src/java/org/apache/solr/handler/component/FieldFacetStats.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsValues.java   (with props)
Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsComponent.java
    lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/solr/trunk/src/test/org/apache/solr/handler/component/StatsComponentTest.java
    lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=813874&r1=813873&r2=813874&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Fri Sep 11 15:21:05 2009
@@ -98,6 +98,8 @@
  7. SOLR-680: Add StatsComponent. This gets simple statistics on matched numeric fields,
     including: min, max, mean, median, stddev.  (koji, ryan)
 
+    7.1 SOLR-1380: Added support for multi-valued fields (Harish Agarwal via gsingers)
+
  8. SOLR-561: Added Replication implemented in Java as a request handler. Supports index replication
     as well as configuration replication and exposes detailed statistics and progress information
     on the Admin page. Works on all platforms. (Noble Paul, yonik, Akshay Ukey, shalin)

Added: lucene/solr/trunk/src/java/org/apache/solr/handler/component/FieldFacetStats.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/FieldFacetStats.java?rev=813874&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/FieldFacetStats.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/FieldFacetStats.java Fri Sep 11 15:21:05 2009
@@ -0,0 +1,144 @@
+package org.apache.solr.handler.component;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.FieldCache;
+import org.apache.solr.schema.FieldType;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * 9/10/2009 - Moved out of StatsComponent to allow open access to UnInvertedField
+ * FieldFacetStats is a utility to accumulate statistics on a set of values in one field,
+ * for facet values present in another field.
+ * <p/>
+ * @see org.apache.solr.handler.component.StatsComponent
+ *
+ */
+
+public class FieldFacetStats {
+  public final String name;
+  final FieldCache.StringIndex si;
+  final FieldType ft;
+
+  final String[] terms;
+  final int[] termNum;
+
+  final int startTermIndex;
+  final int endTermIndex;
+  final int nTerms;
+
+  final int numStatsTerms;
+
+  public final Map<String, StatsValues> facetStatsValues;
+
+  final List<HashMap<String, Integer>> facetStatsTerms;
+
+  public FieldFacetStats(String name, FieldCache.StringIndex si, FieldType ft, int numStatsTerms)
{
+    this.name = name;
+    this.si = si;
+    this.ft = ft;
+    this.numStatsTerms = numStatsTerms;
+
+    terms = si.lookup;
+    termNum = si.order;
+    startTermIndex = 1;
+    endTermIndex = terms.length;
+    nTerms = endTermIndex - startTermIndex;
+
+    facetStatsValues = new HashMap<String, StatsValues>();
+
+    // for mv stats field, we'll want to keep track of terms
+    facetStatsTerms = new ArrayList<HashMap<String, Integer>>();
+    if (numStatsTerms == 0) return;
+    int i = 0;
+    for (; i < numStatsTerms; i++) {
+      facetStatsTerms.add(new HashMap<String, Integer>());
+    }
+  }
+
+  String getTermText(int docID) {
+    return terms[termNum[docID]];
+  }
+
+
+  public boolean facet(int docID, Double v) {
+    if (v == null) return false;
+
+    int term = termNum[docID];
+    int arrIdx = term - startTermIndex;
+    if (arrIdx >= 0 && arrIdx < nTerms) {
+      String key = ft.indexedToReadable(terms[term]);
+      StatsValues stats = facetStatsValues.get(key);
+      if (stats == null) {
+        stats = new StatsValues();
+        facetStatsValues.put(key, stats);
+      }
+      stats.accumulate(v);
+      return true;
+    }
+    return false;
+  }
+
+
+  //function to keep track of facet counts for term number
+  public boolean facetTermNum(int docID, int statsTermNum) {
+
+    int term = termNum[docID];
+    int arrIdx = term - startTermIndex;
+    if (arrIdx >= 0 && arrIdx < nTerms) {
+      String key = ft.indexedToReadable(terms[term]);
+      HashMap<String, Integer> statsTermCounts = facetStatsTerms.get(statsTermNum);
+      Integer statsTermCount = statsTermCounts.get(key);
+      if (statsTermCount == null) {
+        statsTermCounts.put(key, 1);
+      } else {
+        statsTermCounts.put(key, statsTermCount + 1);
+      }
+      return true;
+    }
+    return false;
+  }
+
+
+  //function to accumulate counts for statsTermNum to specified value
+  public boolean accumulateTermNum(int statsTermNum, Double value) {
+    if (value == null) return false;
+    for (Map.Entry<String, Integer> stringIntegerEntry : facetStatsTerms.get(statsTermNum).entrySet())
{
+      Map.Entry pairs = (Map.Entry) stringIntegerEntry;
+      String key = (String) pairs.getKey();
+      StatsValues facetStats = facetStatsValues.get(key);
+      if (facetStats == null) {
+        facetStats = new StatsValues();
+        facetStatsValues.put(key, facetStats);
+      }
+      Integer count = (Integer) pairs.getValue();
+      if (count != null) {
+        facetStats.accumulate(value, count);
+      }
+    }
+    return true;
+  }
+
+}
+
+

Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/component/FieldFacetStats.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=813874&r1=813873&r2=813874&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsComponent.java Fri Sep 11 15:21:05 2009
@@ -28,11 +28,16 @@
 import org.apache.solr.common.params.StatsParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.handler.component.StatsValues;
+import org.apache.solr.handler.component.FieldFacetStats;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.request.UnInvertedField;
+import org.apache.solr.core.SolrCore;
 
 /**
  * Stats component calculates simple statistics on numeric field values
@@ -173,166 +178,6 @@
   }
 }
 
-class StatsValues {
-  private static final String FACETS = "facets";
-  double min;
-  double max;
-  double sum;
-  double sumOfSquares;
-  long count;
-  long missing;
-  
-  // facetField   facetValue
-  Map<String, Map<String,StatsValues>> facets;
-  
-  public StatsValues() {
-    reset();
-  }
-
-  public void accumulate(NamedList stv){
-    min = Math.min(min, (Double)stv.get("min"));
-    max = Math.max(max, (Double)stv.get("max"));
-    sum += (Double)stv.get("sum");
-    count += (Long)stv.get("count");
-    missing += (Long)stv.get("missing");
-    sumOfSquares += (Double)stv.get("sumOfSquares");
-    
-    NamedList f = (NamedList)stv.get( FACETS );
-    if( f != null ) {
-      if( facets == null ) {
-        facets = new HashMap<String, Map<String,StatsValues>>();
-      }
-      
-      for( int i=0; i< f.size(); i++ ) {
-        String field = f.getName(i);
-        NamedList vals = (NamedList)f.getVal( i );
-        Map<String,StatsValues> addTo = facets.get( field );
-        if( addTo == null ) {
-          addTo = new HashMap<String,StatsValues>();
-          facets.put( field, addTo );
-        }
-        for( int j=0; j< vals.size(); j++ ) {
-          String val = f.getName(i);
-          StatsValues vvals = addTo.get( val );
-          if( vvals == null ) {
-            vvals = new StatsValues();
-            addTo.put( val, vvals );
-          }
-          vvals.accumulate( (NamedList)f.getVal( i ) );
-        }
-      }
-    }
-  }
-
-  public void accumulate(double v){
-    sumOfSquares += (v*v); // for std deviation
-    min = Math.min(min, v);
-    max = Math.max(max, v);
-    sum += v;
-    count++;
-  }
-  
-  public double getAverage(){
-    return sum / count;
-  }
-  
-  public double getStandardDeviation()
-  {
-    if( count <= 1.0D ) 
-      return 0.0D;
-    
-    return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
-                      / ( count * ( count - 1.0D ) ) );  
-  }
-  
-  public void reset(){
-    min = Double.MAX_VALUE;
-    max = Double.NEGATIVE_INFINITY;
-    sum = count = missing = 0;
-    sumOfSquares = 0;
-    facets = null;
-  }
-  
-  public NamedList<?> getStatsValues(){
-    NamedList<Object> res = new SimpleOrderedMap<Object>();
-    res.add("min", min);
-    res.add("max", max);
-    res.add("sum", sum);
-    res.add("count", count);
-    res.add("missing", missing);
-    res.add("sumOfSquares", sumOfSquares );
-    res.add("mean", getAverage());
-    res.add( "stddev", getStandardDeviation() );
-    
-    // add the facet stats
-    if( facets != null && facets.size() > 0 ) {
-      NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
-      for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet()
) {
-        NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
-        nl.add( entry.getKey(), nl2 );
-        for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
-          nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
-        }
-      }
-      res.add( FACETS, nl );
-    }
-    return res;
-  }
-}
-
-class FieldFacetStats {
-  final String name;
-  final FieldCache.StringIndex si;
-  final FieldType ft;
-
-  final String[] terms;
-  final int[] termNum;
-  
-  final int startTermIndex;
-  final int endTermIndex;
-  final int nTerms;
-  
-  final Map<String,StatsValues> facetStatsValues;
-  
-  FieldFacetStats( String name, FieldCache.StringIndex si, FieldType ft )
-  {
-    this.name = name;
-    this.si = si;
-    this.ft = ft;
-    
-    terms = si.lookup;
-    termNum = si.order;
-    startTermIndex = 1;
-    endTermIndex = terms.length;
-    nTerms = endTermIndex - startTermIndex;
-    
-    facetStatsValues = new HashMap<String, StatsValues>();
-  }
-  
-  String getTermText( int docID )
-  {
-    return terms[termNum[docID]];
-  }
-  
-  public boolean facet( int docID, Double v )
-  {
-    if( v == null ) return false;
-    
-    int term = termNum[docID];
-    int arrIdx = term-startTermIndex;
-    if (arrIdx>=0 && arrIdx<nTerms) {
-      String key = ft.indexedToReadable( terms[term] );
-      StatsValues stats = facetStatsValues.get( key );
-      if( stats == null ) {
-        stats = new StatsValues();
-        facetStatsValues.put(key, stats);
-      }
-      stats.accumulate( v );
-      return true;
-    }
-    return false;
-  }
-}
 
 class SimpleStats {
 
@@ -353,22 +198,35 @@
     this.params = params;
   }
 
-  public NamedList<Object> getStatsCounts() {
+  public NamedList<Object> getStatsCounts() throws IOException {
     NamedList<Object> res = new SimpleOrderedMap<Object>();
     res.add("stats_fields", getStatsFields());
     return res;
   }
 
-  public NamedList getStatsFields() {
+  public NamedList getStatsFields() throws IOException {
     NamedList<NamedList<Number>> res = new SimpleOrderedMap<NamedList<Number>>();
     String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
     if (null != statsFs) {
       for (String f : statsFs) {
-        String[] facets = params.getFieldParams( f, StatsParams.STATS_FACET );
-        if( facets == null ) {
+        String[] facets = params.getFieldParams(f, StatsParams.STATS_FACET);
+        if (facets == null) {
           facets = new String[0]; // make sure it is something...
         }
-        res.add(f, getFieldCacheStats(f, facets));
+        SchemaField sf = searcher.getSchema().getField(f);
+        FieldType ft = sf.getType();
+        if (ft.isTokenized() || sf.multiValued()) {
+          //use UnInvertedField for multivalued fields
+          UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher);
+          StatsValues allstats = uif.getStats(searcher, docs, facets);
+          if (allstats != null) {
+            res.add(f, (NamedList) allstats.getStatsValues());
+          } else {
+            res.add(f, null);
+          }
+        } else {
+          res.add(f, getFieldCacheStats(f, facets));
+        }
       }
     }
     return res;
@@ -376,10 +234,6 @@
   
   public NamedList getFieldCacheStats(String fieldName, String[] facet ) {
     FieldType ft = searcher.getSchema().getFieldType(fieldName);
-    if( ft.isTokenized() || ft.isMultiValued() ) {
-      throw new SolrException( ErrorCode.BAD_REQUEST, 
-          "Stats are valid for single valued numeric values.  not: "+fieldName + "["+ft+"]"
);
-    }
 
     FieldCache.StringIndex si = null;
     try {
@@ -388,7 +242,7 @@
     catch (IOException e) {
       throw new RuntimeException( "failed to open field cache for: "+fieldName, e );
     }
-    FieldFacetStats all = new FieldFacetStats( "all", si, ft );
+    FieldFacetStats all = new FieldFacetStats( "all", si, ft, 0 );
     if ( all.nTerms <= 0 || docs.size() <= 0 ) return null;
     StatsValues allstats = new StatsValues();
 
@@ -397,17 +251,13 @@
     final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
     for( String f : facet ) {
       ft = searcher.getSchema().getFieldType(f);
-      if( ft.isTokenized() || ft.isMultiValued() ) {
-        throw new SolrException( ErrorCode.BAD_REQUEST, 
-            "Stats can only facet on single valued fields, not: "+f + "["+ft+"]" );
-      }
       try {
         si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f);
       } 
       catch (IOException e) {
         throw new RuntimeException( "failed to open field cache for: "+f, e );
       }
-      finfo[i++] = new FieldFacetStats( f, si, ft );
+      finfo[i++] = new FieldFacetStats( f, si, ft, 0 );
     }
     
     
@@ -438,4 +288,6 @@
     }
     return allstats.getStatsValues();
   }
+
+
 }

Added: lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsValues.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsValues.java?rev=813874&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsValues.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsValues.java Fri Sep 11 15:21:05 2009
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.handler.component;
+
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+
+
+/** 2/11/2009 - Moved out of StatsComponent to allow open access to UnInvertedField
+ * StatsValues is a utility to accumulate statistics on a set of values
+ *
+ * <p/>
+ * @see org.apache.solr.handler.component.StatsComponent
+ *
+*/
+
+public class StatsValues {
+  private static final String FACETS = "facets";
+  double min;
+  double max;
+  double sum;
+  double sumOfSquares;
+  long count;
+  long missing;
+  
+  // facetField   facetValue
+  public Map<String, Map<String,StatsValues>> facets;
+  
+  public StatsValues() {
+    reset();
+  }
+
+  public void accumulate(NamedList stv){
+    min = Math.min(min, (Double)stv.get("min"));
+    max = Math.max(max, (Double)stv.get("max"));
+    sum += (Double)stv.get("sum");
+    count += (Long)stv.get("count");
+    missing += (Long)stv.get("missing");
+    sumOfSquares += (Double)stv.get("sumOfSquares");
+    
+    NamedList f = (NamedList)stv.get( FACETS );
+    if( f != null ) {
+      if( facets == null ) {
+        facets = new HashMap<String, Map<String,StatsValues>>();
+      }
+      
+      for( int i=0; i< f.size(); i++ ) {
+        String field = f.getName(i);
+        NamedList vals = (NamedList)f.getVal( i );
+        Map<String,StatsValues> addTo = facets.get( field );
+        if( addTo == null ) {
+          addTo = new HashMap<String,StatsValues>();
+          facets.put( field, addTo );
+        }
+        for( int j=0; j< vals.size(); j++ ) {
+          String val = f.getName(i);
+          StatsValues vvals = addTo.get( val );
+          if( vvals == null ) {
+            vvals = new StatsValues();
+            addTo.put( val, vvals );
+          }
+          vvals.accumulate( (NamedList)f.getVal( i ) );
+        }
+      }
+    }
+  }
+
+  public void accumulate(double v){
+    sumOfSquares += (v*v); // for std deviation
+    min = Math.min(min, v);
+    max = Math.max(max, v);
+    sum += v;
+    count++;
+  }
+  
+  public void accumulate(double v, int c){
+    sumOfSquares += (v*v*c); // for std deviation
+    min = Math.min(min, v);
+    max = Math.max(max, v);
+    sum += v*c;
+    count+= c;
+  }
+
+  public void addMissing(int c){
+	missing += c;
+  }
+  
+  public double getAverage(){
+    return sum / count;
+  }
+  
+  public double getStandardDeviation()
+  {
+    if( count <= 1.0D ) 
+      return 0.0D;
+    
+    return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
+                      / ( count * ( count - 1.0D ) ) );  
+  }
+
+  public long getCount()
+  {
+	return count;
+  }
+  
+  public void reset(){
+    min = Double.MAX_VALUE;
+    max = -1.0*Double.MAX_VALUE;
+    sum = count = missing = 0;
+    sumOfSquares = 0;
+    facets = null;
+  }
+  
+  public NamedList<?> getStatsValues(){
+    NamedList<Object> res = new SimpleOrderedMap<Object>();
+    res.add("min", min);
+    res.add("max", max);
+    res.add("sum", sum);
+    res.add("count", count);
+    res.add("missing", missing);
+    res.add("sumOfSquares", sumOfSquares );
+    res.add("mean", getAverage());
+    res.add( "stddev", getStandardDeviation() );
+    
+    // add the facet stats
+    if( facets != null && facets.size() > 0 ) {
+      NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
+      for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet()
) {
+        NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
+        nl.add( entry.getKey(), nl2 );
+        for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
+          nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
+        }
+      }
+      res.add( FACETS, nl );
+    }
+    return res;
+  }
+}

Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/component/StatsValues.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java?rev=813874&r1=813873&r2=813874&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/request/UnInvertedField.java Fri Sep 11 15:21:05 2009
@@ -17,6 +17,7 @@
 
 package org.apache.solr.request;
 
+import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
@@ -26,10 +27,12 @@
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.SimpleFacets;
+
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.search.*;
 import org.apache.solr.util.BoundedTreeSet;
+import org.apache.solr.handler.component.StatsValues;
+import org.apache.solr.handler.component.FieldFacetStats;
 import org.apache.lucene.util.OpenBitSet;
 
 import java.io.IOException;
@@ -38,7 +41,7 @@
 import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.Map;
-import java.util.WeakHashMap;
+
 import java.util.concurrent.atomic.AtomicLong;
 
 /**
@@ -593,9 +596,6 @@
 
         for (; i<endTerm; i++) {
           int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
-          if (c==0) {
-
-          }
           if (c<mincount || --off>=0) continue;
           if (--lim<0) break;
 
@@ -616,6 +616,176 @@
     return res;
   }
 
+  /**
+   * Collect statistics about the UninvertedField.  Code is very similar to {@link #getCounts(org.apache.solr.search.SolrIndexSearcher, org.apache.solr.search.DocSet, int, int, Integer, boolean, String, String)}
+   * It can be used to calculate stats on multivalued fields.
+   * <p/>
+   * This method is mainly used by the {@link org.apache.solr.handler.component.StatsComponent}.
+   *
+   * @param searcher The Searcher to use to gather the statistics
+   * @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on
+   * @param facet One or more fields to facet on.
+   * @return The {@link org.apache.solr.handler.component.StatsValues} collected
+   * @throws IOException
+   */
+  public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, String[] facet)
throws IOException {
+    //this function is ripped off nearly wholesale from the getCounts function to use
+    //for multiValued fields within the StatsComponent.  may be useful to find common
+    //functionality between the two and refactor code somewhat
+    use.incrementAndGet();
+
+    FieldType ft = searcher.getSchema().getFieldType(field);
+    StatsValues allstats = new StatsValues();
+
+    int i = 0;
+    final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
+    //Initialize facetstats, if facets have been passed in
+    FieldCache.StringIndex si;
+    for (String f : facet) {
+      ft = searcher.getSchema().getFieldType(f);
+      try {
+        si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f);
+      }
+      catch (IOException e) {
+        throw new RuntimeException("failed to open field cache for: " + f, e);
+      }
+      finfo[i++] = new FieldFacetStats(f, si, ft, numTermsInField);
+    }
+
+
+    DocSet docs = baseDocs;
+    int baseSize = docs.size();
+    int maxDoc = searcher.maxDoc();
+
+    if (baseSize > 0) {
+
+      final int[] index = this.index;
+      final int[] counts = new int[numTermsInField];
+
+      NumberedTermEnum te = ti.getEnumerator(searcher.getReader());
+
+
+      boolean doNegative = false;
+      if (finfo.length == 0) {
+        //only do inverted counting when no facet fields were passed in; per-facet counts must be gathered from the matching documents themselves
+        doNegative = baseSize > maxDoc >> 1 && termInstances > 0
+                && docs instanceof BitDocSet;
+      }
+
+      if (doNegative) {
+        OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone();
+        bs.flip(0, maxDoc);
+        // TODO: when iterator across negative elements is available, use that
+        // instead of creating a new bitset and inverting.
+        docs = new BitDocSet(bs, maxDoc - baseSize);
+        // simply negating will mean that we have deleted docs in the set.
+        // that should be OK, as their entries in our table should be empty.
+      }
+
+      // For the biggest terms, do straight set intersections
+      for (TopTerm tt : bigTerms.values()) {
+        // TODO: counts could be deferred if sorted==false
+        if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
+          if (finfo.length == 0) {
+            counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
+          } else {
+            //COULD BE VERY SLOW
+            //if we're collecting stats for facet fields, we need to iterate on all matching documents
+            DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(tt.term)).intersection(docs);
+            DocIterator iter = bigTermDocSet.iterator();
+            while (iter.hasNext()) {
+              int doc = iter.nextDoc();
+              counts[tt.termNum]++;
+              for (FieldFacetStats f : finfo) {
+                f.facetTermNum(doc, tt.termNum);
+              }
+            }
+          }
+        }
+      }
+
+
+      if (termInstances > 0) {
+        DocIterator iter = docs.iterator();
+        while (iter.hasNext()) {
+          int doc = iter.nextDoc();
+          int code = index[doc];
+
+          if ((code & 0xff) == 1) {
+            int pos = code >>> 8;
+            int whichArray = (doc >>> 16) & 0xff;
+            byte[] arr = tnums[whichArray];
+            int tnum = 0;
+            for (; ;) {
+              int delta = 0;
+              for (; ;) {
+                byte b = arr[pos++];
+                delta = (delta << 7) | (b & 0x7f);
+                if ((b & 0x80) == 0) break;
+              }
+              if (delta == 0) break;
+              tnum += delta - TNUM_OFFSET;
+              counts[tnum]++;
+              for (FieldFacetStats f : finfo) {
+                f.facetTermNum(doc, tnum);
+              }
+            }
+          } else {
+            int tnum = 0;
+            int delta = 0;
+            for (; ;) {
+              delta = (delta << 7) | (code & 0x7f);
+              if ((code & 0x80) == 0) {
+                if (delta == 0) break;
+                tnum += delta - TNUM_OFFSET;
+                counts[tnum]++;
+                for (FieldFacetStats f : finfo) {
+                  f.facetTermNum(doc, tnum);
+                }
+                delta = 0;
+              }
+              code >>>= 8;
+            }
+          }
+        }
+      }
+
+      // add results in index order
+
+      for (i = 0; i < numTermsInField; i++) {
+        int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
+        if (c == 0) {
+          continue;
+        }
+        Double value = Double.parseDouble(ft.indexedToReadable(getTermText(te, i)));
+        allstats.accumulate(value, c);
+        //as we've parsed the termnum into a value, lets also accumulate fieldfacet statistics
+        for (FieldFacetStats f : finfo) {
+          f.accumulateTermNum(i, value);
+        }
+      }
+      te.close();
+      int c = SimpleFacets.getFieldMissingCount(searcher, baseDocs, field);
+      if (c > 0) {
+        allstats.addMissing(c);
+      }
+    }
+    if (allstats.getCount() > 0) {
+      if (finfo.length > 0) {
+        allstats.facets = new HashMap<String, Map<String, StatsValues>>();
+        for (FieldFacetStats f : finfo) {
+          allstats.facets.put(f.name, f.facetStatsValues);
+        }
+      }
+      return allstats;
+    } else {
+      return null;
+    }
+
+  }
+
+
+
 
   String getTermText(NumberedTermEnum te, int termNum) throws IOException {
     if (bigTerms.size() > 0) {

Modified: lucene/solr/trunk/src/test/org/apache/solr/handler/component/StatsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=813874&r1=813873&r2=813874&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/component/StatsComponentTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/component/StatsComponentTest.java Fri Sep 11 15:21:05 2009
@@ -19,6 +19,7 @@
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.StatsParams;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
@@ -76,6 +77,69 @@
   }
 
 
+  public void testMVFieldStatisticsResult() throws Exception {
+    SolrCore core = h.getCore();
+
+    assertU(adoc("id", "1", "stats_ii", "-10", "stats_ii", "-100", "active_s", "true"));
+    assertU(adoc("id", "2", "stats_ii", "-20", "stats_ii", "200", "active_s", "true"));
+    assertU(adoc("id", "3", "stats_ii", "-30", "stats_ii", "-1", "active_s", "false"));
+    assertU(adoc("id", "4", "stats_ii", "-40", "stats_ii", "10", "active_s", "false"));
+    assertU(commit());
+
+
+    Map<String, String> args = new HashMap<String, String>();
+    args.put(CommonParams.Q, "*:*");
+    args.put(StatsParams.STATS, "true");
+    args.put(StatsParams.STATS_FIELD, "stats_ii");
+    args.put("indent", "true");
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
+
+
+    assertQ("test statistics values", req
+            , "//double[@name='min'][.='-100.0']"
+            , "//double[@name='max'][.='200.0']"
+            , "//double[@name='sum'][.='9.0']"
+            , "//long[@name='count'][.='8']"
+            , "//long[@name='missing'][.='0']"
+            , "//double[@name='sumOfSquares'][.='53101.0']"
+            , "//double[@name='mean'][.='1.125']"
+            , "//double[@name='stddev'][.='87.08852228787508']"
+    );
+
+
+
+    args.put(StatsParams.STATS_FACET, "active_s");
+    req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
+
+    assertQ("test statistics values", req
+            , "//double[@name='min'][.='-100.0']"
+            , "//double[@name='max'][.='200.0']"
+            , "//double[@name='sum'][.='9.0']"
+            , "//long[@name='count'][.='8']"
+            , "//long[@name='missing'][.='0']"
+            , "//double[@name='sumOfSquares'][.='53101.0']"
+            , "//double[@name='mean'][.='1.125']"
+            , "//double[@name='stddev'][.='87.08852228787508']"
+    );
+
+
+
+
+    assertQ("test value for active_s=true", req
+            , "//lst[@name='true']/double[@name='min'][.='-100.0']"
+            , "//lst[@name='true']/double[@name='max'][.='200.0']"
+            , "//lst[@name='true']/double[@name='sum'][.='70.0']"
+            , "//lst[@name='true']/long[@name='count'][.='4']"
+            , "//lst[@name='true']/long[@name='missing'][.='0']"
+            , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
+            , "//lst[@name='true']/double[@name='mean'][.='17.5']"
+            , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
+    );
+
+
+  }
+
+
   public void testFieldStatisticsMissingResult() throws Exception {
     SolrCore core = h.getCore();
     assertU(adoc("id", "1", "stats_i", "-10"));

Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml?rev=813874&r1=813873&r2=813874&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema11.xml Fri Sep 11 15:21:05 2009
@@ -286,6 +286,7 @@
         both match, the first appearing in the schema will be used.  -->
    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
    <dynamicField name="*_ss"  type="string"  indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="*_ii"  type="integer"    indexed="true"  stored="true" multiValued="true"/>
    <dynamicField name="*_i"  type="sint"    indexed="true"  stored="true"/>
    <dynamicField name="*_l"  type="slong"   indexed="true"  stored="true"/>
    <dynamicField name="*_f"  type="sfloat"  indexed="true"  stored="true"/>



Mime
View raw message