lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hoss...@apache.org
Subject svn commit: r1668926 - in /lucene/dev/branches/branch_5x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/ lucene...
Date Tue, 24 Mar 2015 16:23:52 GMT
Author: hossman
Date: Tue Mar 24 16:23:50 2015
New Revision: 1668926

URL: http://svn.apache.org/r1668926
Log:
SOLR-6350: StatsComponent now supports Percentiles (merge r1668922)

Added:
    lucene/dev/branches/branch_5x/solr/licenses/t-digest-3.0.jar.sha1
      - copied unchanged from r1668922, lucene/dev/trunk/solr/licenses/t-digest-3.0.jar.sha1
    lucene/dev/branches/branch_5x/solr/licenses/t-digest-LICENSE-ASL.txt
      - copied unchanged from r1668922, lucene/dev/trunk/solr/licenses/t-digest-LICENSE-ASL.txt
    lucene/dev/branches/branch_5x/solr/licenses/t-digest-NOTICE.txt
      - copied unchanged from r1668922, lucene/dev/trunk/solr/licenses/t-digest-NOTICE.txt
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/dev-tools/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/Lucene47WordDelimiterFilter.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/ASCIITLD.jflex-macro   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/SUPPLEMENTARY.jflex-macro   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizerImpl40.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizerImpl40.jflex   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/UAX29URLEmailTokenizerImpl40.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/UAX29URLEmailTokenizerImpl40.jflex   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/package.html   (props changed)
    lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/backward-codecs/   (props changed)
    lucene/dev/branches/branch_5x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_5x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_5x/lucene/classification/   (props changed)
    lucene/dev/branches/branch_5x/lucene/classification/build.xml   (props changed)
    lucene/dev/branches/branch_5x/lucene/classification/ivy.xml   (props changed)
    lucene/dev/branches/branch_5x/lucene/classification/src/   (props changed)
    lucene/dev/branches/branch_5x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_5x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions2.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestSort.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestTotalHitCountCollector.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_5x/lucene/expressions/   (props changed)
    lucene/dev/branches/branch_5x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_5x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_5x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_5x/lucene/ivy-ignore-conflicts.properties   (props changed)
    lucene/dev/branches/branch_5x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_5x/lucene/ivy-versions.properties   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/join/   (props changed)
    lucene/dev/branches/branch_5x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_5x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_5x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_5x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_5x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionQuerySort.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_5x/lucene/replicator/   (props changed)
    lucene/dev/branches/branch_5x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_5x/lucene/site/   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeFacetCounter.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/src/java/org/apache/lucene/spatial/util/ShapeAreaValueSource.java   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/src/test-files/data/simple-bbox.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/src/test-files/simple-Queries-BBox.txt   (props changed)
    lucene/dev/branches/branch_5x/lucene/spatial/src/test/org/apache/lucene/spatial/bbox/   (props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/   (props changed)
    lucene/dev/branches/branch_5x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_5x/lucene/version.properties   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/bin/   (props changed)
    lucene/dev/branches/branch_5x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_5x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_5x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_5x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/ivy.xml
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/request/DocValuesStats.java   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaManager.java   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/core/TestConfig.java   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
    lucene/dev/branches/branch_5x/solr/example/   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/licenses/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_5x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_5x/solr/server/   (props changed)
    lucene/dev/branches/branch_5x/solr/site/   (props changed)
    lucene/dev/branches/branch_5x/solr/site/SYSTEM_REQUIREMENTS.mdtext   (props changed)
    lucene/dev/branches/branch_5x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java
    lucene/dev/branches/branch_5x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_5x/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/ivy-versions.properties?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/branch_5x/lucene/ivy-versions.properties Tue Mar 24 16:23:50 2015
@@ -50,6 +50,8 @@ com.sun.jersey.version = 1.9
 /com.sun.mail/javax.mail = 1.5.1
 
 /com.sun.xml.bind/jaxb-impl = 2.2.3-1
+
+/com.tdunning/t-digest = 3.0
 /com.thoughtworks.paranamer/paranamer = 2.3
 /com.typesafe/config = 1.0.2
 /com.uwyn/jhighlight = 1.0
@@ -243,3 +245,4 @@ org.slf4j.version = 1.7.7
 /org.xerial.snappy/snappy-java = 1.0.5
 /rome/rome = 1.0
 /xerces/xercesImpl = 2.9.1
+

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Tue Mar 24 16:23:50 2015
@@ -146,6 +146,8 @@ New Features
 * SOLR-7245: Temporary ZK election or connection loss should not stall indexing
   due to leader initiated recovery (Ramkumar Aiyengar)
 
+* SOLR-6350: StatsComponent now supports Percentiles (Xu Zhang, hossman)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/ivy.xml?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/ivy.xml (original)
+++ lucene/dev/branches/branch_5x/solr/core/ivy.xml Tue Mar 24 16:23:50 2015
@@ -87,6 +87,9 @@
     <dependency org="org.apache.hadoop" name="hadoop-minikdc" rev="${/org.apache.hadoop/hadoop-minikdc}" conf="test.MiniKdc"/>
     <dependency org="org.apache.directory.server" name="apacheds-all" rev="${/org.apache.directory.server/apacheds-all}" conf="test.MiniKdc"/>
 
+    <!-- StatsComponents percentiles Dependencies-->
+    <dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
+
     <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
   </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java Tue Mar 24 16:23:50 2015
@@ -52,8 +52,6 @@ public class StatsComponent extends Sear
   @Override
   public void process(ResponseBuilder rb) throws IOException {
     if (!rb.doStats) return;
-    
-    boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false);
     Map<String, StatsValues> statsValues = new LinkedHashMap<>();
 
     for (StatsField statsField : rb._statsInfo.getStatsFields()) {

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java Tue Mar 24 16:23:50 2015
@@ -81,8 +81,34 @@ public class StatsField {
     mean(false, sum, count),
     sumOfSquares(true),
     stddev(false, sum, count, sumOfSquares),
-    calcdistinct(true);
-    
+    calcdistinct(true),
+    percentiles(true){
+      /** special for percentiles **/
+      boolean parseParams(StatsField sf) {
+        String percentileParas = sf.localParams.get(this.name());
+        if (percentileParas != null) {
+          List<Double> percentiles = new ArrayList<Double>();
+          try {
+            for (String percentile : StrUtils.splitSmart(percentileParas, ',')) {
+              percentiles.add(Double.parseDouble(percentile));
+            }
+            if (!percentiles.isEmpty()) {
+              sf.percentilesList.addAll(percentiles);
+              sf.tdigestCompression = sf.localParams.getDouble("tdigestCompression", 
+                                                               sf.tdigestCompression);
+              return true;
+            }
+          } catch (NumberFormatException e) {
+            throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse "
+                + StatsParams.STATS_FIELD + " local params: " + sf.localParams + " due to: "
+                + e.getMessage(), e);
+          }
+
+        }
+        return false;
+      }
+    };
+
     private final List<Stat> distribDeps;
     
     /**
@@ -123,6 +149,12 @@ public class StatsField {
     public EnumSet<Stat> getDistribDeps() {
       return EnumSet.copyOf(this.distribDeps);
     }
+    
+    /** return value of true means user is requesting this stat */
+    boolean parseParams(StatsField sf) {
+      return sf.localParams.getBool(this.name(), false);
+    }
+    
   }
 
   /**
@@ -144,8 +176,12 @@ public class StatsField {
   private final List<String> excludeTagList;
   private final EnumSet<Stat> statsToCalculate = EnumSet.noneOf(Stat.class);
   private final EnumSet<Stat> statsInResponse = EnumSet.noneOf(Stat.class);
+  private final List<Double> percentilesList= new ArrayList<Double>();
   private final boolean isShard;
-
+  
+  private double tdigestCompression = 100.0D;
+  
+  
   /**
    * @param rb the current request/response
    * @param statsParam the raw {@link StatsParams#STATS_FIELD} string
@@ -168,7 +204,6 @@ public class StatsField {
 
       this.localParams = localParams;
       
-
       String parserName = localParams.get(QueryParsing.TYPE);
       SchemaField sf = null;
       ValueSource vs = null;
@@ -220,7 +255,7 @@ public class StatsField {
     this.topLevelCalcDistinct = null == schemaField
         ? params.getBool(StatsParams.STATS_CALC_DISTINCT, false) 
         : params.getFieldBool(schemaField.getName(), StatsParams.STATS_CALC_DISTINCT, false);
-        
+
     populateStatsSets();
         
     String[] facets = params.getFieldParams(key, StatsParams.STATS_FACET);
@@ -451,30 +486,28 @@ public class StatsField {
     return "StatsField<" + originalParam + ">";
   }
 
-
   /**
    * A helper method which inspects the {@link #localParams} associated with this StatsField, 
    * and uses them to populate the {@link #statsInResponse} and {@link #statsToCalculate} data 
    * structures
    */
   private void populateStatsSets() {
-    
     boolean statSpecifiedByLocalParam = false;
     // local individual stat
     Iterator<String> itParams = localParams.getParameterNamesIterator();
+    
     while (itParams.hasNext()) {
       String paramKey = itParams.next();
-        Stat stat = Stat.forName(paramKey);
-        if (stat != null) {
-          statSpecifiedByLocalParam = true;
-          // TODO: this isn't going to work for planned "non-boolean' stats - eg: SOLR-6350, SOLR-6968
-          if (localParams.getBool(paramKey, false)) {
-            statsInResponse.add(stat);
-            statsToCalculate.addAll(stat.getDistribDeps());
-          }
+      Stat stat = Stat.forName(paramKey);
+      if (stat != null) {
+        statSpecifiedByLocalParam = true;
+        if (stat.parseParams(this)) {
+          statsInResponse.add(stat);
+          statsToCalculate.addAll(stat.getDistribDeps());
         }
+      }
     }
-    
+
     // if no individual stat setting. 
     if ( ! statSpecifiedByLocalParam ) {
       statsInResponse.addAll(DEFAULT_STATS);
@@ -505,5 +538,15 @@ public class StatsField {
     return false;
   }
 
-
+  public List<Double> getPercentilesList() {
+    return percentilesList;
+  }
+  
+  public boolean getIsShard() {
+    return isShard;
+  }
+  
+  public double getTdigestCompression() {
+    return tdigestCompression;
+  }
 }

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java Tue Mar 24 16:23:50 2015
@@ -19,6 +19,7 @@ package org.apache.solr.handler.componen
 
 import java.io.IOException;
 import java.util.*;
+import java.nio.ByteBuffer;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.queries.function.FunctionValues;
@@ -31,26 +32,33 @@ import org.apache.solr.common.util.Simpl
 import org.apache.solr.handler.component.StatsField.Stat;
 import org.apache.solr.schema.*;
 
+import com.tdunning.math.stats.AVLTreeDigest;
+
 /**
- * Factory class for creating instance of {@link org.apache.solr.handler.component.StatsValues}
+ * Factory class for creating instance of 
+ * {@link org.apache.solr.handler.component.StatsValues}
  */
 public class StatsValuesFactory {
 
   /**
-   * Creates an instance of StatsValues which supports values from the specified {@link StatsField}
+   * Creates an instance of StatsValues which supports values from the specified 
+   * {@link StatsField}
    *
-   * @param statsField {@link StatsField} whose statistics will be created by the resulting {@link StatsValues}
-   * @return Instance of {@link StatsValues} that will create statistics from values from the specified {@link StatsField}
+   * @param statsField
+   *          {@link StatsField} whose statistics will be created by the
+   *          resulting {@link StatsValues}
+   * @return Instance of {@link StatsValues} that will create statistics from
+   *         values from the specified {@link StatsField}
    */
   public static StatsValues createStatsValues(StatsField statsField) {
-
+    
     final SchemaField sf = statsField.getSchemaField();
-
+    
     if (null == sf) {
       // function stats
       return new NumericStatsValues(statsField);
-    } 
-
+    }
+    
     final FieldType fieldType = sf.getType(); // TODO: allow FieldType to provide impl.
     
     if (TrieDateField.class.isInstance(fieldType)) {
@@ -62,27 +70,32 @@ public class StatsValuesFactory {
     } else if (sf.getType().getClass().equals(EnumField.class)) {
       return new EnumStatsValues(statsField);
     } else {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported");
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+          "Field type " + fieldType + " is not currently supported");
     }
   }
 }
 
 /**
- * Abstract implementation of {@link org.apache.solr.handler.component.StatsValues} 
- * that provides the default behavior for most StatsValues implementations.
+ * Abstract implementation of
+ * {@link org.apache.solr.handler.component.StatsValues} that provides the
+ * default behavior for most StatsValues implementations.
  *
- * There are very few requirements placed on what statistics concrete implementations 
- * should collect, with the only required statistics being the minimum and maximum values.
+ * There are very few requirements placed on what statistics concrete
+ * implementations should collect, with the only required statistics being the
+ * minimum and maximum values.
  */
 abstract class AbstractStatsValues<T> implements StatsValues {
   private static final String FACETS = "facets";
-
+  
   /** Tracks all data about tthe stats we need to collect */
   final protected StatsField statsField;
 
   /** may be null if we are collecting stats directly from a function ValueSource */
   final protected SchemaField sf;
-  /** may be null if we are collecting stats directly from a function ValueSource */
+  /**
+   * may be null if we are collecting stats directly from a function ValueSource
+   */
   final protected FieldType ft;
 
   // final booleans from StatsField to allow better inlining & JIT optimizing
@@ -99,17 +112,17 @@ abstract class AbstractStatsValues<T> im
    * {@link #setNextReader} is called at least once
    */
   private ValueSource valueSource;
-  /** 
-   * Context to use when retrieving FunctionValues, will be null until/unless 
+  /**
+   * Context to use when retrieving FunctionValues, will be null until/unless
    * {@link #setNextReader} is called at least once
    */
   private Map vsContext;
-  /** 
-   * Values to collect, will be null until/unless {@link #setNextReader} is called 
-   * at least once 
+  /**
+   * Values to collect, will be null until/unless {@link #setNextReader} is
+   * called at least once
    */
   protected FunctionValues values;
-
+  
   protected T max;
   protected T min;
   protected long missing;
@@ -117,9 +130,9 @@ abstract class AbstractStatsValues<T> im
   protected long countDistinct;
   protected final Set<T> distinctValues;
   
-  // facetField   facetValue
-  protected Map<String, Map<String, StatsValues>> facets = new HashMap<>();
-
+  // facetField facetValue
+  protected Map<String,Map<String, StatsValues>> facets = new HashMap<>();
+  
   protected AbstractStatsValues(StatsField statsField) {
     this.statsField = statsField;
     this.computeCount = statsField.calculateStats(Stat.count);
@@ -136,10 +149,11 @@ abstract class AbstractStatsValues<T> im
     // duplicate code between "NumericSchemaFieldStatsValues" and 
     // "NumericValueSourceStatsValues" which would have diff parent classes
     //
-    // part of the complexity here being that the StatsValues API serves two 
-    // masters: collecting concrete Values from things like DocValuesStats and 
-    // the distributed aggregation logic, but also collecting docIds which it then
-    // uses to go out and pull concreate values from the ValueSource 
+    // part of the complexity here being that the StatsValues API serves two
+    // masters: collecting concrete Values from things like DocValuesStats and
+    // the distributed aggregation logic, but also collecting docIds which it
+    // then
+    // uses to go out and pull concrete values from the ValueSource
     // (from a func, or single valued field)
     if (null != statsField.getSchemaField()) {
       assert null == statsField.getValueSource();
@@ -152,7 +166,7 @@ abstract class AbstractStatsValues<T> im
       this.ft = null;
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -173,12 +187,12 @@ abstract class AbstractStatsValues<T> im
       updateMinMax((T) stv.get("min"), (T) stv.get("max"));
     }
     updateTypeSpecificStats(stv);
-
+    
     NamedList f = (NamedList) stv.get(FACETS);
     if (f == null) {
       return;
     }
-
+    
     for (int i = 0; i < f.size(); i++) {
       String field = f.getName(i);
       NamedList vals = (NamedList) f.getVal(i);
@@ -198,16 +212,18 @@ abstract class AbstractStatsValues<T> im
       }
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
   @Override
   public void accumulate(BytesRef value, int count) {
     if (null == ft) {
-      throw new IllegalStateException("Can't collect & convert BytesRefs on stats that do't use a a FieldType: " + statsField);
+      throw new IllegalStateException(
+          "Can't collect & convert BytesRefs on stats that do't use a a FieldType: "
+              + statsField);
     }
-    T typedValue = (T)ft.toObject(sf, value);
+    T typedValue = (T) ft.toObject(sf, value);
     accumulate(typedValue, count);
   }
 
@@ -224,7 +240,7 @@ abstract class AbstractStatsValues<T> im
     }
     updateTypeSpecificStats(value, count);
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -234,7 +250,7 @@ abstract class AbstractStatsValues<T> im
       missing++;
     }
   }
-   
+  
   /**
    * {@inheritDoc}
    */
@@ -242,7 +258,7 @@ abstract class AbstractStatsValues<T> im
   public void addMissing(int count) {
     missing += count;
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -250,7 +266,7 @@ abstract class AbstractStatsValues<T> im
   public void addFacet(String facetName, Map<String, StatsValues> facetValues) {
     facets.put(facetName, facetValues);
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -274,113 +290,138 @@ abstract class AbstractStatsValues<T> im
       res.add("distinctValues", distinctValues);
       res.add("countDistinct", countDistinct);
     }
-
+    
     addTypeSpecificStats(res);
     
     if (!facets.isEmpty()) {
-
+      
       // add the facet stats
-     NamedList<NamedList<?>> nl = new SimpleOrderedMap<>();
-     for (Map.Entry<String, Map<String, StatsValues>> entry : facets.entrySet()) {
-       NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<>();
-       nl.add(entry.getKey(), nl2);
-       for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
-         nl2.add(e2.getKey(), e2.getValue().getStatsValues());
-       }
-     }
-     res.add(FACETS, nl);
+      NamedList<NamedList<?>> nl = new SimpleOrderedMap<>();
+      for (Map.Entry<String,Map<String,StatsValues>> entry : facets.entrySet()) {
+        NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<>();
+        nl.add(entry.getKey(), nl2);
+        for (Map.Entry<String,StatsValues> e2 : entry.getValue().entrySet()) {
+          nl2.add(e2.getKey(), e2.getValue().getStatsValues());
+        }
+      }
+
+      res.add(FACETS, nl);
     }
 
     return res;
   }
-
+  
   /**
    * {@inheritDoc}
    */
-   public void setNextReader(LeafReaderContext ctx) throws IOException {
+  public void setNextReader(LeafReaderContext ctx) throws IOException {
     if (valueSource == null) {
       // first time we've collected local values, get the right ValueSource
       valueSource = (null == ft) 
-        ? statsField.getValueSource()
+        ? statsField.getValueSource() 
         : ft.getValueSource(sf, null);
       vsContext = ValueSource.newContext(statsField.getSearcher());
     }
     values = valueSource.getValues(vsContext, ctx);
   }
-
+  
   /**
    * Updates the minimum and maximum statistics based on the given values
    *
-   * @param min Value that the current minimum should be updated against
-   * @param max Value that the current maximum should be updated against
+   * @param min
+   *          Value that the current minimum should be updated against
+   * @param max
+   *          Value that the current maximum should be updated against
    */
   protected abstract void updateMinMax(T min, T max);
-
+  
   /**
    * Updates the type specific statistics based on the given value
    *
-   * @param value Value the statistics should be updated against
-   * @param count Number of times the value is being accumulated
+   * @param value
+   *          Value the statistics should be updated against
+   * @param count
+   *          Number of times the value is being accumulated
    */
   protected abstract void updateTypeSpecificStats(T value, int count);
-
+  
   /**
    * Updates the type specific statistics based on the values in the given list
    *
-   * @param stv List containing values the current statistics should be updated against
+   * @param stv
+   *          List containing values the current statistics should be updated
+   *          against
    */
   protected abstract void updateTypeSpecificStats(NamedList stv);
-
+  
   /**
    * Add any type specific statistics to the given NamedList
    *
-   * @param res NamedList to add the type specific statistics too
+   * @param res
+   *          NamedList to add the type specific statistics to
    */
   protected abstract void addTypeSpecificStats(NamedList<Object> res);
 }
 
- /**
+/**
  * Implementation of StatsValues that supports Double values
  */
 class NumericStatsValues extends AbstractStatsValues<Number> {
-
+  
   double sum;
   double sumOfSquares;
+  
+  AVLTreeDigest tdigest;
 
   double minD; // perf optimization, only valid if (null != this.min)
   double maxD; // perf optimization, only valid if (null != this.max)
-
+  
   final protected boolean computeSum;
   final protected boolean computeSumOfSquares;
+  final protected boolean computePercentiles;
 
   public NumericStatsValues(StatsField statsField) {
     super(statsField);
+
     this.computeSum = statsField.calculateStats(Stat.sum);
     this.computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares);
+    
+    this.computePercentiles = statsField.calculateStats(Stat.percentiles);
+    if ( computePercentiles ) {
+      
+      tdigest = new AVLTreeDigest(statsField.getTdigestCompression()); 
+    }
   }
-
+  
   @Override
   public void accumulate(int docID) {
     if (values.exists(docID)) {
-      accumulate((Number) values.objectVal(docID), 1);
+      Number value = (Number) values.objectVal(docID);
+      accumulate(value, 1);
     } else {
       missing();
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
   @Override
   public void updateTypeSpecificStats(NamedList stv) {
     if (computeSum) {
-      sum += ((Number)stv.get("sum")).doubleValue();
+      sum += ((Number) stv.get("sum")).doubleValue();
     }
     if (computeSumOfSquares) {
-      sumOfSquares += ((Number)stv.get("sumOfSquares")).doubleValue();
+      sumOfSquares += ((Number) stv.get("sumOfSquares")).doubleValue();
+    }
+    
+    if (computePercentiles) {      
+      byte[] data = (byte[]) stv.get("percentiles");
+      ByteBuffer buf = ByteBuffer.wrap(data);
+      tdigest.add(AVLTreeDigest.fromBytes(buf));
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -393,9 +434,12 @@ class NumericStatsValues extends Abstrac
     if (computeSum) {
       sum += value * count;
     }
+    if (computePercentiles) {
+      tdigest.add(value, count);
+    }
   }
-
-   /**
+  
+  /**
    * {@inheritDoc}
    */
   @Override
@@ -424,11 +468,13 @@ class NumericStatsValues extends Abstrac
       }
     }
   }
-
+  
   /**
-   * Adds sum, sumOfSquares, mean and standard deviation statistics to the given NamedList
+   * Adds sum, sumOfSquares, mean, stddev, and percentiles to the given
+   * NamedList
    *
-   * @param res NamedList to add the type specific statistics too
+   * @param res
+   *          NamedList to add the type specific statistics to
    */
   @Override
   protected void addTypeSpecificStats(NamedList<Object> res) {
@@ -444,8 +490,32 @@ class NumericStatsValues extends Abstrac
     if (statsField.includeInResponse(Stat.stddev)) {
       res.add("stddev", getStandardDeviation());
     }
+    if (statsField.includeInResponse(Stat.percentiles)) {
+      if (statsField.getIsShard()) {
+        // as of current t-digest version, smallByteSize() internally does a full conversion in 
+        // order to determine what the size is (can't be precomputed?) .. so rather than
+        // serialize to a ByteBuffer twice, allocate the max possible size buffer,
+        // serialize once, and then copy only the byte[] subset that we need, and free up the buffer
+        ByteBuffer buf = ByteBuffer.allocate(tdigest.byteSize()); // upper bound
+        tdigest.asSmallBytes(buf);
+        res.add("percentiles", Arrays.copyOf(buf.array(), buf.position()) );
+      } else {
+        NamedList<Object> percentileNameList = new NamedList<Object>();
+        for (Double percentile : statsField.getPercentilesList()) {
+          // Empty document set case
+          if (tdigest.size() == 0) {
+            percentileNameList.add(percentile.toString(), null);
+          } else {
+            Double cutoff = tdigest.quantile(percentile / 100);
+            percentileNameList.add(percentile.toString(), cutoff);
+          }
+        }
+        res.add("percentiles", percentileNameList);
+      }
+    }
   }
-
+  
+  
   /**
    * Calculates the standard deviation statistic
    *
@@ -455,8 +525,9 @@ class NumericStatsValues extends Abstrac
     if (count <= 1.0D) {
       return 0.0D;
     }
-
+    
     return Math.sqrt(((count * sumOfSquares) - (sum * sum)) / (count * (count - 1.0D)));
+                     
   }
 }
 
@@ -464,11 +535,11 @@ class NumericStatsValues extends Abstrac
  * Implementation of StatsValues that supports EnumField values
  */
 class EnumStatsValues extends AbstractStatsValues<EnumFieldValue> {
-
+  
   public EnumStatsValues(StatsField statsField) {
     super(statsField);
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -483,7 +554,7 @@ class EnumStatsValues extends AbstractSt
       missing();
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -503,7 +574,7 @@ class EnumStatsValues extends AbstractSt
       }
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -511,7 +582,7 @@ class EnumStatsValues extends AbstractSt
   protected void updateTypeSpecificStats(NamedList stv) {
     // No type specific stats
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -519,7 +590,7 @@ class EnumStatsValues extends AbstractSt
   protected void updateTypeSpecificStats(EnumFieldValue value, int count) {
     // No type specific stats
   }
-
+  
   /**
    * Adds no type specific statistics
    */
@@ -527,19 +598,17 @@ class EnumStatsValues extends AbstractSt
   protected void addTypeSpecificStats(NamedList<Object> res) {
     // Add no statistics
   }
-
-
+  
 }
 
 /**
- * /**
- * Implementation of StatsValues that supports Date values
+ * Implementation of StatsValues that supports Date values
  */
 class DateStatsValues extends AbstractStatsValues<Date> {
-
+  
   private long sum = 0;
   double sumOfSquares = 0;
-
+  
   final protected boolean computeSum;
   final protected boolean computeSumOfSquares;
 
@@ -548,7 +617,7 @@ class DateStatsValues extends AbstractSt
     this.computeSum = statsField.calculateStats(Stat.sum);
     this.computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares);
   }
-
+  
   @Override
   public void accumulate(int docID) {
     if (values.exists(docID)) {
@@ -557,7 +626,7 @@ class DateStatsValues extends AbstractSt
       missing();
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -567,10 +636,10 @@ class DateStatsValues extends AbstractSt
       sum += ((Date) stv.get("sum")).getTime();
     }
     if (computeSumOfSquares) {
-      sumOfSquares += ((Number)stv.get("sumOfSquares")).doubleValue();
+      sumOfSquares += ((Number) stv.get("sumOfSquares")).doubleValue();
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -584,8 +653,8 @@ class DateStatsValues extends AbstractSt
       sum += value * count;
     }
   }
-
-   /**
+  
+  /**
    * {@inheritDoc}
    */
   @Override
@@ -601,11 +670,12 @@ class DateStatsValues extends AbstractSt
       }
     }
   }
-
+  
   /**
    * Adds sum and mean statistics to the given NamedList
    *
-   * @param res NamedList to add the type specific statistics too
+   * @param res
+   *          NamedList to add the type specific statistics to
    */
   @Override
   protected void addTypeSpecificStats(NamedList<Object> res) {
@@ -623,10 +693,9 @@ class DateStatsValues extends AbstractSt
     }
   }
   
-
-  
   /**
-   * Calculates the standard deviation.  For dates, this is really the MS deviation
+   * Calculates the standard deviation. For dates, this is really the MS
+   * deviation
    *
    * @return Standard deviation statistic
    */
@@ -634,7 +703,8 @@ class DateStatsValues extends AbstractSt
     if (count <= 1) {
       return 0.0D;
     }
-    return Math.sqrt(((count * sumOfSquares) - (sum * sum)) / (count * (count - 1.0D)));
+    return Math.sqrt(((count * sumOfSquares) - (sum * sum))
+        / (count * (count - 1.0D)));
   }
 }
 
@@ -642,24 +712,25 @@ class DateStatsValues extends AbstractSt
  * Implementation of StatsValues that supports String values
  */
 class StringStatsValues extends AbstractStatsValues<String> {
-
+  
   public StringStatsValues(StatsField statsField) {
     super(statsField);
   }
-
+  
   @Override
   public void accumulate(int docID) {
     if (values.exists(docID)) {
       String value = values.strVal(docID);
-      if (value != null)
+      if (value != null) {
         accumulate(value, 1);
-      else
+      } else { 
         missing();
+      }
     } else {
       missing();
     }
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -667,7 +738,7 @@ class StringStatsValues extends Abstract
   protected void updateTypeSpecificStats(NamedList stv) {
     // No type specific stats
   }
-
+  
   /**
    * {@inheritDoc}
    */
@@ -675,8 +746,8 @@ class StringStatsValues extends Abstract
   protected void updateTypeSpecificStats(String value, int count) {
     // No type specific stats
   }
-
-   /**
+  
+  /**
    * {@inheritDoc}
    */
   @Override
@@ -688,7 +759,7 @@ class StringStatsValues extends Abstract
       this.max = max(this.max, max);
     }
   }
-
+  
   /**
    * Adds no type specific statistics
    */
@@ -696,13 +767,17 @@ class StringStatsValues extends Abstract
   protected void addTypeSpecificStats(NamedList<Object> res) {
     // Add no statistics
   }
-
-  /** 
-   * Determines which of the given Strings is the maximum, as computed by {@link String#compareTo(String)}
+  
+  /**
+   * Determines which of the given Strings is the maximum, as computed by
+   * {@link String#compareTo(String)}
    *
-   * @param str1 String to compare against b
-   * @param str2 String compared against a
-   * @return str1 if it is considered greater by {@link String#compareTo(String)}, str2 otherwise
+   * @param str1
+   *          String to compare against b
+   * @param str2
+   *          String compared against a
+   * @return str1 if it is considered greater by
+   *         {@link String#compareTo(String)}, str2 otherwise
    */
   private static String max(String str1, String str2) {
     if (str1 == null) {
@@ -712,13 +787,17 @@ class StringStatsValues extends Abstract
     }
     return (str1.compareTo(str2) > 0) ? str1 : str2;
   }
-
+  
   /**
-   * Determines which of the given Strings is the minimum, as computed by {@link String#compareTo(String)}
+   * Determines which of the given Strings is the minimum, as computed by
+   * {@link String#compareTo(String)}
    *
-   * @param str1 String to compare against b
-   * @param str2 String compared against a
-   * @return str1 if it is considered less by {@link String#compareTo(String)}, str2 otherwise
+   * @param str1
+   *          String to compare against b
+   * @param str2
+   *          String compared against a
+   * @return str1 if it is considered less by {@link String#compareTo(String)},
+   *         str2 otherwise
    */
   private static String min(String str1, String str2) {
     if (str1 == null) {

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java Tue Mar 24 16:23:50 2015
@@ -51,6 +51,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.EnumSet;
@@ -392,6 +393,48 @@ public class TestDistributedSearch exten
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_a);
     query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_b);
+    
+    query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+        "{!percentiles='1,2,3,4,5'}" + i1);
+    
+    query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+        "{!percentiles='1,20,30,40,98,99,99.9'}" + i1);
+    
+    rsp = query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+                "{!percentiles='1.0,99.999,0.001'}" + tlong);
+    { // don't leak variables
+      Double[] expectedKeys = new Double[] { 1.0D, 99.999D, 0.001D };
+      Double[] expectedVals = new Double[] { 2.0D, 4320.0D, 2.0D }; 
+      FieldStatsInfo s = rsp.getFieldStatsInfo().get(tlong);
+      assertNotNull("no stats for " + tlong, s);
+
+      Map<Double,Double> p = s.getPercentiles();
+      assertNotNull("no percentils", p);
+      assertEquals("insufficient percentiles", expectedKeys.length, p.size());
+      Iterator<Double> actualKeys = p.keySet().iterator();
+      for (int i = 0; i < expectedKeys.length; i++) {
+        Double expectedKey = expectedKeys[i];
+        assertTrue("Ran out of actual keys as of : "+ i + "->" +expectedKey,
+                   actualKeys.hasNext());
+        assertEquals(expectedKey, actualKeys.next());
+        assertEquals("percentiles are off: " + p.toString(),
+                     expectedVals[i], p.get(expectedKey), 1.0D);
+      }
+
+      //
+      assertNull("expected null for count", s.getMin());
+      assertNull("expected null for count", s.getMean());
+      assertNull("expected null for count", s.getCount());
+      assertNull("expected null for calcDistinct", s.getCountDistinct());
+      assertNull("expected null for distinct vals", s.getDistinctValues());
+      assertNull("expected null for max", s.getMax());
+      assertNull("expected null for missing", s.getMissing());
+      assertNull("expected null for stddev", s.getStddev());
+      assertNull("expected null for sum", s.getSum());
+    }
+    
+    query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+        "{!percentiles='1,20,50,80,99'}" + tdate_a);
 
     query("q","*:*", "sort",i1+" desc", "stats", "true", 
           "fq", "{!tag=nothing}-*:*",
@@ -437,6 +480,7 @@ public class TestDistributedSearch exten
       assertNull("expected null for missing", s.getMissing());
       assertNull("expected null for stddev", s.getStddev());
       assertNull("expected null for sum", s.getSum());
+      assertNull("expected null for percentiles", s.getPercentiles());
 
       // sanity check deps relationship
       for (Stat dep : EnumSet.of(Stat.sum, Stat.count)) {
@@ -492,6 +536,7 @@ public class TestDistributedSearch exten
       assertNull("expected null for max", s.getMax());
       assertNull("expected null for missing", s.getMissing());
       assertNull("expected null for sum", s.getSum());
+      assertNull("expected null for percentiles", s.getPercentiles());
     }
 
     // request stats, but disable them all via param refs
@@ -512,6 +557,7 @@ public class TestDistributedSearch exten
       assertNull("expected null for max", s.getMax());
       assertNull("expected null for missing", s.getMissing());
       assertNull("expected null for sum", s.getSum());
+      assertNull("expected null for percentiles", s.getPercentiles());
     }
 
     final String[] stats = new String[] {
@@ -596,6 +642,7 @@ public class TestDistributedSearch exten
       assertNull(p+" expected null for missing", s.getMissing());
       assertNull(p+" expected null for stddev", s.getStddev());
       assertNull(p+" expected null for sum", s.getSum());
+      assertNull(p+" expected null for percentiles", s.getPercentiles());
       
     }
 
@@ -630,6 +677,7 @@ public class TestDistributedSearch exten
       assertNull(p+" expected null for missing", s.getMissing());
       assertNull(p+" expected null for stddev", s.getStddev());
       assertNull(p+" expected null for sum", s.getSum());
+      assertNull(p+"expected null for percentiles", s.getPercentiles());
       
     }
 
@@ -654,6 +702,7 @@ public class TestDistributedSearch exten
       assertNull("expected null for max", s.getMax());
       assertNull("expected null for missing", s.getMissing());
       assertNull("expected null for sum", s.getSum());
+      assertNull("expected null for percentiles", s.getPercentiles());
     }
 
     // look at stats on non numeric fields
@@ -662,7 +711,11 @@ public class TestDistributedSearch exten
     // result in no stats being computed but this at least lets us sanity check that for each 
     // of these field+stats(s) combinations we get consistent results between the distributed 
     // request and the single node situation.
-    EnumSet<Stat> allStats = EnumSet.allOf(Stat.class);
+    //
+    // NOTE: percentiles excluded because it doesn't support simple 'true/false' syntax
+    // (and since it doesn't work for non-numerics anyway, we aren't missing any coverage here)
+    EnumSet<Stat> allStats = EnumSet.complementOf(EnumSet.of(Stat.percentiles));
+
     int numTotalStatQueries = 0;
     // don't go overboard, just do all permutations of 1 or 2 stat params, for each field & query
     final int numStatParamsAtOnce = 2; 

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java Tue Mar 24 16:23:50 2015
@@ -16,6 +16,7 @@ package org.apache.solr.handler.componen
  * limitations under the License.
  */
 
+import java.nio.ByteBuffer;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
@@ -36,6 +37,9 @@ import org.apache.solr.common.params.Com
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.StatsParams;
+import org.apache.solr.common.util.Base64;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.component.StatsField.Stat;
 import org.apache.solr.request.LocalSolrQueryRequest;
@@ -45,10 +49,10 @@ import org.apache.solr.schema.SchemaFiel
 import org.apache.solr.util.AbstractSolrTestCase;
 
 import org.apache.commons.math3.util.Combinations;
+import com.tdunning.math.stats.AVLTreeDigest;
 
 import org.junit.BeforeClass;
 
-
 /**
  * Statistics Component Test
  */
@@ -1051,7 +1055,7 @@ public class StatsComponentTest extends
               );
     }
   }
-
+  
   public void testEnumFieldTypeStatus() throws Exception {
     clearIndex();
     
@@ -1141,8 +1145,9 @@ public class StatsComponentTest extends
     assertU(adoc("id", "1", "a_f", "2.3", "b_f", "9.7", "a_i", "9", "foo_t", "how now brown cow"));
     assertU(commit());
     
+    AVLTreeDigest tdigest = new AVLTreeDigest(100);
+    
     // some quick sanity check assertions...
-
     // trivial check that we only get the exact 2 we ask for
     assertQ("ask for and get only 2 stats",
             req("q","*:*", "stats", "true",
@@ -1169,40 +1174,59 @@ public class StatsComponentTest extends
             , "count(" + kpre + "*)=0"
             );
 
-   double sum = 0;
-   double sumOfSquares = 0;
-   final int count = 20;
-   for (int i = 0; i < count; i++) {
-     assertU(adoc("id", String.valueOf(i), "a_f", "2.3", "b_f", "9.7", "a_i", String.valueOf(i%10), "foo_t", "how now brown cow"));
-     sum+=i%10;
-     sumOfSquares+=(i%10)*(i%10);
-   }
+    double sum = 0;
+    double sumOfSquares = 0;
+    final int count = 20;
+    for (int i = 0; i < count; i++) {
+      assertU(adoc("id", String.valueOf(i), "a_f", "2.3", "b_f", "9.7", "a_i",
+          String.valueOf(i % 10), "foo_t", "how now brown cow"));
+      tdigest.add(i % 10);
+      sum += i % 10;
+      sumOfSquares += (i % 10) * (i % 10);
+    }
    
-   assertU(commit());
-
-   EnumSet<Stat> allStats = EnumSet.allOf(Stat.class);
-
-   Map<Stat, String> expectedStats = new HashMap<>();
-   expectedStats.put(Stat.min, "0.0");
-   expectedStats.put(Stat.max, "9.0");
-   expectedStats.put(Stat.missing, "0");
-   expectedStats.put(Stat.sum, String.valueOf(sum));
-   expectedStats.put(Stat.count, String.valueOf(count));
-   expectedStats.put(Stat.mean, String.valueOf(sum/count));
-   expectedStats.put(Stat.sumOfSquares, String.valueOf(sumOfSquares));
-   expectedStats.put(Stat.stddev, String.valueOf(Math.sqrt(((count * sumOfSquares) - (sum * sum)) / (20 * (count - 1.0D)))));
-   expectedStats.put(Stat.calcdistinct, "10");
+    assertU(commit());
+    
+    ByteBuffer buf = ByteBuffer.allocate(tdigest.smallByteSize());
+    tdigest.asSmallBytes(buf);
+    EnumSet<Stat> allStats = EnumSet.allOf(Stat.class);
+    
+    Map<Stat,String> expectedStats = new HashMap<>();
+    expectedStats.put(Stat.min, "0.0");
+    expectedStats.put(Stat.max, "9.0");
+    expectedStats.put(Stat.missing, "0");
+    expectedStats.put(Stat.sum, String.valueOf(sum));
+    expectedStats.put(Stat.count, String.valueOf(count));
+    expectedStats.put(Stat.mean, String.valueOf(sum / count));
+    expectedStats.put(Stat.sumOfSquares, String.valueOf(sumOfSquares));
+    expectedStats.put(Stat.stddev, String.valueOf(Math.sqrt(((count * sumOfSquares) - (sum * sum))/ (20 * (count - 1.0D)))));
+    expectedStats.put(Stat.calcdistinct, "10");
+    // NOTE: per shard expected value
+    expectedStats.put(Stat.percentiles, Base64.byteArrayToBase64(buf.array(), 0, buf.array().length));
+    
+    Map<Stat,String> expectedType = new HashMap<>();
+    expectedType.put(Stat.min, "double");
+    expectedType.put(Stat.max, "double");
+    expectedType.put(Stat.missing, "long");
+    expectedType.put(Stat.sum, "double");
+    expectedType.put(Stat.count, "long");
+    expectedType.put(Stat.mean, "double");
+    expectedType.put(Stat.sumOfSquares, "double");
+    expectedType.put(Stat.stddev, "double");
+    expectedType.put(Stat.calcdistinct, "long");
+    expectedType.put(Stat.percentiles, "str");
    
-   Map<Stat, String> expectedType = new HashMap<>();
-   expectedType.put(Stat.min, "double");
-   expectedType.put(Stat.max, "double");
-   expectedType.put(Stat.missing, "long");
-   expectedType.put(Stat.sum, "double");
-   expectedType.put(Stat.count, "long");
-   expectedType.put(Stat.mean, "double");
-   expectedType.put(Stat.sumOfSquares, "double");
-   expectedType.put(Stat.stddev, "double");
-   expectedType.put(Stat.calcdistinct, "long");
+    Map<Stat,String> localParasInput = new HashMap<>();
+    localParasInput.put(Stat.min, "true");
+    localParasInput.put(Stat.max, "true");
+    localParasInput.put(Stat.missing, "true");
+    localParasInput.put(Stat.sum, "true");
+    localParasInput.put(Stat.count, "true");
+    localParasInput.put(Stat.mean, "true");
+    localParasInput.put(Stat.sumOfSquares, "true");
+    localParasInput.put(Stat.stddev, "true");
+    localParasInput.put(Stat.calcdistinct, "true");
+    localParasInput.put(Stat.percentiles, "'90, 99'");
 
    // canary in the coal mine
    assertEquals("size of expectedStats doesn't match all known stats; " + 
@@ -1233,13 +1257,15 @@ public class StatsComponentTest extends
                      "[@name='" + key + "'][.='" + expectedStats.get(perShardStat) + "']");
        // even if we go out of our way to exclude the dependent stats, 
        // the shard should return them since they are a dependency for the requested stat
-       exclude.append(perShardStat + "=false ");
+       if (!stat.equals(Stat.percentiles)){
+         exclude.append(perShardStat + "=false ");
+       }
      }
      testParas.add("count(" + kpre + "*)=" + (distribDeps.size() + calcdistinctFudge));
 
      assertQ("ask for only "+stat+", with isShard=true, and expect only deps: " + distribDeps,
              req("q", "*:*", "isShard", "true", "stats", "true", 
-                 "stats.field", "{!key=k " + exclude + stat + "=true}a_i")
+                 "stats.field", "{!key=k " + exclude + stat +"=" + localParasInput.get(stat) + "}a_i")
              , testParas.toArray(new String[testParas.size()])
              );
    }
@@ -1265,8 +1291,17 @@ public class StatsComponentTest extends
            calcdistinctFudge++; 
            testParas.add("count(" + kpre + "arr[@name='distinctValues']/*)=10");
          }
-         paras.append(stat + "=true ");
-         testParas.add(kpre + expectedType.get(stat) + "[@name='" + key + "'][.='" + expectedStats.get(stat) + "']");
+         paras.append(stat + "=" + localParasInput.get(stat)+ " ");
+         
+         if (!stat.equals(Stat.percentiles)){
+           testParas.add(kpre + expectedType.get(stat) + "[@name='" + key + "'][.='" + expectedStats.get(stat) + "']");
+         } else {
+           testParas.add("count(" + kpre + "lst[@name='percentiles']/*)=2");
+           String p90 = "" + tdigest.quantile(0.90D);
+           String p99 = "" + tdigest.quantile(0.99D);
+           testParas.add(kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]");
+           testParas.add(kpre + "lst[@name='percentiles']/double[@name='99.0'][.="+p99+"]");
+         }
        }
 
        paras.append("}a_i");
@@ -1279,7 +1314,6 @@ public class StatsComponentTest extends
                );
      }
    }
-
   }
   
   // Test for Solr-6349
@@ -1402,6 +1436,90 @@ public class StatsComponentTest extends
     }
   }
 
+  // simple percentiles test
+  public void testPercentiles() throws Exception {
+    
+    // NOTE: deliberately not in numeric order
+    String percentiles = "10.0,99.9,1.0,2.0,20.0,30.0,40.0,50.0,60.0,70.0,80.0,98.0,99.0";
+    List <String> percentilesList = StrUtils.splitSmart(percentiles, ',');
+    
+    // test empty case 
+    SolrQueryRequest query = req("q", "*:*", "stats", "true",
+                                 "stats.field", "{!percentiles='" + percentiles + "'}stat_f");
+    try {
+      SolrQueryResponse rsp = h.queryAndResponse(null, query);
+      NamedList<Double> pout = extractPercentils(rsp, "stat_f");
+      for (int i = 0; i < percentilesList.size(); i++) {
+        // ensure exact order, but all values should be null (empty result set)
+        assertEquals(percentilesList.get(i), pout.getName(i));
+        assertEquals(null, pout.getVal(i));
+      }
+    } finally {
+      query.close();
+    }
+    
+    int id = 0;
+    // add trivial docs to test basic percentiles
+    for (int i = 0; i < 100; i++) {
+      // add the same values multiple times (diff docs)
+      for (int j =0; j < 5; j++) {
+        assertU(adoc("id", ++id+"", "stat_f", ""+i));
+      }
+    }
+
+    assertU(commit());
+
+    query = req("q", "*:*", "stats", "true", 
+                "stats.field", "{!percentiles='" + percentiles + "'}stat_f");
+    try {
+      SolrQueryResponse rsp = h.queryAndResponse(null, query);
+      NamedList<Double> pout = extractPercentils(rsp, "stat_f");
+      for (int i = 0; i < percentilesList.size(); i++) { 
+        String p = percentilesList.get(i);
+        assertEquals(p, pout.getName(i));
+        assertEquals(Double.parseDouble(p), pout.getVal(i), 1.0D);
+                     
+      }
+    } finally {
+      query.close();
+    }
+    
+    // test request for no percentiles
+    query = req("q", "*:*", "stats", "true", 
+                "stats.field", "{!percentiles=''}stat_f");
+    try {
+      SolrQueryResponse rsp = h.queryAndResponse(null, query);
+      NamedList<Double> pout = extractPercentils(rsp, "stat_f");
+      assertNull(pout);
+    } finally {
+      query.close();
+    }
+
+    // non-numeric types don't support percentiles
+    assertU(adoc("id", ++id+"", "stat_dt", "1999-05-03T04:55:01Z"));
+    assertU(adoc("id", ++id+"", "stat_s", "cow"));
+    
+    assertU(commit());
+
+    query = req("q", "*:*", "stats", "true", 
+                "stats.field", "{!percentiles='" + percentiles + "'}stat_dt",
+                "stats.field", "{!percentiles='" + percentiles + "'}stat_s");
+
+    try {
+      SolrQueryResponse rsp = h.queryAndResponse(null, query);
+      assertNull(extractPercentils(rsp, "stat_dt"));
+      assertNull(extractPercentils(rsp, "stat_s"));
+    } finally {
+      query.close();
+    }
+    
+  }
+
+  private NamedList<Double> extractPercentils(SolrQueryResponse rsp, String key) {
+    return ((NamedList<NamedList<NamedList<NamedList<Double>>>> )
+            rsp.getValues().get("stats")).get("stats_fields").get(key).get("percentiles");
+  }
+
   /** 
    * given a comboSize and an EnumSet of Stats, generates iterators that produce every possible
    * enum combination of that size 
@@ -1435,5 +1553,4 @@ public class StatsComponentTest extends
       };
     }
   }
-
 }

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java Tue Mar 24 16:23:50 2015
@@ -22,6 +22,7 @@ import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
@@ -47,6 +48,8 @@ public class FieldStatsInfo implements S
   
   Map<String,List<FieldStatsInfo>> facets;
   
+  Map<Double, Double> percentiles;
+  
   public FieldStatsInfo( NamedList<Object> nl, String fname )
   {
     name = fname;
@@ -96,6 +99,13 @@ public class FieldStatsInfo implements S
             vals.add( new FieldStatsInfo( vnl.getVal(i), n ) );
           }
         }
+      } else if ( "percentiles".equals( entry.getKey() ) ){
+        @SuppressWarnings("unchecked")
+        NamedList<Object> fields = (NamedList<Object>) entry.getValue();
+        percentiles = new LinkedHashMap<>();
+        for( Map.Entry<String, Object> ev : fields ) {
+          percentiles.put(Double.parseDouble(ev.getKey()), (Double)ev.getValue());
+        }
       }
       else {
         throw new RuntimeException( "unknown key: "+entry.getKey() + " ["+entry.getValue()+"]" );
@@ -136,6 +146,10 @@ public class FieldStatsInfo implements S
     if( stddev != null ) {
       sb.append( " stddev:").append(stddev);
     }
+    if( percentiles != null ) {
+      sb.append( " percentiles:").append(percentiles);
+    }
+    
     sb.append( " }" );
     return sb.toString();
   }
@@ -155,7 +169,7 @@ public class FieldStatsInfo implements S
   public Object getSum() {
     return sum;
   }
-
+     
   public Long getCount() {
     return count;
   }
@@ -188,4 +202,11 @@ public class FieldStatsInfo implements S
     return facets;
   }
   
+  /**
+   * The percentiles requested if any, otherwise null.  If non-null then the
+   * iteration order will match the order the percentiles were originally specified in.
+   */
+  public Map<Double, Double> getPercentiles() {
+    return percentiles;
+  }
 }



Mime
View raw message