calcite-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jh...@apache.org
Subject [2/5] calcite git commit: [CALCITE-1616] Data profiler
Date Wed, 29 Nov 2017 04:09:01 GMT
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
index ad808a9..a3db6db 100644
--- a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
+++ b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
@@ -16,7 +16,11 @@
  */
 package org.apache.calcite.util;
 
-import java.util.AbstractList;
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
 import java.util.AbstractSet;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@@ -61,7 +65,21 @@ import java.util.Set;
  * @param <E> Element type
  */
 public class PartiallyOrderedSet<E> extends AbstractSet<E> {
+  /** Ordering that orders bit sets by inclusion.
+   *
+   * <p>For example, the children of 14 (1110) are 12 (1100), 10 (1010) and
+   * 6 (0110).
+   */
+  public static final Ordering<ImmutableBitSet> BIT_SET_INCLUSION_ORDERING =
+      new Ordering<ImmutableBitSet>() {
+        public boolean lessThan(ImmutableBitSet e1, ImmutableBitSet e2) {
+          return e1.contains(e2);
+        }
+      };
+
   private final Map<E, Node<E>> map;
+  private final Function<E, Iterable<E>> parentFunction;
+  private final Function<E, Iterable<E>> childFunction;
   private final Ordering<E> ordering;
 
   /**
@@ -71,7 +89,9 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
   private final Node<E> topNode;
   private final Node<E> bottomNode;
 
-  private static final boolean DEBUG = Math.random() >= 0;
+  /** Whether to check internal consistency all the time.
+   * False unless you specify "-Dcalcite.debug" on the command line. */
+  private static final boolean DEBUG = Util.getBooleanProperty("calcite.debug");
 
   /**
    * Creates a partially-ordered set.
@@ -79,7 +99,19 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * @param ordering Ordering relation
    */
   public PartiallyOrderedSet(Ordering<E> ordering) {
-    this(ordering, new HashMap<E, Node<E>>());
+    this(ordering, new HashMap<E, Node<E>>(), null, null);
+  }
+
+  /**
+   * Creates a partially-ordered set with a parent-generating function.
+   *
+   * @param ordering Ordering relation
+   * @param parentFunction Function to compute parents of a node; may be null
+   */
+  public PartiallyOrderedSet(Ordering<E> ordering,
+      Function<E, Iterable<E>> childFunction,
+      Function<E, Iterable<E>> parentFunction) {
+    this(ordering, new HashMap<E, Node<E>>(), childFunction, parentFunction);
   }
 
   /**
@@ -90,7 +122,8 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * @param collection Initial contents of partially-ordered set
    */
   public PartiallyOrderedSet(Ordering<E> ordering, Collection<E> collection) {
-    this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2));
+    this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2), null,
+        null);
     addAll(collection);
   }
 
@@ -99,10 +132,15 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    *
    * @param ordering Ordering relation
    * @param map Map from values to nodes
+   * @param parentFunction Function to compute parents of a node; may be null
    */
-  private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map) {
+  private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map,
+      Function<E, Iterable<E>> childFunction,
+      Function<E, Iterable<E>> parentFunction) {
     this.ordering = ordering;
     this.map = map;
+    this.childFunction = childFunction;
+    this.parentFunction = parentFunction;
     this.topNode = new TopBottomNode<>(true);
     this.bottomNode = new TopBottomNode<>(false);
     this.topNode.childList.add(bottomNode);
@@ -528,7 +566,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * Returns the values in this partially-ordered set that are less-than
    * a given value and there are no intervening values.
    *
-   * <p>If the value is not in this set, returns the empty list.</p>
+   * <p>If the value is not in this set, returns null.
    *
    * @see #getDescendants
    *
@@ -537,15 +575,33 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    *   value
    */
   public List<E> getChildren(E e) {
+    return getChildren(e, false);
+  }
+
+  /**
+   * Returns the values in this partially-ordered set that are less-than
+   * a given value and there are no intervening values.
+   *
+   * <p>If the value is not in this set, returns null if {@code hypothetical}
+   * is false.
+   *
+   * @see #getDescendants
+   *
+   * @param e Value
+   * @param hypothetical Whether to generate a list if value is not in the set
+   * @return List of values in this set that are directly less than the given
+   *   value
+   */
+  public List<E> getChildren(E e, boolean hypothetical) {
     final Node<E> node = map.get(e);
     if (node == null) {
-      return null;
-    } else if (node.childList.get(0).e == null) {
-      // child list contains bottom element, so officially there are no
-      // children
-      return Collections.emptyList();
+      if (hypothetical) {
+        return strip(findChildren(e));
+      } else {
+        return null;
+      }
     } else {
-      return new StripList<>(node.childList);
+      return strip(node.childList);
     }
   }
 
@@ -553,7 +609,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * Returns the values in this partially-ordered set that are greater-than
    * a given value and there are no intervening values.
    *
-   * <p>If the value is not in this set, returns the empty list.</p>
+   * <p>If the value is not in this set, returns null.
    *
    * @see #getAncestors
    *
@@ -562,32 +618,61 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    *   given value
    */
   public List<E> getParents(E e) {
+    return getParents(e, false);
+  }
+
+  /**
+   * Returns the values in this partially-ordered set that are greater-than
+   * a given value and there are no intervening values.
+   *
+   * <p>If the value is not in this set, returns {@code null} if
+   * {@code hypothetical} is false.
+   *
+   * @see #getAncestors
+   *
+   * @param e Value
+   * @param hypothetical Whether to generate a list if value is not in the set
+   * @return List of values in this set that are directly greater than the
+   *   given value
+   */
+  public List<E> getParents(E e, boolean hypothetical) {
     final Node<E> node = map.get(e);
     if (node == null) {
-      return null;
-    } else if (node.parentList.get(0).e == null) {
-      // parent list contains top element, so officially there are no
-      // parents
-      return Collections.emptyList();
+      if (hypothetical) {
+        if (parentFunction != null) {
+          final List<E> list = new ArrayList<>();
+          closure(parentFunction, e, list, new HashSet<E>());
+          return list;
+        } else {
+          return ImmutableList.copyOf(strip(findParents(e)));
+        }
+      } else {
+        return null;
+      }
     } else {
-      return new StripList<>(node.parentList);
+      return strip(node.parentList);
     }
   }
 
-  public List<E> getNonChildren() {
-    if (topNode.childList.size() == 1
-        && topNode.childList.get(0).e == null) {
-      return Collections.emptyList();
+  private void closure(Function<E, Iterable<E>> generator, E e, List<E> list,
+      Set<E> set) {
+    for (E p : Preconditions.checkNotNull(generator.apply(e))) {
+      if (set.add(e)) {
+        if (map.containsKey(p)) {
+          list.add(p);
+        } else {
+          closure(generator, p, list, set);
+        }
+      }
     }
-    return new StripList<>(topNode.childList);
+  }
+
+  public List<E> getNonChildren() {
+    return strip(topNode.childList);
   }
 
   public List<E> getNonParents() {
-    if (bottomNode.parentList.size() == 1
-        && bottomNode.parentList.get(0).e == null) {
-      return Collections.emptyList();
-    }
-    return new StripList<>(bottomNode.parentList);
+    return strip(bottomNode.parentList);
   }
 
   @Override public void clear() {
@@ -612,6 +697,57 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
     return descendants(e, true);
   }
 
+  /** Returns a list, backed by a list of
+   * {@link org.apache.calcite.util.PartiallyOrderedSet.Node}s, that strips
+   * away the node and returns the element inside.
+   *
+   * @param <E> Element type
+   */
+  public static <E> List<E> strip(List<Node<E>> list) {
+    if (list.size() == 1
+        && list.get(0).e == null) {
+      // If parent list contains top element, a node whose element is null,
+      // officially there are no parents.
+      // Similarly child list and bottom element.
+      return ImmutableList.of();
+    }
+    return Lists.transform(list,
+      new Function<Node<E>, E>() {
+        public E apply(Node<E> node) {
+          return node.e;
+        }
+      });
+  }
+
+  /** Converts an iterable of nodes into the list of the elements inside.
+   * If there is one node whose element is null, it represents a list
+   * containing either the top or bottom element, so we return the empty list.
+   *
+   * @param <E> Element type
+   */
+  private static <E> ImmutableList<E> strip(Iterable<Node<E>> iterable) {
+    final Iterator<Node<E>> iterator = iterable.iterator();
+    if (!iterator.hasNext()) {
+      return ImmutableList.of();
+    }
+    Node<E> node = iterator.next();
+    if (!iterator.hasNext()) {
+      if (node.e == null) {
+        return ImmutableList.of();
+      } else {
+        return ImmutableList.of(node.e);
+      }
+    }
+    final ImmutableList.Builder<E> builder = ImmutableList.builder();
+    for (;;) {
+      builder.add(node.e);
+      if (!iterator.hasNext()) {
+        return builder.build();
+      }
+      node = iterator.next();
+    }
+  }
+
   /**
    * Returns a list of values in the set that are less-than a given value.
    * The list is in topological order but order is otherwise
@@ -725,27 +861,6 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
      */
     boolean lessThan(E e1, E e2);
   }
-
-  /** List, backed by a list of {@link Node}s, that strips away the
-   * node and returns the element inside.
-   *
-   * @param <E> Element type
-   */
-  private static class StripList<E> extends AbstractList<E> {
-    private final List<Node<E>> list;
-
-    StripList(List<Node<E>> list) {
-      this.list = list;
-    }
-
-    @Override public E get(int index) {
-      return list.get(index).e;
-    }
-
-    @Override public int size() {
-      return list.size();
-    }
-  }
 }
 
 // End PartiallyOrderedSet.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
new file mode 100644
index 0000000..e45db8b
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
@@ -0,0 +1,682 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.linq4j.AbstractEnumerable;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.PredicateImpl;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.Matchers;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.JsonBuilder;
+import org.apache.calcite.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.base.Supplier;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Ordering;
+
+import org.hamcrest.Matcher;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Unit tests for {@link Profiler}.
+ */
+public class ProfilerTest {
+  @Test public void testProfileZeroRows() throws Exception {
+    final String sql = "select * from \"scott\".dept where false";
+    sql(sql).unordered(
+        "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:0.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:0.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:0.0}",
+        "{type:distribution,columns:[DEPTNO],values:[],cardinality:0.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:0.0}",
+        "{type:distribution,columns:[DNAME],values:[],cardinality:0.0}",
+        "{type:distribution,columns:[LOC],values:[],cardinality:0.0}",
+        "{type:distribution,columns:[],cardinality:0.0}",
+        "{type:rowCount,rowCount:0}",
+        "{type:unique,columns:[]}");
+  }
+
+  @Test public void testProfileOneRow() throws Exception {
+    final String sql = "select * from \"scott\".dept where deptno = 10";
+    sql(sql).unordered(
+        "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:1.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:1.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:1.0}",
+        "{type:distribution,columns:[DEPTNO],values:[10],cardinality:1.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:1.0}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING],cardinality:1.0}",
+        "{type:distribution,columns:[LOC],values:[NEWYORK],cardinality:1.0}",
+        "{type:distribution,columns:[],cardinality:1.0}",
+        "{type:rowCount,rowCount:1}",
+        "{type:unique,columns:[]}");
+  }
+
+  @Test public void testProfileTwoRows() throws Exception {
+    final String sql = "select * from \"scott\".dept where deptno in (10, 20)";
+    sql(sql).unordered(
+        "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:2.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:2.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:2.0}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20],cardinality:2.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:2.0}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH],cardinality:2.0}",
+        "{type:distribution,columns:[LOC],values:[DALLAS,NEWYORK],cardinality:2.0}",
+        "{type:distribution,columns:[],cardinality:1.0}",
+        "{type:rowCount,rowCount:2}",
+        "{type:unique,columns:[DEPTNO]}",
+        "{type:unique,columns:[DNAME]}",
+        "{type:unique,columns:[LOC]}");
+  }
+
+  @Test public void testProfileScott() throws Exception {
+    final String sql = "select * from \"scott\".emp\n"
+        + "join \"scott\".dept using (deptno)";
+    sql(sql)
+        .where(new PredicateImpl<Profiler.Statistic>() {
+          public boolean test(Profiler.Statistic statistic) {
+            return !(statistic instanceof Profiler.Distribution)
+                || ((Profiler.Distribution) statistic).cardinality < 14
+                && ((Profiler.Distribution) statistic).minimal;
+          }
+        }).unordered(
+        "{type:distribution,columns:[COMM,DEPTNO0],cardinality:5.0}",
+        "{type:distribution,columns:[COMM,DEPTNO],cardinality:5.0}",
+        "{type:distribution,columns:[COMM,DNAME],cardinality:5.0}",
+        "{type:distribution,columns:[COMM,LOC],cardinality:5.0}",
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:3.0}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0}",
+        "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0}",
+        "{type:distribution,columns:[JOB,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[JOB,DEPTNO0],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,DEPTNO],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,DNAME],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,LOC],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,MGR,DEPTNO0],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR,DEPTNO],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR,DNAME],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR,LOC],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR],cardinality:8.0}",
+        "{type:distribution,columns:[JOB,SAL],cardinality:12.0}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0}",
+        "{type:distribution,columns:[MGR,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[MGR,DEPTNO0],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,DEPTNO],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,DNAME],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,LOC],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,SAL],cardinality:12.0}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1}",
+        "{type:distribution,columns:[SAL,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[SAL,DEPTNO0],cardinality:12.0}",
+        "{type:distribution,columns:[SAL,DEPTNO],cardinality:12.0}",
+        "{type:distribution,columns:[SAL,DNAME],cardinality:12.0}",
+        "{type:distribution,columns:[SAL,LOC],cardinality:12.0}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0}",
+        "{type:distribution,columns:[],cardinality:1.0}",
+        "{type:fd,columns:[DEPTNO0],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[DEPTNO0],dependentColumn:DNAME}",
+        "{type:fd,columns:[DEPTNO0],dependentColumn:LOC}",
+        "{type:fd,columns:[DEPTNO],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[DEPTNO],dependentColumn:DNAME}",
+        "{type:fd,columns:[DEPTNO],dependentColumn:LOC}",
+        "{type:fd,columns:[DNAME],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[DNAME],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[DNAME],dependentColumn:LOC}",
+        "{type:fd,columns:[JOB],dependentColumn:COMM}",
+        "{type:fd,columns:[LOC],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[LOC],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[LOC],dependentColumn:DNAME}",
+        "{type:fd,columns:[SAL],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[SAL],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[SAL],dependentColumn:DNAME}",
+        "{type:fd,columns:[SAL],dependentColumn:JOB}",
+        "{type:fd,columns:[SAL],dependentColumn:LOC}",
+        "{type:fd,columns:[SAL],dependentColumn:MGR}",
+        "{type:rowCount,rowCount:14}",
+        "{type:unique,columns:[EMPNO]}",
+        "{type:unique,columns:[ENAME]}",
+        "{type:unique,columns:[HIREDATE,DEPTNO0]}",
+        "{type:unique,columns:[HIREDATE,DEPTNO]}",
+        "{type:unique,columns:[HIREDATE,DNAME]}",
+        "{type:unique,columns:[HIREDATE,LOC]}",
+        "{type:unique,columns:[HIREDATE,SAL]}",
+        "{type:unique,columns:[JOB,HIREDATE]}");
+  }
+
+  /** As {@link #testProfileScott()}, but prints only the most surprising
+   * distributions. */
+  @Test public void testProfileScott2() throws Exception {
+    scott().factory(Fluid.SIMPLE_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0,expectedCardinality:12.682618485430247,surprise:0.4344728973121492}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR,COMM],cardinality:5.0,expectedCardinality:11.675074674157162,surprise:0.400302535646339}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL,COMM],cardinality:5.0,expectedCardinality:12.579960871109892,surprise:0.43117052004174}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** As {@link #testProfileScott2()}, but uses the breadth-first profiler.
+   * Results should be the same, but are slightly different (extra EMPNO
+   * and ENAME distributions). */
+  @Test public void testProfileScott3() throws Exception {
+    scott().factory(Fluid.BETTER_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** As {@link #testProfileScott3()}, but uses the breadth-first profiler
+   * and deems everything uninteresting. Only first-level combinations (those
+   * consisting of a single column) are computed. */
+  @Test public void testProfileScott4() throws Exception {
+    scott().factory(Fluid.INCURIOUS_PROFILER_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** As {@link #testProfileScott3()}, but uses the breadth-first profiler. */
+  @Ignore
+  @Test public void testProfileScott5() throws Exception {
+    scott().factory(Fluid.PROFILER_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** Profiles a star-join query on the Foodmart schema using the breadth-first
+   * profiler. */
+  @Ignore
+  @Test public void testProfileFoodmart() throws Exception {
+    foodmart().factory(Fluid.PROFILER_FACTORY).unordered(
+        "{type:distribution,columns:[brand_name],cardinality:111.0,expectedCardinality:86837.0,surprise:0.9974467497814786}",
+        "{type:distribution,columns:[cases_per_pallet],values:[5,6,7,8,9,10,11,12,13,14],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+        "{type:distribution,columns:[day_of_month],cardinality:30.0,expectedCardinality:86837.0,surprise:0.9993092889129359}",
+        "{type:distribution,columns:[fiscal_period],values:[],cardinality:1.0,nullCount:86837,expectedCardinality:86837.0,surprise:0.999976968608213}",
+        "{type:distribution,columns:[low_fat],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+        "{type:distribution,columns:[month_of_year],values:[1,2,3,4,5,6,7,8,9,10,11,12],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+        "{type:distribution,columns:[product_category],cardinality:45.0,expectedCardinality:86837.0,surprise:0.9989641122441932}",
+        "{type:distribution,columns:[product_class_id0,product_subcategory,product_category,product_department,product_family],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[product_class_id0],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[product_class_id],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[product_department],cardinality:22.0,expectedCardinality:86837.0,surprise:0.9994934318838578}",
+        "{type:distribution,columns:[product_family],values:[Drink,Food,Non-Consumable],cardinality:3.0,expectedCardinality:86837.0,surprise:0.9999309074159374}",
+        "{type:distribution,columns:[product_subcategory],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[quarter],values:[Q1,Q2,Q3,Q4],cardinality:4.0,expectedCardinality:86837.0,surprise:0.9999078776154121}",
+        "{type:distribution,columns:[recyclable_package],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+        "{type:distribution,columns:[store_cost,fiscal_period],cardinality:10601.0,nullCount:86724,expectedCardinality:10.0,surprise:0.9981151635095655}",
+        "{type:distribution,columns:[store_cost,low_fat],cardinality:17673.0,expectedCardinality:20.0,surprise:0.99773921890013}",
+        "{type:distribution,columns:[store_cost,product_family],cardinality:19453.0,expectedCardinality:30.0,surprise:0.9969203921367346}",
+        "{type:distribution,columns:[store_cost,quarter],cardinality:29590.0,expectedCardinality:40.0,surprise:0.9973000337495781}",
+        "{type:distribution,columns:[store_cost,recyclable_package],cardinality:17847.0,expectedCardinality:20.0,surprise:0.9977612357978396}",
+        "{type:distribution,columns:[store_cost,the_year],cardinality:10944.0,expectedCardinality:10.0,surprise:0.9981741829468688}",
+        "{type:distribution,columns:[store_cost],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+        "{type:distribution,columns:[store_id],values:[2,3,6,7,11,13,14,15,16,17,22,23,24],cardinality:13.0,expectedCardinality:86837.0,surprise:0.9997006332757629}",
+        "{type:distribution,columns:[store_sales],cardinality:21.0,expectedCardinality:86837.0,surprise:0.999516452140275}",
+        "{type:distribution,columns:[the_day],values:[Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday],cardinality:7.0,expectedCardinality:86837.0,surprise:0.9998387913960665}",
+        "{type:distribution,columns:[the_month],values:[April,August,December,February,January,July,June,March,May,November,October,September],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+        "{type:distribution,columns:[the_year],values:[1997],cardinality:1.0,expectedCardinality:86837.0,surprise:0.999976968608213}",
+        "{type:distribution,columns:[unit_sales],values:[1.0000,2.0000,3.0000,4.0000,5.0000,6.0000],cardinality:6.0,expectedCardinality:86837.0,surprise:0.999861819605495}",
+        "{type:distribution,columns:[units_per_case],cardinality:36.0,expectedCardinality:86837.0,surprise:0.9991712039413857}",
+        "{type:distribution,columns:[week_of_year],cardinality:52.0,expectedCardinality:86837.0,surprise:0.9988030705843087}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** Tests
+   * {@link org.apache.calcite.profile.ProfilerImpl.SurpriseQueue}. */
+  @Test public void testSurpriseQueue() {
+    ProfilerImpl.SurpriseQueue q = new ProfilerImpl.SurpriseQueue(4, 3);
+    assertThat(q.offer(2), is(true));
+    assertThat(q.toString(), is("min: 2.0, contents: [2.0]"));
+    assertThat(q.isValid(), is(true));
+
+    assertThat(q.offer(4), is(true));
+    assertThat(q.toString(), is("min: 2.0, contents: [2.0, 4.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Since we're in the warm-up period, a value lower than the minimum is
+    // accepted.
+    assertThat(q.offer(1), is(true));
+    assertThat(q.toString(), is("min: 1.0, contents: [2.0, 4.0, 1.0]"));
+    assertThat(q.isValid(), is(true));
+
+    assertThat(q.offer(5), is(true));
+    assertThat(q.toString(), is("min: 1.0, contents: [4.0, 1.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    assertThat(q.offer(3), is(true));
+    assertThat(q.toString(), is("min: 1.0, contents: [1.0, 5.0, 3.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Duplicate entry
+    assertThat(q.offer(5), is(true));
+    assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Now that the list is full, a value below the minimum is refused.
+    // "offer" returns false, and the value is not added to the queue.
+    // Thus the median never decreases.
+    assertThat(q.offer(2), is(false));
+    assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Same applies for a value equal to the minimum.
+    assertThat(q.offer(3), is(false));
+    assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Add a value that is above the minimum.
+    assertThat(q.offer(4.5), is(true));
+    assertThat(q.toString(), is("min: 3.0, contents: [3.0, 5.0, 4.5]"));
+    assertThat(q.isValid(), is(true));
+  }
+
+  private Fluid scott() throws Exception {
+    final String sql = "select * from \"scott\".emp\n"
+        + "join \"scott\".dept using (deptno)";
+    return sql(sql)
+        .where(Fluid.STATISTIC_PREDICATE)
+        .sort(Fluid.ORDERING.reverse())
+        .limit(30)
+        .project(Fluid.EXTENDED_COLUMNS);
+  }
+
+  private Fluid foodmart() throws Exception {
+    final String sql = "select \"s\".*, \"p\".*, \"t\".*, \"pc\".*\n"
+        + "from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+        + "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
+        + "join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")\n"
+        + "join \"foodmart\".\"product_class\" as \"pc\"\n"
+        + "  on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"\n";
+    return sql(sql)
+        .config(CalciteAssert.Config.JDBC_FOODMART)
+        .where(Fluid.STATISTIC_PREDICATE)
+        .sort(Fluid.ORDERING.reverse())
+        .limit(30)
+        .project(Fluid.EXTENDED_COLUMNS);
+  }
+
+  private static Fluid sql(String sql) {
+    return new Fluid(CalciteAssert.Config.SCOTT, sql, Fluid.SIMPLE_FACTORY,
+        Predicates.<Profiler.Statistic>alwaysTrue(), null, -1,
+        Fluid.DEFAULT_COLUMNS);
+  }
+
+  /** Fluid interface for writing profiler test cases. */
+  private static class Fluid {
+    static final Supplier<Profiler> SIMPLE_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            return new SimpleProfiler();
+          }
+        };
+
+    static final Supplier<Profiler> BETTER_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>>
+                predicate = Predicates.alwaysTrue();
+            return new ProfilerImpl(600, 200, predicate);
+          }
+        };
+
+    static final Ordering<Profiler.Statistic> ORDERING =
+        new Ordering<Profiler.Statistic>() {
+          public int compare(Profiler.Statistic left,
+              Profiler.Statistic right) {
+            int c = left.getClass().getSimpleName()
+                .compareTo(right.getClass().getSimpleName());
+            if (c == 0
+                && left instanceof Profiler.Distribution
+                && right instanceof Profiler.Distribution) {
+              final Profiler.Distribution d0 = (Profiler.Distribution) left;
+              final Profiler.Distribution d1 = (Profiler.Distribution) right;
+              c = Double.compare(d0.surprise(), d1.surprise());
+              if (c == 0) {
+                c = d0.columns.toString().compareTo(d1.columns.toString());
+              }
+            }
+            return c;
+          }
+        };
+
+    static final Predicate<Profiler.Statistic> STATISTIC_PREDICATE =
+        new PredicateImpl<Profiler.Statistic>() {
+          public boolean test(Profiler.Statistic statistic) {
+            // Include distributions of zero columns (the grand total)
+            // and singleton columns, plus "surprising" distributions
+            // (with significantly higher NDVs than predicted from their
+            // constituent columns).
+            return statistic instanceof Profiler.Distribution
+                && (((Profiler.Distribution) statistic).columns.size() < 2
+                || ((Profiler.Distribution) statistic).surprise() > 0.4D)
+                && ((Profiler.Distribution) statistic).minimal;
+          }
+        };
+
+    static final List<String> DEFAULT_COLUMNS =
+        ImmutableList.of("type", "distribution", "columns", "cardinality",
+            "values", "nullCount", "dependentColumn", "rowCount");
+
+    static final List<String> EXTENDED_COLUMNS =
+        ImmutableList.<String>builder().addAll(DEFAULT_COLUMNS)
+            .add("expectedCardinality", "surprise")
+            .build();
+
+    private static final Supplier<Profiler> PROFILER_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            return new ProfilerImpl(7500, 100,
+                new PredicateImpl<Pair<ProfilerImpl.Space, Profiler.Column>>() {
+                  public boolean test(
+                      Pair<ProfilerImpl.Space, Profiler.Column> p) {
+                    final Profiler.Distribution distribution =
+                        p.left.distribution();
+                    if (distribution == null) {
+                      // We don't have a distribution yet, because this space
+                      // has not yet been evaluated. Let's do it anyway.
+                      return true;
+                    }
+                    return distribution.surprise() >= 0.3D;
+                  }
+                });
+          }
+        };
+
+    private static final Supplier<Profiler> INCURIOUS_PROFILER_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>> p =
+                Predicates.alwaysFalse();
+            return new ProfilerImpl(10, 200, p);
+          }
+        };
+
+    private final String sql;
+    private final List<String> columns;
+    private final Comparator<Profiler.Statistic> comparator;
+    private final int limit;
+    private final Predicate<Profiler.Statistic> predicate;
+    private final Supplier<Profiler> factory;
+    private final CalciteAssert.Config config;
+
+    Fluid(CalciteAssert.Config config, String sql, Supplier<Profiler> factory,
+        Predicate<Profiler.Statistic> predicate,
+        Comparator<Profiler.Statistic> comparator, int limit,
+        List<String> columns) {
+      this.sql = Preconditions.checkNotNull(sql);
+      this.factory = Preconditions.checkNotNull(factory);
+      this.columns = ImmutableList.copyOf(columns);
+      this.predicate = Preconditions.checkNotNull(predicate);
+      this.comparator = comparator; // null means sort on JSON representation
+      this.limit = limit;
+      this.config = config;
+    }
+
+    Fluid config(CalciteAssert.Config config) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid factory(Supplier<Profiler> factory) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid project(List<String> columns) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid sort(Ordering<Profiler.Statistic> comparator) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid limit(int limit) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid where(Predicate<Profiler.Statistic> predicate) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid unordered(String... lines) throws Exception {
+      return check(Matchers.equalsUnordered(lines));
+    }
+
+    public Fluid check(final Matcher<Iterable<String>> matcher)
+        throws Exception {
+      CalciteAssert.that(config)
+          .doWithConnection(new Function<CalciteConnection, Void>() {
+            public Void apply(CalciteConnection c) {
+              try (PreparedStatement s = c.prepareStatement(sql)) {
+                final ResultSetMetaData m = s.getMetaData();
+                final List<Profiler.Column> columns = new ArrayList<>();
+                final int columnCount = m.getColumnCount();
+                for (int i = 0; i < columnCount; i++) {
+                  columns.add(new Profiler.Column(i, m.getColumnLabel(i + 1)));
+                }
+
+                // Create an initial group for each table in the query.
+                // Columns in the same table will tend to have the same
+                // cardinality as the table, and as the table's primary key.
+                final Multimap<String, Integer> groups = HashMultimap.create();
+                for (int i = 0; i < m.getColumnCount(); i++) {
+                  groups.put(m.getTableName(i + 1), i);
+                }
+                final SortedSet<ImmutableBitSet> initialGroups =
+                    new TreeSet<>();
+                for (Collection<Integer> integers : groups.asMap().values()) {
+                  initialGroups.add(ImmutableBitSet.of(integers));
+                }
+                final Profiler p = factory.get();
+                final Enumerable<List<Comparable>> rows = getRows(s);
+                final Profiler.Profile profile =
+                    p.profile(rows, columns, initialGroups);
+                final List<Profiler.Statistic> statistics =
+                    ImmutableList.copyOf(
+                        Iterables.filter(profile.statistics(), predicate));
+
+                // If no comparator specified, use the function that converts to
+                // JSON strings
+                final Function<Profiler.Statistic, String> toJson =
+                    toJsonFunction();
+                Ordering<Profiler.Statistic> comp = comparator != null
+                    ? Ordering.from(comparator)
+                    : Ordering.natural().onResultOf(toJson);
+                ImmutableList<Profiler.Statistic> statistics2 =
+                    comp.immutableSortedCopy(statistics);
+                if (limit >= 0 && limit < statistics2.size()) {
+                  statistics2 = statistics2.subList(0, limit);
+                }
+
+                final List<String> strings =
+                    Lists.transform(statistics2, toJson);
+                assertThat(strings, matcher);
+              } catch (SQLException e) {
+                throw new RuntimeException(e);
+              }
+              return null;
+            }
+          });
+      return this;
+    }
+
+    /** Returns a function that converts a statistic to a JSON string. */
+    Function<Profiler.Statistic, String> toJsonFunction() {
+      return new Function<Profiler.Statistic, String>() {
+        final JsonBuilder jb = new JsonBuilder();
+
+        public String apply(Profiler.Statistic statistic) {
+          Object map = statistic.toMap(jb);
+          if (map instanceof Map) {
+            @SuppressWarnings("unchecked")
+            final Map<String, Object> map1 = (Map) map;
+            map1.keySet().retainAll(Fluid.this.columns);
+          }
+          final String json = jb.toJsonString(map);
+          return json.replaceAll("\n", "").replaceAll(" ", "")
+              .replaceAll("\"", "");
+        }
+      };
+    }
+
+    private Enumerable<List<Comparable>> getRows(final PreparedStatement s) {
+      return new AbstractEnumerable<List<Comparable>>() {
+        public Enumerator<List<Comparable>> enumerator() {
+          try {
+            final ResultSet r = s.executeQuery();
+            return getListEnumerator(r, r.getMetaData().getColumnCount());
+          } catch (SQLException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+    }
+
+    private Enumerator<List<Comparable>> getListEnumerator(
+        final ResultSet r, final int columnCount) {
+      return new Enumerator<List<Comparable>>() {
+        final Comparable[] values = new Comparable[columnCount];
+
+        public List<Comparable> current() {
+          for (int i = 0; i < columnCount; i++) {
+            try {
+              final Comparable value = (Comparable) r.getObject(i + 1);
+              values[i] = NullSentinel.mask(value);
+            } catch (SQLException e) {
+              throw new RuntimeException(e);
+            }
+          }
+          return ImmutableList.copyOf(values);
+        }
+
+        public boolean moveNext() {
+          try {
+            return r.next();
+          } catch (SQLException e) {
+            throw new RuntimeException(e);
+          }
+        }
+
+        public void reset() {
+        }
+
+        public void close() {
+          try {
+            r.close();
+          } catch (SQLException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+    }
+  }
+}
+
+// End ProfilerTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
index d577057..585e8ed 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
@@ -28,6 +28,7 @@ import org.apache.calcite.plan.volcano.TraitPropagationTest;
 import org.apache.calcite.plan.volcano.VolcanoPlannerTest;
 import org.apache.calcite.plan.volcano.VolcanoPlannerTraitTest;
 import org.apache.calcite.prepare.LookupOperatorOverloadsTest;
+import org.apache.calcite.profile.ProfilerTest;
 import org.apache.calcite.rel.RelCollationTest;
 import org.apache.calcite.rel.RelDistributionTest;
 import org.apache.calcite.rel.rel2sql.RelToSqlConverterTest;
@@ -156,6 +157,7 @@ import org.junit.runners.Suite;
     LinqFrontJdbcBackTest.class,
     JdbcFrontJdbcBackLinqMiddleTest.class,
     CalciteSqlOperatorTest.class,
+    ProfilerTest.class,
     LatticeTest.class,
     ReflectiveSchemaTest.class,
     JdbcTest.class,

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
index 3f36bf9..4eae872 100644
--- a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
+++ b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
@@ -23,6 +23,8 @@ import org.apache.calcite.materialize.Lattices;
 
 import com.google.common.collect.ImmutableMap;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -33,10 +35,15 @@ import java.util.Map;
  */
 public class FoodMartLatticeStatisticProvider
     extends DelegatingLatticeStatisticProvider {
-  public static final FoodMartLatticeStatisticProvider INSTANCE =
-      new FoodMartLatticeStatisticProvider(Lattices.CACHED_SQL);
+  public static final FoodMartLatticeStatisticProvider.Factory FACTORY =
+      new Factory() {
+        public LatticeStatisticProvider apply(Lattice lattice) {
+          return new FoodMartLatticeStatisticProvider(lattice,
+              Lattices.CACHED_SQL.apply(lattice));
+        }
+      };
 
-  public static final Map<String, Integer> CARDINALITY_MAP =
+  private static final Map<String, Integer> CARDINALITY_MAP =
       ImmutableMap.<String, Integer>builder()
           .put("brand_name", 111)
           .put("cases_per_pallet", 10)
@@ -75,18 +82,29 @@ public class FoodMartLatticeStatisticProvider
           .put("week_of_year", 52)
           .build();
 
-  private FoodMartLatticeStatisticProvider(LatticeStatisticProvider provider) {
+  private final Lattice lattice;
+
+  private FoodMartLatticeStatisticProvider(Lattice lattice,
+      LatticeStatisticProvider provider) {
     super(provider);
+    this.lattice = lattice;
   }
 
-  /** Returns an estimate of the number of distinct values in a column. */
-  public int cardinality(Lattice lattice, Lattice.Column column) {
+  private int cardinality(Lattice.Column column) {
     final Integer integer = CARDINALITY_MAP.get(column.alias);
     if (integer != null && integer > 0) {
       return integer;
     }
     return column.alias.length();
   }
+
+  @Override public double cardinality(List<Lattice.Column> columns) {
+    final List<Double> cardinalityList = new ArrayList<>();
+    for (Lattice.Column column : columns) {
+      cardinalityList.add((double) cardinality(column));
+    }
+    return Lattice.getRowCount(lattice.getFactRowCount(), cardinalityList);
+  }
 }
 
 // End FoodMartLatticeStatisticProvider.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/LatticeTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
index 54f4ea7..473e5f5 100644
--- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
@@ -16,11 +16,16 @@
  */
 package org.apache.calcite.test;
 
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.materialize.Lattice;
 import org.apache.calcite.materialize.Lattices;
 import org.apache.calcite.materialize.MaterializationService;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.TestUtil;
 import org.apache.calcite.util.Util;
 
@@ -29,6 +34,7 @@ import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 
+import org.junit.Assume;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -39,11 +45,15 @@ import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import static org.apache.calcite.test.Matchers.within;
+
 import static org.hamcrest.CoreMatchers.anyOf;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
 
@@ -110,8 +120,8 @@ public class LatticeTest {
       + "  } ]\n"
       + "}\n";
 
-  private CalciteAssert.AssertThat modelWithLattice(String name, String sql,
-      String... extras) {
+  private static CalciteAssert.AssertThat modelWithLattice(String name,
+      String sql, String... extras) {
     final StringBuilder buf = new StringBuilder("{ name: '")
         .append(name)
         .append("', sql: ")
@@ -123,7 +133,8 @@ public class LatticeTest {
     return modelWithLattices(buf.toString());
   }
 
-  private CalciteAssert.AssertThat modelWithLattices(String... lattices) {
+  private static CalciteAssert.AssertThat modelWithLattices(
+      String... lattices) {
     final Class<JdbcTest.EmpDeptTableFactory> clazz =
         JdbcTest.EmpDeptTableFactory.class;
     return CalciteAssert.model(""
@@ -153,6 +164,36 @@ public class LatticeTest {
 
   /** Tests that it's OK for a lattice to have the same name as a table in the
    * schema. */
+  @Test public void testLatticeSql() throws Exception {
+    modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
+        .doWithConnection(new Function<CalciteConnection, Void>() {
+          public Void apply(CalciteConnection c) {
+            final SchemaPlus schema = c.getRootSchema();
+            final SchemaPlus adhoc = schema.getSubSchema("adhoc");
+            assertThat(adhoc.getTableNames().contains("EMPLOYEES"), is(true));
+            final Map.Entry<String, CalciteSchema.LatticeEntry> entry =
+                adhoc.unwrap(CalciteSchema.class).getLatticeMap().firstEntry();
+            final Lattice lattice = entry.getValue().getLattice();
+            final String sql = "SELECT \"days\".\"day\"\n"
+                + "FROM \"foodmart\".\"days\" AS \"days\"\n"
+                + "GROUP BY \"days\".\"day\"";
+            assertThat(
+                lattice.sql(ImmutableBitSet.of(0),
+                    ImmutableList.<Lattice.Measure>of()), is(sql));
+            final String sql2 = "SELECT"
+                + " \"days\".\"day\", \"days\".\"week_day\"\n"
+                + "FROM \"foodmart\".\"days\" AS \"days\"";
+            assertThat(
+                lattice.sql(ImmutableBitSet.of(0, 1), false,
+                    ImmutableList.<Lattice.Measure>of()),
+                is(sql2));
+            return null;
+          }
+        });
+  }
+
+  /** Tests that it's OK for a lattice to have the same name as a table in the
+   * schema. */
   @Test public void testLatticeWithSameNameAsTable() {
     modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
         .query("select count(*) from EMPLOYEES")
@@ -377,28 +418,57 @@ public class LatticeTest {
    * Use optimization algorithm to suggest which tiles of a lattice to
    * materialize</a>. */
   @Test public void testTileAlgorithm() {
-    checkTileAlgorithm(FoodMartLatticeStatisticProvider.class.getCanonicalName(),
-        "EnumerableAggregate(group=[{2, 3}])\n"
-            + "  EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31}]])");
+    final String explain = "EnumerableAggregate(group=[{2, 3}])\n"
+        + "  EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31, 32, 37}]])";
+    checkTileAlgorithm(
+        FoodMartLatticeStatisticProvider.class.getCanonicalName() + "#FACTORY",
+        explain);
   }
 
+  /** As {@link #testTileAlgorithm()}, but uses the
+   * {@link Lattices#CACHED_SQL} statistics provider. */
   @Test public void testTileAlgorithm2() {
     // Different explain than above, but note that it still selects columns
     // (27, 31).
+    final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+        + "  EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
     checkTileAlgorithm(Lattices.class.getCanonicalName() + "#CACHED_SQL",
-        "EnumerableAggregate(group=[{0, 1}])\n"
-            + "  EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]");
+        explain);
+  }
+
+  /** As {@link #testTileAlgorithm()}, but uses the
+   * {@link Lattices#PROFILER} statistics provider. */
+  @Test public void testTileAlgorithm3() {
+    Assume.assumeTrue("Yahoo sketches requires JDK 8 or higher",
+        TestUtil.getJavaMajorVersion() >= 8);
+    final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+        + "  EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
+    checkTileAlgorithm(Lattices.class.getCanonicalName() + "#PROFILER",
+        explain);
   }
 
   private void checkTileAlgorithm(String statisticProvider,
       String expectedExplain) {
     MaterializationService.setThreadLocal();
     MaterializationService.instance().clear();
-    foodmartModel(
-        " auto: false,\n"
+    foodmartLatticeModel(statisticProvider)
+        .query("select distinct t.\"the_year\", t.\"quarter\"\n"
+            + "from \"foodmart\".\"sales_fact_1997\" as s\n"
+            + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
+        .enableMaterializations(true)
+        .explainContains(expectedExplain)
+        .returnsUnordered("the_year=1997; quarter=Q1",
+            "the_year=1997; quarter=Q2",
+            "the_year=1997; quarter=Q3",
+            "the_year=1997; quarter=Q4");
+  }
+
+  private static CalciteAssert.AssertThat foodmartLatticeModel(
+      String statisticProvider) {
+    return foodmartModel(" auto: false,\n"
         + "  algorithm: true,\n"
         + "  algorithmMaxMillis: -1,\n"
-        + "  rowCountEstimate: 86000,\n"
+        + "  rowCountEstimate: 87000,\n"
         + "  defaultMeasures: [ {\n"
         + "      agg: 'sum',\n"
         + "      args: 'unit_sales'\n"
@@ -414,17 +484,7 @@ public class LatticeTest {
         + "  tiles: [ {\n"
         + "    dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
         + "    measures: [ ]\n"
-        + "  } ]\n")
-        .query("select distinct t.\"the_year\", t.\"quarter\"\n"
-            + "from \"foodmart\".\"sales_fact_1997\" as s\n"
-            + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
-        .enableMaterializations(true)
-        .explainContains(expectedExplain)
-        .returnsUnordered("the_year=1997; quarter=Q1",
-            "the_year=1997; quarter=Q2",
-            "the_year=1997; quarter=Q3",
-            "the_year=1997; quarter=Q4")
-        .returnsCount(4);
+        + "  } ]\n");
   }
 
   /** Tests a query that is created within {@link #testTileAlgorithm()}. */
@@ -704,7 +764,7 @@ public class LatticeTest {
         .returns("EXPR$0=1\n");
   }
 
-  private CalciteAssert.AssertThat foodmartModel(String... extras) {
+  private static CalciteAssert.AssertThat foodmartModel(String... extras) {
     return modelWithLattice("star",
         "select 1 from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
             + "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
@@ -741,6 +801,17 @@ public class LatticeTest {
     System.out.println(CalciteAssert.toString(resultSet));
     connection.close();
   }
+
+  /** Unit test for {@link Lattice#getRowCount(double, List)}. */
+  @Test public void testColumnCount() {
+    assertThat(Lattice.getRowCount(10, 2, 3), within(5.03D, 0.01D));
+    assertThat(Lattice.getRowCount(10, 9, 8), within(9.4D, 0.01D));
+    assertThat(Lattice.getRowCount(100, 9, 8), within(54.2D, 0.1D));
+    assertThat(Lattice.getRowCount(1000, 9, 8), within(72D, 0.01D));
+    assertThat(Lattice.getRowCount(1000, 1, 1), is(1D));
+    assertThat(Lattice.getRowCount(1, 3, 5), within(1D, 0.01D));
+    assertThat(Lattice.getRowCount(1, 3, 5, 13, 4831), within(1D, 0.01D));
+  }
 }
 
 // End LatticeTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/Matchers.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/Matchers.java b/core/src/test/java/org/apache/calcite/test/Matchers.java
index fc9e0fd..08733a2 100644
--- a/core/src/test/java/org/apache/calcite/test/Matchers.java
+++ b/core/src/test/java/org/apache/calcite/test/Matchers.java
@@ -16,10 +16,16 @@
  */
 package org.apache.calcite.test;
 
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Functions;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 
+import org.hamcrest.BaseMatcher;
 import org.hamcrest.CustomTypeSafeMatcher;
 import org.hamcrest.Description;
+import org.hamcrest.Factory;
 import org.hamcrest.Matcher;
 
 import java.sql.ResultSet;
@@ -79,6 +85,81 @@ public class Matchers {
       }
     };
   }
+
+  public static <E extends Comparable> Matcher<Iterable<E>> equalsUnordered(
+      E... lines) {
+    final List<String> expectedList =
+        Lists.newArrayList(toStringList(Arrays.asList(lines)));
+    Collections.sort(expectedList);
+    final String description = Util.lines(expectedList);
+    return new CustomTypeSafeMatcher<Iterable<E>>(description) {
+      @Override protected void describeMismatchSafely(Iterable<E> actuals,
+          Description description) {
+        final List<String> actualList =
+            Lists.newArrayList(toStringList(actuals));
+        Collections.sort(actualList);
+        description.appendText("was ")
+            .appendValue(Util.lines(actualList));
+      }
+
+      protected boolean matchesSafely(Iterable<E> actuals) {
+        final List<String> actualList =
+            Lists.newArrayList(toStringList(actuals));
+        Collections.sort(actualList);
+        return actualList.equals(expectedList);
+      }
+    };
+  }
+
+  private static <E> Iterable<String> toStringList(Iterable<E> items) {
+    return Iterables.transform(items, Functions.toStringFunction());
+  }
+
+  /**
+   * Creates a matcher that matches when the examined object is within
+   * {@code epsilon} of the specified <code>operand</code>.
+   */
+  @Factory
+  public static <T extends Number> Matcher<T> within(T value, double epsilon) {
+    return new IsWithin<T>(value, epsilon);
+  }
+
+  /**
+   * Is the numeric value within a given difference another value?
+   *
+   * @param <T> Value type
+   */
+  public static class IsWithin<T extends Number> extends BaseMatcher<T> {
+    private final T expectedValue;
+    private final double epsilon;
+
+    public IsWithin(T expectedValue, double epsilon) {
+      this.expectedValue = expectedValue;
+      this.epsilon = epsilon;
+    }
+
+    public boolean matches(Object actualValue) {
+      return areEqual(actualValue, expectedValue, epsilon);
+    }
+
+    public void describeTo(Description description) {
+      description.appendValue(expectedValue + " +/-" + epsilon);
+    }
+
+    private static boolean areEqual(Object actual, Number expected,
+        double epsilon) {
+      if (actual == null) {
+        return expected == null;
+      }
+      if (actual.equals(expected)) {
+        return true;
+      }
+      final double a = ((Number) actual).doubleValue();
+      final double min = expected.doubleValue() - epsilon;
+      final double max = expected.doubleValue() + epsilon;
+      return min <= a && a <= max;
+    }
+  }
 }
 
 // End Matchers.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
index d37c223..8a6a9da 100644
--- a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
+++ b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
@@ -18,9 +18,13 @@ package org.apache.calcite.util;
 
 import org.apache.calcite.test.CalciteAssert;
 
+import com.google.common.base.Function;
+
+import org.junit.Assume;
 import org.junit.Test;
 
 import java.util.AbstractList;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -28,7 +32,10 @@ import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
 
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
 /**
@@ -133,6 +140,13 @@ public class PartiallyOrderedSetTest {
 
     // "bcd" is child of "abcd" and parent of ""
     final String bcd = "'bcd'";
+    assertEquals("['abcd']", poset.getParents(bcd, true).toString());
+    assertThat(poset.getParents(bcd, false), nullValue());
+    assertThat(poset.getParents(bcd), nullValue());
+    assertEquals("['']", poset.getChildren(bcd, true).toString());
+    assertThat(poset.getChildren(bcd, false), nullValue());
+    assertThat(poset.getChildren(bcd), nullValue());
+
     poset.add(bcd);
     printValidate(poset);
     assertTrue(poset.isValid(false));
@@ -210,6 +224,66 @@ public class PartiallyOrderedSetTest {
     printValidate(poset);
   }
 
+  @Test public void testPosetBitsLarge() {
+    final PartiallyOrderedSet<Integer> poset =
+        new PartiallyOrderedSet<>(IS_BIT_SUPERSET);
+    checkPosetBitsLarge(poset, 30000, 2921, 164782);
+  }
+
+  @Test public void testPosetBitsLarge2() {
+    Assume.assumeTrue("too slow to run every day", CalciteAssert.ENABLE_SLOW);
+    final int n = 30000;
+    final PartiallyOrderedSet<Integer> poset =
+        new PartiallyOrderedSet<>(IS_BIT_SUPERSET,
+          new Function<Integer, Iterable<Integer>>() {
+            public Iterable<Integer> apply(Integer input) {
+              final int i = input;
+              int r = i; // bits not yet cleared
+              final List<Integer> list = new ArrayList<>();
+              for (int z = 1; r != 0; z <<= 1) {
+                if ((i & z) != 0) {
+                  list.add(i ^ z);
+                  r ^= z;
+                }
+              }
+              return list;
+            }
+          },
+          new Function<Integer, Iterable<Integer>>() {
+            public Iterable<Integer> apply(Integer input) {
+              final int i = input;
+              final List<Integer> list = new ArrayList<>();
+              for (int z = 1; z <= n; z <<= 1) {
+                if ((i & z) == 0) {
+                  list.add(i | z);
+                }
+              }
+              return list;
+            }
+          });
+    checkPosetBitsLarge(poset, n, 2921, 11961);
+  }
+
+  void checkPosetBitsLarge(PartiallyOrderedSet<Integer> poset, int n,
+      int expectedSize, int expectedParentCount) {
+    final Random random = new Random(1);
+    int count = 0;
+    int parentCount = 0;
+    for (int i = 0; i < n; i++) {
+      if (random.nextInt(10) == 0) {
+        if (poset.add(random.nextInt(n * 2))) {
+          ++count;
+        }
+      }
+      final List<Integer> parents =
+          poset.getParents(random.nextInt(n * 2), true);
+      parentCount += parents.size();
+    }
+    assertThat(poset.size(), is(count));
+    assertThat(poset.size(), is(expectedSize));
+    assertThat(parentCount, is(expectedParentCount));
+  }
+
   @Test public void testPosetBitsRemoveParent() {
     final PartiallyOrderedSet<Integer> poset =
         new PartiallyOrderedSet<Integer>(IS_BIT_SUPERSET);
@@ -223,9 +297,6 @@ public class PartiallyOrderedSetTest {
   }
 
   @Test public void testDivisorPoset() {
-    if (!CalciteAssert.ENABLE_SLOW) {
-      return;
-    }
     PartiallyOrderedSet<Integer> integers =
         new PartiallyOrderedSet<Integer>(IS_DIVISOR, range(1, 1000));
     assertEquals(
@@ -405,6 +476,7 @@ public class PartiallyOrderedSetTest {
         expected,
         new TreeSet<String>(ss).toString());
   }
+
 }
 
 // End PartiallyOrderedSetTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/TestUtil.java b/core/src/test/java/org/apache/calcite/util/TestUtil.java
index 5a283d2..b530d3b 100644
--- a/core/src/test/java/org/apache/calcite/util/TestUtil.java
+++ b/core/src/test/java/org/apache/calcite/util/TestUtil.java
@@ -34,6 +34,9 @@ public abstract class TestUtil {
   private static final String LINE_BREAK =
       "\\\\n\"" + Util.LINE_SEPARATOR + " + \"";
 
+  private static final String JAVA_VERSION =
+      System.getProperties().getProperty("java.version");
+
   //~ Methods ----------------------------------------------------------------
 
   public static void assertEqualsVerbose(
@@ -190,6 +193,20 @@ public abstract class TestUtil {
         .replaceAll("\\[", "\\\\[")
         .replaceAll("\\]", "\\\\]");
   }
+
+  /** Returns the Java major version: 7 for JDK 1.7, 8 for JDK 8, 10 for
+   * JDK 10, etc. */
+  public static int getJavaMajorVersion() {
+    if (JAVA_VERSION.startsWith("1.7")) {
+      return 7;
+    } else if (JAVA_VERSION.startsWith("1.8")) {
+      return 8;
+    } else if (JAVA_VERSION.startsWith("1.9")) {
+      return 9;
+    } else {
+      return 10;
+    }
+  }
 }
 
 // End TestUtil.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
----------------------------------------------------------------------
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
index 7dee05d..eaf9c91 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
@@ -19,6 +19,7 @@ package org.apache.calcite.adapter.tpch;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.util.TestUtil;
 import org.apache.calcite.util.Util;
 
 import com.google.common.base.Function;
@@ -40,11 +41,8 @@ import static org.junit.Assert.assertThat;
  * if {@code -Dcalcite.test.slow} is specified on the command-line.
  * (See {@link org.apache.calcite.test.CalciteAssert#ENABLE_SLOW}.)</p> */
 public class TpchTest {
-  public static final String JAVA_VERSION =
-      System.getProperties().getProperty("java.version");
-
   public static final boolean ENABLE =
-      CalciteAssert.ENABLE_SLOW && JAVA_VERSION.compareTo("1.7") >= 0;
+      CalciteAssert.ENABLE_SLOW && TestUtil.getJavaMajorVersion() >= 7;
 
   private static String schema(String name, String scaleFactor) {
     return "     {\n"

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1feec71..9ea9ecc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,6 +129,7 @@ limitations under the License.
     <sqlline.version>1.3.0</sqlline.version>
     <xalan.version>2.7.1</xalan.version>
     <xerces.version>2.9.1</xerces.version>
+    <sketches.version>0.9.0</sketches.version>
   </properties>
 
   <issueManagement>
@@ -259,6 +260,11 @@ limitations under the License.
         <version>${guava.version}</version>
       </dependency>
       <dependency>
+        <groupId>com.h2database</groupId>
+        <artifactId>h2</artifactId>
+        <version>${h2.version}</version>
+      </dependency>
+      <dependency>
         <groupId>com.joestelmach</groupId>
         <artifactId>natty</artifactId>
         <version>${natty.version}</version>
@@ -269,9 +275,9 @@ limitations under the License.
         <version>${oracle-jdbc6-driver.version}</version>
       </dependency>
       <dependency>
-        <groupId>com.h2database</groupId>
-        <artifactId>h2</artifactId>
-        <version>${h2.version}</version>
+        <groupId>com.yahoo.datasketches</groupId>
+        <artifactId>sketches-core</artifactId>
+        <version>${sketches.version}</version>
       </dependency>
       <dependency>
         <groupId>javax.servlet</groupId>


Mime
View raw message