Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 5D1C310B0E for ; Tue, 3 Dec 2013 18:22:43 +0000 (UTC) Received: (qmail 78216 invoked by uid 500); 3 Dec 2013 18:22:43 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 78209 invoked by uid 99); 3 Dec 2013 18:22:43 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Dec 2013 18:22:43 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 03 Dec 2013 18:22:38 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 8B83A23888E7; Tue, 3 Dec 2013 18:22:15 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1547511 - in /lucene/dev/branches/lucene5339/lucene/facet/src: java/org/apache/lucene/facet/ java/org/apache/lucene/facet/taxonomy/ java/org/apache/lucene/facet/taxonomy/writercache/ test/org/apache/lucene/facet/taxonomy/ test/org/apache/l... Date: Tue, 03 Dec 2013 18:22:15 -0000 To: commits@lucene.apache.org From: mikemccand@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131203182215.8B83A23888E7@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: mikemccand Date: Tue Dec 3 18:22:14 2013 New Revision: 1547511 URL: http://svn.apache.org/r1547511 Log: LUCENE-5339: small opto for range facets, and factor out base class; put longHashCode back Added: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java (with props) Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java Tue Dec 3 18:22:14 2013 @@ -31,8 +31,8 @@ import org.apache.lucene.util.Bits; /** Represents a range over double values. */ public final class DoubleRange extends Range { - private final double minIncl; - private final double maxIncl; + final double minIncl; + final double maxIncl; public final double min; public final double max; Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java Tue Dec 3 18:22:14 2013 @@ -45,11 +45,7 @@ import org.apache.lucene.queries.functio * pass just a the field name). * * @lucene.experimental */ -public class DoubleRangeFacetCounts extends Facets { - private final DoubleRange[] ranges; - private final int[] counts; - private final String field; - private int totCount; +public class DoubleRangeFacetCounts extends RangeFacetCounts { /** Create {@code RangeFacetCounts}, using {@link * DoubleFieldSource} from the specified field. */ @@ -60,14 +56,22 @@ public class DoubleRangeFacetCounts exte /** Create {@code RangeFacetCounts}, using the provided * {@link ValueSource}. */ public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException { - this.ranges = ranges; - this.field = field; - counts = new int[ranges.length]; + super(field, ranges); count(valueSource, hits.getMatchingDocs()); } private void count(ValueSource valueSource, List matchingDocs) throws IOException { + DoubleRange[] ranges = (DoubleRange[]) this.ranges; + + // Compute min & max over all ranges: + double minIncl = Double.POSITIVE_INFINITY; + double maxIncl = Double.NEGATIVE_INFINITY; + for(DoubleRange range : ranges) { + minIncl = Math.min(minIncl, range.minIncl); + maxIncl = Math.max(maxIncl, range.maxIncl); + } + // TODO: test if this is faster (in the past it was // faster to do MatchingDocs on the inside) ... see // patches on LUCENE-4965): @@ -81,6 +85,10 @@ public class DoubleRangeFacetCounts exte if (fv.exists(doc)) { double v = fv.doubleVal(doc); + if (v < minIncl || v > maxIncl) { + doc++; + continue; + } // TODO: if all ranges are non-overlapping, we // should instead do a bin-search up front @@ -98,35 +106,4 @@ public class DoubleRangeFacetCounts exte } } } - - // nocommit all args are ... unused ... this doesn't "fit" - // very well: - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) { - if (dim.equals(field) == false) { - throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\""); - } - if (path.length != 0) { - throw new IllegalArgumentException("path.length should be 0"); - } - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for(int i=0;i getAllDims(int topN) throws IOException { - return Collections.singletonList(getTopChildren(topN, null)); - } } Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java Tue Dec 3 18:22:14 2013 @@ -31,8 +31,8 @@ import org.apache.lucene.util.Bits; /** Represents a range over long values. */ public final class LongRange extends Range { - private final long minIncl; - private final long maxIncl; + final long minIncl; + final long maxIncl; public final long min; public final long max; Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java Tue Dec 3 18:22:14 2013 @@ -35,13 +35,9 @@ import org.apache.lucene.queries.functio * distance dimension like "< 1 km", "< 2 km", etc.). * * @lucene.experimental */ -public class LongRangeFacetCounts extends Facets { - private final LongRange[] ranges; - private final int[] counts; - private final String field; - private int totCount; +public class LongRangeFacetCounts extends RangeFacetCounts { - /** Create {@code RangeFacetCounts}, using {@link + /** Create {@code LongRangeFacetCounts}, using {@link * LongFieldSource} from the specified field. */ public LongRangeFacetCounts(String field, FacetsCollector hits, LongRange... ranges) throws IOException { this(field, new LongFieldSource(field), hits, ranges); @@ -50,14 +46,22 @@ public class LongRangeFacetCounts extend /** Create {@code RangeFacetCounts}, using the provided * {@link ValueSource}. */ public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException { - this.ranges = ranges; - this.field = field; - counts = new int[ranges.length]; + super(field, ranges); count(valueSource, hits.getMatchingDocs()); } private void count(ValueSource valueSource, List matchingDocs) throws IOException { + LongRange[] ranges = (LongRange[]) this.ranges; + + // Compute min & max over all ranges: + long minIncl = Long.MAX_VALUE; + long maxIncl = Long.MIN_VALUE; + for(LongRange range : ranges) { + minIncl = Math.min(minIncl, range.minIncl); + maxIncl = Math.max(maxIncl, range.maxIncl); + } + // TODO: test if this is faster (in the past it was // faster to do MatchingDocs on the inside) ... see // patches on LUCENE-4965): @@ -71,6 +75,10 @@ public class LongRangeFacetCounts extend if (fv.exists(doc)) { long v = fv.longVal(doc); + if (v < minIncl || v > maxIncl) { + doc++; + continue; + } // TODO: if all ranges are non-overlapping, we // should instead do a bin-search up front @@ -88,35 +96,4 @@ public class LongRangeFacetCounts extend } } } - - // nocommit all args are ... unused ... this doesn't "fit" - // very well: - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) { - if (dim.equals(field) == false) { - throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\""); - } - if (path.length != 0) { - throw new IllegalArgumentException("path.length should be 0"); - } - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for(int i=0;i getAllDims(int topN) throws IOException { - return Collections.singletonList(getTopChildren(topN, null)); - } } Added: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java?rev=1547511&view=auto ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java (added) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java Tue Dec 3 18:22:14 2013 @@ -0,0 +1,70 @@ +package org.apache.lucene.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + + +/** Base class for range faceting. + * + * @lucene.experimental */ +abstract class RangeFacetCounts extends Facets { + protected final Range[] ranges; + protected final int[] counts; + protected final String field; + protected int totCount; + + /** Create {@code RangeFacetCounts}, using {@link + * LongFieldSource} from the specified field. */ + protected RangeFacetCounts(String field, Range[] ranges) throws IOException { + this.field = field; + this.ranges = ranges; + counts = new int[ranges.length]; + } + + // nocommit all args are ... unused ... this doesn't "fit" + // very well: + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) { + if (dim.equals(field) == false) { + throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\""); + } + if (path.length != 0) { + throw new IllegalArgumentException("path.length should be 0"); + } + LabelAndValue[] labelValues = new LabelAndValue[counts.length]; + for(int i=0;i getAllDims(int topN) throws IOException { + return Collections.singletonList(getTopChildren(topN, null)); + } +} Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java Tue Dec 3 18:22:14 2013 @@ -41,9 +41,6 @@ public class FacetLabel implements Compa */ public final static int MAX_CATEGORY_PATH_LENGTH = (BYTE_BLOCK_SIZE - 2) / 4; - /** An empty {@link FacetLabel} */ - //public static final FacetLabel EMPTY = new FacetLabel(); - /** * The components of this {@link FacetLabel}. Note that this array may be * shared with other {@link FacetLabel} instances, e.g. as a result of @@ -114,8 +111,12 @@ public class FacetLabel implements Compa final int len = length < other.length ? length : other.length; for (int i = 0, j = 0; i < len; i++, j++) { int cmp = components[i].compareTo(other.components[j]); - if (cmp < 0) return -1; // this is 'before' - if (cmp > 0) return 1; // this is 'after' + if (cmp < 0) { + return -1; // this is 'before' + } + if (cmp > 0) { + return 1; // this is 'after' + } } // one is a prefix of the other @@ -156,6 +157,23 @@ public class FacetLabel implements Compa return hash; } + /** Calculate a 64-bit hash function for this path. This + * is necessary for {@link NameHashIntCacheLRU} (the + * default cache impl for {@link + * LruTaxonomyWriterCache}) to reduce the chance of + * "silent but deadly" collisions. */ + public long longHashCode() { + if (length == 0) { + return 0; + } + + long hash = length; + for (int i = 0; i < length; i++) { + hash = hash * 65599 + components[i].hashCode(); + } + return hash; + } + /** Returns a sub-path of this path up to {@code length} components. */ public FacetLabel subpath(final int length) { if (length >= this.length || length < 0) { Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java Tue Dec 3 18:22:14 2013 @@ -36,12 +36,12 @@ public class NameHashIntCacheLRU extends @Override Object key(FacetLabel name) { - return new Integer(name.hashCode()); + return new Long(name.longHashCode()); } @Override Object key(FacetLabel name, int prefixLen) { - return new Integer(name.subpath(prefixLen).hashCode()); + return new Long(name.subpath(prefixLen).longHashCode()); } } Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java Tue Dec 3 18:22:14 2013 @@ -108,6 +108,13 @@ public class TestFacetLabel extends Face } @Test + public void testLongHashCode() { + assertEquals(new FacetLabel().longHashCode(), new FacetLabel().longHashCode()); + assertFalse(new FacetLabel().longHashCode() == new FacetLabel("hi").longHashCode()); + assertEquals(new FacetLabel("hello", "world").longHashCode(), new FacetLabel("hello", "world").longHashCode()); + } + + @Test public void testArrayConstructor() { FacetLabel p = new FacetLabel("hello", "world", "yo"); assertEquals(3, p.length); Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java Tue Dec 3 18:22:14 2013 @@ -128,14 +128,23 @@ public class TestConcurrentFacetedIndexi for (Thread t : indexThreads) t.join(); DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tw); - assertEquals("mismatch number of categories", values.size() + 1, tr.getSize()); // +1 for root category + // +1 for root category + if (values.size() + 1 != tr.getSize()) { + for(String value : values.keySet()) { + FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value)); + if (tr.getOrdinal(label) == -1) { + System.out.println("FAIL: path=" + label + " not recognized"); + } + } + fail("mismatch number of categories"); + } int[] parents = tr.getParallelTaxonomyArrays().parents(); for (String cat : values.keySet()) { FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat)); assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0); int level = cp.length; int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0) - FacetLabel path = new FacetLabel(); + FacetLabel path = null; for (int i = 0; i < level; i++) { path = cp.subpath(i + 1); int ord = tr.getOrdinal(path); Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1547511&r1=1547510&r2=1547511&view=diff ============================================================================== --- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original) +++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Tue Dec 3 18:22:14 2013 @@ -256,6 +256,9 @@ public class TestDirectoryTaxonomyWriter // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too. cache = new LruTaxonomyWriterCache(ncats / 10); } + if (VERBOSE) { + System.out.println("TEST: use cache=" + cache); + } final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache); Thread[] addThreads = new Thread[atLeast(4)]; for (int z = 0; z < addThreads.length; z++) { @@ -291,7 +294,17 @@ public class TestDirectoryTaxonomyWriter tw.close(); DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir); - assertEquals("mismatch number of categories", values.size() + 1, dtr.getSize()); // +1 for root category + // +1 for root category + if (values.size() + 1 != dtr.getSize()) { + for(String value : values.keySet()) { + FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value)); + if (dtr.getOrdinal(label) == -1) { + System.out.println("FAIL: path=" + label + " not recognized"); + } + } + fail("mismatch number of categories"); + } + int[] parents = dtr.getParallelTaxonomyArrays().parents(); for (String cat : values.keySet()) { FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat)); @@ -306,9 +319,8 @@ public class TestDirectoryTaxonomyWriter parentOrd = ord; // next level should have this parent } } - dtr.close(); - - dir.close(); + + IOUtils.close(dtr, dir); } private long getEpoch(Directory taxoDir) throws IOException {