Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8395F9C91 for ; Mon, 23 Jan 2012 18:34:44 +0000 (UTC) Received: (qmail 71572 invoked by uid 500); 23 Jan 2012 18:34:43 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 71558 invoked by uid 99); 23 Jan 2012 18:34:42 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 23 Jan 2012 18:34:42 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 23 Jan 2012 18:34:35 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 1456D23889CB; Mon, 23 Jan 2012 18:34:13 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1234932 [2/5] - in /lucene/dev/branches/solrcloud: ./ dev-tools/idea/lucene/contrib/ dev-tools/maven/ dev-tools/maven/solr/ dev-tools/maven/solr/contrib/analysis-extras/ dev-tools/maven/solr/contrib/clustering/ dev-tools/maven/solr/contrib... 
Date: Mon, 23 Jan 2012 18:34:08 -0000 To: commits@lucene.apache.org From: markrmiller@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120123183413.1456D23889CB@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1234932&r1=1234931&r2=1234932&view=diff ============================================================================== --- lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original) +++ lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Mon Jan 23 18:34:04 2012 @@ -695,12 +695,12 @@ public class TestIndexWriterExceptions e MockDirectoryWrapper dir = newDirectory(); { - final IndexWriter writer = new IndexWriter( - dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer). - setMaxBufferedDocs(-1). - setMergePolicy(newLogMergePolicy(10)) - ); + final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( + TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1) + .setMergePolicy( + random.nextBoolean() ? NoMergePolicy.COMPOUND_FILES + : NoMergePolicy.NO_COMPOUND_FILES)); + // don't use a merge policy here they depend on the DWPThreadPool and its max thread states etc. 
final int finalI = i; Thread[] threads = new Thread[NUM_THREAD]; Modified: lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/TestSentinelIntSet.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/TestSentinelIntSet.java?rev=1234932&r1=1234931&r2=1234932&view=diff ============================================================================== --- lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/TestSentinelIntSet.java (original) +++ lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/TestSentinelIntSet.java Mon Jan 23 18:34:04 2012 @@ -20,6 +20,8 @@ package org.apache.lucene.util; import org.junit.Test; +import java.util.HashSet; + /** * * @@ -45,4 +47,32 @@ public class TestSentinelIntSet extends assertEquals(20, set.size()); assertEquals(24, set.rehashCount); } + + + @Test + public void testRandom() throws Exception { + for (int i=0; i<10000; i++) { + int initSz = random.nextInt(20); + int num = random.nextInt(30); + int maxVal = (random.nextBoolean() ? 
random.nextInt(50) : random.nextInt(Integer.MAX_VALUE)) + 1; + + HashSet a = new HashSet(initSz); + SentinelIntSet b = new SentinelIntSet(initSz, -1); + + for (int j=0; j=0); + b.put(val); + + assertEquals(a.size(), b.size()); + } + + } + + } + } Modified: lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1234932&r1=1234931&r2=1234932&view=diff ============================================================================== --- lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java (original) +++ lucene/dev/branches/solrcloud/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java Mon Jan 23 18:34:04 2012 @@ -161,7 +161,7 @@ public class TestFSTs extends LuceneTest for(IntsRef term : terms2) { pairs.add(new FSTTester.InputOutput(term, NO_OUTPUT)); } - FST fst = new FSTTester(random, dir, inputMode, pairs, outputs).doTest(0, 0, false); + FST fst = new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(0, 0, false); assertNotNull(fst); assertEquals(22, fst.getNodeCount()); assertEquals(27, fst.getArcCount()); @@ -174,7 +174,7 @@ public class TestFSTs extends LuceneTest for(int idx=0;idx(terms2[idx], outputs.get(idx))); } - final FST fst = new FSTTester(random, dir, inputMode, pairs, outputs).doTest(0, 0, false); + final FST fst = new FSTTester(random, dir, inputMode, pairs, outputs, true).doTest(0, 0, false); assertNotNull(fst); assertEquals(22, fst.getNodeCount()); assertEquals(27, fst.getArcCount()); @@ -189,7 +189,7 @@ public class TestFSTs extends LuceneTest final BytesRef output = random.nextInt(30) == 17 ? 
NO_OUTPUT : new BytesRef(Integer.toString(idx)); pairs.add(new FSTTester.InputOutput(terms2[idx], output)); } - final FST fst = new FSTTester(random, dir, inputMode, pairs, outputs).doTest(0, 0, false); + final FST fst = new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(0, 0, false); assertNotNull(fst); assertEquals(24, fst.getNodeCount()); assertEquals(30, fst.getArcCount()); @@ -222,7 +222,7 @@ public class TestFSTs extends LuceneTest for(IntsRef term : terms) { pairs.add(new FSTTester.InputOutput(term, NO_OUTPUT)); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(); } // PositiveIntOutput (ord) @@ -232,12 +232,13 @@ public class TestFSTs extends LuceneTest for(int idx=0;idx(terms[idx], outputs.get(idx))); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, true).doTest(); } // PositiveIntOutput (random monotonically increasing positive number) { - final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean()); + final boolean doShare = random.nextBoolean(); + final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(doShare); final List> pairs = new ArrayList>(terms.length); long lastOutput = 0; for(int idx=0;idx(terms[idx], outputs.get(value))); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, doShare).doTest(); } // PositiveIntOutput (random positive number) @@ -255,7 +256,7 @@ public class TestFSTs extends LuceneTest for(int idx=0;idx(terms[idx], outputs.get(random.nextLong()) & Long.MAX_VALUE)); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(); } // Pair @@ -272,7 +273,7 @@ public class TestFSTs extends LuceneTest outputs.get(o1.get(idx), o2.get(value)))); } - new FSTTester>(random, 
dir, inputMode, pairs, outputs).doTest(); + new FSTTester>(random, dir, inputMode, pairs, outputs, false).doTest(); } // Sequence-of-bytes @@ -284,7 +285,7 @@ public class TestFSTs extends LuceneTest final BytesRef output = random.nextInt(30) == 17 ? NO_OUTPUT : new BytesRef(Integer.toString(idx)); pairs.add(new FSTTester.InputOutput(terms[idx], output)); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(); } // Sequence-of-ints @@ -300,7 +301,7 @@ public class TestFSTs extends LuceneTest } pairs.add(new FSTTester.InputOutput(terms[idx], output)); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(); } // Up to two positive ints, shared, generally but not @@ -330,7 +331,7 @@ public class TestFSTs extends LuceneTest } pairs.add(new FSTTester.InputOutput(terms[idx], output)); } - new FSTTester(random, dir, inputMode, pairs, outputs).doTest(); + new FSTTester(random, dir, inputMode, pairs, outputs, false).doTest(); } } @@ -341,13 +342,15 @@ public class TestFSTs extends LuceneTest final int inputMode; final Outputs outputs; final Directory dir; + final boolean doReverseLookup; - public FSTTester(Random random, Directory dir, int inputMode, List> pairs, Outputs outputs) { + public FSTTester(Random random, Directory dir, int inputMode, List> pairs, Outputs outputs, boolean doReverseLookup) { this.random = random; this.dir = dir; this.inputMode = inputMode; this.pairs = pairs; this.outputs = outputs; + this.doReverseLookup = doReverseLookup; } private static class InputOutput implements Comparable> { @@ -525,6 +528,26 @@ public class TestFSTs extends LuceneTest // FST is complete private void verifyUnPruned(int inputMode, FST fst) throws IOException { + final FST fstLong; + final Set validOutputs; + long minLong = Long.MAX_VALUE; + long maxLong = Long.MIN_VALUE; + + if (doReverseLookup) { + 
@SuppressWarnings("unchecked") FST fstLong0 = (FST) fst; + fstLong = fstLong0; + validOutputs = new HashSet(); + for(InputOutput pair: pairs) { + Long output = (Long) pair.output; + maxLong = Math.max(maxLong, output); + minLong = Math.min(minLong, output); + validOutputs.add(output); + } + } else { + fstLong = null; + validOutputs = null; + } + if (pairs.size() == 0) { assertNull(fst); return; @@ -542,7 +565,7 @@ public class TestFSTs extends LuceneTest assertNotNull(fst); - // visit valid paris in order -- make sure all words + // visit valid pairs in order -- make sure all words // are accepted, and FSTEnum's next() steps through // them correctly if (VERBOSE) { @@ -556,7 +579,6 @@ public class TestFSTs extends LuceneTest System.out.println("TEST: check term=" + inputToString(inputMode, term) + " output=" + fst.outputs.outputToString(pair.output)); } Object output = run(fst, term, null); - assertNotNull("term " + inputToString(inputMode, term) + " is not accepted", output); assertEquals(pair.output, output); @@ -574,6 +596,20 @@ public class TestFSTs extends LuceneTest termsMap.put(pair.input, pair.output); } + if (doReverseLookup && maxLong > minLong) { + // Do random lookups so we test null (output doesn't + // exist) case: + assertNull(Util.getByOutput(fstLong, minLong-7)); + assertNull(Util.getByOutput(fstLong, maxLong+7)); + + final int num = atLeast(100); + for(int iter=0;iter> ent : prefixes.entrySet()) { - System.out.println(" " + inputToString(inputMode, ent.getKey()) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal); + System.out.println(" " + inputToString(inputMode, ent.getKey(), false) + ": isLeaf=" + ent.getValue().isLeaf + " isFinal=" + ent.getValue().isFinal); if (ent.getValue().isFinal) { System.out.println(" finalOutput=" + outputs.outputToString(ent.getValue().finalOutput)); } @@ -951,7 +995,7 @@ public class TestFSTs extends LuceneTest //testRandomWords(20, 100); } - private String inputModeToString(int mode) { + 
String inputModeToString(int mode) { if (mode == 0) { return "utf8"; } else { @@ -995,7 +1039,7 @@ public class TestFSTs extends LuceneTest testRandomWords(_TestUtil.nextInt(random, 50000, 60000), 1); } - private static String inputToString(int inputMode, IntsRef term) { + static String inputToString(int inputMode, IntsRef term) { return inputToString(inputMode, term, true); } @@ -1011,6 +1055,50 @@ public class TestFSTs extends LuceneTest } } + // NOTE: this test shows a case where our current builder + // fails to produce minimal FST: + /* + public void test3() throws Exception { + final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true); + Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + IntsRef scratchIntsRef = new IntsRef(); + builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.get(0)); + builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), 1L); + builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), 2L); + final FST fst = builder.finish(); + //System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount()); + // NOTE: we produce 7 nodes today + assertEquals(6, fst.getNodeCount()); + // NOTE: we produce 8 arcs today + assertEquals(7, fst.getNodeCount()); + //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + //Util.toDot(fst, w, false, false); + //w.close(); + } + */ + + // NOTE: this test shows a case where our current builder + // fails to produce minimal FST: + /* + public void test4() throws Exception { + final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); + Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, outputs); + IntsRef scratchIntsRef = new IntsRef(); + builder.add(Util.toIntsRef(new BytesRef("aa$"), scratchIntsRef), outputs.getNoOutput()); + builder.add(Util.toIntsRef(new BytesRef("aab$"), scratchIntsRef), new BytesRef("1")); + builder.add(Util.toIntsRef(new BytesRef("bbb$"), scratchIntsRef), new 
BytesRef("11")); + final FST fst = builder.finish(); + //System.out.println("NODES " + fst.getNodeCount() + " ARCS " + fst.getArcCount()); + // NOTE: we produce 7 nodes today + assertEquals(6, fst.getNodeCount()); + // NOTE: we produce 8 arcs today + assertEquals(7, fst.getNodeCount()); + //Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); + //Util.toDot(fst, w, false, false); + //w.close(); + } + */ + // Build FST for all unique terms in the test line docs // file, up until a time limit public void testRealTerms() throws Exception { @@ -1422,6 +1510,14 @@ public class TestFSTs extends LuceneTest assertNotNull(seekResult); assertEquals(b, seekResult.input); assertEquals(42, (long) seekResult.output); + + assertEquals(Util.toIntsRef(new BytesRef("c"), new IntsRef()), + Util.getByOutput(fst, 13824324872317238L)); + assertNull(Util.getByOutput(fst, 47)); + assertEquals(Util.toIntsRef(new BytesRef("b"), new IntsRef()), + Util.getByOutput(fst, 42)); + assertEquals(Util.toIntsRef(new BytesRef("a"), new IntsRef()), + Util.getByOutput(fst, 17)); } public void testPrimaryKeys() throws Exception { Modified: lucene/dev/branches/solrcloud/modules/analysis/common/build.xml URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/build.xml?rev=1234932&r1=1234931&r2=1234932&view=diff ============================================================================== --- lucene/dev/branches/solrcloud/modules/analysis/common/build.xml (original) +++ lucene/dev/branches/solrcloud/modules/analysis/common/build.xml Mon Jan 23 18:34:04 2012 @@ -31,14 +31,38 @@ + jflex-StandardAnalyzer,jflex-UAX29URLEmailTokenizer, + jflex-wiki-tokenizer,jflex-HTMLStripCharFilter"/> - + + + + + + + + + + + + + + + + Modified: lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java URL: 
http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java?rev=1234932&r1=1234931&r2=1234932&view=diff ============================================================================== --- lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java (original) +++ lucene/dev/branches/solrcloud/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/BaseCharFilter.java Mon Jan 23 18:34:04 2012 @@ -20,6 +20,8 @@ package org.apache.lucene.analysis.charf import org.apache.lucene.analysis.CharStream; import org.apache.lucene.util.ArrayUtil; +import java.util.Arrays; + /** * Base utility class for implementing a {@link CharFilter}. * You subclass this, and then record mappings by calling @@ -71,6 +73,19 @@ public abstract class BaseCharFilter ext 0 : diffs[size-1]; } + /** + *

<p> + * Adds an offset correction mapping at the given output stream offset. + * </p>

+ * <p>

+ * Assumption: the offset given with each successive call to this method + * will not be smaller than the offset given at the previous invocation. + * </p>

+ * + * @param off The output stream offset at which to apply the correction + * @param cumulativeDiff The input offset is given by adding this + * to the output offset + */ protected void addOffCorrectMap(int off, int cumulativeDiff) { if (offsets == null) { offsets = new int[64]; @@ -80,7 +95,15 @@ public abstract class BaseCharFilter ext diffs = ArrayUtil.grow(diffs); } - offsets[size] = off; - diffs[size++] = cumulativeDiff; + assert (size == 0 || off >= offsets[size]) + : "Offset #" + size + "(" + off + ") is less than the last recorded offset " + + offsets[size] + "\n" + Arrays.toString(offsets) + "\n" + Arrays.toString(diffs); + + if (size == 0 || off != offsets[size - 1]) { + offsets[size] = off; + diffs[size++] = cumulativeDiff; + } else { // Overwrite the diff at the last recorded offset + diffs[size - 1] = cumulativeDiff; + } } }