lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1640099 [1/4] - in /lucene/dev/branches/lucene6005: ./ dev-tools/ dev-tools/scripts/ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/ lucene/analysis/common/src/resources/META-...
Date Mon, 17 Nov 2014 08:47:36 GMT
Author: mikemccand
Date: Mon Nov 17 08:47:34 2014
New Revision: 1640099

URL: http://svn.apache.org/r1640099
Log:
LUCENE-6005: merge trunk

Added:
    lucene/dev/branches/lucene6005/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/
      - copied from r1640053, lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/sr/
    lucene/dev/branches/lucene6005/lucene/analysis/common/src/test/org/apache/lucene/analysis/sr/
      - copied from r1640053, lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/sr/
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilter.java
      - copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilter.java
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilterFactory.java
      - copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DaitchMokotoffSoundexFilterFactory.java
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilter.java
      - copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilter.java
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilterFactory.java
      - copied unchanged from r1640053, lucene/dev/trunk/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestDaitchMokotoffSoundexFilterFactory.java
    lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/CachingNaiveBayesClassifierTest.java
      - copied unchanged from r1640053, lucene/dev/trunk/lucene/classification/src/test/org/apache/lucene/classification/CachingNaiveBayesClassifierTest.java
    lucene/dev/branches/lucene6005/lucene/licenses/commons-codec-1.10.jar.sha1
      - copied unchanged from r1640053, lucene/dev/trunk/lucene/licenses/commons-codec-1.10.jar.sha1
    lucene/dev/branches/lucene6005/solr/licenses/commons-codec-1.10.jar.sha1
      - copied unchanged from r1640053, lucene/dev/trunk/solr/licenses/commons-codec-1.10.jar.sha1
Removed:
    lucene/dev/branches/lucene6005/lucene/licenses/commons-codec-1.9.jar.sha1
    lucene/dev/branches/lucene6005/solr/licenses/commons-codec-1.9.jar.sha1
Modified:
    lucene/dev/branches/lucene6005/   (props changed)
    lucene/dev/branches/lucene6005/dev-tools/   (props changed)
    lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py
    lucene/dev/branches/lucene6005/lucene/   (props changed)
    lucene/dev/branches/lucene6005/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene6005/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene6005/lucene/analysis/common/   (props changed)
    lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
    lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
    lucene/dev/branches/lucene6005/lucene/benchmark/   (props changed)
    lucene/dev/branches/lucene6005/lucene/benchmark/build.xml
    lucene/dev/branches/lucene6005/lucene/classification/   (props changed)
    lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
    lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
    lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html
    lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java
    lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
    lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
    lucene/dev/branches/lucene6005/lucene/core/   (props changed)
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
    lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java
    lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java
    lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
    lucene/dev/branches/lucene6005/lucene/facet/   (props changed)
    lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
    lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
    lucene/dev/branches/lucene6005/lucene/highlighter/   (props changed)
    lucene/dev/branches/lucene6005/lucene/highlighter/build.xml
    lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/dev/branches/lucene6005/lucene/ivy-versions.properties   (contents, props changed)
    lucene/dev/branches/lucene6005/lucene/join/   (props changed)
    lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
    lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
    lucene/dev/branches/lucene6005/lucene/licenses/   (props changed)
    lucene/dev/branches/lucene6005/lucene/suggest/   (props changed)
    lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
    lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
    lucene/dev/branches/lucene6005/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
    lucene/dev/branches/lucene6005/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
    lucene/dev/branches/lucene6005/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
    lucene/dev/branches/lucene6005/solr/   (props changed)
    lucene/dev/branches/lucene6005/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene6005/solr/bin/   (props changed)
    lucene/dev/branches/lucene6005/solr/bin/solr
    lucene/dev/branches/lucene6005/solr/bin/solr.cmd   (contents, props changed)
    lucene/dev/branches/lucene6005/solr/bin/solr.in.cmd   (contents, props changed)
    lucene/dev/branches/lucene6005/solr/bin/solr.in.sh
    lucene/dev/branches/lucene6005/solr/build.xml   (contents, props changed)
    lucene/dev/branches/lucene6005/solr/core/   (props changed)
    lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/core/SolrCore.java
    lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java
    lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
    lucene/dev/branches/lucene6005/solr/core/src/java/org/apache/solr/spelling/suggest/fst/BlendedInfixLookupFactory.java
    lucene/dev/branches/lucene6005/solr/licenses/   (props changed)
    lucene/dev/branches/lucene6005/solr/webapp/   (props changed)
    lucene/dev/branches/lucene6005/solr/webapp/web/css/styles/files.css
    lucene/dev/branches/lucene6005/solr/webapp/web/js/scripts/files.js
    lucene/dev/branches/lucene6005/solr/webapp/web/js/scripts/index.js
    lucene/dev/branches/lucene6005/solr/webapp/web/js/scripts/plugins.js

Modified: lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py (original)
+++ lucene/dev/branches/lucene6005/dev-tools/scripts/createPatch.py Mon Nov 17 08:47:34 2014
@@ -30,10 +30,11 @@ import os
 import subprocess
 import sys
 
-def make_filter_func(src_dir):
-  if os.path.exists(os.path.join(src_dir, '.git')):
+def make_filter_func(src_root, src_dir):
+  git_root = os.path.join(src_root, '.git')
+  if os.path.exists(git_root):
     def git_filter(filename):
-      rc = subprocess.call('git --git-dir=%s check-ignore %s' % (src_dir, filename), shell=True)
+      rc = subprocess.call('git --git-dir=%s check-ignore %s' % (git_root, filename), shell=True, stdout=subprocess.DEVNULL)
       return rc == 0
     return git_filter
 
@@ -89,7 +90,7 @@ def run_diff(from_dir, to_dir, skip_whit
     flags += 'bBw'
 
   args = ['diff', flags]
-  for ignore in ('.svn', '.git', 'build', '.caches'):
+  for ignore in ('.svn', '.git', 'build', '.caches', '.idea', 'idea-build'):
     args.append('-x')
     args.append(ignore)
   args.append(from_dir)
@@ -97,6 +98,13 @@ def run_diff(from_dir, to_dir, skip_whit
 
   return subprocess.Popen(args, shell=False, stdout=subprocess.PIPE)
 
+def find_root(path):
+  relative = []
+  while not os.path.exists(os.path.join(path, 'lucene', 'CHANGES.txt')):
+    path, base = os.path.split(path)
+    relative.insert(0, base)
+  return path, '' if not relative else os.path.normpath(os.path.join(*relative))
+
 def parse_config():
   parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
   parser.add_argument('--skip-whitespace', action='store_true', default=False,
@@ -107,20 +115,24 @@ def parse_config():
 
   if not os.path.isdir(c.from_dir):
     parser.error('\'from\' path %s is not a valid directory' % c.from_dir)
-  if not os.path.exists(os.path.join(c.from_dir, 'lucene', 'CHANGES.txt')):
-    parser.error('\'from\' path %s is not a valid lucene/solr checkout' % c.from_dir)
+  (c.from_root, from_relative) = find_root(c.from_dir)
+  if c.from_root is None:
+    parser.error('\'from\' path %s is not relative to a lucene/solr checkout' % c.from_dir)
   if not os.path.isdir(c.to_dir):
     parser.error('\'to\' path %s is not a valid directory' % c.to_dir)
-  if not os.path.exists(os.path.join(c.to_dir, 'lucene', 'CHANGES.txt')):
-    parser.error('\'to\' path %s is not a valid lucene/solr checkout' % c.to_dir)
-
+  (c.to_root, to_relative) = find_root(c.to_dir)
+  if c.to_root is None:
+    parser.error('\'to\' path %s is not relative to a lucene/solr checkout' % c.to_dir)
+  if from_relative != to_relative:
+    parser.error('\'from\' and \'to\' path are not equivalent relative paths within their'
+                 ' checkouts: %r != %r' % (from_relative, to_relative))
   return c
 
 def main():
   c = parse_config()
 
   p = run_diff(c.from_dir, c.to_dir, c.skip_whitespace)
-  should_filter = make_filter_func(c.from_dir)
+  should_filter = make_filter_func(c.from_root, c.from_dir)
   print_filtered_output(p.stdout, should_filter)
 
 if __name__ == '__main__':

Modified: lucene/dev/branches/lucene6005/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/CHANGES.txt?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene6005/lucene/CHANGES.txt Mon Nov 17 08:47:34 2014
@@ -77,6 +77,27 @@ New Features
   improved exception handling, and indirect norms encoding for sparse fields.
   (Mike McCandless, Ryan Ernst, Robert Muir)
 
+* LUCENE-6053: Add Serbian analyzer.  (Nikola Smolenski via Robert Muir, Mike McCandless)
+
+* LUCENE-4400: Add support for new NYSIIS Apache commons phonetic
+  codec (Thomas Neidhart via Mike McCandless)
+
+* LUCENE-6059: Add support for the Daitch-Mokotoff Soundex phonetic
+  codec from Apache commons codec, and upgrade to Apache commons
+  codec 1.10. (Thomas Neidhart via Mike McCandless)
+
+* LUCENE-6058: With the upgrade to Apache commons codec 1.10, the
+  experimental BeiderMorseFilter has changed its behavior, so any
+  index using it will need to be rebuilt.  (Thomas
+  Neidhart via Mike McCandless)
+
+* LUCENE-6050: Accept MUST and MUST_NOT (in addition to SHOULD) for
+  each context passed to Analyzing/BlendedInfixSuggester (Arcadius
+  Ahouansou, jane chang via Mike McCandless)
+
+* LUCENE-5929: Also extract terms to highlight from block join
+  queries. (Julie Tibshirani via Mike McCandless)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
@@ -195,6 +216,9 @@ Bug Fixes
   not have the regular "spinlock" of DirectoryReader.open. It now implements
   Closeable and you must close it to release the lock.  (Mike McCandless, Robert Muir)
 
+* LUCENE-6004: Don't highlight the LookupResult.key returned from
+  AnalyzingInfixSuggester (Christian Reuschling, jane chang via Mike McCandless)
+
 * LUCENE-5980: Don't let document length overflow. (Robert Muir)
 
 * LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSources to
@@ -228,12 +252,17 @@ Bug Fixes
 
 * LUCENE-6055: PayloadAttribute.clone() now does a deep clone of the underlying
   bytes. (Shai Erera)
+
+* LUCENE-6060: Remove dangerous IndexWriter.unlock method (Simon
+  Willnauer, Mike McCandless)
   
 Documentation
 
 * LUCENE-5392: Add/improve analysis package documentation to reflect
   analysis API changes.  (Benson Margulies via Robert Muir - pull request #17)
 
+* LUCENE-6057: Improve Sort(SortField) docs (Martin Braun via Mike McCandless)
+
 Tests
 
 * LUCENE-5957: Add option for tests to not randomize codec
@@ -284,6 +313,9 @@ Optimizations
 * LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
   (Paul Elschot)
 
+* LUCENE-6033: CachingTokenFilter now uses ArrayList not LinkedList, and has new
+  isCached() method. (David Smiley)
+
 Build
 
 * LUCENE-5909: Smoke tester now has better command line parsing and
@@ -306,6 +338,13 @@ Other
 
 * LUCENE-5915: Remove Pulsing postings format. (Robert Muir)
 
+======================= Lucene 4.10.3 ======================
+
+Bug fixes
+
+* LUCENE-3229: Overlapping ordered SpanNearQuery spans should not match.
+  (Ludovic Boutros, Paul Elschot, Greg Dearing, ehatcher)
+
 ======================= Lucene 4.10.2 ======================
 
 Bug fixes

Modified: lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory Mon Nov 17 08:47:34 2014
@@ -91,6 +91,7 @@ org.apache.lucene.analysis.reverse.Rever
 org.apache.lucene.analysis.ru.RussianLightStemFilterFactory
 org.apache.lucene.analysis.shingle.ShingleFilterFactory
 org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory
+org.apache.lucene.analysis.sr.SerbianNormalizationFilterFactory
 org.apache.lucene.analysis.standard.ClassicFilterFactory
 org.apache.lucene.analysis.standard.StandardFilterFactory
 org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory

Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilterFactory.java Mon Nov 17 08:47:34 2014
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.phone
  */
 
 import java.io.IOException;
-import java.lang.reflect.Method;
 import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -29,6 +29,7 @@ import org.apache.commons.codec.language
 import org.apache.commons.codec.language.ColognePhonetic;
 import org.apache.commons.codec.language.DoubleMetaphone;
 import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
 import org.apache.commons.codec.language.RefinedSoundex;
 import org.apache.commons.codec.language.Soundex;
 import org.apache.lucene.analysis.TokenStream;
@@ -46,8 +47,8 @@ import org.apache.lucene.analysis.util.T
  * This takes one required argument, "encoder", and the rest are optional:
  * <dl>
  *  <dt>encoder</dt><dd> required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex", "Caverphone" (v2.0),
- *  or "ColognePhonetic" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name either by
- *  itself if it already contains a '.' or otherwise as in the same package as these others.</dd>
+ *  "ColognePhonetic" or "Nysiis" (case insensitive). If encoder isn't one of these, it'll be resolved as a class name
+ *  either by itself if it already contains a '.' or otherwise as in the same package as these others.</dd>
  *  <dt>inject</dt><dd> (default=true) add tokens to the stream with the offset=0</dd>
  *  <dt>maxCodeLength</dt><dd>The maximum length of the phonetic codes, as defined by the encoder. If an encoder doesn't
  *  support this then specifying this is an error.</dd>
@@ -82,6 +83,7 @@ public class PhoneticFilterFactory exten
     registry.put("RefinedSoundex".toUpperCase(Locale.ROOT), RefinedSoundex.class);
     registry.put("Caverphone".toUpperCase(Locale.ROOT), Caverphone2.class);
     registry.put("ColognePhonetic".toUpperCase(Locale.ROOT), ColognePhonetic.class);
+    registry.put("Nysiis".toUpperCase(Locale.ROOT), Nysiis.class);
   }
 
   final boolean inject; //accessed by the test

Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestBeiderMorseFilterFactory.java Mon Nov 17 08:47:34 2014
@@ -17,12 +17,10 @@ package org.apache.lucene.analysis.phone
  * limitations under the License.
  */
 
-import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 
 /** Simple tests for {@link BeiderMorseFilterFactory} */
@@ -31,10 +29,10 @@ public class TestBeiderMorseFilterFactor
     BeiderMorseFilterFactory factory = new BeiderMorseFilterFactory(new HashMap<String,String>());
     TokenStream ts = factory.create(whitespaceMockTokenizer("Weinberg"));
     assertTokenStreamContents(ts,
-        new String[] { "vDnbirk", "vanbirk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
-        new int[] { 0, 0, 0, 0, 0, 0 },
-        new int[] { 8, 8, 8, 8, 8, 8 },
-        new int[] { 1, 0, 0, 0, 0, 0 });
+        new String[] { "vDnbYrk", "vDnbirk", "vanbYrk", "vanbirk", "vinbYrk", "vinbirk", "wDnbirk", "wanbirk", "winbirk" },
+        new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0},
+        new int[] { 8, 8, 8, 8, 8, 8, 8, 8, 8},
+        new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0});
   }
   
   public void testLanguageSet() throws Exception {

Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilter.java Mon Nov 17 08:47:34 2014
@@ -21,7 +21,12 @@ import java.io.IOException;
 import java.io.StringReader;
 
 import org.apache.commons.codec.Encoder;
-import org.apache.commons.codec.language.*;
+import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.DoubleMetaphone;
+import org.apache.commons.codec.language.Metaphone;
+import org.apache.commons.codec.language.Nysiis;
+import org.apache.commons.codec.language.RefinedSoundex;
+import org.apache.commons.codec.language.Soundex;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -59,6 +64,11 @@ public class TestPhoneticFilter extends 
           "TTA1111111", "Datha", "KLN1111111", "Carlene" });
     assertAlgorithm(new Caverphone2(), false, "Darda Karleen Datha Carlene",
         new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" });
+
+    assertAlgorithm(new Nysiis(), true, "aaa bbb ccc easgasg",
+        new String[] { "A", "aaa", "B", "bbb", "C", "ccc", "EASGAS", "easgasg" });
+    assertAlgorithm(new Nysiis(), false, "aaa bbb ccc easgasg",
+        new String[] { "A", "B", "C", "EASGAS" });
   }
 
   

Modified: lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/analysis/phonetic/src/test/org/apache/lucene/analysis/phonetic/TestPhoneticFilterFactory.java Mon Nov 17 08:47:34 2014
@@ -18,14 +18,12 @@ package org.apache.lucene.analysis.phone
  */
 
 import java.io.IOException;
-import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.commons.codec.language.Metaphone;
 import org.apache.commons.codec.language.Caverphone2;
+import org.apache.commons.codec.language.Metaphone;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
@@ -164,6 +162,12 @@ public class TestPhoneticFilterFactory e
           "67", "Meir", "862", "Schmidt" });
     assertAlgorithm("ColognePhonetic", "false", "Meier Schmitt Meir Schmidt",
         new String[] { "67", "862", "67", "862" });
+    
+    assertAlgorithm("Nysiis", "true", "Macintosh Knuth Bart Hurd",
+        new String[] { "MCANT", "Macintosh", "NAT", "Knuth", 
+          "BAD", "Bart", "HAD", "Hurd" });
+    assertAlgorithm("Nysiis", "false", "Macintosh Knuth Bart Hurd",
+        new String[] { "MCANT", "NAT", "BAD", "HAD" });
   }
   
   /** Test that bogus arguments result in exception */

Modified: lucene/dev/branches/lucene6005/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/benchmark/build.xml?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/lucene6005/lucene/benchmark/build.xml Mon Nov 17 08:47:34 2014
@@ -174,6 +174,7 @@
       <pathelement path="${spatial.jar}"/>
       <pathelement path="${queries.jar}"/>
       <pathelement path="${codecs.jar}"/>
+      <pathelement path="${join.jar}"/>
       <path refid="base.classpath"/>
       <fileset dir="lib"/>
     </path>
@@ -276,7 +277,7 @@
       <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
     </target>
 
-    <target name="init" depends="module-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial,jar-codecs"/>
+    <target name="init" depends="module-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet,jar-spatial,jar-codecs,jar-join"/>
   
     <target name="compile-test" depends="copy-alg-files-for-testing,module-build.compile-test"/>
     <target name="copy-alg-files-for-testing" description="copy .alg files as resources for testing">

Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java Mon Nov 17 08:47:34 2014
@@ -20,7 +20,7 @@ import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.concurrent.ConcurrentSkipListMap;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -53,7 +53,7 @@ import org.apache.lucene.util.fst.Util;
  * {@link org.apache.lucene.index.TermsEnum#totalTermFreq} both on a per field
  * and a per document basis and then a corresponding
  * {@link org.apache.lucene.util.fst.FST} is used for class assignment.
- * 
+ *
  * @lucene.experimental
  */
 public class BooleanPerceptronClassifier implements Classifier<Boolean> {
@@ -67,9 +67,8 @@ public class BooleanPerceptronClassifier
 
   /**
    * Create a {@link BooleanPerceptronClassifier}
-   * 
-   * @param threshold
-   *          the binary threshold for perceptron output evaluation
+   *
+   * @param threshold the binary threshold for perceptron output evaluation
    */
   public BooleanPerceptronClassifier(Double threshold, Integer batchSize) {
     this.threshold = threshold;
@@ -98,7 +97,7 @@ public class BooleanPerceptronClassifier
     Long output = 0l;
     try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
       CharTermAttribute charTermAttribute = tokenStream
-        .addAttribute(CharTermAttribute.class);
+          .addAttribute(CharTermAttribute.class);
       tokenStream.reset();
       while (tokenStream.incrementToken()) {
         String s = charTermAttribute.toString();
@@ -110,7 +109,8 @@ public class BooleanPerceptronClassifier
       tokenStream.end();
     }
 
-    return new ClassificationResult<>(output >= threshold, output.doubleValue());
+    double score = 1 - Math.exp(-1 * Math.abs(threshold - output.doubleValue()) / threshold);
+    return new ClassificationResult<>(output >= threshold, score);
   }
 
   /**
@@ -127,7 +127,7 @@ public class BooleanPerceptronClassifier
    */
   @Override
   public void train(LeafReader leafReader, String textFieldName,
-      String classFieldName, Analyzer analyzer, Query query) throws IOException {
+                    String classFieldName, Analyzer analyzer, Query query) throws IOException {
     this.textTerms = MultiFields.getTerms(leafReader, textFieldName);
 
     if (textTerms == null) {
@@ -150,7 +150,7 @@ public class BooleanPerceptronClassifier
     }
 
     // TODO : remove this map as soon as we have a writable FST
-    SortedMap<String,Double> weights = new TreeMap<>();
+    SortedMap<String, Double> weights = new ConcurrentSkipListMap<>();
 
     TermsEnum reuse = textTerms.iterator(null);
     BytesRef textTerm;
@@ -177,10 +177,10 @@ public class BooleanPerceptronClassifier
       ClassificationResult<Boolean> classificationResult = assignClass(doc
           .getField(textFieldName).stringValue());
       Boolean assignedClass = classificationResult.getAssignedClass();
-      
+
       // get the expected result
       IndexableField field = doc.getField(classFieldName);
-      
+
       Boolean correctClass = Boolean.valueOf(field.stringValue());
       long modifier = correctClass.compareTo(assignedClass);
       if (modifier != 0) {
@@ -198,8 +198,8 @@ public class BooleanPerceptronClassifier
   }
 
   private TermsEnum updateWeights(LeafReader leafReader, TermsEnum reuse,
-      int docId, Boolean assignedClass, SortedMap<String,Double> weights,
-      double modifier, boolean updateFST) throws IOException {
+                                  int docId, Boolean assignedClass, SortedMap<String, Double> weights,
+                                  double modifier, boolean updateFST) throws IOException {
     TermsEnum cte = textTerms.iterator(reuse);
 
     // get the doc term vectors
@@ -231,12 +231,12 @@ public class BooleanPerceptronClassifier
     return reuse;
   }
 
-  private void updateFST(SortedMap<String,Double> weights) throws IOException {
+  private void updateFST(SortedMap<String, Double> weights) throws IOException {
     PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
     BytesRefBuilder scratchBytes = new BytesRefBuilder();
     IntsRefBuilder scratchInts = new IntsRefBuilder();
-    for (Map.Entry<String,Double> entry : weights.entrySet()) {
+    for (Map.Entry<String, Double> entry : weights.entrySet()) {
       scratchBytes.copyChars(entry.getKey());
       fstBuilder.add(Util.toIntsRef(scratchBytes.get(), scratchInts), entry
           .getValue().longValue());

Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Mon Nov 17 08:47:34 2014
@@ -16,6 +16,14 @@
  */
 package org.apache.lucene.classification;
 
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
@@ -29,14 +37,6 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
 
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 /**
  * A k-Nearest Neighbor classifier (see <code>http://en.wikipedia.org/wiki/K-nearest_neighbors</code>) based
  * on {@link MoreLikeThis}
@@ -82,14 +82,14 @@ public class KNearestNeighborClassifier 
    */
   @Override
   public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
-    TopDocs topDocs=knnSearcher(text);
-    List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
-    ClassificationResult<BytesRef> retval=null;
-    double maxscore=-Double.MAX_VALUE;
-    for(ClassificationResult<BytesRef> element:doclist){
-      if(element.getScore()>maxscore){
-        retval=element;
-        maxscore=element.getScore();
+    TopDocs topDocs = knnSearch(text);
+    List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
+    ClassificationResult<BytesRef> retval = null;
+    double maxscore = -Double.MAX_VALUE;
+    for (ClassificationResult<BytesRef> element : doclist) {
+      if (element.getScore() > maxscore) {
+        retval = element;
+        maxscore = element.getScore();
       }
     }
     return retval;
@@ -100,24 +100,24 @@ public class KNearestNeighborClassifier 
    */
   @Override
   public List<ClassificationResult<BytesRef>> getClasses(String text) throws IOException {
-    TopDocs topDocs=knnSearcher(text);
-    List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+    TopDocs topDocs = knnSearch(text);
+    List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
     Collections.sort(doclist);
     return doclist;
   }
-  
+
   /**
    * {@inheritDoc}
    */
   @Override
   public List<ClassificationResult<BytesRef>> getClasses(String text, int max) throws IOException {
-    TopDocs topDocs=knnSearcher(text);
-    List<ClassificationResult<BytesRef>> doclist=buildListFromTopDocs(topDocs);
+    TopDocs topDocs = knnSearch(text);
+    List<ClassificationResult<BytesRef>> doclist = buildListFromTopDocs(topDocs);
     Collections.sort(doclist);
     return doclist.subList(0, max);
   }
 
-  private TopDocs knnSearcher(String text) throws IOException{
+  private TopDocs knnSearch(String text) throws IOException {
     if (mlt == null) {
       throw new IOException("You must first call Classifier#train");
     }
@@ -132,31 +132,30 @@ public class KNearestNeighborClassifier 
     }
     return indexSearcher.search(mltQuery, k);
   }
-  
+
   private List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
     Map<BytesRef, Integer> classCounts = new HashMap<>();
     for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
-        BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
-        Integer count = classCounts.get(cl);
-        if (count != null) {
-            classCounts.put(cl, count + 1);
-        } else {
-            classCounts.put(cl, 1);
-        }
+      BytesRef cl = new BytesRef(indexSearcher.doc(scoreDoc.doc).getField(classFieldName).stringValue());
+      Integer count = classCounts.get(cl);
+      if (count != null) {
+        classCounts.put(cl, count + 1);
+      } else {
+        classCounts.put(cl, 1);
+      }
     }
     List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
-    int sumdoc=0;
+    int sumdoc = 0;
     for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
-        Integer count = entry.getValue();
-        returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
-        sumdoc+=count;
-
+      Integer count = entry.getValue();
+      returnList.add(new ClassificationResult<>(entry.getKey().clone(), count / (double) k));
+      sumdoc += count;
     }
-    
+
     //correction
-    if(sumdoc<k){
-      for(ClassificationResult<BytesRef> cr:returnList){
-        cr.setScore(cr.getScore()*(double)k/(double)sumdoc);
+    if (sumdoc < k) {
+      for (ClassificationResult<BytesRef> cr : returnList) {
+        cr.setScore(cr.getScore() * (double) k / (double) sumdoc);
       }
     }
     return returnList;

Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/package.html Mon Nov 17 08:47:34 2014
@@ -17,7 +17,6 @@
 <html>
 <body>
 Uses already seen data (the indexed documents) to classify new documents.
-Currently only contains a (simplistic) Lucene based Naive Bayes classifier,
-a k-Nearest Neighbor classifier and a Perceptron based classifier
+Currently contains a (simplistic) Naive Bayes classifier, a k-Nearest Neighbor classifier and a Perceptron-based classifier.
 </body>
 </html>

Modified: lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/java/org/apache/lucene/classification/utils/DocToDoubleVectorUtils.java Mon Nov 17 08:47:34 2014
@@ -16,12 +16,12 @@
  */
 package org.apache.lucene.classification.utils;
 
+import java.io.IOException;
+
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 
-import java.io.IOException;
-
 /**
  * utility class for converting Lucene {@link org.apache.lucene.document.Document}s to <code>Double</code> vectors.
  */

Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/ClassificationTestBase.java Mon Nov 17 08:47:34 2014
@@ -91,7 +91,8 @@ public abstract class ClassificationTest
       ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
       assertNotNull(classificationResult.getAssignedClass());
       assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
-      assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
+      double score = classificationResult.getScore();
+      assertTrue("score should be between 0 and 1, got:" + score, score <= 1 && score >= 0);
     } finally {
       if (leafReader != null)
         leafReader.close();
@@ -110,11 +111,12 @@ public abstract class ClassificationTest
       ClassificationResult<T> classificationResult = classifier.assignClass(inputDoc);
       assertNotNull(classificationResult.getAssignedClass());
       assertEquals("got an assigned class of " + classificationResult.getAssignedClass(), expectedResult, classificationResult.getAssignedClass());
-      assertTrue("got a not positive score " + classificationResult.getScore(), classificationResult.getScore() > 0);
+      double score = classificationResult.getScore();
+      assertTrue("score should be between 0 and 1, got: " + score, score <= 1 && score >= 0);
       updateSampleIndex(analyzer);
       ClassificationResult<T> secondClassificationResult = classifier.assignClass(inputDoc);
       assertEquals(classificationResult.getAssignedClass(), secondClassificationResult.getAssignedClass());
-      assertEquals(Double.valueOf(classificationResult.getScore()), Double.valueOf(secondClassificationResult.getScore()));
+      assertEquals(Double.valueOf(score), Double.valueOf(secondClassificationResult.getScore()));
 
     } finally {
       if (leafReader != null)

Modified: lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/classification/src/test/org/apache/lucene/classification/SimpleNaiveBayesClassifierTest.java Mon Nov 17 08:47:34 2014
@@ -33,7 +33,6 @@ import java.io.Reader;
 /**
  * Testcase for {@link SimpleNaiveBayesClassifier}
  */
-// TODO : eventually remove this if / when fallback methods exist for all un-supportable codec methods (see LUCENE-4872)
 public class SimpleNaiveBayesClassifierTest extends ClassificationTestBase<BytesRef> {
 
   @Test

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java Mon Nov 17 08:47:34 2014
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
 
 import org.apache.lucene.util.AttributeSource;
@@ -27,7 +27,8 @@ import org.apache.lucene.util.AttributeS
 /**
  * This class can be used if the token attributes of a TokenStream
  * are intended to be consumed more than once. It caches
- * all token attribute states locally in a List.
+ * all token attribute states locally in a List the first time
+ * {@link #incrementToken()} is called.
  * 
  * <P>CachingTokenFilter implements the optional method
  * {@link TokenStream#reset()}, which repositions the
@@ -51,7 +52,7 @@ public final class CachingTokenFilter ex
   public final boolean incrementToken() throws IOException {
     if (cache == null) {
       // fill cache lazily
-      cache = new LinkedList<>();
+      cache = new ArrayList<>(64);
       fillCache();
       iterator = cache.iterator();
     }
@@ -81,13 +82,13 @@ public final class CachingTokenFilter ex
    */
   @Override
   public void reset() {
-    if(cache != null) {
+    if (cache != null) {
       iterator = cache.iterator();
     }
   }
   
   private void fillCache() throws IOException {
-    while(input.incrementToken()) {
+    while (input.incrementToken()) {
       cache.add(captureState());
     }
     // capture final state
@@ -95,4 +96,9 @@ public final class CachingTokenFilter ex
     finalState = captureState();
   }
 
+  /** Returns true if the underlying token stream was consumed and cached. */
+  public boolean isCached() {
+    return cache != null;
+  }
+
 }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Mon Nov 17 08:47:34 2014
@@ -4408,17 +4408,6 @@ public class IndexWriter implements Clos
     return directory.makeLock(WRITE_LOCK_NAME).isLocked();
   }
 
-  /**
-   * Forcibly unlocks the index in the named directory.
-   * <P>
-   * Caution: this should only be used by failure recovery code,
-   * when it is known that no other process nor thread is in fact
-   * currently accessing this index.
-   */
-  public static void unlock(Directory directory) throws IOException {
-    directory.makeLock(IndexWriter.WRITE_LOCK_NAME).close();
-  }
-
   /** If {@link DirectoryReader#open(IndexWriter,boolean)} has
    *  been called (ie, this writer is in near real-time
    *  mode), then after a merge completes, this class can be

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/Sort.java Mon Nov 17 08:47:34 2014
@@ -127,7 +127,11 @@ public class Sort {
     setSort(field);
   }
 
-  /** Sorts in succession by the criteria in each SortField. */
+  /** Sorts by the given criteria in succession: the
+   *  first SortField is checked first, but if it produces a
+   *  tie, then the second SortField is used to break the tie,
+   *  etc.  Finally, if there is still a tie after all SortFields
+   *  are checked, the internal Lucene docid is used to break it. */
   public Sort(SortField... fields) {
     setSort(fields);
   }
@@ -137,7 +141,11 @@ public class Sort {
     this.fields = new SortField[] { field };
   }
 
-  /** Sets the sort to the given criteria in succession. */
+  /** Sets the sort to the given criteria in succession: the
+   *  first SortField is checked first, but if it produces a
+   *  tie, then the second SortField is used to break the tie,
+   *  etc.  Finally, if there is still a tie after all SortFields
+   *  are checked, the internal Lucene docid is used to break it. */
   public void setSort(SortField... fields) {
     this.fields = fields;
   }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Mon Nov 17 08:47:34 2014
@@ -234,24 +234,23 @@ public class NearSpansOrdered extends Sp
     return true;
   }
   
-  /** Check whether two Spans in the same document are ordered.
-   * @return true iff spans1 starts before spans2
-   *              or the spans start at the same position,
-   *              and spans1 ends before spans2.
+  /** Check whether two Spans in the same document are ordered and not overlapping.
+   * @return false iff spans2's start position is smaller than spans1's end position
    */
-  static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
+  static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) {
     assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
-    int start1 = spans1.start();
-    int start2 = spans2.start();
-    /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
-    return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
+    assert spans1.start() < spans1.end();
+    assert spans2.start() < spans2.end();
+    return spans1.end() <= spans2.start();
   }
 
-  /** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans
+  /** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans
    * starts and ends as parameters.
    */
-  private static final boolean docSpansOrdered(int start1, int end1, int start2, int end2) {
-    return (start1 == start2) ? (end1 < end2) : (start1 < start2);
+  private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) {
+    assert start1 < end1;
+    assert start2 < end2;
+    return end1 <= start2;
   }
 
   /** Order the subSpans within the same document by advancing all later spans
@@ -260,7 +259,7 @@ public class NearSpansOrdered extends Sp
   private boolean stretchToOrder() throws IOException {
     matchDoc = subSpans[0].doc();
     for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
-      while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
+      while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) {
         if (! subSpans[i].next()) {
           inSameDoc = false;
           more = false;
@@ -312,7 +311,7 @@ public class NearSpansOrdered extends Sp
         } else {
           int ppStart = prevSpans.start();
           int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
-          if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
+          if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) {
             break; // Check remaining subSpans.
           } else { // prevSpans still before (lastStart, lastEnd)
             prevStart = ppStart;

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java Mon Nov 17 08:47:34 2014
@@ -63,7 +63,7 @@ public class NearSpansUnordered extends 
     @Override
     protected final boolean lessThan(SpansCell spans1, SpansCell spans2) {
       if (spans1.doc() == spans2.doc()) {
-        return NearSpansOrdered.docSpansOrdered(spans1, spans2);
+        return docSpansOrdered(spans1, spans2);
       } else {
         return spans1.doc() < spans2.doc();
       }
@@ -233,6 +233,18 @@ public class NearSpansUnordered extends 
     return more && (atMatch() ||  next());
   }
 
+  /** Check whether two Spans in the same document are ordered with possible overlap.
+   * @return true iff spans1 starts before spans2
+   *              or the spans start at the same position,
+   *              and spans1 ends before spans2.
+   */
+  static final boolean docSpansOrdered(Spans spans1, Spans spans2) {
+    assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
+    int start1 = spans1.start();
+    int start2 = spans2.start();
+    return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
+  }
+
   private SpansCell min() { return queue.top(); }
 
   @Override

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java Mon Nov 17 08:47:34 2014
@@ -48,12 +48,15 @@ public class SpanNearQuery extends SpanQ
 
   /** Construct a SpanNearQuery.  Matches spans matching a span from each
    * clause, with up to <code>slop</code> total unmatched positions between
-   * them.  * When <code>inOrder</code> is true, the spans from each clause
-   * must be * ordered as in <code>clauses</code>.
+   * them.
+   * <br>When <code>inOrder</code> is true, the spans from each clause
+   * must be in the same order as in <code>clauses</code> and must be non-overlapping.
+   * <br>When <code>inOrder</code> is false, the spans from each clause
+   * need not be ordered and may overlap.
    * @param clauses the clauses to find near each other
    * @param slop The slop value
    * @param inOrder true if order is important
-   * */
+   */
   public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) {
     this(clauses, slop, inOrder, true);     
   }

Modified: lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java Mon Nov 17 08:47:34 2014
@@ -37,12 +37,10 @@ import java.nio.file.Path;
 
  * <p>When this happens, a {@link LockObtainFailedException}
  * is hit when trying to create a writer, in which case you
- * need to explicitly clear the lock file first.  You can
- * either manually remove the file, or use the {@link
- * org.apache.lucene.index.IndexWriter#unlock(Directory)}
- * API.  But, first be certain that no writer is in fact
- * writing to the index otherwise you can easily corrupt
- * your index.</p>
+ * need to explicitly clear the lock file first by
+ * manually removing the file.  But, first be certain that
+ * no writer is in fact writing to the index otherwise you
+ * can easily corrupt your index.</p>
  *
  * <p>Special care needs to be taken if you change the locking
  * implementation: First be certain that no writer is in fact

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReaderReopen.java Mon Nov 17 08:47:34 2014
@@ -16,6 +16,7 @@ package org.apache.lucene.index;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -28,7 +29,6 @@ import java.util.Random;
 import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.Document2;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -41,12 +41,14 @@ import org.apache.lucene.search.IndexSea
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.MockDirectoryWrapper.FakeIOException;
 import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.store.MockDirectoryWrapper.FakeIOException;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
+
+
 public class TestDirectoryReaderReopen extends LuceneTestCase {
   
   public void testReopen() throws Exception {
@@ -429,7 +431,6 @@ public class TestDirectoryReaderReopen e
   }
   
   public static void createIndex(Random random, Directory dir, boolean multiSegment) throws IOException {
-    IndexWriter.unlock(dir);
     IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, new MockAnalyzer(random))
         .setMergePolicy(new LogDocMergePolicy()));
     FieldTypes fieldTypes = w.getFieldTypes();

Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java Mon Nov 17 08:47:34 2014
@@ -81,6 +81,22 @@ public class TestNearSpansOrdered extend
   protected SpanNearQuery makeQuery() {
     return makeQuery("w1","w2","w3",1,true);
   }
+
+  protected SpanNearQuery makeOverlappedQuery(
+      String sqt1, String sqt2, boolean sqOrdered,
+      String t3, boolean ordered) {
+    return new SpanNearQuery(
+      new SpanQuery[] {
+        new SpanNearQuery(new SpanQuery[] {
+          new SpanTermQuery(new Term(FIELD, sqt1)),
+            new SpanTermQuery(new Term(FIELD, sqt2)) },
+            1,
+            sqOrdered
+          ),
+          new SpanTermQuery(new Term(FIELD, t3)) },
+          0,
+          ordered);
+  }
   
   public void testSpanNearQuery() throws Exception {
     SpanNearQuery q = makeQuery();
@@ -169,6 +185,22 @@ public class TestNearSpansOrdered extend
     Scorer s = w.scorer(leave, leave.reader().getLiveDocs());
     assertEquals(1, s.advance(1));
   }
+
+  public void testOverlappedOrderedSpan() throws Exception {
+    SpanNearQuery q = makeOverlappedQuery("w5", "w3", false, "w4", true);
+    CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {});
+  }
+  
+  public void testOverlappedNonOrderedSpan() throws Exception {
+    SpanNearQuery q = makeOverlappedQuery("w3", "w5", true, "w4", false);
+    CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {0});
+  }
+
+  public void testNonOverlappedOrderedSpan() throws Exception {
+    SpanNearQuery q = makeOverlappedQuery("w3", "w4", true, "w5", true);
+    CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {0});
+  }
+  
   
   /**
    * not a direct test of NearSpans, but a demonstration of how/when
@@ -181,5 +213,4 @@ public class TestNearSpansOrdered extend
                + e.toString(),
                0.0f < e.getValue());
   }
-  
 }

Modified: lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Mon Nov 17 08:47:34 2014
@@ -23,18 +23,18 @@ import org.apache.lucene.facet.FacetsCon
 import org.apache.lucene.facet.taxonomy.FacetLabel;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
-import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
 import org.apache.lucene.index.CorruptIndexException; // javadocs
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.index.ReaderManager;
 import org.apache.lucene.index.SegmentInfos;
@@ -44,8 +44,6 @@ import org.apache.lucene.index.TieredMer
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException; // javadocs
-import org.apache.lucene.store.NativeFSLockFactory;
-import org.apache.lucene.store.SimpleFSLockFactory;
 import org.apache.lucene.util.BytesRef;
 
 /*
@@ -136,22 +134,6 @@ public class DirectoryTaxonomyWriter imp
   }
   
   /**
-   * Forcibly unlocks the taxonomy in the named directory.
-   * <P>
-   * Caution: this should only be used by failure recovery code, when it is
-   * known that no other process nor thread is in fact currently accessing
-   * this taxonomy.
-   * <P>
-   * This method is unnecessary if your {@link Directory} uses a
-   * {@link NativeFSLockFactory} instead of the default
-   * {@link SimpleFSLockFactory}. When the "native" lock is used, a lock
-   * does not stay behind forever when the process using it dies. 
-   */
-  public static void unlock(Directory directory) throws IOException {
-    IndexWriter.unlock(directory);
-  }
-
-  /**
    * Construct a Taxonomy writer.
    * 
    * @param directory
@@ -173,10 +155,7 @@ public class DirectoryTaxonomyWriter imp
    * @throws CorruptIndexException
    *     if the taxonomy is corrupted.
    * @throws LockObtainFailedException
-   *     if the taxonomy is locked by another writer. If it is known
-   *     that no other concurrent writer is active, the lock might
-   *     have been left around by an old dead process, and should be
-   *     removed using {@link #unlock(Directory)}.
+   *     if the taxonomy is locked by another writer.
    * @throws IOException
    *     if another error occurred.
    */

Modified: lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (original)
+++ lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java Mon Nov 17 08:47:34 2014
@@ -12,8 +12,6 @@ import org.apache.lucene.facet.SlowRAMDi
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.junit.Test;
 
@@ -916,47 +914,6 @@ public class TestTaxonomyCombined extend
   }
   
   /**
-   * Test what happens if we try to write to a locked taxonomy writer,
-   * and see that we can unlock it and continue.
-   */
-  @Test
-  public void testWriterLock() throws Exception {
-    // native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
-    Directory indexDir = new RAMDirectory();
-    TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
-    tw.addCategory(new FacetLabel("hi", "there"));
-    tw.commit();
-    // we deliberately not close the write now, and keep it open and
-    // locked.
-    // Verify that the writer worked:
-    TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
-    assertEquals(2, tr.getOrdinal(new FacetLabel("hi", "there")));
-    // Try to open a second writer, with the first one locking the directory.
-    // We expect to get a LockObtainFailedException.
-    try {
-      assertNull(new DirectoryTaxonomyWriter(indexDir));
-      fail("should have failed to write in locked directory");
-    } catch (LockObtainFailedException e) {
-      // this is what we expect to happen.
-    }
-    // Remove the lock, and now the open should succeed, and we can
-    // write to the new writer.
-    DirectoryTaxonomyWriter.unlock(indexDir);
-    TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
-    tw2.addCategory(new FacetLabel("hey"));
-    tw2.close();
-    // See that the writer indeed wrote:
-    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
-    assertNotNull(newtr);
-    tr.close();
-    tr = newtr;
-    assertEquals(3, tr.getOrdinal(new FacetLabel("hey")));
-    tr.close();
-    tw.close();
-    indexDir.close();
-  }
-  
-  /**
    * fillTaxonomyCheckPaths adds the categories in the categories[] array,
    * and asserts that the additions return exactly paths specified in
    * expectedPaths[]. This is the same add fillTaxonomy() but also checks

Modified: lucene/dev/branches/lucene6005/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/build.xml?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/build.xml (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/build.xml Mon Nov 17 08:47:34 2014
@@ -31,10 +31,13 @@
   <path id="classpath">
     <pathelement path="${memory.jar}"/>
     <pathelement path="${queries.jar}"/>
+    <pathelement path="${join.jar}"/>
     <path refid="base.classpath"/>
   </path>
 
-  <target name="compile-core" depends="jar-memory, common.compile-core" />
+    <target name="init" depends="module-build.init,jar-memory,jar-queries,jar-join"/>
+
+  <target name="compile-core" depends="jar-memory, common.compile-core, jar-join" />
   <target name="javadocs" depends="javadocs-memory,compile-core,check-javadocs-uptodate"
           unless="javadocs-uptodate-${name}">
     <invoke-module-javadoc>

Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Nov 17 08:47:34 2014
@@ -44,6 +44,8 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanFirstQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -154,6 +156,10 @@ public class WeightedSpanTermExtractor {
       for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
         extract(iterator.next(), terms);
       }
+    } else if (query instanceof ToParentBlockJoinQuery) {
+      extract(((ToParentBlockJoinQuery) query).getChildQuery(), terms);
+    } else if (query instanceof ToChildBlockJoinQuery) {
+      extract(((ToChildBlockJoinQuery) query).getParentQuery(), terms);
     } else if (query instanceof MultiPhraseQuery) {
       final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
       final List<Term[]> termArrays = mpq.getTermArrays();

Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Mon Nov 17 08:47:34 2014
@@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -49,6 +50,11 @@ import org.apache.lucene.queries.CommonT
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
+import org.apache.lucene.search.join.BitDocIdSetCachingWrapperFilter;
+import org.apache.lucene.search.join.BitDocIdSetFilter;
+import org.apache.lucene.search.join.ScoreMode;
+import org.apache.lucene.search.join.ToChildBlockJoinQuery;
+import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.lucene.search.spans.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -514,6 +520,62 @@ public class HighlighterTest extends Bas
 
 
   }
+  
+  public void testToParentBlockJoinQuery() throws Exception {
+    BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
+        new QueryWrapperFilter(
+          new TermQuery(new Term(FIELD_NAME, "parent"))));
+    
+    query = new ToParentBlockJoinQuery(new TermQuery(new Term(FIELD_NAME, "child")),
+        parentFilter, ScoreMode.None);
+    searcher = newSearcher(reader);
+    hits = searcher.search(query, 100);
+    int maxNumFragmentsRequired = 2;
+    
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = "child document";
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+      
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+    }
+    
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 1);
+  }
+  
+  public void testToChildBlockJoinQuery() throws Exception {
+    BitDocIdSetFilter parentFilter = new BitDocIdSetCachingWrapperFilter(
+        new QueryWrapperFilter(
+          new TermQuery(new Term(FIELD_NAME, "parent"))));
+    
+    BooleanQuery booleanQuery = new BooleanQuery();
+    booleanQuery.add(new ToChildBlockJoinQuery(new TermQuery(
+        new Term(FIELD_NAME, "parent")), parentFilter, false), Occur.MUST);
+    booleanQuery.add(new TermQuery(new Term(FIELD_NAME, "child")), Occur.MUST);
+    query = booleanQuery;
+    
+    searcher = newSearcher(reader);
+    hits = searcher.search(query, 100);
+    int maxNumFragmentsRequired = 2;
+    
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(this, scorer);
+    
+    for (int i = 0; i < hits.totalHits; i++) {
+      String text = "parent document";
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+      
+      highlighter.setTextFragmenter(new SimpleFragmenter(40));
+      highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
+    }
+    
+    assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
+        numHighlights == 1);
+  }
 
   public void testSimpleQueryScorerPhraseHighlighting2() throws Exception {
     PhraseQuery phraseQuery = new PhraseQuery();
@@ -1900,6 +1962,10 @@ public class HighlighterTest extends Bas
     doc.add(new StoredField(NUMERIC_FIELD_NAME, 7));
     writer.addDocument(doc, analyzer);
 
+    Document childDoc = doc(FIELD_NAME, "child document");
+    Document parentDoc = doc(FIELD_NAME, "parent document");
+    writer.addDocuments(Arrays.asList(childDoc, parentDoc));
+    
     writer.forceMerge(1);
     writer.close();
     reader = DirectoryReader.open(ramDir);

Modified: lucene/dev/branches/lucene6005/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/ivy-versions.properties?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/lucene6005/lucene/ivy-versions.properties Mon Nov 17 08:47:34 2014
@@ -55,7 +55,7 @@ com.sun.jersey.version = 1.9
 /com.uwyn/jhighlight = 1.0
 /commons-beanutils/commons-beanutils = 1.8.3
 /commons-cli/commons-cli = 1.2
-/commons-codec/commons-codec = 1.9
+/commons-codec/commons-codec = 1.10
 /commons-collections/commons-collections = 3.2.1
 /commons-configuration/commons-configuration = 1.6
 /commons-digester/commons-digester = 2.1

Modified: lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Mon Nov 17 08:47:34 2014
@@ -90,6 +90,11 @@ public class ToChildBlockJoinQuery exten
     return new ToChildBlockJoinWeight(this, parentQuery.createWeight(searcher), parentsFilter, doScores);
   }
 
+  /** Return our parent query. */
+  public Query getParentQuery() {
+    return parentQuery;
+  }
+
   private static class ToChildBlockJoinWeight extends Weight {
     private final Query joinQuery;
     private final Weight parentWeight;

Modified: lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/lucene6005/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Mon Nov 17 08:47:34 2014
@@ -122,6 +122,11 @@ public class ToParentBlockJoinQuery exte
   public Weight createWeight(IndexSearcher searcher) throws IOException {
     return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode);
   }
+  
+  /** Return our child query. */
+  public Query getChildQuery() {
+    return childQuery;
+  }
 
   private static class BlockJoinWeight extends Weight {
     private final Query joinQuery;

Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Mon Nov 17 08:47:34 2014
@@ -22,8 +22,10 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -70,8 +72,8 @@ import org.apache.lucene.search.TermQuer
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.suggest.InputIterator;
-import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
@@ -390,7 +392,22 @@ public class AnalyzingInfixSuggester ext
 
   /** Lookup, without any context. */
   public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
-    return lookup(key, null, num, allTermsRequired, doHighlight);
+    return lookup(key, (Map<BytesRef, BooleanClause.Occur>)null, num, allTermsRequired, doHighlight);
+  }
+
+  /** Lookup, with context but without booleans. Context booleans default to SHOULD,
+   *  so each suggestion must have at least one of the contexts. */
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+
+    if (contexts == null) {
+      return lookup(key, num, allTermsRequired, doHighlight);
+    }
+
+    Map<BytesRef, BooleanClause.Occur> contextInfo = new HashMap<>();
+    for (BytesRef context : contexts) {
+      contextInfo.put(context, BooleanClause.Occur.SHOULD);
+    }
+    return lookup(key, contextInfo, num, allTermsRequired, doHighlight);
   }
 
   /** This is called if the last token isn't ended
@@ -408,7 +425,7 @@ public class AnalyzingInfixSuggester ext
   /** Retrieve suggestions, specifying whether all terms
    *  must match ({@code allTermsRequired}) and whether the hits
    *  should be highlighted ({@code doHighlight}). */
-  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+  public List<LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
 
     if (searcherMgr == null) {
       throw new IllegalStateException("suggester was not built");
@@ -469,21 +486,35 @@ public class AnalyzingInfixSuggester ext
         }
       }
 
-      if (contexts != null) {
-        BooleanQuery sub = new BooleanQuery();
-        query.add(sub, BooleanClause.Occur.MUST);
-        for(BytesRef context : contexts) {
-          // NOTE: we "should" wrap this in
-          // ConstantScoreQuery, or maybe send this as a
-          // Filter instead to search, but since all of
-          // these are MUST'd, the change to the score won't
-          // affect the overall ranking.  Since we indexed
-          // as DOCS_ONLY, the perf should be the same
-          // either way (no freq int[] blocks to decode):
-
-          // TODO: if we had a BinaryTermField we could fix
-          // this "must be valid ut8f" limitation:
-          sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.utf8ToString())), BooleanClause.Occur.SHOULD);
+      if (contextInfo != null) {
+        
+        boolean allMustNot = true;
+        for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+          if (entry.getValue() != BooleanClause.Occur.MUST_NOT) {
+            allMustNot = false;
+            break;
+          }
+        }
+
+        // do not make a subquery if all context booleans are MUST_NOT
+        if (allMustNot == true) {
+          for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+            query.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), BooleanClause.Occur.MUST_NOT);
+          }
+
+        } else {
+          BooleanQuery sub = new BooleanQuery();
+          query.add(sub, BooleanClause.Occur.MUST);
+
+          for (Map.Entry<BytesRef, BooleanClause.Occur> entry : contextInfo.entrySet()) {
+            // NOTE: we "should" wrap this in
+            // ConstantScoreQuery, or maybe send this as a
+            // Filter instead to search.
+
+            // TODO: if we had a BinaryTermField we could fix
+            // this "must be valid utf8" limitation:
+            sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, entry.getKey().utf8ToString())), entry.getValue());
+          }
         }
       }
     }
@@ -572,8 +603,7 @@ public class AnalyzingInfixSuggester ext
       LookupResult result;
 
       if (doHighlight) {
-        Object highlightKey = highlight(text, matchedTokens, prefixToken);
-        result = new LookupResult(highlightKey.toString(), highlightKey, score, payload, contexts);
+        result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload, contexts);
       } else {
         result = new LookupResult(text, score, payload, contexts);
       }
@@ -664,12 +694,14 @@ public class AnalyzingInfixSuggester ext
   protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
     // TODO: apps can try to invert their analysis logic
     // here, e.g. downcase the two before checking prefix:
+    if (prefixToken.length() >= surface.length()) {
+      addWholeMatch(sb, surface, analyzed);
+      return;
+    }
     sb.append("<b>");
     sb.append(surface.substring(0, prefixToken.length()));
     sb.append("</b>");
-    if (prefixToken.length() < surface.length()) {
-      sb.append(surface.substring(prefixToken.length()));
-    }
+    sb.append(surface.substring(prefixToken.length()));
   }
 
   @Override

Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?rev=1640099&r1=1640098&r2=1640099&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Mon Nov 17 08:47:34 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
@@ -33,6 +34,7 @@ import org.apache.lucene.index.IndexOpti
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TopFieldDocs;
@@ -147,6 +149,12 @@ public class BlendedInfixSuggester exten
   }
 
   @Override
+  public List<Lookup.LookupResult> lookup(CharSequence key, Map<BytesRef, BooleanClause.Occur> contextInfo, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+    // here we multiply the number of searched elements by the defined factor
+    return super.lookup(key, contextInfo, num * numFactor, allTermsRequired, doHighlight);
+  }
+
+  @Override
   protected FieldType getTextFieldType() {
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
@@ -199,8 +207,7 @@ public class BlendedInfixSuggester exten
 
       LookupResult result;
       if (doHighlight) {
-        Object highlightKey = highlight(text, matchedTokens, prefixToken);
-        result = new LookupResult(highlightKey.toString(), highlightKey, score, payload);
+        result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload);
       } else {
         result = new LookupResult(text, score, payload);
       }



Mime
View raw message