lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1442821 [1/2] - in /lucene/dev/branches/lucene4547: ./ dev-tools/ dev-tools/maven/lucene/highlighter/ lucene/ lucene/analysis/ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ lucene/analysis/uima/src/test-files/uima/ lucene/...
Date Wed, 06 Feb 2013 00:58:31 GMT
Author: rmuir
Date: Wed Feb  6 00:58:30 2013
New Revision: 1442821

URL: http://svn.apache.org/viewvc?rev=1442821&view=rev
Log:
Merged /lucene/dev/trunk:r1441770-1442810

Removed:
    lucene/dev/branches/lucene4547/lucene/licenses/asm-debug-all-4.1.jar.sha1
    lucene/dev/branches/lucene4547/lucene/licenses/asm-debug-all-LICENSE-BSD_LIKE.txt
    lucene/dev/branches/lucene4547/lucene/licenses/asm-debug-all-NOTICE.txt
    lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/commons-io.txt
    lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/jdk-deprecated.txt
    lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/jdk.txt
    lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/system-out.txt
    lucene/dev/branches/lucene4547/lucene/tools/lib/
    lucene/dev/branches/lucene4547/lucene/tools/src/java/org/apache/lucene/validation/ForbiddenApisCheckTask.java
Modified:
    lucene/dev/branches/lucene4547/   (props changed)
    lucene/dev/branches/lucene4547/dev-tools/   (props changed)
    lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template
    lucene/dev/branches/lucene4547/lucene/   (props changed)
    lucene/dev/branches/lucene4547/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
    lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
    lucene/dev/branches/lucene4547/lucene/benchmark/   (props changed)
    lucene/dev/branches/lucene4547/lucene/benchmark/build.xml
    lucene/dev/branches/lucene4547/lucene/build.xml   (contents, props changed)
    lucene/dev/branches/lucene4547/lucene/common-build.xml   (contents, props changed)
    lucene/dev/branches/lucene4547/lucene/core/   (props changed)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java
    lucene/dev/branches/lucene4547/lucene/highlighter/   (props changed)
    lucene/dev/branches/lucene4547/lucene/highlighter/build.xml
    lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
    lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
    lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
    lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
    lucene/dev/branches/lucene4547/lucene/licenses/   (props changed)
    lucene/dev/branches/lucene4547/lucene/memory/   (props changed)
    lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/branches/lucene4547/lucene/queryparser/   (props changed)
    lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
    lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
    lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
    lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/package.html
    lucene/dev/branches/lucene4547/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
    lucene/dev/branches/lucene4547/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
    lucene/dev/branches/lucene4547/lucene/tools/   (props changed)
    lucene/dev/branches/lucene4547/lucene/tools/build.xml
    lucene/dev/branches/lucene4547/lucene/tools/custom-tasks.xml
    lucene/dev/branches/lucene4547/lucene/tools/ivy.xml
    lucene/dev/branches/lucene4547/lucene/tools/src/java/lucene-solr.antlib.xml
    lucene/dev/branches/lucene4547/solr/   (props changed)
    lucene/dev/branches/lucene4547/solr/build.xml   (contents, props changed)
    lucene/dev/branches/lucene4547/solr/common-build.xml   (contents, props changed)
    lucene/dev/branches/lucene4547/solr/contrib/   (props changed)
    lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/uima-tokenizers-schema.xml
    lucene/dev/branches/lucene4547/solr/core/   (props changed)
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
    lucene/dev/branches/lucene4547/solr/solrj/   (props changed)
    lucene/dev/branches/lucene4547/solr/solrj/ivy.xml

Modified: lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template (original)
+++ lucene/dev/branches/lucene4547/dev-tools/maven/lucene/highlighter/pom.xml.template Wed Feb  6 00:58:30 2013
@@ -61,6 +61,11 @@
       <artifactId>lucene-memory</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>lucene-queries</artifactId>
+      <version>${project.version}</version>
+    </dependency>
   </dependencies>
   <build>
     <sourceDirectory>${module-path}/src/java</sourceDirectory>

Modified: lucene/dev/branches/lucene4547/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/CHANGES.txt?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/lucene/CHANGES.txt Wed Feb  6 00:58:30 2013
@@ -94,6 +94,12 @@ New Features
 * LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
   creation via the resulting factories using NewAnalyzerTask.  (Steve Rowe)
 
+* LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten
+  against the highlighting IndexReader to obtain primitive queries before 
+  discarding the query entirely. WeightedSpanTermExtractor now builds a
+  MemoryIndex only once even if multiple fields are highlighted.
+  (Simon Willnauer)
+
 API Changes
 
 * LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
@@ -133,10 +139,18 @@ Bug Fixes
 * LUCENE-4739: Fixed bugs that prevented FSTs more than ~1.1GB from
   being saved and loaded (Adrien Grand, Mike McCandless)
 
+Documentation
+
+* LUCENE-4718: Fixed documentation of oal.queryparser.classic.
+  (Hayden Muhl via Adrien Grand)
+
 Build
 
 * LUCENE-4636: Upgrade ivy to 2.3.0 (Shawn Heisey via Robert Muir)
 
+* LUCENE-4570: Use the Policeman Forbidden API checker, released separately
+  from Lucene and downloaded via Ivy.  (Uwe Schindler, Robert Muir)
+
 ======================= Lucene 4.1.0 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Wed Feb  6 00:58:30 2013
@@ -28,6 +28,8 @@ import org.apache.uima.resource.Resource
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
@@ -39,10 +41,10 @@ public abstract class BaseUIMATokenizer 
   protected final AnalysisEngine ae;
   protected final CAS cas;
 
-  protected BaseUIMATokenizer(Reader reader, String descriptorPath) {
+  protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
     super(reader);
     try {
-      ae = AEProviderFactory.getInstance().getAEProvider(descriptorPath).getAE();
+      ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
       cas = ae.newCAS();
     } catch (ResourceInitializationException e) {
       throw new RuntimeException(e);

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Wed Feb  6 00:58:30 2013
@@ -26,6 +26,7 @@ import org.apache.uima.cas.text.Annotati
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * a {@link Tokenizer} which creates tokens from UIMA Annotations
@@ -40,8 +41,8 @@ public final class UIMAAnnotationsTokeni
 
   private int finalOffset = 0;
 
-  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
-    super(input, descriptorPath);
+  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
+    super(input, descriptorPath, configurationParameters);
     this.tokenTypeString = tokenType;
     this.termAttr = addAttribute(CharTermAttribute.class);
     this.offsetAttr = addAttribute(OffsetAttribute.class);

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java Wed Feb  6 00:58:30 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.util.T
 import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;
 
 import java.io.Reader;
+import java.util.HashMap;
 import java.util.Map;
 
 /**
@@ -31,19 +32,29 @@ public class UIMAAnnotationsTokenizerFac
 
   private String descriptorPath;
   private String tokenType;
+  private Map<String, Object> configurationParameters;
 
   @Override
   public void init(Map<String, String> args) {
     super.init(args);
-    descriptorPath = args.get("descriptorPath");
-    tokenType = args.get("tokenType");
-    if (descriptorPath == null || tokenType == null) {
-      throw new IllegalArgumentException("Both descriptorPath and tokenType are mandatory");
+    configurationParameters = new HashMap<String, Object>();
+    for (String k : args.keySet()) {
+      if (k.equals("tokenType")) {
+        tokenType = args.get("tokenType");
+      } else if (k.equals("descriptorPath")) {
+        descriptorPath = args.get("descriptorPath");
+      } else {
+        configurationParameters.put(k, args.get(k));
+      }
     }
+    if (descriptorPath == null || tokenType == null ) {
+      throw new IllegalArgumentException("descriptorPath and tokenType are mandatory");
+    }
+
   }
 
   @Override
   public Tokenizer create(Reader input) {
-    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input);
+    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input);
   }
 }

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java Wed Feb  6 00:58:30 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
 import org.apache.lucene.analysis.Analyzer;
 
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * An {@link Analyzer} which uses the {@link UIMAAnnotationsTokenizer} for creating tokens
@@ -28,15 +29,17 @@ public final class UIMABaseAnalyzer exte
 
   private final String descriptorPath;
   private final String tokenType;
+  private final Map<String, Object> configurationParameters;
 
-  public UIMABaseAnalyzer(String descriptorPath, String tokenType) {
+  public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
     this.descriptorPath = descriptorPath;
     this.tokenType = tokenType;
+    this.configurationParameters = configurationParameters;
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader));
+    return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, reader));
   }
 
 }

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java Wed Feb  6 00:58:30 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
 import org.apache.lucene.analysis.Analyzer;
 
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
@@ -28,15 +29,17 @@ public final class UIMATypeAwareAnalyzer
   private final String descriptorPath;
   private final String tokenType;
   private final String featurePath;
+  private final Map<String, Object> configurationParameters;
 
-  public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) {
+  public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map<String, Object> configurationParameters) {
     this.descriptorPath = descriptorPath;
     this.tokenType = tokenType;
     this.featurePath = featurePath;
+    this.configurationParameters = configurationParameters;
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader));
+    return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, reader));
   }
 }

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Wed Feb  6 00:58:30 2013
@@ -29,6 +29,7 @@ import org.apache.uima.cas.text.Annotati
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * A {@link Tokenizer} which creates tokens from UIMA Annotations, also filling their {@link TypeAttribute} according to
@@ -50,8 +51,8 @@ public final class UIMATypeAwareAnnotati
 
   private int finalOffset = 0;
 
-  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
-    super(input, descriptorPath);
+  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
+    super(input, descriptorPath, configurationParameters);
     this.tokenTypeString = tokenType;
     this.termAttr = addAttribute(CharTermAttribute.class);
     this.typeAttr = addAttribute(TypeAttribute.class);

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java Wed Feb  6 00:58:30 2013
@@ -18,10 +18,10 @@ package org.apache.lucene.analysis.uima;
  */
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 
 import java.io.Reader;
+import java.util.HashMap;
 import java.util.Map;
 
 /**
@@ -32,13 +32,23 @@ public class UIMATypeAwareAnnotationsTok
   private String descriptorPath;
   private String tokenType;
   private String featurePath;
+  private Map<String, Object> configurationParameters;
 
   @Override
   public void init(Map<String, String> args) {
     super.init(args);
-    descriptorPath = args.get("descriptorPath");
-    tokenType = args.get("tokenType");
-    featurePath = args.get("featurePath");
+    configurationParameters = new HashMap<String, Object>();
+    for (String k : args.keySet()) {
+      if (k.equals("featurePath")) {
+        featurePath = args.get("featurePath");
+      } else if (k.equals("tokenType")) {
+        tokenType = args.get("tokenType");
+      } else if (k.equals("descriptorPath")) {
+        descriptorPath = args.get("descriptorPath");
+      } else {
+        configurationParameters.put(k, args.get(k));
+      }
+    }
     if (descriptorPath == null || tokenType == null || featurePath == null) {
       throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory");
     }
@@ -46,6 +56,6 @@ public class UIMATypeAwareAnnotationsTok
 
   @Override
   public Tokenizer create(Reader input) {
-    return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input);
+    return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input);
   }
 }

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml Wed Feb  6 00:58:30 2013
@@ -20,7 +20,7 @@
   <primitive>true</primitive>
   <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
   <analysisEngineMetaData>
-    <name>DummyPoSTagger</name>
+    <name>EntityAnnotator</name>
     <description/>
     <version>1.0</version>
     <vendor>ASF</vendor>

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml Wed Feb  6 00:58:30 2013
@@ -20,9 +20,28 @@
   <primitive>true</primitive>
   <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
   <analysisEngineMetaData>
-    <name>DummyPoSTagger</name>
+    <name>WSTokenizer</name>
     <version>1.0</version>
     <vendor>ASF</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>line-end</name>
+        <description>
+          the string used as line end
+        </description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+        <nameValuePair>
+          <name>line-end</name>
+          <value>
+            <string>\n</string>
+          </value>
+        </nameValuePair>
+    </configurationParameterSettings>
     <typeSystemDescription>
       <types>
         <typeDescription>

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java Wed Feb  6 00:58:30 2013
@@ -36,6 +36,8 @@ import org.junit.Before;
 import org.junit.Test;
 
 import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Testcase for {@link UIMABaseAnalyzer}
@@ -48,7 +50,7 @@ public class UIMABaseAnalyzerTest extend
   @Before
   public void setUp() throws Exception {
     super.setUp();
-    analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation");
+    analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null);
   }
 
   @Override
@@ -120,7 +122,15 @@ public class UIMABaseAnalyzerTest extend
 
   @Test
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
+    checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null),
+        100 * RANDOM_MULTIPLIER);
+  }
+
+  @Test
+  public void testRandomStringsWithConfigurationParameters() throws Exception {
+    Map<String, Object> cp = new HashMap<String, Object>();
+    cp.put("line-end", "\r");
+    checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp),
         100 * RANDOM_MULTIPLIER);
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java Wed Feb  6 00:58:30 2013
@@ -37,7 +37,7 @@ public class UIMATypeAwareAnalyzerTest e
   public void setUp() throws Exception {
     super.setUp();
     analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
-        "org.apache.uima.TokenAnnotation", "posTag");
+        "org.apache.uima.TokenAnnotation", "posTag", null);
   }
 
   @Override
@@ -63,7 +63,7 @@ public class UIMATypeAwareAnalyzerTest e
   @Test
   public void testRandomStrings() throws Exception {
     checkRandomData(random(), new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
-        "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 100 * RANDOM_MULTIPLIER);
+        "org.apache.lucene.uima.ts.TokenAnnotation", "pos", null), 100 * RANDOM_MULTIPLIER);
   }
 
 }

Modified: lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java Wed Feb  6 00:58:30 2013
@@ -17,11 +17,13 @@ package org.apache.lucene.analysis.uima.
  * limitations under the License.
  */
 
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 
 /**
  * Dummy implementation of a UIMA based whitespace tokenizer
@@ -30,15 +32,21 @@ public class SampleWSTokenizerAnnotator 
 
   private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
   private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
-  private static final String CR = "\n";
+  private String lineEnd;
   private static final String WHITESPACE = " ";
 
   @Override
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end"));
+  }
+
+  @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
     Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
     Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
     int i = 0;
-    for (String sentenceString : jCas.getDocumentText().split(CR)) {
+    for (String sentenceString : jCas.getDocumentText().split(lineEnd)) {
       // add the sentence
       AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, sentenceString.length());
       jCas.addFsToIndexes(sentenceAnnotation);

Modified: lucene/dev/branches/lucene4547/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/benchmark/build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/benchmark/build.xml Wed Feb  6 00:58:30 2013
@@ -147,6 +147,7 @@
       <pathelement path="${analyzers-common.jar}"/>
       <pathelement path="${queryparser.jar}"/>
       <pathelement path="${facet.jar}"/>
+      <pathelement path="${queries.jar}"/>
       <fileset dir="${common.dir}/analysis/icu/lib"/>
       <path refid="base.classpath"/>
       <fileset dir="lib"/>

Modified: lucene/dev/branches/lucene4547/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/build.xml Wed Feb  6 00:58:30 2013
@@ -157,28 +157,34 @@
     <license-check-macro dir="${basedir}" licensedir="${common.dir}/licenses" />
   </target>
 
-  <target name="check-forbidden-apis" depends="compile-tools,compile-test,load-custom-tasks,-check-forbidden-jdk-apis,-check-forbidden-test-apis,-check-system-out" description="Check forbidden API calls in compiled class files"/>
+  <target name="check-forbidden-apis" depends="compile-tools,compile-test,install-forbidden-apis,-forbidden-apis-classpath,-check-forbidden-jdk-apis,-check-forbidden-test-apis,-check-system-out" description="Check forbidden API calls in compiled class files"/>
+
+  <!-- TODO: Make the forbidden API checks per module! -->
+  <target name="-forbidden-apis-classpath">
+    <path id="forbidden-apis.classpath">
+      <fileset dir="${basedir}" includes="**/lib/*.jar"/>
+      <dirset dir="${basedir}/build" includes="**/classes/*"/>
+    </path>
+  </target>
 
   <target name="-check-forbidden-jdk-apis">
-    <forbidden-apis> 
-      <apiFileSet dir="${custom-tasks.dir}/forbiddenApis">
-        <include name="jdk.txt" />
-        <include name="jdk-deprecated.txt" />
-        <include name="executors.txt" />
-      </apiFileSet>
+    <forbidden-apis internalRuntimeForbidden="true" classpathref="forbidden-apis.classpath">
+      <bundledSignatures name="jdk-unsafe-${javac.target}"/>
+      <bundledSignatures name="jdk-deprecated-${javac.target}"/>
+      <signaturesFileSet file="${common.dir}/tools/forbiddenApis/executors.txt"/>
       <fileset dir="${basedir}/build" includes="**/*.class" />
     </forbidden-apis>
   </target>
 
   <target name="-check-forbidden-test-apis">
-    <forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/tests.txt"> 
+    <forbidden-apis signaturesFile="${common.dir}/tools/forbiddenApis/tests.txt" classpathref="forbidden-apis.classpath"> 
       <classpath refid="junit-path"/>
       <fileset dir="${basedir}/build" includes="**/classes/test/**/*.class,test-framework/**/*.class" />
     </forbidden-apis>
   </target>
 
   <target name="-check-system-out">
-    <forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/system-out.txt">
+    <forbidden-apis bundledSignatures="jdk-system-out" classpathref="forbidden-apis.classpath">
       <fileset dir="${basedir}/build">
         <include name="**/classes/java/**/*.class"/>
         <!-- this is basically tests -->

Modified: lucene/dev/branches/lucene4547/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/common-build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/common-build.xml Wed Feb  6 00:58:30 2013
@@ -1905,6 +1905,14 @@ ${tests-output}/junit4-*.suites     - pe
     <property name="groovy.loaded" value="true"/>
   </target>
   
+  <!-- Forbidden API Task -->
+  <target name="install-forbidden-apis" unless="forbidden-apis.loaded" depends="ivy-availability-check,ivy-configure">
+    <ivy:cachepath organisation="de.thetaphi" module="forbiddenapis" revision="1.0"
+      inline="true" conf="default" transitive="true" pathid="forbidden-apis.classpath"/>
+    <taskdef name="forbidden-apis" classname="de.thetaphi.forbiddenapis.AntTask" classpathref="forbidden-apis.classpath"/>
+    <property name="forbidden-apis.loaded" value="true"/>
+  </target>
+  
   <!-- PEGDOWN macro: Before using depend on the target "resolve-pegdown" -->
   
   <target name="resolve-pegdown" unless="pegdown.loaded" depends="ivy-availability-check,ivy-configure">

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCache.java Wed Feb  6 00:58:30 2013
@@ -29,6 +29,8 @@ import org.apache.lucene.index.AtomicRea
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocTermOrds;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
@@ -90,20 +92,21 @@ public interface FieldCache {
   }
 
   /**
-   * Hack: When thrown from a Parser (NUMERIC_UTILS_* ones), this stops
-   * processing terms and returns the current FieldCache
-   * array.
-   * @lucene.internal
-   */
-  public static final class StopFillCacheException extends RuntimeException {
-  }
-  
-  /**
    * Marker interface as super-interface to all parsers. It
    * is used to specify a custom parser to {@link
    * SortField#SortField(String, FieldCache.Parser)}.
    */
   public interface Parser {
+    
+    /**
+     * Pulls a {@link TermsEnum} from the given {@link Terms}. This method allows certain parsers
+     * to filter the actual TermsEnum before the field cache is filled.
+     * 
+     * @param terms the {@link Terms} instance to create the {@link TermsEnum} from.
+     * @return a possibly filtered {@link TermsEnum} instance; this method must not return <code>null</code>.
+     * @throws IOException if an {@link IOException} occurs
+     */
+    public TermsEnum termsEnum(Terms terms) throws IOException;
   }
 
   /** Interface to parse bytes from document fields.
@@ -171,6 +174,10 @@ public interface FieldCache {
     public String toString() { 
       return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; 
     }
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return terms.iterator(null);
+    }
   };
 
   /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
@@ -187,6 +194,11 @@ public interface FieldCache {
     public String toString() { 
       return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; 
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return terms.iterator(null);
+    }
   };
 
   /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
@@ -199,6 +211,12 @@ public interface FieldCache {
       // directly from byte[]
       return Integer.parseInt(term.utf8ToString());
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return terms.iterator(null);
+    }
+    
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; 
@@ -215,6 +233,12 @@ public interface FieldCache {
       // directly from byte[]
       return Float.parseFloat(term.utf8ToString());
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return terms.iterator(null);
+    }
+    
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; 
@@ -231,6 +255,12 @@ public interface FieldCache {
       // directly from byte[]
       return Long.parseLong(term.utf8ToString());
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return terms.iterator(null);
+    }
+    
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; 
@@ -247,6 +277,12 @@ public interface FieldCache {
       // directly from byte[]
       return Double.parseDouble(term.utf8ToString());
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return terms.iterator(null);
+    }
+    
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; 
@@ -260,10 +296,14 @@ public interface FieldCache {
   public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
     @Override
     public int parseInt(BytesRef term) {
-      if (NumericUtils.getPrefixCodedIntShift(term) > 0)
-        throw new StopFillCacheException();
       return NumericUtils.prefixCodedToInt(term);
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return NumericUtils.filterPrefixCodedInts(terms.iterator(null));
+    }
+    
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; 
@@ -277,14 +317,17 @@ public interface FieldCache {
   public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
     @Override
     public float parseFloat(BytesRef term) {
-      if (NumericUtils.getPrefixCodedIntShift(term) > 0)
-        throw new StopFillCacheException();
       return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term));
     }
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; 
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return NumericUtils.filterPrefixCodedInts(terms.iterator(null));
+    }
   };
 
   /**
@@ -294,14 +337,17 @@ public interface FieldCache {
   public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
     @Override
     public long parseLong(BytesRef term) {
-      if (NumericUtils.getPrefixCodedLongShift(term) > 0)
-        throw new StopFillCacheException();
       return NumericUtils.prefixCodedToLong(term);
     }
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; 
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return NumericUtils.filterPrefixCodedLongs(terms.iterator(null));
+    }
   };
 
   /**
@@ -311,14 +357,17 @@ public interface FieldCache {
   public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
     @Override
     public double parseDouble(BytesRef term) {
-      if (NumericUtils.getPrefixCodedLongShift(term) > 0)
-        throw new StopFillCacheException();
       return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term));
     }
     @Override
     public String toString() { 
       return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; 
     }
+    
+    @Override
+    public TermsEnum termsEnum(Terms terms) throws IOException {
+      return NumericUtils.filterPrefixCodedLongs(terms.iterator(null));
+    }
   };
   
  
@@ -634,7 +683,7 @@ public interface FieldCache {
       return b.toString();
     }
   }
-
+  
   /**
    * EXPERT: Generates an array of CacheEntry objects representing all items 
    * currently in the FieldCache.

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Wed Feb  6 00:58:30 2013
@@ -298,7 +298,7 @@ class FieldCacheImpl implements FieldCac
           }
         }
 
-        final TermsEnum termsEnum = terms.iterator(null);
+        final TermsEnum termsEnum = termsEnum(terms);
 
         DocsEnum docs = null;
         FixedBitSet docsWithField = null;
@@ -307,11 +307,7 @@ class FieldCacheImpl implements FieldCac
           if (term == null) {
             break;
           }
-          try {
-            visitTerm(term);
-          } catch (StopFillCacheException stop) {
-            break;
-          }
+          visitTerm(term);
           docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
           while (true) {
             final int docID = docs.nextDoc();
@@ -331,6 +327,7 @@ class FieldCacheImpl implements FieldCac
       }
     }
 
+    protected abstract TermsEnum termsEnum(Terms terms) throws IOException;
     protected abstract void visitTerm(BytesRef term);
     protected abstract void visitDoc(int docID);
   }
@@ -425,6 +422,11 @@ class FieldCacheImpl implements FieldCac
           public void visitDoc(int docID) {
             values[docID] = currentValue;
           }
+
+          @Override
+          protected TermsEnum termsEnum(Terms terms) throws IOException {
+            return parser.termsEnum(terms);
+          }
         };
 
       u.uninvert(reader, key.field, setDocsWithField);
@@ -505,6 +507,11 @@ class FieldCacheImpl implements FieldCac
           public void visitDoc(int docID) {
             values[docID] = currentValue;
           }
+          
+          @Override
+          protected TermsEnum termsEnum(Terms terms) throws IOException {
+            return parser.termsEnum(terms);
+          }
         };
 
       u.uninvert(reader, key.field, setDocsWithField);
@@ -610,6 +617,11 @@ class FieldCacheImpl implements FieldCac
           public void visitDoc(int docID) {
             values[docID] = currentValue;
           }
+          
+          @Override
+          protected TermsEnum termsEnum(Terms terms) throws IOException {
+            return parser.termsEnum(terms);
+          }
         };
 
       u.uninvert(reader, key.field, setDocsWithField);
@@ -779,6 +791,11 @@ class FieldCacheImpl implements FieldCac
           public void visitDoc(int docID) {
             values[docID] = currentValue;
           }
+          
+          @Override
+          protected TermsEnum termsEnum(Terms terms) throws IOException {
+            return parser.termsEnum(terms);
+          }
         };
 
       u.uninvert(reader, key.field, setDocsWithField);
@@ -877,6 +894,11 @@ class FieldCacheImpl implements FieldCac
           public void visitDoc(int docID) {
             values[docID] = currentValue;
           }
+          
+          @Override
+          protected TermsEnum termsEnum(Terms terms) throws IOException {
+            return parser.termsEnum(terms);
+          }
         };
 
       u.uninvert(reader, key.field, setDocsWithField);
@@ -975,6 +997,11 @@ class FieldCacheImpl implements FieldCac
           public void visitDoc(int docID) {
             values[docID] = currentValue;
           }
+          
+          @Override
+          protected TermsEnum termsEnum(Terms terms) throws IOException {
+            return parser.termsEnum(terms);
+          }
         };
 
       u.uninvert(reader, key.field, setDocsWithField);

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java Wed Feb  6 00:58:30 2013
@@ -22,6 +22,8 @@ import org.apache.lucene.document.Double
 import org.apache.lucene.document.FloatField; // javadocs
 import org.apache.lucene.document.IntField; // javadocs
 import org.apache.lucene.document.LongField; // javadocs
+import org.apache.lucene.index.FilteredTermsEnum;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.NumericRangeFilter;
 import org.apache.lucene.search.NumericRangeQuery; // for javadocs
 
@@ -456,4 +458,41 @@ public final class NumericUtils {
   
   }
   
+  /**
+   * Filters the given {@link TermsEnum} by accepting only prefix coded 64 bit
+   * terms with a shift value of <tt>0</tt>.
+   * 
+   * @param termsEnum
+   *          the terms enum to filter
+   * @return a filtered {@link TermsEnum} that only returns prefix coded 64 bit
+   *         terms with a shift value of <tt>0</tt>.
+   */
+  public static TermsEnum filterPrefixCodedLongs(TermsEnum termsEnum) {
+    return new FilteredTermsEnum(termsEnum, false) {
+      @Override
+      protected AcceptStatus accept(BytesRef term) {
+        return NumericUtils.getPrefixCodedLongShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
+      }
+    };
+  }
+  
+  /**
+   * Filters the given {@link TermsEnum} by accepting only prefix coded 32 bit
+   * terms with a shift value of <tt>0</tt>.
+   * 
+   * @param termsEnum
+   *          the terms enum to filter
+   * @return a filtered {@link TermsEnum} that only returns prefix coded 32 bit
+   *         terms with a shift value of <tt>0</tt>.
+   */
+  public static TermsEnum filterPrefixCodedInts(TermsEnum termsEnum) {
+    return new FilteredTermsEnum(termsEnum, false) {
+      
+      @Override
+      protected AcceptStatus accept(BytesRef term) {
+        return NumericUtils.getPrefixCodedIntShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
+      }
+    };
+  }
+  
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java Wed Feb  6 00:58:30 2013
@@ -20,6 +20,8 @@ package org.apache.lucene.search;
 import java.io.IOException;
 
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -94,6 +96,11 @@ final class JustCompileSearch {
     public long parseLong(BytesRef string) {
       throw new UnsupportedOperationException(UNSUPPORTED_MSG);
     }
+
+    @Override
+    public TermsEnum termsEnum(Terms terms) {
+      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+    }
     
   }
   
@@ -103,6 +110,11 @@ final class JustCompileSearch {
     public double parseDouble(BytesRef term) {
       throw new UnsupportedOperationException(UNSUPPORTED_MSG);
     }
+
+    @Override
+    public TermsEnum termsEnum(Terms terms) {
+      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+    }
     
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestSort.java Wed Feb  6 00:58:30 2013
@@ -51,6 +51,8 @@ import org.apache.lucene.index.RandomInd
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.FieldValueHitQueue.Entry;
 import org.apache.lucene.store.Directory;
@@ -625,8 +627,13 @@ public class TestSort extends LuceneTest
       public final int parseInt(final BytesRef term) {
         return (term.bytes[term.offset]-'A') * 123456;
       }
-    }), SortField.FIELD_DOC);
-    assertMatches(full, queryA, sort, "JIHGFEDCBA");
+      
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
+    }), SortField.FIELD_DOC );
+    assertMatches (full, queryA, sort, "JIHGFEDCBA");
     assertSaneFieldCaches(getTestName() + " IntParser");
     fc.purgeAllCaches();
 
@@ -635,8 +642,12 @@ public class TestSort extends LuceneTest
       public final float parseFloat(final BytesRef term) {
         return (float) Math.sqrt( term.bytes[term.offset]);
       }
-    }), SortField.FIELD_DOC);
-    assertMatches(full, queryA, sort, "JIHGFEDCBA");
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
+    }), SortField.FIELD_DOC );
+    assertMatches (full, queryA, sort, "JIHGFEDCBA");
     assertSaneFieldCaches(getTestName() + " FloatParser");
     fc.purgeAllCaches();
 
@@ -645,8 +656,13 @@ public class TestSort extends LuceneTest
       public final long parseLong(final BytesRef term) {
         return (term.bytes[term.offset]-'A') * 1234567890L;
       }
-    }), SortField.FIELD_DOC);
-    assertMatches(full, queryA, sort, "JIHGFEDCBA");
+      
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
+    }), SortField.FIELD_DOC );
+    assertMatches (full, queryA, sort, "JIHGFEDCBA");
     assertSaneFieldCaches(getTestName() + " LongParser");
     fc.purgeAllCaches();
 
@@ -655,8 +671,12 @@ public class TestSort extends LuceneTest
       public final double parseDouble(final BytesRef term) {
         return Math.pow( term.bytes[term.offset], (term.bytes[term.offset]-'A'));
       }
-    }), SortField.FIELD_DOC);
-    assertMatches(full, queryA, sort, "JIHGFEDCBA");
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
+    }), SortField.FIELD_DOC );
+    assertMatches (full, queryA, sort, "JIHGFEDCBA");
     assertSaneFieldCaches(getTestName() + " DoubleParser");
     fc.purgeAllCaches();
 
@@ -665,8 +685,13 @@ public class TestSort extends LuceneTest
       public final byte parseByte(final BytesRef term) {
         return (byte) (term.bytes[term.offset]-'A');
       }
-    }), SortField.FIELD_DOC);
-    assertMatches(full, queryA, sort, "JIHGFEDCBA");
+
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
+    }), SortField.FIELD_DOC );
+    assertMatches (full, queryA, sort, "JIHGFEDCBA");
     assertSaneFieldCaches(getTestName() + " ByteParser");
     fc.purgeAllCaches();
 
@@ -675,8 +700,12 @@ public class TestSort extends LuceneTest
       public final short parseShort(final BytesRef term) {
         return (short) (term.bytes[term.offset]-'A');
       }
-    }), SortField.FIELD_DOC);
-    assertMatches(full, queryA, sort, "JIHGFEDCBA");
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
+    }), SortField.FIELD_DOC );
+    assertMatches (full, queryA, sort, "JIHGFEDCBA");
     assertSaneFieldCaches(getTestName() + " ShortParser");
     fc.purgeAllCaches();
   }
@@ -752,6 +781,11 @@ public class TestSort extends LuceneTest
       public final int parseInt(final BytesRef term) {
         return (term.bytes[term.offset]-'A') * 123456;
       }
+      
+      @Override
+      public TermsEnum termsEnum(Terms terms) throws IOException {
+        return terms.iterator(null);
+      }
     };
 
     @Override

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/build.xml?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/build.xml Wed Feb  6 00:58:30 2013
@@ -27,6 +27,7 @@
 
   <path id="classpath">
     <pathelement path="${memory.jar}"/>
+    <pathelement path="${queries.jar}"/>
     <path refid="base.classpath"/>
   </path>
 

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java Wed Feb  6 00:58:30 2013
@@ -187,11 +187,9 @@ public class Highlighter
     ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
     StringBuilder newText=new StringBuilder();
 
-      CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
-      OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
-      tokenStream.addAttribute(PositionIncrementAttribute.class);
-      tokenStream.reset();
-
+    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+    tokenStream.reset();
     TextFragment currentFrag =  new TextFragment(newText,newText.length(), docFrags.size());
 
     if (fragmentScorer instanceof QueryScorer) {

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Wed Feb  6 00:58:30 2013
@@ -18,7 +18,7 @@ package org.apache.lucene.search.highlig
  */
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -29,11 +29,20 @@ import java.util.TreeSet;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterAtomicReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
 import org.apache.lucene.search.spans.SpanFirstQuery;
@@ -44,6 +53,8 @@ import org.apache.lucene.search.spans.Sp
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.IOUtils;
+
 
 /**
  * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether 
@@ -53,12 +64,13 @@ public class WeightedSpanTermExtractor {
 
   private String fieldName;
   private TokenStream tokenStream;
-  private Map<String,AtomicReaderContext> readers = new HashMap<String,AtomicReaderContext>(10); 
   private String defaultField;
   private boolean expandMultiTermQuery;
   private boolean cachedTokenStream;
   private boolean wrapToCaching = true;
   private int maxDocCharsToAnalyze;
+  private AtomicReader reader = null;
+
 
   public WeightedSpanTermExtractor() {
   }
@@ -69,18 +81,6 @@ public class WeightedSpanTermExtractor {
     }
   }
 
-  private void closeReaders() {
-    Collection<AtomicReaderContext> ctxSet = readers.values();
-
-    for (final AtomicReaderContext ctx : ctxSet) {
-      try {
-        ctx.reader().close();
-      } catch (IOException e) {
-        // alert?
-      }
-    }
-  }
-
   /**
    * Fills a <code>Map</code> with <@link WeightedSpanTerm>s using the terms from the supplied <code>Query</code>.
    * 
@@ -146,21 +146,14 @@ public class WeightedSpanTermExtractor {
       if (q != null) {
         extract(q, terms);
       }
+    } else if (query instanceof CommonTermsQuery) {
+      // specialized since rewriting would change the result query 
+      // this query is TermContext sensitive.
+      extractWeightedTerms(terms, query);
     } else if (query instanceof DisjunctionMaxQuery) {
       for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
         extract(iterator.next(), terms);
       }
-    } else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
-      MultiTermQuery mtq = ((MultiTermQuery)query);
-      if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
-        mtq = (MultiTermQuery) mtq.clone();
-        mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
-        query = mtq;
-      }
-      if (mtq.getField() != null) {
-        IndexReader ir = getLeafContextForField(mtq.getField()).reader();
-        extract(query.rewrite(ir), terms);
-      }
     } else if (query instanceof MultiPhraseQuery) {
       final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
       final List<Term[]> termArrays = mpq.getTermArrays();
@@ -210,12 +203,30 @@ public class WeightedSpanTermExtractor {
         sp.setBoost(query.getBoost());
         extractWeightedSpanTerms(terms, sp);
       }
+    } else {
+      Query origQuery = query;
+      if (query instanceof MultiTermQuery) {
+        if (!expandMultiTermQuery) {
+          return;
+        }
+        MultiTermQuery copy = (MultiTermQuery) query.clone();
+        copy.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        origQuery = copy;
+      }
+      final IndexReader reader = getLeafContext().reader();
+      Query rewritten = origQuery.rewrite(reader);
+      if (rewritten != origQuery) {
+        // only rewrite once and then flatten again - the rewritten query could have a special treatment
+        // if this method is overridden in a subclass or above in the next recursion
+        extract(rewritten, terms);
+      } 
     }
     extractUnknownQuery(query, terms);
   }
 
   protected void extractUnknownQuery(Query query,
       Map<String, WeightedSpanTerm> terms) throws IOException {
+    
     // for sub-classing to extract custom queries
   }
 
@@ -249,7 +260,7 @@ public class WeightedSpanTermExtractor {
     final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
     if (mustRewriteQuery) {
       for (final String field : fieldNames) {
-        final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader());
+        final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader());
         queries.put(field, rewrittenQuery);
         rewrittenQuery.extractTerms(nonWeightedTerms);
       }
@@ -266,7 +277,7 @@ public class WeightedSpanTermExtractor {
       } else {
         q = spanQuery;
       }
-      AtomicReaderContext context = getLeafContextForField(field);
+      AtomicReaderContext context = getLeafContext();
       Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
       TreeSet<Term> extractedTerms = new TreeSet<Term>();
       q.extractTerms(extractedTerms);
@@ -338,23 +349,79 @@ public class WeightedSpanTermExtractor {
     return rv;
   }
 
-  protected AtomicReaderContext getLeafContextForField(String field) throws IOException {
-    if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
-      tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
-      cachedTokenStream = true;
-    }
-    AtomicReaderContext context = readers.get(field);
-    if (context == null) {
-      MemoryIndex indexer = new MemoryIndex();
-      indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+  protected AtomicReaderContext getLeafContext() throws IOException {
+    if (reader == null) {
+      if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) {
+        assert !cachedTokenStream;
+        tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+        cachedTokenStream = true;
+      }
+      final MemoryIndex indexer = new MemoryIndex(true);
+      indexer.addField(DelegatingAtomicReader.FIELD_NAME, tokenStream);
       tokenStream.reset();
-      IndexSearcher searcher = indexer.createSearcher();
+      final IndexSearcher searcher = indexer.createSearcher();
       // MEM index has only atomic ctx
-      context = (AtomicReaderContext) searcher.getTopReaderContext();
-      readers.put(field, context);
+      reader = new DelegatingAtomicReader(((AtomicReaderContext)searcher.getTopReaderContext()).reader());
+    }
+    return reader.getContext();
+  }
+  
+  /*
+   * This reader will just delegate every call to a single field in the wrapped
+   * AtomicReader. This way we only need to build this field once rather than
+   * N-Times
+   */
+  static final class DelegatingAtomicReader extends FilterAtomicReader {
+    private static final String FIELD_NAME = "shadowed_field";
+
+    DelegatingAtomicReader(AtomicReader in) {
+      super(in);
+    }
+    
+    @Override
+    public FieldInfos getFieldInfos() {
+      throw new UnsupportedOperationException();
     }
 
-    return context;
+    @Override
+    public Fields fields() throws IOException {
+      return new FilterFields(super.fields()) {
+        @Override
+        public Terms terms(String field) throws IOException {
+          return super.terms(DelegatingAtomicReader.FIELD_NAME);
+        }
+
+        @Override
+        public Iterator<String> iterator() {
+          return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
+        }
+
+        @Override
+        public int size() {
+          return 1;
+        }
+      };
+    }
+
+    @Override
+    public NumericDocValues getNumericDocValues(String field) throws IOException {
+      return super.getNumericDocValues(FIELD_NAME);
+    }
+    
+    @Override
+    public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+      return super.getBinaryDocValues(FIELD_NAME);
+    }
+    
+    @Override
+    public SortedDocValues getSortedDocValues(String field) throws IOException {
+      return super.getSortedDocValues(FIELD_NAME);
+    }
+    
+    @Override
+    public NumericDocValues getNormValues(String field) throws IOException {
+      return super.getNormValues(FIELD_NAME);
+    }
   }
 
   /**
@@ -401,7 +468,7 @@ public class WeightedSpanTermExtractor {
     try {
       extract(query, terms);
     } finally {
-      closeReaders();
+      IOUtils.close(reader);
     }
 
     return terms;
@@ -449,8 +516,7 @@ public class WeightedSpanTermExtractor {
         weightedSpanTerm.weight *= idf;
       }
     } finally {
-
-      closeReaders();
+      IOUtils.close(reader);
     }
 
     return terms;

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java Wed Feb  6 00:58:30 2013
@@ -28,9 +28,12 @@ import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
@@ -92,8 +95,7 @@ public class FieldQuery {
         if( !clause.isProhibited() )
           flatten( clause.getQuery(), reader, flatQueries );
       }
-    }
-    else if( sourceQuery instanceof DisjunctionMaxQuery ){
+    } else if( sourceQuery instanceof DisjunctionMaxQuery ){
       DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
       for( Query query : dmq ){
         flatten( query, reader, flatQueries );
@@ -103,12 +105,6 @@ public class FieldQuery {
       if( !flatQueries.contains( sourceQuery ) )
         flatQueries.add( sourceQuery );
     }
-    else if (sourceQuery instanceof MultiTermQuery && reader != null) {
-      MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
-      copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
-      BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
-      flatten(mtqTerms, reader, flatQueries);
-    }
     else if( sourceQuery instanceof PhraseQuery ){
       if( !flatQueries.contains( sourceQuery ) ){
         PhraseQuery pq = (PhraseQuery)sourceQuery;
@@ -118,6 +114,31 @@ public class FieldQuery {
           flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
         }
       }
+    } else if (sourceQuery instanceof ConstantScoreQuery) {
+      final Query q = ((ConstantScoreQuery) sourceQuery).getQuery();
+      if (q != null) {
+        flatten(q, reader, flatQueries);
+      }
+    } else if (sourceQuery instanceof FilteredQuery) {
+      final Query q = ((FilteredQuery) sourceQuery).getQuery();
+      if (q != null) {
+        flatten(q, reader, flatQueries);
+      }
+    } else if (reader != null){
+      Query query = sourceQuery;
+      if (sourceQuery instanceof MultiTermQuery) {
+        MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
+        copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
+        query = copy;
+      }
+      Query rewritten = query.rewrite(reader);
+      if (rewritten != query) {
+        // only rewrite once and then flatten again - the rewritten query could have a special treatment
+        // if this method is overridden in a subclass.
+        flatten(rewritten, reader, flatQueries);
+        
+      } 
+      // if the query is already rewritten we discard it
     }
     // else discard queries
   }

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Wed Feb  6 00:58:30 2013
@@ -46,6 +46,7 @@ import org.apache.lucene.index.StoredDoc
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
@@ -114,6 +115,87 @@ public class HighlighterTest extends Bas
     }
   }
   
+  public void testHighlightingCommonTermsQuery() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+    query.add(new Term(FIELD_NAME, "this"));
+    query.add(new Term(FIELD_NAME, "long"));
+    query.add(new Term(FIELD_NAME, "very"));
+
+    searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
+
+    StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+    String storedField = doc.get(FIELD_NAME);
+
+    TokenStream stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+    highlighter.setTextFragmenter(fragmenter);
+    String fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+    
+    doc = searcher.doc(hits.scoreDocs[1].doc);
+    storedField = doc.get(FIELD_NAME);
+
+    stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+    fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+  }
+  
+  public void testHighlightUnknowQueryAfterRewrite() throws IOException, InvalidTokenOffsetsException {
+    Query query = new Query() {
+      
+      @Override
+      public Query rewrite(IndexReader reader) throws IOException {
+        CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+        query.add(new Term(FIELD_NAME, "this"));
+        query.add(new Term(FIELD_NAME, "long"));
+        query.add(new Term(FIELD_NAME, "very"));
+        return query;
+      }
+
+      @Override
+      public String toString(String field) {
+        return null;
+      }
+      
+    };
+    
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+    
+    searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
+
+    StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc);
+    String storedField = doc.get(FIELD_NAME);
+
+    TokenStream stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer);
+    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
+    highlighter.setTextFragmenter(fragmenter);
+    String fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+    
+    doc = searcher.doc(hits.scoreDocs[1].doc);
+    storedField = doc.get(FIELD_NAME);
+
+    stream = TokenSources.getAnyTokenStream(searcher
+        .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
+    highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+    fragment = highlighter.getBestFragment(stream, storedField);
+    assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+    
+  }
+  
   public void testHighlightingWithDefaultField() throws Exception {
 
     String s1 = "I call our world Flatland, not because we call it so,";
@@ -150,7 +232,7 @@ public class HighlighterTest extends Bas
         "Query in a named field does not result in highlighting when that field isn't in the query",
         s1, highlightField(q, FIELD_NAME, s1));
   }
-
+  
   /**
    * This method intended for use with <tt>testHighlightingWithDefaultField()</tt>
    */
@@ -603,7 +685,7 @@ public class HighlighterTest extends Bas
     // Not sure we can assert anything here - just running to check we dont
     // throw any exceptions
   }
-  
+
   public void testSpanHighlighting() throws Exception {
     Query query1 = new SpanNearQuery(new SpanQuery[] {
         new SpanTermQuery(new Term(FIELD_NAME, "wordx")),

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java Wed Feb  6 00:58:30 2013
@@ -18,6 +18,8 @@ package org.apache.lucene.search.vectorh
 import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -26,7 +28,13 @@ import org.apache.lucene.index.Directory
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -62,4 +70,47 @@ public class FastVectorHighlighterTest e
     writer.close();
     dir.close();
   }
+  
+  public void testCommonTermsQueryHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT,  new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true);
+    type.freeze();
+    String[] texts = {
+        "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
+        "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
+        "JFK has been shot", "John Kennedy has been shot",
+        "This text has a typo in referring to Keneddy",
+        "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
+    for (int i = 0; i < texts.length; i++) {
+      Document doc = new Document();
+      Field field = new Field("field", texts[i], type);
+      doc.add(field);
+      writer.addDocument(doc);
+    }
+    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
+    query.add(new Term("field", "text"));
+    query.add(new Term("field", "long"));
+    query.add(new Term("field", "very"));
+   
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+    FieldQuery fieldQuery  = highlighter.getFieldQuery(query, reader);
+    String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1);
+    assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);
+
+    fieldQuery  = highlighter.getFieldQuery(query, reader);
+    bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc, "field", 1000, 1);
+    assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
 }

Modified: lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java Wed Feb  6 00:58:30 2013
@@ -23,8 +23,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
@@ -35,6 +40,7 @@ import org.apache.lucene.search.TermRang
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
 import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 public class FieldQueryTest extends AbstractTestCase {
@@ -905,4 +911,40 @@ public class FieldQueryTest extends Abst
     assertNotNull (fq.searchPhrase(F, phraseCandidate));
   }
   
+  public void testStopRewrite() throws Exception {
+    Query q = new Query() {
+
+      @Override
+      public String toString(String field) {
+        return "DummyQuery";
+      }
+      
+    };
+    make1d1fIndex( "a" );
+    assertNotNull(reader);
+    new FieldQuery(q, reader, true, true );
+  }
+  
+  public void testFlattenFilteredQuery() throws Exception {
+    Query query = new FilteredQuery(pqF( "A" ), new Filter() {
+      @Override
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
+          throws IOException {
+        return null;
+      }
+    });
+    FieldQuery fq = new FieldQuery( query, true, true );
+    Set<Query> flatQueries = new HashSet<Query>();
+    fq.flatten( query, reader, flatQueries );
+    assertCollectionQueries( flatQueries, tq( "A" ) );
+  }
+  
+  public void testFlattenConstantScoreQuery() throws Exception {
+    Query query = new ConstantScoreQuery(pqF( "A" ));
+    FieldQuery fq = new FieldQuery( query, true, true );
+    Set<Query> flatQueries = new HashSet<Query>();
+    fq.flatten( query, reader, flatQueries );
+    assertCollectionQueries( flatQueries, tq( "A" ) );
+  }
+  
 }

Modified: lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Wed Feb  6 00:58:30 2013
@@ -465,7 +465,9 @@ public class MemoryIndex {
       throw new RuntimeException(e);
     } finally {
       try {
-        if (stream != null) stream.close();
+        if (stream != null) {
+          stream.close();
+        }
       } catch (IOException e2) {
         throw new RuntimeException(e2);
       }

Modified: lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj?rev=1442821&r1=1442820&r2=1442821&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj (original)
+++ lucene/dev/branches/lucene4547/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj Wed Feb  6 00:58:30 2013
@@ -162,7 +162,7 @@ PARSER_END(QueryParser)
 | <CARAT:     "^" > : Boost
 | <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
 | <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
-| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <FUZZY_SLOP:     "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*)  >
 | <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
 | <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
 | <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >



Mime
View raw message