lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpou...@apache.org
Subject [1/3] lucene-solr:master: LUCENE-7355: Add Analyzer#normalize() and use it in query parsers.
Date Tue, 12 Jul 2016 14:36:45 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master ced914036 -> e92a38af9


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
index 01f3d4d..b8cfc5b 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
@@ -16,6 +16,8 @@
  */
 package org.apache.lucene.analysis;
 
+import org.apache.lucene.util.AttributeFactory;
+
 /**
  * Analyzer for testing that encodes terms as UTF-16 bytes.
  */
@@ -26,4 +28,9 @@ public final class MockBytesAnalyzer extends Analyzer {
         MockTokenizer.KEYWORD, false, MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH);
     return new TokenStreamComponents(t);
   }
+
+  @Override
+  protected AttributeFactory attributeFactory() {
+    return MockUTF16TermAttributeImpl.UTF16_TERM_ATTRIBUTE_FACTORY; // presumably yields UTF-16 encoded term bytes, per the class doc - confirm
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
new file mode 100644
index 0000000..b1aea3d
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockLowerCaseFilter.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/** A {@link TokenFilter} that lowercases the term buffer of every token, in place. */
+public final class MockLowerCaseFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /** Sole constructor; wraps the stream {@code in} whose terms get lowercased. */
+  public MockLowerCaseFilter(TokenStream in) {
+    super(in);
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false; // wrapped stream has no more tokens
+    }
+    CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length()); // only the first length() chars are passed
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
index c9f263d..a5afbec 100644
--- a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
+++ b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
@@ -18,6 +18,7 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 
@@ -84,8 +85,21 @@ public final class TokenizerChain extends SolrAnalyzer {
   }
 
   @Override
+  protected Reader initReaderForNormalization(String fieldName, Reader reader) {
+    if (charFilters == null) {
+      return reader; // no char filters configured: hand the reader back untouched
+    }
+    for (CharFilterFactory factory : charFilters) {
+      if (factory instanceof MultiTermAwareComponent) { // factories that are not multi-term aware are skipped
+        reader = ((CharFilterFactory) ((MultiTermAwareComponent) factory).getMultiTermComponent()).create(reader);
+      }
+    }
+    return reader;
+  }
+
+  @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    Tokenizer tk = tokenizer.create();
+    Tokenizer tk = tokenizer.create(attributeFactory());
     TokenStream ts = tk;
     for (TokenFilterFactory filter : filters) {
       ts = filter.create(ts);
@@ -94,6 +108,18 @@ public final class TokenizerChain extends SolrAnalyzer {
   }
 
   @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) { // chains the multi-term component of each aware filter over `in`
+    TokenStream result = in;
+    for (TokenFilterFactory filter : filters) {
+      if (filter instanceof MultiTermAwareComponent) { // factories that are not multi-term aware are skipped
+        filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
+        result = filter.create(result); // wrap the chain built so far, not the raw input, so every filter applies
+      }
+    }
+    return result;
+  }
+
+  @Override
   public String toString() {
     StringBuilder sb = new StringBuilder("TokenizerChain(");
     for (CharFilterFactory filter: charFilters) {


Mime
View raw message