lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dsmi...@apache.org
Subject lucene-solr:branch_7x: SOLR-11976: TokenizerChain.normalize was only considering the first MultiTermAwareComponent
Date Fri, 09 Mar 2018 03:32:42 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 1c504c974 -> de5374688


SOLR-11976: TokenizerChain.normalize was only considering the first MultiTermAwareComponent

(cherry picked from commit 588e19e)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/de537468
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/de537468
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/de537468

Branch: refs/heads/branch_7x
Commit: de53746880edf0271ea967bf8b561cf5bc5a7194
Parents: 1c504c9
Author: David Smiley <dsmiley@apache.org>
Authored: Thu Mar 8 22:30:39 2018 -0500
Committer: David Smiley <dsmiley@apache.org>
Committed: Thu Mar 8 22:32:34 2018 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  4 ++
 .../apache/solr/analysis/TokenizerChain.java    |  9 ++--
 .../solr/analysis/TokenizerChainTest.java       | 43 ++++++++++++++++++++
 3 files changed, 52 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/de537468/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f36dba5..31d2868 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -234,6 +234,10 @@ Bug Fixes
 
 * SOLR-12061: Fix substitution bug in API V1 to V2 migration when using SolrJ with V2 API. (Tomás Fernánadez Löbbe)
 
+* SOLR-11976: TokenizerChain.normalize: only the first filter that is a MultiTermAwareComponent was participating
+  in normalization instead of all.  This bug normally doesn't matter since TextField doesn't call it.
+  (Tim Allison via David Smiley)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/de537468/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
index ab5458c..af7e812 100644
--- a/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
+++ b/solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
@@ -16,14 +16,15 @@
  */
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.*;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 
-import java.io.Reader;
-
 /**
  * An analyzer that uses a tokenizer and a list of token filters to
  * create a TokenStream.
@@ -113,7 +114,7 @@ public final class TokenizerChain extends SolrAnalyzer {
     for (TokenFilterFactory filter : filters) {
       if (filter instanceof MultiTermAwareComponent) {
         filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
-        result = filter.create(in);
+        result = filter.create(result);
       }
     }
     return result;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/de537468/solr/core/src/test/org/apache/solr/analysis/TokenizerChainTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/analysis/TokenizerChainTest.java b/solr/core/src/test/org/apache/solr/analysis/TokenizerChainTest.java
new file mode 100644
index 0000000..2e4c67a
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/analysis/TokenizerChainTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.analysis;
+
+import java.util.Collections;
+
+import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
+import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.Test;
+
+
+public class TokenizerChainTest extends SolrTestCaseJ4 {
+
+  @Test
+  public void testNormalization() throws Exception {
+    String fieldName = "f";
+    TokenFilterFactory[] tff = new TokenFilterFactory[2];
+    tff[0] = new LowerCaseFilterFactory(Collections.EMPTY_MAP);
+    tff[1] = new ASCIIFoldingFilterFactory(Collections.EMPTY_MAP);
+    TokenizerChain tokenizerChain = new TokenizerChain(
+        new MockTokenizerFactory(Collections.EMPTY_MAP),
+        tff);
+    assertEquals(new BytesRef("fooba"),
+        tokenizerChain.normalize(fieldName, "FOOB\u00c4"));
+  }
+}


Mime
View raw message