lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From da...@apache.org
Subject [20/33] lucene-solr:jira/http2: SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement doesn't work correctly
Date Wed, 25 Jul 2018 08:34:02 GMT
SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because
pattern replacement doesn't work correctly


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/995a902d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/995a902d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/995a902d

Branch: refs/heads/jira/http2
Commit: 995a902d1ad40860ee082b57e4e47c1be52c856e
Parents: 55bfadb
Author: koji <koji@apache.org>
Authored: Mon Jul 23 16:58:46 2018 +0900
Committer: koji <koji@apache.org>
Committed: Mon Jul 23 16:58:46 2018 +0900

----------------------------------------------------------------------
 solr/CHANGES.txt                                    |  3 +++
 ...PExtractNamedEntitiesUpdateProcessorFactory.java | 16 ++++++++--------
 ...PExtractNamedEntitiesUpdateProcessorFactory.java |  9 ++++++---
 3 files changed, 17 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/995a902d/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e93e0e7..e1069d8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -159,6 +159,9 @@ Bug Fixes
 
 * SOLR-12553: Allow SignificantTerms Query Parser to use local parameters (Alexandre Rafalovitch)
 
+* SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement
+  doesn't work correctly. (Koji Sekiguchi)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/995a902d/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
index 2a7514d..d69c367 100644
--- a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
+++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -17,8 +17,6 @@
 
 package org.apache.solr.update.processor;
 
-import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
-
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
@@ -57,6 +55,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
+
 /**
  * Extracts named entities using an OpenNLP NER <code>modelFile</code> from the values found in
  * any matching <code>source</code> field into a configured <code>dest</code> field, after
@@ -500,13 +500,13 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
               SolrInputField destField = null;
               String entityName = entity.first();
               String entityType = entity.second();
-              resolvedDest = resolvedDest.replace(ENTITY_TYPE, entityType);
-              if (doc.containsKey(resolvedDest)) {
-                destField = doc.getField(resolvedDest);
+              final String resolved = resolvedDest.replace(ENTITY_TYPE, entityType);
+              if (doc.containsKey(resolved)) {
+                destField = doc.getField(resolved);
               } else {
-                SolrInputField targetField = destMap.get(resolvedDest);
+                SolrInputField targetField = destMap.get(resolved);
                 if (targetField == null) {
-                  destField = new SolrInputField(resolvedDest);
+                  destField = new SolrInputField(resolved);
                 } else {
                   destField = targetField;
                 }
@@ -514,7 +514,7 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
               destField.addValue(entityName);
 
               // put it in map to avoid concurrent modification...
-              destMap.put(resolvedDest, destField);
+              destMap.put(resolved, destField);
             }
           }
         }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/995a902d/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
index dad06a8..851fea0 100644
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
+++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -82,7 +82,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
             f("subtitle", "Ineluctably, Flashman."),
             f("corrolary_txt", "Forsooth thou bringeth Flashman."),
             f("notes_txt", "Yes Flashman."),
-            f("summary", "Many aspire to be Flashman."),
+            f("summary", "Many aspire to be Flashman in London."),
             f("descs", "Courage, Flashman.", "Ain't he Flashman."),
             f("descriptions", "Flashman. Flashman. Flashman.")));
 
@@ -91,6 +91,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
     assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
     assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
     assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
+    assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
   }
 
   public void testEquivalentExtraction() throws Exception {
@@ -182,11 +183,13 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat
   public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
     SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
         doc(f("id", "1111"),
-            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
-            f("foo_x3_x7_s", "Flashman. Whoa.")));
+            f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
+            f("foo_x3_x7_s", "Flashman in London. Whoa.")));
 
     assertNotNull(d);
     assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
+    assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
     assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
+    assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
   }
 }


Mime
View raw message