From commits-return-102379-archive-asf-public=cust-asf.ponee.io@lucene.apache.org Wed Jul 25 10:33:49 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 38B33180792 for ; Wed, 25 Jul 2018 10:33:48 +0200 (CEST) Received: (qmail 55966 invoked by uid 500); 25 Jul 2018 08:33:44 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 55484 invoked by uid 99); 25 Jul 2018 08:33:44 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 25 Jul 2018 08:33:44 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id E23E3E1166; Wed, 25 Jul 2018 08:33:43 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: datcm@apache.org To: commits@lucene.apache.org Date: Wed, 25 Jul 2018 08:34:02 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [20/33] lucene-solr:jira/http2: SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement doesn't work correctly SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement doesn't work correctly Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/995a902d Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/995a902d Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/995a902d Branch: refs/heads/jira/http2 Commit: 995a902d1ad40860ee082b57e4e47c1be52c856e Parents: 55bfadb Author: koji Authored: Mon Jul 23 16:58:46 2018 +0900 Committer: koji Committed: Mon Jul 23 16:58:46 2018 +0900 ---------------------------------------------------------------------- solr/CHANGES.txt | 3 +++ ...PExtractNamedEntitiesUpdateProcessorFactory.java | 16 ++++++++-------- ...PExtractNamedEntitiesUpdateProcessorFactory.java | 9 ++++++--- 3 files changed, 17 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/995a902d/solr/CHANGES.txt ---------------------------------------------------------------------- diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e93e0e7..e1069d8 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -159,6 +159,9 @@ Bug Fixes * SOLR-12553: Allow SignificantTerms Query Parser to use local parameters (Alexandre Rafalovitch) +* SOLR-12570: OpenNLPExtractNamedEntitiesUpdateProcessor cannot support multi fields because pattern replacement + doesn't work correctly. (Koji Sekiguchi) + Optimizations ---------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/995a902d/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java ---------------------------------------------------------------------- diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java index 2a7514d..d69c367 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java @@ -17,8 +17,6 @@ package org.apache.solr.update.processor; -import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; - import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; @@ -57,6 +55,8 @@ import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR; + /** * Extracts named entities using an OpenNLP NER modelFile from the values found in * any matching source field into a configured dest field, after @@ -500,13 +500,13 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory SolrInputField destField = null; String entityName = entity.first(); String entityType = entity.second(); - resolvedDest = resolvedDest.replace(ENTITY_TYPE, entityType); - if (doc.containsKey(resolvedDest)) { - destField = doc.getField(resolvedDest); + final String resolved = resolvedDest.replace(ENTITY_TYPE, entityType); + if (doc.containsKey(resolved)) { + destField = doc.getField(resolved); } else { - SolrInputField targetField = destMap.get(resolvedDest); + SolrInputField targetField = destMap.get(resolved); if (targetField == null) { - destField = new SolrInputField(resolvedDest); + destField = new SolrInputField(resolved); } else { destField = targetField; } @@ -514,7 +514,7 @@ public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory destField.addValue(entityName); // put it in map to avoid concurrent modification... - destMap.put(resolvedDest, destField); + destMap.put(resolved, destField); } } } http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/995a902d/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java ---------------------------------------------------------------------- diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java index dad06a8..851fea0 100644 --- a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java +++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java @@ -82,7 +82,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat f("subtitle", "Ineluctably, Flashman."), f("corrolary_txt", "Forsooth thou bringeth Flashman."), f("notes_txt", "Yes Flashman."), - f("summary", "Many aspire to be Flashman."), + f("summary", "Many aspire to be Flashman in London."), f("descs", "Courage, Flashman.", "Ain't he Flashman."), f("descriptions", "Flashman. Flashman. Flashman."))); @@ -91,6 +91,7 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people")); assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people")); assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation + assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation } public void testEquivalentExtraction() throws Exception { @@ -182,11 +183,13 @@ public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends Updat public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception { SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type", doc(f("id", "1111"), - f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."), - f("foo_x3_x7_s", "Flashman. Whoa."))); + f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."), + f("foo_x3_x7_s", "Flashman in London. Whoa."))); assertNotNull(d); assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s")); + assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s")); assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s")); + assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s")); } }