Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 232BE17CC1 for ; Sun, 19 Apr 2015 15:14:51 +0000 (UTC) Received: (qmail 34372 invoked by uid 500); 19 Apr 2015 15:14:50 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 34080 invoked by uid 500); 19 Apr 2015 15:14:50 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 34039 invoked by uid 99); 19 Apr 2015 15:14:50 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 19 Apr 2015 15:14:50 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 4AF63E0F7A; Sun, 19 Apr 2015 15:14:50 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit From: britter@apache.org To: commits@commons.apache.org Date: Sun, 19 Apr 2015 15:14:53 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [04/12] [text] Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString. Make HumanNameParser return a name object. Introduce a new wrapper object for strings to be parsed called NameString. 
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/685f9a86 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/685f9a86 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/685f9a86 Branch: refs/heads/SANDBOX-498 Commit: 685f9a864d46cc526b14e3a7476465c49d991478 Parents: 9a0cc85 Author: Benedikt Ritter Authored: Sun Apr 19 16:22:45 2015 +0200 Committer: Benedikt Ritter Committed: Sun Apr 19 16:22:45 2015 +0200 ---------------------------------------------------------------------- .../commons/text/names/HumanNameParser.java | 36 ++--- .../org/apache/commons/text/names/Name.java | 141 ++++++------------- .../apache/commons/text/names/NameString.java | 134 ++++++++++++++++++ .../commons/text/names/HumanNameParserTest.java | 24 ++-- .../commons/text/names/NameStringTest.java | 104 ++++++++++++++ .../org/apache/commons/text/names/NameTest.java | 104 -------------- 6 files changed, 315 insertions(+), 228 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/HumanNameParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java index fa2433a..df8e55c 100644 --- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java +++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java @@ -195,14 +195,14 @@ public class HumanNameParser { /** * Consumes the string and creates the name parts. * - * @param nameStr the name to parse. Must not be null. + * @param name the name to parse. Must not be null. * @throws NameParseException if the parser fails to retrieve the name parts. - * @throws NullPointerException if nameStr is null. 
+ * @throws NullPointerException if name is null. */ - public void parse(String nameStr) { - Objects.requireNonNull(nameStr, "Parameter 'nameStr' must not be null."); + public Name parse(String name) { + Objects.requireNonNull(name, "Parameter 'name' must not be null."); - Name name = new Name(nameStr); + NameString nameString = new NameString(name); String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*"; String prefixes = StringUtils.join(this.prefixes, " |") + " "; @@ -218,28 +218,30 @@ public class HumanNameParser { String firstRegex = "(?i)^([^ ]+)"; // get nickname, if there is one - this.nickname = name.chopWithRegex(nicknamesRegex, 2); + this.nickname = nameString.chopWithRegex(nicknamesRegex, 2); // get suffix, if there is one - this.suffix = name.chopWithRegex(suffixRegex, 1); + this.suffix = nameString.chopWithRegex(suffixRegex, 1); - // flip the before-comma and after-comma parts of the name - name.flip(","); + // flip the before-comma and after-comma parts of the nameString + nameString.flip(","); - // get the last name - this.last = name.chopWithRegex(lastRegex, 0); + // get the last nameString + this.last = nameString.chopWithRegex(lastRegex, 0); // get the first initial, if there is one - this.leadingInit = name.chopWithRegex(leadingInitRegex, 1); + this.leadingInit = nameString.chopWithRegex(leadingInitRegex, 1); - // get the first name - this.first = name.chopWithRegex(firstRegex, 0); + // get the first nameString + this.first = nameString.chopWithRegex(firstRegex, 0); if (StringUtils.isBlank(this.first)) { - throw new NameParseException("Couldn't find a first name in '{" + name.getStr() + "}'"); + throw new NameParseException("Couldn't find a first name in '{" + nameString.getStr() + "}'"); } - // if anything's left, that's the middle name - this.middle = name.getStr(); + // if anything's left, that's the middle nameString + this.middle = nameString.getStr(); + + return new Name(leadingInit, first, nickname, middle, last, suffix); } } 
http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/Name.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java index 0dd2560..3067ba5 100644 --- a/src/main/java/org/apache/commons/text/names/Name.java +++ b/src/main/java/org/apache/commons/text/names/Name.java @@ -16,119 +16,70 @@ */ package org.apache.commons.text.names; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.Objects; /** - *

<p>A {@code Name} object that encapsulates a name string, and contains the logic - * for handling with Regexes.</p>

+ * An object representing the result of parsing a Name. * - *

<p>This class is not thread-safe.</p>

+ *

<p>This class is immutable.</p>

*/ -public class Name { +public final class Name { - /** - * Encapsulated string. Not immutable! - */ - private String str; + private final String leadingInitial; + private final String firstName; + private final String nickName; + private final String middleName; + private final String lastName; + private final String suffix; - /** - * Creates a new Name object. - * - * @param str encapsulated string. - */ - public Name(String str) { - this.str = str; + Name(String leadingInitial, String firstName, String nickName, String middleName, String lastName, String suffix) { + this.leadingInitial = leadingInitial; + this.firstName = firstName; + this.nickName = nickName; + this.middleName = middleName; + this.lastName = lastName; + this.suffix = suffix; } - /** - * Gets the encapsulated string. - * - * @return encapsulated string - */ - public String getStr() { - return str; + public String getLeadingInitial() { + return leadingInitial; } - /** - * Sets the encapsulated string value. - * - * @param str string value - */ - public void setStr(String str) { - this.str = str; - this.norm(); + public String getFirstName() { + return firstName; } - /** - * Uses a regex to chop off and return part of the namestring. - * There are two parts: first, it returns the matched substring, - * and then it removes that substring from the encapsulated - * string and normalizes it. 
- * - * @param regex matches the part of the namestring to chop off - * @param submatchIndex which of the parenthesized submatches to use - * @return the part of the namestring that got chopped off - */ - public String chopWithRegex(String regex, int submatchIndex) { - String chopped = ""; - Pattern pattern = Pattern.compile(regex); - Matcher matcher = pattern.matcher(this.str); + public String getNickName() { + return nickName; + } - // workdaround for numReplacements in Java - int numReplacements = 0; - while (matcher.find()) { - numReplacements++; - } + public String getMiddleName() { + return middleName; + } - // recreate or the groups are gone - pattern = Pattern.compile(regex); - matcher = pattern.matcher(this.str); - if (matcher.find()) { - boolean subset = matcher.groupCount() > submatchIndex; - if (subset) { - this.str = this.str.replaceAll(regex, " "); - if (numReplacements > 1) { - throw new NameParseException("The regex being used to find the name has multiple matches."); - } - this.norm(); - return matcher.group(submatchIndex).trim(); - } - } - return chopped; + public String getLastName() { + return lastName; } - /** - * Flips the front and back parts of a name with one another. - * Front and back are determined by a specified character somewhere in the - * middle of the string. - * - * @param flipAroundChar the character(s) demarcating the two halves you want to flip. - * @throws NameParseException if a regex fails or a condition is not expected - */ - public void flip(String flipAroundChar) { - String[] parts = this.str.split(flipAroundChar); - if (parts != null) { - if (parts.length == 2) { - this.str = String.format("%s %s", parts[1], parts[0]); - this.norm(); - } else if (parts.length > 2) { - throw new NameParseException( - "Can't flip around multiple '" + flipAroundChar + "' characters in namestring."); - } - } + public String getSuffix() { + return suffix; } - /** - *

<p>Removes extra whitespace and punctuation from {@code this.str}.</p>

- * - *

<p>Strips whitespace chars from ends, strips redundant whitespace, converts - * whitespace chars to " ".</p>

- */ - public void norm() { - this.str = this.str.trim(); - this.str = this.str.replaceAll("\\s+", " "); - this.str = this.str.replaceAll(",$", " "); + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Name name = (Name) o; + return Objects.equals(leadingInitial, name.leadingInitial) && + Objects.equals(firstName, name.firstName) && + Objects.equals(nickName, name.nickName) && + Objects.equals(middleName, name.middleName) && + Objects.equals(lastName, name.lastName) && + Objects.equals(suffix, name.suffix); } + @Override + public int hashCode() { + return Objects.hash(leadingInitial, firstName, nickName, middleName, lastName, suffix); + } } http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/main/java/org/apache/commons/text/names/NameString.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/names/NameString.java b/src/main/java/org/apache/commons/text/names/NameString.java new file mode 100644 index 0000000..8f606f2 --- /dev/null +++ b/src/main/java/org/apache/commons/text/names/NameString.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.names; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A wrapper around a String representing a Name to parse. Contains the logic + * for handling executing Regexes on the wrapped name string. + * + *

<p>This class is not thread-safe.</p>

+ */ +final class NameString { + + /** + * Encapsulated string. Not immutable! + */ + private String str; + + /** + * Creates a new Name object. + * + * @param str encapsulated string. + */ + public NameString(String str) { + this.str = str; + } + + /** + * Gets the encapsulated string. + * + * @return encapsulated string + */ + public String getStr() { + return str; + } + + /** + * Sets the encapsulated string value. + * + * @param str string value + */ + public void setStr(String str) { + this.str = str; + this.norm(); + } + + /** + * Uses a regex to chop off and return part of the namestring. + * There are two parts: first, it returns the matched substring, + * and then it removes that substring from the encapsulated + * string and normalizes it. + * + * @param regex matches the part of the namestring to chop off + * @param submatchIndex which of the parenthesized submatches to use + * @return the part of the namestring that got chopped off + */ + public String chopWithRegex(String regex, int submatchIndex) { + String chopped = ""; + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(this.str); + + // workdaround for numReplacements in Java + int numReplacements = 0; + while (matcher.find()) { + numReplacements++; + } + + // recreate or the groups are gone + pattern = Pattern.compile(regex); + matcher = pattern.matcher(this.str); + if (matcher.find()) { + boolean subset = matcher.groupCount() > submatchIndex; + if (subset) { + this.str = this.str.replaceAll(regex, " "); + if (numReplacements > 1) { + throw new NameParseException("The regex being used to find the name has multiple matches."); + } + this.norm(); + return matcher.group(submatchIndex).trim(); + } + } + return chopped; + } + + /** + * Flips the front and back parts of a name with one another. + * Front and back are determined by a specified character somewhere in the + * middle of the string. 
+ * + * @param flipAroundChar the character(s) demarcating the two halves you want to flip. + * @throws NameParseException if a regex fails or a condition is not expected + */ + public void flip(String flipAroundChar) { + String[] parts = this.str.split(flipAroundChar); + if (parts != null) { + if (parts.length == 2) { + this.str = String.format("%s %s", parts[1], parts[0]); + this.norm(); + } else if (parts.length > 2) { + throw new NameParseException( + "Can't flip around multiple '" + flipAroundChar + "' characters in namestring."); + } + } + } + + /** + *

<p>Removes extra whitespace and punctuation from {@code this.str}.</p>

+ * + *

<p>Strips whitespace chars from ends, strips redundant whitespace, converts + * whitespace chars to " ".</p>

+ */ + public void norm() { + this.str = this.str.trim(); + this.str = this.str.replaceAll("\\s+", " "); + this.str = this.str.replaceAll(",$", " "); + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java index d43d2be..d059ed4 100644 --- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java +++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java @@ -71,26 +71,26 @@ public class HumanNameParserTest { */ private void validateRecord(CSVRecord record) { HumanNameParser parser = new HumanNameParser(); - parser.parse(record.get(Colums.Name)); + Name result = parser.parse(record.get(Colums.Name)); long recordNum = record.getRecordNumber(); assertThat("Wrong LeadingInit in record " + recordNum, - parser.getLeadingInit(), equalTo(record.get(Colums.LeadingInit))); - + result.getLeadingInitial(), equalTo(record.get(Colums.LeadingInit))); + assertThat("Wrong FirstName in record " + recordNum, - parser.getFirst(), equalTo(record.get(Colums.FirstName))); - + result.getFirstName(), equalTo(record.get(Colums.FirstName))); + assertThat("Wrong NickName in record " + recordNum, - parser.getNickname(), equalTo(record.get(Colums.NickName))); - + result.getNickName(), equalTo(record.get(Colums.NickName))); + assertThat("Wrong MiddleName in record " + recordNum, - parser.getMiddle(), equalTo(record.get(Colums.MiddleName))); - + result.getMiddleName(), equalTo(record.get(Colums.MiddleName))); + assertThat("Wrong LastName in record " + recordNum, - parser.getLast(), equalTo(record.get(Colums.LastName))); - + result.getLastName(), equalTo(record.get(Colums.LastName))); + assertThat("Wrong Suffix in record " + recordNum, - parser.getSuffix(), 
equalTo(record.get(Colums.Suffix))); + result.getSuffix(), equalTo(record.get(Colums.Suffix))); } private enum Colums { http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameStringTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/names/NameStringTest.java b/src/test/java/org/apache/commons/text/names/NameStringTest.java new file mode 100644 index 0000000..494c70b --- /dev/null +++ b/src/test/java/org/apache/commons/text/names/NameStringTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.names; + +import static org.junit.Assert.assertEquals; + +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same + * input file as the PHP library 0.2 version. 
+ */ +public class NameStringTest { + + private NameString nameString; + + @Before + public void setUp() { + nameString = new NameString("Björn O'Malley"); + } + + @Test + public void testSetStrRemovesWhitespaceAtEnds() { + nameString.setStr(" Björn O'Malley \r\n"); + assertEquals( + "Björn O'Malley", + nameString.getStr() + ); + } + + @Test + public void testSetStrRemovesRedudentantWhitespace(){ + nameString.setStr(" Björn O'Malley"); + assertEquals( + "Björn O'Malley", + nameString.getStr() + ); + } + + @Test + public void testChopWithRegexReturnsChoppedSubstring(){ + nameString.setStr("Björn O'Malley"); + assertEquals( + "Björn", + nameString.chopWithRegex("(^([^ ]+))(.+)", 1) + ); + } + + @Test + public void testChopWithRegexChopsStartOffNameStr(){ + nameString.setStr("Björn O'Malley"); + nameString.chopWithRegex("(^[^ ]+)", 0); + assertEquals( + "O'Malley", + nameString.getStr() + ); + } + + @Test + public void testChopWithRegexChopsEndOffNameStr(){ + nameString.setStr("Björn O'Malley"); + nameString.chopWithRegex("( (.+)$)", 1); + assertEquals( + "Björn", + nameString.getStr() + ); + } + + @Test + public void testChopWithRegexChopsMiddleFromNameStr(){ + nameString.setStr("Björn 'Bill' O'Malley"); + nameString.chopWithRegex("( '[^']+' )", 0); + assertEquals( + "Björn O'Malley", + nameString.getStr() + ); + } + + @Test + public void testFlip() { + nameString.setStr("O'Malley, Björn"); + nameString.flip(","); + assertEquals( + "Björn O'Malley", + nameString.getStr() + ); + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/685f9a86/src/test/java/org/apache/commons/text/names/NameTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java deleted file mode 100644 index 7822e92..0000000 --- a/src/test/java/org/apache/commons/text/names/NameTest.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.text.names; - -import static org.junit.Assert.assertEquals; - -import org.junit.Before; -import org.junit.Test; - -/** - * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same - * input file as the PHP library 0.2 version. 
- */ -public class NameTest { - - protected Name object; - - @Before - public void setUp() { - object = new Name("Björn O'Malley"); - } - - @Test - public void testSetStrRemovesWhitespaceAtEnds() { - object.setStr(" Björn O'Malley \r\n"); - assertEquals( - "Björn O'Malley", - object.getStr() - ); - } - - @Test - public void testSetStrRemovesRedudentantWhitespace(){ - object.setStr(" Björn O'Malley"); - assertEquals( - "Björn O'Malley", - object.getStr() - ); - } - - @Test - public void testChopWithRegexReturnsChoppedSubstring(){ - object.setStr("Björn O'Malley"); - assertEquals( - "Björn", - object.chopWithRegex("(^([^ ]+))(.+)", 1) - ); - } - - @Test - public void testChopWithRegexChopsStartOffNameStr(){ - object.setStr("Björn O'Malley"); - object.chopWithRegex("(^[^ ]+)", 0); - assertEquals( - "O'Malley", - object.getStr() - ); - } - - @Test - public void testChopWithRegexChopsEndOffNameStr(){ - object.setStr("Björn O'Malley"); - object.chopWithRegex("( (.+)$)", 1); - assertEquals( - "Björn", - object.getStr() - ); - } - - @Test - public void testChopWithRegexChopsMiddleFromNameStr(){ - object.setStr("Björn 'Bill' O'Malley"); - object.chopWithRegex("( '[^']+' )", 0); - assertEquals( - "Björn O'Malley", - object.getStr() - ); - } - - @Test - public void testFlip() { - object.setStr("O'Malley, Björn"); - object.flip(","); - assertEquals( - "Björn O'Malley", - object.getStr() - ); - } - -}