commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ki...@apache.org
Subject [text] SANDBOX-487 Human name parser
Date Wed, 15 Apr 2015 09:16:40 GMT
Repository: commons-text
Updated Branches:
  refs/heads/master 6280d46c5 -> 411e81f8d


SANDBOX-487 Human name parser


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/411e81f8
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/411e81f8
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/411e81f8

Branch: refs/heads/master
Commit: 411e81f8de92779b29b76a34412a68b6fcfee664
Parents: 6280d46
Author: Bruno P. Kinoshita <brunodepaulak@yahoo.com.br>
Authored: Wed Apr 15 21:16:22 2015 +1200
Committer: Bruno P. Kinoshita <brunodepaulak@yahoo.com.br>
Committed: Wed Apr 15 21:16:22 2015 +1200

----------------------------------------------------------------------
 pom.xml                                         |  54 ++++
 .../commons/text/names/HumanNameParser.java     | 269 +++++++++++++++++++
 .../org/apache/commons/text/names/Name.java     | 136 ++++++++++
 .../commons/text/names/NameParseException.java  |  79 ++++++
 .../apache/commons/text/names/package-info.java |  22 ++
 .../org/apache/commons/text/names/NameTest.java | 106 ++++++++
 .../apache/commons/text/names/ParserTest.java   | 104 +++++++
 .../org/apache/commons/text/names/testNames.txt |  31 +++
 8 files changed, 801 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 6b2a50f..6cf4dcf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -66,6 +66,13 @@
   <!-- Lang should depend on very little -->
   <dependencies>
     <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+      <!-- if upgrading, be sure to check shaded jar relocations! -->
+      <version>3.4</version>
+    </dependency>
+    <!-- testing -->
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <version>4.12</version>
@@ -139,6 +146,53 @@
           </ignorePathsToDelete>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <!-- v1.7 is somehow not compatible with commons-parent 25; see
+             http://svn.apache.org/viewvc?diff_format=h&view=revision&revision=1350822 -->
+        <version>1.6</version>
+        <configuration>
+          <minimizeJar>true</minimizeJar>
+          <createDependencyReducedPom>true</createDependencyReducedPom>
+          <createSourcesJar>true</createSourcesJar>
+          <artifactSet>
+            <includes>
+              <include>org.apache.commons:commons-lang3</include>
+            </includes>
+          </artifactSet>
+          <relocations>
+            <relocation>
+              <pattern>org.apache.commons.lang3.builder.</pattern>
+              <shadedPattern>org.apache.commons.text._lang3.builder.__</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.apache.commons.lang3.exception.</pattern>
+              <shadedPattern>org.apache.commons.text._lang3.exception.__</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.apache.commons.lang3.mutable.</pattern>
+              <shadedPattern>org.apache.commons.text._lang3.mutable.__</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.apache.commons.lang3.tuple.</pattern>
+              <shadedPattern>org.apache.commons.text._lang3.tuple.__</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.apache.commons.lang3.</pattern>
+              <shadedPattern>org.apache.commons.text._lang3.__</shadedPattern>
+            </relocation>
+          </relocations>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
 
   </build>

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/HumanNameParser.java b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
new file mode 100644
index 0000000..6ad6394
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@ -0,0 +1,269 @@
+/*
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * <p>A parser capable of parsing name parts out of a single string.</p>
+ *
+ * <p>The code works by basically applying several Regexes in a certain order
+ * and removing (chopping) tokens off the original string. The parser consumes
+ * the tokens during its creation. It accepts names in formats such as:</p>
+ *
+ * <ul>
+ * <li>J. Walter Weatherman </li>
+ * <li>de la Cruz, Ana M.</li>
+ * <li>James C. ('Jimmy') O'Dell, Jr.</li>
+ * </ul>
+ *
+ * <p>and parses out the:</p>
+ *
+ * <ul>
+ * <li>leading initial (Like "J." in "J. Walter Weatherman")</li>
+ * <li>first name (or first initial in a name like 'R. Crumb')</li>
+ * <li>nicknames (like "Jimmy" in "James C. ('Jimmy') O'Dell, Jr.")</li>
+ * <li>middle names</li>
+ * <li>last name (including compound ones like "van der Sar" and "Ortega y Gasset"), and</li>
+ * <li>suffix (like 'Jr.', 'III')</li>
+ * </ul>
+ *
+ * <pre>
+ * Name name = new Name("Sérgio Vieira de Mello");
+ * HumanNameParser parser = new HumanNameParser(name);
+ * String firstName = parser.getFirst();
+ * String nickname = parser.getNickname();
+ * // ...
+ * </pre>
+ *
+ * <p>The original code was written in <a href="http://jasonpriem.com/human-name-parse">PHP</a>
+ * and ported to <a href="http://tupilabs.github.io/HumanNameParser.java/">Java</a>.</p>
+ *
+ * <p>This implementation is based on the Java implementation, with additions
+ * suggested in <a href="https://issues.apache.org/jira/browse/SANDBOX-487">SANDBOX-487</a>.</p>
+ *
+ * <p>This class is not thread-safe.</p>
+ *
+ * @since 1.0
+ */
+public class HumanNameParser {
+
+    /**
+     * Name parsed.
+     */
+    private Name name;
+    /**
+     * Leading init part.
+     */
+    private String leadingInit;
+    /**
+     * First name.
+     */
+    private String first;
+    /**
+     * Single nickname found in the name input.
+     */
+    private String nickname;
+    /**
+     * Middle name.
+     */
+    private String middle;
+    /**
+     * Last name.
+     */
+    private String last;
+    /**
+     * Name suffix.
+     */
+    private String suffix;
+    /**
+     * Suffixes found.
+     */
+    private List<String> suffixes;
+    /**
+     * Prefixes found.
+     */
+    private List<String> prefixes;
+
+    /**
+     * Creates a parser given a string name.
+     *
+     * @param name string name
+     */
+    public HumanNameParser(String name) {
+        this(new Name(name));
+    }
+
+    /**
+     * Creates a parser given a {@code Name} object.
+     *
+     * @param name {@code Name}
+     */
+    public HumanNameParser(Name name) {
+        this.name = name;
+
+        this.leadingInit = "";
+        this.first = "";
+        this.nickname = "";
+        this.middle = "";
+        this.last = "";
+        this.suffix = "";
+
+        this.suffixes = Arrays.asList(new String[] {
+                "esq", "esquire", "jr",
+                "sr", "2", "ii", "iii", "iv" });
+        this.prefixes = Arrays
+            .asList(new String[] {
+                    "bar", "ben", "bin", "da", "dal",
+                    "de la", "de", "del", "der", "di", "ibn", "la", "le",
+                    "san", "st", "ste", "van", "van der", "van den", "vel",
+                    "von" });
+
+        this.parse();
+    }
+
+    /**
+     * Gets the {@code Name} object.
+     *
+     * @return the {@code Name} object
+     */
+    public Name getName() {
+        return name;
+    }
+
+    /**
+     * Gets the leading init part of the name.
+     *
+     * @return the leading init part of the name
+     */
+    public String getLeadingInit() {
+        return leadingInit;
+    }
+
+    /**
+     * Gets the first name.
+     *
+     * @return first name
+     */
+    public String getFirst() {
+        return first;
+    }
+
+    /**
+     * Gets the nickname.
+     *
+     * @return the nickname
+     */
+    public String getNickname() {
+        return nickname;
+    }
+
+    /**
+     * Gets the middle name.
+     *
+     * @return the middle name
+     */
+    public String getMiddle() {
+        return middle;
+    }
+
+    /**
+     * Gets the last name.
+     *
+     * @return the last name
+     */
+    public String getLast() {
+        return last;
+    }
+
+    /**
+     * Gets the suffix part of the name.
+     *
+     * @return the name suffix
+     */
+    public String getSuffix() {
+        return suffix;
+    }
+
+    /**
+     * Gets the name suffixes.
+     *
+     * @return the name suffixes
+     */
+    public List<String> getSuffixes() {
+        return suffixes;
+    }
+
+    /**
+     * Gets the name prefixes.
+     *
+     * @return the name prefixes
+     */
+    public List<String> getPrefixes() {
+        return prefixes;
+    }
+
+    /**
+     * Consumes the string and creates the name parts.
+     *
+     * @throws NameParseException if the parser fails to retrieve the name parts
+     */
+    private void parse() {
+        String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
+        String prefixes = StringUtils.join(this.prefixes, " |") + " ";
+
+        // The regex use is a bit tricky.  *Everything* matched by the regex will be replaced,
+        // but you can select a particular parenthesized submatch to be returned.
+        // Also, note that each regex requires that the preceding ones have been run, and matches chopped out.
+        // names that start or end with an apostrophe break this
+        String nicknamesRegex = "(?i) ('|\\\"|\\(\\\"*'*)(.+?)('|\\\"|\\\"*'*\\)) ";
+        String suffixRegex = "(?i),* *((" + suffixes + ")$)";
+        String lastRegex = "(?i)(?!^)\\b([^ ]+ y |" + prefixes + ")*[^ ]+$";
+        // note the lookahead, which isn't returned or replaced
+        String leadingInitRegex = "(?i)(^(.\\.*)(?= \\p{L}{2}))";
+        String firstRegex = "(?i)^([^ ]+)";
+
+        // get nickname, if there is one
+        this.nickname = this.name.chopWithRegex(nicknamesRegex, 2);
+
+        // get suffix, if there is one
+        this.suffix = this.name.chopWithRegex(suffixRegex, 1);
+
+        // flip the before-comma and after-comma parts of the name
+        this.name.flip(",");
+
+        // get the last name
+        this.last = this.name.chopWithRegex(lastRegex, 0);
+
+        // get the first initial, if there is one
+        this.leadingInit = this.name.chopWithRegex(leadingInitRegex, 1);
+
+        // get the first name
+        this.first = this.name.chopWithRegex(firstRegex, 0);
+        if (StringUtils.isBlank(this.first)) {
+            throw new NameParseException("Couldn't find a first name in '{" + this.name.getStr() + "}'");
+        }
+
+        // if anything's left, that's the middle name
+        this.middle = this.name.getStr();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/Name.java b/src/main/java/org/apache/commons/text/names/Name.java
new file mode 100644
index 0000000..71df7d8
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>A {@code Name} object that encapsulates a name string, and contains the logic
+ * for handling it with regexes.</p>
+ *
+ * <p>This class is not thread-safe.</p>
+ *
+ * @since 1.0
+ */
+public class Name {
+
+    /**
+     * Encapsulated string. Not immutable!
+     */
+    private String str;
+
+    /**
+     * Creates a new Name object.
+     *
+     * @param str encapsulated string.
+     */
+    public Name(String str) {
+        this.str = str;
+    }
+
+    /**
+     * Gets the encapsulated string.
+     *
+     * @return encapsulated string
+     */
+    public String getStr() {
+        return str;
+    }
+
+    /**
+     * Sets the encapsulated string value.
+     *
+     * @param str string value
+     */
+    public void setStr(String str) {
+        this.str = str;
+        this.norm();
+    }
+
+    /**
+     * Uses a regex to chop off and return part of the namestring.
+     * There are two parts: first, it returns the matched substring,
+     * and then it removes that substring from the encapsulated
+     * string and normalizes it.
+     *
+     * @param regex matches the part of the namestring to chop off
+     * @param submatchIndex which of the parenthesized submatches to use
+     * @return the part of the namestring that got chopped off
+     */
+    public String chopWithRegex(String regex, int submatchIndex) {
+        String chopped = "";
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(this.str);
+
+        // workaround for numReplacements in Java
+        int numReplacements = 0;
+        while (matcher.find()) {
+            numReplacements++;
+        }
+
+        // recreate or the groups are gone
+        pattern = Pattern.compile(regex);
+        matcher = pattern.matcher(this.str);
+        if (matcher.find()) {
+            boolean subset = matcher.groupCount() > submatchIndex;
+            if (subset) {
+                this.str = this.str.replaceAll(regex, " ");
+                if (numReplacements > 1) {
+                    throw new NameParseException("The regex being used to find the name has multiple matches.");
+                }
+                this.norm();
+                return matcher.group(submatchIndex).trim();
+            }
+        }
+        return chopped;
+    }
+
+    /**
+     * Flips the front and back parts of a name with one another.
+     * Front and back are determined by a specified character somewhere in the
+     * middle of the string.
+     *
+     * @param flipAroundChar the character(s) demarcating the two halves you want to flip.
+     * @throws NameParseException if a regex fails or a condition is not expected
+     */
+    public void flip(String flipAroundChar) {
+        String[] parts = this.str.split(flipAroundChar);
+        if (parts != null) {
+            if (parts.length == 2) {
+                this.str = String.format("%s %s", parts[1], parts[0]);
+                this.norm();
+            } else if (parts.length > 2) {
+                throw new NameParseException(
+                        "Can't flip around multiple '" + flipAroundChar + "' characters in namestring.");
+            }
+        }
+    }
+
+    /**
+     * <p>Removes extra whitespace and punctuation from {@code this.str}.</p>
+     *
+     * <p>Strips whitespace chars from ends, strips redundant whitespace, converts
+     * whitespace chars to " ".</p>
+     */
+    public void norm() {
+        this.str = this.str.trim();
+        this.str = this.str.replaceAll("\\s+", " ");
+        this.str = this.str.replaceAll(",$", " ");
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/NameParseException.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/NameParseException.java b/src/main/java/org/apache/commons/text/names/NameParseException.java
new file mode 100644
index 0000000..2ff160f
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/NameParseException.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+/**
+ * Name parse exception.
+ *
+ * @since 1.0
+ */
+public class NameParseException extends RuntimeException {
+
+    /**
+     * Serial UID.
+     */
+    private static final long serialVersionUID = -2375904385006224156L;
+
+    /**
+     * Constructor.
+     */
+    public NameParseException() {
+        super();
+    }
+
+    /**
+     * Constructor with message.
+     *
+     * @param message message
+     */
+    public NameParseException(String message) {
+        super(message);
+    }
+
+    /**
+     * Constructor with cause.
+     *
+     * @param cause cause
+     */
+    public NameParseException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * Constructor with message and cause.
+     *
+     * @param message message
+     * @param cause cause
+     */
+    public NameParseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * Complete constructor.
+     *
+     * @param message message
+     * @param cause cause
+     * @param enableSuppression flag to enable suppression
+     * @param writableStackTrace a writable stack trace
+     */
+    public NameParseException(String message, Throwable cause,
+            boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/main/java/org/apache/commons/text/names/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/names/package-info.java b/src/main/java/org/apache/commons/text/names/package-info.java
new file mode 100644
index 0000000..1423d24
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/names/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * <p>A human names parser in Java.</p>
+ *
+ * @since 1.0
+ */
+package org.apache.commons.text.names;

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/test/java/org/apache/commons/text/names/NameTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/NameTest.java b/src/test/java/org/apache/commons/text/names/NameTest.java
new file mode 100644
index 0000000..53c9764
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/NameTest.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@code Name} and {@code HumanNameParser}. Utilizes the same
+ * input file as the PHP library 0.2 version.
+ *
+ * @since 1.0
+ */
+public class NameTest {
+
+    protected Name object;
+
+    @Before
+    public void setUp() {
+        object = new Name("Björn O'Malley");
+    }
+
+    @Test
+    public void testSetStrRemovesWhitespaceAtEnds() {
+        object.setStr("    Björn O'Malley \r\n");
+        assertEquals(
+            "Björn O'Malley",
+            object.getStr()
+        );
+    }
+
+    @Test
+    public void testSetStrRemovesRedudentantWhitespace(){
+        object.setStr(" Björn    O'Malley");
+        assertEquals(
+            "Björn O'Malley",
+            object.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexReturnsChoppedSubstring(){
+        object.setStr("Björn O'Malley");
+        assertEquals(
+            "Björn",
+            object.chopWithRegex("(^([^ ]+))(.+)", 1)
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsStartOffNameStr(){
+        object.setStr("Björn O'Malley");
+        object.chopWithRegex("(^[^ ]+)", 0);
+        assertEquals(
+                "O'Malley",
+            object.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsEndOffNameStr(){
+        object.setStr("Björn O'Malley");
+        object.chopWithRegex("( (.+)$)", 1);
+        assertEquals(
+            "Björn",
+            object.getStr()
+        );
+    }
+
+    @Test
+    public void testChopWithRegexChopsMiddleFromNameStr(){
+        object.setStr("Björn 'Bill' O'Malley");
+        object.chopWithRegex("( '[^']+' )", 0);
+        assertEquals(
+            "Björn O'Malley",
+            object.getStr()
+        );
+    }
+
+    @Test
+    public void testFlip() {
+        object.setStr("O'Malley, Björn");
+        object.flip(",");
+        assertEquals(
+            "Björn O'Malley",
+            object.getStr()
+        );
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/test/java/org/apache/commons/text/names/ParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/text/names/ParserTest.java b/src/test/java/org/apache/commons/text/names/ParserTest.java
new file mode 100644
index 0000000..e9ca3c0
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/names/ParserTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.names;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.logging.Logger;
+
+import org.apache.commons.lang3.StringUtils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests the {@code HumanNameParser} class.
+ *
+ * @since 1.0
+ */
+public class ParserTest {
+
+    private static final Logger LOGGER = Logger.getLogger(ParserTest.class.getName());
+
+    private static File testNames = null;
+
+    @BeforeClass
+    public static void setUp() {
+        testNames = new File(ParserTest.class.getResource("/org/apache/commons/text/names/testNames.txt").getFile());
+    }
+
+    @Test
+    public void testAll() throws IOException {
+        BufferedReader buffer = null;
+        FileReader reader = null;
+
+        try {
+            reader = new FileReader(testNames);
+            buffer = new BufferedReader(reader);
+
+            String line = null;
+            while ((line = buffer.readLine()) != null) {
+                if (StringUtils.isBlank(line)) {
+                    LOGGER.warning("Empty line in testNames.txt");
+                    continue;
+                }
+
+                String[] tokens = line.split("\\|");
+                if (tokens.length != 7) {
+                    LOGGER.warning(String.format("Invalid line in testNames.txt: %s", line));
+                    continue;
+                }
+
+                validateLine(tokens);
+            }
+        } finally {
+            if (reader != null)
+                reader.close();
+            if (buffer != null)
+                buffer.close();
+        }
+    }
+
+    /**
+     * Validates a line in the testNames.txt file.
+     *
+     * @param tokens the tokens with leading spaces
+     */
+    private void validateLine(String[] tokens) {
+        String name = tokens[0].trim();
+
+        String leadingInit = tokens[1].trim();
+        String first = tokens[2].trim();
+        String nickname = tokens[3].trim();
+        String middle = tokens[4].trim();
+        String last = tokens[5].trim();
+        String suffix = tokens[6].trim();
+
+        HumanNameParser parser = new HumanNameParser(name);
+
+        assertEquals(leadingInit, parser.getLeadingInit());
+        assertEquals(first, parser.getFirst());
+        assertEquals(nickname, parser.getNickname());
+        assertEquals(middle, parser.getMiddle());
+        assertEquals(last, parser.getLast());
+        assertEquals(suffix, parser.getSuffix());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/411e81f8/src/test/resources/org/apache/commons/text/names/testNames.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/org/apache/commons/text/names/testNames.txt b/src/test/resources/org/apache/commons/text/names/testNames.txt
new file mode 100644
index 0000000..83ddc31
--- /dev/null
+++ b/src/test/resources/org/apache/commons/text/names/testNames.txt
@@ -0,0 +1,31 @@
+Björn O'Malley| | Björn| | | O'Malley| 
+Bin Lin| | Bin| | | Lin| 
+Linda Jones| | Linda| | | Jones| 
+Jason H. Priem| | Jason| | H.| Priem| 
+Björn O'Malley-Muñoz| | Björn| | | O'Malley-Muñoz| 
+Björn C. O'Malley| | Björn| | C.| O'Malley| 
+Björn "Bill" O'Malley| | Björn| Bill| | O'Malley| 
+Björn ("Bill") O'Malley| | Björn| Bill| | O'Malley| 
+Björn ("Wild Bill") O'Malley| | Björn| Wild Bill| | O'Malley| 
+Björn (Bill) O'Malley| | Björn| Bill| | O'Malley| 
+Björn 'Bill' O'Malley| | Björn| Bill| | O'Malley| 
+Björn C O'Malley| | Björn| | C| O'Malley| 
+Björn C. R. O'Malley| | Björn| | C. R.| O'Malley| 
+Björn Charles O'Malley| | Björn| | Charles| O'Malley| 
+Björn Charles R. O'Malley| | Björn| | Charles R.| O'Malley| 
+Björn van O'Malley| | Björn| | | van O'Malley| 
+Björn Charles van der O'Malley| | Björn| | Charles| van der O'Malley| 
+Björn Charles O'Malley y Muñoz| | Björn| | Charles| O'Malley y Muñoz| 
+Björn O'Malley, Jr.| | Björn| | | O'Malley| Jr.
+Björn O'Malley Jr| | Björn| | | O'Malley| Jr
+B O'Malley| | B| | | O'Malley| 
+William Carlos Williams| | William| | Carlos| Williams| 
+C. Björn Roger O'Malley| C.| Björn| | Roger| O'Malley| 
+B. C. O'Malley| | B.| | C.| O'Malley| 
+B C O'Malley| | B| | C| O'Malley| 
+B.J. Thomas| | B.J.| | | Thomas| 
+O'Malley, Björn| | Björn| | | O'Malley| 
+O'Malley, Björn Jr| | Björn| | | O'Malley| Jr
+O'Malley, C. Björn| C.| Björn| | | O'Malley| 
+O'Malley, C. Björn III| C.| Björn| | | O'Malley| III
+O'Malley y Muñoz, C. Björn Roger III| C.| Björn| | Roger| O'Malley y Muñoz| III
\ No newline at end of file


Mime
View raw message