commons-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ki...@apache.org
Subject [7/7] [text] Merge branch 'pr/4'
Date Sun, 20 Nov 2016 06:57:46 GMT
Merge branch 'pr/4'

TEXT-20: Add salutations like Mr, Mrs, etc.
This closes #4 from GitHub. Thanks to Tom Mackenzie.


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/6fd10f89
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/6fd10f89
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/6fd10f89

Branch: refs/heads/master
Commit: 6fd10f89aaa3870d91368979e4f8a32ebcfc1049
Parents: ebb2a92 9e84145
Author: Bruno P. Kinoshita <brunodepaulak@yahoo.com.br>
Authored: Sun Nov 20 19:52:56 2016 +1300
Committer: Bruno P. Kinoshita <brunodepaulak@yahoo.com.br>
Committed: Sun Nov 20 19:52:56 2016 +1300

----------------------------------------------------------------------
 .../commons/text/names/HumanNameParser.java     | 17 ++++-
 .../org/apache/commons/text/names/Name.java     | 13 +++-
 .../commons/text/names/HumanNameParserTest.java |  6 +-
 .../org/apache/commons/text/names/testNames.txt | 65 ++++++++++----------
 4 files changed, 64 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/6fd10f89/src/main/java/org/apache/commons/text/names/HumanNameParser.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/commons/text/names/HumanNameParser.java
index 5718ba9,fd1608a..1348313
--- a/src/main/java/org/apache/commons/text/names/HumanNameParser.java
+++ b/src/main/java/org/apache/commons/text/names/HumanNameParser.java
@@@ -100,13 -100,8 +100,17 @@@ import org.apache.commons.lang3.StringU
   */
  public final class HumanNameParser {
  
 -    private final List<String> salutations;
 +    /**
 +     * List of suffixes. Not exposed to users or children classes.
 +     */
      private final List<String> suffixes;
 +    /**
++     * List of salutations. Not exposed to users or children classes.
++     */
++    private final List<String> salutations;
++    /**
 +     * List of prefixes. Not exposed to users or children classes.
 +     */
      private final List<String> prefixes;
  
      /**
@@@ -132,30 -130,34 +139,34 @@@
       * @throws NullPointerException if name is null.
       * @return The name object
       */
 -    public Name parse(String name) {
 +    public Name parse(final String name) {
          Objects.requireNonNull(name, "Parameter 'name' must not be null.");
  
 -        NameString nameString = new NameString(name);
 +        final NameString nameString = new NameString(name);
          // TODO compile regexes only once when the parser is created
 -        String salutations = StringUtils.join(this.salutations, " |") + "";
 -        String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
 -        String prefixes = StringUtils.join(this.prefixes, " |") + " ";
 +        final String suffixes = StringUtils.join(this.suffixes, "\\.*|") + "\\.*";
 +        final String prefixes = StringUtils.join(this.prefixes, " |") + " ";
++        final String salutations = StringUtils.join(this.salutations, " |") + " ";
  
          // The regex use is a bit tricky.  *Everything* matched by the regex will be replaced,
          // but you can select a particular parenthesized submatch to be returned.
          // Also, note that each regex requires that the preceding ones have been run, and
matches chopped out.
          // names that starts or end w/ an apostrophe break this
 -        String salutationRegex = "^(?i)(("+salutations+")\\.)";
 -        String nicknamesRegex = "(?i) ('|\\\"|\\(\\\"*'*)(.+?)('|\\\"|\\\"*'*\\)) ";
 -        String suffixRegex = "(?i),* *((" + suffixes + ")$)";
 -        String lastRegex = "(?i)(?!^)\\b([^ ]+ y |" + prefixes + ")*[^ ]+$";
 +        final String nicknamesRegex = "(?i) ('|\\\"|\\(\\\"*'*)(.+?)('|\\\"|\\\"*'*\\))
";
 +        final String suffixRegex = "(?i),* *((" + suffixes + ")$)";
 +        final String lastRegex = "(?i)(?!^)\\b([^ ]+ y |" + prefixes + ")*[^ ]+$";
++        final String salutationRegex = "^(?i)(("+salutations+")\\.)";
          // note the lookahead, which isn't returned or replaced
 -        String leadingInitRegex = "(?i)(^(.\\.*)(?= \\p{L}{2}))";
 -        String firstRegex = "(?i)^([^ ]+)";
 +        final String leadingInitRegex = "(?i)(^(.\\.*)(?= \\p{L}{2}))";
 +        final String firstRegex = "(?i)^([^ ]+)";
  
+         String salutation = nameString.chopWithRegex(salutationRegex, 1);
+ 
          // get nickname, if there is one
 -        String nickname = nameString.chopWithRegex(nicknamesRegex, 2);
 +        final String nickname = nameString.chopWithRegex(nicknamesRegex, 2);
  
          // get suffix, if there is one
 -        String suffix = nameString.chopWithRegex(suffixRegex, 1);
 +        final String suffix = nameString.chopWithRegex(suffixRegex, 1);
  
          // flip the before-comma and after-comma parts of the name
          nameString.flip(",");

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6fd10f89/src/main/java/org/apache/commons/text/names/Name.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/commons/text/names/Name.java
index 6545b84,ef3d36a..8b6f267
--- a/src/main/java/org/apache/commons/text/names/Name.java
+++ b/src/main/java/org/apache/commons/text/names/Name.java
@@@ -25,43 -25,17 +25,49 @@@ import java.util.Objects
   */
  public final class Name {
  
 +    /**
 +     * Leading initial. e.g. <em>F.</em>, as in <em>Francisco ('Chico')
Silva Zhao II</em>.
 +     */
      private final String leadingInitial;
 +    /**
++     * Salutation. e.g. <em>Dr.</em>, as in <em>Dr. Jekyll</em>,
or <em>Mr.</em>, as in <em>Mr. Hyde</em>.
++     */
+     private final String salutation;
++    /**
 +     * The first name, e.g. <em>Francisco</em>, as in <em>Francisco ('Chico')
Silva Zhao II</em>.
 +     */
      private final String firstName;
 +    /**
 +     * The nickname, e.g. <em>Chico</em>, as in <em>Francisco ('Chico')
Silva Zhao II</em>.
 +     */
      private final String nickName;
 +    /**
 +     * The middle name, e.g. <em>Silva</em>, as in <em>Francisco ('Chico')
Silva Zhao II</em>.
 +     */
      private final String middleName;
 +    /**
 +     * The last name, e.g. <em>Zhao</em>, as in <em>Francisco ('Chico')
Silva Zhao II</em>.
 +     */
      private final String lastName;
 +    /**
 +     * The suffix, e.g. <em>II</em>, as in <em>Francisco ('Chico') Silva
Zhao II</em>.
 +     */
      private final String suffix;
  
 -    Name(String leadingInitial, String salutation, String firstName, String nickName, String
middleName, String lastName, String suffix) {
 +    /**
 +     * Create a Name.
 +     *
 +     * @param leadingInitial the leading initial
++     * @param salutation the salutation
 +     * @param firstName the first name
 +     * @param nickName the nickname
 +     * @param middleName the middle name
 +     * @param lastName the last name
 +     * @param suffix a suffix
 +     */
-     Name(final String leadingInitial, final String firstName, final String nickName, final
String middleName, final String lastName, final String suffix) {
++    Name(final String leadingInitial, final String salutation, final String firstName, final
String nickName, final String middleName, final String lastName, final String suffix) {
          this.leadingInitial = leadingInitial;
+         this.salutation = salutation;
          this.firstName = firstName;
          this.nickName = nickName;
          this.middleName = middleName;

http://git-wip-us.apache.org/repos/asf/commons-text/blob/6fd10f89/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
index f473206,22c96cc..1dd6085
--- a/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
+++ b/src/test/java/org/apache/commons/text/names/HumanNameParserTest.java
@@@ -70,10 -70,14 +70,14 @@@ public class HumanNameParserTest 
       *
       * @param record a CSVRecord representing one record in the input file.
       */
 -    private void validateRecord(CSVRecord record) {
 -        Name result = nameParser.parse(record.get(Columns.Name));
 +    private void validateRecord(final CSVRecord record) {
 +        final Name result = nameParser.parse(record.get(Columns.Name));
  
 -        long recordNum = record.getRecordNumber();
 +        final long recordNum = record.getRecordNumber();
+ 
 -        assertThat("Wrong LeadingInit in record " + recordNum,
++        assertThat("Wrong Salutation in record " + recordNum,
+                 result.getSalutation(), equalTo(record.get(Columns.Salutation)));
+ 
          assertThat("Wrong LeadingInit in record " + recordNum,
                  result.getLeadingInitial(), equalTo(record.get(Columns.LeadingInit)));
  


Mime
View raw message