brooklyn-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From alasdairhodge <...@git.apache.org>
Subject [GitHub] incubator-brooklyn pull request: Date parsing and type coercion fi...
Date Mon, 08 Jun 2015 09:30:04 GMT
Github user alasdairhodge commented on a diff in the pull request:

    https://github.com/apache/incubator-brooklyn/pull/682#discussion_r31898443
  
    --- Diff: utils/common/src/main/java/brooklyn/util/time/Time.java ---
    @@ -443,36 +472,433 @@ else if (s.equalsIgnoreCase("d") || s.equalsIgnoreCase("day") ||
s.equalsIgnoreC
             }
         }
     
    +    /**
    +     * Convenience form of {@link #parseDateMaybe(String)} for callers that want a plain {@link Date}.
    +     * Accepts many formats, including 'YYYY-MM-DD', 'YYYY-MM-DD HH:mm:SS', or millis since UTC epoch.
    +     *
    +     * @param input the text to parse; may be null
    +     * @return null when the input is null, otherwise whatever <code>get()</code> yields on the
    +     *         {@link Maybe} from {@link #parseDateMaybe(String)} (presumably throwing when absent -- confirm against Maybe)
    +     */
    +    public static Date parseDate(String input) {
    +        return (input == null) ? null : parseDateMaybe(input).get();
    +    }
    +    
    +    /**
    +     * As {@link #parseDate(String)}, but reports failure through an absent {@link Maybe}
    +     * instead of returning null.
    +     * <p>
    +     * The parsers are consulted in a fixed order and the first one to produce a value wins.
    +     * If none does, the error reported is the one from the flexible-format parser.
    +     */
    +    public static Maybe<Date> parseDateMaybe(String input) {
    +        if (input == null) {
    +            return Maybe.absent("value is null");
    +        }
    +        final String text = input.trim();
    +
    +        // 1. a bare number, read as millis since the epoch
    +        final Maybe<Date> fromMillis = parseDateUtc(text);
    +        if (fromMillis.isPresent()) {
    +            return fromMillis;
    +        }
    +
    +        // 2. the flexible regex-driven parser; its error is the one handed back if all else fails
    +        final Maybe<Date> fromFlexible = parseDateSimpleFlexibleFormatParser(text);
    +        if (fromFlexible.isPresent()) {
    +            return fromFlexible;
    +        }
    +
    +        // (a natty-based parser used to be tried at this point; it is deliberately left disabled)
    +
    +        // 3. the exact layout produced by Date.toString()
    +        final Maybe<Date> fromToString = parseDateFormat(text, new SimpleDateFormat(DATE_FORMAT_OF_DATE_TOSTRING));
    +        if (fromToString.isPresent()) {
    +            return fromToString;
    +        }
    +
    +        // 4. the JDK's legacy lenient parser
    +        final Maybe<Date> fromLegacy = parseDateDefaultParse(text);
    +        if (fromLegacy.isPresent()) {
    +            return fromLegacy;
    +        }
    +
    +        return fromFlexible;
    +    }
    +
    +    /**
    +     * Last-resort parser which delegates to the deprecated, lenient {@link Date#parse(String)}.
    +     * <p>
    +     * Because that parser accepts a great deal of questionable input, its answer is only trusted
    +     * when it falls between 25 December 1999 and 2 January 2200 (in the JVM default time zone).
    +     *
    +     * @param input the text to parse
    +     * @return the parsed date, or absent if the text could not be parsed or lies outside that window
    +     */
    +    @SuppressWarnings("deprecation")
    +    private static Maybe<Date> parseDateDefaultParse(String input) {
    +        try {
    +            long ms = Date.parse(input);
    +            // Bounds are built with a calendar: the deprecated Date(int,int,int) constructor expects
    +            // "year minus 1900" and a zero-based month, so Date(1999, 12, 25) is really January 3900
    +            // and no realistic date would ever fall inside the window.
    +            long earliest = new GregorianCalendar(1999, Calendar.DECEMBER, 25).getTimeInMillis();
    +            long latest = new GregorianCalendar(2200, Calendar.JANUARY, 2).getTimeInMillis();
    +            if (ms >= earliest && ms <= latest) {
    +                // accept default date parse for this century and next
    +                return Maybe.of(new Date(ms));
    +            }
    +        } catch (Exception e) {
    +            // an unparseable string is expected here; only fatal problems are rethrown
    +            Exceptions.propagateIfFatal(e);
    +        }
    +        return Maybe.absent();
    +    }
    +
    +    /**
    +     * Interprets an all-digit string as milliseconds since the epoch (UTC).
    +     * <p>
    +     * The value is accepted only if it denotes an instant whose UTC year is in the range
    +     * 2000 (inclusive) to 2200 (exclusive), so that compact dates such as <code>20150608</code>
    +     * are left for the other parsers.
    +     *
    +     * @param input the trimmed text to inspect; must not be null
    +     * @return the corresponding date, or absent with an explanatory message
    +     */
    +    private static Maybe<Date> parseDateUtc(String input) {
    +        if (!input.matches("\\d+")) {
    +            return Maybe.absent("not long millis since epoch UTC");
    +        }
    +        long millis;
    +        try {
    +            millis = Long.parseLong(input);
    +        } catch (NumberFormatException e) {
    +            // all digits, but too many of them to fit in a long
    +            return Maybe.absent("not long millis since epoch UTC");
    +        }
    +        // Read the year from a UTC calendar: Date.getYear() returns "year minus 1900",
    +        // so comparing it with 2000 would reject every plausible timestamp.
    +        Calendar utc = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
    +        utc.setTimeInMillis(millis);
    +        int year = utc.get(Calendar.YEAR);
    +        if (year >= 2000 && year < 2200) {
    +            // only applicable for dates in this century and the next
    +            return Maybe.of(new Date(millis));
    +        }
    +        return Maybe.absent("long is probably not millis since epoch UTC; millis as string is not in acceptable range");
    +    }
    +
    +    private final static String DIGIT = "\\d"; // regex fragment: one decimal digit
    +    private final static String LETTER = "\\p{L}"; // regex fragment: one Unicode letter
    +    private final static String COMMON_SEPARATORS = "-\\."; // character-class contents: '-' and '.'
    +    private final static String TIME_SEPARATOR = COMMON_SEPARATORS+":"; // character-class contents: '-', '.' and ':'
    +    private final static String DATE_SEPARATOR = COMMON_SEPARATORS+"/ "; // character-class contents: '-', '.', '/' and space
    +    private final static String DATE_TIME_ANY_ORDER_GROUP_SEPARATOR = COMMON_SEPARATORS+":/
";
    +    // numeric date: 4-digit year, then month (1-2 digits, or a run of letters), then 1-2 digit day, each pair split by exactly one DATE_SEPARATOR character (assumes options(...) builds an alternation -- helper not shown here)
    +    private final static String DATE_ONLY_WITH_INNER_SEPARATORS = 
    +            namedGroup("year", DIGIT+DIGIT+DIGIT+DIGIT)
    +            + anyChar(DATE_SEPARATOR)
    +            + namedGroup("month", options(optionally(DIGIT)+DIGIT, anyChar(LETTER)+"+"))
    +            + anyChar(DATE_SEPARATOR)
    +            + namedGroup("day", optionally(DIGIT)+DIGIT);
    +    private final static String DATE_WORDS_2 = // month name, day, optional comma, 4-digit year -- e.g. "June 8, 2015"
    +        namedGroup("month", anyChar(LETTER)+"+")
    +        + anyChar(DATE_SEPARATOR)
    +        + namedGroup("day", optionally(DIGIT)+DIGIT)
    +        + ",?"+anyChar(DATE_SEPARATOR)+"+"
    +        + namedGroup("year", DIGIT+DIGIT+DIGIT+DIGIT);
    +    // we could parse NN-NN-NNNN as DD-MM-YYYY always, but could be confusing for MM-DD-YYYY
oriented people, so require month named
    +    private final static String DATE_WORDS_3 = // day, month name, optional comma, 4-digit year -- e.g. "8 June 2015"
    +        namedGroup("day", optionally(DIGIT)+DIGIT)
    +        + anyChar(DATE_SEPARATOR)
    +        + namedGroup("month", anyChar(LETTER)+"+")
    +        + ",?"+anyChar(DATE_SEPARATOR)+"+"
    +        + namedGroup("year", DIGIT+DIGIT+DIGIT+DIGIT);
    +    // compact numeric date: exactly eight digits read as yyyyMMdd
    +    private final static String DATE_ONLY_NO_SEPARATORS = 
    +            namedGroup("year", DIGIT+DIGIT+DIGIT+DIGIT)
    +            + namedGroup("month", DIGIT+DIGIT)
    +            + namedGroup("day", DIGIT+DIGIT);
    +    // MERIDIAN: 'a' or 'p' in either case, optionally followed by 'm' or 'M'; the time patterns below capture hours, then optional mins and secs, then an optional meridian
    +    private final static String MERIDIAN = anyChar("aApP")+optionally(anyChar("mM"));
    +    private final static String TIME_ONLY_WITH_INNER_SEPARATORS = 
    +        namedGroup("hours", optionally(DIGIT)+DIGIT)+
    +        optionally(
    +            anyChar(TIME_SEPARATOR)+
    +            namedGroup("mins", DIGIT+DIGIT)+
    +            optionally(
    +                anyChar(TIME_SEPARATOR)+
    +                namedGroup("secs", DIGIT+DIGIT+optionally( optionally("\\.")+DIGIT+"+"))))+
    +        optionally(" *" + namedGroup("meridian", notMatching(LETTER+LETTER+LETTER)+MERIDIAN)); // notMatching is defined outside this excerpt; presumably it keeps a three-letter word from being read as AM/PM -- confirm
    +    private final static String TIME_ONLY_NO_SEPARATORS = // compact time: HHmm, then optional ss with an optional fraction
    +        namedGroup("hours", DIGIT+DIGIT)+
    +        namedGroup("mins", DIGIT+DIGIT)+
    +        optionally(
    +            namedGroup("secs", DIGIT+DIGIT+optionally( optionally("\\.")+DIGIT+"+")))+
    +        namedGroup("meridian", ""); // empty group, so the name "meridian" exists in this pattern too
    +    // tzCode: one or more letters followed by any mix of letters, digits, '/', '-', apostrophe, space and '_'; the leading guard is there to reject AM/PM (see inline note)
    +    private final static String TZ_CODE = namedGroup("tzCode",
    +        notMatching(MERIDIAN+options("$", anyChar("^"+LETTER))) // not AM or PM
    +        + anyChar(LETTER)+"+"+anyChar(LETTER+DIGIT+"\\/\\-\\' _")+"*");
    +    private final static String TIME_ZONE_SIGNED_OFFSET = namedGroup("tz", options(namedGroup("tzOffset",
options("\\+", "-")+
    +            DIGIT+optionally(DIGIT)+optionally(optionally(":")+DIGIT+DIGIT)), 
    +        optionally("\\+")+TZ_CODE)); // tz: a signed offset (sign, 1-2 digit hours, optional [:]mm) or a zone code with an optional leading '+'
    +    private final static String TIME_ZONE_OPTIONALLY_SIGNED_OFFSET = namedGroup("tz",

    +        options(
    +            namedGroup("tzOffset", options("\\+", "-", " ")+
    +                options("0"+DIGIT, "10", "11", "12")+optionally(optionally(":")+DIGIT+DIGIT)),

    +            TZ_CODE)); // tz: an offset introduced by '+', '-' or a space, with two-digit hours limited to 00-12 and optional [:]mm; or a zone code
    +
    +    private static String getDateTimeSeparatorPattern(String extraChars) {
    +        return options(" +"+optionally(anyChar(DATE_TIME_ANY_ORDER_GROUP_SEPARATOR+extraChars+",")),
    +                anyChar(DATE_TIME_ANY_ORDER_GROUP_SEPARATOR+extraChars+","))
    +            + anyChar(DATE_TIME_ANY_ORDER_GROUP_SEPARATOR+extraChars)+"*";
    +    }
    +    
    +    /**
    +     * Parses a date, optionally accompanied by a time and a time zone, by trying a sequence of
    +     * regular expressions assembled from the date, time and time-zone fragments of this class.
    +     * <p>
    +     * Date-first patterns are tried before time-first ones, and the first pattern to match wins.
    +     * Numeric months are one-based in the input; month names are resolved with a
    +     * {@link SimpleDateFormat} using the <code>MMM</code> pattern letter. When an AM/PM marker is
    +     * present the hour is read on a 12-hour clock (12 AM is midnight, 12 PM is noon).
    +     *
    +     * @param input the text to parse; must not be null (it is trimmed before matching)
    +     * @return the parsed instant, or an absent {@link Maybe} whose message names the part that
    +     *         could not be understood
    +     */
    +    @SuppressWarnings("deprecation")
    +    private static Maybe<Date> parseDateSimpleFlexibleFormatParser(String input) {
    +        input = input.trim();
    +
    +        String[] DATE_PATTERNS = new String[] {
    +            DATE_ONLY_WITH_INNER_SEPARATORS,
    +            DATE_ONLY_NO_SEPARATORS,
    +            DATE_WORDS_2,
    +            DATE_WORDS_3,
    +        };
    +        String[] TIME_PATTERNS = new String[] {
    +            TIME_ONLY_WITH_INNER_SEPARATORS,
    +            TIME_ONLY_NO_SEPARATORS
    +        };
    +        String[] TZ_PATTERNS = new String[] {
    +            // space then time zone with sign (+-) or code is preferred
    +            optionally(getDateTimeSeparatorPattern("")) + " " + TIME_ZONE_SIGNED_OFFSET,
    +            // then no TZ - but declare the named groups
    +            namedGroup("tz", namedGroup("tzOffset", "")+namedGroup("tzCode", "")),
    +            // then any separator then offset with sign
    +            getDateTimeSeparatorPattern("") + TIME_ZONE_SIGNED_OFFSET,
    +
    +            // try parsing with enforced separators before TZ first
    +            // (so e.g. in the case of DATE-0100, the -0100 is the time, not the timezone)
    +            // then relax below (e.g. in the case of DATE-TIME+0100)
    +
    +            // finally match DATE-TIME-1000 as time zone -1000
    +            // or DATE-TIME 1000 as TZ +1000 in case a + was supplied but converted to ' ' by web
    +            // (but be stricter about the format, two or four digits required, and hours <= 12 so as not to confuse with a year)
    +            optionally(getDateTimeSeparatorPattern("")) + TIME_ZONE_OPTIONALLY_SIGNED_OFFSET
    +        };
    +
    +        List<String> basePatterns = MutableList.of();
    +
    +        // patterns with date first; each entry leaves a group open which is closed (and made
    +        // optional) once the time pattern has been appended below
    +        String[] DATE_PATTERNS_UNCLOSED = new String[] {
    +            // separator before time *required* if date had separators
    +            DATE_ONLY_WITH_INNER_SEPARATORS + "("+getDateTimeSeparatorPattern("T"),
    +            // separator before time optional if date did not have separators
    +            DATE_ONLY_NO_SEPARATORS + "("+optionally(getDateTimeSeparatorPattern("T")),
    +            // separator before time required if date has words
    +            DATE_WORDS_2 + "("+getDateTimeSeparatorPattern("T"),
    +            DATE_WORDS_3 + "("+getDateTimeSeparatorPattern("T"),
    +        };
    +        for (String tzP: TZ_PATTERNS)
    +            for (String dateP: DATE_PATTERNS_UNCLOSED)
    +                for (String timeP: TIME_PATTERNS)
    +                    basePatterns.add("^" + dateP + timeP+")?" + tzP + "$");
    +
    +        // also allow time first, with the TZ at the very end (after the date)
    +        for (String tzP: TZ_PATTERNS)
    +            for (String dateP: DATE_PATTERNS)
    +                for (String timeP: TIME_PATTERNS)
    +                    basePatterns.add("^" + timeP + getDateTimeSeparatorPattern("") + dateP + tzP + "$");
    +        // and time first with the TZ straight after the time (before the date)
    +        for (String tzP: TZ_PATTERNS)
    +            for (String dateP: DATE_PATTERNS)
    +                for (String timeP: TIME_PATTERNS)
    +                    basePatterns.add("^" + timeP + tzP + getDateTimeSeparatorPattern("") + dateP + "$");
    +
    +        Maybe<Matcher> mm = Maybe.absent();
    +        for (String p: basePatterns) {
    +            mm = match(p, input);
    +            if (mm.isPresent()) break;
    +        }
    +        if (mm.isAbsent()) {
    +            return Maybe.absent("Unknown date format '"+input+"'; try ISO-8601, or 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss +0000'");
    +        }
    +
    +        Matcher m = mm.get();
    +        Calendar result;
    +
    +        String tz = m.group("tz");
    +
    +        int year = Integer.parseInt(m.group("year"));
    +        int day = Integer.parseInt(m.group("day"));
    +
    +        String monthS = m.group("month");
    +        int month;
    +        if (monthS.matches(DIGIT+"+")) {
    +            // Calendar months are zero-based
    +            month = Integer.parseInt(monthS)-1;
    +        } else {
    +            try {
    +                // let SimpleDateFormat resolve the month name; the year and day are placeholders
    +                month = new SimpleDateFormat("yyyy-MMM-dd").parse("2015-"+monthS+"-15").getMonth();
    +            } catch (ParseException e) {
    +                return Maybe.absent("Unknown date format '"+input+"': invalid month '"+monthS+"'; try 'yyyy-MM-dd HH:mm:ss.SSS +0000'");
    +            }
    +        }
    +
    +        if (Strings.isNonBlank(tz)) {
    +            TimeZone tzz = null;
    +            String tzCode = m.group("tzCode");
    +            if (Strings.isNonBlank(tzCode)) {
    +                tz = tzCode;
    +            }
    +            if (tz.matches(DIGIT+"+")) {
    +                // stick a plus in front in case it was submitted by a web form and turned into a space
    +                tz = "+"+tz;
    +            } else {
    +                tzz = getTimeZone(tz);
    +            }
    +            if (tzz==null) {
    +                // not a known zone id, so it has to be a numeric offset: [+-]H[H][[:]mm]
    +                Maybe<Matcher> tmm = match("^ ?(?<tzH>(\\+|\\-||)"+DIGIT+optionally(DIGIT)+")"+optionally(optionally(":")+namedGroup("tzM", DIGIT+DIGIT))+"$", tz);
    +                if (tmm.isAbsent()) {
    +                    return Maybe.absent("Unknown date format '"+input+"': invalid timezone '"+tz+"'; try 'yyyy-MM-dd HH:mm:ss.SSS +0000'");
    +                }
    +                Matcher tm = tmm.get();
    +                String tzH = tm.group("tzH");
    +                String tzM = tm.group("tzM");
    +                // Combine the magnitudes first and apply the sign last, so that the minutes carry the
    +                // same sign as the hours (-0130 is -90 minutes) and -00:30 does not lose its sign.
    +                int offsetMins = 60*Math.abs(Integer.parseInt(tzH)) + (tzM!=null ? Integer.parseInt(tzM) : 0);
    +                if (tzH.startsWith("-")) {
    +                    offsetMins = -offsetMins;
    +                }
    +                tzz = new SimpleTimeZone(offsetMins*60*1000, tz);
    +            }
    +            result = new GregorianCalendar(tzz);
    +        } else {
    +            // no zone given: interpret the fields in the JVM default time zone
    +            result = new GregorianCalendar();
    +        }
    +        // start from a blank calendar so unspecified fields (e.g. the time) are zero
    +        result.clear();
    +
    +        result.set(Calendar.YEAR, year);
    +        result.set(Calendar.MONTH, month);
    +        result.set(Calendar.DAY_OF_MONTH, day);
    +        if (m.group("hours")!=null) {
    +            int hours = Integer.parseInt(m.group("hours"));
    +            String meridian = m.group("meridian");
    +            if (Strings.isNonBlank(meridian)) {
    +                boolean pm = meridian.toLowerCase().startsWith("p");
    +                if (pm && hours>12) {
    +                    return Maybe.absent("Unknown date format '"+input+"': can't be "+hours+" PM; try 'yyyy-MM-dd HH:mm:ss.SSS +0000'");
    +                }
    +                if (hours==12) {
    +                    // 12 PM is noon (hour 12) and 12 AM is midnight (hour 0)
    +                    if (!pm) hours = 0;
    +                } else if (pm) {
    +                    hours += 12;
    +                }
    +            }
    +            result.set(Calendar.HOUR_OF_DAY, hours);
    +            String minsS = m.group("mins");
    +            if (Strings.isNonBlank(minsS)) {
    +                result.set(Calendar.MINUTE, Integer.parseInt(minsS));
    +            }
    +            String secsS = m.group("secs");
    +            if (Strings.isBlank(secsS)) {
    +                // leave at zero
    +            } else if (secsS.matches(DIGIT+DIGIT+"?")) {
    +                result.set(Calendar.SECOND, Integer.parseInt(secsS));
    +            } else {
    +                double s = Double.parseDouble(secsS);
    +                if (s>=0 && s<=60) {
    +                    // in double format, with correct period
    +                } else if (secsS.length()==5) {
    +                    // allow ssSSS with no punctuation
    +                    s /= 1000;
    +                } else {
    +                    return Maybe.absent("Unknown date format '"+input+"': invalid seconds '"+secsS+"'; try 'YYYY-MM-DD HH:mm:ss.SSS +0000'");
    +                }
    +                // round to whole millis before splitting, so that e.g. 12.345 cannot come out as 12s 344ms
    +                long totalMillis = Math.round(s*1000);
    +                result.set(Calendar.SECOND, (int)(totalMillis/1000));
    +                result.set(Calendar.MILLISECOND, (int)(totalMillis%1000));
    +            }
    +        }
    +
    +        return Maybe.of(result.getTime());
    +    }
    +    
    +    public static TimeZone getTimeZone(String code) {
    +        if (code.indexOf('/')==-1) {
    +            if ("Z".equals(code)) return getTimeZone("UTC");
    +            
    +            // get the time zone -- most short codes aren't accepted, so accept (and
prefer) certain common codes
    +            if ("EST".equals(code)) return getTimeZone("America/New_York");
    +            if ("EDT".equals(code)) return getTimeZone("America/New_York");
    +            if ("PST".equals(code)) return getTimeZone("America/Los_Angeles");
    +            if ("PDT".equals(code)) return getTimeZone("America/Los_Angeles");
    +            if ("CST".equals(code)) return getTimeZone("America/Chicago");
    +            if ("CDT".equals(code)) return getTimeZone("America/Chicago");
    +            if ("MST".equals(code)) return getTimeZone("America/Denver");
    +            if ("MDT".equals(code)) return getTimeZone("America/Denver");
    +
    +            if ("BST".equals(code)) return getTimeZone("Europe/London");  // otherwise
BST is Bangladesh!
    +            if ("CEST".equals(code)) return getTimeZone("Europe/Paris");
    +            // IST falls through to below, where it is treated as India (not Irish);
IDT not recognised
    +        }
    +        
    +        TimeZone tz = TimeZone.getTimeZone(code);
    +        if (tz!=null && !tz.equals(TimeZone.getTimeZone("GMT"))) {
    +            // recognized
    +            return tz;
    +        }
    +        // possibly unrecognized -- GMT returned if not known, bad TimeZone API!
    +        String timeZones[] = TimeZone.getAvailableIDs();
    +        for (String tzs: timeZones) {
    +            if (tzs.equals(code)) return tz;
    +        }
    +        // definitely unrecognized
    +        return null;
    +    }
    +    
    +    /**
    +     * Converts a time zone code, e.g. Europe/London, to its offset string as at the given day,
    +     * e.g. +0100 or +0000 depending on daylight savings.
    +     *
    +     * @return the offset string, or absent with a descriptive error if {@link #getTimeZone(String)}
    +     *         does not recognise the code
    +     */
    +    public static Maybe<String> getTimeZoneOffsetString(String tz, int year, int month, int day) {
    +        final TimeZone zone = getTimeZone(tz);
    +        if (zone != null) {
    +            return Maybe.of(getTimeZoneOffsetString(zone, year, month, day));
    +        }
    +        return Maybe.absent("Unknown time zone code: "+tz);
    +    }
    +    
    +    /**
    +     * As {@link #getTimeZoneOffsetString(String, int, int, int)} where the {@link TimeZone}
    +     * is already instantiated.
    +     *
    +     * @param tz the zone whose offset is wanted; must not be null
    +     * @param year the full calendar year, e.g. 2015
    +     * @param month the zero-based month, as used by {@link Calendar#MONTH}
    +     * @param day the day of the month
    +     * @return the offset from UTC in force at the start of that day in the given zone,
    +     *         formatted as a sign followed by two-digit hours and two-digit minutes
    +     */
    +    public static String getTimeZoneOffsetString(TimeZone tz, int year, int month, int day) {
    +        // Resolve the day in the target zone using a calendar. The deprecated Date(int,int,int)
    +        // constructor expects "year minus 1900", so passing the full year evaluated the offset
    +        // 1900 years in the future (and at midnight in the JVM default zone rather than in tz).
    +        Calendar startOfDay = new GregorianCalendar(tz);
    +        startOfDay.clear();
    +        startOfDay.set(year, month, day);
    +        int tzMins = tz.getOffset(startOfDay.getTimeInMillis())/60/1000;
    +        String tzStr = (tzMins<0 ? "-" : "+")
    +            + Strings.makePaddedString(""+(Math.abs(tzMins)/60), 2, "0", "")
    +            + Strings.makePaddedString(""+(Math.abs(tzMins)%60), 2, "0", "");
    +        return tzStr;
    +    }
    +
    +    /** Wraps the given regex in a named capturing group, i.e. <code>(?&lt;name&gt;pattern)</code>. */
    +    private static String namedGroup(String name, String pattern) {
    +        return new StringBuilder("(?<")
    +            .append(name)
    +            .append('>')
    +            .append(pattern)
    +            .append(')')
    +            .toString();
    +    }
    +    /** Wraps the given characters in a regex character class, i.e. <code>[charSet]</code>. */
    +    private static String anyChar(String charSet) {
    +        return new StringBuilder("[").append(charSet).append(']').toString();
    +    }
    +    private static String optionally(String pattern) {
    +        return "("+pattern+")?";
    --- End diff --
    
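    Would a non-capturing group, i.e. "(?:" + pattern + ")?", be better here? The parentheses are only needed to scope the "?" quantifier, and the code shown reads matches by group name only, never by index.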


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message