Return-Path: Delivered-To: apmail-jakarta-commons-dev-archive@apache.org Received: (qmail 63426 invoked from network); 30 Apr 2003 20:32:13 -0000 Received: from exchange.sun.com (192.18.33.10) by daedalus.apache.org with SMTP; 30 Apr 2003 20:32:13 -0000 Received: (qmail 10061 invoked by uid 97); 30 Apr 2003 20:34:17 -0000 Delivered-To: qmlist-jakarta-archive-commons-dev@nagoya.betaversion.org Received: (qmail 10054 invoked from network); 30 Apr 2003 20:34:16 -0000 Received: from daedalus.apache.org (HELO apache.org) (208.185.179.12) by nagoya.betaversion.org with SMTP; 30 Apr 2003 20:34:16 -0000 Received: (qmail 61662 invoked by uid 500); 30 Apr 2003 20:31:46 -0000 Mailing-List: contact commons-dev-help@jakarta.apache.org; run by ezmlm Precedence: bulk List-Unsubscribe: List-Subscribe: List-Help: List-Post: List-Id: "Jakarta Commons Developers List" Reply-To: "Jakarta Commons Developers List" Delivered-To: mailing list commons-dev@jakarta.apache.org Received: (qmail 61616 invoked by uid 500); 30 Apr 2003 20:31:46 -0000 Received: (qmail 61589 invoked from network); 30 Apr 2003 20:31:45 -0000 Received: from icarus.apache.org (208.185.179.13) by daedalus.apache.org with SMTP; 30 Apr 2003 20:31:45 -0000 Received: (qmail 52678 invoked by uid 1182); 30 Apr 2003 20:31:45 -0000 Date: 30 Apr 2003 20:31:45 -0000 Message-ID: <20030430203145.52677.qmail@icarus.apache.org> From: rleland@apache.org To: jakarta-commons-cvs@apache.org Subject: cvs commit: jakarta-commons/validator/src/share/org/apache/commons/validator UrlValidator.java X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N rleland 2003/04/30 13:31:45 Modified: validator/src/share/org/apache/commons/validator UrlValidator.java Log: Refactor UrlValidator by moving many of the method variables into the class proper to reduce overhead. Also factor out isValidAuthority(), which could be shared between EmailValidator and UrlValidator.
Revision Changes Path 1.2 +148 -132 jakarta-commons/validator/src/share/org/apache/commons/validator/UrlValidator.java Index: UrlValidator.java =================================================================== RCS file: /home/cvs/jakarta-commons/validator/src/share/org/apache/commons/validator/UrlValidator.java,v retrieving revision 1.1 retrieving revision 1.2 diff -u -r1.1 -r1.2 --- UrlValidator.java 30 Apr 2003 19:04:24 -0000 1.1 +++ UrlValidator.java 30 Apr 2003 20:31:45 -0000 1.2 @@ -83,14 +83,51 @@ * @version $Revision$ $Date$ */ public class UrlValidator implements Serializable { + private static final String alphaChars = "a-zA-Z"; //used + private static final String alphaNumChars = alphaChars + "\\d"; //used + private static final String specialChars = ";/@&=,.?:+$"; + private static final String validChars = "[^\\s" + specialChars + "]"; + + private static final String schemeChars = alphaChars; // Drop numeric, and "+-." for now + private static final String authorityChars = alphaNumChars + "\\-\\."; + private static final String atom = validChars + '+'; + + // ----- This expressions derived/taken from the BNF for URI (RFC2396) ------------- + private static final String urlPat = ValidatorUtil.getDelimitedRegExp("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"); + // 12 3 4 5 6 7 8 9 + private static final int PARSE_URL_SCHEME = 2; //Schema, (Protocol), http:, ftp:, file:, ... 
+ private static final int PARSE_URL_AUTHORITY = 4; //Includes host/ip port + private static final int PARSE_URL_PATH = 5; + private static final int PARSE_URL_QUERY = 7; + private static final int PARSE_URL_FRAGMENT = 9; + //Protocol eg: http:, ftp:,https: + private static final String schemePat = ValidatorUtil.getDelimitedRegExp("^[" + schemeChars + "]"); + private static final String authorityPat = ValidatorUtil.getDelimitedRegExp("^([" + authorityChars + "]*)(:\\d*)?(.*)?"); + // 1 2 3 4 + private static final int PARSE_AUTHORITY_HOST_IP = 1; + private static final int PARSE_AUTHORITY_PORT = 2; + private static final int PARSE_AUTHORITY_EXTRA = 3;//Should always be empty. + + + private static final String pathPat = ValidatorUtil.getDelimitedRegExp("^(/[-a-zA-Z0-9_:@&?=+,.!/~*'%$]*)$"); + private static final String queryPat = ValidatorUtil.getDelimitedRegExp("^(.*)$"); + private static final String legalAsciiPat = ValidatorUtil.getDelimitedRegExp("^[\\000-\\177]+$"); + private static final String ipV4DomainPat = ValidatorUtil.getDelimitedRegExp("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$"); + private static final String domainPat = ValidatorUtil.getDelimitedRegExp("^" + atom + "(\\." 
+ atom + ")*$"); + private static final String portPat = ValidatorUtil.getDelimitedRegExp("^:(\\d{1,5})$"); + private static final String atomPat = ValidatorUtil.getDelimitedRegExp("(" + atom + ")"); + private static final String alphaPat = ValidatorUtil.getDelimitedRegExp("^[" + alphaChars + "]"); + /** * Allow a double slash in the path componet such that * path//file is treated as path/file */ public static final String OPTION_ALLOW_2_SLASH = "allow2Slash"; public static final String OPTION_NO_FRAGMENT = "noFragment"; - boolean allow2Slash = false; - boolean noFragment = false; + + // Non static fields + private boolean allow2Slash = false; + private boolean noFragment = false; public UrlValidator() { @@ -137,55 +174,15 @@ public boolean isValid(String value) { boolean bValid = true; try { - String alphaChars = "a-zA-Z"; //used - String alphaNumChars = alphaChars + "\\d"; //used - String specialChars = ";/@&=,.?:+$"; - String validChars = "[^\\s" + specialChars + "]"; - - String schemeChars = alphaChars; // Drop numeric, and "+-." for now - String authorityChars = alphaNumChars + "\\-\\."; - String atom = validChars + '+'; - - // Each pattern must be surrounded by / - String legalAsciiPat = ValidatorUtil.getDelimitedRegExp("^[\\000-\\177]+$"); -// ----- This expressions derived from the BNF for URI (RFC2396) ------------- - String urlPat = ValidatorUtil.getDelimitedRegExp("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"); - // 12 3 4 5 6 7 8 9 - final int PARSE_SCHEME = 2; //Schema, (Protocol), http:, ftp:, file:, ... 
- final int PARSE_AUTHORITY = 4; //Includes host/ip port - final int PARSE_PATH = 5; - final int PARSE_QUERY = 7; - final int PARSE_FRAGMENT = 9; - //Protocol eg: http:, ftp:,https: - String schemePat = ValidatorUtil.getDelimitedRegExp("^[" + schemeChars + "]"); - String authorityPat = ValidatorUtil.getDelimitedRegExp("^([" + authorityChars + "]*)(:\\d*)?(.*)?"); - // 1 2 3 4 - final int PARSE_HOST_IP = 1; - final int PARSE_PORT = 2; - final int PARSE_AUTH_EXTRA = 3;//Should always be empty. - String ipV4DomainPat = ValidatorUtil.getDelimitedRegExp("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$"); - String domainPat = ValidatorUtil.getDelimitedRegExp("^" + atom + "(\\." + atom + ")*$"); - String portPat = ValidatorUtil.getDelimitedRegExp("^:(\\d{1,5})$"); - String pathPat = ValidatorUtil.getDelimitedRegExp("^(/[-a-zA-Z0-9_:@&?=+,.!/~*'%$]*)$"); - String queryPat = ValidatorUtil.getDelimitedRegExp("^(.*)$"); - String atomPat = ValidatorUtil.getDelimitedRegExp("(" + atom + ")"); - String alphaPat = ValidatorUtil.getDelimitedRegExp("^[" + alphaChars + "]"); + Perl5Util matchUrlPat = new Perl5Util(); Perl5Util matchSchemePat = new Perl5Util(); - Perl5Util matchAuthorityPat = new Perl5Util(); - Perl5Util matchIPV4Pat = new Perl5Util(); - Perl5Util matchDomainPat = new Perl5Util(); - Perl5Util matchPortPat = new Perl5Util(); Perl5Util matchPathPat = new Perl5Util(); Perl5Util matchQueryPat = new Perl5Util(); - Perl5Util matchAtomPat = new Perl5Util(); Perl5Util matchAsciiPat = new Perl5Util(); - Perl5Util matchAlphaPat = new Perl5Util(); - - if (!matchAsciiPat.match(legalAsciiPat, value)) { return false; } @@ -200,7 +197,7 @@ // Check the scheme component of the url address if (bValid) { - String scheme = matchUrlPat.group(PARSE_SCHEME); + String scheme = matchUrlPat.group(PARSE_URL_SCHEME); // See if "scheme" is valid bValid = matchSchemePat.match(schemePat, scheme); @@ -209,94 +206,14 @@ // Check the domain component of the url address if (bValid) { // Check the 
whole url address structure - bValid = matchAuthorityPat.match(authorityPat, matchUrlPat.group(PARSE_AUTHORITY)); + bValid = isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY)); if (bValid) { - boolean ipV4Address = false; - boolean hostname = false; - // check if authority is IP address or hostname - String hostIP = matchAuthorityPat.group(PARSE_HOST_IP); - ipV4Address = matchIPV4Pat.match(ipV4DomainPat, hostIP); - - if (ipV4Address) { - // this is an IP address so check components - for (int i = 1; i <= 4; i++) { - String ipSegment = matchIPV4Pat.group(i); - if (ipSegment != null && ipSegment.length() > 0) { - int iIpSegment = 0; - try { - iIpSegment = Integer.parseInt(ipSegment); - } catch (Exception e) { - bValid = false; - } - - if (iIpSegment > 255) { - bValid = false; - } - } else { - bValid = false; - } - } - } else { - // Domain is hostname name - hostname = matchDomainPat.match(domainPat, hostIP); - } - //rightmost hostname will never start with a digit. - if (hostname) { - // this is a hostname authority so check components - String[] domainSegment = new String[10]; - boolean match = true; - int segmentCount = 0; - int segmentLength = 0; - - while (match) { - match = matchAtomPat.match(atomPat, hostIP); - if (match) { - domainSegment[segmentCount] = matchAtomPat.group(1); - segmentLength = domainSegment[segmentCount].length() + 1; - hostIP = (segmentLength >= hostIP.length()) ? "" : hostIP.substring(segmentLength); - segmentCount++; - } - } - String topLevel = domainSegment[segmentCount - 1]; - if (topLevel.length() < 2 || topLevel.length() > 4) { - bValid = false; - } - - // First letter of top level must be a alpha - boolean isAlpha; - isAlpha = matchAlphaPat.match(alphaPat, topLevel.substring(0, 1)); - if (!isAlpha) { - bValid = false; - } - - // Make sure there's a host name preceding the authority. 
- if (segmentCount < 2) { - bValid = false; - } - } - - if (bValid) { - bValid = (hostname || ipV4Address); - } - - if (bValid) { - String port = matchAuthorityPat.group(PARSE_PORT); - if (port != null) { - bValid = matchPortPat.match(portPat, port); - } - } - - if (bValid) { - String extra = matchAuthorityPat.group(PARSE_AUTH_EXTRA); - bValid = ((extra == null) || (extra.length() == 0)); - } - // Check the path component of the url address if (bValid) { - String path = matchUrlPat.group(PARSE_PATH); + String path = matchUrlPat.group(PARSE_URL_PATH); // See if "path" is valid bValid = matchPathPat.match(pathPat, path); if (bValid) { //Shouldn't end with a '/' @@ -321,7 +238,7 @@ // Check the query component of the url address if (bValid) { - String query = matchUrlPat.group(PARSE_QUERY); + String query = matchUrlPat.group(PARSE_URL_QUERY); if (null != query) { // See if "query" is valid bValid = matchQueryPat.match(queryPat, query); @@ -329,7 +246,7 @@ } // Check the fragment component of the url address if (bValid) { - String fragment = matchUrlPat.group(PARSE_FRAGMENT); + String fragment = matchUrlPat.group(PARSE_URL_FRAGMENT); if (null != fragment) { bValid = (noFragment == false); } @@ -340,6 +257,105 @@ bValid = false; } + return bValid; + } + + boolean isValidAuthority(String authority) { + boolean bValid = true; + Perl5Util matchAuthorityPat = new Perl5Util(); + Perl5Util matchIPV4Pat = new Perl5Util(); + Perl5Util matchDomainPat = new Perl5Util(); + Perl5Util matchAtomPat = new Perl5Util(); + Perl5Util matchPortPat = new Perl5Util(); + Perl5Util matchAlphaPat = new Perl5Util(); + + + bValid = matchAuthorityPat.match(authorityPat, authority); + + + if (bValid) { + boolean ipV4Address = false; + boolean hostname = false; + // check if authority is IP address or hostname + String hostIP = matchAuthorityPat.group(PARSE_AUTHORITY_HOST_IP); + ipV4Address = matchIPV4Pat.match(ipV4DomainPat, hostIP); + + if (ipV4Address) { + // this is an IP address so check 
components + for (int i = 1; i <= 4; i++) { + String ipSegment = matchIPV4Pat.group(i); + if (ipSegment != null && ipSegment.length() > 0) { + int iIpSegment = 0; + try { + iIpSegment = Integer.parseInt(ipSegment); + } catch (Exception e) { + bValid = false; + } + + if (iIpSegment > 255) { + bValid = false; + } + } else { + bValid = false; + } + } + } else { + // Domain is hostname name + hostname = matchDomainPat.match(domainPat, hostIP); + } + //rightmost hostname will never start with a digit. + if (hostname) { + // this is a hostname authority so check components + String[] domainSegment = new String[10]; + boolean match = true; + int segmentCount = 0; + int segmentLength = 0; + + while (match) { + match = matchAtomPat.match(atomPat, hostIP); + if (match) { + domainSegment[segmentCount] = matchAtomPat.group(1); + segmentLength = domainSegment[segmentCount].length() + 1; + hostIP = (segmentLength >= hostIP.length()) ? "" : hostIP.substring(segmentLength); + segmentCount++; + } + } + String topLevel = domainSegment[segmentCount - 1]; + if (topLevel.length() < 2 || topLevel.length() > 4) { + bValid = false; + } + + // First letter of top level must be a alpha + boolean isAlpha; + isAlpha = matchAlphaPat.match(alphaPat, topLevel.substring(0, 1)); + if (!isAlpha) { + bValid = false; + } + + // Make sure there's a host name preceding the authority. 
+ if (segmentCount < 2) { + bValid = false; + } + } + + if (bValid) { + bValid = (hostname || ipV4Address); + } + + if (bValid) { + String port = matchAuthorityPat.group(PARSE_AUTHORITY_PORT); + if (port != null) { + bValid = matchPortPat.match(portPat, port); + } + } + + if (bValid) { + String extra = matchAuthorityPat.group(PARSE_AUTHORITY_EXTRA); + bValid = ((extra == null) || (extra.length() == 0)); + } + + + } return bValid; } --------------------------------------------------------------------- To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org For additional commands, e-mail: commons-dev-help@jakarta.apache.org