apr-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jor...@apache.org
Subject svn commit: r1002512 [11/12] - in /apr/apr-util/vendor/expat/current: ./ conftools/ doc/ examples/ lib/ xmlwf/
Date Wed, 29 Sep 2010 08:17:00 GMT
Added: apr/apr-util/vendor/expat/current/lib/xmltok_impl.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/lib/xmltok_impl.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/lib/xmltok_impl.c (added)
+++ apr/apr-util/vendor/expat/current/lib/xmltok_impl.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,1768 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+/* Fallback used when the including file supplies no IS_INVALID_CHAR: the
+ * test always evaluates to 0, so the IS_INVALID_CHAR branches in this file
+ * never reject a multi-byte sequence.
+ */
+#ifndef IS_INVALID_CHAR
+#define IS_INVALID_CHAR(enc, ptr, n) (0)
+#endif
+/* Expands to the `case BT_LEAD<n>:` arm of a byte-type switch for an n-byte
+ * character.  Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain
+ * before `end`; stores ptr in *nextTokPtr and returns XML_TOK_INVALID when
+ * IS_INVALID_CHAR accepts the sequence as invalid; otherwise advances ptr
+ * by n and leaves the switch.  `enc` and `end` are not macro parameters:
+ * they are taken from the scope in which the macro is expanded.
+ */
+#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
+    case BT_LEAD ## n: \
+      if (end - ptr < n) \
+	return XML_TOK_PARTIAL_CHAR; \
+      if (IS_INVALID_CHAR(enc, ptr, n)) { \
+        *(nextTokPtr) = (ptr); \
+        return XML_TOK_INVALID; \
+      } \
+      ptr += n; \
+      break;
+/* Switch arms shared by scanners that accept arbitrary character data:
+ * the 2-, 3- and 4-byte lead cases from INVALID_LEAD_CASE, followed by
+ * BT_NONXML, BT_MALFORM and BT_TRAIL, all three of which store ptr in
+ * *nextTokPtr and return XML_TOK_INVALID.
+ */
+#define INVALID_CASES(ptr, nextTokPtr) \
+  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
+  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
+  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
+  case BT_NONXML: \
+  case BT_MALFORM: \
+  case BT_TRAIL: \
+    *(nextTokPtr) = (ptr); \
+    return XML_TOK_INVALID;
+/* Expands to the `case BT_LEAD<n>:` arm that accepts an n-byte character
+ * only if IS_NAME_CHAR holds for it.  Returns XML_TOK_PARTIAL_CHAR when
+ * fewer than n bytes remain, XML_TOK_INVALID (with *nextTokPtr = ptr) when
+ * the character is not a name character; otherwise advances ptr by n and
+ * leaves the switch.
+ */
+#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
+   case BT_LEAD ## n: \
+     if (end - ptr < n) \
+       return XML_TOK_PARTIAL_CHAR; \
+     if (!IS_NAME_CHAR(enc, ptr, n)) { \
+       *nextTokPtr = ptr; \
+       return XML_TOK_INVALID; \
+     } \
+     ptr += n; \
+     break;
+/* Switch arms that consume one name character.  A BT_NONASCII unit is
+ * first checked with IS_NAME_CHAR_MINBPC (XML_TOK_INVALID at ptr if it
+ * fails) and then deliberately falls through into the arm shared by
+ * BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS, which advances ptr by
+ * MINBPC(enc) and leaves the switch.  The 2-, 3- and 4-byte lead cases come
+ * from CHECK_NAME_CASE.
+ */
+#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
+  case BT_NONASCII: \
+    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
+      *nextTokPtr = ptr; \
+      return XML_TOK_INVALID; \
+    } \
+  case BT_NMSTRT: \
+  case BT_HEX: \
+  case BT_DIGIT: \
+  case BT_NAME: \
+  case BT_MINUS: \
+    ptr += MINBPC(enc); \
+    break; \
+  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
+  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
+  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
+/* Expands to the `case BT_LEAD<n>:` arm that accepts an n-byte character
+ * only if IS_NMSTRT_CHAR holds for it.  Returns XML_TOK_PARTIAL_CHAR when
+ * fewer than n bytes remain, XML_TOK_INVALID (with *nextTokPtr = ptr) when
+ * the character cannot start a name; otherwise advances ptr by n and
+ * leaves the switch.
+ */
+#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
+   case BT_LEAD ## n: \
+     if (end - ptr < n) \
+       return XML_TOK_PARTIAL_CHAR; \
+     if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
+       *nextTokPtr = ptr; \
+       return XML_TOK_INVALID; \
+     } \
+     ptr += n; \
+     break;
+/* Switch arms that consume one name-start character.  A BT_NONASCII unit
+ * is first checked with IS_NMSTRT_CHAR_MINBPC (XML_TOK_INVALID at ptr if it
+ * fails) and then deliberately falls through into the arm shared by
+ * BT_NMSTRT and BT_HEX, which advances ptr by MINBPC(enc) and leaves the
+ * switch.  The 2-, 3- and 4-byte lead cases come from CHECK_NMSTRT_CASE.
+ */
+#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
+  case BT_NONASCII: \
+    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
+      *nextTokPtr = ptr; \
+      return XML_TOK_INVALID; \
+    } \
+  case BT_NMSTRT: \
+  case BT_HEX: \
+    ptr += MINBPC(enc); \
+    break; \
+  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
+  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
+  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
+/* Default PREFIX leaves every function name in this file unchanged.
+ * NOTE(review): presumably the including file defines PREFIX so that each
+ * encoding gets its own set of function names -- confirm in the includer.
+ */
+#ifndef PREFIX
+#define PREFIX(ident) ident
+#endif
+
+/* ptr points to character following "<!-"
+ *
+ * Scans the remainder of a comment.  The character at ptr must be the
+ * second '-' of the opening "<!--"; anything else is XML_TOK_INVALID at
+ * ptr.  The body is then scanned up to the first "--", which must be
+ * followed immediately by '>': on success *nextTokPtr is set just past the
+ * '>' and XML_TOK_COMMENT is returned, otherwise XML_TOK_INVALID is
+ * returned with *nextTokPtr on the character after the "--".  Characters
+ * handled by INVALID_CASES yield XML_TOK_INVALID, or XML_TOK_PARTIAL_CHAR
+ * for a truncated multi-byte sequence.  Running out of input returns
+ * XML_TOK_PARTIAL without writing *nextTokPtr.
+ */
+
+static
+int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
+			const char **nextTokPtr)
+{
+  if (ptr != end) {
+    if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+    ptr += MINBPC(enc);
+    while (ptr != end) {
+      switch (BYTE_TYPE(enc, ptr)) {
+      INVALID_CASES(ptr, nextTokPtr)
+      case BT_MINUS:
+	if ((ptr += MINBPC(enc)) == end)
+	  return XML_TOK_PARTIAL;
+	if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
+	  if ((ptr += MINBPC(enc)) == end)
+	    return XML_TOK_PARTIAL;
+	  if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+	    *nextTokPtr = ptr;
+	    return XML_TOK_INVALID;
+	  }
+	  *nextTokPtr = ptr + MINBPC(enc);
+	  return XML_TOK_COMMENT;
+	}
+	break; /* lone '-': ptr is already on the next character, re-examine it */
+      default:
+	ptr += MINBPC(enc);
+	break;
+      }
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "<!"
+ *
+ * Classifies what follows "<!":
+ *   '-'               the rest is handed to scanComment;
+ *   '['               XML_TOK_COND_SECT_OPEN, *nextTokPtr just past the '[';
+ *   BT_NMSTRT/BT_HEX  start of a declaration keyword, scanned by the loop.
+ * Only BT_NMSTRT and BT_HEX characters are accepted inside the keyword.
+ * The keyword ends at whitespace, or at a '%' that is not itself followed
+ * by whitespace or another '%'; XML_TOK_DECL_OPEN is then returned with
+ * *nextTokPtr left on that terminating character.  Any other character
+ * yields XML_TOK_INVALID at that character, and exhausted input yields
+ * XML_TOK_PARTIAL.
+ */
+
+static
+int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
+		     const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  case BT_MINUS:
+    return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_LSQB:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_COND_SECT_OPEN;
+  case BT_NMSTRT:
+  case BT_HEX:
+    ptr += MINBPC(enc);
+    break;
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_PERCNT:
+      if (ptr + MINBPC(enc) == end) /* need one character of lookahead */
+	return XML_TOK_PARTIAL;
+      /* don't allow <!ENTITY% foo "whatever"> */
+      switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
+      case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      /* fall through */
+    case BT_S: case BT_CR: case BT_LF:
+      *nextTokPtr = ptr;
+      return XML_TOK_DECL_OPEN;
+    case BT_NMSTRT:
+    case BT_HEX:
+      ptr += MINBPC(enc);
+      break;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+/* Classifies the processing-instruction target occupying [ptr, end).
+ *
+ * *tokPtr is set to XML_TOK_PI, except that a target spelled exactly "xml"
+ * (all lower case) sets it to XML_TOK_XML_DECL.
+ * Returns 1 when the target is acceptable.  Returns 0 when the target is
+ * three characters long and matches "xml" case-insensitively with at least
+ * one upper-case letter; scanPi reports that case as XML_TOK_INVALID.
+ */
+static
+int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
+{
+  int upper = 0;
+  *tokPtr = XML_TOK_PI;
+  if (end - ptr != MINBPC(enc)*3) /* only a three-character target can be "xml" */
+    return 1;
+  switch (BYTE_TO_ASCII(enc, ptr)) {
+  case ASCII_x:
+    break;
+  case ASCII_X:
+    upper = 1;
+    break;
+  default:
+    return 1;
+  }
+  ptr += MINBPC(enc);
+  switch (BYTE_TO_ASCII(enc, ptr)) {
+  case ASCII_m:
+    break;
+  case ASCII_M:
+    upper = 1;
+    break;
+  default:
+    return 1;
+  }
+  ptr += MINBPC(enc);
+  switch (BYTE_TO_ASCII(enc, ptr)) {
+  case ASCII_l:
+    break;
+  case ASCII_L:
+    upper = 1;
+    break;
+  default:
+    return 1;
+  }
+  if (upper) /* "XML", "Xml", ... : matches "xml" only case-insensitively */
+    return 0;
+  *tokPtr = XML_TOK_XML_DECL;
+  return 1;
+}
+
+/* ptr points to character following "<?"
+ *
+ * Scans a processing instruction.  The target must begin with a name-start
+ * character and continue with name characters.  When the target ends at
+ * whitespace or at '?', checkPiTarget chooses the token code; a target it
+ * rejects yields XML_TOK_INVALID at the character that ended the target.
+ * After whitespace, the body is skipped up to the first '?' that is
+ * directly followed by '>'.  A target followed directly by '?' must be
+ * closed by '>' at once, otherwise XML_TOK_INVALID is returned at the
+ * character after the '?'.  On success the code chosen by checkPiTarget is
+ * returned and *nextTokPtr is set just past the '>'.  Exhausted input
+ * returns XML_TOK_PARTIAL.
+ */
+
+static
+int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
+		   const char **nextTokPtr)
+{
+  int tok;
+  const char *target = ptr; /* start of the target, kept for checkPiTarget */
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_S: case BT_CR: case BT_LF:
+      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      ptr += MINBPC(enc);
+      while (ptr != end) {
+        switch (BYTE_TYPE(enc, ptr)) {
+        INVALID_CASES(ptr, nextTokPtr)
+	case BT_QUEST:
+	  ptr += MINBPC(enc);
+	  if (ptr == end)
+	    return XML_TOK_PARTIAL;
+	  if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+	    *nextTokPtr = ptr + MINBPC(enc);
+	    return tok;
+	  }
+	  break; /* '?' not followed by '>': re-examine the character at ptr */
+	default:
+	  ptr += MINBPC(enc);
+	  break;
+	}
+      }
+      return XML_TOK_PARTIAL;
+    case BT_QUEST:
+      if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      ptr += MINBPC(enc);
+      if (ptr == end)
+	return XML_TOK_PARTIAL;
+      if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+	*nextTokPtr = ptr + MINBPC(enc);
+	return tok;
+      }
+      /* fall through */
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* Matches the six characters "CDATA[" starting at ptr; scanLt calls this
+ * with ptr on the character following "<![".
+ *
+ * Returns XML_TOK_PARTIAL when fewer than six characters are available,
+ * XML_TOK_INVALID with *nextTokPtr on the first character that does not
+ * match, and XML_TOK_CDATA_SECT_OPEN with *nextTokPtr just past the '['
+ * when the whole keyword matches.
+ */
+static
+int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
+			     const char **nextTokPtr)
+{
+  static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
+  int i;
+  /* CDATA[ */
+  if (end - ptr < 6 * MINBPC(enc))
+    return XML_TOK_PARTIAL;
+  for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
+    if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_CDATA_SECT_OPEN;
+}
+/* Returns the next token inside a CDATA section, scanning [ptr, end).
+ *
+ *   XML_TOK_NONE              the range is empty;
+ *   XML_TOK_CDATA_SECT_CLOSE  the range starts with "]]>"; *nextTokPtr is
+ *                             set just past the '>';
+ *   XML_TOK_DATA_NEWLINE      the range starts with LF, CR or CR LF;
+ *   XML_TOK_DATA_CHARS        a run of other characters; *nextTokPtr marks
+ *                             where the run stopped (CR, LF, ']', a
+ *                             character the byte-type switch refuses, an
+ *                             incomplete multi-byte sequence, or end);
+ *   XML_TOK_INVALID           the first character is refused by
+ *                             INVALID_CASES;
+ *   XML_TOK_PARTIAL or XML_TOK_PARTIAL_CHAR
+ *                             more input is needed to classify the start
+ *                             of the range.
+ * When MINBPC(enc) > 1, `end` is first rounded down to a whole number of
+ * code units; the masking assumes MINBPC(enc) is a power of two.
+ */
+static
+int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
+			    const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_NONE;
+  if (MINBPC(enc) > 1) {
+    size_t n = end - ptr;
+    if (n & (MINBPC(enc) - 1)) {
+      n &= ~(MINBPC(enc) - 1);
+      if (n == 0)
+	return XML_TOK_PARTIAL;
+      end = ptr + n;
+    }
+  }
+  switch (BYTE_TYPE(enc, ptr)) {
+  case BT_RSQB:
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
+      break;
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+      ptr -= MINBPC(enc); /* back onto the second ']' so it can start a later "]]>" */
+      break;
+    }
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_CDATA_SECT_CLOSE;
+  case BT_CR:
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return XML_TOK_PARTIAL;
+    if (BYTE_TYPE(enc, ptr) == BT_LF)
+      ptr += MINBPC(enc);
+    *nextTokPtr = ptr;
+    return XML_TOK_DATA_NEWLINE;
+  case BT_LF:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_DATA_NEWLINE;
+  INVALID_CASES(ptr, nextTokPtr)
+  default:
+    ptr += MINBPC(enc);
+    break;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: \
+      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
+	*nextTokPtr = ptr; \
+	return XML_TOK_DATA_CHARS; \
+      } \
+      ptr += n; \
+      break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_NONXML:
+    case BT_MALFORM:
+    case BT_TRAIL:
+    case BT_CR:
+    case BT_LF:
+    case BT_RSQB:
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_DATA_CHARS;
+}
+
+/* ptr points to character following "</"
+ *
+ * Scans an end tag.  The name must begin with a name-start character and
+ * continue with name characters (plus ':' when XML_NS is defined).  After
+ * the name only whitespace may precede the closing '>'.  Returns
+ * XML_TOK_END_TAG with *nextTokPtr just past the '>', XML_TOK_INVALID with
+ * *nextTokPtr on the first unacceptable character, or XML_TOK_PARTIAL when
+ * input runs out before the '>'.
+ */
+
+static
+int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
+		       const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_S: case BT_CR: case BT_LF:
+      for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+	switch (BYTE_TYPE(enc, ptr)) {
+	case BT_S: case BT_CR: case BT_LF:
+	  break;
+	case BT_GT:
+	  *nextTokPtr = ptr + MINBPC(enc);
+          return XML_TOK_END_TAG;
+	default:
+	  *nextTokPtr = ptr;
+	  return XML_TOK_INVALID;
+	}
+      }
+      return XML_TOK_PARTIAL;
+#ifdef XML_NS
+    case BT_COLON:
+      /* no need to check qname syntax here, since end-tag must match exactly */
+      ptr += MINBPC(enc);
+      break;
+#endif
+    case BT_GT:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_END_TAG;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "&#x"
+ *
+ * Scans the digits of a hexadecimal character reference.  At least one
+ * BT_DIGIT or BT_HEX character is required, followed by any number more
+ * and a terminating ';'.  Returns XML_TOK_CHAR_REF with *nextTokPtr just
+ * past the ';', XML_TOK_INVALID with *nextTokPtr on the first character of
+ * any other type, or XML_TOK_PARTIAL when input runs out first.
+ */
+
+static
+int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
+			   const char **nextTokPtr)
+{
+  if (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_DIGIT:
+    case BT_HEX:
+      break;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+    for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_DIGIT:
+      case BT_HEX:
+	break;
+      case BT_SEMI:
+	*nextTokPtr = ptr + MINBPC(enc);
+	return XML_TOK_CHAR_REF;
+      default:
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "&#"
+ *
+ * Scans a numeric character reference.  A lower-case 'x' at ptr hands the
+ * rest to scanHexCharRef.  Otherwise at least one BT_DIGIT character is
+ * required, followed by any number more and a terminating ';'.  Returns
+ * XML_TOK_CHAR_REF with *nextTokPtr just past the ';', XML_TOK_INVALID
+ * with *nextTokPtr on the first character of any other type, or
+ * XML_TOK_PARTIAL when input runs out first.
+ */
+
+static
+int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
+			const char **nextTokPtr)
+{
+  if (ptr != end) {
+    if (CHAR_MATCHES(enc, ptr, ASCII_x))
+      return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_DIGIT:
+      break;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+    for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_DIGIT:
+	break;
+      case BT_SEMI:
+	*nextTokPtr = ptr + MINBPC(enc);
+	return XML_TOK_CHAR_REF;
+      default:
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "&"
+ *
+ * Scans a reference.  A '#' (BT_NUM) at ptr hands the rest to scanCharRef.
+ * Otherwise a name is required: a name-start character, then name
+ * characters, then ';'.  Returns XML_TOK_ENTITY_REF with *nextTokPtr just
+ * past the ';', XML_TOK_INVALID with *nextTokPtr on the first unacceptable
+ * character, or XML_TOK_PARTIAL when input runs out before the ';'.
+ */
+
+static
+int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
+		    const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  case BT_NUM:
+    return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_SEMI:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_ENTITY_REF;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following first character of attribute name
+ *
+ * Scans the attribute list of a start tag through to the end of the tag.
+ * Each attribute is: name characters (with at most one ':' per name when
+ * XML_NS is defined, which must be followed by a name-start character),
+ * optional whitespace, '=', optional whitespace, and a value delimited by
+ * matching '"' or '\'' characters.  Inside a value '<' is rejected, '&'
+ * must begin a reference accepted by scanRef, and characters handled by
+ * INVALID_CASES are rejected.  After a value the next character must be
+ * whitespace, '/' or '>'; after whitespace another attribute name may
+ * start.
+ *
+ * Returns XML_TOK_START_TAG_WITH_ATTS for a tag closed by '>' and
+ * XML_TOK_EMPTY_ELEMENT_WITH_ATTS for one closed by "/>", both with
+ * *nextTokPtr just past the '>'.  Returns XML_TOK_INVALID with *nextTokPtr
+ * on the offending character, XML_TOK_PARTIAL when input runs out, and
+ * passes through any other non-positive result from scanRef.
+ */
+
+static
+int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
+		     const char **nextTokPtr)
+{
+#ifdef XML_NS
+  int hadColon = 0;
+#endif
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+#ifdef XML_NS
+    case BT_COLON:
+      if (hadColon) {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      hadColon = 1;
+      ptr += MINBPC(enc);
+      if (ptr == end)
+	return XML_TOK_PARTIAL;
+      switch (BYTE_TYPE(enc, ptr)) {
+      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+      default:
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      break;
+#endif
+    case BT_S: case BT_CR: case BT_LF:
+      for (;;) { /* skip whitespace between the name and '=' */
+	int t;
+
+	ptr += MINBPC(enc);
+	if (ptr == end)
+	  return XML_TOK_PARTIAL;
+	t = BYTE_TYPE(enc, ptr);
+	if (t == BT_EQUALS)
+	  break;
+	switch (t) {
+	case BT_S:
+	case BT_LF:
+	case BT_CR:
+	  break;
+	default:
+	  *nextTokPtr = ptr;
+	  return XML_TOK_INVALID;
+	}
+      }
+    /* fall through */
+    case BT_EQUALS:
+      {
+	int open; /* byte type of the opening quote; the value ends at the same type */
+#ifdef XML_NS
+	hadColon = 0;
+#endif
+	for (;;) {
+	  
+	  ptr += MINBPC(enc);
+	  if (ptr == end)
+	    return XML_TOK_PARTIAL;
+	  open = BYTE_TYPE(enc, ptr);
+	  if (open == BT_QUOT || open == BT_APOS)
+	    break;
+	  switch (open) {
+	  case BT_S:
+	  case BT_LF:
+	  case BT_CR:
+	    break;
+	  default:
+	    *nextTokPtr = ptr;
+	    return XML_TOK_INVALID;
+	  }
+	}
+	ptr += MINBPC(enc);
+	/* in attribute value */
+	for (;;) {
+	  int t;
+	  if (ptr == end)
+	    return XML_TOK_PARTIAL;
+	  t = BYTE_TYPE(enc, ptr);
+	  if (t == open)
+	    break;
+	  switch (t) {
+	  INVALID_CASES(ptr, nextTokPtr)
+	  case BT_AMP:
+	    {
+	      int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); /* &ptr: scanRef writes its next-token position straight into ptr */
+	      if (tok <= 0) {
+		if (tok == XML_TOK_INVALID)
+		  *nextTokPtr = ptr;
+		return tok;
+	      }
+	      break;
+	    }
+	  case BT_LT:
+	    *nextTokPtr = ptr;
+	    return XML_TOK_INVALID;
+	  default:
+	    ptr += MINBPC(enc);
+	    break;
+	  }
+	}
+	ptr += MINBPC(enc);
+	if (ptr == end)
+	  return XML_TOK_PARTIAL;
+	switch (BYTE_TYPE(enc, ptr)) {
+	case BT_S:
+	case BT_CR:
+	case BT_LF:
+	  break;
+	case BT_SOL:
+	  goto sol;
+	case BT_GT:
+	  goto gt;
+	default:
+	  *nextTokPtr = ptr;
+	  return XML_TOK_INVALID;
+	}
+	/* ptr points to the whitespace character following the closing quote */
+	for (;;) {
+	  ptr += MINBPC(enc);
+	  if (ptr == end)
+	    return XML_TOK_PARTIAL;
+	  switch (BYTE_TYPE(enc, ptr)) {
+	  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+	  case BT_S: case BT_CR: case BT_LF:
+	    continue;
+	  case BT_GT:
+          gt:
+	    *nextTokPtr = ptr + MINBPC(enc);
+	    return XML_TOK_START_TAG_WITH_ATTS;
+	  case BT_SOL:
+          sol:
+	    ptr += MINBPC(enc);
+	    if (ptr == end)
+	      return XML_TOK_PARTIAL;
+	    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+	      *nextTokPtr = ptr;
+	      return XML_TOK_INVALID;
+	    }
+	    *nextTokPtr = ptr + MINBPC(enc);
+	    return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
+	  default:
+	    *nextTokPtr = ptr;
+	    return XML_TOK_INVALID;
+	  }
+	  break; /* reached only after a name-start character: next attribute begins */
+	}
+	break;
+      }
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+/* ptr points to character following "<"
+ *
+ * Dispatches on the character after '<' in content:
+ *   '!'  followed by '-' goes to scanComment, followed by '[' goes to
+ *        scanCdataSection; anything else is XML_TOK_INVALID;
+ *   '?'  goes to scanPi;
+ *   '/'  goes to scanEndTag;
+ *   a name-start character begins a start tag, scanned here.
+ * For a start tag the element name is scanned (with at most one ':' when
+ * XML_NS is defined, which must be followed by a name-start character).
+ * A '>' gives XML_TOK_START_TAG_NO_ATTS and "/>" gives
+ * XML_TOK_EMPTY_ELEMENT_NO_ATTS, both with *nextTokPtr just past the '>'.
+ * If whitespace after the name is followed by a name-start character, the
+ * result of scanAtts is returned instead.  Unacceptable characters yield
+ * XML_TOK_INVALID with *nextTokPtr on them; exhausted input yields
+ * XML_TOK_PARTIAL.
+ */
+
+static
+int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
+		   const char **nextTokPtr)
+{
+#ifdef XML_NS
+  int hadColon;
+#endif
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  case BT_EXCL:
+    if ((ptr += MINBPC(enc)) == end)
+      return XML_TOK_PARTIAL;
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_MINUS:
+      return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+    case BT_LSQB:
+      return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+    }
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  case BT_QUEST:
+    return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_SOL:
+    return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+#ifdef XML_NS
+  hadColon = 0;
+#endif
+  /* we have a start-tag */
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+#ifdef XML_NS
+    case BT_COLON:
+      if (hadColon) {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      hadColon = 1;
+      ptr += MINBPC(enc);
+      if (ptr == end)
+	return XML_TOK_PARTIAL;
+      switch (BYTE_TYPE(enc, ptr)) {
+      CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+      default:
+        *nextTokPtr = ptr;
+        return XML_TOK_INVALID;
+      }
+      break;
+#endif
+    case BT_S: case BT_CR: case BT_LF:
+      {
+        ptr += MINBPC(enc);
+	while (ptr != end) {
+	  switch (BYTE_TYPE(enc, ptr)) {
+	  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+	  case BT_GT:
+	    goto gt;
+	  case BT_SOL:
+	    goto sol;
+	  case BT_S: case BT_CR: case BT_LF:
+	    ptr += MINBPC(enc);
+	    continue;
+	  default:
+	    *nextTokPtr = ptr;
+	    return XML_TOK_INVALID;
+	  }
+	  return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); /* reached only after a name-start character was consumed */
+	}
+	return XML_TOK_PARTIAL;
+      }
+    case BT_GT:
+    gt:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_START_TAG_NO_ATTS;
+    case BT_SOL:
+    sol:
+      ptr += MINBPC(enc);
+      if (ptr == end)
+	return XML_TOK_PARTIAL;
+      if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+/* Returns the next token of element content, scanning [ptr, end).
+ *
+ *   XML_TOK_NONE           the range is empty;
+ *   '<' at ptr             result of scanLt;
+ *   '&' at ptr             result of scanRef;
+ *   XML_TOK_DATA_NEWLINE   the range starts with LF, CR or CR LF;
+ *   XML_TOK_TRAILING_CR    the range starts with a CR that is its last
+ *                          character;
+ *   XML_TOK_TRAILING_RSQB  the range starts with "]" or "]]" and ends there;
+ *   XML_TOK_INVALID        "]]>" appears (with *nextTokPtr on the '>'), or
+ *                          the first character is refused by INVALID_CASES;
+ *   XML_TOK_DATA_CHARS     a run of ordinary characters; *nextTokPtr marks
+ *                          where the run stopped ('&', '<', CR, LF, a ']'
+ *                          that cannot yet be ruled out as the start of
+ *                          "]]>", a refused or incomplete character, or
+ *                          end of input).
+ * When MINBPC(enc) > 1, `end` is first rounded down to a whole number of
+ * code units (XML_TOK_PARTIAL if that leaves nothing); the masking assumes
+ * MINBPC(enc) is a power of two.
+ */
+static
+int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
+		       const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_NONE;
+  if (MINBPC(enc) > 1) {
+    size_t n = end - ptr;
+    if (n & (MINBPC(enc) - 1)) {
+      n &= ~(MINBPC(enc) - 1);
+      if (n == 0)
+	return XML_TOK_PARTIAL;
+      end = ptr + n;
+    }
+  }
+  switch (BYTE_TYPE(enc, ptr)) {
+  case BT_LT:
+    return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_AMP:
+    return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_CR:
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return XML_TOK_TRAILING_CR;
+    if (BYTE_TYPE(enc, ptr) == BT_LF)
+      ptr += MINBPC(enc);
+    *nextTokPtr = ptr;
+    return XML_TOK_DATA_NEWLINE;
+  case BT_LF:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_DATA_NEWLINE;
+  case BT_RSQB:
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return XML_TOK_TRAILING_RSQB;
+    if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
+      break;
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return XML_TOK_TRAILING_RSQB;
+    if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+      ptr -= MINBPC(enc); /* back onto the second ']' so it can start a later "]]>" */
+      break;
+    }
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  INVALID_CASES(ptr, nextTokPtr)
+  default:
+    ptr += MINBPC(enc);
+    break;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: \
+      if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
+	*nextTokPtr = ptr; \
+	return XML_TOK_DATA_CHARS; \
+      } \
+      ptr += n; \
+      break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_RSQB:
+      if (ptr + MINBPC(enc) != end) {
+	 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
+	   ptr += MINBPC(enc);
+	   break;
+	 }
+	 if (ptr + 2*MINBPC(enc) != end) {
+	   if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
+	     ptr += MINBPC(enc);
+	     break;
+	   }
+	   *nextTokPtr = ptr + 2*MINBPC(enc);
+	   return XML_TOK_INVALID;
+	 }
+      }
+      /* fall through */
+    case BT_AMP:
+    case BT_LT:
+    case BT_NONXML:
+    case BT_MALFORM:
+    case BT_TRAIL:
+    case BT_CR:
+    case BT_LF:
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_DATA_CHARS;
+}
+
+/* ptr points to character following "%"
+ *
+ * Decides whether a '%' stands alone or begins a parameter-entity
+ * reference.  If the next character is whitespace or another '%',
+ * XML_TOK_PERCENT is returned with *nextTokPtr on that character.  If it
+ * is a name-start character, the name is scanned up to ';' and
+ * XML_TOK_PARAM_ENTITY_REF is returned with *nextTokPtr just past the ';'.
+ * Any other character yields XML_TOK_INVALID with *nextTokPtr on it, and
+ * exhausted input yields XML_TOK_PARTIAL.
+ */
+
+static
+int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
+			const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
+    *nextTokPtr = ptr;
+    return XML_TOK_PERCENT;
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_SEMI:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_PARAM_ENTITY_REF;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+/* Scans the name that follows a '#'; prologTok calls this with ptr on the
+ * character after the '#'.
+ *
+ * The name must begin with a name-start character and continue with name
+ * characters.  It ends at whitespace, ')', '>', '%' or '|', giving
+ * XML_TOK_POUND_NAME with *nextTokPtr on that terminator.  If input runs
+ * out while still inside the name, the negated code -XML_TOK_POUND_NAME is
+ * returned and *nextTokPtr is not written.  An empty range returns
+ * XML_TOK_PARTIAL; any other character yields XML_TOK_INVALID with
+ * *nextTokPtr on it.
+ */
+static
+int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
+			  const char **nextTokPtr)
+{
+  if (ptr == end)
+    return XML_TOK_PARTIAL;
+  switch (BYTE_TYPE(enc, ptr)) {
+  CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_CR: case BT_LF: case BT_S:
+    case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
+      *nextTokPtr = ptr;
+      return XML_TOK_POUND_NAME;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return -XML_TOK_POUND_NAME;
+}
+/* Scans a quoted literal; prologTok calls this with ptr on the character
+ * after the opening quote and `open` set to that quote's byte type
+ * (BT_QUOT or BT_APOS).
+ *
+ * The literal ends at the first quote of the same byte type; a quote of
+ * the other type is ordinary content.  The character after the closing
+ * quote must be whitespace, '>', '%' or '[': XML_TOK_LITERAL is returned
+ * if so and XML_TOK_INVALID otherwise, in both cases with *nextTokPtr on
+ * that following character.  If the closing quote is the last character
+ * available, -XML_TOK_LITERAL is returned.  Characters handled by
+ * INVALID_CASES are rejected, and input that ends before the closing quote
+ * yields XML_TOK_PARTIAL.
+ */
+static
+int PREFIX(scanLit)(int open, const ENCODING *enc,
+		    const char *ptr, const char *end,
+		    const char **nextTokPtr)
+{
+  while (ptr != end) {
+    int t = BYTE_TYPE(enc, ptr);
+    switch (t) {
+    INVALID_CASES(ptr, nextTokPtr)
+    case BT_QUOT:
+    case BT_APOS:
+      ptr += MINBPC(enc);
+      if (t != open) /* the other kind of quote is plain content */
+	break;
+      if (ptr == end)
+	return -XML_TOK_LITERAL;
+      *nextTokPtr = ptr;
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_S: case BT_CR: case BT_LF:
+      case BT_GT: case BT_PERCNT: case BT_LSQB:
+	return XML_TOK_LITERAL;
+      default:
+	return XML_TOK_INVALID;
+      }
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+/* Returns the next prolog token, scanning [ptr, end).
+ *
+ * Dispatch on the first character:
+ *   empty range   XML_TOK_NONE;
+ *   '"' or '\''   result of scanLit;
+ *   '<'           "<!" goes to scanDecl and "<?" to scanPi; '<' followed
+ *                 by a character that may start a name returns
+ *                 XML_TOK_INSTANCE_START with *nextTokPtr back on the '<';
+ *   whitespace    XML_TOK_PROLOG_S for the whole run; a CR that is the
+ *                 last character available is left out of the run, and a
+ *                 range consisting of a single CR returns -XML_TOK_PROLOG_S;
+ *   '%'           result of scanPercent;
+ *   '#'           result of scanPoundName;
+ *   ',' '[' '(' '|' '>'
+ *                 XML_TOK_COMMA, XML_TOK_OPEN_BRACKET, XML_TOK_OPEN_PAREN,
+ *                 XML_TOK_OR, XML_TOK_DECL_CLOSE;
+ *   ']'           XML_TOK_COND_SECT_CLOSE for "]]>", otherwise
+ *                 XML_TOK_CLOSE_BRACKET (negated if ']' ends the input);
+ *   ')'           XML_TOK_CLOSE_PAREN, or the _ASTERISK, _QUESTION or _PLUS
+ *                 variant when that suffix follows (negated if ')' ends the
+ *                 input);
+ *   name or name-token characters
+ *                 XML_TOK_NAME or XML_TOK_NMTOKEN (XML_TOK_PREFIXED_NAME
+ *                 is possible when XML_NS is defined), or
+ *                 XML_TOK_NAME_PLUS, XML_TOK_NAME_ASTERISK or
+ *                 XML_TOK_NAME_QUESTION when that suffix follows something
+ *                 other than an XML_TOK_NMTOKEN.
+ * A name that runs to the end of the input returns the negated token code.
+ * Unacceptable characters return XML_TOK_INVALID with *nextTokPtr on them;
+ * XML_TOK_PARTIAL or XML_TOK_PARTIAL_CHAR is returned when more input is
+ * needed.  When MINBPC(enc) > 1, `end` is first rounded down to a whole
+ * number of code units; the masking assumes MINBPC(enc) is a power of two.
+ */
+static
+int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
+		      const char **nextTokPtr)
+{
+  int tok;
+  if (ptr == end)
+    return XML_TOK_NONE;
+  if (MINBPC(enc) > 1) {
+    size_t n = end - ptr;
+    if (n & (MINBPC(enc) - 1)) {
+      n &= ~(MINBPC(enc) - 1);
+      if (n == 0)
+	return XML_TOK_PARTIAL;
+      end = ptr + n;
+    }
+  }
+  switch (BYTE_TYPE(enc, ptr)) {
+  case BT_QUOT:
+    return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_APOS:
+    return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_LT:
+    {
+      ptr += MINBPC(enc);
+      if (ptr == end)
+	return XML_TOK_PARTIAL;
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_EXCL:
+	return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+      case BT_QUEST:
+	return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+      case BT_NMSTRT:
+      case BT_HEX:
+      case BT_NONASCII:
+      case BT_LEAD2:
+      case BT_LEAD3:
+      case BT_LEAD4:
+	*nextTokPtr = ptr - MINBPC(enc); /* back on the '<' */
+	return XML_TOK_INSTANCE_START;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  case BT_CR:
+    if (ptr + MINBPC(enc) == end)
+      return -XML_TOK_PROLOG_S;
+    /* fall through */
+  case BT_S: case BT_LF:
+    for (;;) {
+      ptr += MINBPC(enc);
+      if (ptr == end)
+	break;
+      switch (BYTE_TYPE(enc, ptr)) {
+      case BT_S: case BT_LF:
+	break;
+      case BT_CR:
+	/* don't split CR/LF pair */
+	if (ptr + MINBPC(enc) != end)
+	  break;
+	/* fall through */
+      default:
+	*nextTokPtr = ptr;
+	return XML_TOK_PROLOG_S;
+      }
+    }
+    *nextTokPtr = ptr;
+    return XML_TOK_PROLOG_S;
+  case BT_PERCNT:
+    return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+  case BT_COMMA:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_COMMA;
+  case BT_LSQB:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_OPEN_BRACKET;
+  case BT_RSQB:
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return -XML_TOK_CLOSE_BRACKET;
+    if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
+      if (ptr + MINBPC(enc) == end)
+	return XML_TOK_PARTIAL;
+      if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
+	*nextTokPtr = ptr + 2*MINBPC(enc);
+	return XML_TOK_COND_SECT_CLOSE;
+      }
+    }
+    *nextTokPtr = ptr;
+    return XML_TOK_CLOSE_BRACKET;
+  case BT_LPAR:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_OPEN_PAREN;
+  case BT_RPAR:
+    ptr += MINBPC(enc);
+    if (ptr == end)
+      return -XML_TOK_CLOSE_PAREN;
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_AST:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_CLOSE_PAREN_ASTERISK;
+    case BT_QUEST:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_CLOSE_PAREN_QUESTION;
+    case BT_PLUS:
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_CLOSE_PAREN_PLUS;
+    case BT_CR: case BT_LF: case BT_S:
+    case BT_GT: case BT_COMMA: case BT_VERBAR:
+    case BT_RPAR:
+      *nextTokPtr = ptr;
+      return XML_TOK_CLOSE_PAREN;
+    }
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  case BT_VERBAR:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_OR;
+  case BT_GT:
+    *nextTokPtr = ptr + MINBPC(enc);
+    return XML_TOK_DECL_CLOSE;
+  case BT_NUM:
+    return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+#define LEAD_CASE(n) \
+  case BT_LEAD ## n: \
+    if (end - ptr < n) \
+      return XML_TOK_PARTIAL_CHAR; \
+    if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
+      ptr += n; \
+      tok = XML_TOK_NAME; \
+      break; \
+    } \
+    if (IS_NAME_CHAR(enc, ptr, n)) { \
+      ptr += n; \
+      tok = XML_TOK_NMTOKEN; \
+      break; \
+    } \
+    *nextTokPtr = ptr; \
+    return XML_TOK_INVALID;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+  case BT_NMSTRT:
+  case BT_HEX:
+    tok = XML_TOK_NAME;
+    ptr += MINBPC(enc);
+    break;
+  case BT_DIGIT:
+  case BT_NAME:
+  case BT_MINUS:
+#ifdef XML_NS
+  case BT_COLON:
+#endif
+    tok = XML_TOK_NMTOKEN;
+    ptr += MINBPC(enc);
+    break;
+  case BT_NONASCII:
+    if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
+      ptr += MINBPC(enc);
+      tok = XML_TOK_NAME;
+      break;
+    }
+    if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
+      ptr += MINBPC(enc);
+      tok = XML_TOK_NMTOKEN;
+      break;
+    }
+    /* fall through */
+  default:
+    *nextTokPtr = ptr;
+    return XML_TOK_INVALID;
+  }
+  while (ptr != end) { /* rest of the name or name token; tok holds its kind so far */
+    switch (BYTE_TYPE(enc, ptr)) {
+    CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+    case BT_GT: case BT_RPAR: case BT_COMMA:
+    case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
+    case BT_S: case BT_CR: case BT_LF:
+      *nextTokPtr = ptr;
+      return tok;
+#ifdef XML_NS
+    case BT_COLON:
+      ptr += MINBPC(enc);
+      switch (tok) {
+      case XML_TOK_NAME:
+	if (ptr == end)
+	  return XML_TOK_PARTIAL;
+	tok = XML_TOK_PREFIXED_NAME;
+	switch (BYTE_TYPE(enc, ptr)) {
+	CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
+	default:
+	  tok = XML_TOK_NMTOKEN; /* ':' not followed by a name character */
+	  break;
+	}
+	break;
+      case XML_TOK_PREFIXED_NAME:
+	tok = XML_TOK_NMTOKEN; /* a second ':' */
+	break;
+      }
+      break;
+#endif
+    case BT_PLUS:
+      if (tok == XML_TOK_NMTOKEN)  {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_NAME_PLUS;
+    case BT_AST:
+      if (tok == XML_TOK_NMTOKEN)  {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_NAME_ASTERISK;
+    case BT_QUEST:
+      if (tok == XML_TOK_NMTOKEN)  {
+	*nextTokPtr = ptr;
+	return XML_TOK_INVALID;
+      }
+      *nextTokPtr = ptr + MINBPC(enc);
+      return XML_TOK_NAME_QUESTION;
+    default:
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    }
+  }
+  return -tok;
+}
+/* Returns the next token of an attribute value, scanning [ptr, end).
+ *
+ * A special character is tokenized on its own only when it is the first
+ * character of the range; when met later it just ends the preceding
+ * XML_TOK_DATA_CHARS run, with *nextTokPtr left on it.
+ *   empty range  XML_TOK_NONE;
+ *   '&'          result of scanRef;
+ *   '<'          XML_TOK_INVALID wherever it occurs, *nextTokPtr on it;
+ *   LF           XML_TOK_DATA_NEWLINE;
+ *   CR           XML_TOK_DATA_NEWLINE (a following LF is consumed too), or
+ *                XML_TOK_TRAILING_CR when the CR ends the input;
+ *   BT_S         XML_TOK_ATTRIBUTE_VALUE_S;
+ *   otherwise    XML_TOK_DATA_CHARS up to the next special character or
+ *                end of input.
+ * Multi-byte lead bytes are skipped by their length with no check against
+ * `end` and no validity test.
+ * NOTE(review): presumably the text was validated by an earlier pass --
+ * confirm against the callers.
+ */
+static
+int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+			      const char **nextTokPtr)
+{
+  const char *start;
+  if (ptr == end)
+    return XML_TOK_NONE;
+  start = ptr;
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: ptr += n; break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_AMP:
+      if (ptr == start)
+	return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    case BT_LT:
+      /* this is for inside entity references */
+      *nextTokPtr = ptr;
+      return XML_TOK_INVALID;
+    case BT_LF:
+      if (ptr == start) {
+	*nextTokPtr = ptr + MINBPC(enc);
+	return XML_TOK_DATA_NEWLINE;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    case BT_CR:
+      if (ptr == start) {
+	ptr += MINBPC(enc);
+	if (ptr == end)
+	  return XML_TOK_TRAILING_CR;
+	if (BYTE_TYPE(enc, ptr) == BT_LF)
+	  ptr += MINBPC(enc);
+	*nextTokPtr = ptr;
+	return XML_TOK_DATA_NEWLINE;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    case BT_S:
+      if (ptr == start) {
+	*nextTokPtr = ptr + MINBPC(enc);
+	return XML_TOK_ATTRIBUTE_VALUE_S;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_DATA_CHARS;
+}
+/* Returns the next token of an entity value, scanning [ptr, end).
+ *
+ * A special character is tokenized on its own only when it is the first
+ * character of the range; when met later it just ends the preceding
+ * XML_TOK_DATA_CHARS run, with *nextTokPtr left on it.
+ *   empty range  XML_TOK_NONE;
+ *   '&'          result of scanRef;
+ *   '%'          result of scanPercent, except that XML_TOK_PERCENT (a
+ *                '%' that does not begin a reference) is turned into
+ *                XML_TOK_INVALID;
+ *   LF           XML_TOK_DATA_NEWLINE;
+ *   CR           XML_TOK_DATA_NEWLINE (a following LF is consumed too), or
+ *                XML_TOK_TRAILING_CR when the CR ends the input;
+ *   otherwise    XML_TOK_DATA_CHARS up to the next special character or
+ *                end of input.
+ * Multi-byte lead bytes are skipped by their length with no check against
+ * `end` and no validity test.
+ * NOTE(review): presumably the text was validated by an earlier pass --
+ * confirm against the callers.
+ */
+static
+int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+			   const char **nextTokPtr)
+{
+  const char *start;
+  if (ptr == end)
+    return XML_TOK_NONE;
+  start = ptr;
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: ptr += n; break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_AMP:
+      if (ptr == start)
+	return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    case BT_PERCNT:
+      if (ptr == start) {
+	int tok =  PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
+				       end, nextTokPtr);
+	return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    case BT_LF:
+      if (ptr == start) {
+	*nextTokPtr = ptr + MINBPC(enc);
+	return XML_TOK_DATA_NEWLINE;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    case BT_CR:
+      if (ptr == start) {
+	ptr += MINBPC(enc);
+	if (ptr == end)
+	  return XML_TOK_TRAILING_CR;
+	if (BYTE_TYPE(enc, ptr) == BT_LF)
+	  ptr += MINBPC(enc);
+	*nextTokPtr = ptr;
+	return XML_TOK_DATA_NEWLINE;
+      }
+      *nextTokPtr = ptr;
+      return XML_TOK_DATA_CHARS;
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+  }
+  *nextTokPtr = ptr;
+  return XML_TOK_DATA_CHARS;
+}
+
+#ifdef XML_DTD
+
+/* Scans forward from ptr for the "]]>" that closes an ignored conditional
+   section, counting nested "<![" openers on the way.
+   Returns XML_TOK_IGNORE_SECT with *nextTokPtr just past the closing "]]>"
+   when it is found at nesting level 0, XML_TOK_PARTIAL when the input runs
+   out first, and whatever INVALID_CASES returns for bytes it rejects. */
+static
+int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
+			     const char **nextTokPtr)
+{
+  int level = 0;
+  /* For multi-byte-unit encodings, drop a trailing fragment of a unit so
+     the loop below only sees whole units.  The masking relies on
+     MINBPC(enc) being a power of two. */
+  if (MINBPC(enc) > 1) {
+    size_t n = end - ptr;
+    if (n & (MINBPC(enc) - 1)) {
+      n &= ~(MINBPC(enc) - 1);
+      end = ptr + n;
+    }
+  }
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    INVALID_CASES(ptr, nextTokPtr)
+    case BT_LT:
+      /* "<![" opens a nested section. */
+      if ((ptr += MINBPC(enc)) == end)
+	return XML_TOK_PARTIAL;
+      if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
+	if ((ptr += MINBPC(enc)) == end)
+	  return XML_TOK_PARTIAL;
+	if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
+	  ++level;
+	  ptr += MINBPC(enc);
+	}
+      }
+      break;
+    case BT_RSQB:
+      /* "]]>" closes the innermost open section. */
+      if ((ptr += MINBPC(enc)) == end)
+	return XML_TOK_PARTIAL;
+      if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
+	if ((ptr += MINBPC(enc)) == end)
+	  return XML_TOK_PARTIAL;
+	if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
+	  ptr += MINBPC(enc);
+	  if (level == 0) {
+	    *nextTokPtr = ptr;
+	    return XML_TOK_IGNORE_SECT;
+	  }
+	  --level;
+	}
+      }
+      break;
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+  }
+  return XML_TOK_PARTIAL;
+}
+
+#endif /* XML_DTD */
+
+/* Checks the characters of a public identifier literal.
+   The first and the last unit of [ptr, end) are skipped before checking
+   (presumably the surrounding quotes -- confirm against the caller).
+   Returns 1 when every remaining character is acceptable; otherwise
+   returns 0 and stores the position of the first offending character in
+   *badPtr. */
+static
+int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
+		       const char **badPtr)
+{
+  ptr += MINBPC(enc);
+  end -= MINBPC(enc);
+  for (; ptr != end; ptr += MINBPC(enc)) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    case BT_DIGIT:
+    case BT_HEX:
+    case BT_MINUS:
+    case BT_APOS:
+    case BT_LPAR:
+    case BT_RPAR:
+    case BT_PLUS:
+    case BT_COMMA:
+    case BT_SOL:
+    case BT_EQUALS:
+    case BT_QUEST:
+    case BT_CR:
+    case BT_LF:
+    case BT_SEMI:
+    case BT_EXCL:
+    case BT_AST:
+    case BT_PERCNT:
+    case BT_NUM:
+#ifdef XML_NS
+    case BT_COLON:
+#endif
+      break;
+    case BT_S:
+      /* Of the characters classified BT_S, a tab is rejected. */
+      if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
+	*badPtr = ptr;
+	return 0;
+      }
+      break;
+    case BT_NAME:
+    case BT_NMSTRT:
+      /* Name characters are accepted only when they are 7-bit ASCII. */
+      if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
+	break;
+      /* fall through: treat a non-ASCII name character like any other */
+    default:
+      switch (BYTE_TO_ASCII(enc, ptr)) {
+      case 0x24: /* $ */
+      case 0x40: /* @ */
+	break;
+      default:
+	*badPtr = ptr;
+	return 0;
+      }
+      break;
+    }
+  }
+  return 1;
+}
+
+/* This must only be called for a well-formed start-tag or empty element tag.
+Returns the number of attributes.  Pointers to the first attsMax attributes
+are stored in atts (name, valuePtr, valueEnd and the normalized flag);
+attributes beyond attsMax are still counted but not recorded.
+The scan has no end pointer: it stops at the first '>' or '/' seen outside
+an attribute value, which is why the tag must already be well-formed. */
+
+static
+int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
+		    int attsMax, ATTRIBUTE *atts)
+{
+  /* Starts in inName: the first name encountered is the element name,
+     which must not be recorded as an attribute. */
+  enum { other, inName, inValue } state = inName;
+  int nAtts = 0;
+  int open = 0; /* defined when state == inValue;
+		   initialization just to shut up compilers */
+
+  for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
+    switch (BYTE_TYPE(enc, ptr)) {
+      /* Begin a new attribute name when we are between attributes. */
+#define START_NAME \
+      if (state == other) { \
+	if (nAtts < attsMax) { \
+	  atts[nAtts].name = ptr; \
+	  atts[nAtts].normalized = 1; \
+	} \
+	state = inName; \
+      }
+      /* The loop header adds MINBPC(enc) itself, so a lead byte only
+         advances by the remainder of its sequence here. */
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_NONASCII:
+    case BT_NMSTRT:
+    case BT_HEX:
+      START_NAME
+      break;
+#undef START_NAME
+    case BT_QUOT:
+      if (state != inValue) {
+	if (nAtts < attsMax)
+	  atts[nAtts].valuePtr = ptr + MINBPC(enc);
+        state = inValue;
+        open = BT_QUOT;
+      }
+      /* Only the same kind of quote that opened the value closes it. */
+      else if (open == BT_QUOT) {
+        state = other;
+	if (nAtts < attsMax)
+	  atts[nAtts].valueEnd = ptr;
+	nAtts++;
+      }
+      break;
+    case BT_APOS:
+      if (state != inValue) {
+	if (nAtts < attsMax)
+	  atts[nAtts].valuePtr = ptr + MINBPC(enc);
+        state = inValue;
+        open = BT_APOS;
+      }
+      else if (open == BT_APOS) {
+        state = other;
+	if (nAtts < attsMax)
+	  atts[nAtts].valueEnd = ptr;
+	nAtts++;
+      }
+      break;
+    case BT_AMP:
+      /* A '&' clears the normalized flag of the current attribute. */
+      if (nAtts < attsMax)
+	atts[nAtts].normalized = 0;
+      break;
+    case BT_S:
+      if (state == inName)
+        state = other;
+      /* Inside a value, clear the normalized flag when this whitespace is
+         the first character of the value, is not a plain space, is
+         followed by another space, or is followed by the closing quote. */
+      else if (state == inValue
+	       && nAtts < attsMax
+	       && atts[nAtts].normalized
+	       && (ptr == atts[nAtts].valuePtr
+		   || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
+		   || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
+	           || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
+	atts[nAtts].normalized = 0;
+      break;
+    case BT_CR: case BT_LF:
+      /* This case ensures that the first attribute name is counted
+         Apart from that we could just change state on the quote. */
+      if (state == inName)
+        state = other;
+      else if (state == inValue && nAtts < attsMax)
+	atts[nAtts].normalized = 0;
+      break;
+    case BT_GT:
+    case BT_SOL:
+      /* End of the tag, unless the character is part of a value. */
+      if (state != inValue)
+	return nAtts;
+      break;
+    default:
+      break;
+    }
+  }
+  /* not reached */
+}
+
+/* Computes the value of a numeric character reference.  ptr points at the
+   leading "&#"; digits are read up to the terminating ';'.
+   Returns -1 as soon as the accumulated value reaches 0x110000, otherwise
+   the result of checkCharRefNumber() on the value.
+   The digits themselves are not validated here (the decimal loop adds
+   c - ASCII_0 for whatever precedes the ';', and the hex switch ignores
+   unknown characters) -- presumably the reference was already scanned by
+   the tokenizer; confirm against callers. */
+static
+int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
+{
+  int result = 0;
+  /* skip &# */
+  ptr += 2*MINBPC(enc);
+  if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
+    /* Hexadecimal form: &#x...; */
+    for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
+      int c = BYTE_TO_ASCII(enc, ptr);
+      switch (c) {
+      case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
+      case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
+	result <<= 4;
+	result |= (c - ASCII_0);
+	break;
+      case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
+	result <<= 4;
+	result += 10 + (c - ASCII_A);
+	break;
+      case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
+	result <<= 4;
+	result += 10 + (c - ASCII_a);
+	break;
+      }
+      /* Checked after every digit, so result never grows far enough to
+         overflow int before the early return. */
+      if (result >= 0x110000)
+	return -1;
+    }
+  }
+  else {
+    /* Decimal form: &#...; */
+    for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
+      int c = BYTE_TO_ASCII(enc, ptr);
+      result *= 10;
+      result += (c - ASCII_0);
+      if (result >= 0x110000)
+	return -1;
+    }
+  }
+  return checkCharRefNumber(result);
+}
+
+/* Recognizes the five predefined entity names in [ptr, end).
+   Returns the ASCII code of the character the entity stands for
+   (ASCII_LT, ASCII_GT, ASCII_AMP, ASCII_QUOT or ASCII_APOS) for
+   "lt", "gt", "amp", "quot" and "apos", and 0 for any other name. */
+static
+int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
+{
+  /* Dispatch on the name length in characters. */
+  switch ((end - ptr)/MINBPC(enc)) {
+  case 2:
+    /* "lt" or "gt": check the shared second letter first. */
+    if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
+      switch (BYTE_TO_ASCII(enc, ptr)) {
+      case ASCII_l:
+	return ASCII_LT;
+      case ASCII_g:
+	return ASCII_GT;
+      }
+    }
+    break;
+  case 3:
+    /* "amp" */
+    if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
+      ptr += MINBPC(enc);
+      if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
+	ptr += MINBPC(enc);
+	if (CHAR_MATCHES(enc, ptr, ASCII_p))
+	  return ASCII_AMP;
+      }
+    }
+    break;
+  case 4:
+    switch (BYTE_TO_ASCII(enc, ptr)) {
+    case ASCII_q:
+      /* "quot" */
+      ptr += MINBPC(enc);
+      if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
+	ptr += MINBPC(enc);
+	if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
+	  ptr += MINBPC(enc);
+  	  if (CHAR_MATCHES(enc, ptr, ASCII_t))
+	    return ASCII_QUOT;
+	}
+      }
+      break;
+    case ASCII_a:
+      /* "apos" */
+      ptr += MINBPC(enc);
+      if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
+	ptr += MINBPC(enc);
+	if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
+	  ptr += MINBPC(enc);
+  	  if (CHAR_MATCHES(enc, ptr, ASCII_s))
+	    return ASCII_APOS;
+	}
+      }
+      break;
+    }
+  }
+  return 0;
+}
+
+/* Compares the names starting at ptr1 and ptr2 byte by byte.
+   Neither name has an explicit end: each ends at the first byte that is
+   not classified as a name character.  Returns 1 when the two names are
+   identical and end together, 0 otherwise. */
+static
+int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
+{
+  for (;;) {
+    switch (BYTE_TYPE(enc, ptr1)) {
+      /* The three lead-byte cases are emitted in the order 4, 3, 2 and
+         deliberately have no break: entering at BT_LEADn compares one
+         byte per case label passed through, and the comparison after the
+         macros supplies the last one, for n bytes in total. */
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: \
+      if (*ptr1++ != *ptr2++) \
+	return 0;
+    LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
+#undef LEAD_CASE
+      /* fall through */
+      if (*ptr1++ != *ptr2++)
+	return 0;
+      break;
+    case BT_NONASCII:
+    case BT_NMSTRT:
+#ifdef XML_NS
+    case BT_COLON:
+#endif
+    case BT_HEX:
+    case BT_DIGIT:
+    case BT_NAME:
+    case BT_MINUS:
+      /* One character of MINBPC(enc) bytes (at most 4): compare each byte. */
+      if (*ptr2++ != *ptr1++)
+	return 0;
+      if (MINBPC(enc) > 1) {
+	if (*ptr2++ != *ptr1++)
+	  return 0;
+	if (MINBPC(enc) > 2) {
+	  if (*ptr2++ != *ptr1++)
+	    return 0;
+          if (MINBPC(enc) > 3) {
+	    if (*ptr2++ != *ptr1++)
+      	      return 0;
+	  }
+	}
+      }
+      break;
+    default:
+      /* The first name has ended.  With single-byte units an identical
+         byte in the second name means it has ended too. */
+      if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
+	return 1;
+      /* Otherwise the names match only if the second one also stops here. */
+      switch (BYTE_TYPE(enc, ptr2)) {
+      case BT_LEAD2:
+      case BT_LEAD3:
+      case BT_LEAD4:
+      case BT_NONASCII:
+      case BT_NMSTRT:
+#ifdef XML_NS
+      case BT_COLON:
+#endif
+      case BT_HEX:
+      case BT_DIGIT:
+      case BT_NAME:
+      case BT_MINUS:
+	return 0;
+      default:
+	return 1;
+      }
+    }
+  }
+  /* not reached */
+}
+
+/* Tests whether the encoded name in [ptr1, end1) spells exactly the
+   NUL-terminated ASCII string ptr2.  Returns 1 on an exact match and 0
+   when the name is shorter, longer, or differs in any character. */
+static
+int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
+			     const char *end1, const char *ptr2)
+{
+  while (*ptr2) {
+    /* The name ran out first, or this character differs. */
+    if (ptr1 == end1 || !CHAR_MATCHES(enc, ptr1, *ptr2))
+      return 0;
+    ptr1 += MINBPC(enc);
+    ptr2++;
+  }
+  /* The ASCII string is exhausted; the name must be exhausted as well. */
+  return ptr1 == end1;
+}
+
+/* Returns the length in bytes of the name starting at ptr, i.e. the
+   distance to the first byte that is not classified as a name character.
+   There is no end pointer, so the input must contain such a byte. */
+static
+int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
+{
+  const char *start = ptr;
+  for (;;) {
+    switch (BYTE_TYPE(enc, ptr)) {
+    /* Lead byte of an n-byte sequence: skip the whole sequence. */
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: ptr += n; break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_NONASCII:
+    case BT_NMSTRT:
+#ifdef XML_NS
+    case BT_COLON:
+#endif
+    case BT_HEX:
+    case BT_DIGIT:
+    case BT_NAME:
+    case BT_MINUS:
+      ptr += MINBPC(enc);
+      break;
+    default:
+      return ptr - start;
+    }
+  }
+}
+
+/* Returns a pointer to the first character at or after ptr that is not
+   classified as BT_LF, BT_CR or BT_S.  There is no end pointer, so the
+   input must contain such a character. */
+static
+const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
+{
+  for (;;) {
+    int type = BYTE_TYPE(enc, ptr);
+    if (type != BT_LF && type != BT_CR && type != BT_S)
+      return ptr;
+    ptr += MINBPC(enc);
+  }
+}
+
+/* Advances *pos over the text in [ptr, end): lineNumber is incremented for
+   every LF, CR or CR LF pair, and columnNumber counts the characters seen
+   since the last line break (a multi-byte sequence counts as one). */
+static
+void PREFIX(updatePosition)(const ENCODING *enc,
+			    const char *ptr,
+			    const char *end,
+			    POSITION *pos)
+{
+  while (ptr != end) {
+    switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: \
+      ptr += n; \
+      break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_LF:
+      /* Set to (unsigned)-1 so that the increment at the bottom of the
+         loop wraps the column to 0. */
+      pos->columnNumber = (unsigned)-1;
+      pos->lineNumber++;
+      ptr += MINBPC(enc);
+      break;
+    case BT_CR:
+      pos->lineNumber++;
+      ptr += MINBPC(enc);
+      /* CR LF is a single line break. */
+      if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
+	ptr += MINBPC(enc);
+      pos->columnNumber = (unsigned)-1;
+      break;
+    default:
+      ptr += MINBPC(enc);
+      break;
+    }
+    pos->columnNumber++;
+  }
+}
+
+#undef DO_LEAD_CASE
+#undef MULTIBYTE_CASES
+#undef INVALID_CASES
+#undef CHECK_NAME_CASE
+#undef CHECK_NAME_CASES
+#undef CHECK_NMSTRT_CASE
+#undef CHECK_NMSTRT_CASES

Added: apr/apr-util/vendor/expat/current/lib/xmltok_impl.h
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/lib/xmltok_impl.h?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/lib/xmltok_impl.h (added)
+++ apr/apr-util/vendor/expat/current/lib/xmltok_impl.h Wed Sep 29 08:16:58 2010
@@ -0,0 +1,46 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+
+/* Byte type codes: the classification that the tokenizer code in
+   xmltok_impl.c switches on.
+   NOTE(review): the values are implicit (0, 1, 2, ...) and are presumably
+   stored in per-encoding lookup tables, so the order should not be
+   changed -- confirm against the tables in xmltok.c. */
+enum {
+  BT_NONXML,
+  BT_MALFORM,
+  BT_LT,
+  BT_AMP,
+  BT_RSQB,
+  BT_LEAD2,
+  BT_LEAD3,
+  BT_LEAD4,
+  BT_TRAIL,
+  BT_CR,
+  BT_LF,
+  BT_GT,
+  BT_QUOT,
+  BT_APOS,
+  BT_EQUALS,
+  BT_QUEST,
+  BT_EXCL,
+  BT_SOL,
+  BT_SEMI,
+  BT_NUM,
+  BT_LSQB,
+  BT_S,
+  BT_NMSTRT,
+  BT_COLON,
+  BT_HEX,
+  BT_DIGIT,
+  BT_NAME,
+  BT_MINUS,
+  BT_OTHER, /* known not to be a name or name start character */
+  BT_NONASCII, /* might be a name or name start character */
+  BT_PERCNT,
+  BT_LPAR,
+  BT_RPAR,
+  BT_AST,
+  BT_PLUS,
+  BT_COMMA,
+  BT_VERBAR
+};
+
+#include <stddef.h>

Added: apr/apr-util/vendor/expat/current/lib/xmltok_ns.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/lib/xmltok_ns.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/lib/xmltok_ns.c (added)
+++ apr/apr-util/vendor/expat/current/lib/xmltok_ns.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,98 @@
+/* Returns the ENCODING object embedded in internal_utf8_encoding. */
+const ENCODING *NS(XmlGetUtf8InternalEncoding)(void)
+{
+  return &ns(internal_utf8_encoding).enc;
+}
+
+/* Returns the internal UTF-16 encoding that matches the host byte order:
+   the little-endian one when XML_BYTE_ORDER is 12, the big-endian one
+   when it is 21, and otherwise whichever a run-time probe selects. */
+const ENCODING *NS(XmlGetUtf16InternalEncoding)(void)
+{
+#if XML_BYTE_ORDER == 12
+  return &ns(internal_little2_encoding).enc;
+#elif XML_BYTE_ORDER == 21
+  return &ns(internal_big2_encoding).enc;
+#else
+  /* The first byte of a short holding 1 is non-zero only on a
+     little-endian host. */
+  const short n = 1;
+  return *(const char *)&n ? &ns(internal_little2_encoding).enc : &ns(internal_big2_encoding).enc;
+#endif
+}
+
+/* Table of built-in encodings, indexed by the value getEncodingIndex()
+   returns (see findEncoding below).
+   NOTE(review): the slot order presumably mirrors the encoding index
+   constants defined in xmltok.c, with the two big2 entries covering
+   "UTF-16" and "UTF-16BE" -- confirm there before reordering. */
+static
+const ENCODING *NS(encodings)[] = {
+  &ns(latin1_encoding).enc,
+  &ns(ascii_encoding).enc,
+  &ns(utf8_encoding).enc,
+  &ns(big2_encoding).enc,
+  &ns(big2_encoding).enc,
+  &ns(little2_encoding).enc,
+  &ns(utf8_encoding).enc /* NO_ENC */
+};
+
+/* Scanner installed for XML_PROLOG_STATE by XmlInitEncoding: forwards to
+   initScan() with this variant's encodings table. */
+static
+int NS(initScanProlog)(const ENCODING *enc, const char *ptr, const char *end,
+		       const char **nextTokPtr)
+{
+  return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_PROLOG_STATE, ptr, end, nextTokPtr);
+}
+
+/* Scanner installed for XML_CONTENT_STATE by XmlInitEncoding: forwards to
+   initScan() with this variant's encodings table. */
+static
+int NS(initScanContent)(const ENCODING *enc, const char *ptr, const char *end,
+		       const char **nextTokPtr)
+{
+  return initScan(NS(encodings), (const INIT_ENCODING *)enc, XML_CONTENT_STATE, ptr, end, nextTokPtr);
+}
+
+/* Prepares the initial encoding *p for the encoding called name and makes
+   *encPtr point at it.  p keeps encPtr, so *encPtr can be redirected
+   later (presumably by initScan once the real encoding is known --
+   confirm in xmltok.c).
+   Returns 1 on success, 0 when getEncodingIndex() does not know name. */
+int NS(XmlInitEncoding)(INIT_ENCODING *p, const ENCODING **encPtr, const char *name)
+{
+  int i = getEncodingIndex(name);
+  if (i == UNKNOWN_ENC)
+    return 0;
+  SET_INIT_ENC_INDEX(p, i);
+  p->initEnc.scanners[XML_PROLOG_STATE] = NS(initScanProlog);
+  p->initEnc.scanners[XML_CONTENT_STATE] = NS(initScanContent);
+  p->initEnc.updatePosition = initUpdatePosition;
+  p->encPtr = encPtr;
+  *encPtr = &(p->initEnc);
+  return 1;
+}
+
+/* Maps the encoding name in [ptr, end), itself encoded in enc, to one of
+   the built-in encodings.  Returns 0 when the name does not fit in the
+   local buffer or is not known to getEncodingIndex(). */
+static
+const ENCODING *NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end)
+{
+#define ENCODING_MAX 128
+  char buf[ENCODING_MAX];
+  char *p = buf;
+  int i;
+  /* Convert the name to UTF-8, leaving one byte for the terminator. */
+  XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1);
+  /* Input left over means the name was too long for buf. */
+  if (ptr != end)
+    return 0;
+  *p = 0;
+  /* A document already being read with 2-byte units that names KW_UTF_16
+     keeps its current encoding. */
+  if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2)
+    return enc;
+  i = getEncodingIndex(buf);
+  if (i == UNKNOWN_ENC)
+    return 0;
+  return NS(encodings)[i];
+}
+
+/* Parses an XML or text declaration by delegating to doParseXmlDecl(),
+   supplying this variant's findEncoding as the encoding lookup.  All
+   other arguments are passed through unchanged and the result of
+   doParseXmlDecl() is returned. */
+int NS(XmlParseXmlDecl)(int isGeneralTextEntity,
+			const ENCODING *enc,
+			const char *ptr,
+			const char *end,
+			const char **badPtr,
+			const char **versionPtr,
+			const char **versionEndPtr,
+			const char **encodingName,
+			const ENCODING **encoding,
+			int *standalone)
+{
+  return doParseXmlDecl(NS(findEncoding),
+			isGeneralTextEntity,
+			enc,
+			ptr,
+			end,
+			badPtr,
+			versionPtr,
+			versionEndPtr,
+			encodingName,
+			encoding,
+			standalone);
+}

Added: apr/apr-util/vendor/expat/current/xmlwf/Makefile.in
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/Makefile.in?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/Makefile.in (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/Makefile.in Wed Sep 29 08:16:58 2010
@@ -0,0 +1,68 @@
+################################################################
+# Process this file with top-level configure script to produce Makefile
+#
+# Copyright 2000 Clark Cooper
+#
+#  This file is part of EXPAT.
+#
+#  EXPAT is free software; you can redistribute it and/or modify it
+#  under the terms of the License (based on the MIT/X license) contained
+#  in the file COPYING that comes with this distribution.
+#
+# EXPAT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN EXPAT.
+#
+
+# Installation directory for the xmlwf program.
+bindir = @bindir@
+
+# The expat library and headers are taken from the sibling lib directory.
+LIBDIR= ../lib/.libs
+INCDIR= ../lib
+
+# xmlwf is linked statically.
+LDFLAGS= @LDFLAGS@ -static
+CFLAGS= @CFLAGS@ -I$(INCDIR)
+CC = @CC@
+
+# FILEMAP_OBJ is chosen by configure.
+FILEMAP_OBJ= @FILEMAP_OBJ@
+OBJS= xmlwf.o xmlfile.o codepage.o $(FILEMAP_OBJ)
+LIBS= -L$(LIBDIR) -lexpat
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = ${INSTALL}
+LIBTOOL = @LIBTOOL@
+mkinstalldirs = $(SHELL) $(top_srcdir)/conftools/mkinstalldirs
+
+srcdir = @srcdir@
+top_builddir = ..
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+
+# Default target: build the xmlwf executable.
+xmlwf: $(OBJS)
+	$(CC) -o xmlwf $(LDFLAGS) $(OBJS) $(LIBS)
+
+install: xmlwf
+	$(mkinstalldirs) $(bindir)
+	$(LIBTOOL) --mode=install $(INSTALL_PROGRAM) xmlwf $(bindir)/xmlwf
+
+uninstall:
+	$(LIBTOOL) --mode=uninstall rm -f $(bindir)/xmlwf
+
+# SUBDIRS is not set anywhere in this file, so it expands to nothing here.
+check:	$(SUBDIRS)
+	@echo
+	@echo This package does not yet have a regression test.
+	@echo
+
+clean:
+	rm -f xmlwf core *.o
+
+distclean: clean
+	rm -f Makefile
+
+# Nothing beyond distclean needs removing.
+maintainer-clean: distclean

Added: apr/apr-util/vendor/expat/current/xmlwf/codepage.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/codepage.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/codepage.c (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/codepage.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,65 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+
+#include "codepage.h"
+
+#ifdef WIN32
+#define STRICT 1
+#define WIN32_LEAN_AND_MEAN 1
+
+#include <windows.h>
+
+/* Builds a byte-to-Unicode table for Windows code page cp.
+   On success returns 1 with map[0..255] filled in: map[b] is the UTF-16
+   code unit for single byte b, -2 when b is the lead byte of a two-byte
+   character, and -1 when b is not valid on its own.
+   Returns 0 when the code page is unknown or uses more than two bytes
+   per character; map is left untouched in that case. */
+int codepageMap(int cp, int *map)
+{
+  int i;
+  CPINFO info;
+  if (!GetCPInfo(cp, &info) || info.MaxCharSize > 2)
+    return 0;
+  for (i = 0; i < 256; i++)
+    map[i] = -1;
+  if (info.MaxCharSize > 1) {
+    /* LeadByte is a list of (first, last) pairs terminated by a pair of
+       zeros.  Walk it two entries at a time, which also keeps the
+       LeadByte[i + 1] access inside the array, and treat the upper
+       bound of each range as inclusive. */
+    for (i = 0; i + 1 < MAX_LEADBYTES; i += 2) {
+      int j, lim;
+      if (info.LeadByte[i] == 0 && info.LeadByte[i + 1] == 0)
+        break;
+      lim = info.LeadByte[i + 1];
+      for (j = info.LeadByte[i]; j <= lim; j++)
+        map[j] = -2;
+    }
+  }
+  for (i = 0; i < 256; i++) {
+    if (map[i] == -1) {
+      /* Not a lead byte: try to convert it as a one-byte character. */
+      char c = (char)i;
+      unsigned short n;
+      if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
+                              &c, 1, &n, 1) == 1)
+        map[i] = n;
+    }
+  }
+  return 1;
+}
+
+/* Converts the two-byte character at p in Windows code page cp.
+   Returns the resulting UTF-16 code unit, or -1 when the conversion does
+   not produce exactly one code unit. */
+int codepageConvert(int cp, const char *p)
+{
+  unsigned short c;
+  if (MultiByteToWideChar(cp, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
+		          p, 2, &c, 1) == 1)
+    return c;
+  return -1;
+}
+
+#else /* not WIN32 */
+
+/* Non-WIN32 build: code pages are not supported, so this always reports
+   failure (0) and leaves map untouched. */
+int codepageMap(int cp, int *map)
+{
+  return 0;
+}
+
+/* Non-WIN32 build: code pages are not supported, so this always reports
+   failure (-1). */
+int codepageConvert(int cp, const char *p)
+{
+  return -1;
+}
+
+#endif /* not WIN32 */

Propchange: apr/apr-util/vendor/expat/current/xmlwf/codepage.c
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/codepage.h
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/codepage.h?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/codepage.h (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/codepage.h Wed Sep 29 08:16:58 2010
@@ -0,0 +1,7 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+
+/* Fills map (256 entries) for code page cp; returns 1 on success, 0 on
+   failure. */
+int codepageMap(int cp, int *map);
+/* Converts the two-byte character at p in code page cp; returns -1 on
+   failure. */
+int codepageConvert(int cp, const char *p);

Propchange: apr/apr-util/vendor/expat/current/xmlwf/codepage.h
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/ct.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/ct.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/ct.c (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/ct.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,145 @@
+#define CHARSET_MAX 41
+
+/* Extracts the next token of a MIME-style header value.
+   *pp is the scan position and is advanced past what was consumed.
+   Returns a pointer to the start of the token, which is one of:
+     - an atom: a run of ordinary characters; *pp is left on the
+       character that ended it,
+     - a quoted string, including both quotes; *pp is left just past the
+       closing quote,
+     - one of the single characters ';', '/' or '='.
+   Whitespace and parenthesized comments between tokens are skipped and a
+   backslash escapes the character after it.  Returns 0 when the end of
+   the string is reached without a token, inside an unterminated string
+   or comment, or on an unbalanced ')'. */
+static
+const char *getTok(const char **pp)
+{
+  /* The order matters: '(' and ')' move state up and down from init, so
+     values above inComment mean nested comments. */
+  enum { inAtom, inString, init, inComment };
+  int state = init;
+  const char *tokStart = 0;
+  for (;;) {
+    switch (**pp) {
+    case '\0':
+      /* An atom may be terminated by the end of the input; without this
+         the last token of a header such as "text/xml" would be lost. */
+      if (state == inAtom)
+        return tokStart;
+      return 0;
+    case ' ':
+    case '\r':
+    case '\t':
+    case '\n':
+      if (state == inAtom)
+        return tokStart;
+      break;
+    case '(':
+      if (state == inAtom)
+        return tokStart;
+      if (state != inString)
+        state++;
+      break;
+    case ')':
+      if (state > init)
+        --state;
+      else if (state != inString)
+        return 0;
+      break;
+    case ';':
+    case '/':
+    case '=':
+      if (state == inAtom)
+        return tokStart;
+      if (state == init)
+        return (*pp)++;
+      break;
+    case '\\':
+      /* Skip the escaped character as well. */
+      ++*pp;
+      if (**pp == '\0')
+        return 0;
+      break;
+    case '"':
+      switch (state) {
+      case inString:
+        ++*pp;
+        return tokStart;
+      case inAtom:
+        return tokStart;
+      case init:
+        tokStart = *pp;
+        state = inString;
+        break;
+      }
+      break;
+    default:
+      if (state == init) {
+        tokStart = *pp;
+        state = inAtom;
+      }
+      break;
+    }
+    ++*pp;
+  }
+  /* not reached */
+}
+
+/* Case-insensitively compares the token [start, end) with key.
+   key must be lowercase ASCII.  Returns 1 when the token has exactly the
+   length of key and every character equals the key character or its
+   uppercase form; returns 0 otherwise, including when start is null. */
+
+static
+int matchkey(const char *start, const char *end, const char *key)
+{
+  if (!start)
+    return 0;
+  for (; start != end; start++, key++) {
+    /* Token longer than key: stop before reading past the terminator. */
+    if (*key == '\0')
+      return 0;
+    if (*start == *key)
+      continue;
+    /* Only letters have an uppercase counterpart. */
+    if (*key >= 'a' && *key <= 'z' && *start == 'A' + (*key - 'a'))
+      continue;
+    return 0;
+  }
+  return *key == '\0';
+}
+
+/* Extracts the charset from a Content-Type header value.
+   buf is the NUL-terminated header value; charset must have room for
+   CHARSET_MAX bytes and receives the result as a NUL-terminated string.
+   The result is empty when the type is neither text nor application, or
+   when an application type names no charset; text types default to
+   "us-ascii".  A quoted value that is too long yields an empty result;
+   an unquoted one that is too long leaves the default in place. */
+void getXMLCharset(const char *buf, char *charset)
+{
+  const char *next, *p;
+  /* Records whether the subtype is "xml".  Nothing reads it yet; it is
+     declared here because no declaration exists anywhere in this file. */
+  int isXml = 0;
+
+  charset[0] = '\0';
+  next = buf;
+  p = getTok(&next);
+  if (matchkey(p, next, "text"))
+    strcpy(charset, "us-ascii");
+  else if (!matchkey(p, next, "application"))
+    return;
+  p = getTok(&next);
+  if (!p || *p != '/')
+    return;
+  p = getTok(&next);
+  if (matchkey(p, next, "xml"))
+    isXml = 1;
+  (void)isXml;
+  p = getTok(&next);
+  while (p) {
+    if (*p == ';') {
+      p = getTok(&next);
+      if (matchkey(p, next, "charset")) {
+        p = getTok(&next);
+        if (p && *p == '=') {
+          p = getTok(&next);
+          if (p) {
+            char *s = charset;
+            if (*p == '"') {
+              /* Quoted value: copy what lies between the quotes,
+                 dropping the backslash of each escape. */
+              while (++p != next - 1) {
+                if (*p == '\\')
+                  ++p;
+                if (s == charset + CHARSET_MAX - 1) {
+                  charset[0] = '\0';
+                  break;
+                }
+                *s++ = *p;
+              }
+              *s++ = '\0';
+            }
+            else {
+              /* Unquoted value: copy the atom if it fits. */
+              if (next - p > CHARSET_MAX - 1)
+                break;
+              while (p != next)
+                *s++ = *p++;
+              *s = 0;
+              break;
+            }
+          }
+        }
+      }
+    }
+    else
+      p = getTok(&next);
+  }
+}
+
+/* Test driver: prints the charset found in the Content-Type value given
+   as the first command-line argument.  Exits with status 1 when no
+   argument is supplied. */
+int main(int argc, char **argv)
+{
+  char buf[CHARSET_MAX];
+  /* argv[1] is a null pointer when the program is run without arguments. */
+  if (argc < 2) {
+    printf("usage: ct content-type\n");
+    return 1;
+  }
+  getXMLCharset(argv[1], buf);
+  printf("charset = \"%s\"\n", buf);
+  return 0;
+}

Propchange: apr/apr-util/vendor/expat/current/xmlwf/ct.c
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/filemap.h
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/filemap.h?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/filemap.h (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/filemap.h Wed Sep 29 08:16:58 2010
@@ -0,0 +1,17 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+
+
+#include <stddef.h>
+
+/* filemap() hands the contents of the file called name to
+   processor(data, size, name, arg).  The implementations in this
+   directory return 1 on success and 0 on failure.  File names are
+   wide-character strings when XML_UNICODE is defined. */
+#ifdef XML_UNICODE
+int filemap(const wchar_t *name,
+	    void (*processor)(const void *, size_t, const wchar_t *, void *arg),
+	    void *arg);
+#else
+int filemap(const char *name,
+	    void (*processor)(const void *, size_t, const char *, void *arg),
+	    void *arg);
+#endif

Propchange: apr/apr-util/vendor/expat/current/xmlwf/filemap.h
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/readfilemap.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/readfilemap.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/readfilemap.c (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/readfilemap.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,74 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef S_ISREG
+#ifndef S_IFREG
+#define S_IFREG _S_IFREG
+#endif
+#ifndef S_IFMT
+#define S_IFMT _S_IFMT
+#endif
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif /* not S_ISREG */
+
+#ifndef O_BINARY
+#ifdef _O_BINARY
+#define O_BINARY _O_BINARY
+#else
+#define O_BINARY 0
+#endif
+#endif
+
+/* Reads the whole of the regular file called name into a heap buffer and
+   passes it to processor(data, size, name, arg).
+   Returns 1 on success and 0 after printing a diagnostic on stderr.
+   The descriptor and the buffer are released on every path. */
+int filemap(const char *name,
+            void (*processor)(const void *, size_t, const char *, void *arg),
+            void *arg)
+{
+  size_t nbytes;
+  int fd;
+  int n;
+  struct stat sb;
+  void *p;
+
+  fd = open(name, O_RDONLY|O_BINARY);
+  if (fd < 0) {
+    perror(name);
+    return 0;
+  }
+  if (fstat(fd, &sb) < 0) {
+    perror(name);
+    close(fd);
+    return 0;
+  }
+  if (!S_ISREG(sb.st_mode)) {
+    fprintf(stderr, "%s: not a regular file\n", name);
+    close(fd);
+    return 0;
+  }
+  nbytes = sb.st_size;
+  /* malloc(0) may return a null pointer, which must not be reported as
+     running out of memory: hand an empty file over without allocating. */
+  if (nbytes == 0) {
+    static const char c = '\0';
+    processor(&c, 0, name, arg);
+    close(fd);
+    return 1;
+  }
+  p = malloc(nbytes);
+  if (!p) {
+    fprintf(stderr, "%s: out of memory\n", name);
+    close(fd);
+    return 0;
+  }
+  n = read(fd, p, nbytes);
+  if (n < 0) {
+    perror(name);
+    free(p);
+    close(fd);
+    return 0;
+  }
+  /* n is known to be non-negative here, so the cast is safe. */
+  if ((size_t)n != nbytes) {
+    fprintf(stderr, "%s: read unexpected number of bytes\n", name);
+    free(p);
+    close(fd);
+    return 0;
+  }
+  processor(p, nbytes, name, arg);
+  free(p);
+  close(fd);
+  return 1;
+}

Propchange: apr/apr-util/vendor/expat/current/xmlwf/readfilemap.c
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/unixfilemap.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/unixfilemap.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/unixfilemap.c (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/unixfilemap.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,58 @@
+/*
+Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+See the file COPYING for copying permission.
+*/
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#ifndef MAP_FILE
+#define MAP_FILE 0
+#endif
+
+#include "filemap.h"
+
+/* Maps the regular file called name into memory read-only and passes the
+   mapping to processor(data, size, name, arg).
+   Returns 1 on success and 0 after printing a diagnostic on stderr. */
+int filemap(const char *name,
+            void (*processor)(const void *, size_t, const char *, void *arg),
+            void *arg)
+{
+  int fd;
+  size_t nbytes;
+  struct stat sb;
+  void *p;
+
+  fd = open(name, O_RDONLY);
+  if (fd < 0) {
+    perror(name);
+    return 0;
+  }
+  if (fstat(fd, &sb) < 0) {
+    perror(name);
+    close(fd);
+    return 0;
+  }
+  if (!S_ISREG(sb.st_mode)) {
+    close(fd);
+    fprintf(stderr, "%s: not a regular file\n", name);
+    return 0;
+  }
+
+  nbytes = sb.st_size;
+  /* mmap fails for a zero-length mapping, so an empty file is handed to
+     the processor without mapping anything. */
+  if (nbytes == 0) {
+    static const char c = '\0';
+    processor(&c, 0, name, arg);
+    close(fd);
+    return 1;
+  }
+  p = mmap(NULL, nbytes, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, (off_t)0);
+  if (p == MAP_FAILED) {
+    perror(name);
+    close(fd);
+    return 0;
+  }
+  processor(p, nbytes, name, arg);
+  munmap(p, nbytes);
+  close(fd);
+  return 1;
+}

Propchange: apr/apr-util/vendor/expat/current/xmlwf/unixfilemap.c
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/wfcheck.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/wfcheck.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/wfcheck.c (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/wfcheck.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,953 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "wfcheck.h"
+#include "hashtable.h"
+
+#include "xmltok.h"
+#include "xmlrole.h"
+
+typedef struct {  /* One declared entity; records are obtained from lookup() on a DTD hash table. */
+  const char *name;  /* compared with the pooled name after lookup() to detect an earlier declaration */
+  const char *textPtr;  /* start of the stored replacement text; null when none was stored */
+  size_t textLen;  /* number of bytes at textPtr */
+  const char *docTextPtr;  /* start of the entity value inside the checked document; used as error position */
+  const char *systemId;  /* pooled system identifier, stored for XML_ROLE_ENTITY_SYSTEM_ID */
+  const char *publicId;  /* not assigned anywhere in wfcheck.c */
+  const char *notation;  /* pooled notation name; when non-null, references yield binaryEntityRef */
+  char open;  /* set while the entity's text is being checked, so recursion can be detected */
+  char wfInContent;  /* text already accepted as element content */
+  char wfInAttribute;  /* text already accepted as attribute value */
+  char magic;  /* set for the five predefined entities created by dtdInit() */
+} ENTITY;
+
+#define INIT_BLOCK_SIZE 1024
+
+typedef struct block {  /* Header of one string-pool allocation. */
+  struct block *next;  /* next block in the pool's list */
+  char s[1];  /* first byte of storage; blocks are allocated as offsetof(BLOCK, s) + size */
+} BLOCK;
+
+typedef struct {  /* Growable store for the strings and entity text kept by the checker. */
+  BLOCK *blocks;  /* list of allocated blocks; poolGrow() pushes new blocks at the head */
+  const char *end;  /* one past the usable storage of the current block */
+  char *ptr;  /* next free byte */
+  char *start;  /* start of the string currently being built (see poolFinish/poolDiscard) */
+} STRING_POOL;
+
+typedef struct {  /* Declarations and flags gathered while checking. */
+  HASH_TABLE generalEntities;  /* ENTITY records for general entities, including the predefined ones */
+  HASH_TABLE paramEntities;  /* ENTITY records for parameter entities */
+  STRING_POOL pool;  /* backing store for names, system ids and entity text */
+  int containsRef;  /* set for a DOCTYPE system id, a parameter entity reference, or a general text entity */
+  int standalone;  /* set to 1 when XmlParseXmlDecl() reports standalone == 1 */
+  char *groupConnector;  /* per group nesting level: 0, ',' or '|' */
+  size_t groupSize;  /* allocated size of groupConnector in bytes */
+} DTD;
+
+typedef struct {  /* Working state of one wfCheck() run. */
+  DTD dtd;
+  size_t stackSize;  /* number of slots allocated in startName */
+  const char **startName;  /* stack of open element names, pointing into the text being checked */
+  int attsSize;  /* number of slots allocated in atts */
+  ATTRIBUTE *atts;  /* scratch array filled by XmlGetAttributes() */
+} CONTEXT;
+
+static void poolInit(STRING_POOL *);
+static void poolDestroy(STRING_POOL *);
+static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
+			      const char *ptr, const char *end);
+static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
+			    const char *ptr, const char *end);
+static int poolGrow(STRING_POOL *);
+static int dtdInit(DTD *);
+static void dtdDestroy(DTD *);
+static int contextInit(CONTEXT *);
+static void contextDestroy(CONTEXT *);
+
+#define poolStart(pool) ((pool)->start)
+#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
+#define poolFinish(pool) ((pool)->start = (pool)->ptr)
+
+static enum WfCheckResult
+checkProlog(DTD *, const char *s, const char *end, const char **, const ENCODING **enc);
+static enum WfCheckResult
+checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
+  	     const char *s, const char *end, const char **badPtr);
+static enum WfCheckResult
+checkGeneralTextEntity(CONTEXT *context,
+		       const char *s, const char *end,
+		       const char **nextPtr,
+		       const ENCODING **enc);
+static enum WfCheckResult
+checkAttributeValue(DTD *, const ENCODING *, const char *, const char *, const char **);
+static enum WfCheckResult
+checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
+			 const char **badPtr);
+static enum WfCheckResult
+checkParsedEntities(CONTEXT *context, const char **badPtr);
+
+static
+enum WfCheckResult storeEntity(DTD *dtd,
+			       const ENCODING *enc,
+			       int isParam,
+			       const char *entityNamePtr,
+			       const char *entityNameEnd,
+			       const char *entityTextPtr,
+			       const char *entityTextEnd,
+			       const char **badPtr);
+
+
+/* Check that the n bytes at s form a well-formed entity of the given type.
+   When an error is returned together with a known position, *badPtr,
+   *badLine and *badCol receive that position; otherwise they are left
+   untouched. */
+enum WfCheckResult
+wfCheck(enum EntityType entityType, const char *s, size_t n,
+	const char **badPtr, unsigned long *badLine, unsigned long *badCol)
+{
+  CONTEXT context;
+  const ENCODING *enc;
+  const char *const docStart = s;
+  const char *const docEnd = s + n;
+  const char *where = 0;
+  enum WfCheckResult result;
+
+  if (!contextInit(&context)) {
+    contextDestroy(&context);
+    return noMemory;
+  }
+  if (entityType != documentEntity)
+    result = checkGeneralTextEntity(&context, s, docEnd, &where, &enc);
+  else {
+    /* Prolog first, then the declared internal entities, then the content
+       starting where the prolog left off. */
+    result = checkProlog(&context.dtd, s, docEnd, &where, &enc);
+    if (!result)
+      result = checkParsedEntities(&context, &where);
+    if (!result)
+      result = checkContent(0, &context, enc, where, docEnd, &where);
+  }
+  if (result && where) {
+    POSITION pos;
+    memset(&pos, 0, sizeof(POSITION));
+    XmlUpdatePosition(enc, docStart, where, &pos);
+    *badPtr = where;
+    *badLine = pos.lineNumber;
+    *badCol = pos.columnNumber;
+  }
+  contextDestroy(&context);
+  return result;
+}
+
+/* Allocate the element-name stack and the attribute array (1024 slots
+   each) and initialise the embedded DTD.  Returns non-zero only when all
+   three steps succeeded. */
+static
+int contextInit(CONTEXT *p)
+{
+  enum { INITIAL_SLOTS = 1024 };
+
+  p->stackSize = INITIAL_SLOTS;
+  p->startName = malloc(p->stackSize * sizeof(char *));
+  p->attsSize = INITIAL_SLOTS;
+  p->atts = malloc(p->attsSize * sizeof(ATTRIBUTE));
+  if (!dtdInit(&(p->dtd)))
+    return 0;
+  return p->atts && p->startName;
+}
+
+/* Release the DTD and the two arrays owned by the context. */
+static
+void contextDestroy(CONTEXT *p)
+{
+  DTD *dtd = &p->dtd;
+
+  dtdDestroy(dtd);
+  free((void *)p->startName);
+  free((void *)p->atts);
+}
+
+/* Double the capacity of the open-element stack.  Returns 0, leaving the
+   existing stack and its recorded size intact, if memory cannot be
+   obtained. */
+static int
+growStartNames(CONTEXT *context)
+{
+  size_t newSize = context->stackSize * 2;
+  const char **tem = realloc((void *)context->startName, newSize * sizeof(char *));
+  if (!tem)
+    return 0;
+  context->startName = tem;
+  context->stackSize = newSize;
+  return 1;
+}
+
+/* Check the element content between s and end.
+
+   level is the element nesting depth on entry: wfCheck() passes 0 for the
+   document entity, and entity text is checked at the depth of the
+   reference (or 1).  With a non-zero starting level the text must leave the
+   depth unchanged, otherwise asyncEntity is returned.  Once the depth
+   drops to 0 the remainder is scanned with the prolog tokenizer and only
+   whitespace, comments and processing instructions are accepted.
+
+   Returns wellFormed, or an error code with *badPtr set to the offending
+   position (noMemory leaves *badPtr untouched). */
+static enum WfCheckResult
+checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
+             const char *s, const char *end, const char **badPtr)
+{
+  size_t startLevel = level;
+  const char *next;
+  int tok = XmlContentTok(enc, s, end, &next);
+  for (;;) {
+    switch (tok) {
+    case XML_TOK_TRAILING_CR:
+    case XML_TOK_NONE:
+      if (startLevel > 0) {
+        if (level != startLevel) {
+          *badPtr = s;
+          return asyncEntity;
+        }
+        return wellFormed;
+      }
+      *badPtr = s;
+      return noElements;
+    case XML_TOK_INVALID:
+      *badPtr = next;
+      return invalidToken;
+    case XML_TOK_PARTIAL:
+      *badPtr = s;
+      return unclosedToken;
+    case XML_TOK_PARTIAL_CHAR:
+      *badPtr = s;
+      return partialChar;
+    case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
+      break;
+    case XML_TOK_ENTITY_REF:
+      {
+        /* The name sits between the leading '&' and the trailing ';'. */
+        const char *name = poolStoreString(&context->dtd.pool, enc,
+                                           s + enc->minBytesPerChar,
+                                           next - enc->minBytesPerChar);
+        ENTITY *entity;
+        if (!name) {
+          poolDiscard(&context->dtd.pool);
+          return noMemory;
+        }
+        entity = (ENTITY *)lookup(&context->dtd.generalEntities, name, 0);
+        poolDiscard(&context->dtd.pool);
+        if (!entity) {
+          if (!context->dtd.containsRef || context->dtd.standalone) {
+            *badPtr = s;
+            return undefinedEntity;
+          }
+          break;
+        }
+        if (entity->wfInContent)
+          break;
+        if (entity->open) {
+          *badPtr = s;
+          return recursiveEntityRef;
+        }
+        if (entity->notation) {
+          *badPtr = s;
+          return binaryEntityRef;
+        }
+        if (entity->textPtr) {
+          enum WfCheckResult result;
+          const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+          entity->open = 1;
+          result = checkContent(level, context, internalEnc,
+                                entity->textPtr, entity->textPtr + entity->textLen,
+                                badPtr);
+          entity->open = 0;
+          if (result && *badPtr) {
+            /* Report the reference, not a position inside the pooled text. */
+            *badPtr = s;
+            return result;
+          }
+          entity->wfInContent = 1;
+        }
+        break;
+      }
+    case XML_TOK_START_TAG_NO_ATTS:
+      if (level == context->stackSize && !growStartNames(context))
+        return noMemory;
+      context->startName[level++] = s + enc->minBytesPerChar;
+      break;
+    case XML_TOK_START_TAG_WITH_ATTS:
+      if (level == context->stackSize && !growStartNames(context))
+        return noMemory;
+      context->startName[level++] = s + enc->minBytesPerChar;
+      /* fall through */
+    case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
+      {
+        int i;
+        int n = XmlGetAttributes(enc, s, context->attsSize, context->atts);
+        if (n > context->attsSize) {
+          /* The array was too small: enlarge it and collect again.  The old
+             array stays owned by the context if realloc fails. */
+          ATTRIBUTE *tem = realloc((void *)context->atts,
+                                   2 * (size_t)n * sizeof(ATTRIBUTE));
+          if (!tem)
+            return noMemory;
+          context->atts = tem;
+          context->attsSize = 2*n;
+          XmlGetAttributes(enc, s, n, context->atts);
+        }
+        for (i = 0; i < n; i++) {
+          if (!context->atts[i].normalized) {
+            enum WfCheckResult result
+              = checkAttributeValue(&context->dtd, enc,
+                                    context->atts[i].valuePtr,
+                                    context->atts[i].valueEnd,
+                                    badPtr);
+            if (result)
+              return result;
+          }
+        }
+        if (i > 1) {
+          enum WfCheckResult result = checkAttributeUniqueness(context, enc, n, badPtr);
+          if (result)
+            return result;
+        }
+      }
+      break;
+    case XML_TOK_END_TAG:
+      if (level == startLevel) {
+        *badPtr = s;
+        return asyncEntity;
+      }
+      --level;
+      if (!XmlSameName(enc, context->startName[level], s + enc->minBytesPerChar * 2)) {
+        *badPtr = s;
+        return tagMismatch;
+      }
+      break;
+    case XML_TOK_CHAR_REF:
+      if (XmlCharRefNumber(enc, s) < 0) {
+        *badPtr = s;
+        return badCharRef;
+      }
+      break;
+    case XML_TOK_XML_DECL:
+      *badPtr = s;
+      return misplacedXmlPi;
+    }
+    s = next;
+    if (level == 0) {
+      /* The document element is closed: only miscellaneous items may follow. */
+      do {
+        tok = XmlPrologTok(enc, s, end, &next);
+        switch (tok) {
+        case XML_TOK_TRAILING_CR:
+        case XML_TOK_NONE:
+          return wellFormed;
+        case XML_TOK_PROLOG_S:
+        case XML_TOK_COMMENT:
+        case XML_TOK_PI:
+          s = next;
+          break;
+        default:
+          if (tok > 0) {
+            *badPtr = s;
+            return junkAfterDocElement;
+          }
+          break;
+        }
+      } while (tok > 0);
+      /* A non-positive token is handled by the switch at the loop top. */
+    }
+    else
+      tok = XmlContentTok(enc, s, end, &next);
+  }
+  /* not reached */
+}
+
+/* qsort() comparator for ATTRIBUTE records whose valuePtr has been set to
+   the end of the name.  Orders by name length, then by the bytes of the
+   name, then by address. */
+static
+int attcmp(const void *p1, const void *p2)
+{
+  const ATTRIBUTE *lhs = p1;
+  const ATTRIBUTE *rhs = p2;
+  size_t lhsLen = lhs->valuePtr - lhs->name;
+  size_t rhsLen = rhs->valuePtr - rhs->name;
+  int order;
+
+  if (lhsLen != rhsLen)
+    return lhsLen < rhsLen ? -1 : 1;
+  order = memcmp(lhs->name, rhs->name, lhsLen);
+  if (order)
+    return order;
+  /* Identical names are ordered by position, so that the first duplicate
+     attribute is always the one reported. */
+  if (lhs->name == rhs->name)
+    return 0;
+  return lhs->name < rhs->name ? -1 : 1;
+}
+
+/* Look for an attribute name that occurs twice among the first nAtts
+   entries of context->atts.  Returns duplicateAttribute with *badPtr at the
+   repeated name, or wellFormed.
+
+   Note that this trashes the attribute values: with QSORT_MIN_ATTS or more
+   attributes the array is reordered and every valuePtr is overwritten. */
+
+static enum WfCheckResult
+checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
+			 const char **badPtr)
+{
+#define QSORT_MIN_ATTS 10
+  ATTRIBUTE *atts = context->atts;
+  const char *firstDup = 0;
+  int i, j;
+
+  if (nAtts < QSORT_MIN_ATTS) {
+    /* Few attributes: compare each one with all of its predecessors. */
+    for (i = 1; i < nAtts; i++)
+      for (j = 0; j < i; j++)
+        if (XmlSameName(enc, atts[i].name, atts[j].name)) {
+          *badPtr = atts[i].name;
+          return duplicateAttribute;
+        }
+    return wellFormed;
+  }
+
+  /* Store the end of the name in valuePtr; attcmp depends on it. */
+  for (i = 0; i < nAtts; i++)
+    atts[i].valuePtr = atts[i].name + XmlNameLength(enc, atts[i].name);
+  qsort(atts, nAtts, sizeof(ATTRIBUTE), attcmp);
+
+  /* Equal names are adjacent now; keep the lowest-addressed repeat. */
+  for (i = 1; i < nAtts; i++) {
+    if (!XmlSameName(enc, atts[i].name, atts[i - 1].name))
+      continue;
+    if (!firstDup || atts[i].name < firstDup)
+      firstDup = atts[i].name;
+  }
+  if (!firstDup)
+    return wellFormed;
+  *badPtr = firstDup;
+  return duplicateAttribute;
+}
+
+/* Check the prolog of a document entity and record its declarations in dtd.
+
+   *enc is initialised here and replaced when the XML declaration names a
+   known encoding with the same minimum character width.  Entity
+   declarations are stored in the dtd's hash tables and string pool.
+
+   Returns wellFormed with *nextPtr at the token that starts the document
+   instance, or an error code; for errors other than noMemory *nextPtr is
+   set to the offending position where one is known. */
+static enum WfCheckResult
+checkProlog(DTD *dtd, const char *s, const char *end,
+            const char **nextPtr, const ENCODING **enc)
+{
+  /* Name and kind of the entity declaration currently being read. */
+  const char *entityNamePtr = 0, *entityNameEnd = 0;
+  int entityIsParam = 0;
+  PROLOG_STATE state;
+  /* Entity created by the latest system id, awaiting a notation name. */
+  ENTITY *entity = 0;
+  INIT_ENCODING initEnc;
+  XmlInitEncoding(&initEnc, enc);
+  XmlPrologStateInit(&state);
+  for (;;) {
+    const char *next;
+    int tok = XmlPrologTok(*enc, s, end, &next);
+    switch (XmlTokenRole(&state, tok, s, next, *enc)) {
+    case XML_ROLE_XML_DECL:
+      {
+        const char *encodingName = 0;
+        const ENCODING *encoding = 0;
+        const char *version;
+        int standalone = -1;
+        if (!XmlParseXmlDecl(0,
+                             *enc,
+                             s,
+                             next,
+                             nextPtr,
+                             &version,
+                             &encodingName,
+                             &encoding,
+                             &standalone))
+          return syntaxError;
+        if (encoding) {
+          if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
+            *nextPtr = encodingName;
+            return incorrectEncoding;
+          }
+          *enc = encoding;
+        }
+        else if (encodingName) {
+          *nextPtr = encodingName;
+          return unknownEncoding;
+        }
+        if (standalone == 1)
+          dtd->standalone = 1;
+        break;
+      }
+    case XML_ROLE_DOCTYPE_SYSTEM_ID:
+      dtd->containsRef = 1;
+      break;
+    case XML_ROLE_DOCTYPE_PUBLIC_ID:
+    case XML_ROLE_ENTITY_PUBLIC_ID:
+    case XML_ROLE_NOTATION_PUBLIC_ID:
+      if (!XmlIsPublicId(*enc, s, next, nextPtr))
+        return syntaxError;
+      break;
+    case XML_ROLE_INSTANCE_START:
+      *nextPtr = s;
+      return wellFormed;
+    case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
+    case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
+      {
+        const char *tem = 0;
+        /* Skip the opening and closing quote of the literal. */
+        enum WfCheckResult result
+          = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
+                                next - (*enc)->minBytesPerChar,
+                                &tem);
+        if (result) {
+          if (tem)
+            *nextPtr = tem;
+          return result;
+        }
+        break;
+      }
+    case XML_ROLE_ENTITY_VALUE:
+      {
+        enum WfCheckResult result
+          = storeEntity(dtd,
+                        *enc,
+                        entityIsParam,
+                        entityNamePtr,
+                        entityNameEnd,
+                        s,
+                        next,
+                        nextPtr);
+        if (result != wellFormed)
+          return result;
+      }
+      break;
+    case XML_ROLE_ENTITY_SYSTEM_ID:
+      {
+        const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
+        if (!name)
+          return noMemory;
+        entity = (ENTITY *)lookup(entityIsParam ? &dtd->paramEntities : &dtd->generalEntities,
+                                  name, sizeof(ENTITY));
+        if (!entity)
+          return noMemory;
+        if (entity->name != name) {
+          /* Already declared: the earlier declaration stays in force. */
+          poolDiscard(&dtd->pool);
+          entity = 0;
+        }
+        else {
+          poolFinish(&dtd->pool);
+          entity->systemId = poolStoreString(&dtd->pool, *enc,
+                                             s + (*enc)->minBytesPerChar,
+                                             next - (*enc)->minBytesPerChar);
+          if (!entity->systemId)
+            return noMemory;
+          poolFinish(&dtd->pool);
+        }
+      }
+      break;
+    case XML_ROLE_PARAM_ENTITY_REF:
+      {
+        const char *name = poolStoreString(&dtd->pool, *enc,
+                                           s + (*enc)->minBytesPerChar,
+                                           next - (*enc)->minBytesPerChar);
+        ENTITY *ref;
+        if (!name)
+          return noMemory;
+        ref = (ENTITY *)lookup(&dtd->paramEntities, name, 0);
+        poolDiscard(&dtd->pool);
+        if (!ref) {
+          if (!dtd->containsRef || dtd->standalone) {
+            *nextPtr = s;
+            return undefinedEntity;
+          }
+        }
+      }
+      break;
+    case XML_ROLE_ENTITY_NOTATION_NAME:
+      if (entity) {
+        entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
+        if (!entity->notation)
+          return noMemory;
+        poolFinish(&dtd->pool);
+      }
+      break;
+    case XML_ROLE_GENERAL_ENTITY_NAME:
+      entityNamePtr = s;
+      entityNameEnd = next;
+      entityIsParam = 0;
+      break;
+    case XML_ROLE_PARAM_ENTITY_NAME:
+      entityNamePtr = s;
+      entityNameEnd = next;
+      entityIsParam = 1;
+      break;
+    case XML_ROLE_ERROR:
+      *nextPtr = s;
+      switch (tok) {
+      case XML_TOK_PARAM_ENTITY_REF:
+        return paramEntityRef;
+      case XML_TOK_INVALID:
+        *nextPtr = next;
+        return invalidToken;
+      case XML_TOK_NONE:
+        return noElements;
+      case XML_TOK_PARTIAL:
+        return unclosedToken;
+      case XML_TOK_PARTIAL_CHAR:
+        return partialChar;
+      case XML_TOK_TRAILING_CR:
+        *nextPtr = s + (*enc)->minBytesPerChar;
+        return noElements;
+      case XML_TOK_XML_DECL:
+        return misplacedXmlPi;
+      default:
+        return syntaxError;
+      }
+    case XML_ROLE_GROUP_OPEN:
+      if (state.level >= dtd->groupSize) {
+        /* Grow through a temporary so the old array is not lost (and is
+           still freed by dtdDestroy) when realloc fails. */
+        size_t newSize = dtd->groupSize ? dtd->groupSize * 2 : 32;
+        char *tem = realloc(dtd->groupConnector, newSize);
+        if (!tem)
+          return noMemory;
+        dtd->groupConnector = tem;
+        dtd->groupSize = newSize;
+      }
+      dtd->groupConnector[state.level] = 0;
+      break;
+    case XML_ROLE_GROUP_SEQUENCE:
+      /* ',' and '|' must not be mixed within one group. */
+      if (dtd->groupConnector[state.level] == '|') {
+        *nextPtr = s;
+        return syntaxError;
+      }
+      dtd->groupConnector[state.level] = ',';
+      break;
+    case XML_ROLE_GROUP_CHOICE:
+      if (dtd->groupConnector[state.level] == ',') {
+        *nextPtr = s;
+        return syntaxError;
+      }
+      dtd->groupConnector[state.level] = '|';
+      break;
+    case XML_ROLE_NONE:
+      if (tok == XML_TOK_PARAM_ENTITY_REF)
+        dtd->containsRef = 1;
+      break;
+    }
+    s = next;
+  }
+  /* not reached */
+}
+
+/* Check, as element content at level 1, the stored text of every general
+   entity that has text, has not been verified yet and is not one of the
+   predefined ("magic") entities.  On failure *badPtr is set to the
+   entity's docTextPtr and the error code is returned. */
+static enum WfCheckResult
+checkParsedEntities(CONTEXT *context, const char **badPtr)
+{
+  HASH_TABLE_ITER iter;
+  ENTITY *entity;
+
+  hashTableIterInit(&iter, &context->dtd.generalEntities);
+  while ((entity = (ENTITY *)hashTableIterNext(&iter)) != 0) {
+    enum WfCheckResult result;
+    const ENCODING *internalEnc;
+    const char *textEnd;
+
+    if (!entity->textPtr || entity->wfInContent || entity->magic)
+      continue;
+    internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+    textEnd = entity->textPtr + entity->textLen;
+    /* Mark the entity open so a self-reference is caught as recursion. */
+    entity->open = 1;
+    result = checkContent(1, context, internalEnc, entity->textPtr, textEnd, badPtr);
+    entity->open = 0;
+    if (result && *badPtr) {
+      *badPtr = entity->docTextPtr;
+      return result;
+    }
+    entity->wfInContent = 1;
+  }
+  return wellFormed;
+}
+
+/* Check a general text entity: skip a byte order mark, process an optional
+   leading XML declaration (which may switch *enc), then check the rest as
+   content at level 1.  containsRef is forced on before the content check. */
+static enum WfCheckResult
+checkGeneralTextEntity(CONTEXT *context,
+		       const char *s, const char *end,
+		       const char **nextPtr,
+		       const ENCODING **enc)
+{
+  INIT_ENCODING initEnc;
+  const char *tokEnd;
+  int tok;
+
+  XmlInitEncoding(&initEnc, enc);
+  tok = XmlContentTok(*enc, s, end, &tokEnd);
+  if (tok == XML_TOK_BOM) {
+    s = tokEnd;
+    tok = XmlContentTok(*enc, s, end, &tokEnd);
+  }
+
+  if (tok == XML_TOK_XML_DECL) {
+    const char *encodingName = 0;
+    const ENCODING *declared = 0;
+    const char *version;
+
+    if (!XmlParseXmlDecl(1, *enc, s, tokEnd, nextPtr,
+                         &version, &encodingName, &declared, 0))
+      return syntaxError;
+    if (!declared) {
+      /* A name was given but no matching encoding was found. */
+      if (encodingName) {
+        *nextPtr = encodingName;
+        return unknownEncoding;
+      }
+    }
+    else {
+      if (declared->minBytesPerChar != (*enc)->minBytesPerChar) {
+        *nextPtr = encodingName;
+        return incorrectEncoding;
+      }
+      *enc = declared;
+    }
+    s = tokEnd;
+  }
+
+  context->dtd.containsRef = 1;
+  return checkContent(1, context, *enc, s, end, nextPtr);
+}
+
+/* Check the attribute value text between ptr and end (quotes excluded).
+
+   Character references must denote a valid character number and entity
+   references must name an internal entity that is not currently being
+   expanded and has no notation; the entity's text is checked recursively
+   the first time it is referenced.  An unknown entity is an error only
+   while dtd->containsRef is clear.
+
+   Returns wellFormed, or an error code with *badPtr at the offending
+   position (noMemory leaves *badPtr untouched).  Aborts on a token kind the
+   attribute-value tokenizer is not expected to return. */
+static enum WfCheckResult
+checkAttributeValue(DTD *dtd, const ENCODING *enc,
+                    const char *ptr, const char *end, const char **badPtr)
+{
+  for (;;) {
+    const char *next;
+    int tok = XmlAttributeValueTok(enc, ptr, end, &next);
+    switch (tok) {
+    case XML_TOK_TRAILING_CR:
+    case XML_TOK_NONE:
+      return wellFormed;
+    case XML_TOK_INVALID:
+      *badPtr = next;
+      return invalidToken;
+    case XML_TOK_PARTIAL:
+      *badPtr = ptr;
+      return invalidToken;
+    case XML_TOK_CHAR_REF:
+      if (XmlCharRefNumber(enc, ptr) < 0) {
+        *badPtr = ptr;
+        return badCharRef;
+      }
+      break;
+    case XML_TOK_DATA_CHARS:
+    case XML_TOK_DATA_NEWLINE:
+      break;
+    case XML_TOK_ENTITY_REF:
+      {
+        enum WfCheckResult result;
+        const ENCODING *internalEnc;
+        const char *textEnd;
+        ENTITY *entity;
+        /* The name sits between the leading '&' and the trailing ';'. */
+        const char *name = poolStoreString(&dtd->pool, enc,
+                                           ptr + enc->minBytesPerChar,
+                                           next - enc->minBytesPerChar);
+        if (!name) {
+          poolDiscard(&dtd->pool);
+          return noMemory;
+        }
+        entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0);
+        poolDiscard(&dtd->pool);
+        if (!entity) {
+          if (!dtd->containsRef) {
+            *badPtr = ptr;
+            return undefinedEntity;
+          }
+          break;
+        }
+        if (entity->wfInAttribute)
+          break;
+        if (entity->open) {
+          *badPtr = ptr;
+          return recursiveEntityRef;
+        }
+        if (entity->notation) {
+          *badPtr = ptr;
+          return binaryEntityRef;
+        }
+        if (!entity->textPtr) {
+          /* No stored text: the entity is external. */
+          *badPtr = ptr;
+          return attributeExternalEntityRef;
+        }
+        internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+        textEnd = entity->textPtr + entity->textLen;
+        entity->open = 1;
+        result = checkAttributeValue(dtd, internalEnc, entity->textPtr, textEnd, badPtr);
+        entity->open = 0;
+        if (result && *badPtr) {
+          /* Report the reference, not a position inside the pooled text. */
+          *badPtr = ptr;
+          return result;
+        }
+        entity->wfInAttribute = 1;
+        break;
+      }
+    default:
+      abort();
+    }
+    ptr = next;
+  }
+  /* not reached */
+}
+
+/* Put the pool into its empty state: no blocks and no current string. */
+static
+void poolInit(STRING_POOL *pool)
+{
+  pool->blocks = 0;
+  pool->start = pool->ptr = 0;
+  pool->end = 0;
+}
+
+/* Free every block of the pool and return it to the empty state. */
+static
+void poolDestroy(STRING_POOL *pool)
+{
+  BLOCK *cur;
+  BLOCK *following;
+
+  for (cur = pool->blocks; cur; cur = following) {
+    /* Fetch the link before the block holding it is released. */
+    following = cur->next;
+    free(cur);
+  }
+  poolInit(pool);
+}
+
+/* Convert the text between ptr and end to UTF-8 and append it to the
+   string under construction, growing the pool whenever the conversion
+   stops short of end.  Returns pool->start, or 0 if the pool cannot grow. */
+static
+const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
+		       const char *ptr, const char *end)
+{
+  do {
+    XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end);
+    if (ptr == end)
+      return pool->start;
+  } while (poolGrow(pool));
+  return 0;
+}
+
+/* Append the converted text between ptr and end followed by a terminating
+   zero byte.  Returns the start of the string under construction, or 0 on
+   failure.  The caller decides with poolFinish() or poolDiscard() whether
+   the string is kept. */
+static
+const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
+			    const char *ptr, const char *end)
+{
+  if (!poolAppend(pool, enc, ptr, end))
+    return 0;
+  if (pool->ptr == pool->end) {
+    /* No room left for the terminator. */
+    if (!poolGrow(pool))
+      return 0;
+  }
+  *pool->ptr = 0;
+  pool->ptr++;
+  return pool->start;
+}
+
+/* Make more room for the string under construction.
+
+   If that string starts at the beginning of the newest block, the block is
+   enlarged in place to twice its size; otherwise a new block (at least
+   INIT_BLOCK_SIZE bytes, else twice the current extent) is pushed and the
+   partial string is copied into it.
+
+   Returns 1 on success.  Returns 0 if memory cannot be obtained, in which
+   case the pool is left exactly as it was. */
+static
+int poolGrow(STRING_POOL *pool)
+{
+  /* Measure the partial string before any reallocation invalidates
+     pool->start. */
+  size_t used = pool->ptr - pool->start;
+
+  if (pool->blocks && pool->start == pool->blocks->s) {
+    size_t blockSize = (pool->end - pool->start)*2;
+    /* Keep the old pointer until realloc has succeeded; overwriting
+       pool->blocks with a null result would lose the whole block list. */
+    BLOCK *tem = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize);
+    if (!tem)
+      return 0;
+    pool->blocks = tem;
+    pool->ptr = tem->s + used;
+    pool->start = tem->s;
+    pool->end = pool->start + blockSize;
+  }
+  else {
+    BLOCK *tem;
+    size_t blockSize = pool->end - pool->start;
+    if (blockSize < INIT_BLOCK_SIZE)
+      blockSize = INIT_BLOCK_SIZE;
+    else
+      blockSize *= 2;
+    tem = malloc(offsetof(BLOCK, s) + blockSize);
+    if (!tem)
+      return 0;
+    tem->next = pool->blocks;
+    pool->blocks = tem;
+    /* On the first grow pool->start is null; memcpy must not see it. */
+    if (used)
+      memcpy(tem->s, pool->start, used);
+    pool->ptr = tem->s + used;
+    pool->start = tem->s;
+    pool->end = tem->s + blockSize;
+  }
+  return 1;
+}
+
+/* Initialise dtd and register the five predefined entities (lt, amp, gt,
+   quot, apos) as already verified.  Returns 1 on success and 0 if an entity
+   record cannot be created.
+
+   Every member is initialised before the first step that can fail, so that
+   dtdDestroy() is safe on a DTD whose initialisation failed part-way. */
+static int dtdInit(DTD *dtd)
+{
+  static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
+  static const char chars[] = { '<', '&', '>', '"', '\'' };
+  int i;
+
+  poolInit(&(dtd->pool));
+  hashTableInit(&(dtd->generalEntities));
+  hashTableInit(&(dtd->paramEntities));
+  dtd->containsRef = 0;
+  /* Read by checkContent() and checkProlog(); only ever set to 1 later. */
+  dtd->standalone = 0;
+  dtd->groupSize = 0;
+  dtd->groupConnector = 0;
+  for (i = 0; i < 5; i++) {
+    ENTITY *entity = (ENTITY *)lookup(&(dtd->generalEntities), names[i], sizeof(ENTITY));
+    if (!entity)
+      return 0;
+    /* The replacement text is the single character itself. */
+    entity->textPtr = chars + i;
+    entity->textLen = 1;
+    entity->magic = 1;
+    entity->wfInContent = 1;
+    entity->wfInAttribute = 1;
+  }
+  return 1;
+}
+
+/* Release the string pool, both entity tables and the group-connector
+   array of dtd. */
+static void dtdDestroy(DTD *dtd)
+{
+  HASH_TABLE *general = &dtd->generalEntities;
+  HASH_TABLE *param = &dtd->paramEntities;
+
+  poolDestroy(&dtd->pool);
+  hashTableDestroy(general);
+  hashTableDestroy(param);
+  free(dtd->groupConnector);
+}
+
+/* Record an internal entity declaration.
+
+   The name between entityNamePtr and entityNameEnd is looked up (and
+   created if absent) in the parameter or general entity table.  The quoted
+   literal between entityTextPtr and entityTextEnd is then scanned:
+   character references are replaced by their UTF-8 encoding, line breaks
+   become '\n', and entity references are copied through unexpanded.  The
+   resulting text is attached to the entity only if this is its first
+   declaration; a repeated declaration is still scanned for errors but its
+   text is discarded.
+
+   Returns wellFormed, noMemory, or an error code with *badPtr at the
+   offending position.  Aborts on a token kind the entity-value tokenizer is
+   not expected to return. */
+static
+enum WfCheckResult storeEntity(DTD *dtd,
+                               const ENCODING *enc,
+                               int isParam,
+                               const char *entityNamePtr,
+                               const char *entityNameEnd,
+                               const char *entityTextPtr,
+                               const char *entityTextEnd,
+                               const char **badPtr)
+{
+  ENTITY *entity;
+  const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+  STRING_POOL *pool = &(dtd->pool);
+  if (!poolStoreString(pool, enc, entityNamePtr, entityNameEnd))
+    return noMemory;
+  entity = (ENTITY *)lookup(isParam ? &(dtd->paramEntities) : &(dtd->generalEntities),
+                            pool->start,
+                            sizeof(ENTITY));
+  if (!entity) {
+    poolDiscard(pool);
+    return noMemory;
+  }
+  if (entity->name != pool->start) {
+    /* Declared before: from here on a null entityNamePtr marks the
+       declaration as a duplicate. */
+    poolDiscard(pool);
+    entityNamePtr = 0;
+  }
+  else
+    poolFinish(pool);
+  /* Step inside the quotes. */
+  entityTextPtr += enc->minBytesPerChar;
+  entityTextEnd -= enc->minBytesPerChar;
+  /* Only the binding declaration may set the position used for error
+     reports; a duplicate must not redirect it. */
+  if (entityNamePtr)
+    entity->docTextPtr = entityTextPtr;
+  for (;;) {
+    const char *next;
+    int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
+    switch (tok) {
+    case XML_TOK_PARAM_ENTITY_REF:
+      *badPtr = entityTextPtr;
+      return syntaxError;
+    case XML_TOK_NONE:
+      if (entityNamePtr) {
+        entity->textPtr = pool->start;
+        entity->textLen = pool->ptr - pool->start;
+        poolFinish(pool);
+      }
+      else
+        poolDiscard(pool);
+      return wellFormed;
+    case XML_TOK_ENTITY_REF:
+    case XML_TOK_DATA_CHARS:
+      if (!poolAppend(pool, enc, entityTextPtr, next))
+        return noMemory;
+      break;
+    case XML_TOK_TRAILING_CR:
+      next = entityTextPtr + enc->minBytesPerChar;
+      /* fall through */
+    case XML_TOK_DATA_NEWLINE:
+      if (pool->end == pool->ptr && !poolGrow(pool))
+        return noMemory;
+      *(pool->ptr)++ = '\n';
+      break;
+    case XML_TOK_CHAR_REF:
+      {
+        char buf[XML_MAX_BYTES_PER_CHAR];
+        int i;
+        int n = XmlCharRefNumber(enc, entityTextPtr);
+        if (n < 0) {
+          *badPtr = entityTextPtr;
+          return badCharRef;
+        }
+        n = XmlEncode(utf8, n, buf);
+        if (!n) {
+          *badPtr = entityTextPtr;
+          return badCharRef;
+        }
+        for (i = 0; i < n; i++) {
+          if (pool->end == pool->ptr && !poolGrow(pool))
+            return noMemory;
+          *(pool->ptr)++ = buf[i];
+        }
+      }
+      break;
+    case XML_TOK_PARTIAL:
+      *badPtr = entityTextPtr;
+      return invalidToken;
+    case XML_TOK_INVALID:
+      *badPtr = next;
+      return invalidToken;
+    default:
+      abort();
+    }
+    entityTextPtr = next;
+  }
+  /* not reached */
+}

Propchange: apr/apr-util/vendor/expat/current/xmlwf/wfcheck.c
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/wfcheck.h
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/wfcheck.h?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/wfcheck.h (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/wfcheck.h Wed Sep 29 08:16:58 2010
@@ -0,0 +1,38 @@
+
+#include <stddef.h>
+
+enum WfCheckResult {  /* Outcome codes; wfCheckMessage() indexes its message table by these values, so the order must stay in step. */
+  wellFormed,  /* success; wfCheckMessage() returns a null pointer for it */
+  noMemory,  /* an allocation failed */
+  syntaxError,
+  noElements,  /* input ended before any element was found */
+  invalidToken,
+  unclosedToken,  /* tokenizer reported XML_TOK_PARTIAL */
+  partialChar,  /* tokenizer reported XML_TOK_PARTIAL_CHAR */
+  tagMismatch,  /* end tag name differs from the open start tag */
+  duplicateAttribute,
+  junkAfterDocElement,
+  paramEntityRef,
+  undefinedEntity,
+  recursiveEntityRef,  /* reference to an entity that is currently being checked */
+  asyncEntity,  /* entity text does not leave the element depth unchanged */
+  badCharRef,  /* XmlCharRefNumber() rejected a character reference */
+  binaryEntityRef,  /* reference to an entity that has a notation */
+  attributeExternalEntityRef,
+  misplacedXmlPi,
+  unknownEncoding,
+  incorrectEncoding  /* declared encoding has a different minimum character width */
+};
+
+enum EntityType {  /* Selects how wfCheck() treats its input. */
+  documentEntity,  /* prolog, declared entities, then content from level 0 */
+  generalTextEntity  /* any other value takes the checkGeneralTextEntity() path */
+};
+
+enum WfCheckResult wfCheck(enum EntityType entityType,
+			   const char *s, size_t n,
+			   const char **errorPtr,
+			   unsigned long *errorLineNumber,
+			   unsigned long *errorColNumber);
+const char *wfCheckMessage(enum WfCheckResult);
+

Propchange: apr/apr-util/vendor/expat/current/xmlwf/wfcheck.h
------------------------------------------------------------------------------
    svn:executable = 

Added: apr/apr-util/vendor/expat/current/xmlwf/wfcheckmessage.c
URL: http://svn.apache.org/viewvc/apr/apr-util/vendor/expat/current/xmlwf/wfcheckmessage.c?rev=1002512&view=auto
==============================================================================
--- apr/apr-util/vendor/expat/current/xmlwf/wfcheckmessage.c (added)
+++ apr/apr-util/vendor/expat/current/xmlwf/wfcheckmessage.c Wed Sep 29 08:16:58 2010
@@ -0,0 +1,30 @@
+#include "wfcheck.h"
+
+/* Return a static English description of result, or a null pointer for
+   wellFormed and for values outside the table.  The table is indexed by
+   the numeric value of the enumerators, so it must list one entry per
+   enumerator in declaration order. */
+const char *wfCheckMessage(enum WfCheckResult result)
+{
+  static const char *message[] = {
+    0,
+    "out of memory",
+    "syntax error",
+    "no element found",
+    "not well-formed",
+    "unclosed token",
+    /* partialChar used to repeat "unclosed token", which made the two
+       conditions indistinguishable in the output. */
+    "partial character",
+    "mismatched tag",
+    "duplicate attribute",
+    "junk after document element",
+    "parameter entity reference not allowed within declaration in internal subset",
+    "undefined entity",
+    "recursive entity reference",
+    "asynchronous entity",
+    "reference to invalid character number",
+    "reference to binary entity",
+    "reference to external entity in attribute",
+    "xml pi not at start of external entity",
+    "unknown encoding",
+    "encoding specified in XML declaration is incorrect"
+  };
+  if (result > 0 && (size_t)result < sizeof(message)/sizeof(message[0]))
+    return message[result];
+  return 0;
+}

Propchange: apr/apr-util/vendor/expat/current/xmlwf/wfcheckmessage.c
------------------------------------------------------------------------------
    svn:executable = 



Mime
View raw message