Return-Path: X-Original-To: apmail-corinthia-commits-archive@minotaur.apache.org Delivered-To: apmail-corinthia-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7204CCD5D for ; Tue, 30 Dec 2014 14:45:24 +0000 (UTC) Received: (qmail 84055 invoked by uid 500); 30 Dec 2014 14:45:24 -0000 Delivered-To: apmail-corinthia-commits-archive@corinthia.apache.org Received: (qmail 84036 invoked by uid 500); 30 Dec 2014 14:45:24 -0000 Mailing-List: contact commits-help@corinthia.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@corinthia.incubator.apache.org Delivered-To: mailing list commits@corinthia.incubator.apache.org Received: (qmail 84027 invoked by uid 99); 30 Dec 2014 14:45:24 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 30 Dec 2014 14:45:24 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED,T_RP_MATCHES_RCVD X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO mail.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with SMTP; Tue, 30 Dec 2014 14:44:30 +0000 Received: (qmail 81796 invoked by uid 99); 30 Dec 2014 14:44:26 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 30 Dec 2014 14:44:26 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id EF7F1A39B52; Tue, 30 Dec 2014 14:44:25 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: pmkelly@apache.org To: commits@corinthia.incubator.apache.org Date: Tue, 30 Dec 2014 14:44:38 -0000 Message-Id: <6eb7edb957874a92879b9ba8ea4ea5f8@git.apache.org> In-Reply-To: <37b7c4a06d7b497db8bc2e76f585a136@git.apache.org> References: <37b7c4a06d7b497db8bc2e76f585a136@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [14/63] [abbrv] [partial] incubator-corinthia git commit: Moved 3rdparty to platform This is the first part of isolating 3rdparty within platform. The idea is that only platform.h should be used outside platform platform.h might contain includes to inter X-Virus-Checked: Checked by ClamAV on apache.org http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/ac70cb0e/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.c ---------------------------------------------------------------------- diff --git a/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.c b/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.c deleted file mode 100644 index eac773c..0000000 --- a/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.c +++ /dev/null @@ -1,1401 +0,0 @@ -/* streamio.c -- handles character stream I/O - - (c) 1998-2008 (W3C) MIT, ERCIM, Keio University - See tidy.h for the copyright notice. - - Wrapper around Tidy input source and output sink - that calls appropriate interfaces, and applies - necessary char encoding transformations: to/from - ISO-10646 and/or UTF-8. - -*/ - -#include -#include - -#include "streamio.h" -#include "tidy-int.h" -#include "lexer.h" -#include "message.h" -#include "utf8.h" -#include "tmbstr.h" - -#ifdef TIDY_WIN32_MLANG_SUPPORT -#include "win32tc.h" -#endif - -/************************ -** Forward Declarations -************************/ - -static uint ReadCharFromStream( StreamIn* in ); - -static uint ReadByte( StreamIn* in ); -static void UngetByte( StreamIn* in, uint byteValue ); - -static void PutByte( uint byteValue, StreamOut* out ); - -static void EncodeWin1252( uint c, StreamOut* out ); -static void EncodeMacRoman( uint c, StreamOut* out ); -static void EncodeIbm858( uint c, StreamOut* out ); -static void EncodeLatin0( uint c, StreamOut* out ); - -static uint DecodeIbm850(uint c); -static uint DecodeLatin0(uint c); - -static uint PopChar( StreamIn *in ); - -/****************************** -** Static (duration) Globals -******************************/ - -static StreamOut stderrStreamOut = -{ - ASCII, - FSM_ASCII, - DEFAULT_NL_CONFIG, -#ifdef TIDY_WIN32_MLANG_SUPPORT - NULL, -#endif - FileIO, - { 0, TY_(filesink_putByte) } -}; - -static StreamOut stdoutStreamOut = -{ - ASCII, - FSM_ASCII, - DEFAULT_NL_CONFIG, -#ifdef TIDY_WIN32_MLANG_SUPPORT - NULL, -#endif - FileIO, - { 0, TY_(filesink_putByte) } -}; - -StreamOut* TY_(StdErrOutput)(void) -{ - if ( stderrStreamOut.sink.sinkData == 0 ) - stderrStreamOut.sink.sinkData = stderr; - return &stderrStreamOut; -} - -#if 0 -StreamOut* TY_(StdOutOutput)(void) -{ - if ( stdoutStreamOut.sink.sinkData == 0 ) - stdoutStreamOut.sink.sinkData = stdout; - return &stdoutStreamOut; -} -#endif - -void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out ) -{ - if ( out && out != &stderrStreamOut && out != &stdoutStreamOut ) - { - if ( out->iotype == FileIO ) - fclose( (FILE*) out->sink.sinkData ); - TidyDocFree( doc, out ); - } -} - -/************************ -** Source -************************/ - -static void InitLastPos( StreamIn *in ); - -StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding ) -{ - StreamIn *in = (StreamIn*) TidyDocAlloc( doc, sizeof(StreamIn) ); - - TidyClearMemory( in, sizeof(StreamIn) ); - in->curline = 1; - in->curcol = 1; - in->encoding = encoding; - in->state = FSM_ASCII; - in->doc = doc; - in->bufsize = CHARBUF_SIZE; - in->allocator = doc->allocator; - in->charbuf = (tchar*)TidyDocAlloc(doc, sizeof(tchar) * in->bufsize); - InitLastPos( in ); -#ifdef TIDY_STORE_ORIGINAL_TEXT - in->otextbuf = NULL; - in->otextlen = 0; - in->otextsize = 0; -#endif - return in; -} - -void TY_(freeStreamIn)(StreamIn* in) -{ -#ifdef TIDY_STORE_ORIGINAL_TEXT - if (in->otextbuf) - TidyFree(in->allocator, in->otextbuf); -#endif - TidyFree(in->allocator, in->charbuf); - TidyFree(in->allocator, in); -} - -StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE *fp, int encoding ) -{ - StreamIn *in = TY_(initStreamIn)( doc, encoding ); - if ( TY_(initFileSource)( doc->allocator, &in->source, fp ) != 0 ) - { - TY_(freeStreamIn)( in ); - return NULL; - } - in->iotype = FileIO; - return in; -} - -StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* buf, int encoding ) -{ - StreamIn *in = TY_(initStreamIn)( doc, encoding ); - tidyInitInputBuffer( &in->source, buf ); - in->iotype = BufferIO; - return in; -} - -StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding ) -{ - StreamIn *in = TY_(initStreamIn)( doc, encoding ); - memcpy( &in->source, source, sizeof(TidyInputSource) ); - in->iotype = UserIO; - return in; -} - -int TY_(ReadBOMEncoding)(StreamIn *in) -{ - uint c, c1; -#if SUPPORT_UTF16_ENCODINGS - uint bom; -#endif - - c = ReadByte(in); - if (c == EndOfStream) - return -1; - - c1 = ReadByte( in ); - if (c1 == EndOfStream) - { - UngetByte(in, c); - return -1; - } - - /* todo: dont warn about mismatch for auto input encoding */ - /* todo: let the user override the encoding found here */ - -#if SUPPORT_UTF16_ENCODINGS - bom = (c << 8) + c1; - - if ( bom == UNICODE_BOM_BE ) - { - /* big-endian UTF-16 */ - if ( in->encoding != UTF16 && in->encoding != UTF16BE ) - TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF16BE); - - return UTF16BE; /* return decoded BOM */ - } - else if (bom == UNICODE_BOM_LE) - { - /* little-endian UTF-16 */ - if (in->encoding != UTF16 && in->encoding != UTF16LE) - TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF16LE); - - return UTF16LE; /* return decoded BOM */ - } - else -#endif /* SUPPORT_UTF16_ENCODINGS */ - { - uint c2 = ReadByte(in); - - if (c2 == EndOfStream) - { - UngetByte(in, c1); - UngetByte(in, c); - return -1; - } - - if (((c << 16) + (c1 << 8) + c2) == UNICODE_BOM_UTF8) - { - /* UTF-8 */ - if (in->encoding != UTF8) - TY_(ReportEncodingWarning)(in->doc, ENCODING_MISMATCH, UTF8); - - return UTF8; - } - else - UngetByte( in, c2 ); - } - - UngetByte(in, c1); - UngetByte(in, c); - - return -1; -} - -#ifdef TIDY_STORE_ORIGINAL_TEXT -void TY_(AddByteToOriginalText)(StreamIn *in, tmbchar c) -{ - if (in->otextlen + 1 >= in->otextsize) - { - size_t size = in->otextsize ? 1 : 2; - in->otextbuf = TidyRealloc(in->allocator, in->otextbuf, in->otextsize + size); - in->otextsize += size; - } - in->otextbuf[in->otextlen++] = c; - in->otextbuf[in->otextlen ] = 0; -} - -void TY_(AddCharToOriginalText)(StreamIn *in, tchar c) -{ - int i, err, count = 0; - tmbchar buf[10] = {0}; - - err = TY_(EncodeCharToUTF8Bytes)(c, buf, NULL, &count); - - if (err) - { - /* replacement character 0xFFFD encoded as UTF-8 */ - buf[0] = (byte) 0xEF; - buf[1] = (byte) 0xBF; - buf[2] = (byte) 0xBD; - count = 3; - } - - for (i = 0; i < count; ++i) - TY_(AddByteToOriginalText)(in, buf[i]); -} -#endif - -static void InitLastPos( StreamIn *in ) -{ - in->curlastpos = 0; - in->firstlastpos = 0; -} - -static void PopLastPos( StreamIn *in ) -{ - in->curlastpos = (in->curlastpos+1)%LASTPOS_SIZE; - if ( in->curlastpos == in->firstlastpos ) - in->firstlastpos = (in->firstlastpos+1)%LASTPOS_SIZE; -} - -static void SaveLastPos( StreamIn *in ) -{ - PopLastPos( in ); - in->lastcols[in->curlastpos] = in->curcol; -} - -static void RestoreLastPos( StreamIn *in ) -{ - if ( in->firstlastpos == in->curlastpos ) - in->curcol = 0; - else - { - in->curcol = in->lastcols[in->curlastpos]; - if ( in->curlastpos == 0 ) - in->curlastpos = LASTPOS_SIZE; - in->curlastpos--; - } -} - -uint TY_(ReadChar)( StreamIn *in ) -{ - uint c = EndOfStream; - uint tabsize = cfg( in->doc, TidyTabSize ); -#ifdef TIDY_STORE_ORIGINAL_TEXT - Bool added = no; -#endif - - if ( in->pushed ) - return PopChar( in ); - - SaveLastPos( in ); - - if ( in->tabs > 0 ) - { - in->curcol++; - in->tabs--; - return ' '; - } - - for (;;) - { - c = ReadCharFromStream(in); - - if ( EndOfStream == c ) - return EndOfStream; - - if (c == '\n') - { -#ifdef TIDY_STORE_ORIGINAL_TEXT - added = yes; - TY_(AddCharToOriginalText)(in, (tchar)c); -#endif - in->curcol = 1; - in->curline++; - break; - } - - if (c == '\t') - { -#ifdef TIDY_STORE_ORIGINAL_TEXT - added = yes; - TY_(AddCharToOriginalText)(in, (tchar)c); -#endif - in->tabs = tabsize > 0 ? - tabsize - ((in->curcol - 1) % tabsize) - 1 - : 0; - in->curcol++; - c = ' '; - break; - } - - /* #427663 - map '\r' to '\n' - Andy Quick 11 Aug 00 */ - if (c == '\r') - { -#ifdef TIDY_STORE_ORIGINAL_TEXT - added = yes; - TY_(AddCharToOriginalText)(in, (tchar)c); -#endif - c = ReadCharFromStream(in); - if (c != '\n') - { - TY_(UngetChar)( c, in ); - c = '\n'; - } - else - { -#ifdef TIDY_STORE_ORIGINAL_TEXT - TY_(AddCharToOriginalText)(in, (tchar)c); -#endif - } - in->curcol = 1; - in->curline++; - break; - } - -#ifndef NO_NATIVE_ISO2022_SUPPORT - /* strip control characters, except for Esc */ - if (c == '\033') - break; -#endif - - /* Form Feed is allowed in HTML */ - if ( c == '\015' && !cfgBool(in->doc, TidyXmlTags) ) - break; - - if ( c < 32 ) - continue; /* discard control char */ - - /* watch out for chars that have already been decoded such as */ - /* IS02022, UTF-8 etc, that don't require further decoding */ - - if ( - in->encoding == RAW -#ifndef NO_NATIVE_ISO2022_SUPPORT - || in->encoding == ISO2022 -#endif - || in->encoding == UTF8 - -#if SUPPORT_ASIAN_ENCODINGS - || in->encoding == SHIFTJIS /* #431953 - RJ */ - || in->encoding == BIG5 /* #431953 - RJ */ -#endif - ) - { - in->curcol++; - break; - } - -#if SUPPORT_UTF16_ENCODINGS - /* handle surrogate pairs */ - if ( in->encoding == UTF16LE || - in->encoding == UTF16 || - in->encoding == UTF16BE ) - { - if ( !TY_(IsValidUTF16FromUCS4)(c) ) - { - /* invalid UTF-16 value */ - TY_(ReportEncodingError)(in->doc, INVALID_UTF16, c, yes); - c = 0; - } - else if ( TY_(IsLowSurrogate)(c) ) - { - uint n = c; - uint m = ReadCharFromStream( in ); - if ( m == EndOfStream ) - return EndOfStream; - - c = 0; - if ( TY_(IsHighSurrogate)(m) ) - { - n = TY_(CombineSurrogatePair)( m, n ); - if ( TY_(IsValidCombinedChar)(n) ) - c = n; - } - /* not a valid pair */ - if ( 0 == c ) - TY_(ReportEncodingError)( in->doc, INVALID_UTF16, c, yes ); - } - } -#endif - - /* Do first: acts on range 128 - 255 */ - switch ( in->encoding ) - { - case MACROMAN: - c = TY_(DecodeMacRoman)( c ); - break; - case IBM858: - c = DecodeIbm850( c ); - break; - case LATIN0: - c = DecodeLatin0( c ); - break; - } - - /* produced e.g. as a side-effect of smart quotes in Word */ - /* but can't happen if using MACROMAN encoding */ - if ( 127 < c && c < 160 ) - { - uint c1 = 0, replMode = DISCARDED_CHAR; - Bool isVendorChar = ( in->encoding == WIN1252 || - in->encoding == MACROMAN ); - Bool isWinChar = ( in->encoding == WIN1252 || - TY_(ReplacementCharEncoding) == WIN1252 ); - Bool isMacChar = ( in->encoding == MACROMAN || - TY_(ReplacementCharEncoding) == MACROMAN ); - - /* set error position just before offending character */ - if (in->doc->lexer) - { - in->doc->lexer->lines = in->curline; - in->doc->lexer->columns = in->curcol; - } - - if ( isWinChar ) - c1 = TY_(DecodeWin1252)( c ); - else if ( isMacChar ) - c1 = TY_(DecodeMacRoman)( c ); - if ( c1 ) - replMode = REPLACED_CHAR; - - if ( c1 == 0 && isVendorChar ) - TY_(ReportEncodingError)(in->doc, VENDOR_SPECIFIC_CHARS, c, replMode == DISCARDED_CHAR); - else if ( ! isVendorChar ) - TY_(ReportEncodingError)(in->doc, INVALID_SGML_CHARS, c, replMode == DISCARDED_CHAR); - - c = c1; - } - - if ( c == 0 ) - continue; /* illegal char is discarded */ - - in->curcol++; - break; - } - -#ifdef TIDY_STORE_ORIGINAL_TEXT - if (!added) - TY_(AddCharToOriginalText)(in, (tchar)c); -#endif - - return c; -} - -static uint PopChar( StreamIn *in ) -{ - uint c = EndOfStream; - if ( in->pushed ) - { - assert( in->bufpos > 0 ); - c = in->charbuf[ --in->bufpos ]; - if ( in->bufpos == 0 ) - in->pushed = no; - - if ( c == '\n' ) - { - in->curcol = 1; - in->curline++; - PopLastPos( in ); - return c; - } - in->curcol++; - PopLastPos( in ); - } - return c; -} - -void TY_(UngetChar)( uint c, StreamIn *in ) -{ - if (c == EndOfStream) - { - /* fprintf(stderr, "Attempt to UngetChar EOF\n"); */ - return; - } - - in->pushed = yes; - - if (in->bufpos + 1 >= in->bufsize) - in->charbuf = (tchar*)TidyRealloc(in->allocator, in->charbuf, sizeof(tchar) * ++(in->bufsize)); - - in->charbuf[(in->bufpos)++] = c; - - if (c == '\n') - --(in->curline); - - RestoreLastPos( in ); -} - - - -/************************ -** Sink -************************/ - -static StreamOut* initStreamOut( TidyDocImpl* doc, int encoding, uint nl ) -{ - StreamOut* out = (StreamOut*) TidyDocAlloc( doc, sizeof(StreamOut) ); - TidyClearMemory( out, sizeof(StreamOut) ); - out->encoding = encoding; - out->state = FSM_ASCII; - out->nl = nl; - return out; -} - -StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint nl ) -{ - StreamOut* out = initStreamOut( doc, encoding, nl ); - TY_(initFileSink)( &out->sink, fp ); - out->iotype = FileIO; - return out; -} -StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint nl ) -{ - StreamOut* out = initStreamOut( doc, encoding, nl ); - tidyInitOutputBuffer( &out->sink, buf ); - out->iotype = BufferIO; - return out; -} -StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint nl ) -{ - StreamOut* out = initStreamOut( doc, encoding, nl ); - memcpy( &out->sink, sink, sizeof(TidyOutputSink) ); - out->iotype = UserIO; - return out; -} - -void TY_(WriteChar)( uint c, StreamOut* out ) -{ - /* Translate outgoing newlines */ - if ( LF == c ) - { - if ( out->nl == TidyCRLF ) - TY_(WriteChar)( CR, out ); - else if ( out->nl == TidyCR ) - c = CR; - } - - if (out->encoding == MACROMAN) - { - EncodeMacRoman( c, out ); - } - else if (out->encoding == WIN1252) - { - EncodeWin1252( c, out ); - } - else if (out->encoding == IBM858) - { - EncodeIbm858( c, out ); - } - else if (out->encoding == LATIN0) - { - EncodeLatin0( c, out ); - } - - else if (out->encoding == UTF8) - { - int count = 0; - - TY_(EncodeCharToUTF8Bytes)( c, NULL, &out->sink, &count ); - if (count <= 0) - { - /* TY_(ReportEncodingError)(in->lexer, INVALID_UTF8 | REPLACED_CHAR, c); */ - /* replacement char 0xFFFD encoded as UTF-8 */ - PutByte(0xEF, out); PutByte(0xBF, out); PutByte(0xBF, out); - } - } -#ifndef NO_NATIVE_ISO2022_SUPPORT - else if (out->encoding == ISO2022) - { - if (c == 0x1b) /* ESC */ - out->state = FSM_ESC; - else - { - switch (out->state) - { - case FSM_ESC: - if (c == '$') - out->state = FSM_ESCD; - else if (c == '(') - out->state = FSM_ESCP; - else - out->state = FSM_ASCII; - break; - - case FSM_ESCD: - if (c == '(') - out->state = FSM_ESCDP; - else - out->state = FSM_NONASCII; - break; - - case FSM_ESCDP: - out->state = FSM_NONASCII; - break; - - case FSM_ESCP: - out->state = FSM_ASCII; - break; - - case FSM_NONASCII: - c &= 0x7F; - break; - - case FSM_ASCII: - break; - } - } - - PutByte(c, out); - } -#endif /* NO_NATIVE_ISO2022_SUPPORT */ - -#if SUPPORT_UTF16_ENCODINGS - else if ( out->encoding == UTF16LE || - out->encoding == UTF16BE || - out->encoding == UTF16 ) - { - int i, numChars = 1; - uint theChars[2]; - - if ( !TY_(IsValidUTF16FromUCS4)(c) ) - { - /* invalid UTF-16 value */ - /* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */ - c = 0; - numChars = 0; - } - else if ( TY_(IsCombinedChar)(c) ) - { - /* output both, unless something goes wrong */ - numChars = 2; - if ( !TY_(SplitSurrogatePair)(c, &theChars[0], &theChars[1]) ) - { - /* TY_(ReportEncodingError)(in->lexer, INVALID_UTF16 | DISCARDED_CHAR, c); */ - c = 0; - numChars = 0; - } - } - else - { - /* just put the char out */ - theChars[0] = c; - } - - for (i = 0; i < numChars; i++) - { - c = theChars[i]; - - if (out->encoding == UTF16LE) - { - uint ch = c & 0xFF; PutByte(ch, out); - ch = (c >> 8) & 0xFF; PutByte(ch, out); - } - - else if (out->encoding == UTF16BE || out->encoding == UTF16) - { - uint ch = (c >> 8) & 0xFF; PutByte(ch, out); - ch = c & 0xFF; PutByte(ch, out); - } - } - } -#endif - -#if SUPPORT_ASIAN_ENCODINGS - else if (out->encoding == BIG5 || out->encoding == SHIFTJIS) - { - if (c < 128) - PutByte(c, out); - else - { - uint ch = (c >> 8) & 0xFF; PutByte(ch, out); - ch = c & 0xFF; PutByte(ch, out); - } - } -#endif - - else - PutByte( c, out ); -} - - - -/**************************** -** Miscellaneous / Helpers -****************************/ - -/* char encoding used when replacing illegal SGML chars, -** regardless of specified encoding. Set at compile time -** to either Windows or Mac. -*/ -const int TY_(ReplacementCharEncoding) = DFLT_REPL_CHARENC; - - -/* Mapping for Windows Western character set CP 1252 -** (chars 128-159/U+0080-U+009F) to Unicode. -*/ -static const uint Win2Unicode[32] = -{ - 0x20AC, 0x0000, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, - 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x0000, 0x017D, 0x0000, - 0x0000, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, - 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x0000, 0x017E, 0x0178 -}; - -/* Function for conversion from Windows-1252 to Unicode */ -uint TY_(DecodeWin1252)(uint c) -{ - if (127 < c && c < 160) - c = Win2Unicode[c - 128]; - - return c; -} - -static void EncodeWin1252( uint c, StreamOut* out ) -{ - if (c < 128 || (c > 159 && c < 256)) - PutByte(c, out); - else - { - int i; - - for (i = 128; i < 160; i++) - if (Win2Unicode[i - 128] == c) - { - PutByte(i, out); - break; - } - } -} - -/* - John Love-Jensen contributed this table for mapping MacRoman - character set to Unicode -*/ - -/* modified to only need chars 128-255/U+0080-U+00FF - Terry Teague 19 Aug 01 */ -static const uint Mac2Unicode[128] = -{ - /* x7F = DEL */ - - 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, - 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, - - 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3, - 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, - - 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, - 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8, - - 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211, - /* =BD U+2126 OHM SIGN */ - 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, - - 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, - 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153, - - 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA, - /* =DB U+00A4 CURRENCY SIGN */ - 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, - - 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, - 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4, - /* xF0 = Apple Logo */ - /* =F0 U+2665 BLACK HEART SUIT */ - 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, - 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7 -}; - -/* Function to convert from MacRoman to Unicode */ -uint TY_(DecodeMacRoman)(uint c) -{ - if (127 < c) - c = Mac2Unicode[c - 128]; - return c; -} - -static void EncodeMacRoman( uint c, StreamOut* out ) -{ - if (c < 128) - PutByte(c, out); - else - { - /* For mac users, map Unicode back to MacRoman. */ - int i; - for (i = 128; i < 256; i++) - { - if (Mac2Unicode[i - 128] == c) - { - PutByte(i, out); - break; - } - } - } -} - -/* Mapping for OS/2 Western character set CP 850 -** (chars 128-255) to Unicode. -*/ -static const uint IBM2Unicode[128] = -{ - 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, - 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, - 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, - 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, - 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, - 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, - 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, - 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, - 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, - 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4, - 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x20AC, 0x00cd, 0x00ce, - 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580, - 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe, - 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4, - 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8, - 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0 -}; - -/* Function for conversion from OS/2-850 to Unicode */ -static uint DecodeIbm850(uint c) -{ - if (127 < c && c < 256) - c = IBM2Unicode[c - 128]; - - return c; -} - -/* For OS/2,Java users, map Unicode back to IBM858 (IBM850+Euro). */ -static void EncodeIbm858( uint c, StreamOut* out ) -{ - if (c < 128) - PutByte(c, out); - else - { - int i; - for (i = 128; i < 256; i++) - { - if (IBM2Unicode[i - 128] == c) - { - PutByte(i, out); - break; - } - } - } -} - - -/* Convert from Latin0 (aka Latin9, ISO-8859-15) to Unicode */ -static uint DecodeLatin0(uint c) -{ - if (159 < c && c < 191) - { - switch (c) - { - case 0xA4: c = 0x20AC; break; - case 0xA6: c = 0x0160; break; - case 0xA8: c = 0x0161; break; - case 0xB4: c = 0x017D; break; - case 0xB8: c = 0x017E; break; - case 0xBC: c = 0x0152; break; - case 0xBD: c = 0x0153; break; - case 0xBE: c = 0x0178; break; - } - } - return c; -} - -/* Map Unicode back to ISO-8859-15. */ -static void EncodeLatin0( uint c, StreamOut* out ) -{ - switch (c) - { - case 0x20AC: c = 0xA4; break; - case 0x0160: c = 0xA6; break; - case 0x0161: c = 0xA8; break; - case 0x017D: c = 0xB4; break; - case 0x017E: c = 0xB8; break; - case 0x0152: c = 0xBC; break; - case 0x0153: c = 0xBD; break; - case 0x0178: c = 0xBE; break; - } - PutByte(c, out); -} - -/* - Table to map symbol font characters to Unicode; undefined - characters are mapped to 0x0000 and characters without any - Unicode equivalent are mapped to '?'. Is this appropriate? -*/ - -#if 0 -static const uint Symbol2Unicode[] = -{ - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, - - 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, - 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, - - 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220D, - 0x0028, 0x0029, 0x2217, 0x002B, 0x002C, 0x2212, 0x002E, 0x002F, - - 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, - 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, - - 0x2245, 0x0391, 0x0392, 0x03A7, 0x0394, 0x0395, 0x03A6, 0x0393, - 0x0397, 0x0399, 0x03D1, 0x039A, 0x039B, 0x039C, 0x039D, 0x039F, - - 0x03A0, 0x0398, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03C2, 0x03A9, - 0x039E, 0x03A8, 0x0396, 0x005B, 0x2234, 0x005D, 0x22A5, 0x005F, - - 0x00AF, 0x03B1, 0x03B2, 0x03C7, 0x03B4, 0x03B5, 0x03C6, 0x03B3, - 0x03B7, 0x03B9, 0x03D5, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BF, - - 0x03C0, 0x03B8, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03D6, 0x03C9, - 0x03BE, 0x03C8, 0x03B6, 0x007B, 0x007C, 0x007D, 0x223C, 0x003F, - - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - - 0x00A0, 0x03D2, 0x2032, 0x2264, 0x2044, 0x221E, 0x0192, 0x2663, - 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193, - - 0x00B0, 0x00B1, 0x2033, 0x2265, 0x00D7, 0x221D, 0x2202, 0x00B7, - 0x00F7, 0x2260, 0x2261, 0x2248, 0x2026, 0x003F, 0x003F, 0x21B5, - - 0x2135, 0x2111, 0x211C, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229, - 0x222A, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209, - - 0x2220, 0x2207, 0x00AE, 0x00A9, 0x2122, 0x220F, 0x221A, 0x22C5, - 0x00AC, 0x2227, 0x2228, 0x21D4, 0x21D0, 0x21D1, 0x21D2, 0x21D3, - - 0x25CA, 0x2329, 0x00AE, 0x00A9, 0x2122, 0x2211, 0x003F, 0x003F, - 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, - - 0x20AC, 0x232A, 0x222B, 0x2320, 0x003F, 0x2321, 0x003F, 0x003F, - 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F, 0x003F -}; - -/* Function to convert from Symbol Font chars to Unicode */ -uint DecodeSymbolFont(uint c) -{ - if (c > 255) - return c; - - /* todo: add some error message */ - - return Symbol2Unicode[c]; -} -#endif - - -/* Facilitates user defined source by providing -** an entry point to marshal pointers-to-functions. -** Needed by .NET and possibly other language bindings. -*/ -Bool TIDY_CALL tidyInitSource( TidyInputSource* source, - void* srcData, - TidyGetByteFunc gbFunc, - TidyUngetByteFunc ugbFunc, - TidyEOFFunc endFunc ) -{ - Bool status = ( source && srcData && gbFunc && ugbFunc && endFunc ); - - if ( status ) - { - source->sourceData = srcData; - source->getByte = gbFunc; - source->ungetByte = ugbFunc; - source->eof = endFunc; - } - - return status; -} - -Bool TIDY_CALL tidyInitSink( TidyOutputSink* sink, - void* snkData, - TidyPutByteFunc pbFunc ) -{ - Bool status = ( sink && snkData && pbFunc ); - if ( status ) - { - sink->sinkData = snkData; - sink->putByte = pbFunc; - } - return status; -} - -/* GetByte must return a byte value in a signed -** integer so that a negative value can signal EOF -** without interfering w/ 0-255 legitimate byte values. -*/ -uint TIDY_CALL tidyGetByte( TidyInputSource* source ) -{ - int bv = source->getByte( source->sourceData ); - return (uint) bv; -} -Bool TIDY_CALL tidyIsEOF( TidyInputSource* source ) -{ - return source->eof( source->sourceData ); -} -void TIDY_CALL tidyUngetByte( TidyInputSource* source, uint ch ) -{ - source->ungetByte( source->sourceData, (byte) ch ); -} -void TIDY_CALL tidyPutByte( TidyOutputSink* sink, uint ch ) -{ - sink->putByte( sink->sinkData, (byte) ch ); -} - -static uint ReadByte( StreamIn* in ) -{ - return tidyGetByte( &in->source ); -} -Bool TY_(IsEOF)( StreamIn* in ) -{ - return tidyIsEOF( &in->source ); -} -static void UngetByte( StreamIn* in, uint byteValue ) -{ - tidyUngetByte( &in->source, byteValue ); -} -static void PutByte( uint byteValue, StreamOut* out ) -{ - tidyPutByte( &out->sink, byteValue ); -} - -#if 0 -static void UngetRawBytesToStream( StreamIn *in, byte* buf, int *count ) -{ - int i; - - for (i = 0; i < *count; i++) - { - /* should never get here; testing for 0xFF, a valid char, is not a good idea */ - if ( in && TY_(IsEOF)(in) ) - { - /* fprintf(stderr,"Attempt to unget EOF in UngetRawBytesToStream\n"); */ - *count = -i; - return; - } - - in->source.ungetByte( in->source.sourceData, buf[i] ); - } -} - -/* - Read raw bytes from stream, return <= 0 if EOF; or if - "unget" is true, Unget the bytes to re-synchronize the input stream - Normally UTF-8 successor bytes are read using this routine. -*/ -static void ReadRawBytesFromStream( StreamIn *in, byte* buf, int *count ) -{ - int ix; - for ( ix=0; ix < *count; ++ix ) - { - if ( in->rawPushed ) - { - buf[ix] = in->rawBytebuf[ --in->rawBufpos ]; - if ( in->rawBufpos == 0 ) - in->rawPushed = no; - } - else - { - if ( in->source.eof(in->source.sourceData) ) - { - *count = -i; - break; - } - buf[ix] = in->source.getByte( in->source.sourceData ); - } - } -} -#endif /* 0 */ - -/* read char from stream */ -static uint ReadCharFromStream( StreamIn* in ) -{ - uint c, n; -#ifdef TIDY_WIN32_MLANG_SUPPORT - uint bytesRead = 0; -#endif - - if ( TY_(IsEOF)(in) ) - return EndOfStream; - - c = ReadByte( in ); - - if (c == EndOfStream) - return c; - -#ifndef NO_NATIVE_ISO2022_SUPPORT - /* - A document in ISO-2022 based encoding uses some ESC sequences - called "designator" to switch character sets. The designators - defined and used in ISO-2022-JP are: - - "ESC" + "(" + ? for ISO646 variants - - "ESC" + "$" + ? and - "ESC" + "$" + "(" + ? for multibyte character sets - - Where ? stands for a single character used to indicate the - character set for multibyte characters. - - Tidy handles this by preserving the escape sequence and - setting the top bit of each byte for non-ascii chars. This - bit is then cleared on output. The input stream keeps track - of the state to determine when to set/clear the bit. - */ - - if (in->encoding == ISO2022) - { - if (c == 0x1b) /* ESC */ - { - in->state = FSM_ESC; - return c; - } - - switch (in->state) - { - case FSM_ESC: - if (c == '$') - in->state = FSM_ESCD; - else if (c == '(') - in->state = FSM_ESCP; - else - in->state = FSM_ASCII; - break; - - case FSM_ESCD: - if (c == '(') - in->state = FSM_ESCDP; - else - in->state = FSM_NONASCII; - break; - - case FSM_ESCDP: - in->state = FSM_NONASCII; - break; - - case FSM_ESCP: - in->state = FSM_ASCII; - break; - - case FSM_NONASCII: - c |= 0x80; - break; - - case FSM_ASCII: - break; - } - - return c; - } -#endif /* #ifndef NO_NATIVE_ISO2022_SUPPORT */ - -#if SUPPORT_UTF16_ENCODINGS - if ( in->encoding == UTF16LE ) - { - uint c1 = ReadByte( in ); - if ( EndOfStream == c1 ) - return EndOfStream; - n = (c1 << 8) + c; - return n; - } - - if ((in->encoding == UTF16) || (in->encoding == UTF16BE)) /* UTF-16 is big-endian by default */ - { - uint c1 = ReadByte( in ); - if ( EndOfStream == c1 ) - return EndOfStream; - n = (c << 8) + c1; - return n; - } -#endif - - if ( in->encoding == UTF8 ) - { - /* deal with UTF-8 encoded char */ - - int err, count = 0; - - /* first byte "c" is passed in separately */ - err = TY_(DecodeUTF8BytesToChar)( &n, c, NULL, &in->source, &count ); - if (!err && (n == (uint)EndOfStream) && (count == 1)) /* EOF */ - return EndOfStream; - else if (err) - { - /* set error position just before offending character */ - in->doc->lexer->lines = in->curline; - in->doc->lexer->columns = in->curcol; - - TY_(ReportEncodingError)(in->doc, INVALID_UTF8, n, no); - n = 0xFFFD; /* replacement char */ - } - - return n; - } - -#if SUPPORT_ASIAN_ENCODINGS - /* - This section is suitable for any "multibyte" variable-width - character encoding in which a one-byte code is less than - 128, and the first byte of a two-byte code is greater or - equal to 128. Note that Big5 and ShiftJIS fit into this - kind, even though their second byte may be less than 128 - */ - if ((in->encoding == BIG5) || (in->encoding == SHIFTJIS)) - { - if (c < 128) - return c; - else if ((in->encoding == SHIFTJIS) && (c >= 0xa1 && c <= 0xdf)) /* 461643 - fix suggested by Rick Cameron 14 Sep 01 */ - { - /* - Rick Cameron pointed out that for Shift_JIS, the values from - 0xa1 through 0xdf represent singe-byte characters - (U+FF61 to U+FF9F - half-shift Katakana) - */ - return c; - } - else - { - uint c1 = ReadByte( in ); - if ( EndOfStream == c1 ) - return EndOfStream; - n = (c << 8) + c1; - return n; - } - } -#endif - -#ifdef TIDY_WIN32_MLANG_SUPPORT - else if (in->encoding > WIN32MLANG) - { - assert( in->mlang != NULL ); - return TY_(Win32MLangGetChar)((byte)c, in, &bytesRead); - } -#endif - - else - n = c; - - return n; -} - -/* Output a Byte Order Mark if required */ -void TY_(outBOM)( StreamOut *out ) -{ - if ( out->encoding == UTF8 -#if SUPPORT_UTF16_ENCODINGS - || out->encoding == UTF16LE - || out->encoding == UTF16BE - || out->encoding == UTF16 -#endif - ) - { - /* this will take care of encoding the BOM correctly */ - TY_(WriteChar)( UNICODE_BOM, out ); - } -} - -/* this is in intermediate fix for various problems in the */ -/* long term code and data in charsets.c should be used */ -static struct _enc2iana -{ - uint id; - ctmbstr name; - ctmbstr tidyOptName; -} const enc2iana[] = -{ - { ASCII, "us-ascii", "ascii" }, - { LATIN0, "iso-8859-15", "latin0" }, - { LATIN1, "iso-8859-1", "latin1" }, - { UTF8, "utf-8", "utf8" }, - { MACROMAN, "macintosh", "mac" }, - { WIN1252, "windows-1252", "win1252" }, - { IBM858, "ibm00858", "ibm858" }, -#if SUPPORT_UTF16_ENCODINGS - { UTF16LE, "utf-16", "utf16le" }, - { UTF16BE, "utf-16", "utf16be" }, - { UTF16, "utf-16", "utf16" }, -#endif -#if SUPPORT_ASIAN_ENCODINGS - { BIG5, "big5", "big5" }, - { SHIFTJIS, "shift_jis", "shiftjis"}, -#endif -#ifndef NO_NATIVE_ISO2022_SUPPORT - { ISO2022, NULL, "iso2022" }, -#endif - { RAW, NULL, "raw" } -}; - -ctmbstr TY_(GetEncodingNameFromTidyId)(uint id) -{ - uint i; - - for (i = 0; enc2iana[i].name; ++i) - if (enc2iana[i].id == id) - return enc2iana[i].name; - - return NULL; -} - -ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id) -{ - uint i; - - for (i = 0; i < sizeof(enc2iana)/sizeof(enc2iana[0]); ++i) - if (enc2iana[i].id == id) - return enc2iana[i].tidyOptName; - - return NULL; -} - -int TY_(GetCharEncodingFromOptName)( ctmbstr charenc ) -{ - uint i; - - for (i = 0; i < sizeof(enc2iana)/sizeof(enc2iana[0]); ++i) - if (TY_(tmbstrcasecmp)(charenc, enc2iana[i].tidyOptName) == 0 ) - return enc2iana[i].id; - - return -1; -} - -/* - * local variables: - * mode: c - * indent-tabs-mode: nil - * c-basic-offset: 4 - * eval: (c-set-offset 'substatement-open 0) - * end: - */ http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/ac70cb0e/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.h ---------------------------------------------------------------------- diff --git a/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.h b/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.h deleted file mode 100644 index 3afcdda..0000000 --- a/DocFormats/3rdparty/external/w3c-tidy-html5/src/streamio.h +++ /dev/null @@ -1,216 +0,0 @@ -#ifndef __STREAMIO_H__ -#define __STREAMIO_H__ - -/* streamio.h -- handles character stream I/O - - (c) 1998-2007 (W3C) MIT, ERCIM, Keio University - See tidy.h for the copyright notice. - - Wrapper around Tidy input source and output sink - that calls appropriate interfaces, and applies - necessary char encoding transformations: to/from - ISO-10646 and/or UTF-8. - -*/ - -#include "forward.h" -#include "buffio.h" -#include "fileio.h" - -#ifdef __cplusplus -extern "C" -{ -#endif -typedef enum -{ - FileIO, - BufferIO, - UserIO -} IOType; - -/* states for ISO 2022 - - A document in ISO-2022 based encoding uses some ESC sequences called - "designator" to switch character sets. The designators defined and - used in ISO-2022-JP are: - - "ESC" + "(" + ? for ISO646 variants - - "ESC" + "$" + ? and - "ESC" + "$" + "(" + ? for multibyte character sets -*/ -typedef enum -{ - FSM_ASCII, - FSM_ESC, - FSM_ESCD, - FSM_ESCDP, - FSM_ESCP, - FSM_NONASCII -} ISO2022State; - -/************************ -** Source -************************/ - -enum -{ - CHARBUF_SIZE=5, - LASTPOS_SIZE=64 -}; - -/* non-raw input is cleaned up*/ -struct _StreamIn -{ - ISO2022State state; /* FSM for ISO2022 */ - Bool pushed; - TidyAllocator *allocator; - tchar* charbuf; - uint bufpos; - uint bufsize; - int tabs; - int lastcols[LASTPOS_SIZE]; - unsigned short curlastpos; /* current last position in lastcols */ - unsigned short firstlastpos; /* first valid last position in lastcols */ - int curcol; - int curline; - int encoding; - IOType iotype; - - TidyInputSource source; - -#ifdef TIDY_WIN32_MLANG_SUPPORT - void* mlang; -#endif - -#ifdef TIDY_STORE_ORIGINAL_TEXT - tmbstr otextbuf; - size_t otextsize; - uint otextlen; -#endif - - /* Pointer back to document for error reporting */ - TidyDocImpl* doc; -}; - -StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding ); -void TY_(freeStreamIn)(StreamIn* in); - -StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding ); -StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding ); -StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding ); - -int TY_(ReadBOMEncoding)(StreamIn *in); -uint TY_(ReadChar)( StreamIn* in ); -void TY_(UngetChar)( uint c, StreamIn* in ); -Bool TY_(IsEOF)( StreamIn* in ); - - -/************************ -** Sink -************************/ - -struct _StreamOut -{ - int encoding; - ISO2022State state; /* for ISO 2022 */ - uint nl; - -#ifdef TIDY_WIN32_MLANG_SUPPORT - void* mlang; -#endif - - IOType iotype; - TidyOutputSink sink; -}; - -StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln ); -StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln ); -StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln ); - -StreamOut* TY_(StdErrOutput)(void); -/* StreamOut* StdOutOutput(void); */ -void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out ); - -void TY_(WriteChar)( uint c, StreamOut* out ); -void TY_(outBOM)( StreamOut *out ); - -ctmbstr TY_(GetEncodingNameFromTidyId)(uint id); -ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id); -int TY_(GetCharEncodingFromOptName)(ctmbstr charenc); - -/************************ -** Misc -************************/ - -/* character encodings -*/ -#define RAW 0 -#define ASCII 1 -#define LATIN0 2 -#define LATIN1 3 -#define UTF8 4 -#define ISO2022 5 -#define MACROMAN 6 -#define WIN1252 7 -#define IBM858 8 - -#if SUPPORT_UTF16_ENCODINGS -#define UTF16LE 9 -#define UTF16BE 10 -#define UTF16 11 -#endif - -/* Note that Big5 and SHIFTJIS are not converted to ISO 10646 codepoints -** (i.e., to Unicode) before being recoded into UTF-8. This may be -** confusing: usually UTF-8 implies ISO10646 codepoints. -*/ -#if SUPPORT_ASIAN_ENCODINGS -#if SUPPORT_UTF16_ENCODINGS -#define BIG5 12 -#define SHIFTJIS 13 -#else -#define BIG5 9 -#define SHIFTJIS 10 -#endif -#endif - -#ifdef TIDY_WIN32_MLANG_SUPPORT -/* hack: windows code page numbers start at 37 */ -#define WIN32MLANG 36 -#endif - - -/* char encoding used when replacing illegal SGML chars, -** regardless of specified encoding. Set at compile time -** to either Windows or Mac. -*/ -extern const int TY_(ReplacementCharEncoding); - -/* Function for conversion from Windows-1252 to Unicode */ -uint TY_(DecodeWin1252)(uint c); - -/* Function to convert from MacRoman to Unicode */ -uint TY_(DecodeMacRoman)(uint c); - -#ifdef __cplusplus -} -#endif - - -/* Use numeric constants as opposed to escape chars (\r, \n) -** to avoid conflict Mac compilers that may re-define these. -*/ -#define CR 0xD -#define LF 0xA - -#if defined(MAC_OS_CLASSIC) -#define DEFAULT_NL_CONFIG TidyCR -#elif defined(_WIN32) || defined(OS2_OS) -#define DEFAULT_NL_CONFIG TidyCRLF -#else -#define DEFAULT_NL_CONFIG TidyLF -#endif - - -#endif /* __STREAMIO_H__ */ http://git-wip-us.apache.org/repos/asf/incubator-corinthia/blob/ac70cb0e/DocFormats/3rdparty/external/w3c-tidy-html5/src/tagask.c ---------------------------------------------------------------------- diff --git a/DocFormats/3rdparty/external/w3c-tidy-html5/src/tagask.c b/DocFormats/3rdparty/external/w3c-tidy-html5/src/tagask.c deleted file mode 100644 index bd4e1ab..0000000 --- a/DocFormats/3rdparty/external/w3c-tidy-html5/src/tagask.c +++ /dev/null @@ -1,280 +0,0 @@ -/* tagask.c -- Interrogate node type - - (c) 1998-2006 (W3C) MIT, ERCIM, Keio University - See tidy.h for the copyright notice. - -*/ - -#include "tidy-int.h" -#include "tags.h" -#include "tidy.h" - -Bool TIDY_CALL tidyNodeIsText( TidyNode tnod ) -{ return TY_(nodeIsText)( tidyNodeToImpl(tnod) ); -} -Bool tidyNodeCMIsBlock( TidyNode tnod ); /* not exported yet */ -Bool tidyNodeCMIsBlock( TidyNode tnod ) -{ return TY_(nodeCMIsBlock)( tidyNodeToImpl(tnod) ); -} -Bool tidyNodeCMIsInline( TidyNode tnod ); /* not exported yet */ -Bool tidyNodeCMIsInline( TidyNode tnod ) -{ return TY_(nodeCMIsInline)( tidyNodeToImpl(tnod) ); -} -Bool tidyNodeCMIsEmpty( TidyNode tnod ); /* not exported yet */ -Bool tidyNodeCMIsEmpty( TidyNode tnod ) -{ return TY_(nodeCMIsEmpty)( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsHeader( TidyNode tnod ) -{ return TY_(nodeIsHeader)( tidyNodeToImpl(tnod) ); -} - -Bool TIDY_CALL tidyNodeIsHTML( TidyNode tnod ) -{ return nodeIsHTML( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsHEAD( TidyNode tnod ) -{ return nodeIsHEAD( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsTITLE( TidyNode tnod ) -{ return nodeIsTITLE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBASE( TidyNode tnod ) -{ return nodeIsBASE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsMETA( TidyNode tnod ) -{ return nodeIsMETA( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBODY( TidyNode tnod ) -{ return nodeIsBODY( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsFRAMESET( TidyNode tnod ) -{ return nodeIsFRAMESET( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsFRAME( TidyNode tnod ) -{ return nodeIsFRAME( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsIFRAME( TidyNode tnod ) -{ return nodeIsIFRAME( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsNOFRAMES( TidyNode tnod ) -{ return nodeIsNOFRAMES( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsHR( TidyNode tnod ) -{ return nodeIsHR( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsH1( TidyNode tnod ) -{ return nodeIsH1( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsH2( TidyNode tnod ) -{ return nodeIsH2( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsPRE( TidyNode tnod ) -{ return nodeIsPRE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsLISTING( TidyNode tnod ) -{ return nodeIsLISTING( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsP( TidyNode tnod ) -{ return nodeIsP( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsUL( TidyNode tnod ) -{ return nodeIsUL( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsOL( TidyNode tnod ) -{ return nodeIsOL( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsDL( TidyNode tnod ) -{ return nodeIsDL( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsDIR( TidyNode tnod ) -{ return nodeIsDIR( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsLI( TidyNode tnod ) -{ return nodeIsLI( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsDT( TidyNode tnod ) -{ return nodeIsDT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsDD( TidyNode tnod ) -{ return nodeIsDD( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsTABLE( TidyNode tnod ) -{ return nodeIsTABLE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsCAPTION( TidyNode tnod ) -{ return nodeIsCAPTION( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsTD( TidyNode tnod ) -{ return nodeIsTD( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsTH( TidyNode tnod ) -{ return nodeIsTH( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsTR( TidyNode tnod ) -{ return nodeIsTR( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsCOL( TidyNode tnod ) -{ return nodeIsCOL( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsCOLGROUP( TidyNode tnod ) -{ return nodeIsCOLGROUP( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBR( TidyNode tnod ) -{ return nodeIsBR( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsA( TidyNode tnod ) -{ return nodeIsA( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsLINK( TidyNode tnod ) -{ return nodeIsLINK( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsB( TidyNode tnod ) -{ return nodeIsB( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsI( TidyNode tnod ) -{ return nodeIsI( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSTRONG( TidyNode tnod ) -{ return nodeIsSTRONG( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsEM( TidyNode tnod ) -{ return nodeIsEM( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBIG( TidyNode tnod ) -{ return nodeIsBIG( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSMALL( TidyNode tnod ) -{ return nodeIsSMALL( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsPARAM( TidyNode tnod ) -{ return nodeIsPARAM( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsOPTION( TidyNode tnod ) -{ return nodeIsOPTION( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsOPTGROUP( TidyNode tnod ) -{ return nodeIsOPTGROUP( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsIMG( TidyNode tnod ) -{ return nodeIsIMG( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsMAP( TidyNode tnod ) -{ return nodeIsMAP( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsAREA( TidyNode tnod ) -{ return nodeIsAREA( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsNOBR( TidyNode tnod ) -{ return nodeIsNOBR( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsWBR( TidyNode tnod ) -{ return nodeIsWBR( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsFONT( TidyNode tnod ) -{ return nodeIsFONT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsLAYER( TidyNode tnod ) -{ return nodeIsLAYER( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSPACER( TidyNode tnod ) -{ return nodeIsSPACER( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsCENTER( TidyNode tnod ) -{ return nodeIsCENTER( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSTYLE( TidyNode tnod ) -{ return nodeIsSTYLE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSCRIPT( TidyNode tnod ) -{ return nodeIsSCRIPT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsNOSCRIPT( TidyNode tnod ) -{ return nodeIsNOSCRIPT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsFORM( TidyNode tnod ) -{ return nodeIsFORM( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsTEXTAREA( TidyNode tnod ) -{ return nodeIsTEXTAREA( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBLOCKQUOTE( TidyNode tnod ) -{ return nodeIsBLOCKQUOTE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsAPPLET( TidyNode tnod ) -{ return nodeIsAPPLET( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsOBJECT( TidyNode tnod ) -{ return nodeIsOBJECT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsDIV( TidyNode tnod ) -{ return nodeIsDIV( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSPAN( TidyNode tnod ) -{ return nodeIsSPAN( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsINPUT( TidyNode tnod ) -{ return nodeIsINPUT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsQ( TidyNode tnod ) -{ return nodeIsQ( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsLABEL( TidyNode tnod ) -{ return nodeIsLABEL( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsH3( TidyNode tnod ) -{ return nodeIsH3( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsH4( TidyNode tnod ) -{ return nodeIsH4( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsH5( TidyNode tnod ) -{ return nodeIsH5( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsH6( TidyNode tnod ) -{ return nodeIsH6( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsADDRESS( TidyNode tnod ) -{ return nodeIsADDRESS( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsXMP( TidyNode tnod ) -{ return nodeIsXMP( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSELECT( TidyNode tnod ) -{ return nodeIsSELECT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBLINK( TidyNode tnod ) -{ return nodeIsBLINK( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsMARQUEE( TidyNode tnod ) -{ return nodeIsMARQUEE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsEMBED( TidyNode tnod ) -{ return nodeIsEMBED( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsBASEFONT( TidyNode tnod ) -{ return nodeIsBASEFONT( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsISINDEX( TidyNode tnod ) -{ return nodeIsISINDEX( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsS( TidyNode tnod ) -{ return nodeIsS( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsSTRIKE( TidyNode tnod ) -{ return nodeIsSTRIKE( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsU( TidyNode tnod ) -{ return nodeIsU( tidyNodeToImpl(tnod) ); -} -Bool TIDY_CALL tidyNodeIsMENU( TidyNode tnod ) -{ return nodeIsMENU( tidyNodeToImpl(tnod) ); -} - - -/* - * local variables: - * mode: c - * indent-tabs-mode: nil - * c-basic-offset: 4 - * eval: (c-set-offset 'substatement-open 0) - * end: - */