Return-Path: X-Original-To: apmail-couchdb-commits-archive@www.apache.org Delivered-To: apmail-couchdb-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C8AAE1026F for ; Sat, 15 Feb 2014 09:50:51 +0000 (UTC) Received: (qmail 43401 invoked by uid 500); 15 Feb 2014 09:50:27 -0000 Delivered-To: apmail-couchdb-commits-archive@couchdb.apache.org Received: (qmail 42763 invoked by uid 500); 15 Feb 2014 09:49:58 -0000 Mailing-List: contact commits-help@couchdb.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@couchdb.apache.org Delivered-To: mailing list commits@couchdb.apache.org Received: (qmail 41280 invoked by uid 99); 15 Feb 2014 09:49:10 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 15 Feb 2014 09:49:10 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id D6E33927B98; Sat, 15 Feb 2014 09:49:08 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: benoitc@apache.org To: commits@couchdb.apache.org Date: Sat, 15 Feb 2014 09:49:39 -0000 Message-Id: <2acd8f0d466346e6a651229cbb49b991@git.apache.org> In-Reply-To: <462f90d1cb314bc68f38d5e7b324ddb6@git.apache.org> References: <462f90d1cb314bc68f38d5e7b324ddb6@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [33/59] [abbrv] remove couch_collate http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h b/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h deleted file mode 100644 index 67e17cf..0000000 --- a/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h +++ /dev/null @@ -1,940 +0,0 @@ -/* -* Copyright (C) 2007-2008, 
International Business Machines Corporation and others. All Rights Reserved. -******************************************************************************** -* -* File MSGFMT.H -* -* Modification History: -* -* Date Name Description -* 02/19/97 aliu Converted from java. -* 03/20/97 helena Finished first cut of implementation. -* 07/22/98 stephen Removed operator!= (defined in Format) -* 08/19/2002 srl Removing Javaisms -******************************************************************************** -*/ - -#ifndef MSGFMT_H -#define MSGFMT_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Formats messages in a language-neutral way. - */ - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/format.h" -#include "unicode/locid.h" -#include "unicode/parseerr.h" -#include "unicode/uchar.h" - -U_NAMESPACE_BEGIN - -class NumberFormat; -class DateFormat; - -/** - * - * A MessageFormat produces concatenated messages in a - * language-neutral way. It should be used for all string - * concatenations that are visible to end users. - *

- * A MessageFormat contains an array of subformats arranged - * within a template string. Together, the subformats and - * template string determine how the MessageFormat will operate during - * formatting and parsing. - *

- * Typically, both the subformats and the template string are - * specified at once in a pattern. By using different - * patterns for different locales, messages may be localized. - *

- * During formatting, the MessageFormat takes an array of arguments - * and produces a user-readable string. Each argument is a - * Formattable object; they may be passed in in an array, or as a - * single Formattable object which itself contains an array. Each - * argument is matched up with its corresponding subformat, which then - * formats it into a string. The resultant strings are then assembled - * within the string template of the MessageFormat to produce the - * final output string. - *

- * Note: - * In ICU 4.0 MessageFormat supports named arguments. If a named argument - * is used, all arguments must be named. Names start with a character in - * UCHAR_ID_START and continue with characters in - * UCHAR_ID_CONTINUE; in particular, they do not start with a digit. - * If named arguments are used, {@link #usesNamedArguments()} will return true. - *

- * The other new methods supporting named arguments are - * {@link #getFormatNames(UErrorCode& status)}, - * {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)} - * {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)}, - * {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)}, - * {@link #format(const Formattable* arguments, const UnicodeString *argumentNames, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection, UErrorCode& success)}, - * {@link #format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo,UErrorCode& status)}. - * These methods are all compatible with patterns that do not use named arguments-- - * in these cases the keys in the input or output use UnicodeStrings - * that name the argument indices, e.g. "0", "1", "2"... etc. - *

- * When named arguments are used, certain methods on MessageFormat that take or - * return arrays do not perform any action, since it is not possible to - * identify positions in an array using a name. UErrorCode is set to - * U_ARGUMENT_TYPE_MISMATCH if there is a status/success field in the method. - * These methods are - * {@link #adoptFormats(Format** newFormats, int32_t count)}, - * {@link #setFormats(const Format** newFormats,int32_t count)}, - * {@link #adoptFormat(int32_t n, Format *newFormat)}, - * {@link #getFormats(int32_t& cnt)}, - * {@link #format(const Formattable* source,int32_t cnt,UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)}, - * {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)}, - * {@link #format(const Formattable& source, UnicodeString& appendTo,FieldPosition& ignore, UErrorCode& success)}, - * {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)}, - * {@link #parse(const UnicodeString& source, ParsePosition& pos,int32_t& count)}, - * {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)} - *

- * - *

- * During parsing, an input string is matched against the string - * template of the MessageFormat to produce an array of Formattable - * objects. Plain text of the template string is matched directly - * against input text. At each position in the template string where - * a subformat is located, the subformat is called to parse the - * corresponding segment of input text to produce an output argument. - * In this way, an array of arguments is created which together - * constitute the parse result. - *

- * Parsing may fail or produce unexpected results in a number of - * circumstances. - *

    - *
  • If one of the arguments does not occur in the pattern, it - * will be returned as a default Formattable. - *
  • If the format of an argument loses information, such as with - * a choice format where a large number formats to "many", then the - * parse may not correspond to the originally formatted argument. - *
  • MessageFormat does not handle ChoiceFormat recursion during - * parsing; such parses will fail. - *
  • Parsing will not always find a match (or the correct match) if - * some part of the parse is ambiguous. For example, if the pattern - * "{1},{2}" is used with the string arguments {"a,b", "c"}, it will - * format as "a,b,c". When the result is parsed, it will return {"a", - * "b,c"}. - *
  • If a single argument is formatted more than once in the string, - * then the rightmost subformat in the pattern string will produce the - * parse result; prior subformats with the same argument index will - * have no effect. - *
- * Here are some examples of usage: - *

- * Example 1: - *

- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     GregorianCalendar cal(success);
- *     Formattable arguments[] = {
- *         7L,
- *         Formattable( (Date) cal.getTime(success), Formattable::kIsDate),
- *         "a disturbance in the Force"
- *     };
- *
- *     UnicodeString result;
- *     MessageFormat::format(
- *          "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
- *          arguments, 3, result, success );
- *
- *     cout << "result: " << result << endl;
- *     //: At 4:34:20 PM on 23-Mar-98, there was a disturbance
- *     //             in the Force on planet 7.
- * \endcode
- * 
- * Typically, the message format will come from resources, and the - * arguments will be dynamically set at runtime. - *

- * Example 2: - *

- *  \code
- *     success = U_ZERO_ERROR;
- *     Formattable testArgs[] = {3L, "MyDisk"};
- *
- *     MessageFormat form(
- *         "The disk \"{1}\" contains {0} file(s).", success );
- *
- *     UnicodeString string;
- *     FieldPosition fpos = 0;
- *     cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl;
- *
- *     // output, with different testArgs:
- *     // output: The disk "MyDisk" contains 0 file(s).
- *     // output: The disk "MyDisk" contains 1 file(s).
- *     // output: The disk "MyDisk" contains 1,273 file(s).
- *  \endcode
- *  
- * - * The pattern is of the following form. Legend: - *
- * \code
- *       {optional item}
- *       (group that may be repeated)*
- * \endcode
- *  
- * Do not confuse optional items with items inside quoted braces, such - * as this: "{". Quoted braces are literals. - *
- *  \code
- *       messageFormatPattern := string ( "{" messageFormatElement "}" string )*
- *
- *       messageFormatElement := argumentIndex | argumentName { "," elementFormat }
- *
- *       elementFormat := "time" { "," datetimeStyle }
- *                      | "date" { "," datetimeStyle }
- *                      | "number" { "," numberStyle }
- *                      | "choice" "," choiceStyle
- *
- *       datetimeStyle := "short"
- *                      | "medium"
- *                      | "long"
- *                      | "full"
- *                      | dateFormatPattern
- *
- *       numberStyle :=   "currency"
- *                      | "percent"
- *                      | "integer"
- *                      | numberFormatPattern
- *
- *       choiceStyle :=   choiceFormatPattern
- * 
- *       pluralStyle := pluralFormatPattern
- * \endcode
- * 
- * If there is no elementFormat, then the argument must be a string, - * which is substituted. If there is no dateTimeStyle or numberStyle, - * then the default format is used (e.g. NumberFormat::createInstance(), - * DateFormat::createTimeInstance(DateFormat::kDefault, ...) or DateFormat::createDateInstance(DateFormat::kDefault, ...). For - * a ChoiceFormat, the pattern must always be specified, since there - * is no default. - *

- * In strings, single quotes can be used to quote syntax characters. - * A literal single quote is represented by '', both within and outside - * of single-quoted segments. Inside a - * messageFormatElement, quotes are not removed. For example, - * {1,number,$'#',##} will produce a number format with the pound-sign - * quoted, with a result such as: "$#31,45". - *

- * If a pattern is used, then unquoted braces in the pattern, if any, - * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab - * {0'}' de" and "ab } de" are not. - *

- *

Warning:
The rules for using quotes within message - * format patterns unfortunately have shown to be somewhat confusing. - * In particular, it isn't always obvious to localizers whether single - * quotes need to be doubled or not. Make sure to inform localizers about - * the rules, and tell them (for example, by using comments in resource - * bundle source files) which strings will be processed by MessageFormat. - * Note that localizers may need to use single quotes in translated - * strings where the original version doesn't have them. - *
Note also that the simplest way to avoid the problem is to - * use the real apostrophe (single quote) character U+2019 (’) for - * human-readable text, and to use the ASCII apostrophe (U+0027 ' ) - * only in program syntax, like quoting in MessageFormat. - * See the annotations for U+0027 Apostrophe in The Unicode Standard.

- *
- *

- * The argumentIndex is a non-negative integer, which corresponds to the - * index of the arguments presented in an array to be formatted. The - * first argument has argumentIndex 0. - *

- * It is acceptable to have unused arguments in the array. With missing - * arguments or arguments that are not of the right class for the - * specified format, a failing UErrorCode result is set. - *

- * For more sophisticated patterns, you can use a ChoiceFormat to get - * output: - *

- * \code
- *     UErrorCode success = U_ZERO_ERROR;
- *     MessageFormat form("The disk \"{1}\" contains {0}.", success);
- *     double filelimits[] = {0,1,2};
- *     UnicodeString filepart[] = {"no files","one file","{0,number} files"};
- *     ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3);
- *     form.setFormat(1, *fileform); // NOT zero, see below
- *
- *     Formattable testArgs[] = {1273L, "MyDisk"};
- *
- *     UnicodeString string;
- *     FieldPosition fpos = 0;
- *     cout << form.format(testArgs, 2, string, fpos, success) << endl;
- *
- *     // output, with different testArgs
- *     // output: The disk "MyDisk" contains no files.
- *     // output: The disk "MyDisk" contains one file.
- *     // output: The disk "MyDisk" contains 1,273 files.
- * \endcode
- * 
- * You can either do this programmatically, as in the above example, - * or by using a pattern (see ChoiceFormat for more information) as in: - *
- * \code
- *    form.applyPattern(
- *      "There {0,choice,0#are no files|1#is one file|1
- * 

- * Note: As we see above, the string produced by a ChoiceFormat in - * MessageFormat is treated specially; occurrences of '{' are used to - * indicate subformats, and cause recursion. If you create both a - * MessageFormat and ChoiceFormat programmatically (instead of using - * the string patterns), then be careful not to produce a format that - * recurses on itself, which will cause an infinite loop. - *

- * Note: Subformats are numbered by their order in the pattern. - * This is not the same as the argumentIndex. - *

- * \code
- *    For example: with "abc{2}def{3}ghi{0}...",
- *
- *    format0 affects the first variable {2}
- *    format1 affects the second variable {3}
- *    format2 affects the third variable {0}
- * \endcode
- * 
- * - *

User subclasses are not supported. While clients may write - * subclasses, such code will not necessarily work and will not be - * guaranteed to work stably from release to release. - */ -class U_I18N_API MessageFormat : public Format { -public: - /** - * Enum type for kMaxFormat. - * @obsolete ICU 3.0. The 10-argument limit was removed as of ICU 2.6, - * rendering this enum type obsolete. - */ - enum EFormatNumber { - /** - * The maximum number of arguments. - * @obsolete ICU 3.0. The 10-argument limit was removed as of ICU 2.6, - * rendering this constant obsolete. - */ - kMaxFormat = 10 - }; - - /** - * Constructs a new MessageFormat using the given pattern and the - * default locale. - * - * @param pattern Pattern used to construct object. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @stable ICU 2.0 - */ - MessageFormat(const UnicodeString& pattern, - UErrorCode &status); - - /** - * Constructs a new MessageFormat using the given pattern and locale. - * @param pattern Pattern used to construct object. - * @param newLocale The locale to use for formatting dates and numbers. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @stable ICU 2.0 - */ - MessageFormat(const UnicodeString& pattern, - const Locale& newLocale, - UErrorCode& status); - /** - * Constructs a new MessageFormat using the given pattern and locale. - * @param pattern Pattern used to construct object. - * @param newLocale The locale to use for formatting dates and numbers. - * @param parseError Struct to recieve information on position - * of error within the pattern. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @stable ICU 2.0 - */ - MessageFormat(const UnicodeString& pattern, - const Locale& newLocale, - UParseError& parseError, - UErrorCode& status); - /** - * Constructs a new MessageFormat from an existing one. 
- * @stable ICU 2.0 - */ - MessageFormat(const MessageFormat&); - - /** - * Assignment operator. - * @stable ICU 2.0 - */ - const MessageFormat& operator=(const MessageFormat&); - - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~MessageFormat(); - - /** - * Clones this Format object polymorphically. The caller owns the - * result and should delete it when done. - * @stable ICU 2.0 - */ - virtual Format* clone(void) const; - - /** - * Returns true if the given Format objects are semantically equal. - * Objects of different subclasses are considered unequal. - * @param other the object to be compared with. - * @return true if the given Format objects are semantically equal. - * @stable ICU 2.0 - */ - virtual UBool operator==(const Format& other) const; - - /** - * Sets the locale. This locale is used for fetching default number or date - * format information. - * @param theLocale the new locale value to be set. - * @stable ICU 2.0 - */ - virtual void setLocale(const Locale& theLocale); - - /** - * Gets the locale. This locale is used for fetching default number or date - * format information. - * @return the locale of the object. - * @stable ICU 2.0 - */ - virtual const Locale& getLocale(void) const; - - /** - * Applies the given pattern string to this message format. - * - * @param pattern The pattern to be applied. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @stable ICU 2.0 - */ - virtual void applyPattern(const UnicodeString& pattern, - UErrorCode& status); - /** - * Applies the given pattern string to this message format. - * - * @param pattern The pattern to be applied. - * @param parseError Struct to recieve information on position - * of error within pattern. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. 
- * @stable ICU 2.0 - */ - virtual void applyPattern(const UnicodeString& pattern, - UParseError& parseError, - UErrorCode& status); - - /** - * Returns a pattern that can be used to recreate this object. - * - * @param appendTo Output parameter to receive the pattern. - * Result is appended to existing contents. - * @return Reference to 'appendTo' parameter. - * @stable ICU 2.0 - */ - virtual UnicodeString& toPattern(UnicodeString& appendTo) const; - - /** - * Sets subformats. - * See the class description about format numbering. - * The caller should not delete the Format objects after this call. - * The array formatsToAdopt is not itself adopted. Its - * ownership is retained by the caller. If the call fails because - * memory cannot be allocated, then the formats will be deleted - * by this method, and this object will remain unchanged. - * - * @stable ICU 2.0 - * @param formatsToAdopt the format to be adopted. - * @param count the size of the array. - */ - virtual void adoptFormats(Format** formatsToAdopt, int32_t count); - - /** - * Sets subformats. - * See the class description about format numbering. - * Each item in the array is cloned into the internal array. - * If the call fails because memory cannot be allocated, then this - * object will remain unchanged. - * - * @stable ICU 2.0 - * @param newFormats the new format to be set. - * @param cnt the size of the array. - */ - virtual void setFormats(const Format** newFormats, int32_t cnt); - - - /** - * Sets one subformat. - * See the class description about format numbering. - * The caller should not delete the Format object after this call. - * If the number is over the number of formats already set, - * the item will be deleted and ignored. - * @stable ICU 2.0 - * @param formatNumber index of the subformat. - * @param formatToAdopt the format to be adopted. - */ - virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt); - - /** - * Sets one subformat. 
- * See the class description about format numbering. - * If the number is over the number of formats already set, - * the item will be ignored. - * @param formatNumber index of the subformat. - * @param format the format to be set. - * @stable ICU 2.0 - */ - virtual void setFormat(int32_t formatNumber, const Format& format); - - /** - * Gets format names. This function returns formatNames in StringEnumerations - * which can be used with getFormat() and setFormat() to export formattable - * array from current MessageFormat to another. It is caller's resposibility - * to delete the returned formatNames. - * @param status output param set to success/failure code. - * @draft ICU 4.0 - */ - virtual StringEnumeration* getFormatNames(UErrorCode& status); - - /** - * Gets subformat pointer for given format name. - * This function supports both named and numbered - * arguments-- if numbered, the formatName is the - * corresponding UnicodeStrings (e.g. "0", "1", "2"...). - * The returned Format object should not be deleted by the caller, - * nor should the ponter of other object . The pointer and its - * contents remain valid only until the next call to any method - * of this class is made with this object. - * @param formatName the name or number specifying a format - * @param status output param set to success/failure code. - * @draft ICU 4.0 - */ - virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status); - - /** - * Sets one subformat for given format name. - * See the class description about format name. - * This function supports both named and numbered - * arguments-- if numbered, the formatName is the - * corresponding UnicodeStrings (e.g. "0", "1", "2"...). - * If there is no matched formatName or wrong type, - * the item will be ignored. - * @param formatName Name of the subformat. - * @param format the format to be set. - * @param status output param set to success/failure code. 
- * @draft ICU 4.0 - */ - virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status); - - /** - * Sets one subformat for given format name. - * See the class description about format name. - * This function supports both named and numbered - * arguments-- if numbered, the formatName is the - * corresponding UnicodeStrings (e.g. "0", "1", "2"...). - * If there is no matched formatName or wrong type, - * the item will be ignored. - * The caller should not delete the Format object after this call. - * @param formatName Name of the subformat. - * @param formatToAdopt Format to be adopted. - * @param status output param set to success/failure code. - * @draft ICU 4.0 - */ - virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status); - - - /** - * Gets an array of subformats of this object. The returned array - * should not be deleted by the caller, nor should the pointers - * within the array. The array and its contents remain valid only - * until the next call to any method of this class is made with - * this object. See the class description about format numbering. - * @param count output parameter to receive the size of the array - * @return an array of count Format* objects, or NULL if out of - * memory. Any or all of the array elements may be NULL. - * @stable ICU 2.0 - */ - virtual const Format** getFormats(int32_t& count) const; - - /** - * Formats the given array of arguments into a user-readable string. - * Does not take ownership of the Formattable* array or its contents. - * - * @param source An array of objects to be formatted. - * @param count The number of elements of 'source'. - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - * @param ignore Not used; inherited from base class API. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. 
- * @return Reference to 'appendTo' parameter. - * @stable ICU 2.0 - */ - UnicodeString& format( const Formattable* source, - int32_t count, - UnicodeString& appendTo, - FieldPosition& ignore, - UErrorCode& status) const; - - /** - * Formats the given array of arguments into a user-readable string - * using the given pattern. - * - * @param pattern The pattern. - * @param arguments An array of objects to be formatted. - * @param count The number of elements of 'source'. - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @return Reference to 'appendTo' parameter. - * @stable ICU 2.0 - */ - static UnicodeString& format(const UnicodeString& pattern, - const Formattable* arguments, - int32_t count, - UnicodeString& appendTo, - UErrorCode& status); - - /** - * Formats the given array of arguments into a user-readable - * string. The array must be stored within a single Formattable - * object of type kArray. If the Formattable object type is not of - * type kArray, then returns a failing UErrorCode. - * - * @param obj A Formattable of type kArray containing - * arguments to be formatted. - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - * @param pos On input: an alignment field, if desired. - * On output: the offsets of the alignment field. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @return Reference to 'appendTo' parameter. - * @stable ICU 2.0 - */ - virtual UnicodeString& format(const Formattable& obj, - UnicodeString& appendTo, - FieldPosition& pos, - UErrorCode& status) const; - - /** - * Formats the given array of arguments into a user-readable - * string. The array must be stored within a single Formattable - * object of type kArray. 
If the Formattable object type is not of - * type kArray, then returns a failing UErrorCode. - * - * @param obj The object to format - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @return Reference to 'appendTo' parameter. - * @stable ICU 2.0 - */ - UnicodeString& format(const Formattable& obj, - UnicodeString& appendTo, - UErrorCode& status) const; - - - /** - * Formats the given array of arguments into a user-defined argument name - * array. This function supports both named and numbered - * arguments-- if numbered, the formatName is the - * corresponding UnicodeStrings (e.g. "0", "1", "2"...). - * - * @param argumentNames argument name array - * @param arguments An array of objects to be formatted. - * @param count The number of elements of 'argumentNames' and - * arguments. The number of argumentNames and arguments - * must be the same. - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * @return Reference to 'appendTo' parameter. - * @stable ICU 4.0 - */ - UnicodeString& format(const UnicodeString* argumentNames, - const Formattable* arguments, - int32_t count, - UnicodeString& appendTo, - UErrorCode& status) const; - /** - * Parses the given string into an array of output arguments. - * - * @param source String to be parsed. - * @param pos On input, starting position for parse. On output, - * final position after parse. Unchanged if parse - * fails. - * @param count Output parameter to receive the number of arguments - * parsed. - * @return an array of parsed arguments. The caller owns both - * the array and its contents. 
- * @stable ICU 2.0 - */ - virtual Formattable* parse( const UnicodeString& source, - ParsePosition& pos, - int32_t& count) const; - - /** - * Parses the given string into an array of output arguments. - * - * @param source String to be parsed. - * @param count Output param to receive size of returned array. - * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. - * If the MessageFormat is named argument, the status is - * set to U_ARGUMENT_TYPE_MISMATCH. - * @return an array of parsed arguments. The caller owns both - * the array and its contents. Return NULL if status is not U_ZERO_ERROR. - * - * @stable ICU 2.0 - */ - virtual Formattable* parse( const UnicodeString& source, - int32_t& count, - UErrorCode& status) const; - - /** - * Parses the given string into an array of output arguments - * stored within a single Formattable of type kArray. - * - * @param source The string to be parsed into an object. - * @param result Formattable to be set to the parse result. - * If parse fails, return contents are undefined. - * @param pos On input, starting position for parse. On output, - * final position after parse. Unchanged if parse - * fails. - * @stable ICU 2.0 - */ - virtual void parseObject(const UnicodeString& source, - Formattable& result, - ParsePosition& pos) const; - - /** - * Convert an 'apostrophe-friendly' pattern into a standard - * pattern. Standard patterns treat all apostrophes as - * quotes, which is problematic in some languages, e.g. - * French, where apostrophe is commonly used. This utility - * assumes that only an unpaired apostrophe immediately before - * a brace is a true quote. Other unpaired apostrophes are paired, - * and the resulting standard pattern string is returned. - * - *

Note it is not guaranteed that the returned pattern - * is indeed a valid pattern. The only effect is to convert - * between patterns having different quoting semantics. - * - * @param pattern the 'apostrophe-friendly' patttern to convert - * @param status Input/output error code. If the pattern - * cannot be parsed, the failure code is set. - * @return the standard equivalent of the original pattern - * @stable ICU 3.4 - */ - static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern, - UErrorCode& status); - - /** - * Returns true if this MessageFormat uses named arguments, - * and false otherwise. See class description. - * - * @return true if named arguments are used. - * @draft ICU 4.0 - */ - UBool usesNamedArguments() const; - - /** - * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. - * This method is to implement a simple version of RTTI, since not all - * C++ compilers support genuine RTTI. Polymorphic operator==() and - * clone() methods call this method. - * - * @return The class ID for this object. All objects of a - * given class have the same class ID. Objects of - * other classes have different class IDs. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const; - - /** - * Return the class ID for this class. This is useful only for - * comparing to a return value from getDynamicClassID(). For example: - *

-     * .   Base* polymorphic_pointer = createPolymorphicObject();
-     * .   if (polymorphic_pointer->getDynamicClassID() ==
-     * .      Derived::getStaticClassID()) ...
-     * 
- * @return The class ID for all objects of this class. - * @stable ICU 2.0 - */ - static UClassID U_EXPORT2 getStaticClassID(void); - -private: - - Locale fLocale; - UnicodeString fPattern; - Format** formatAliases; // see getFormats - int32_t formatAliasesCapacity; - UProperty idStart; - UProperty idContinue; - - MessageFormat(); // default constructor not implemented - - /* - * A structure representing one subformat of this MessageFormat. - * Each subformat has a Format object, an offset into the plain - * pattern text fPattern, and an argument number. The argument - * number corresponds to the array of arguments to be formatted. - * @internal - */ - class Subformat; - - /** - * A MessageFormat contains an array of subformats. This array - * needs to grow dynamically if the MessageFormat is modified. - */ - Subformat* subformats; - int32_t subformatCount; - int32_t subformatCapacity; - - /** - * A MessageFormat formats an array of arguments. Each argument - * has an expected type, based on the pattern. For example, if - * the pattern contains the subformat "{3,number,integer}", then - * we expect argument 3 to have type Formattable::kLong. This - * array needs to grow dynamically if the MessageFormat is - * modified. - */ - Formattable::Type* argTypes; - int32_t argTypeCount; - int32_t argTypeCapacity; - - /** - * Is true iff all argument names are non-negative numbers. - * - */ - UBool isArgNumeric; - - // Variable-size array management - UBool allocateSubformats(int32_t capacity); - UBool allocateArgTypes(int32_t capacity); - - /** - * Default Format objects used when no format is specified and a - * numeric or date argument is formatted. These are volatile - * cache objects maintained only for performance. They do not - * participate in operator=(), copy constructor(), nor - * operator==(). - */ - NumberFormat* defaultNumberFormat; - DateFormat* defaultDateFormat; - - /** - * Method to retrieve default formats (or NULL on failure). 
- * These are semantically const, but may modify *this. - */ - const NumberFormat* getDefaultNumberFormat(UErrorCode&) const; - const DateFormat* getDefaultDateFormat(UErrorCode&) const; - - /** - * Finds the word s, in the keyword list and returns the located index. - * @param s the keyword to be searched for. - * @param list the list of keywords to be searched with. - * @return the index of the list which matches the keyword s. - */ - static int32_t findKeyword( const UnicodeString& s, - const UChar * const *list); - - /** - * Formats the array of arguments and copies the result into the - * result buffer, updates the field position. - * - * @param arguments The formattable objects array. - * @param cnt The array count. - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - * @param status Field position status. - * @param recursionProtection - * Initially zero. Bits 0..9 are used to indicate - * that a parameter has already been seen, to - * avoid recursion. Currently unused. - * @param success The error code status. - * @return Reference to 'appendTo' parameter. - */ - UnicodeString& format( const Formattable* arguments, - int32_t cnt, - UnicodeString& appendTo, - FieldPosition& status, - int32_t recursionProtection, - UErrorCode& success) const; - - UnicodeString& format( const Formattable* arguments, - const UnicodeString *argumentNames, - int32_t cnt, - UnicodeString& appendTo, - FieldPosition& status, - int32_t recursionProtection, - UErrorCode& success) const; - - void makeFormat(int32_t offsetNumber, - UnicodeString* segments, - UParseError& parseError, - UErrorCode& success); - - /** - * Convenience method that ought to be in NumberFormat - */ - NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const; - - /** - * Checks the range of the source text to quote the special - * characters, { and ' and copy to target buffer. 
- * @param source - * @param start the text offset to start the process of in the source string - * @param end the text offset to end the process of in the source string - * @param appendTo Output parameter to receive result. - * Result is appended to existing contents. - */ - static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target); - - /** - * Returns array of argument types in the parsed pattern - * for use in C API. Only for the use of umsg_vformat(). Not - * for public consumption. - * @param listCount Output parameter to receive the size of array - * @return The array of formattable types in the pattern - * @internal - */ - const Formattable::Type* getArgTypeList(int32_t& listCount) const { - listCount = argTypeCount; - return argTypes; - } - - /** - * Returns FALSE if the argument name is not legal. - * @param argName argument name. - * @return TRUE if the argument name is legal, otherwise return FALSE. - */ - UBool isLegalArgName(const UnicodeString& argName) const; - - friend class MessageFormatAdapter; // getFormatTypeList() access -}; - -inline UnicodeString& -MessageFormat::format(const Formattable& obj, - UnicodeString& appendTo, - UErrorCode& status) const { - return Format::format(obj, appendTo, status); -} -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -#endif // _MSGFMT -//eof - http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/normlzr.h ---------------------------------------------------------------------- diff --git a/apps/couch_collate/platform/osx/icu/unicode/normlzr.h b/apps/couch_collate/platform/osx/icu/unicode/normlzr.h deleted file mode 100644 index 7974f1a..0000000 --- a/apps/couch_collate/platform/osx/icu/unicode/normlzr.h +++ /dev/null @@ -1,823 +0,0 @@ -/* - ******************************************************************** - * COPYRIGHT: - * Copyright (c) 1996-2006, International Business Machines Corporation 
and - * others. All Rights Reserved. - ******************************************************************** - */ - -#ifndef NORMLZR_H -#define NORMLZR_H - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Unicode Normalization - */ - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/uobject.h" -#include "unicode/unistr.h" -#include "unicode/chariter.h" -#include "unicode/unorm.h" - - -struct UCharIterator; -typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ - -U_NAMESPACE_BEGIN -/** - * The Normalizer class supports the standard normalization forms described in - * - * Unicode Standard Annex #15: Unicode Normalization Forms. - * - * The Normalizer class consists of two parts: - * - static functions that normalize strings or test if strings are normalized - * - a Normalizer object is an iterator that takes any kind of text and - * provides iteration over its normalized form - * - * The Normalizer class is not suitable for subclassing. - * - * The static functions are basically wrappers around the C implementation, - * using UnicodeString instead of UChar*. - * For basic information about normalization forms and details about the C API - * please see the documentation in unorm.h. - * - * The iterator API with the Normalizer constructors and the non-static functions - * uses a CharacterIterator as input. It is possible to pass a string which - * is then internally wrapped in a CharacterIterator. - * The input text is not normalized all at once, but incrementally where needed - * (providing efficient random access). - * This allows to pass in a large text but spend only a small amount of time - * normalizing a small part of that text. - * However, if the entire text is normalized, then the iterator will be - * slower than normalizing the entire text at once and iterating over the result. 
- * A possible use of the Normalizer iterator is also to report an index into the - * original text that is close to where the normalized characters come from. - * - * Important: The iterator API was cleaned up significantly for ICU 2.0. - * The earlier implementation reported the getIndex() inconsistently, - * and previous() could not be used after setIndex(), next(), first(), and current(). - * - * Normalizer allows to start normalizing from anywhere in the input text by - * calling setIndexOnly(), first(), or last(). - * Without calling any of these, the iterator will start at the beginning of the text. - * - * At any time, next() returns the next normalized code point (UChar32), - * with post-increment semantics (like CharacterIterator::next32PostInc()). - * previous() returns the previous normalized code point (UChar32), - * with pre-decrement semantics (like CharacterIterator::previous32()). - * - * current() returns the current code point - * (respectively the one at the newly set index) without moving - * the getIndex(). Note that if the text at the current position - * needs to be normalized, then these functions will do that. - * (This is why current() is not const.) - * It is more efficient to call setIndexOnly() instead, which does not - * normalize. - * - * getIndex() always refers to the position in the input text where the normalized - * code points are returned from. It does not always change with each returned - * code point. - * The code point that is returned from any of the functions - * corresponds to text at or after getIndex(), according to the - * function's iteration semantics (post-increment or pre-decrement). - * - * next() returns a code point from at or after the getIndex() - * from before the next() call. After the next() call, the getIndex() - * might have moved to where the next code point will be returned from - * (from a next() or current() call). 
- * This is semantically equivalent to array access with array[index++] - * (post-increment semantics). - * - * previous() returns a code point from at or after the getIndex() - * from after the previous() call. - * This is semantically equivalent to array access with array[--index] - * (pre-decrement semantics). - * - * Internally, the Normalizer iterator normalizes a small piece of text - * starting at the getIndex() and ending at a following "safe" index. - * The normalized results is stored in an internal string buffer, and - * the code points are iterated from there. - * With multiple iteration calls, this is repeated until the next piece - * of text needs to be normalized, and the getIndex() needs to be moved. - * - * The following "safe" index, the internal buffer, and the secondary - * iteration index into that buffer are not exposed on the API. - * This also means that it is currently not practical to return to - * a particular, arbitrary position in the text because one would need to - * know, and be able to set, in addition to the getIndex(), at least also the - * current index into the internal buffer. - * It is currently only possible to observe when getIndex() changes - * (with careful consideration of the iteration semantics), - * at which time the internal index will be 0. - * For example, if getIndex() is different after next() than before it, - * then the internal index is 0 and one can return to this getIndex() - * later with setIndexOnly(). - * - * @author Laura Werner, Mark Davis, Markus Scherer - * @stable ICU 2.0 - */ -class U_COMMON_API Normalizer : public UObject { -public: - /** - * If DONE is returned from an iteration function that returns a code point, - * then there are no more normalization results available. - * @stable ICU 2.0 - */ - enum { - DONE=0xffff - }; - - // Constructors - - /** - * Creates a new Normalizer object for iterating over the - * normalized form of a given string. - *

- * @param str The string to be normalized. The normalization - * will start at the beginning of the string. - * - * @param mode The normalization mode. - * @stable ICU 2.0 - */ - Normalizer(const UnicodeString& str, UNormalizationMode mode); - - /** - * Creates a new Normalizer object for iterating over the - * normalized form of a given string. - *

- * @param str The string to be normalized. The normalization - * will start at the beginning of the string. - * - * @param length Length of the string, or -1 if NUL-terminated. - * @param mode The normalization mode. - * @stable ICU 2.0 - */ - Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); - - /** - * Creates a new Normalizer object for iterating over the - * normalized form of the given text. - *

- * @param iter The input text to be normalized. The normalization - * will start at the beginning of the string. - * - * @param mode The normalization mode. - * @stable ICU 2.0 - */ - Normalizer(const CharacterIterator& iter, UNormalizationMode mode); - - /** - * Copy constructor. - * @param copy The object to be copied. - * @stable ICU 2.0 - */ - Normalizer(const Normalizer& copy); - - /** - * Destructor - * @stable ICU 2.0 - */ - virtual ~Normalizer(); - - - //------------------------------------------------------------------------- - // Static utility methods - //------------------------------------------------------------------------- - - /** - * Normalizes a UnicodeString according to the specified normalization mode. - * This is a wrapper for unorm_normalize(), using UnicodeString's. - * - * The options parameter specifies which optional - * Normalizer features are to be enabled for this operation. - * - * @param source the input string to be normalized. - * @param mode the normalization mode - * @param options the optional features to be enabled (0 for no options) - * @param result The normalized string (on output). - * @param status The error code. - * @stable ICU 2.0 - */ - static void U_EXPORT2 normalize(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UnicodeString& result, - UErrorCode &status); - - /** - * Compose a UnicodeString. - * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. - * This is a wrapper for unorm_normalize(), using UnicodeString's. - * - * The options parameter specifies which optional - * Normalizer features are to be enabled for this operation. - * - * @param source the string to be composed. - * @param compat Perform compatibility decomposition before composition. - * If this argument is FALSE, only canonical - * decomposition will be performed. - * @param options the optional features to be enabled (0 for no options) - * @param result The composed string (on output). 
- * @param status The error code. - * @stable ICU 2.0 - */ - static void U_EXPORT2 compose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status); - - /** - * Static method to decompose a UnicodeString. - * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. - * This is a wrapper for unorm_normalize(), using UnicodeString's. - * - * The options parameter specifies which optional - * Normalizer features are to be enabled for this operation. - * - * @param source the string to be decomposed. - * @param compat Perform compatibility decomposition. - * If this argument is FALSE, only canonical - * decomposition will be performed. - * @param options the optional features to be enabled (0 for no options) - * @param result The decomposed string (on output). - * @param status The error code. - * @stable ICU 2.0 - */ - static void U_EXPORT2 decompose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status); - - /** - * Performing quick check on a string, to quickly determine if the string is - * in a particular normalization format. - * This is a wrapper for unorm_quickCheck(), using a UnicodeString. - * - * Three types of result can be returned UNORM_YES, UNORM_NO or - * UNORM_MAYBE. Result UNORM_YES indicates that the argument - * string is in the desired normalized format, UNORM_NO determines that - * argument string is not in the desired normalized format. A - * UNORM_MAYBE result indicates that a more thorough check is required, - * the user may have to put the string in its normalized form and compare the - * results. 
- * @param source string for determining if it is in a normalized format - * @param mode normalization format - * @param status A reference to a UErrorCode to receive any errors - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see isNormalized - * @stable ICU 2.0 - */ - static inline UNormalizationCheckResult - quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); - - /** - * Performing quick check on a string; same as the other version of quickCheck - * but takes an extra options parameter like most normalization functions. - * - * @param source string for determining if it is in a normalized format - * @param mode normalization format - * @param options the optional features to be enabled (0 for no options) - * @param status A reference to a UErrorCode to receive any errors - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see isNormalized - * @stable ICU 2.6 - */ - static inline UNormalizationCheckResult - quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); - - /** - * Test if a string is in a given normalization form. - * This is semantically equivalent to source.equals(normalize(source, mode)) . - * - * Unlike unorm_quickCheck(), this function returns a definitive result, - * never a "maybe". - * For NFD, NFKD, and FCD, both functions work exactly the same. - * For NFC and NFKC where quickCheck may return "maybe", this function will - * perform further tests to arrive at a TRUE/FALSE result. - * - * @param src String that is to be tested if it is in a normalization format. - * @param mode Which normalization form to test for. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode" normalization form. 
- * - * @see quickCheck - * @stable ICU 2.2 - */ - static inline UBool - isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); - - /** - * Test if a string is in a given normalization form; same as the other version of isNormalized - * but takes an extra options parameter like most normalization functions. - * - * @param src String that is to be tested if it is in a normalization format. - * @param mode Which normalization form to test for. - * @param options the optional features to be enabled (0 for no options) - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode" normalization form. - * - * @see quickCheck - * @stable ICU 2.6 - */ - static inline UBool - isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); - - /** - * Concatenate normalized strings, making sure that the result is normalized as well. - * - * If both the left and the right strings are in - * the normalization form according to "mode/options", - * then the result will be - * - * \code - * dest=normalize(left+right, mode, options) - * \endcode - * - * For details see unorm_concatenate in unorm.h. - * - * @param left Left source string. - * @param right Right source string. - * @param result The output string. - * @param mode The normalization mode. - * @param options A bit set of normalization options. - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. 
- * @return result - * - * @see unorm_concatenate - * @see normalize - * @see unorm_next - * @see unorm_previous - * - * @stable ICU 2.1 - */ - static UnicodeString & - U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right, - UnicodeString &result, - UNormalizationMode mode, int32_t options, - UErrorCode &errorCode); - - /** - * Compare two strings for canonical equivalence. - * Further options include case-insensitive comparison and - * code point order (as opposed to code unit order). - * - * Canonical equivalence between two strings is defined as their normalized - * forms (NFD or NFC) being identical. - * This function compares strings incrementally instead of normalizing - * (and optionally case-folding) both strings entirely, - * improving performance significantly. - * - * Bulk normalization is only necessary if the strings do not fulfill the FCD - * conditions. Only in this case, and only if the strings are relatively long, - * is memory allocated temporarily. - * For FCD strings and short non-FCD strings there is no memory allocation. - * - * Semantically, this is equivalent to - * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) - * where code point order and foldCase are all optional. - * - * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match - * the case folding must be performed first, then the normalization. - * - * @param s1 First source string. - * @param s2 Second source string. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Case-sensitive comparison in code unit order, and the input strings - * are quick-checked for FCD. - * - * - UNORM_INPUT_IS_FCD - * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. - * If not set, the function will quickCheck for FCD - * and normalize if necessary. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). 
- * - * - U_COMPARE_IGNORE_CASE - * Set to compare strings case-insensitively using case folding, - * instead of case-sensitively. - * If set, then the following case folding options are used. - * - * - Options as used with case-insensitive comparisons, currently: - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * (see u_strCaseCompare for details) - * - * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT - * - * @param errorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return <0 or 0 or >0 as usual for string comparisons - * - * @see unorm_compare - * @see normalize - * @see UNORM_FCD - * @see u_strCompare - * @see u_strCaseCompare - * - * @stable ICU 2.2 - */ - static inline int32_t - compare(const UnicodeString &s1, const UnicodeString &s2, - uint32_t options, - UErrorCode &errorCode); - - //------------------------------------------------------------------------- - // Iteration API - //------------------------------------------------------------------------- - - /** - * Return the current character in the normalized text. - * current() may need to normalize some text at getIndex(). - * The getIndex() is not changed. - * - * @return the current normalized code point - * @stable ICU 2.0 - */ - UChar32 current(void); - - /** - * Return the first character in the normalized text. - * This is equivalent to setIndexOnly(startIndex()) followed by next(). - * (Post-increment semantics.) - * - * @return the first normalized code point - * @stable ICU 2.0 - */ - UChar32 first(void); - - /** - * Return the last character in the normalized text. - * This is equivalent to setIndexOnly(endIndex()) followed by previous(). - * (Pre-decrement semantics.) - * - * @return the last normalized code point - * @stable ICU 2.0 - */ - UChar32 last(void); - - /** - * Return the next character in the normalized text. - * (Post-increment semantics.) 
- * If the end of the text has already been reached, DONE is returned. - * The DONE value could be confused with a U+FFFF non-character code point - * in the text. If this is possible, you can test getIndex()<endIndex() - * before calling next(), or (getIndex()<endIndex() || last()!=DONE) - * after calling next(). (Calling last() will change the iterator state!) - * - * The C API unorm_next() is more efficient and does not have this ambiguity. - * - * @return the next normalized code point - * @stable ICU 2.0 - */ - UChar32 next(void); - - /** - * Return the previous character in the normalized text and decrement. - * (Pre-decrement semantics.) - * If the beginning of the text has already been reached, DONE is returned. - * The DONE value could be confused with a U+FFFF non-character code point - * in the text. If this is possible, you can test - * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change - * the iterator state!) - * - * The C API unorm_previous() is more efficient and does not have this ambiguity. - * - * @return the previous normalized code point - * @stable ICU 2.0 - */ - UChar32 previous(void); - - /** - * Set the iteration position in the input text that is being normalized, - * without any immediate normalization. - * After setIndexOnly(), getIndex() will return the same index that is - * specified here. - * - * @param index the desired index in the input text. - * @stable ICU 2.0 - */ - void setIndexOnly(int32_t index); - - /** - * Reset the index to the beginning of the text. - * This is equivalent to setIndexOnly(startIndex)). - * @stable ICU 2.0 - */ - void reset(void); - - /** - * Retrieve the current iteration position in the input text that is - * being normalized. - * - * A following call to next() will return a normalized code point from - * the input text at or after this index. - * - * After a call to previous(), getIndex() will point at or before the - * position in the input text where the normalized code point - * was returned from with previous(). - * - * @return the current index in the input text - * @stable ICU 2.0 - */ - int32_t getIndex(void) const; - - /** - * Retrieve the index of the start of the input text. This is the begin index - * of the CharacterIterator or the start (i.e. index 0) of the string - * over which this Normalizer is iterating. - * - * @return the smallest index in the input text where the Normalizer operates - * @stable ICU 2.0 - */ - int32_t startIndex(void) const; - - /** - * Retrieve the index of the end of the input text. 
This is the end index - * of the CharacterIterator or the length of the string - * over which this Normalizer is iterating. - * This end index is exclusive, i.e., the Normalizer operates only on characters - * before this index. - * - * @return the first index in the input text where the Normalizer does not operate - * @stable ICU 2.0 - */ - int32_t endIndex(void) const; - - /** - * Returns TRUE when both iterators refer to the same character in the same - * input text. - * - * @param that a Normalizer object to compare this one to - * @return comparison result - * @stable ICU 2.0 - */ - UBool operator==(const Normalizer& that) const; - - /** - * Returns FALSE when both iterators refer to the same character in the same - * input text. - * - * @param that a Normalizer object to compare this one to - * @return comparison result - * @stable ICU 2.0 - */ - inline UBool operator!=(const Normalizer& that) const; - - /** - * Returns a pointer to a new Normalizer that is a clone of this one. - * The caller is responsible for deleting the new clone. - * @return a pointer to a new Normalizer - * @stable ICU 2.0 - */ - Normalizer* clone(void) const; - - /** - * Generates a hash code for this iterator. - * - * @return the hash code - * @stable ICU 2.0 - */ - int32_t hashCode(void) const; - - //------------------------------------------------------------------------- - // Property access methods - //------------------------------------------------------------------------- - - /** - * Set the normalization mode for this object. - *

- * Note: If the normalization mode is changed while iterating - * over a string, calls to {@link #next() } and {@link #previous() } may - * return previously buffered characters in the old normalization mode - * until the iteration is able to re-sync at the next base character. - * It is safest to call {@link #setIndexOnly }, {@link #reset() }, - * {@link #setText }, {@link #first() }, - * {@link #last() }, etc. after calling setMode. - *

- * @param newMode the new mode for this Normalizer. - * @see #getUMode - * @stable ICU 2.0 - */ - void setMode(UNormalizationMode newMode); - - /** - * Return the normalization mode for this object. - * - * This is an unusual name because there used to be a getMode() that - * returned a different type. - * - * @return the mode for this Normalizer - * @see #setMode - * @stable ICU 2.0 - */ - UNormalizationMode getUMode(void) const; - - /** - * Set options that affect this Normalizer's operation. - * Options do not change the basic composition or decomposition operation - * that is being performed, but they control whether - * certain optional portions of the operation are done. - * Currently the only available option is obsolete. - * - * It is possible to specify multiple options that are all turned on or off. - * - * @param option the option(s) whose value is/are to be set. - * @param value the new setting for the option. Use TRUE to - * turn the option(s) on and FALSE to turn it/them off. - * - * @see #getOption - * @stable ICU 2.0 - */ - void setOption(int32_t option, - UBool value); - - /** - * Determine whether an option is turned on or off. - * If multiple options are specified, then the result is TRUE if any - * of them are set. - *

- * @param option the option(s) that are to be checked - * @return TRUE if any of the option(s) are set - * @see #setOption - * @stable ICU 2.0 - */ - UBool getOption(int32_t option) const; - - /** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning. - * - * @param newText a string that replaces the current input text - * @param status a UErrorCode - * @stable ICU 2.0 - */ - void setText(const UnicodeString& newText, - UErrorCode &status); - - /** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning. - * - * @param newText a CharacterIterator object that replaces the current input text - * @param status a UErrorCode - * @stable ICU 2.0 - */ - void setText(const CharacterIterator& newText, - UErrorCode &status); - - /** - * Set the input text over which this Normalizer will iterate. - * The iteration position is set to the beginning. - * - * @param newText a string that replaces the current input text - * @param length the length of the string, or -1 if NUL-terminated - * @param status a UErrorCode - * @stable ICU 2.0 - */ - void setText(const UChar* newText, - int32_t length, - UErrorCode &status); - /** - * Copies the input text into the UnicodeString argument. - * - * @param result Receives a copy of the text under iteration. - * @stable ICU 2.0 - */ - void getText(UnicodeString& result); - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * @returns a UClassID for this class. - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * @return a UClassID for the actual class. 
- * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -private: - //------------------------------------------------------------------------- - // Private functions - //------------------------------------------------------------------------- - - Normalizer(); // default constructor not implemented - Normalizer &operator=(const Normalizer &that); // assignment operator not implemented - - // Private utility methods for iteration - // For documentation, see the source code - UBool nextNormalize(); - UBool previousNormalize(); - - void init(CharacterIterator *iter); - void clearBuffer(void); - - //------------------------------------------------------------------------- - // Private data - //------------------------------------------------------------------------- - - UNormalizationMode fUMode; - int32_t fOptions; - - // The input text and our position in it - UCharIterator *text; - - // The normalization buffer is the result of normalization - // of the source in [currentIndex..nextIndex[ . - int32_t currentIndex, nextIndex; - - // A buffer for holding intermediate results - UnicodeString buffer; - int32_t bufferPos; - -}; - -//------------------------------------------------------------------------- -// Inline implementations -//------------------------------------------------------------------------- - -inline UBool -Normalizer::operator!= (const Normalizer& other) const -{ return ! 
operator==(other); } - -inline UNormalizationCheckResult -Normalizer::quickCheck(const UnicodeString& source, - UNormalizationMode mode, - UErrorCode &status) { - if(U_FAILURE(status)) { - return UNORM_MAYBE; - } - - return unorm_quickCheck(source.getBuffer(), source.length(), - mode, &status); -} - -inline UNormalizationCheckResult -Normalizer::quickCheck(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - if(U_FAILURE(status)) { - return UNORM_MAYBE; - } - - return unorm_quickCheckWithOptions(source.getBuffer(), source.length(), - mode, options, &status); -} - -inline UBool -Normalizer::isNormalized(const UnicodeString& source, - UNormalizationMode mode, - UErrorCode &status) { - if(U_FAILURE(status)) { - return FALSE; - } - - return unorm_isNormalized(source.getBuffer(), source.length(), - mode, &status); -} - -inline UBool -Normalizer::isNormalized(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - if(U_FAILURE(status)) { - return FALSE; - } - - return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(), - mode, options, &status); -} - -inline int32_t -Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, - uint32_t options, - UErrorCode &errorCode) { - // all argument checking is done in unorm_compare - return unorm_compare(s1.getBuffer(), s1.length(), - s2.getBuffer(), s2.length(), - options, - &errorCode); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -#endif // NORMLZR_H