couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dav...@apache.org
Subject [28/46] support static build
Date Thu, 06 Feb 2014 17:28:10 GMT
http://git-wip-us.apache.org/repos/asf/couchdb-couch-collate/blob/32ffa429/platform/osx/icu/unicode/stsearch.h
----------------------------------------------------------------------
diff --git a/platform/osx/icu/unicode/stsearch.h b/platform/osx/icu/unicode/stsearch.h
new file mode 100644
index 0000000..8499752
--- /dev/null
+++ b/platform/osx/icu/unicode/stsearch.h
@@ -0,0 +1,518 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#ifndef STSEARCH_H
+#define STSEARCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file 
+ * \brief C++ API: Service for searching text based on RuleBasedCollator.
+ */
+ 
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/search.h"
+
+U_NAMESPACE_BEGIN
+
+/** 
+ *
+ * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides
+ * language-sensitive text searching based on the comparison rules defined
+ * in a {@link RuleBasedCollator} object.
+ * StringSearch ensures that language eccentricity can be 
+ * handled, e.g. for the German collator, characters &szlig; and SS will be matched 
+ * if case is chosen to be ignored.
+ * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * "ICU Collation Design Document"</a> for more information.
+ * <p> 
+ * The algorithm implemented is a modified form of the Boyer-Moore search.
+ * For more information on the algorithm, see
+ * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i>
+ * in February, 1999.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and 
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end> 
+ * if
+ * <pre> 
+ * option 1. Some canonical equivalent of P matches some canonical equivalent 
+ *           of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark, 
+ *           there exists no non-ignorable combining mark before or after S' 
+ *           in S respectively. 
+ * </pre>
+ * Option 2. will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms 
+ * such as the break iterators in <tt>BreakIterator</tt>. Using these 
+ * APIs, it is easy to scan through text looking for all occurrences of 
+ * a given pattern. This search iterator allows changing of direction by 
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
+ * Though a direction change can occur without calling <tt>reset</tt> first,  
+ * this operation comes with some speed penalty.
+ * Match results in the forward direction will match the result matches in 
+ * the backwards direction in the reverse order.
+ * <p>
+ * <tt>SearchIterator</tt> provides APIs to specify the starting position 
+ * within the text string to be searched, e.g. <tt>setOffset</tt>,
+ * <tt>preceding</tt> and <tt>following</tt>. Since the 
+ * starting position will be set as it is specified, please take note that 
+ * there are some danger points at which the search may return incorrect 
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ *      second character which requires to be swapped with the preceding 
+ *      character. Vice versa, if the preceding match is to be found, 
+ *      position to search from should not be the first character which 
+ *      requires to be swapped with the next character. E.g certain Thai and
+ *      Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a 
+ *      contracting sequence except the first will fail. Vice versa if a 
+ *      preceding pattern match is to be found, an invalid starting point 
+ *      would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * Using a breakiterator will only give you results that exactly match the
+ * boundaries given by the breakiterator. For instance the pattern "e" will
+ * not be found in the string "\u00e9" if a character break iterator is used.
+ * <p>
+ * Options are provided to handle overlapping matches. 
+ * E.g. In English, overlapping matches produce the result 0 and 2 
+ * for the pattern "abab" in the text "ababab", whereas mutually 
+ * exclusive matches only produce the result of 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while 
+ * performing matches, there are no APIs here for setting and getting the 
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>.
+ * Lastly to update StringSearch to the new collator attributes, 
+ * reset() has to be called.
+ * <p> 
+ * Restriction: <br>
+ * Currently there are no composite characters that consist of a
+ * character with combining class > 0 before a character with combining 
+ * class == 0. However, if such a character exists in the future,  
+ * StringSearch does not guarantee the results for option 1.
+ * <p>
+ * Consult the <tt>SearchIterator</tt> documentation for information on
+ * and examples of how to use instances of this class to implement text
+ * searching.
+ * <pre><code>
+ * UnicodeString target("The quick brown fox jumps over the lazy dog.");
+ * UnicodeString pattern("fox");
+ *
+ * UErrorCode      error = U_ZERO_ERROR;
+ * StringSearch iter(pattern, target, Locale::getUS(), NULL, error);
+ * for (int pos = iter.first(error);
+ *      pos != USEARCH_DONE; 
+ *      pos = iter.next(error))
+ * {
+ *     printf("Found match at %d pos, length is %d\n", pos, 
+ *                                             iter.getMatchLength());
+ * }
+ * </code></pre>
+ * <p>
+ * Note, StringSearch is not to be subclassed.
+ * </p>
+ * @see SearchIterator
+ * @see RuleBasedCollator
+ * @since ICU 2.0
+ */
+
+class U_I18N_API StringSearch : public SearchIterator
+{
+public:
+
+    // public constructors and destructors --------------------------------
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument locale
+     * language rule set. A collator will be created in the process, which
+     * will be owned by this instance and will be deleted during
+     * destruction.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param locale  A locale which defines the language-sensitive
+     *                comparison rules used to determine whether text in the
+     *                pattern and target matches.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain
+     *                the matches that are found. Matches whose start and end
+     *                indices in the target text are not boundaries as
+     *                determined by the <tt>BreakIterator</tt> are
+     *                ignored. If this behavior is not desired,
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status  for errors if any. If pattern or text is NULL, or if
+     *               either the length of pattern or text is 0 then an
+     *               U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString &pattern, const UnicodeString &text,
+                 const Locale        &locale,       
+                       BreakIterator *breakiter,
+                       UErrorCode    &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument collator
+     * language rule set. Note, user retains the ownership of this collator,
+     * it does not get destroyed during this instance's destruction.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param coll    A <tt>RuleBasedCollator</tt> object which defines
+     *                the language-sensitive comparison rules used to
+     *                determine whether text in the pattern and target
+     *                matches. User is responsible for the clearing of this
+     *                object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain
+     *                the matches that are found. Matches whose start and end
+     *                indices in the target text are not boundaries as
+     *                determined by the <tt>BreakIterator</tt> are
+     *                ignored. If this behavior is not desired,
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status for errors if any. If either the length of pattern or
+     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString     &pattern, 
+                 const UnicodeString     &text,
+                       RuleBasedCollator *coll,       
+                       BreakIterator     *breakiter,
+                       UErrorCode        &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument locale
+     * language rule set. A collator will be created in the process, which
+     * will be owned by this instance and will be deleted during
+     * destruction.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt>
+     * will be done during searching for this version. The block of text
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param pattern The text for which this object will search.
+     * @param text    The text iterator in which to search for the pattern.
+     * @param locale  A locale which defines the language-sensitive
+     *                comparison rules used to determine whether text in the
+     *                pattern and target matches. User is responsible for
+     *                the clearing of this object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain
+     *                the matches that are found. Matches whose start and end
+     *                indices in the target text are not boundaries as
+     *                determined by the <tt>BreakIterator</tt> are
+     *                ignored. If this behavior is not desired,
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status for errors if any. If either the length of pattern or
+     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString &pattern, CharacterIterator &text,
+                 const Locale        &locale, 
+                       BreakIterator *breakiter,
+                       UErrorCode    &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument collator
+     * language rule set. Note, user retains the ownership of this collator,
+     * it does not get destroyed during this instance's destruction.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt>
+     * will be done during searching for this version. The block of text
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param pattern The text for which this object will search.
+     * @param text    The text iterator in which to search for the pattern.
+     * @param coll    A <tt>RuleBasedCollator</tt> object which defines
+     *                the language-sensitive comparison rules used to
+     *                determine whether text in the pattern and target
+     *                matches. User is responsible for the clearing of this
+     *                object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain
+     *                the matches that are found. Matches whose start and end
+     *                indices in the target text are not boundaries as
+     *                determined by the <tt>BreakIterator</tt> are
+     *                ignored. If this behavior is not desired,
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status for errors if any. If either the length of pattern or
+     *               text is 0 then an U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    StringSearch(const UnicodeString     &pattern, CharacterIterator &text,
+                       RuleBasedCollator *coll, 
+                       BreakIterator     *breakiter,
+                       UErrorCode        &status);
+
+    /**
+     * Copy constructor that creates a StringSearch instance with the same
+     * behavior, and iterating over the same text.
+     * @param that StringSearch instance to be copied.
+     * @stable ICU 2.0
+     */
+    StringSearch(const StringSearch &that);
+
+    /**
+     * Destructor. Cleans up the search iterator data struct.
+     * If a collator is created in the constructor, it will be destroyed here.
+     * @stable ICU 2.0
+     */
+    virtual ~StringSearch(void);
+
+    /**
+     * Clone this object.
+     * Clones can be used concurrently in multiple threads.
+     * If an error occurs, then NULL is returned.
+     * The caller must delete the clone.
+     *
+     * @return a clone of this object
+     *
+     * @see getDynamicClassID
+     * @stable ICU 2.8
+     */
+    StringSearch *clone() const;
+
+    // operator overloading ---------------------------------------------
+
+    /**
+     * Assignment operator. Sets this iterator to have the same behavior,
+     * and iterate over the same text, as the one passed in.
+     * @param that instance to be copied.
+     * @stable ICU 2.0
+     */
+    StringSearch & operator=(const StringSearch &that);
+
+    /**
+     * Equality operator.
+     * @param that instance to be compared.
+     * @return TRUE if both instances have the same attributes,
+     *         breakiterators, collators and iterate over the same text
+     *         while looking for the same pattern.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const SearchIterator &that) const;
+
+    // public get and set methods ----------------------------------------
+
+    /**
+     * Sets the index to point to the given position, and clears any state
+     * that's affected.
+     * <p>
+     * This method takes the argument index and sets the position in the text
+     * string accordingly without checking if the index is pointing to a
+     * valid starting point to begin searching.
+     * @param position within the text to be set. If position is less
+     *          than or greater than the text range for searching,
+     *          an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+     * @param status for errors if it occurs
+     * @stable ICU 2.0
+     */
+    virtual void setOffset(int32_t position, UErrorCode &status);
+
+    /**
+     * Return the current index in the text being searched.
+     * If the iteration has gone past the end of the text
+     * (or past the beginning for a backwards search), USEARCH_DONE
+     * is returned.
+     * @return current index in the text being searched.
+     * @stable ICU 2.0
+     */
+    virtual int32_t getOffset(void) const;
+
+    /**
+     * Set the target text to be searched.
+     * Text iteration will hence begin at the start of the text string.
+     * This method is
+     * useful if you want to re-use an iterator to search for the same
+     * pattern within a different body of text.
+     * @param text text string to be searched
+     * @param status for errors if any. If the text length is 0 then an
+     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    virtual void setText(const UnicodeString &text, UErrorCode &status);
+    
+    /**
+     * Set the target text to be searched.
+     * Text iteration will hence begin at the start of the text string.
+     * This method is
+     * useful if you want to re-use an iterator to search for the same
+     * pattern within a different body of text.
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt>
+     * will be done during searching for this version. The block of text
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param text text iterator to be searched
+     * @param status for errors if any. If the text length is 0 then an
+     *        U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    virtual void setText(CharacterIterator &text, UErrorCode &status);
+
+    /**
+     * Gets the collator used for the language rules.
+     * <p>
+     * Caller may modify but <b>must not</b> delete the <tt>RuleBasedCollator</tt>!
+     * Modifications to this collator will affect the original collator passed in to
+     * the <tt>StringSearch</tt> constructor or to setCollator, if any.
+     * @return collator used for string search
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator * getCollator() const;
+    
+    /**
+     * Sets the collator used for the language rules. User retains the
+     * ownership of this collator, thus the responsibility of deletion lies
+     * with the user. This method causes internal data such as Boyer-Moore
+     * shift tables to be recalculated, but the iterator's position is
+     * unchanged.
+     * @param coll    collator
+     * @param status  for errors if any
+     * @stable ICU 2.0
+     */
+    void setCollator(RuleBasedCollator *coll, UErrorCode &status);
+    
+    /**
+     * Sets the pattern used for matching.
+     * Internal data like the Boyer-Moore table will be recalculated, but
+     * the iterator's position is unchanged.
+     * @param pattern search pattern to be found
+     * @param status for errors if any. If the pattern length is 0 then an
+     *               U_ILLEGAL_ARGUMENT_ERROR is returned.
+     * @stable ICU 2.0
+     */
+    void setPattern(const UnicodeString &pattern, UErrorCode &status);
+    
+    /**
+     * Gets the search pattern.
+     * @return pattern used for matching
+     * @stable ICU 2.0
+     */
+    const UnicodeString & getPattern() const;
+
+    // public methods ----------------------------------------------------
+
+    /**
+     * Reset the iteration.
+     * Search will begin at the start of the text string if a forward
+     * iteration is initiated before a backwards iteration. Otherwise if
+     * a backwards iteration is initiated before a forwards iteration, the
+     * search will begin at the end of the text string.
+     * @stable ICU 2.0
+     */
+    virtual void reset();
+
+    /**
+     * Returns a copy of StringSearch with the same behavior, and
+     * iterating over the same text, as this one. Note that all data will be
+     * replicated, except for the user-specified collator and the
+     * breakiterator.
+     * @return cloned object
+     * @stable ICU 2.0
+     */
+    virtual SearchIterator * safeClone(void) const;
+    
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @stable ICU 2.2
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @stable ICU 2.2
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+    // protected method -------------------------------------------------
+
+    /**
+     * Search forward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should
+     * call {@link SearchIterator#next }.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength } with the number
+     * of characters in the target text that make up the match. If no match
+     * is found, the method returns <tt>USEARCH_DONE</tt>.
+     * <p>
+     * The <tt>StringSearch</tt> is adjusted so that its current index
+     * (as returned by {@link #getOffset }) is the match position if one was
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
+     * @param position The index in the target text at which the search
+     *                 starts
+     * @param status for errors if any occur
+     * @return The index at which the matched text in the target starts, or
+     *         USEARCH_DONE if no match was found.
+     * @stable ICU 2.0
+     */
+    virtual int32_t handleNext(int32_t position, UErrorCode &status);
+
+    /**
+     * Search backward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should
+     * call {@link SearchIterator#previous }.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength } with the number
+     * of characters in the target text that make up the match. If no match
+     * is found, the method returns <tt>USEARCH_DONE</tt>.
+     * <p>
+     * The <tt>StringSearch</tt> is adjusted so that its current index
+     * (as returned by {@link #getOffset }) is the match position if one was
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
+     * @param position The index in the target text at which the search
+     *                 starts.
+     * @param status for errors if any occur
+     * @return The index at which the matched text in the target starts, or
+     *         USEARCH_DONE if no match was found.
+     * @stable ICU 2.0
+     */
+    virtual int32_t handlePrev(int32_t position, UErrorCode &status);
+    
+private :
+    StringSearch(); // default constructor not implemented
+
+    // private data members ----------------------------------------------
+
+    /**
+     * RuleBasedCollator, contains exactly the same UCollator * in m_strsrch_
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator  m_collator_;
+    /**
+     * Pattern text
+     * @stable ICU 2.0
+     */
+    UnicodeString      m_pattern_;
+    /**
+     * String search struct data
+     * @stable ICU 2.0
+     */
+    UStringSearch     *m_strsrch_;
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
+

http://git-wip-us.apache.org/repos/asf/couchdb-couch-collate/blob/32ffa429/platform/osx/icu/unicode/symtable.h
----------------------------------------------------------------------
diff --git a/platform/osx/icu/unicode/symtable.h b/platform/osx/icu/unicode/symtable.h
new file mode 100644
index 0000000..428f8bf
--- /dev/null
+++ b/platform/osx/icu/unicode/symtable.h
@@ -0,0 +1,112 @@
+/*
+**********************************************************************
+*   Copyright (c) 2000-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   02/04/00    aliu        Creation.
+**********************************************************************
+*/
+#ifndef SYMTABLE_H
+#define SYMTABLE_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file 
+ * \brief C++ API: An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ */
+ 
+U_NAMESPACE_BEGIN
+
+class ParsePosition;
+class UnicodeFunctor;
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ * An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ *
+ * <p>A symbol table maintains two kinds of mappings.  The first is
+ * between symbolic names and their values.  For example, if the
+ * variable with the name "start" is set to the value "alpha"
+ * (perhaps, though not necessarily, through an expression such as
+ * "$start=alpha"), then the call lookup("start") will return the
+ * char[] array ['a', 'l', 'p', 'h', 'a'].
+ *
+ * <p>The second kind of mapping is between character values and
+ * UnicodeMatcher objects.  This is used by RuleBasedTransliterator,
+ * which uses characters in the private use area to represent objects
+ * such as UnicodeSets.  If U+E015 is mapped to the UnicodeSet [a-z],
+ * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
+ *
+ * <p>Finally, a symbol table defines parsing behavior for symbolic
+ * names.  All symbolic names start with the SYMBOL_REF character.
+ * When a parser encounters this character, it calls parseReference()
+ * with the position immediately following the SYMBOL_REF.  The symbol
+ * table parses the name, if there is one, and returns it.
+ *
+ * @stable ICU 2.8
+ */
+class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
+public:
+
+    /**
+     * The character preceding a symbol reference name.
+     * @stable ICU 2.8
+     */
+    enum { SYMBOL_REF = 0x0024 /*$*/ };
+
+    /**
+     * Destructor.
+     * @stable ICU 2.8
+     */
+    virtual ~SymbolTable();
+
+    /**
+     * Look up the characters associated with this string and return them.
+     * Return <tt>NULL</tt> if no such name exists.  The resultant
+     * string may have length zero.
+     * @param s the symbolic name to look up
+     * @return a string containing the name's value, or <tt>NULL</tt> if
+     * there is no mapping for s.
+     * @stable ICU 2.8
+     */
+    virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
+
+    /**
+     * Look up the UnicodeMatcher associated with the given character, and
+     * return it.  Return <tt>NULL</tt> if not found.
+     * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
+     * @return the UnicodeMatcher object represented by the given
+     * character, or NULL if there is no mapping for ch.
+     * @stable ICU 2.8
+     */
+    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
+
+    /**
+     * Parse a symbol reference name from the given string, starting
+     * at the given position.  If no valid symbol reference name is
+     * found, return the empty string and leave pos unchanged.  That is, if the
+     * character at pos cannot start a name, or if pos is at or after
+     * text.length(), then return an empty string.  This indicates an
+     * isolated SYMBOL_REF character.
+     * @param text the text to parse for the name
+     * @param pos on entry, the index of the first character to parse.
+     * This is the character following the SYMBOL_REF character.  On
+     * exit, the index after the last parsed character.  If the parse
+     * failed, pos is unchanged on exit.
+     * @param limit the index after the last character to be parsed.
+     * @return the parsed name, or an empty string if there is no
+     * valid symbolic name at the given position.
+     * @stable ICU 2.8
+     */
+    virtual UnicodeString parseReference(const UnicodeString& text,
+                                         ParsePosition& pos, int32_t limit) const = 0;
+};
+U_NAMESPACE_END
+
+#endif

http://git-wip-us.apache.org/repos/asf/couchdb-couch-collate/blob/32ffa429/platform/osx/icu/unicode/tblcoll.h
----------------------------------------------------------------------
diff --git a/platform/osx/icu/unicode/tblcoll.h b/platform/osx/icu/unicode/tblcoll.h
new file mode 100644
index 0000000..2fdd63b
--- /dev/null
+++ b/platform/osx/icu/unicode/tblcoll.h
@@ -0,0 +1,926 @@
+/*
+******************************************************************************
+* Copyright (C) 1996-2008, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*/
+
+/**
+ * \file 
+ * \brief C++ API: RuleBasedCollator class provides the simple implementation of Collator.
+ */
+
+/**
+* File tblcoll.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+*  Date        Name        Description
+*  2/5/97      aliu        Added streamIn and streamOut methods.  Added
+*                          constructor which reads RuleBasedCollator object from
+*                          a binary file.  Added writeToFile method which streams
+*                          RuleBasedCollator out to a binary file.  The streamIn
+*                          and streamOut methods use istream and ostream objects
+*                          in binary mode.
+*  2/12/97     aliu        Modified to use TableCollationData sub-object to
+*                          hold invariant data.
+*  2/13/97     aliu        Moved several methods into this class from Collation.
+*                          Added a private RuleBasedCollator(Locale&) constructor,
+*                          to be used by Collator::createDefault().  General
+*                          clean up.
+*  2/20/97     helena      Added clone, operator==, operator!=, operator=, and copy
+*                          constructor and getDynamicClassID.
+*  3/5/97      aliu        Modified constructFromFile() to add parameter
+*                          specifying whether or not binary loading is to be
+*                          attempted.  This is required for dynamic rule loading.
+* 05/07/97     helena      Added memory allocation error detection.
+*  6/17/97     helena      Added IDENTICAL strength for compare, changed getRules to
+*                          use MergeCollation::getPattern.
+*  6/20/97     helena      Java class name change.
+*  8/18/97     helena      Added internal API documentation.
+* 09/03/97     helena      Added createCollationKeyValues().
+* 02/10/98     damiba      Added compare with "length" parameter
+* 08/05/98     erm         Synched with 1.2 version of RuleBasedCollator.java
+* 04/23/99     stephen     Removed EDecompositionMode, merged with
+*                          Normalizer::EMode
+* 06/14/99     stephen     Removed kResourceBundleSuffix
+* 11/02/99     helena      Collator performance enhancements.  Eliminates the
+*                          UnicodeString construction and special case for NO_OP.
+* 11/23/99     srl         More performance enhancements. Updates to NormalizerIterator
+*                          internal state management.
+* 12/15/99     aliu        Update to support Thai collation.  Move NormalizerIterator
+*                          to implementation file.
+* 01/29/01     synwee      Modified into a C++ wrapper which calls C API
+*                          (ucol.h)
+*/
+
+#ifndef TBLCOLL_H
+#define TBLCOLL_H
+
+#include "unicode/utypes.h"
+
+ 
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/coll.h"
+#include "unicode/ucol.h"
+#include "unicode/sortkey.h"
+#include "unicode/normlzr.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+* @stable ICU 2.0
+*/
+class StringSearch;
+/**
+* @stable ICU 2.0
+*/
+class CollationElementIterator;
+
+/**
+ * The RuleBasedCollator class provides the simple implementation of
+ * Collator, using data-driven tables. The user can create a customized
+ * table-based collation.
+ * <P>
+ * <em>Important: </em>The ICU collation service has been reimplemented 
+ * in order to achieve better performance and UCA compliance. 
+ * For details, see the 
+ * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * collation design document</a>.
+ * <p>
+ * RuleBasedCollator is a thin C++ wrapper over the C implementation.
+ * <p>
+ * For more information about the collation service see 
+ * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>.
+ * <p>
+ * Collation service provides correct sorting orders for most locales supported in ICU. 
+ * If specific data for a locale is not available, the order eventually falls back
+ * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 
+ * <p>
+ * Sort ordering may be customized by providing your own set of rules. For more on
+ * this subject see the <a href="http://icu-project.org/userguide/Collate_Customization.html">
+ * Collation customization</a> section of the users guide.
+ * <p>
+ * Note, RuleBasedCollator is not to be subclassed.
+ * @see        Collator
+ * @version    2.0 11/15/2001
+ */
+class U_I18N_API RuleBasedCollator : public Collator
+{
+public:
+
+  // constructor -------------------------------------------------------------
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules, UErrorCode& status);
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param collationStrength default strength for comparison
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules,
+                       ECollationStrength collationStrength,
+                       UErrorCode& status);
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param decompositionMode the normalisation mode
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules,
+                    UColAttributeValue decompositionMode,
+                    UErrorCode& status);
+
+    /**
+     * RuleBasedCollator constructor. This takes the table rules and builds a
+     * collation table out of them. Please see RuleBasedCollator class
+     * description for more details on the collation rule syntax.
+     * @param rules the collation rules to build the collation table from.
+     * @param collationStrength default strength for comparison
+     * @param decompositionMode the normalisation mode
+     * @param status reporting a success or an error.
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const UnicodeString& rules,
+                    ECollationStrength collationStrength,
+                    UColAttributeValue decompositionMode,
+                    UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @param other the RuleBasedCollator object to be copied
+     * @see Locale
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator(const RuleBasedCollator& other);
+
+
+    /** Opens a collator from a collator binary image created using
+    *  cloneBinary. Binary image used in instantiation of the 
+    *  collator remains owned by the user and should stay around for 
+    *  the lifetime of the collator. The API also takes a base collator
+    *  which usually should be UCA.
+    *  @param bin binary image owned by the user and required through the
+    *             lifetime of the collator
+    *  @param length size of the image. If negative, the API will try to
+    *                figure out the length of the image
+    *  @param base fallback collator, usually UCA. Base is required to be
+    *              present through the lifetime of the collator. Currently 
+    *              it cannot be NULL.
+    *  @param status for catching errors
+    *  @return newly created collator
+    *  @see cloneBinary
+    *  @stable ICU 3.4
+    */
+    RuleBasedCollator(const uint8_t *bin, int32_t length, 
+                    const RuleBasedCollator *base, 
+                    UErrorCode &status);
+    // destructor --------------------------------------------------------------
+
+    /**
+     * Destructor.
+     * @stable ICU 2.0
+     */
+    virtual ~RuleBasedCollator();
+
+    // public methods ----------------------------------------------------------
+
+    /**
+     * Assignment operator.
+     * @param other the RuleBasedCollator object to assign from.
+     * @stable ICU 2.0
+     */
+    RuleBasedCollator& operator=(const RuleBasedCollator& other);
+
+    /**
+     * Returns true if argument is the same as this object.
+     * @param other Collator object to be compared.
+     * @return true if argument is the same as this object.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator==(const Collator& other) const;
+
+    /**
+     * Returns true if argument is not the same as this object.
+     * @param other Collator object to be compared
+     * @return returns true if argument is not the same as this object.
+     * @stable ICU 2.0
+     */
+    virtual UBool operator!=(const Collator& other) const;
+
+    /**
+     * Makes a deep copy of the object.
+     * The caller owns the returned object.
+     * @return the cloned object.
+     * @stable ICU 2.0
+     */
+    virtual Collator* clone(void) const;
+
+    /**
+     * Creates a collation element iterator for the source string. The caller of
+     * this method is responsible for the memory management of the return
+     * pointer.
+     * @param source the string over which the CollationElementIterator will
+     *        iterate.
+     * @return the collation element iterator of the source string using this as
+     *         the based Collator.
+     * @stable ICU 2.2
+     */
+    virtual CollationElementIterator* createCollationElementIterator(
+                                           const UnicodeString& source) const;
+
+    /**
+     * Creates a collation element iterator for the source. The caller of this
+     * method is responsible for the memory management of the returned pointer.
+     * @param source the CharacterIterator which produces the characters over
+     *        which the CollationElementIterator will iterate.
+     * @return the collation element iterator of the source using this as the
+     *         based Collator.
+     * @stable ICU 2.2
+     */
+    virtual CollationElementIterator* createCollationElementIterator(
+                                         const CharacterIterator& source) const;
+
+    /**
+     * Compares a range of character data stored in two different strings based
+     * on the collation rules. Returns information about whether a string is
+     * less than, greater than or equal to another string in a language.
+     * This can be overridden in a subclass.
+     * @param source the source string.
+     * @param target the target string to be compared with the source string.
+     * @return the comparison result. GREATER if the source string is greater
+     *         than the target string, LESS if the source is less than the
+     *         target. Otherwise, returns EQUAL.
+     * @deprecated ICU 2.6 Use overload with UErrorCode&
+     */
+    virtual EComparisonResult compare(const UnicodeString& source,
+                                      const UnicodeString& target) const;
+
+
+    /**
+    * The comparison function compares the character data stored in two
+    * different strings. Returns information about whether a string is less 
+    * than, greater than or equal to another string.
+    * @param source the source string to be compared with.
+    * @param target the string that is to be compared with the source string.
+    * @param status possible error code
+    * @return Returns an enum value. UCOL_GREATER if source is greater
+    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+    * than target
+    * @stable ICU 2.6
+    **/
+    virtual UCollationResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      UErrorCode &status) const;
+
+    /**
+     * Compares a range of character data stored in two different strings based
+     * on the collation rules up to the specified length. Returns information
+     * about whether a string is less than, greater than or equal to another
+     * string in a language. This can be overridden in a subclass.
+     * @param source the source string.
+     * @param target the target string to be compared with the source string.
+     * @param length compares up to the specified length
+     * @return the comparison result. GREATER if the source string is greater
+     *         than the target string, LESS if the source is less than the
+     *         target. Otherwise, returns EQUAL.
+     * @deprecated ICU 2.6 Use overload with UErrorCode&
+     */
+    virtual EComparisonResult compare(const UnicodeString& source,
+                                      const UnicodeString&  target,
+                                      int32_t length) const;
+
+    /**
+    * Does the same thing as compare but limits the comparison to a specified 
+    * length
+    * @param source the source string to be compared with.
+    * @param target the string that is to be compared with the source string.
+    * @param length the length the comparison is limited to
+    * @param status possible error code
+    * @return Returns an enum value. UCOL_GREATER if source (up to the specified 
+    *         length) is greater than target; UCOL_EQUAL if source (up to specified 
+    *         length) is equal to target; UCOL_LESS if source (up to the specified 
+    *         length) is less  than target.
+    * @stable ICU 2.6
+    */
+    virtual UCollationResult compare(const UnicodeString& source,
+                                      const UnicodeString& target,
+                                      int32_t length,
+                                      UErrorCode &status) const;
+
+    /**
+     * The comparison function compares the character data stored in two
+     * different string arrays. Returns information about whether a string array
+     * is less than, greater than or equal to another string array.
+     * <p>Example of use:
+     * <pre>
+     * .       UChar ABC[] = {0x41, 0x42, 0x43, 0};  // = "ABC"
+     * .       UChar abc[] = {0x61, 0x62, 0x63, 0};  // = "abc"
+     * .       UErrorCode status = U_ZERO_ERROR;
+     * .       Collator *myCollation =
+     * .                         Collator::createInstance(Locale::US, status);
+     * .       if (U_FAILURE(status)) return;
+     * .       myCollation->setStrength(Collator::PRIMARY);
+     * .       // result would be Collator::EQUAL ("abc" == "ABC")
+     * .       // (no primary difference between "abc" and "ABC")
+     * .       Collator::EComparisonResult result =
+     * .                             myCollation->compare(abc, 3, ABC, 3);
+     * .       myCollation->setStrength(Collator::TERTIARY);
+     * .       // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
+     * .       // (with tertiary difference between "abc" and "ABC")
+     * .       result =  myCollation->compare(abc, 3, ABC, 3);
+     * </pre>
+     * @param source the source string array to be compared with.
+     * @param sourceLength the length of the source string array. If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @param target the string that is to be compared with the source string.
+     * @param targetLength the length of the target string array. If this value
+     *        is equal to -1, the string array is null-terminated.
+     * @return Returns a byte value. GREATER if source is greater than target;
+     *         EQUAL if source is equal to target; LESS if source is less than
+     *         target
+     * @deprecated ICU 2.6 Use overload with UErrorCode&
+     */
+    virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
+                                      const UChar* target, int32_t targetLength)
+                                      const;
+
+    /**
+    * The comparison function compares the character data stored in two
+    * different string arrays. Returns information about whether a string array 
+    * is less than, greater than or equal to another string array.
+    * @param source the source string array to be compared with.
+    * @param sourceLength the length of the source string array.  If this value
+    *        is equal to -1, the string array is null-terminated.
+    * @param target the string that is to be compared with the source string.
+    * @param targetLength the length of the target string array.  If this value
+    *        is equal to -1, the string array is null-terminated.
+    * @param status possible error code
+    * @return Returns an enum value. UCOL_GREATER if source is greater
+    * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
+    * than target
+    * @stable ICU 2.6
+    */
+    virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
+                                      const UChar* target, int32_t targetLength,
+                                      UErrorCode &status) const;
+
+    /**
+    * Transforms a specified region of the string into a series of characters
+    * that can be compared with CollationKey.compare. Use a CollationKey when
+    * you need to do repeated comparisons on the same string. For a single
+    * comparison the compare method will be faster.
+    * @param source the source string.
+    * @param key the transformed key of the source string.
+    * @param status the error code status.
+    * @return the transformed key.
+    * @see CollationKey
+    * @deprecated ICU 2.8 Use getSortKey(...) instead
+    */
+    virtual CollationKey& getCollationKey(const UnicodeString& source,
+                                          CollationKey& key,
+                                          UErrorCode& status) const;
+
+    /**
+    * Transforms a specified region of the string into a series of characters
+    * that can be compared with CollationKey.compare. Use a CollationKey when
+    * you need to do repeated comparisons on the same string. For a single
+    * comparison the compare method will be faster.
+    * @param source the source string.
+    * @param sourceLength the length of the source string.
+    * @param key the transformed key of the source string.
+    * @param status the error code status.
+    * @return the transformed key.
+    * @see CollationKey
+    * @deprecated ICU 2.8 Use getSortKey(...) instead
+    */
+    virtual CollationKey& getCollationKey(const UChar *source,
+                                          int32_t sourceLength,
+                                          CollationKey& key,
+                                          UErrorCode& status) const;
+
+    /**
+     * Generates the hash code for the rule-based collation object.
+     * @return the hash code.
+     * @stable ICU 2.0
+     */
+    virtual int32_t hashCode(void) const;
+
+    /**
+    * Gets the locale of the Collator
+    * @param type can be either requested, valid or actual locale. For more
+    *             information see the definition of ULocDataLocaleType in
+    *             uloc.h
+    * @param status the error code status.
+    * @return locale where the collation data lives. If the collator
+    *         was instantiated from rules, locale is empty.
+    * @deprecated ICU 2.8 likely to change in ICU 3.0, based on feedback
+    */
+    virtual const Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+    /**
+     * Gets the table-based rules for the collation object.
+     * @return returns the collation rules that the table collation object was
+     *         created from.
+     * @stable ICU 2.0
+     */
+    const UnicodeString& getRules(void) const;
+
+    /**
+     * Gets the version information for a Collator.
+     * @param info the version # information, the result will be filled in
+     * @stable ICU 2.0
+     */
+    virtual void getVersion(UVersionInfo info) const;
+
+    /**
+     * Return the maximum length of any expansion sequences that end with the
+     * specified comparison order.
+     * @param order a collation order returned by previous or next.
+     * @return maximum size of the expansion sequences ending with the collation
+     *         element or 1 if collation element does not occur at the end of
+     *         any expansion sequence
+     * @see CollationElementIterator#getMaxExpansion
+     * @stable ICU 2.0
+     */
+    int32_t getMaxExpansion(int32_t order) const;
+
+    /**
+     * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+     * method is to implement a simple version of RTTI, since not all C++
+     * compilers support genuine RTTI. Polymorphic operator==() and clone()
+     * methods call this method.
+     * @return The class ID for this object. All objects of a given class have
+     *         the same class ID. Objects of other classes have different class
+     *         IDs.
+     * @stable ICU 2.0
+     */
+    virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Returns the class ID for this class. This is useful only for comparing to
+     * a return value from getDynamicClassID(). For example:
+     * <pre>
+     * Base* polymorphic_pointer = createPolymorphicObject();
+     * if (polymorphic_pointer->getDynamicClassID() ==
+     *                                          Derived::getStaticClassID()) ...
+     * </pre>
+     * @return The class ID for all objects of this class.
+     * @stable ICU 2.0
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * Returns the binary format of the class's rules. The format is that of
+     * .col files.
+     * @param length Returns the length of the data, in bytes
+     * @param status the error code status.
+     * @return memory, owned by the caller, of size 'length' bytes.
+     * @stable ICU 2.2
+     */
+    uint8_t *cloneRuleData(int32_t &length, UErrorCode &status);
+
+
+    /** Creates a binary image of a collator. This binary image can be stored and 
+    *  later used to instantiate a collator using ucol_openBinary.
+    *  This API supports preflighting.
+    *  @param buffer a fill-in buffer to receive the binary image
+    *  @param capacity capacity of the destination buffer
+    *  @param status for catching errors
+    *  @return size of the image
+    *  @see ucol_openBinary
+    *  @stable ICU 3.4
+    */
+    int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status);
+
+    /**
+     * Returns current rules. Delta defines whether full rules are returned or
+     * just the tailoring.
+     * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
+     * @param buffer UnicodeString to store the result rules
+     * @stable ICU 2.2
+     */
+    void getRules(UColRuleOption delta, UnicodeString &buffer);
+
+    /**
+     * Universal attribute setter
+     * @param attr attribute type
+     * @param value attribute value
+     * @param status to indicate whether the operation went on smoothly or there were errors
+     * @stable ICU 2.2
+     */
+    virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
+                              UErrorCode &status);
+
+    /**
+     * Universal attribute getter.
+     * @param attr attribute type
+     * @param status to indicate whether the operation went on smoothly or there were errors
+     * @return attribute value
+     * @stable ICU 2.2
+     */
+    virtual UColAttributeValue getAttribute(UColAttribute attr,
+                                            UErrorCode &status);
+
+    /**
+     * Sets the variable top to a collation element value of a string supplied.
+     * @param varTop one or more (if contraction) UChars to which the variable top should be set
+     * @param len length of variable top string. If -1 it is considered to be zero terminated.
+     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
+     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+     * @stable ICU 2.0
+     */
+    virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status);
+
+    /**
+     * Sets the variable top to a collation element value of a string supplied.
+     * @param varTop an UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
+     * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
+     *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
+     *    U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes
+     * @return a 32 bit value containing the value of the variable top in upper 16 bits. Lower 16 bits are undefined
+     * @stable ICU 2.0
+     */
+    virtual uint32_t setVariableTop(const UnicodeString varTop, UErrorCode &status);
+
+    /**
+     * Sets the variable top to a collation element value supplied. Variable top is set to the upper 16 bits.
+     * Lower 16 bits are ignored.
+     * @param varTop CE value, as returned by setVariableTop or ucol_getVariableTop
+     * @param status error code (not changed by function)
+     * @stable ICU 2.0
+     */
+    virtual void setVariableTop(const uint32_t varTop, UErrorCode &status);
+
+    /**
+     * Gets the variable top value of a Collator.
+     * Lower 16 bits are undefined and should be ignored.
+     * @param status error code (not changed by function). If error code is set, the return value is undefined.
+     * @stable ICU 2.0
+     */
+    virtual uint32_t getVariableTop(UErrorCode &status) const;
+
+    /**
+     * Get an UnicodeSet that contains all the characters and sequences tailored in 
+     * this collator.
+     * @param status      error code of the operation
+     * @return a pointer to a UnicodeSet object containing all the 
+     *         code points and sequences that may sort differently than
+     *         in the UCA. The object must be disposed of by using delete
+     * @stable ICU 2.4
+     */
+    virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
+
+    /**
+     * Thread safe cloning operation.
+     * @return pointer to the new clone, user should remove it.
+     * @stable ICU 2.2
+     */
+    virtual Collator* safeClone(void);
+
+    /**
+     * Get the sort key as an array of bytes from an UnicodeString.
+     * @param source string to be processed.
+     * @param result buffer to store result in. If NULL, number of bytes needed
+     *        will be returned.
+     * @param resultLength length of the result buffer. If not enough, the
+     *        buffer will be filled to capacity.
+     * @return Number of bytes needed for storing the sort key
+     * @stable ICU 2.0
+     */
+    virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
+                               int32_t resultLength) const;
+
+    /**
+     * Get the sort key as an array of bytes from an UChar buffer.
+     * @param source string to be processed.
+     * @param sourceLength length of string to be processed. If -1, the string
+     *        is 0 terminated and length will be decided by the function.
+     * @param result buffer to store result in. If NULL, number of bytes needed
+     *        will be returned.
+     * @param resultLength length of the result buffer. If not enough, the
+     *        buffer will be filled to capacity.
+     * @return Number of bytes needed for storing the sort key
+     * @stable ICU 2.2
+     */
+    virtual int32_t getSortKey(const UChar *source, int32_t sourceLength,
+                               uint8_t *result, int32_t resultLength) const;
+
+    /**
+    * Determines the minimum strength that will be used in comparison or
+    * transformation.
+    * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
+    * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
+    * are ignored.
+    * @return the current comparison level.
+    * @see RuleBasedCollator#setStrength
+    * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
+    */
+    virtual ECollationStrength getStrength(void) const;
+
+    /**
+    * Sets the minimum strength to be used in comparison or transformation.
+    * @see RuleBasedCollator#getStrength
+    * @param newStrength the new comparison level.
+    * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
+    */
+    virtual void setStrength(ECollationStrength newStrength);
+
+private:
+
+    // private static constants -----------------------------------------------
+
+    enum {
+        /* needs a lookup in .commit() */
+        CHARINDEX = 0x70000000,
+        /* an expand index follows */
+        EXPANDCHARINDEX = 0x7E000000,
+        /* a contract index follows */
+        CONTRACTCHARINDEX = 0x7F000000,
+        /* value for unmapped characters */
+        UNMAPPED = 0xFFFFFFFF,
+        /* primary strength increment (one unit in the upper 16 bits) */
+        PRIMARYORDERINCREMENT = 0x00010000,
+        /* secondary strength increment (one unit in bits 8-15) */
+        SECONDARYORDERINCREMENT = 0x00000100,
+        /* tertiary strength increment (one unit in the low 8 bits) */
+        TERTIARYORDERINCREMENT = 0x00000001,
+        /* mask off anything but the primary order (upper 16 bits) */
+        PRIMARYORDERMASK = 0xffff0000,
+        /* mask off anything but the secondary order (bits 8-15) */
+        SECONDARYORDERMASK = 0x0000ff00,
+        /* mask off anything but the tertiary order (low 8 bits) */
+        TERTIARYORDERMASK = 0x000000ff,
+        /* mask off ignorable char order (keeps the low 16 bits) */
+        IGNORABLEMASK = 0x0000ffff,
+        /* use only the primary difference */
+        PRIMARYDIFFERENCEONLY = 0xffff0000,
+        /* use only the primary and secondary differences */
+        SECONDARYDIFFERENCEONLY = 0xffffff00,
+        /* primary order shift, in bits */
+        PRIMARYORDERSHIFT = 16,
+        /* secondary order shift, in bits */
+        SECONDARYORDERSHIFT = 8,
+        /* starting value for collation elements */
+        COLELEMENTSTART = 0x02020202,
+        /* testing mask for primary low element */
+        PRIMARYLOWZEROMASK = 0x00FF0000,
+        /* resetting value for secondaries and tertiaries */
+        RESETSECONDARYTERTIARY = 0x00000202,
+        /* resetting value for tertiaries */
+        RESETTERTIARY = 0x00000002,
+        /* NOTE(review): undocumented in the original; presumably marks a primary-ignorable element - confirm */
+        PRIMIGNORABLE = 0x0202
+    };
+
+    // private data members ---------------------------------------------------
+
+    UBool dataIsOwned;
+
+    UBool isWriteThroughAlias;
+
+    /**
+    * c struct for collation. All initialisation for it has to be done through
+    * setUCollator().
+    */
+    UCollator *ucollator;
+
+    /**
+    * Rule UnicodeString
+    */
+    UnicodeString urulestring;
+
+    // friend classes --------------------------------------------------------
+
+    /**
+    * Used to iterate over collation elements in a character source.
+    */
+    friend class CollationElementIterator;
+
+    /**
+    * Collator ONLY needs access to RuleBasedCollator(const Locale&,
+    *                                                       UErrorCode&)
+    */
+    friend class Collator;
+
+    /**
+    * Searching over collation elements in a character source
+    */
+    friend class StringSearch;
+
+    // private constructors --------------------------------------------------
+
+    /**
+     * Default constructor
+     */
+    RuleBasedCollator();
+
+    /**
+     * RuleBasedCollator constructor. This constructor takes a locale. The
+     * only caller of this constructor should be Collator::createInstance(). If
+     * createInstance() happens to know that the requested locale's collation is
+     * implemented as a RuleBasedCollator, it can then call this constructor.
+     * OTHERWISE IT SHOULDN'T, since this constructor ALWAYS RETURNS A VALID
+     * COLLATION TABLE. It does this by falling back to defaults.
+     * @param desiredLocale locale used
+     * @param status error code status
+     */
+    RuleBasedCollator(const Locale& desiredLocale, UErrorCode& status);
+
+    /**
+     * common constructor implementation
+     *
+     * @param rules the collation rules to build the collation table from.
+     * @param collationStrength default strength for comparison
+     * @param decompositionMode the normalisation mode
+     * @param status reporting a success or an error.
+     */
+    void
+    construct(const UnicodeString& rules,
+              UColAttributeValue collationStrength,
+              UColAttributeValue decompositionMode,
+              UErrorCode& status);
+
+    // private methods -------------------------------------------------------
+
+    /**
+    * Creates the c struct for ucollator
+    * @param locale desired locale
+    * @param status error status
+    */
+    void setUCollator(const Locale& locale, UErrorCode& status);
+
+    /**
+    * Creates the c struct for ucollator
+    * @param locale desired locale name
+    * @param status error status
+    */
+    void setUCollator(const char* locale, UErrorCode& status);
+
+    /**
+    * Installs an existing c struct as ucollator. This is used internally by
+    * StringSearch. Hence the responsibility of cleaning up the ucollator is
+    * not taken by this RuleBasedCollator: dataIsOwned is set to FALSE and
+    * isWriteThroughAlias to TRUE; the rule string is re-initialised from it.
+    * @param collator new ucollator data (a previously owned one is closed first)
+    */
+    void setUCollator(UCollator *collator);
+
+public:
+    /**
+    * Get UCollator data struct. Used only by StringSearch & intltest.
+    * @return UCollator data struct
+    * @internal
+    */
+    const UCollator * getUCollator();
+
+protected:
+   /**
+    * Used internally by registration to define the requested and valid locales.
+    * @param requestedLocale the requested locale
+    * @param validLocale the valid locale
+    * @param actualLocale the actual locale
+    * @internal
+    */
+    virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
+
+private:
+
+    // if not owned and not a write through alias, copy the ucollator
+    void checkOwned(void);
+
+    // utility to init rule string used by checkOwned and construct
+    void setRuleStringFromCollator();
+
+    /**
+    * Converts C's UCollationResult to EComparisonResult
+    * @param result member of the enum UCollationResult
+    * @return EComparisonResult equivalent of UCollationResult
+    * @deprecated ICU 2.6. We will not need it.
+    */
+    Collator::EComparisonResult getEComparisonResult(
+                                            const UCollationResult &result) const;
+
+    /**
+    * Converts C's UCollationStrength to ECollationStrength
+    * @param strength member of the enum UCollationStrength
+    * @return ECollationStrength equivalent of UCollationStrength
+    */
+    Collator::ECollationStrength getECollationStrength(
+                                        const UCollationStrength &strength) const;
+
+    /**
+    * Converts C++'s ECollationStrength to UCollationStrength
+    * @param strength member of the enum ECollationStrength
+    * @return UCollationStrength equivalent of ECollationStrength
+    */
+    UCollationStrength getUCollationStrength(
+      const Collator::ECollationStrength &strength) const;
+};
+
+// inline method implementation ---------------------------------------------
+
+// Locale overload: delegates to setUCollator(const char*, UErrorCode&).
+inline void RuleBasedCollator::setUCollator(const Locale &locale, UErrorCode &status)
+{
+    setUCollator(locale.getName(), status);
+}
+
+
+// Aliases the given collator; a collator this object owned is closed first.
+inline void RuleBasedCollator::setUCollator(UCollator *collator)
+{
+    if (dataIsOwned && ucollator) {
+        ucol_close(ucollator);
+    }
+    isWriteThroughAlias = TRUE;   // the new collator is used as a write-through alias
+    dataIsOwned = FALSE;          // ... and is not owned by this object
+    ucollator = collator;
+    setRuleStringFromCollator();  // runs last, after all three members are updated
+}
+
+inline const UCollator * RuleBasedCollator::getUCollator()
+{
+    return this->ucollator;  // hands back the held pointer as-is
+}
+
+// Translates the C API result into the C++ enum: UCOL_LESS -> LESS,
+// UCOL_EQUAL -> EQUAL, and every other value -> GREATER.
+inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
+                                           const UCollationResult &result) const
+{
+    if (result == UCOL_LESS) {
+        return Collator::LESS;
+    } else if (result == UCOL_EQUAL) {
+        return Collator::EQUAL;
+    } else {
+        return Collator::GREATER;
+    }
+}
+
+// Translates a C API strength into the C++ enum; any value other than
+// PRIMARY, SECONDARY, TERTIARY or QUATERNARY maps to Collator::IDENTICAL.
+inline Collator::ECollationStrength RuleBasedCollator::getECollationStrength(
+                                       const UCollationStrength &strength) const
+{
+    if (strength == UCOL_PRIMARY) {
+        return Collator::PRIMARY;
+    } else if (strength == UCOL_SECONDARY) {
+        return Collator::SECONDARY;
+    } else if (strength == UCOL_TERTIARY) {
+        return Collator::TERTIARY;
+    } else if (strength == UCOL_QUATERNARY) {
+        return Collator::QUATERNARY;
+    } else {
+        return Collator::IDENTICAL;
+    }
+}
+
+// Translates a C++ strength into the C API enum; any value other than
+// PRIMARY, SECONDARY, TERTIARY or QUATERNARY maps to UCOL_IDENTICAL.
+inline UCollationStrength RuleBasedCollator::getUCollationStrength(
+                             const Collator::ECollationStrength &strength) const
+{
+    if (strength == Collator::PRIMARY) {
+        return UCOL_PRIMARY;
+    } else if (strength == Collator::SECONDARY) {
+        return UCOL_SECONDARY;
+    } else if (strength == Collator::TERTIARY) {
+        return UCOL_TERTIARY;
+    } else if (strength == Collator::QUATERNARY) {
+        return UCOL_QUATERNARY;
+    } else {
+        return UCOL_IDENTICAL;
+    }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif


Mime
View raw message