From stdcxx-commits-return-1400-apmail-incubator-stdcxx-commits-archive=incubator.apache.org@incubator.apache.org Mon Jun 18 03:03:23 2007 Return-Path: Delivered-To: apmail-incubator-stdcxx-commits-archive@www.apache.org Received: (qmail 59575 invoked from network); 18 Jun 2007 03:03:23 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 18 Jun 2007 03:03:23 -0000 Received: (qmail 72501 invoked by uid 500); 18 Jun 2007 03:03:27 -0000 Delivered-To: apmail-incubator-stdcxx-commits-archive@incubator.apache.org Received: (qmail 72484 invoked by uid 500); 18 Jun 2007 03:03:27 -0000 Mailing-List: contact stdcxx-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: stdcxx-dev@incubator.apache.org Delivered-To: mailing list stdcxx-commits@incubator.apache.org Received: (qmail 72473 invoked by uid 99); 18 Jun 2007 03:03:26 -0000 Received: from herse.apache.org (HELO herse.apache.org) (140.211.11.133) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 17 Jun 2007 20:03:26 -0700 X-ASF-Spam-Status: No, hits=-99.5 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 17 Jun 2007 20:03:22 -0700 Received: by eris.apache.org (Postfix, from userid 65534) id 695EF1A981D; Sun, 17 Jun 2007 20:03:02 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r548179 - in /incubator/stdcxx/trunk/tests: include/rw_locale.h src/locale.cpp Date: Mon, 18 Jun 2007 03:03:01 -0000 To: stdcxx-commits@incubator.apache.org From: sebor@apache.org X-Mailer: svnmailer-1.1.0 Message-Id: <20070618030302.695EF1A981D@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: sebor Date: Sun Jun 17 20:03:00 2007 New Revision: 548179 URL: http://svn.apache.org/viewvc?view=rev&rev=548179 Log: 2007-06-17 Martin Sebor * rw_locale.h (rw_mbchar_array_t, rw_get_mb_chars, rw_find_mb_locale): New type and helper functions to retrieve locales that use multibyte encodings (where MB_CUR_MAX > 1). Modified: incubator/stdcxx/trunk/tests/include/rw_locale.h incubator/stdcxx/trunk/tests/src/locale.cpp Modified: incubator/stdcxx/trunk/tests/include/rw_locale.h URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/tests/include/rw_locale.h?view=diff&rev=548179&r1=548178&r2=548179 ============================================================================== --- incubator/stdcxx/trunk/tests/include/rw_locale.h (original) +++ incubator/stdcxx/trunk/tests/include/rw_locale.h Sun Jun 17 20:03:00 2007 @@ -2,7 +2,7 @@ * * localedef.h - declarations of locale testsuite helpers * - * $Id:$ + * $Id$ * ************************************************************************ * @@ -62,6 +62,26 @@ // returned from prior calls _TEST_EXPORT const char* rw_localedef (const char*, const char*, const char*, const char*); + + +// an array of multibyte characters 1 to MB_LEN_MAX bytes in length +typedef char +rw_mbchar_array_t [_RWSTD_MB_LEN_MAX][_RWSTD_MB_LEN_MAX]; + +// fills consecutive elements of the `mb_chars' array with multibyte +// characters between 1 and MB_CUR_MAX bytes long for the currently +// set locale +// returns the number of elements populated (normally, MB_CUR_MAX) +_TEST_EXPORT _RWSTD_SIZE_T +rw_get_mb_chars (rw_mbchar_array_t /* mb_chars */); + + +// finds the multibyte locale with the largest MB_CUR_MAX value and +// fills consecutive elements of the `mb_chars' array with multibyte +// characters between 1 and MB_CUR_MAX bytes long for such a locale +_TEST_EXPORT const char* +rw_find_mb_locale (_RWSTD_SIZE_T* /* mb_cur_max */, + rw_mbchar_array_t /* mb_chars */); #endif // RW_LOCALE_H_INCLUDED Modified: incubator/stdcxx/trunk/tests/src/locale.cpp URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/tests/src/locale.cpp?view=diff&rev=548179&r1=548178&r2=548179 ============================================================================== --- incubator/stdcxx/trunk/tests/src/locale.cpp (original) +++ incubator/stdcxx/trunk/tests/src/locale.cpp Sun Jun 17 20:03:00 2007 @@ -34,6 +34,7 @@ #include // for rw_putenv() #include // for SHELL_RM_RF, rw_tmpnam +#include // for rw_fprintf() #include // for rw_system() @@ -62,7 +63,7 @@ #include // for assert #include // for EBADF #include // for {FLT,DBL,LDBL}_DIG -#include // for CHAR_BIT, PATH_MAX +#include // for CHAR_BIT, MB_LEN_MAX, PATH_MAX #include // for LC_XXX macros, setlocale #include // for va_copy, va_list, ... #include // for fgets, remove, sprintf, ... @@ -470,3 +471,181 @@ return slocname; } + +/**************************************************************************/ + +// finds a multibyte character that is `bytes' long if `bytes' is less +// than or equal to MB_CUR_MAX, or the longest multibyte sequence in +// the current locale +static const char* +_get_mb_char (char *buf, size_t bytes) +{ + _RWSTD_ASSERT (0 != buf); + + *buf = '\0'; + + if (0 == bytes) + return buf; + + const bool exact = bytes <= size_t (MB_CUR_MAX); + + if (!exact) + bytes = MB_CUR_MAX; + + wchar_t wc; + + // search the first 64K characters sequentially + for (wc = wchar_t (1); wc != wchar_t (0xffff); ++wc) { + + if ( int (bytes) == wctomb (buf, wc) + && int (bytes) == mblen (buf, bytes)) { + // NUL-terminate the multibyte character of the requested length + buf [bytes] = '\0'; + break; + } + + *buf = '\0'; + } + +#if 2 < _RWSTD_WCHAR_T_SIZE + + // if a multibyte character of the requested size is not found + // in the low 64K range, try to find one using a random search + if (wchar_t (0xffff) == wc) { + + // iterate only so many times to prevent an infinite loop + // in case when MB_CUR_MAX is greater than the longest + // multibyte character + for (int i = 0; i != 0x100000; ++i) { + + wc = wchar_t (rand ()); + + if (RAND_MAX < 0x10000) { + wc <<= 16; + wc |= wchar_t (rand ()); + } + + if ( int (bytes) == wctomb (buf, wc) + && int (bytes) == mblen (buf, bytes)) { + // NUL-terminate the multibyte character + buf [bytes] = '\0'; + break; + } + + *buf = '\0'; + } + } + +#endif // 2 < _RWSTD_WCHAR_SIZE + + // return 0 on failure to find a sequence exactly `bytes' long + return !exact || bytes == strlen (buf) ? buf : 0; +} + + +_TEST_EXPORT size_t +rw_get_mb_chars (rw_mbchar_array_t mb_chars) +{ + _RWSTD_ASSERT (0 != mb_chars); + + const char* mbc = _get_mb_char (mb_chars [0], size_t (-1)); + + if (!mbc) { + rw_fprintf (rw_stderr, "*** failed to find any multibyte characters " + "in locale \"%s\" with MB_CUR_MAX = %u\n", + setlocale (LC_CTYPE, 0), MB_CUR_MAX); + return 0; + } + + size_t mb_cur_max = strlen (mbc); + + if (_RWSTD_MB_LEN_MAX < mb_cur_max) + mb_cur_max = _RWSTD_MB_LEN_MAX; + + // fill each element of `mb_chars' with a multibyte character + // of the corresponding length + for (size_t i = mb_cur_max; i; --i) { + + // try to generate a multibyte character `i' bytes long + mbc = _get_mb_char (mb_chars [i - 1], i); + + if (0 == mbc) { + if (i < mb_cur_max) { + rw_fprintf (rw_stderr, "*** failed to find %u-byte characters " + "in locale \"%s\" with MB_CUR_MAX = %u\n", + i + 1, setlocale (LC_CTYPE, 0), MB_CUR_MAX); + mb_cur_max = 0; + break; + } + --mb_cur_max; + } + } + + return mb_cur_max; +} + + +_TEST_EXPORT const char* +rw_find_mb_locale (size_t *mb_cur_max, + rw_mbchar_array_t mb_chars) +{ + _RWSTD_ASSERT (0 != mb_cur_max); + _RWSTD_ASSERT (0 != mb_chars); + + if (2 > _RWSTD_MB_LEN_MAX) { + rw_fprintf (rw_stderr, "MB_LEN_MAX = %d, giving up\n", + _RWSTD_MB_LEN_MAX); + return 0; + } + + static const char *mb_locale_name; + + char saved_locale_name [1024]; + strcpy (saved_locale_name, setlocale (LC_CTYPE, 0)); + + _RWSTD_ASSERT (strlen (saved_locale_name) < sizeof saved_locale_name); + + *mb_cur_max = 0; + + // iterate over all installed locales + for (const char *name = rw_locales (_RWSTD_LC_CTYPE, 0); name && *name; + name += strlen (name) + 1) { + + if (setlocale (LC_CTYPE, name)) { + + // try to generate a set of multibyte characters + // with lengths from 1 and MB_CUR_MAX (or less) + const size_t cur_max = rw_get_mb_chars (mb_chars); + + if (*mb_cur_max < cur_max) { + *mb_cur_max = cur_max; + mb_locale_name = name; + + // break when we've found a multibyte locale + // with the longest possible encoding + if (_RWSTD_MB_LEN_MAX == *mb_cur_max) + break; + } + } + } + + if (*mb_cur_max < 2) { + rw_fprintf (rw_stderr, "*** failed to find a full set of multibyte " + "characters in locale \"%s\" with MB_CUR_MAX = %u " + "(computed)", mb_locale_name, *mb_cur_max); + mb_locale_name = 0; + } + else { + // (re)generate the multibyte characters for the saved locale + // as they may have been overwritten in subsequent iterations + // of the loop above (while searching for a locale with greater + // value of MB_CUR_MAX) + setlocale (LC_CTYPE, mb_locale_name); + rw_get_mb_chars (mb_chars); + } + + setlocale (LC_CTYPE, saved_locale_name); + + return mb_locale_name; +} +