Return-Path: Delivered-To: apmail-incubator-stdcxx-commits-archive@www.apache.org Received: (qmail 94091 invoked from network); 22 Sep 2006 00:42:46 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 22 Sep 2006 00:42:46 -0000 Received: (qmail 67214 invoked by uid 500); 22 Sep 2006 00:42:46 -0000 Delivered-To: apmail-incubator-stdcxx-commits-archive@incubator.apache.org Received: (qmail 67201 invoked by uid 500); 22 Sep 2006 00:42:45 -0000 Mailing-List: contact stdcxx-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: stdcxx-dev@incubator.apache.org Delivered-To: mailing list stdcxx-commits@incubator.apache.org Received: (qmail 67190 invoked by uid 99); 22 Sep 2006 00:42:45 -0000 Received: from idunn.apache.osuosl.org (HELO idunn.apache.osuosl.org) (140.211.166.84) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 21 Sep 2006 17:42:45 -0700 Authentication-Results: idunn.apache.osuosl.org smtp.mail=sebor@apache.org; spf=permerror X-ASF-Spam-Status: No, hits=-9.4 required=5.0 tests=ALL_TRUSTED,NO_REAL_NAME Received-SPF: error (idunn.apache.osuosl.org: domain apache.org from 140.211.166.113 cause and error) Received: from [140.211.166.113] ([140.211.166.113:51980] helo=eris.apache.org) by idunn.apache.osuosl.org (ecelerity 2.1.1.8 r(12930)) with ESMTP id 2E/72-06791-A7133154 for ; Thu, 21 Sep 2006 17:42:34 -0700 Received: by eris.apache.org (Postfix, from userid 65534) id 40BD81A9825; Thu, 21 Sep 2006 17:42:19 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r448754 [4/6] - /incubator/stdcxx/trunk/util/ Date: Fri, 22 Sep 2006 00:42:17 -0000 To: stdcxx-commits@incubator.apache.org From: sebor@apache.org X-Mailer: svnmailer-1.1.0 Message-Id: <20060922004219.40BD81A9825@eris.apache.org> X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Modified: 
incubator/stdcxx/trunk/util/ctype.cpp URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/ctype.cpp?view=diff&rev=448754&r1=448753&r2=448754 ============================================================================== --- incubator/stdcxx/trunk/util/ctype.cpp (original) +++ incubator/stdcxx/trunk/util/ctype.cpp Thu Sep 21 17:42:16 2006 @@ -6,16 +6,23 @@ * *************************************************************************** * - * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave - * Software division. Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0. Unless required by - * applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, either express or implied. See the License - * for the specific language governing permissions and limitations under - * the License. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Copyright 2001-2006 Rogue Wave Software. 
* **************************************************************************/ @@ -26,7 +33,7 @@ #include "scanner.h" // for scanner #include // for assert() -#include // for toupper() +#include // for isdigit(), ... #include // for sprintf() #include // for strtol() #include // for memset(), strchr() @@ -34,6 +41,9 @@ #include // for ctype_base::mask +static const char lc_name[] = "LC_CTYPE"; + + static wchar_t convert_literal_to_ucs4 (Scanner::token_t& t) { @@ -72,9 +82,7 @@ got_val = false; break; - case Scanner::tok_decimal_value: - case Scanner::tok_hex_value: - case Scanner::tok_octal_value: + case Scanner::tok_char_value: if (charmap_.mbcharlen (tok.name) == 1) val = scanner_.convert_escape (tok.name.c_str ()); else @@ -105,9 +113,7 @@ got_val = false; break; - case Scanner::tok_decimal_value: - case Scanner::tok_hex_value: - case Scanner::tok_octal_value: + case Scanner::tok_char_value: return charmap_.convert_to_wc ("", tok.name, val); default: @@ -118,73 +124,141 @@ } +// process absolute ellipsis +std::size_t Def:: +process_abs_ellipsis (const Scanner::token_t &nextnext, + std::ctype_base::mask m) +{ + std::size_t nchars = 0; + + typedef unsigned char UChar; + + // first we need to handle narrow chars if the range is a range + // of narrow characters + UChar first; + UChar last; + + // check to see if the start value is in the narrow map + // if it is then we have to add some values to the narrow mask_tab + if (get_n_val (next, first) && get_n_val (nextnext, last)) { + // both the start value and end value are in the mask table + // so add the mask to the narrow table from start value + // to end_value. 
Make sure that start < end + if (last < first) + issue_diag (E_RANGE, true, &next, + "illegal range [%u, %u] in LC_CTYPE definition\n", + last, first); + + for (unsigned val = first; val <= last; ++val) + ctype_out_.mask_tab [val] |= m; + + nchars += last - first; + } + + wchar_t wfirst; + wchar_t wlast; + + if (get_w_val (next, wfirst) && get_w_val (nextnext, wlast)) { + + for (wchar_t val = wfirst; val != wlast; ) { + + const mask_iter mask_pos = mask_.find (val); + + if (mask_pos == mask_.end ()) + mask_.insert (std::make_pair (val, m)); + else + mask_pos->second |= m; + + val = charmap_.increment_wchar (val); + + ++nchars; + } + + // now add the end_value + mask_iter mask_pos = mask_.find (wlast); + if(mask_pos == mask_.end ()) + mask_.insert (std::make_pair (wlast, m)); + else { + mask_pos->second |= m; + } + } + else { + warnings_occurred_ = + issue_diag (W_RANGE, false, + &next, "beginning or endpoint of range " + "was not found in the character map; " + "ignoring range\n") || warnings_occurred_; + } + + next = scanner_.next_token (); + + return nchars; +} + + // process hexadecimal symbolic ellipsis, decimal symbolic ellipsis, // and double increment hexadecimal symbolic ellipsis -void Def::process_sym_ellipsis (const std::string& start_sym, - const std::string& end_sym, - Scanner::token_id type, - std::ctype_base::mask m) { - +std::size_t Def:: +process_sym_ellipsis (const std::string& start_sym, + const std::string& end_sym, + Scanner::token_id type, + std::ctype_base::mask m) +{ + // number of characters in the range + std::size_t nchars = 0; + // first, get the alphabetic beginning of the sym name std::size_t idx = 0; - std::string begin; + std::string begin; - if (type == Scanner::tok_dellipsis - || type == Scanner::tok_doub_inc_ellipsis) { - while (idx < start_sym.size () && - ((start_sym[idx] < '0' || start_sym[idx] > '9') && - (start_sym[idx] < 'a' || start_sym[idx] > 'f') && - (start_sym[idx] < 'A' || start_sym[idx] > 'F'))) - begin += 
start_sym[idx++]; + const int base = + type == Scanner::tok_hex_ellipsis + || type == Scanner::tok_dbl_ellipsis ? 16 : 10; + + if (16 == base) { + // append all characters until the first hex digit + while (idx < start_sym.size () && !std::isxdigit (start_sym [idx])) + begin += start_sym [idx++]; } else { - while (idx < start_sym.size () && - ((start_sym[idx] < '0' || start_sym[idx] > '9'))) - begin += start_sym[idx++]; + // append all characters until the first decimal digit + while (idx < start_sym.size () && !std::isdigit (start_sym [idx])) + begin += start_sym [idx++]; } std::string num_str; // the numeric portion of the sym name // get the numeric portion of the sym_name, this is the portion // that will be different for each sym_name within the ellipsis - while (idx < start_sym.size() && start_sym[idx] != '>') - num_str += start_sym[idx++]; + while (idx < start_sym.size () && start_sym [idx] != '>') + num_str += start_sym [idx++]; std::size_t num_len = num_str.size(); // convert the numeric string to a long - unsigned long num = 0; - if (type == Scanner::tok_dellipsis - || type == Scanner::tok_doub_inc_ellipsis) - num = std::strtoul (num_str.c_str(), (char**)0, 16); - else - num = std::strtoul (num_str.c_str(), (char**)0, 10); + unsigned long num = std::strtoul (num_str.c_str(), (char**)0, base); // now create the symbolic name char next_num [32]; std::string sym_name; do { - if ( type == Scanner::tok_dellipsis - || type == Scanner::tok_doub_inc_ellipsis) { - std::sprintf (next_num, "%lx", num++); + int len; - if (type == Scanner::tok_doub_inc_ellipsis) - num++; + if (16 == base) { + len = std::sprintf (next_num, "%lX", num++); - // the numeric portion of the sym name must be uppercase - for (std::size_t i = 0; next_num[i] != '\0'; i++) - if (next_num[i] >= 'a' && next_num[i] <= 'f') - next_num[i] = (std::toupper) (next_num[i]); + if (type == Scanner::tok_dbl_ellipsis) + num++; } else { - std::sprintf (next_num, "%ld", num++); + len = std::sprintf (next_num, 
"%ld", num++); } sym_name = begin; - for (std::size_t leading_zeros = num_len - std::strlen (next_num); - leading_zeros > 0; leading_zeros--) - sym_name += '0'; + + sym_name.append (num_len - len, '0'); + sym_name += next_num; sym_name += '>'; @@ -197,8 +271,19 @@ } wchar_t w_val; - // if the value is not in the charmap then we cannot continue - if (!get_w_val (next, w_val)) { + if (get_w_val (next, w_val)) { + // add the mask to the mask map + mask_iter mask_pos = mask_.find (w_val); + if (mask_pos != mask_.end()) + mask_pos->second |= m; + else { + mask_.insert (std::make_pair (w_val, m)); + } + } + else { + // if the value is not in the charmap + // then we cannot continue (???) + /* warnings_occurred_ = issue_diag (W_SYM, false, @@ -208,17 +293,14 @@ || warnings_occurred_; */ } - else { - // add the mask to the mask map - mask_iter mask_pos = mask_.find (w_val); - if (mask_pos != mask_.end()) - mask_pos->second |= m; - else { - mask_.insert (std::make_pair (w_val, m)); - } - } + + ++nchars; + } while (sym_name != end_sym); + next = scanner_.next_token (); + + return nchars; } @@ -230,94 +312,71 @@ // the value of the character and adds the character to the mask map (if // the character is not alreay there) with the current mask. 
void Def:: -process_mask (std::ctype_base::mask m) +process_mask (std::ctype_base::mask m, const char *name) { - next = scanner_.next_token(); - Scanner::token_t nextnext = scanner_.next_token(); - while (next.token != Scanner::tok_nl){ - if (Scanner::tok_ellipsis == nextnext.token) { - // if there are ellipsis then include all characters in between - // the values that surround the ellipsis - nextnext = scanner_.next_token(); - - // first we need to handle narrow chars if the range is a range - // of narrow characters - unsigned char n_start_val; - unsigned char n_end_val; - // check to see if the start value is in the narrow map - // if it is then we have to add some values to the narrow mask_tab - if (get_n_val (next, n_start_val) - && get_n_val(nextnext, n_end_val)) { - // both the start value and end value are in the mask table - // so add the mask to the narrow table from start value - // to end_value. Make sure that start < end - if (n_start_val > n_end_val) - issue_diag (E_RANGE, true, - &next, "illegal range found in " - "ctype definition\n"); - for(unsigned char n_current_val = n_start_val; - n_current_val <= n_end_val; n_current_val++) { - ctype_out_.mask_tab [n_current_val] |= m; - } - } + issue_diag (I_STAGE, false, 0, "processing %s class\n", name); - wchar_t w_start_val; - wchar_t w_end_val; - if (!get_w_val (next, w_start_val) - || !get_w_val (nextnext, w_end_val)) { - warnings_occurred_ = - issue_diag (W_RANGE, false, - &next, "beginning or endpoint of range " - "was not found in the character map; " - "ignoring range\n") || warnings_occurred_; - } - else { - wchar_t w_current_val = w_start_val; - while (w_current_val != w_end_val) { - mask_iter mask_pos = mask_.find (w_current_val); - if(mask_pos != mask_.end()) - mask_pos->second |= m; - else { - mask_.insert(std::make_pair(w_current_val, m)); - } - - w_current_val = charmap_.increment_val (w_current_val); - } - // now add the end_value - mask_iter mask_pos = mask_.find (w_end_val); - if(mask_pos 
!= mask_.end()) - mask_pos->second |= m; - else { - mask_.insert(std::make_pair(w_end_val, m)); - } + next = scanner_.next_token (); - } + Scanner::token_t nextnext = scanner_.next_token (); - next = scanner_.next_token(); + std::size_t nchars = 0; + + typedef unsigned char UChar; + + for ( ; next.token != Scanner::tok_nl; ) { + + switch (nextnext.token) { + case Scanner::tok_abs_ellipsis: { + + // if there are ellipses then include all characters + // in between the values that surround the ellipsis + + // the next token will be the end of the range + nextnext = scanner_.next_token (); + nchars += process_abs_ellipsis (nextnext, m); + break; } - else if (Scanner::tok_dellipsis == nextnext.token - || Scanner::tok_qellipsis == nextnext.token - || Scanner::tok_doub_inc_ellipsis == nextnext.token) { - Scanner::token_id tok = nextnext.token; + case Scanner::tok_hex_ellipsis: + case Scanner::tok_dec_ellipsis: + case Scanner::tok_dbl_ellipsis: { + + const Scanner::token_id id = nextnext.token; // the next token will be the end of the range - nextnext = scanner_.next_token (); - process_sym_ellipsis (next.name, nextnext.name, tok, m); - next = scanner_.next_token(); + nextnext = scanner_.next_token (); + nchars += process_sym_ellipsis (next.name, nextnext.name, id, m); + break; } - else if (Scanner::tok_sym_name == next.token) { - unsigned char n_val; - // if the value is <= UCHARMAX then we will add this mask to the - // mask_tab table + case Scanner::tok_nl: + case Scanner::tok_sym_name: + case Scanner::tok_char_value: { + + UChar n_val; + // if the value is <= UCHARMAX then add this mask + // to the mask table if (get_n_val (next, n_val)) { ctype_out_.mask_tab [n_val] |= m; + ++nchars; } wchar_t w_val; - // if the value is not in the charmap then we cannot continue - if (!get_w_val (next, w_val)) { + if (get_w_val (next, w_val)) { + // add the mask to the mask map + const mask_iter mask_pos = mask_.find (w_val); + if (mask_pos == mask_.end ()) + mask_.insert 
(std::make_pair (w_val, m)); + else { + mask_pos->second |= m; + } + + ++nchars; + } + else { + // if the value is not in the charmap + // then we cannot continue (???) /* warnings_occurred_ = issue_diag (W_SYM, false, @@ -327,42 +386,12 @@ || warnings_occurred_; */ } - else { - // add the mask to the mask map - mask_iter mask_pos = mask_.find (w_val); - if (mask_pos != mask_.end()) - mask_pos->second |= m; - else { - mask_.insert (std::make_pair (w_val, m)); - } - } - next = nextnext; - } - - else if ( Scanner::tok_decimal_value == next.token - || Scanner::tok_hex_value == next.token - || Scanner::tok_octal_value == next.token) { - - unsigned char n_val; - if (get_n_val (next, n_val)) - ctype_out_.mask_tab[n_val] |= m; - - wchar_t w_val; - if (get_w_val (next, w_val)) { - mask_iter mask_pos = mask_.find (w_val); - if (mask_pos != mask_.end()) - mask_pos->second |= m; - else { - mask_.insert (std::make_pair (w_val, m)); - } - - } - next = nextnext; + break; } - else { + default: { // the ctype category definition contains non-symbolic characters // the actual value of the characters will be used. This is // unportable @@ -379,21 +408,28 @@ "length. 
Ignoring character\n", next.name.c_str()) || warnings_occurred_; else { - ctype_out_.mask_tab [(unsigned char) next.name[0]] |= m; - wchar_t mb_val = wchar_t ((unsigned char)next.name[0]); + ctype_out_.mask_tab [UChar (next.name [0])] |= m; + wchar_t mb_val = wchar_t (UChar (next.name [0])); mask_iter mask_pos = mask_.find (mb_val); if (mask_pos != mask_.end()) mask_pos->second |= m; else mask_.insert (std::make_pair (mb_val, m)); + ++nchars; } next = nextnext; } + + } + // if we are not at the newline get the next token if (Scanner::tok_nl != next.token) - nextnext = scanner_.next_token(); - + nextnext = scanner_.next_token (); } + + issue_diag (I_STAGE, false, 0, + "done processing %s class (%lu characters)\n", + name, nchars); } @@ -404,10 +440,18 @@ { assert (Scanner::tok_toupper == tok || Scanner::tok_tolower == tok); + const char* const name = + Scanner::tok_toupper == tok ? "upper" : "lower"; + + issue_diag (I_STAGE, false, 0, "processing ctype to%s map\n", name); + + std::size_t nchars = 0; + // process the toupper and tolower ctype categories next = scanner_.next_token(); - while (next.token != Scanner::tok_nl) { + for (; next.token != Scanner::tok_nl; ) { + std::string sym, sym2; // seperate the symbolic names in the toupper or tolower pair @@ -415,46 +459,54 @@ strip_pair(next.name, sym, sym2); // first process toupper or tolower for the narrow characters - n_cmap_iter sym1_pos = charmap_.get_n_cmap().find (sym); - n_cmap_iter sym2_pos = charmap_.get_n_cmap().find (sym2); - if (sym1_pos != charmap_.get_n_cmap().end() + const n_cmap_iter sym1_pos = charmap_.get_n_cmap().find (sym); + const n_cmap_iter sym2_pos = charmap_.get_n_cmap().find (sym2); + if ( sym1_pos != charmap_.get_n_cmap().end() && sym2_pos != charmap_.get_n_cmap().end()) { if (tok == Scanner::tok_toupper) ctype_out_.toupper_tab [sym1_pos->second] = sym2_pos->second; else ctype_out_.tolower_tab [sym1_pos->second] = sym2_pos->second; + + ++nchars; } // now process toupper or tolower fot the 
wide characters - w_cmap_iter wsym1_pos = charmap_.get_w_cmap().find (sym); - w_cmap_iter wsym2_pos = charmap_.get_w_cmap().find (sym2); - if (wsym1_pos == charmap_.get_w_cmap().end()) + const w_cmap_iter wsym1_pos = charmap_.get_w_cmap().find (sym); + const w_cmap_iter wsym2_pos = charmap_.get_w_cmap().find (sym2); + if (wsym1_pos == charmap_.get_w_cmap().end ()) warnings_occurred_ = issue_diag (W_SYM, false, &next, "unknown symbol name %s found in " - "LC_CTYPE definition\n", - sym.c_str()) || warnings_occurred_; + "%s definition\n", sym.c_str (), lc_name) + || warnings_occurred_; else if (wsym2_pos == charmap_.get_w_cmap().end()) warnings_occurred_ = issue_diag (W_SYM, false, &next, "unknown symbol name %s found in " - "LC_CTYPE definition\n", - sym2.c_str()) || warnings_occurred_; + "%s definition\n", + sym2.c_str (), lc_name) + || warnings_occurred_; else { if (tok == Scanner::tok_toupper) upper_.insert (std::make_pair (wsym1_pos->second, - wsym2_pos->second)); + wsym2_pos->second)); else lower_.insert (std::make_pair (wsym1_pos->second, - wsym2_pos->second)); + wsym2_pos->second)); + + ++nchars; } next = scanner_.next_token(); } + + issue_diag (I_STAGE, false, 0, + "done processing to%s map (%lu characters)\n", name, nchars); } void Def:: -process_transliteration_statement () +process_xlit_statement (std::size_t &nchars) { // convert the name we have for a symbolic name std::string sym_s (next.name); @@ -488,6 +540,7 @@ w_cmap_iter w_pos = charmap_.get_w_cmap().find (next.name); if (w_pos != charmap_.get_w_cmap().end()) { it->second.push_back(convert_to_ext(w_pos->second)); + ++nchars; } break; } @@ -503,13 +556,14 @@ if (enc.empty()) break; it->second.push_back (enc); + ++nchars; break; } default: issue_diag (W_SYNTAX, false, &next, - "unexpected token while processing " - "a transliteration statement\n"); + "ignoring unexpected token in " + "transliteration statement\n"); break; } @@ -524,8 +578,12 @@ void Def:: -process_transliteration () +process_xlit () 
{ + issue_diag (I_STAGE, false, 0, "processing transliteration\n"); + + std::size_t nchars = 0; + // used in processing the include directive int nesting_level = 0; std::list file_list; @@ -562,15 +620,15 @@ // get comment char and escape char; // these informations are stored by the scanner while ((next = scanner_.next_token ()).token - != Scanner::tok_translit_start ); + != Scanner::tok_xlit_start ); break; } case Scanner::tok_sym_name: { - process_transliteration_statement (); + process_xlit_statement (nchars); break; } - case Scanner::tok_translit_end: { + case Scanner::tok_xlit_end: { if (nesting_level == 0) return; @@ -595,19 +653,24 @@ // get comment char and escape char; // these informations are stored by the scanner while ((next = scanner_.next_token ()).token - != Scanner::tok_translit_start ); + != Scanner::tok_xlit_start); } default: break; } } + + issue_diag (I_STAGE, false, 0, "done processing transliteration " + "(%lu tokens, %lu characters)"); } void Def:: process_ctype () { + issue_diag (I_STAGE, false, 0, "processing %s section\n", lc_name); + ctype_def_found_ = true; // used in processing the copy/include directive @@ -665,7 +728,7 @@ != Scanner::tok_ctype ){ // the LC_IDENTIFICATION section may also have a // LC_CTYPE token that will mess up the parsing - if (next.token == Scanner::tok_identification) { + if (next.token == Scanner::tok_ident) { while ((next = scanner_.next_token()).token != Scanner::tok_end ); next = scanner_.next_token(); @@ -678,43 +741,43 @@ break; case Scanner::tok_upper: - process_mask(std::ctype_base::upper); + process_mask (std::ctype_base::upper, "upper"); break; case Scanner::tok_lower: - process_mask(std::ctype_base::lower); + process_mask (std::ctype_base::lower, "lower"); break; case Scanner::tok_alpha: - process_mask(std::ctype_base::alpha); + process_mask (std::ctype_base::alpha, "alpha"); break; case Scanner::tok_digit: - process_mask(std::ctype_base::digit); + process_mask (std::ctype_base::digit, "digit"); 
break; case Scanner::tok_space: - process_mask(std::ctype_base::space); + process_mask (std::ctype_base::space, "space"); break; case Scanner::tok_cntrl: - process_mask(std::ctype_base::cntrl); + process_mask (std::ctype_base::cntrl, "cntrl"); break; case Scanner::tok_punct: - process_mask(std::ctype_base::punct); + process_mask (std::ctype_base::punct, "punct"); break; case Scanner::tok_graph: - process_mask(std::ctype_base::graph); + process_mask (std::ctype_base::graph, "graph"); break; case Scanner::tok_print: - process_mask(std::ctype_base::print); + process_mask (std::ctype_base::print, "print"); break; case Scanner::tok_xdigit: - process_mask(std::ctype_base::xdigit); + process_mask (std::ctype_base::xdigit, "xdigit"); break; case Scanner::tok_toupper: @@ -729,8 +792,8 @@ scanner_.ignore_line(); break; - case Scanner::tok_translit_start: - process_transliteration (); + case Scanner::tok_xlit_start: + process_xlit (); break; case Scanner::tok_end: @@ -766,7 +829,7 @@ assert (!dir_name.empty()); if (ctype_filename_.empty ()) { - ctype_filename_ = dir_name + _RWSTD_PATH_SEP + "LC_CTYPE"; + ctype_filename_ = dir_name + _RWSTD_PATH_SEP + lc_name; ctype_symlink_ = false; } @@ -841,7 +904,7 @@ ctype_filename_.size ()); } - std::string sname ("LC_CTYPE"); + std::string sname (lc_name); create_symlink (output_name_, xname, sname); return; } Modified: incubator/stdcxx/trunk/util/def.cpp URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/def.cpp?view=diff&rev=448754&r1=448753&r2=448754 ============================================================================== --- incubator/stdcxx/trunk/util/def.cpp (original) +++ incubator/stdcxx/trunk/util/def.cpp Thu Sep 21 17:42:16 2006 @@ -6,16 +6,23 @@ * *************************************************************************** * - * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave - * Software division. 
Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0. Unless required by - * applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, either express or implied. See the License - * for the specific language governing permissions and limitations under - * the License. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Copyright 2001-2006 Rogue Wave Software. 
* **************************************************************************/ @@ -62,9 +69,9 @@ // a narrow character string in the current locale's encoding std::string Def::convert_to_ext (wchar_t val) { - rn_cmap_iter2 it; - if ((it = charmap_.get_rn_cmap2().find(val)) - != charmap_.get_rn_cmap2().end()){ + rmb_cmap_iter it; + if ((it = charmap_.get_rmb_cmap().find(val)) + != charmap_.get_rmb_cmap().end()){ return it->second; } @@ -483,32 +490,26 @@ break; case Scanner::tok_ctype: - issue_diag (I_STAGE, false, 0, "processing LC_CTYPE\n"); process_ctype (); break; case Scanner::tok_collate: - issue_diag (I_STAGE, false, 0, "processing LC_COLLATE\n"); process_collate (); break; case Scanner::tok_monetary: - issue_diag (I_STAGE, false, 0, "processing LC_MONETARY\n"); process_monetary (); break; case Scanner::tok_numeric: - issue_diag (I_STAGE, false, 0, "processing LC_NUMERIC\n"); process_numeric (); break; case Scanner::tok_time: - issue_diag (I_STAGE, false, 0, "processing LC_TIME\n"); process_time (); break; case Scanner::tok_messages: - issue_diag (I_STAGE, false, 0, "processing LC_MESSAGES\n"); process_messages (); break; Modified: incubator/stdcxx/trunk/util/def.h URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/def.h?view=diff&rev=448754&r1=448753&r2=448754 ============================================================================== --- incubator/stdcxx/trunk/util/def.h (original) +++ incubator/stdcxx/trunk/util/def.h Thu Sep 21 17:42:16 2006 @@ -2,20 +2,27 @@ * * def.h * - * $Id: //stdlib/dev/source/stdlib/util/def.h#2 $ + * $Id$ * *************************************************************************** * - * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave - * Software division. Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0. 
Unless required by - * applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, either express or implied. See the License - * for the specific language governing permissions and limitations under - * the License. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Copyright 2001-2006 Rogue Wave Software. 
* **************************************************************************/ @@ -30,6 +37,7 @@ #include // for assert() #include // for UCHAR_MAX +#include // for size_t #include @@ -144,25 +152,23 @@ // copy a file void copy_file(const std::string &name, const std::string &outname); + // process absolute ellipsis + std::size_t process_abs_ellipsis (const Scanner::token_t&, + std::ctype_base::mask); + // process hexadecimal symbolic ellipsis, decimal symbolic ellipsis, // and double increment hexadecimal symbolic ellipsis - void process_sym_ellipsis (const std::string& start_sym, - const std::string& end_sym, - Scanner::token_id type, - std::ctype_base::mask m); - - // hexadecimally increment the symbolic name - std::string hex_increment (const std::string& sym); - - // decimally increment the symbolic name - std::string dec_increment (const std::string& sym); + std::size_t process_sym_ellipsis (const std::string&, + const std::string&, + Scanner::token_id, + std::ctype_base::mask); // parse the era string void parse_era (const token_t&); // process the ctype category specified by m with the exception of // (e.g. 
std::ctype_base::upper) - void process_mask(std::ctype_base::mask m); + void process_mask (std::ctype_base::mask, const char*); // process the ctype toupper and tolower definitions void process_upper_lower(Scanner::token_id tok); @@ -171,11 +177,12 @@ void process_ctype(); // process transliteration information - void process_transliteration (); - void process_transliteration_statement (); + void process_xlit (); + + void process_xlit_statement (std::size_t&); // process the collate section of the locale definition file - void process_collate (); + void process_collate (); // processing of collating definition statements void process_collate_definition (bool, collate_entry_t&, @@ -216,6 +223,12 @@ // process the numeric section of the locale definition file void process_numeric(); + // extracts and converts an array of strings such as those + // representing the names of weekdays in the LC_TIME section + Scanner::token_t + extract_string_array (std::string*, std::wstring*, std::size_t); + + // process the time section of the locale definition file void process_time(); @@ -273,67 +286,52 @@ // maps characters to their upper case representation std::map upper_; - std::mapmb_char_off_map_; typedef std::map::iterator mb_char_off_map_iter; - struct ctype_offset_tab_t { - unsigned int off[UCHAR_MAX + 1]; - }; - - std::mapwchar_off_map_; - std::map mb_char_offs_; - typedef std::map::iterator - mb_char_offs_iter; - - std::map wchar_offs_; - typedef std::map::iterator - wchar_offs_iter; - - std::map utf8_offs_; - typedef std::map::iterator - utf8_offs_iter; + struct codecvt_offset_tab_t { + unsigned int off [UCHAR_MAX + 1]; + }; void create_wchar_utf8_table (); std::map wchar_utf8_to_ext_; typedef std::map::iterator wchar_utf8_iter; void gen_valid_coll_wchar_set (); - void gen_valid_codecvt_wchar_set (); - void gen_valid_codecvt_utf8_set (); - void gen_utf8_map(); - std::set valid_coll_wchar_set_; typedef std::set::iterator valid_coll_wchar_set_iter; std::set 
valid_codecvt_wchar_set_; typedef std::set::iterator valid_codecvt_wchar_set_iter; - std::set valid_codecvt_utf8_set_; - typedef std::set::iterator valid_codecvt_utf8_set_iter; - - // the set of complete utf8 strings in the current character map - std::map utf8_map_; - typedef std::map::iterator utf8_map_iter; - unsigned int next_codecvt_tab_num_; - void generate_codecvt_table (const std::string &charp, - unsigned int tab_num); - - unsigned int next_wchar_codecvt_tab_num_; - unsigned int next_utf8_codecvt_tab_num_; - void generate_wchar_codecvt_table (const std::string &charp, - unsigned int tab_num); - - void generate_utf8_codecvt_table (const std::string &charp, - unsigned int tab_num); + typedef std::map + codecvt_offsets_map_t; + // generates conversion tables of all valid multibyte characters + // from a multibyte character map populated from the character + // set description file + std::size_t + gen_mbchar_tables (codecvt_offsets_map_t&, + std::map&, + const std::string& = "", + unsigned = 0); + + std::size_t + gen_wchar_tables (codecvt_offsets_map_t&, + const std::string& = "", + unsigned = 0); + + std::size_t + gen_utf8_tables (codecvt_offsets_map_t&, + std::map&, + const std::string& = "", + unsigned = 0); std::set valid_coll_mb_set_; - std::set valid_codecvt_mb_set_; + void gen_valid_coll_mb_set(); - void gen_valid_codecvt_mb_set(); // generation of transliteration tables - void generate_xliteration_data (); + void gen_xlit_data (); // specifies if the locale file has already been written such as when // the "copy" directive is used in a locale definition file @@ -449,20 +447,20 @@ typedef std::map::iterator upper_iter; typedef std::map::iterator lower_iter; typedef std::map< std::string, unsigned char >::const_iterator n_cmap_iter; - typedef std::map::const_iterator n_cmap_iter2; - typedef std::map::const_iterator rn_cmap_iter2; + typedef std::map::const_iterator mb_cmap_iter; + typedef std::map::const_iterator rmb_cmap_iter; typedef 
std::map::const_iterator w_cmap_iter; typedef std::map::const_iterator rw_cmap_iter; typedef std::map::iterator off_mapr_iter; - typedef std::map::const_iterator strval_map_iter; typedef std::map::const_iterator ucs4_cmap_iter; + typedef std::list::const_iterator symnames_list_iter; + // the structures used to hold the offsets for each locale category // and any non-pointer locale information _RW::__rw_punct_t mon_punct_out_; _RW::__rw_punct_t num_punct_out_; _RW::__rw_ctype_t ctype_out_; - _RW::__rw_codecvt_t codecvt_out_; _RW::__rw_time_t time_out_; _RW::__rw_collate_t collate_out_; _RW::__rw_mon_t mon_out_; Modified: incubator/stdcxx/trunk/util/diagnostic.cpp URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/diagnostic.cpp?view=diff&rev=448754&r1=448753&r2=448754 ============================================================================== --- incubator/stdcxx/trunk/util/diagnostic.cpp (original) +++ incubator/stdcxx/trunk/util/diagnostic.cpp Thu Sep 21 17:42:16 2006 @@ -2,37 +2,45 @@ * * diagnostic.cpp * - * $Id: //stdlib/dev/source/stdlib/util/diagnostic.cpp#18 $ + * $Id$ * *************************************************************************** * - * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave - * Software division. Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0. Unless required by - * applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, either express or implied. See the License - * for the specific language governing permissions and limitations under - * the License. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Copyright 2001-2006 Rogue Wave Software. * **************************************************************************/ #include "diagnostic.h" -#include "scanner.h" // for token_t +#include "scanner.h" // for Scanner::token_t #include "loc_exception.h" -#include -#include -#include -#include +#include // for va_list, ... +#include // for puts(), fprintf(), ... +#include // for set +// set of disabled warnings static std::set disabled; -static bool warn = true; -static bool info = false; +static bool warn = true; // warnings (on by default) +static bool info = false; // info messages (off by default) + // write a warning or error message to standard output. If it is a warning // that is issued and that warning has not been disabled then return true. 
@@ -43,17 +51,23 @@ if (0 == fmt) { - // special treatment when format string is 0 + // special treatment when format string is 0: a request + // to enable or disable this type of diagnostic, e.g., + // in response to a command line option if (W_DISABLE == type) { + // disable all warnings enabled = warn; warn = false; } else if (I_ENABLE == type) { + // enable all informational messages enabled = info; info = true; } else { + // disable a specific warning and return its previous + // setting (i.e., enabled or disabled) enabled = disabled.find (type) == disabled.end (); disabled.insert (type); } @@ -65,13 +79,18 @@ const bool is_warn = !is_info && W_FIRST <= type && type <= W_LAST; const bool is_error = !is_info && !is_warn; - if (is_warn && (!warn || disabled.end () != disabled.find (type))) + if (is_warn && (!warn || disabled.end () != disabled.find (type))) { + // warning disabled return enabled; + } - if (is_info && !info) + if (is_info && !info) { + // info disabled return enabled; + } - // all errors and those warnings that are not disabled should be written + // all errors and those warnings that are not disabled + // must be issued enabled = true; if (token && token->file) @@ -91,46 +110,51 @@ std::vfprintf (stderr, fmt, va); va_end (va); - if (token) { - // if the token pointer is non-zero, find the file and line - // the token appears on and print it out, followed by a line - // underscoring the token that caused the diagnostic with - // a string of carets ('^') - - std::FILE* const f = std::fopen (token->file, "r"); - - if (f) { - int i; - char line [1024]; - - for (i = 0; i < token->line; i++) { - std::fgets (line, 1024, f); + // if the token pointer is non-zero, find the file and line + // the token appears on and print it out, followed by a line + // underscoring the token that caused the diagnostic with + // a string of carets ('^') + + std::FILE* const ftok = token ? 
std::fopen (token->file, "r") : 0; + + if (ftok) { + int i; + char line [1024]; // FIXME: handle longer lines + + // advance to the specified line in the file + for (i = 0; i < token->line; ++i) { + if (0 == std::fgets (line, 1024, ftok)) { + *line = '\0'; + break; } + } - std::fprintf (stderr, "\t\t%s\t\t", line); + if (i == token->line && '\0' != *line) { + std::fputs ("\t\t", stderr); + std::fputs (line, stderr); + std::fputs ("\t\t", stderr); // tok->col is the column number where the first character // in the token begins. Go through the line saving tabs // so that the '^' will line up with the token - for (i = 0; i < token->column; i++) { - if (line [i] == '\t') - std::fprintf (stderr, "\t"); - else - std::fprintf (stderr, " "); - } + for (i = 0; i < token->column; ++i) + std::fputc (line [i] == '\t' ? '\t' : ' ', stderr); - for (unsigned int j = 0; j < token->name.size(); j++) - std::fprintf (stderr, "^"); + for (unsigned j = 0; j < token->name.size (); ++j) + std::fputc ('^', stderr); - std::fprintf (stderr, "\n"); - std::fclose (f); + std::fputc ('\n', stderr); } + + std::fclose (ftok); } if (is_error) { + // throw an exception if the diagnostic is a hard error throw loc_exception (); } + // return otherwise (i.e., the diagnostic is not an error) return enabled; } Modified: incubator/stdcxx/trunk/util/diagnostic.h URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/util/diagnostic.h?view=diff&rev=448754&r1=448753&r2=448754 ============================================================================== --- incubator/stdcxx/trunk/util/diagnostic.h (original) +++ incubator/stdcxx/trunk/util/diagnostic.h Thu Sep 21 17:42:16 2006 @@ -2,20 +2,27 @@ * * diagnostic.h * - * $Id: //stdlib/dev/source/stdlib/util/diagnostic.h#12 $ + * $Id$ * *************************************************************************** * - * Copyright (c) 1994-2005 Quovadx, Inc., acting through its Rogue Wave - * Software division. 
Licensed under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the - * License. You may obtain a copy of the License at - * http://www.apache.org/licenses/LICENSE-2.0. Unless required by - * applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, either express or implied. See the License - * for the specific language governing permissions and limitations under - * the License. + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + * + * Copyright 2001-2006 Rogue Wave Software. 
* **************************************************************************/ @@ -33,12 +40,12 @@ E_COLNUM = 114, // bad number of collation orders E_CVT2EXT = 123, // convert a character to external representation E_SYMEND = 127, // unterminated symbolic name (missing '>') - E_CVT = 136, // convert a character + E_CVT = 136, // error converting a character E_IFMT = 135, // invalid integer format E_MBCHAR = 308, // illegal/incomplete multibyte character E_UCS = 315, // ill-formed or invalid UCS character E_MBTOWC = 310, // mbtowc() error - E_RANGE = 311, // invalid range + E_RANGE = 311, // invalid range (ellipsis) E_SYNTAX = 312, // bad syntax E_COLORD = 133, // bad collating order E_REORD = 291, // bad reorder-after @@ -48,6 +55,7 @@ E_NOARG = 403, // missing command line argument E_OPTARG = 404, // missing argument to a command line option E_CALL = 405, // system or libc call failed + E_NOTSUP = 712, // feature not supported (hard error) E_LAST = 699, W_FIRST = 700, @@ -55,14 +63,17 @@ W_COMPAT = 701, // no compatible locale installed W_NOPCS = 702, // PCS character value not defined W_COLSYM = 703, // undefined collating symbol or collating element - W_COLVAL = 704, // missing collation value for symbol + W_MISSING = 704, // missing value W_REORD = 705, // bad reorder-after W_ICONV = 706, // iconv_open() or iconv() error W_SYM = 707, // unknown symbolic constant - W_SYNTAX = 708, // recoverable syntax error - W_RANGE = 709, // recoverable invalid range - W_INVAL = 710, // recoverable invalid value - W_CALL = 711, // system or libc call failed + W_CHAR = 708, // unknown character (no corresponding symbol) + W_CHARMAP = 709, // unknown charmap + W_SYNTAX = 709, // recoverable syntax error + W_RANGE = 710, // recoverable invalid range + W_INVAL = 711, // recoverable invalid value + W_CALL = 712, // system or libc call failed + W_NOTSUP = 713, // feature not supported (will be ignored) W_LAST = 799, I_FIRST = 800, @@ -72,6 +83,7 @@ I_OPENWR = 803, // information 
about files being opened for writing I_READ = 804, // information about data reads I_WRITE = 805, // information about data writes + I_SKIP = 806, // information about skipping an operation I_LAST = 899 };