stdcxx-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From se...@apache.org
Subject svn commit: r540193 - in /incubator/stdcxx/trunk/examples/manual: insert_wchar.cpp out/insert_wchar.out
Date Mon, 21 May 2007 16:23:17 GMT
Author: sebor
Date: Mon May 21 09:23:16 2007
New Revision: 540193

URL: http://svn.apache.org/viewvc?view=rev&rev=540193
Log:
2007-05-21  Martin Sebor  <sebor@roguewave.com>

	STDCXX-296
	* insert_wchar.cpp: New example program demonstrating an implementation
	of an inserter operator overloaded for arrays of wchar_t that performs
	codeset conversion from arrays of wchar_t to multibyte characters.
	* insert_wchar.out: Expected output of the example program.

Added:
    incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp   (with props)
    incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out

Added: incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp?view=auto&rev=540193
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp (added)
+++ incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp Mon May 21 09:23:16 2007
@@ -0,0 +1,211 @@
+/**************************************************************************
+ *
+ * insert_wchar.cpp
+ *
+ * Example program demonstrating an implementation of an inserter
+ * operator overloaded for arrays of wchar_t that performs codeset
+ * conversion from wchar_t to multibyte characters.
+ *
+ * $Id$
+ *
+ ***************************************************************************
+ *
+ * Licensed to the Apache Software  Foundation (ASF) under one or more
+ * contributor  license agreements.  See  the NOTICE  file distributed
+ * with  this  work  for  additional information  regarding  copyright
+ * ownership.   The ASF  licenses this  file to  you under  the Apache
+ * License, Version  2.0 (the  "License"); you may  not use  this file
+ * except in  compliance with the License.   You may obtain  a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the  License is distributed on an  "AS IS" BASIS,
+ * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
+ * implied.   See  the License  for  the  specific language  governing
+ * permissions and limitations under the License.
+ * 
+ **************************************************************************/
+ 
+#include <cassert>    // for assert()
+#include <cwchar>     // for mbstate_t, size_t
+#include <ios>        // for hex
+#include <iostream>   // for cout
+#include <locale>     // for codecvt, isalnum(), locale
+#include <ostream>    // for basic_ostream
+#include <sstream>    // for ostringstream
+
+
+// Inserts the NUL-terminated wide character string s into the stream
+// buffer of strm, converting it to the stream's character type with
+// the codecvt<wchar_t, charT, state_type> facet of the stream's locale.
+// Sets badbit in strm when the conversion fails or makes no progress,
+// or when the stream buffer accepts fewer characters than converted.
+template <class charT, class Traits>
+void
+streambuf_insert (std::basic_ostream<charT, Traits> &strm,
+                  const wchar_t                     *s)
+{
+    typedef typename Traits::state_type                  StateT;
+    typedef std::codecvt<wchar_t, charT, StateT>         Codecvt;
+
+    const Codecvt &cvt = std::use_facet<Codecvt>(strm.getloc ());
+
+    const std::size_t slen = std::char_traits<wchar_t>::length (s);
+
+    // perform codeset conversion in chunks to avoid dynamic
+    // memory allocation
+    const std::size_t    xbufsize = 32;
+
+    charT                xbuf [xbufsize];
+    charT*               xbuf_end  = xbuf + xbufsize;
+    charT*               to_next   = 0;
+    const wchar_t*       from_next = 0;
+    const wchar_t* const end       = s + slen;
+
+    StateT state = StateT ();
+
+    for (const wchar_t* base = s; from_next != end; base = from_next) {
+        const std::codecvt_base::result res =
+            cvt.out (state, base, end, from_next,
+                     xbuf, xbuf_end, to_next);
+        // number of converted characters now held in the buffer
+        const std::streamsize nbytes = to_next - xbuf;
+        switch (res) {
+        case Codecvt::error:
+            // write out the sequence successfully converted up
+            // to the point of the error in the internal sequence
+            // and fail; return rather than fall through so that
+            // the sequence isn't written out a second time
+            strm.rdbuf ()->sputn (xbuf, nbytes);
+            strm.setstate (strm.badbit);
+            return;
+
+        case Codecvt::noconv:
+            // write out whatever the facet left in the buffer
+            // NOTE(review): the source itself isn't copied -- confirm
+            if (nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) {
+                strm.setstate (strm.badbit);
+                return;
+            }
+
+            from_next = end;   // effectively break
+            break;
+
+        default:
+            assert (cvt.ok == res || cvt.partial == res);
+            // partial conversion will result if there isn't enough
+            // space in the conversion buffer to hold the converted
+            // sequence; the rest is converted in the next iteration
+            // fail if nothing was consumed or produced (to avoid
+            // looping forever) or if the write comes up short
+            if (   (from_next == base && base != end && 0 == nbytes)
+                || nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) {
+                strm.setstate (strm.badbit);
+                return;
+            }
+        }
+    }
+}
+
+
+// Formatted inserter for NUL-terminated arrays of wchar_t: constructs
+// a sentry and, if it succeeds, hands s to streambuf_insert(). When
+// that throws, badbit is set in strm; the exception is rethrown only
+// if setting badbit itself raised ios_base::failure.
+template <class charT, class Traits>
+std::basic_ostream<charT, Traits>&
+operator<< (std::basic_ostream<charT, Traits> &strm,
+            const wchar_t                     *s)
+{
+    const typename std::basic_ostream<charT, Traits>::sentry guard (strm);
+
+    if (!guard)
+        return strm;   // sentry failed, insert nothing
+
+    try {
+        streambuf_insert (strm, s);
+    }
+    catch (...) {
+        // assume the worst until setstate() returns normally
+        bool rethrow = true;
+        try {
+            strm.setstate (strm.badbit);
+            rethrow = false;
+        }
+        catch (std::ios_base::failure&) {
+            // setstate() threw: propagate the original exception below
+        }
+
+        if (rethrow)
+            throw;
+    }
+
+    return strm;
+}
+
+
+// sample wide character strings; trailing comments give the UTF-8 bytes
+static const wchar_t* const wcs [] = {
+    L"a", L"abc",
+    // Greek letter Alpha:
+    L"\x0391",   // "\xce\x91"
+    // Greek letters Alpha Beta:
+    L"\x0391\x0392",   // "\xce\x91\xce\x92"
+    // Greek letters Alpha Beta Gamma:
+    L"\x0391\x0392\x0393",   // "\xce\x91\xce\x92\xce\x93"
+    // Tibetan digit zero:
+    L"\x0f20",   // "\xe0\xbc\xa0"
+    // Tibetan digits one, zero:
+    L"\x0f21\x0f20",   // "\xe0\xbc\xa1\xe0\xbc\xa0"
+    // Tibetan digits two, one, zero:
+    L"\x0f22\x0f21\x0f20"   // "\xe0\xbc\xa2\xe0\xbc\xa1\xe0\xbc\xa0"
+};
+
+
+int main ()
+{
+    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> Utf8Cvt;
+    typedef unsigned char                                      UChar;
+
+    // build a locale from that of cout with its codecvt facet replaced
+    // by one converting between UCS and UTF-8
+    const std::locale utf8_loc (std::cout.getloc (),
+                                new Utf8Cvt ("UTF-8@UCS"));
+
+    const std::size_t nstrings = sizeof wcs / sizeof *wcs;
+
+    for (std::size_t inx = 0; inx != nstrings; ++inx) {
+        const wchar_t* const wstr = wcs [inx];
+
+        // narrow string stream imbued with the converting locale; the
+        // inserter converts the wide string to a multibyte sequence
+        std::ostringstream out;
+        out.imbue (utf8_loc);
+        out << wstr;
+
+        // print the code point of each wide character in hex
+        std::cout << "UCS-2: " << std::hex;
+
+        for (std::size_t j = 0; wstr [j] != L'\0'; ++j)
+            std::cout << "U+" << unsigned (wstr [j]) << ' ';
+
+        const std::string bytes = out.str ();
+
+        std::cout << " ==> UTF-8: \"";
+
+        // print the converted sequence: alphanumeric characters as
+        // themselves, everything else as a hex escape sequence
+        const char* cur = bytes.c_str ();
+        while (*cur != '\0') {
+            const char ch = *cur++;
+            if (std::isalnum (ch, std::cout.getloc ()))
+                std::cout << ch;
+            else
+                std::cout << "\\x" << int (UChar (ch));
+        }
+
+        std::cout << "\"\n";
+    }
+}

Propchange: incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp
------------------------------------------------------------------------------
    svn:keywords = Id

Added: incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out
URL: http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out?view=auto&rev=540193
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out (added)
+++ incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out Mon May 21 09:23:16 2007
@@ -0,0 +1,8 @@
+UCS-2: U+61  ==> UTF-8: "a"
+UCS-2: U+61 U+62 U+63  ==> UTF-8: "abc"
+UCS-2: U+391  ==> UTF-8: "\xce\x91"
+UCS-2: U+391 U+392  ==> UTF-8: "\xce\x91\xce\x92"
+UCS-2: U+391 U+392 U+393  ==> UTF-8: "\xce\x91\xce\x92\xce\x93"
+UCS-2: U+f20  ==> UTF-8: "\xe0\xbc\xa0"
+UCS-2: U+f21 U+f20  ==> UTF-8: "\xe0\xbc\xa1\xe0\xbc\xa0"
+UCS-2: U+f22 U+f21 U+f20  ==> UTF-8: "\xe0\xbc\xa2\xe0\xbc\xa1\xe0\xbc\xa0"



Mime
View raw message