harmony-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Robert Muir <rcm...@gmail.com>
Subject Re: UTF-8 decode broken for supplementary characters?
Date Wed, 01 Sep 2010 12:57:36 GMT
On Wed, Sep 1, 2010 at 8:46 AM, Robert Muir <rcmuir@gmail.com> wrote:

>
>
> On Wed, Sep 1, 2010 at 5:43 AM, Deven You <devyoudw@gmail.com> wrote:
>
>> I have run the test on Linux and got the same error. It seems to be due to
>> our UTF-8 decoder. I will do more debugging to narrow down the root cause.
>> Is anyone familiar with UTF-8? I hope I can get some help.
>>
>>
>
Here's a patch:

--- UTF_8.java.orig 2010-09-01 08:38:54.035000000 -0400
+++ UTF_8.java 2010-09-01 08:55:28.004000000 -0400
@@ -157,8 +157,17 @@
                         }
                         inIndex += tail;
                     }
-                    cArr[outIndex++] = (char) jchar;
-                    outRemaining--;
+                    if (jchar <= 0xffff) {
+                      cArr[outIndex++] = (char) jchar;
+                      outRemaining--;
+                    } else {
+                      final int chHalf = jchar - 0x10000;
+                      cArr[outIndex++] = (char) ((chHalf >> 0xA) + 0xD800);
+                      outRemaining--;
+                      if (outRemaining == 0) return CoderResult.OVERFLOW;
+                      cArr[outIndex++] = (char) ((chHalf & 0x3FF) + 0xDC00);
+                      outRemaining--;
+                    }
                 }
                 in.position(inIndex - in.arrayOffset());
                 out.position(outIndex - out.arrayOffset());
@@ -199,8 +208,17 @@
                             pos += tail;
                         }
                         pos++;
-                        out.put((char) jchar);
-                        outRemaining--;
+                        if (jchar <= 0xffff) {
+                          out.put((char) jchar);
+                          outRemaining--;
+                        } else {
+                          final int chHalf = jchar - 0x10000;
+                          out.put((char) ((chHalf >> 0xA) + 0xD800));
+                          outRemaining--;
+                          if (outRemaining == 0) return CoderResult.OVERFLOW;
+                          out.put((char) ((chHalf & 0x3FF) + 0xDC00));
+                          outRemaining--;
+                        }
                     }
                     return CoderResult.UNDERFLOW;
                 } finally {

-- 
Robert Muir
rcmuir@gmail.com

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message