Return-Path: Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: (qmail 92675 invoked from network); 23 Mar 2011 19:07:13 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 23 Mar 2011 19:07:13 -0000 Received: (qmail 66541 invoked by uid 500); 23 Mar 2011 19:07:28 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 66494 invoked by uid 500); 23 Mar 2011 19:07:28 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 66487 invoked by uid 99); 23 Mar 2011 19:07:28 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 23 Mar 2011 19:07:28 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 23 Mar 2011 19:07:26 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 4FDED2388A43; Wed, 23 Mar 2011 19:07:02 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1084677 [2/2] - in /commons/sandbox/runtime/trunk/src/main: java/org/apache/commons/runtime/exception/ native/ native/include/acr/ native/shared/ Date: Wed, 23 Mar 2011 19:07:02 -0000 To: commits@commons.apache.org From: mturk@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110323190704.4FDED2388A43@eris.apache.org> Added: commons/sandbox/runtime/trunk/src/main/native/shared/string.c URL: http://svn.apache.org/viewvc/commons/sandbox/runtime/trunk/src/main/native/shared/string.c?rev=1084677&view=auto ============================================================================== --- 
commons/sandbox/runtime/trunk/src/main/native/shared/string.c (added) +++ commons/sandbox/runtime/trunk/src/main/native/shared/string.c Wed Mar 23 19:07:01 2011 @@ -0,0 +1,945 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "acr/string.h" +#include "acr/memory.h" +#include "acr/clazz.h" + +extern int acr_native_codepage; + +J_DECLARE_CLAZZ = { + NULL, + NULL, + "java/lang/String" +}; + +J_DECLARE_M_ID(0000) = { + NULL, + "", + "([B)V" +}; + +J_DECLARE_M_ID(0001) = { + NULL, + "getBytes", + "()[B" +}; + +ACR_CLASS_LOADER(String) +{ + int rv; + + if ((rv = AcrLoadClass(_E, &_clazzn, 0)) != ACR_SUCCESS) + return rv; + J_LOAD_METHOD(0000); + J_LOAD_METHOD(0001); + + return ACR_SUCCESS; +} + +ACR_CLASS_UNLOADER(String) +{ + AcrUnloadClass(_E, &_clazzn); +} + +static const char *iso_8859_1_aliases[] = { + "iso-8859-1", "iso_8859-1", "iso_8859_1", "8859-1", "8859_1", + "iso8859-1", "iso8859_1 ", "latin1", "ibm-819", "ibm819", + "cp819", "819", "28591", "windows-28591", NULL +}; + +static const char *utf_8_aliases[] = { + "utf8", "utf-8", "cp1208", "65001", "windows-65001", NULL +}; + +static const char *us_ascii_aliases[] = { + "us-ascii", "ascii", "ascii7", "iso646-us", "us", "ibm367", + "cp367", 
"ansi_x3.4-1968", "646", "646us", "windows-20127", NULL +}; + +int +AcrGetNativeCodePage(const char *cs) +{ + int i; + if (cs && *cs) { + for (i = 0; iso_8859_1_aliases[i]; i++) { + if (strcasecmp(cs, iso_8859_1_aliases[i]) == 0) + return ACR_CP_ISO8859_1; + } + for (i = 0; utf_8_aliases[i]; i++) { + if (strcasecmp(cs, utf_8_aliases[i]) == 0) + return ACR_CP_UTF_8; + } + for (i = 0; us_ascii_aliases[i]; i++) { + if (strcasecmp(cs, us_ascii_aliases[i]) == 0) + return ACR_CP_ISO8859_1; + } + } + return ACR_CP_DEFAULT; +} + +static char *get_string_iso_8859_1(JNIEnv *_E, jstring str, char *b) +{ + jsize sl; + const jchar *sr; + char *rv = NULL; + + sl = (*_E)->GetStringLength(_E, str); + if (b && sl < ACR_MBUFF_LEN) + rv = b; + else { + rv = ACR_MALLOC(char, sl + 1); + if (rv == NULL) { + /* Exception has already neen throw from AcrMalloc + */ + return NULL; + } + } + sr = (*_E)->GetStringCritical(_E, str, NULL); + if (!sr) { + if (rv != b) + AcrFree(rv); + return NULL; + } + else { + jsize i; + for (i = 0; i < sl; i++) + rv[i] = (char)(sr[i] & 0xFF); + } + rv[sl] = '\0'; + (*_E)->ReleaseStringCritical(_E, str, sr); + return rv; +} + +/* Implementation of RFC 3629, "UTF-8, a transformation format of ISO 10646" + * with particular attention to canonical translation forms (see section 10 + * "Security Considerations" of the RFC for more info). + * + * Since several architectures including Windows support unicode, with UCS2 + * used as the actual storage conventions by that archicture, these functions + * exist to transform or validate UCS2 strings into APR's 'char' type + * convention. It is left up to the operating system to determine the + * validitity of the string, e.g. normative forms, in the context of + * its native language support. 
Other file systems which support filename
+ * characters of 0x80-0xff but have no explicit requirement for Unicode
+ * will find this function useful only for validating the character sequences
+ * and rejecting poorly encoded UTF8 sequences.
+ *
+ * Len  UCS-4 range (hex)  UTF-8 octet sequence (binary)
+ * 1:2  00000000-0000007F  0xxxxxxx
+ * 2:2  00000080-000007FF  110XXXXx 10xxxxxx
+ * 3:2  00000800-0000FFFF  1110XXXX 10Xxxxxx 10xxxxxx
+ * 4:4  00010000-001FFFFF  11110XXX 10XXxxxx 10xxxxxx 10xxxxxx
+ *      00200000-03FFFFFF  111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *      04000000-7FFFFFFF  1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * One of the X bits must be 1 to avoid overlong representation of ucs2 values.
+ *
+ * For conversion into ucs2, the 4th form is limited in range to 0010 FFFF,
+ * and the final two forms are used only by full ucs4, per RFC 3629;
+ *
+ *  "Pairs of UCS-2 values between D800 and DFFF (surrogate pairs in
+ *   Unicode parlance), being actually UCS-4 characters transformed
+ *   through UTF-16, need special treatment: the UTF-16 transformation
+ *   must be undone, yielding a UCS-4 character that is then transformed
+ *   as above."
+ *
+ * From RFC2781 UTF-16: the compressed ISO 10646 encoding bitmask
+ *
+ *  U' = U - 0x10000
+ *  U' = 00000000 0000yyyy yyyyyyxx xxxxxxxx
+ *  W1 = 110110yy yyyyyyyy
+ *  W2 = 110111xx xxxxxxxx
+ *  Max U' = 0000 00001111 11111111 11111111
+ *  Max U = 0000 00010000 11111111 11111111
+ *
+ * Len is the table above is a mapping of bytes used for utf8:ucs2 values,
+ * which results in these conclusions of maximum allocations;
+ *
+ *  conv_utf8_to_ucs2 out bytes:sizeof(in) * 1 <= Req <= sizeof(in) * 2
+ *  conv_ucs2_to_utf8 out words:sizeof(in) / 2 <= Req <= sizeof(in) * 3 / 2
+ */
+static int conv_utf8_to_ucs2(const char *in, jsize inbytes,
+                             jchar *out, jsize *outwords)
+{
+    acr_i64_t newch, mask;
+    jsize expect, eating;
+    int ch;
+    /* Decode at most inbytes bytes of UTF-8 from in into UTF-16 code
+     * units stored at out.  On entry *outwords is the capacity of out;
+     * it is decremented by one for every code unit written.
+     *
+     * Returns ACR_EINVAL for a malformed sequence, ACR_INCOMPLETE when
+     * the input ends inside a multibyte sequence and ACR_SUCCESS
+     * otherwise, including when decoding stopped because out is full.
+     */
+    while (inbytes && *outwords) {
+        ch = (unsigned char)(*in++);
+        if (!(ch & 0200)) {
+            /* US-ASCII-7 plain text
+             */
+            --inbytes;
+            --*outwords;
+            *(out++) = ch;
+        }
+        else {
+            if ((ch & 0300) != 0300) {
+                /* Multibyte Continuation is out of place
+                 */
+                return ACR_EINVAL;
+            }
+            else {
+                /* Multibyte Sequence Lead Character
+                 *
+                 * Compute the expected bytes while adjusting
+                 * our lead byte and leading zeros mask.
+                 * expect ends up as the number of continuation
+                 * bytes that must follow the lead byte.
+                 */
+                mask = 0340;
+                expect = 1;
+                while ((ch & mask) == mask) {
+                    mask |= mask >> 1;
+                    if (++expect > 3) /* (truly 5 for ucs-4) */
+                        return ACR_EINVAL;
+                }
+                newch = ch & ~mask;
+                eating = expect + 1; /* lead byte + continuation bytes */
+                if (inbytes <= expect)
+                    return ACR_INCOMPLETE;
+                /* Reject values of excessive leading 0 bits
+                 * utf-8 _demands_ the shortest possible byte length
+                 */
+                if (expect == 1) {
+                    if (!(newch & 0036))
+                        return ACR_EINVAL;
+                }
+                else {
+                    /* Reject values of excessive leading 0 bits
+                     */
+                    if (!newch && !((unsigned char)*in & 0077 & (mask << 1)))
+                        return ACR_EINVAL;
+                    if (expect == 2) {
+                        /* Reject values D800-DFFF when not utf16 encoded
+                         * (may not be an appropriate restriction for ucs-4)
+                         */
+                        if (newch == 0015 && ((unsigned char)*in & 0040))
+                            return ACR_EINVAL;
+                    }
+                    else if (expect == 3) {
+                        /* Short circuit values > 110000
+                         */
+                        if (newch > 4)
+                            return ACR_EINVAL;
+                        if (newch == 4 && ((unsigned char)*in & 0060))
+                            return ACR_EINVAL;
+                    }
+                }
+                /* Where the boolean (expect > 2) is true, we will need
+                 * an extra word for the output.
+                 */
+                if (*outwords < (jsize)(expect > 2) + 1)
+                    break; /* buffer full */
+                while (expect--) {
+                    /* Multibyte Continuation must be legal */
+                    if (((ch = (unsigned char)*(in++)) & 0300) != 0200)
+                        return ACR_EINVAL;
+                    newch <<= 6;
+                    newch |= (ch & 0077);
+                }
+                inbytes -= eating;
+                /* newch is now a true ucs-4 character
+                 *
+                 * now we need to fold to ucs-2
+                 */
+                if (newch < 0x10000) {
+                    --*outwords;
+                    *(out++) = (jchar) newch;
+                }
+                else {
+                    *outwords -= 2;
+                    newch -= 0x10000;
+                    *(out++) = (jchar) (0xD800 | (newch >> 10));
+                    *(out++) = (jchar) (0xDC00 | (newch & 0x03FF));
+                }
+            }
+        }
+    }
+    /* Buffer full 'errors' aren't errors, the client must inspect both
+     * the inbytes and outwords values.
+     * Note that inbytes is passed by value, so only the updated
+     * *outwords is actually visible to the caller.
+     */
+    return ACR_SUCCESS;
+}
+/* Same decoder as conv_utf8_to_ucs2 with wchar_t output and size_t
+ * counters.  When CC_SIZEOF_WCHAR_T is 2 characters above 0xFFFF are
+ * written as two surrogate words, otherwise the whole code point is
+ * stored in a single wchar_t.
+ *
+ * Returns ACR_EINVAL, ACR_INCOMPLETE or ACR_SUCCESS under the same
+ * conditions as conv_utf8_to_ucs2.
+ */
+static int conv_utf8_to_wcs(const char *in, size_t inbytes,
+                            wchar_t *out, size_t *outwords)
+{
+    acr_i64_t newch, mask;
+    size_t expect, eating;
+    int ch;
+
+    while (inbytes && *outwords) {
+        ch = (unsigned char)(*in++);
+        if (!(ch & 0200)) {
+            /* US-ASCII-7 plain text
+             */
+            --inbytes;
+            --*outwords;
+            *(out++) = ch;
+        }
+        else {
+            if ((ch & 0300) != 0300) {
+                /* Multibyte Continuation is out of place
+                 */
+                return ACR_EINVAL;
+            }
+            else {
+                /* Multibyte Sequence Lead Character
+                 *
+                 * Compute the expected bytes while adjusting
+                 * our lead byte and leading zeros mask.
+                 */
+                mask = 0340;
+                expect = 1;
+                while ((ch & mask) == mask) {
+                    mask |= mask >> 1;
+                    if (++expect > 3) /* (truly 5 for ucs-4) */
+                        return ACR_EINVAL;
+                }
+                newch = ch & ~mask;
+                eating = expect + 1; /* lead byte + continuation bytes */
+                if (inbytes <= expect)
+                    return ACR_INCOMPLETE;
+                /* Reject values of excessive leading 0 bits
+                 * utf-8 _demands_ the shortest possible byte length
+                 */
+                if (expect == 1) {
+                    if (!(newch & 0036))
+                        return ACR_EINVAL;
+                }
+                else {
+                    /* Reject values of excessive leading 0 bits
+                     */
+                    if (!newch && !((unsigned char)*in & 0077 & (mask << 1)))
+                        return ACR_EINVAL;
+                    if (expect == 2) {
+                        /* Reject values D800-DFFF when not utf16 encoded
+                         * (may not be an appropriate restriction for ucs-4)
+                         */
+                        if (newch == 0015 && ((unsigned char)*in & 0040))
+                            return ACR_EINVAL;
+                    }
+                    else if (expect == 3) {
+                        /* Short circuit values > 110000
+                         */
+                        if (newch > 4)
+                            return ACR_EINVAL;
+                        if (newch == 4 && ((unsigned char)*in & 0060))
+                            return ACR_EINVAL;
+                    }
+                }
+                /* Where the boolean (expect > 2) is true, we will need
+                 * an extra word for the output.
+                 * NOTE(review): two free words are required here even
+                 * when wchar_t is wider than 2 bytes and only one word
+                 * is written below.
+                 */
+                if (*outwords < (size_t)(expect > 2) + 1)
+                    break; /* buffer full */
+                while (expect--) {
+                    /* Multibyte Continuation must be legal */
+                    if (((ch = (unsigned char)*(in++)) & 0300) != 0200)
+                        return ACR_EINVAL;
+                    newch <<= 6;
+                    newch |= (ch & 0077);
+                }
+                inbytes -= eating;
+#if CC_SIZEOF_WCHAR_T == 2
+                /* newch is now a true ucs-4 character
+                 *
+                 * now we need to fold to ucs-2
+                 */
+                if (newch < 0x10000) {
+                    --*outwords;
+                    *(out++) = (wchar_t) newch;
+                }
+                else {
+                    *outwords -= 2;
+                    newch -= 0x10000;
+                    *(out++) = (wchar_t) (0xD800 | (newch >> 10));
+                    *(out++) = (wchar_t) (0xDC00 | (newch & 0x03FF));
+                }
+#else
+                --*outwords;
+                *(out++) = (wchar_t) newch;
+#endif
+            }
+        }
+    }
+    /* Buffer full 'errors' aren't errors, the client must inspect both
+     * the inbytes and outwords values.
+     * Note that inbytes is passed by value, so only the updated
+     * *outwords is actually visible to the caller.
+     */
+    return ACR_SUCCESS;
+}
+
+/* Java implementation of GetStringUTF is bogus.
+ * It breaks on embeded NUL in strings.
+ * Use the APR implementation instead. + */ +static int conv_ucs2_to_utf8(const jchar *in, jsize inwords, + char *out, jsize *outbytes) +{ + acr_i64_t newch, require; + jsize need; + char *invout; + int ch; + + while (inwords && *outbytes) { + ch = (unsigned short)(*in++); + if (ch < 0x80) { + --inwords; + --*outbytes; + *(out++) = (unsigned char) ch; + } + else { + if ((ch & 0xFC00) == 0xDC00) { + /* Invalid Leading ucs-2 Multiword Continuation Character + */ + return ACR_EINVAL; + } + if ((ch & 0xFC00) == 0xD800) { + /* Leading ucs-2 Multiword Character + */ + if (inwords < 2) { + /* Missing ucs-2 Multiword Continuation Character + */ + return ACR_INCOMPLETE; + } + if (((unsigned short)(*in) & 0xFC00) != 0xDC00) { + /* Invalid ucs-2 Multiword Continuation Character + */ + return ACR_EINVAL; + } + newch = (ch & 0x03FF) << 10 | ((unsigned short)(*in++) & 0x03FF); + newch += 0x10000; + } + else { + /* ucs-2 Single Word Character + */ + newch = ch; + } + /* Determine the absolute minimum utf-8 bytes required + */ + require = newch >> 11; + need = 1; + while (require) + require >>= 5, ++need; + if (need >= *outbytes) + break; /* Insufficient buffer */ + inwords -= (need > 2) + 1; + *outbytes -= need + 1; + /* Compute the utf-8 characters in last to first order, + * calculating the lead character length bits along the way. 
+ */ + ch = 0200; + out += need + 1; + invout = out; + while (need--) { + ch |= ch >> 1; + *(--invout) = (unsigned char)(0200 | (newch & 0077)); + newch >>= 6; + } + /* Compute the lead utf-8 character and move the dest offset + */ + *(--invout) = (unsigned char)(ch | newch); + } + } + /* Buffer full 'errors' aren't errors, the client must inspect both + * the inwords and outbytes values + */ + return ACR_SUCCESS; +} + +static jsize java_ucs2_to_utf8_len(const jchar *in, jsize inwords) +{ + jsize need = 1; + int ch; + + while (inwords) { + ch = (unsigned short)(*in++); + if (ch == 0) + need += 2; + if (ch < 0x80) + need += 1; + else { + if (ch < 0x0800) + need += 2; + else + need += 3; + } + --inwords; + } + /* Buffer full 'errors' aren't errors, the client must inspect both + * the inwords and outbytes values + */ + return need; +} + +/* Modified UTF-8 according to the java.io.DataInput + * specification + */ +static int java_ucs2_to_utf8(const jchar *in, jsize inwords, + char *out, jsize *outbytes) +{ + int ch; + + while (inwords && *outbytes) { + ch = (unsigned short)(*in++); + if (ch == 0) { + if (*outbytes < 2) + return ACR_INCOMPLETE; + *outbytes -= 2; + *(out++) = (unsigned char)0xC0; + *(out++) = (unsigned char)0x80; + } + if (ch < 0x80) { + --*outbytes; + *(out++) = (unsigned char)ch; + } + else { + if (ch < 0x0800) { + /* Two byte sequence + */ + if (*outbytes < 2) + return ACR_INCOMPLETE; + *outbytes -= 2; + *(out++) = (unsigned char)(0xC0 | ((ch >> 6) & 0x1F)); + *(out++) = (unsigned char)(0x80 | ((ch) & 0x3F)); + } + else { + /* Three byte sequence + */ + if (*outbytes < 3) + return ACR_INCOMPLETE; + *outbytes -= 3; + *(out++) = (unsigned char)(0xE0 | ((ch >> 12) & 0x0F)); + *(out++) = (unsigned char)(0x80 | ((ch >> 6) & 0x3F)); + *(out++) = (unsigned char)(0x80 | ((ch) & 0x3F)); + } + } + --inwords; + } + if (*outbytes) { + *(out++) = '\0'; + --*outbytes; + } + return ACR_SUCCESS; +} +/* Modified UTF-8 according to the java.io.DataOutput + * 
specification + */ +static int java_utf8_to_ucs2(const char *in, jsize inbytes, + jchar *out, jsize *outwords) +{ + int i, ch; + + while (inbytes && *outwords) { + ch = (unsigned char)(*in++); + if (ch == 0) { + --*outwords; + *(out) = (jchar)0; + return ACR_SUCCESS; + } + else if (!(ch & 0x80)) { + /* US-ASCII-7 plain text + */ + --inbytes; + } + else if ((ch & 0xE0) == 0xC0) { + /* Two byte sequence */ + if (inbytes < 2) + return ACR_INCOMPLETE; + inbytes -= 2; + ch = ch & 0x1F; + if ((*in & 0xC0) != 0x80) + return ACR_EILSEQ; + ch <<= 6; + ch |= (unsigned char)(*(in++) & 0x3F); + } + else if ((ch & 0xF0) == 0xE0) { + /* Three byte sequence */ + if (inbytes < 3) + return ACR_INCOMPLETE; + inbytes -= 3; + ch = ch & 0x0F; + for (i = 0; i < 2; i++) { + if ((*in & 0xC0) != 0x80) + return ACR_EILSEQ; + ch <<= 6; + ch |= (unsigned char)(*(in++) & 0x3F); + } + } + else { + return ACR_EILSEQ; + } + *(out++) = (jchar)ch; + --*outwords; + } + return ACR_SUCCESS; +} + +jchar * +AcrUtf8ToUcs2(JNIEnv *_E, const char *str) +{ + int rc; + jsize len, out; + jchar *dst; + + out = len = (jsize)strlen(str) + 1; + dst = ACR_MALLOC(jchar, len); + if (!dst) + return NULL; + if ((rc = java_utf8_to_ucs2(str, len, dst, &out))) { + /* Invalid UTF-8 string */ + AcrFree(dst); + ACR_SET_OS_ERROR(rc); + return NULL; + } + return dst; +} + +wchar_t * +AcrUtf8ToWcs(JNIEnv *_E, const char *str) +{ + int rc; + size_t len, out; + wchar_t *dst; + + out = len = strlen(str) + 1; + dst = ACR_MALLOC(wchar_t, len); + if (!dst) + return NULL; + if ((rc = conv_utf8_to_wcs(str, len, dst, &out))) { + /* Invalid UTF-8 string */ + AcrFree(dst); + ACR_SET_OS_ERROR(rc); + return NULL; + } + return dst; +} + +char * +AcrUsc2ToUtf8(JNIEnv *_E, const jchar *str, jsize len) +{ + int rc; + jsize out; + char *dst; + + out = java_ucs2_to_utf8_len(str, len); + dst = ACR_MALLOC(char, out); + if (!dst) + return NULL; + if ((rc = java_ucs2_to_utf8(str, len, dst, &out))) { + /* Invalid UTF-8 string */ + AcrFree(dst); + 
ACR_SET_OS_ERROR(rc); + return NULL; + } + return dst; +} + +static char *get_string_utf_8(JNIEnv *_E, jstring str, char *b) +{ + jsize sl, nl; + const jchar *sr; + char *rv = NULL; + + if (!str) { + return NULL; + } + if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) { + /* JNI out of memory error */ + return NULL; + } + sl = (*_E)->GetStringLength(_E, str); + nl = sl * 3; + if (b && nl < ACR_MBUFF_LEN) + rv = b; + else { + rv = ACR_MALLOC(char, nl + 1); + if (!rv) { + /* Exception has already neen throw from ACR_Malloc + */ + return NULL; + } + } + sr = (*_E)->GetStringCritical(_E, str, NULL); + if (!sr) { + if (rv != b) + AcrFree(rv); + return NULL; + } + else { + jsize ol = nl; + if (conv_ucs2_to_utf8(sr, sl, rv, &nl) == ACR_SUCCESS) + rv[ol - nl] = '\0'; + else { + /* XXX: Throw some exception ? + */ + if (rv != b) + AcrFree(rv); + return NULL; + } + } + (*_E)->ReleaseStringCritical(_E, str, sr); + return rv; +} + +static char *get_string_default(JNIEnv *_E, jstring str, char *b) +{ + jbyteArray sb = NULL; + char *rs = NULL; + + if (!_clazzn.i || !J4MID(0000)) { + ACR_SET_OS_ERROR(ACR_EINIT); + return NULL; + } + sb = CALL_METHOD0(Object, 0001, str); + if ((*_E)->ExceptionCheck(_E)) + return NULL; + else { + jint len = (*_E)->GetArrayLength(_E, sb); + if (b && len < ACR_PBUFF_LEN) { + /* Use provided stack storage */ + rs = b; + } + else { + rs = ACR_MALLOC(char, len + 1); + if (rs == NULL) { + (*_E)->DeleteLocalRef(_E, sb); + return NULL; + } + } + (*_E)->GetByteArrayRegion(_E, sb, 0, len, (jbyte *)rs); + rs[len] = '\0'; /* NUL-terminate */ + } + (*_E)->DeleteLocalRef(_E, sb); + return rs; +} + +static jstring new_string_default(JNIEnv *_E, const char *str) +{ + jstring rs; + jbyteArray ba; + jsize sl; + + sl = (jsize)strlen(str); + ba = (*_E)->NewByteArray(_E, sl); + if (ba != NULL) { + (*_E)->SetByteArrayRegion(_E, ba, 0, sl, (jbyte *)str); + rs = (*_E)->NewObject(_E, _clazzn.i, J4MID(0000), ba); + (*_E)->DeleteLocalRef(_E, ba); + return rs; + } + return NULL; +} 
+ +static jstring new_string_iso_8859_1(JNIEnv *_E, const char *s) +{ + jstring rs = NULL; + if (s) { + size_t l = strlen(s); + if (l < ACR_MBUFF_SIZ) { + jchar cc[ACR_MBUFF_SIZ]; + size_t i; + for (i = 0; i < l; i++) { + cc[i] = s[i]; + } + rs = (*_E)->NewString(_E, cc, (jsize)l); + } + else { + jchar *cc; + if ((cc = ACR_MALLOC(jchar, l + 1))) { + size_t i; + for (i = 0; i < l; i++) { + cc[i] = s[i]; + } + rs = (*_E)->NewString(_E, cc, (jsize)l); + AcrFree(cc); + } + } + } + return rs; +} + +static jstring new_string_utf_8(JNIEnv *_E, const char *s) +{ + jstring rs = NULL; + if (s) { + int ex; + jsize sl = (jsize)strlen(s); + if (sl < ACR_MBUFF_SIZ) { + jchar cc[ACR_MBUFF_SIZ]; + jsize wl = ACR_MBUFF_LEN; + if ((ex = conv_utf8_to_ucs2(s, sl, cc, &wl)) == ACR_SUCCESS) + rs = (*_E)->NewString(_E, cc, sl); + else + AcrThrowException(_E, __FILE_FUNC_LINE__, ACR_EX_EINVAL, ex); + } + else { + jchar *cc; + if ((cc = ACR_MALLOC(jchar, sl + 1))) { + jsize wl = sl; + if ((ex = conv_utf8_to_ucs2(s, sl, cc, &wl)) == ACR_SUCCESS) + rs = (*_E)->NewString(_E, cc, sl); + else + AcrThrowException(_E, __FILE_FUNC_LINE__, ACR_EX_EINVAL, ex); + AcrFree(cc); + } + } + } + return rs; +} + + +wchar_t * +AcrGetJavaStringW(JNIEnv *_E, jstring str, wchar_t *b) +{ + jsize sl; + const jchar *sr; + wchar_t *rv = NULL; + + if (!str) { + return NULL; + } + if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) { + /* JNI out of memory error */ + return NULL; + } + sl = (*_E)->GetStringLength(_E, str); + if (b && sl < ACR_MBUFF_LEN) + rv = b; + else { + rv = ACR_MALLOC(wchar_t, sl + 1); + if (!rv) { + /* Exception has already neen throw from ACR_Malloc + */ + return NULL; + } + } + sr = (*_E)->GetStringCritical(_E, str, NULL); + if (!sr) { + if (rv != b) + AcrFree(rv); + return NULL; + } + else { +#if CC_SIZEOF_WCHAR_T == 2 + memcpy(rv, sr, sl * sizeof(wchar_t)); +#else + jsize i; + for (i = 0; i < sl; i++) + rv[i] = sr[i]; +#endif + } + rv[sl] = L'\0'; + (*_E)->ReleaseStringCritical(_E, str, sr); + 
return rv; +} + +char * +AcrGetJavaStringA(JNIEnv *_E, jstring str, char *b) +{ + char *rv = NULL; + if (!str) { + return NULL; + } + if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) { + /* JNI out of memory error */ + return NULL; + } + switch (acr_native_codepage) { + case ACR_CP_ISO8859_1: + rv = get_string_iso_8859_1(_E, str, b); + break; + case ACR_CP_UTF_8: + rv = get_string_utf_8(_E, str, b); + break; + default: + rv = get_string_default(_E, str, b); + break; + } + return rv; +} + +jstring +AcrNewJavaStringW(JNIEnv *_E, const wchar_t *s) +{ + jstring r = NULL; + if (s) { + size_t l = wcslen(s); +#if CC_SIZEOF_WCHAR_T == 2 + r = (*_E)->NewString(_E, (const jchar *)s, (jsize)l); +#else + if (l < ACR_MBUFF_SIZ) { + jchar cc[ACR_MBUFF_SIZ]; + size_t i; + for (i = 0; i < l; i++) { + /* Simply assign utf32 to utf16 */ + cc[i] = (jchar)s[i]; + } + r = (*_E)->NewString(_E, cc, l); + } + else { + jchar *cc; + if ((cc = ACR_MALLOC(jchar, l + 1))) { + size_t i; + for (i = 0; i < l; i++) { + /* Simply assign utf32 to utf16 */ + cc[i] = (jchar)s[i]; + } + r = (*_E)->NewString(_E, cc, l); + AcrFree(cc); + } + } +#endif + } + return r; +} + +jstring +AcrNewJavaStringA(JNIEnv *_E, const char *str) +{ + jstring rv = NULL; + if (!str) + return NULL; + if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) { + /* JNI out of memory error */ + return NULL; + } + switch (acr_native_codepage) { + case ACR_CP_ISO8859_1: + rv = new_string_iso_8859_1(_E, str); + break; + case ACR_CP_UTF_8: + rv = new_string_utf_8(_E, str); + break; + default: + rv = new_string_default(_E, str); + break; + } + return rv; +} + +jstring +AcrNewJavaStringU(JNIEnv *_E, const char *str) +{ + if (!str) + return NULL; + if ((*_E)->EnsureLocalCapacity(_E, 2) < 0) { + /* JNI out of memory error */ + return NULL; + } + return new_string_utf_8(_E, str); +} Propchange: commons/sandbox/runtime/trunk/src/main/native/shared/string.c ------------------------------------------------------------------------------ svn:eol-style = native