Return-Path: Delivered-To: apmail-jakarta-commons-httpclient-dev-archive@www.apache.org Received: (qmail 86174 invoked from network); 18 Sep 2003 07:03:13 -0000 Received: from daedalus.apache.org (HELO mail.apache.org) (208.185.179.12) by minotaur-2.apache.org with SMTP; 18 Sep 2003 07:03:13 -0000 Received: (qmail 80051 invoked by uid 500); 18 Sep 2003 07:02:48 -0000 Delivered-To: apmail-jakarta-commons-httpclient-dev-archive@jakarta.apache.org Received: (qmail 80028 invoked by uid 500); 18 Sep 2003 07:02:48 -0000 Mailing-List: contact commons-httpclient-dev-help@jakarta.apache.org; run by ezmlm Precedence: bulk List-Unsubscribe: List-Subscribe: List-Help: List-Post: List-Id: "Commons HttpClient Project" Reply-To: "Commons HttpClient Project" Delivered-To: mailing list commons-httpclient-dev@jakarta.apache.org Received: (qmail 80003 invoked from network); 18 Sep 2003 07:02:47 -0000 Received: from unknown (HELO smtp.MacauCableTV.com) (202.175.49.51) by daedalus.apache.org with SMTP; 18 Sep 2003 07:02:47 -0000 Received: from mis004 ([192.168.100.92]) by smtp.MacauCableTV.com (Lotus Domino Release 5.0.2b (Intl)) with SMTP id 2003091815025640:855 ; Thu, 18 Sep 2003 15:02:56 +0800 Message-ID: <001b01c37db2$dfb90080$5c64a8c0@mctv> Reply-To: "Eric Chow" From: "Eric Chow" To: Cc: "Eric@MacauCableTV" Subject: HTTPClient NameValuePair in UTF-8 problem??? 
Date: Thu, 18 Sep 2003 15:02:45 +0800 MIME-Version: 1.0 X-Priority: 3 (Normal) X-MSMail-Priority: Normal X-Mailer: Microsoft Outlook Express 6.00.2800.1106 X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2800.1106 X-MIMETrack: Itemize by SMTP Server on mctv-off-srv/Macaucabletv(Release 5.0.2b (Intl)|16 December 1999) at 09/18/2003 03:02:56 PM, Serialize by Router on mctv-off-srv/Macaucabletv(Release 5.0.2b (Intl)|16 December 1999) at 09/18/2003 03:02:59 PM Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: minotaur-2.apache.org 1.6.2 0/1000/N Hello, When I tried to use HttpClient to query a web site with a "UTF-8" parameter, the search finds no match. When I "Copy & Paste" the character into that web site, it works, but it fails when I use HttpClient. Is there a problem with how HttpClient handles UTF-8 parameters? Web Site: http://www.mandarintools.com/chardict=5Fu8.html In "Paste in Character", select "UTF-8", and paste =E4=B8=8E, (the UTF-8 of= the previous character is \u4E0E), and then "Search by Character". It should return something .... The following is my source; please show me how I can pass UTF-8 query parameters in HttpClient. 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D import org.apache.commons.httpclient.*; import org.apache.commons.httpclient.methods.*; import org.apache.commons.httpclient.cookie.*; import org.apache.commons.httpclient.util.*; import java.io.*; import java.util.*; /** Sample program: posts a character-lookup form containing one non-ASCII character through Commons HttpClient and prints the response body. */ public class TradSimUnicode { /* Host name that main() configures the client to talk to. */ private static String checkURL =3D "cgibin.erols.com"; /** Entry point. Points a new HttpClient at checkURL on port 80 using the "http" protocol, selects the COMPATIBILITY cookie policy, and calls check() for the character U+4E0E. Any exception is only reported through printStackTrace(). */ public static void main(String argv[]) { HttpClient client =3D new HttpClient(); try { HostConfiguration hc =3D new HostConfiguration(); hc.setHost(checkURL, 80, "http"); client.setHostConfiguration(hc); client.getState().setCookiePolicy(CookiePolicy.COMPATIBILITY); check(client, '\u4e0e'); } catch(Exception e) { e.printStackTrace(); } } /** Passes c through toFormat() and then unicodeToString(), sends the resulting string as the "whatchar" field of a form POST to /mandarintools/cgi-bin/charlook.pl together with nine fixed fields, and writes the response body to System.out as UTF-8 bytes. NOTE(review): no request charset or Content-Type is set in this method, so the form body is presumably encoded with the library default rather than UTF-8 - confirm against the HttpClient version in use. NOTE(review): releaseConnection() is not in a finally block, so it is skipped when any earlier call throws. */ public static void check(HttpClient client, char c) throws Exception { String code =3D toFormat(c); String s =3D unicodeToString(code); PostMethod post =3D new PostMethod("/mandarintools/cgi-bin/charlook.pl"); // Prepare login parameters NameValuePair v1 =3D new NameValuePair("searchmode", "standard"); NameValuePair v2 =3D new NameValuePair("printtype", "utf8"); NameValuePair v3 =3D new NameValuePair("chartype", "trad"); NameValuePair v4 =3D new NameValuePair("ordering", "frequency"); NameValuePair v5 =3D new NameValuePair("display", "char"); NameValuePair v6 =3D new NameValuePair("display", "variants"); NameValuePair v7 =3D new NameValuePair("display", "unicode"); NameValuePair v8 =3D new NameValuePair("enctype", "utf8"); NameValuePair v9 =3D new NameValuePair("whatchar", s); NameValuePair v10 =3D new NameValuePair("searchchar", "Search by Character"); NameValuePair[] valPairs =3D { v1, v2, v3, v4, v5, v6, v7, v8, v9, v10 }; post.setRequestBody(valPairs); client.executeMethod(post); String resp =3D post.getResponseBodyAsString(); System.out.write(resp.getBytes("UTF-8")); post.releaseConnection(); } 
/** Decodes a string made of backslash-u hex escapes into the characters those escapes name. Returns null when the argument is null. The tokenizer treats the backslash and the letter 'u' as two independent delimiter characters; every remaining token is parsed as a base-16 integer and cast to char, so a token that is not valid hex makes Integer.parseInt throw NumberFormatException. */ public static String unicodeToString(String unicodeString) { if (unicodeString =3D=3D null) { return null; } StringBuffer buf =3D new StringBuffer(); StringTokenizer tokens =3D new StringTokenizer(unicodeString, "\\u"); while(tokens.hasMoreTokens()) { String token =3D (String)tokens.nextToken(); char oneUnicodeChar =3D (char)Integer.parseInt(token, 16); buf.append(oneUnicodeChar); } return buf.toString(); } /** Formats n as a backslash, the letter 'u', and the lowercase hex digits of n left-padded with zeros to four digits. NOTE(review): when the hex form has more than four digits (n above 0xFFFF, or negative n) the substring end index is negative and StringIndexOutOfBoundsException is thrown. */ public static String toFormat(int n){ String zeros =3D "000"; String body =3D Integer.toHexString(n); return "\\u" + zeros.substring(0, 4-body.length()) + body; } } =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D If you know what you are doing, it is not called RESEARCH! =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D --------------------------------------------------------------------- To unsubscribe, e-mail: commons-httpclient-dev-unsubscribe@jakarta.apache.org For additional commands, e-mail: commons-httpclient-dev-help@jakarta.apache.org