commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Eric Chow" <ec...@macaucabletv.com>
Subject HttpClient UTF-8 problem !!!
Date Thu, 18 Sep 2003 07:03:11 GMT
Hello,

When I use HttpClient to query a web site with a "UTF-8" parameter,
the query does not match anything.

When I tried to "Copy & Paste" the character into that web site, it works,
but failed to use HttpClient.

Is there any problem in HttpClient to handle UTF-8 parameters ???


Web Site: http://www.mandarintools.com/chardict_u8.html
In "Paste in Character", select "UTF-8", and paste 与 (the Unicode code
point of that character is \u4E0E),
 and then click "Search by Character".

It should return something ....


The following is my source; please show me how I can pass UTF-8 query
parameters in HttpClient.

============================================================================
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import org.apache.commons.httpclient.cookie.*;
import org.apache.commons.httpclient.util.*;
import java.io.*;
import java.util.*;


public class TradSimUnicode {
   private static String checkURL = "cgibin.erols.com";


   public static void main(String argv[]) {

      HttpClient client = new HttpClient();

      try {

         HostConfiguration hc = new HostConfiguration();
         hc.setHost(checkURL, 80, "http");
         client.setHostConfiguration(hc);
         client.getState().setCookiePolicy(CookiePolicy.COMPATIBILITY);

          check(client, '\u4e0e');

      } catch(Exception e) {
         e.printStackTrace();
      }
   }

   public static void check(HttpClient client, char c) throws Exception {

      String code = toFormat(c);
      String s = unicodeToString(code);


      PostMethod post = new
PostMethod("/mandarintools/cgi-bin/charlook.pl");

      // Prepare login parameters
      NameValuePair v1     = new NameValuePair("searchmode", "standard");
      NameValuePair v2     = new NameValuePair("printtype", "utf8");
      NameValuePair v3     = new NameValuePair("chartype", "trad");
      NameValuePair v4     = new NameValuePair("ordering", "frequency");
      NameValuePair v5     = new NameValuePair("display", "char");
      NameValuePair v6     = new NameValuePair("display", "variants");
      NameValuePair v7     = new NameValuePair("display", "unicode");
      NameValuePair v8     = new NameValuePair("enctype", "utf8");
      NameValuePair v9     = new NameValuePair("whatchar", s);
      NameValuePair v10    = new NameValuePair("searchchar", "Search by
Character");


      NameValuePair[] valPairs = { v1, v2, v3, v4, v5, v6, v7, v8, v9,
v10 };

      post.setRequestBody(valPairs);


      client.executeMethod(post);

      String resp = post.getResponseBodyAsString();

      System.out.write(resp.getBytes("UTF-8"));

      post.releaseConnection();
   }

   public static String unicodeToString(String unicodeString) {
  if (unicodeString == null) {
   return null;
  }

  StringBuffer buf = new StringBuffer();

  StringTokenizer tokens = new StringTokenizer(unicodeString, "\\u");

  while(tokens.hasMoreTokens()) {
   String token = (String)tokens.nextToken();

   char oneUnicodeChar = (char)Integer.parseInt(token, 16);

   buf.append(oneUnicodeChar);
  }

  return buf.toString();
 }

 public static String toFormat(int n){
  String zeros = "000";
  String body = Integer.toHexString(n);
  return "\\u" + zeros.substring(0, 4-body.length()) + body;
 }
}

============================================================





==========================
If you know what you are doing,
it is not called RESEARCH!
==========================


Mime
View raw message