hc-httpclient-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Oleg Kalnichevski <ol...@apache.org>
Subject Re: Japanese charset?
Date Wed, 15 Jun 2005 09:23:20 GMT
Andrew,

Consider using the getResponseBodyAsStream method instead of
getResponseBodyAsString. Apparently the server does not include the
charset attribute in the content type header, which causes
getResponseBodyAsString to fall back onto the default content charset
(ISO-8859-1).

Reader reader = new InputStreamReader(
            httpget.getResponseBodyAsStream(),
	    "JIS"); 

Hope this helps

Oleg


On Wed, Jun 15, 2005 at 11:20:01AM +0700, Andrew A. Sabitov wrote:
> 
> Hi all!
> 
> Could anybody be so kind as to help me? I need to make a robot that will
> fetch some data from amazon.co.jp. It will run under Linux.
> 
> 
> This URL is my starting point:
> http://s1.amazon.co.jp/exec/varzea/subst/your-account/downloadable-reports.html
> 
> The class code that downloads the page is below. The problem is that
> method.getResponseBodyAsString() returns a string in which all Japanese chars
> are replaced by question marks.
> 
> How can I fix this problem?
> 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 
> import java.io.FileWriter;
> import java.io.IOException;
> 
> import org.apache.commons.httpclient.Cookie;
> import org.apache.commons.httpclient.HostConfiguration;
> import org.apache.commons.httpclient.HttpConnection;
> import org.apache.commons.httpclient.HttpException;
> import org.apache.commons.httpclient.HttpState;
> import org.apache.commons.httpclient.HttpStatus;
> import org.apache.commons.httpclient.URI;
> import org.apache.commons.httpclient.protocol.Protocol;
> import org.apache.commons.httpclient.cookie.CookiePolicy;
> import org.apache.commons.httpclient.methods.GetMethod;
> 
> import ru.pp.sabitov.common.HttpResponse;
> 
> public class Client {
> 
>     private String         url        = null;
> 
>     private HttpConnection connection = null;
>     private Cookie[]       cookies    = null;
> 
>     private String         proxyHost  = null;
>     private int            proxyPort  = -1;
> 
>     public Client () {
> 
>     public void setProxy ( String host, String port ) {
> 
>     public void setProxy ( String host, int port ) {
> 
>     public HttpResponse openGetHttpConnection ( String url ) throws NullPointerException,
HttpException, IOException {
>         HttpResponse result = null;
> 
>         System.out.println ( url );
>         
>         URI uri = new URI ( url.toCharArray () );
> 
>         String schema = uri.getScheme ();
>         if ( ( schema == null ) || ( schema.equals ( "" ) ) ) {
>             schema = "http";
>         }
>         Protocol protocol = Protocol.getProtocol ( schema );
> 
>         HttpState state = new HttpState ();
>         state.setCookiePolicy ( CookiePolicy.RFC2109 );
>         if ( cookies != null ) {
>             for ( int idx = 0; idx < cookies.length; idx++ ) {
>                 Cookie cookie = cookies [ idx ];
>                 System.out.println ( "Cookie: " + cookie );
>                 state.addCookie ( cookie );
>             }
>         }
> 
>         String host = uri.getHost ();
>         int port = uri.getPort ();
>         GetMethod method = new GetMethod ( uri.toString () );
>         method.setFollowRedirects ( true );
>         
>         HostConfiguration hostConfig = new HostConfiguration();
>         if ( ( proxyHost != null ) && ( proxyPort != -1 ) ) {
>             hostConfig.setProxy( proxyHost, proxyPort );
>         }
> 
>         org.apache.commons.httpclient.HttpClient client = new org.apache.commons.httpclient.HttpClient
();
>         client.setHostConfiguration( hostConfig );
>         client.setState ( state );
>         client.executeMethod( method );
> 
>         if ( method.getStatusCode() == HttpStatus.SC_OK ) {
>             cookies = client.getState().getCookies ();
>             FileWriter w = new FileWriter ("123.txt", true);
>             w.write( method.getResponseBodyAsString () );
>             w.close();
>             result = new HttpResponse ( method.getResponseBodyAsString () );
>         } else {
>             System.out.println ( "Unexpected failure: " + method.getStatusLine ().toString
() );
>         }
>         method.releaseConnection ();
> 
>         return result;
>     }
> 
> }
> 
> 
> 
> -- 
>        ,,,,
>        /'^'\
>       ( o o )
> --oOOO--(_)--OOOo------------------------------------------------
> |                  Andrew A. Sabitov
> |                  Email: sabitov@catalysis.nsk.su
> |                  WWW:   fir.catalysis.nsk.su/~sabitov
> | .oooO   ?? ????? ?????? - ???? ?? ?????, ?? ???????!
> | (   )   Oooo.
> ---\ (----(   )-------------------------------------------------
>     \_)    ) /
>           (_/
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: httpclient-user-unsubscribe@jakarta.apache.org
> For additional commands, e-mail: httpclient-user-help@jakarta.apache.org
> 

---------------------------------------------------------------------
To unsubscribe, e-mail: httpclient-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: httpclient-user-help@jakarta.apache.org


Mime
View raw message