hc-httpclient-users mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Ken Krugler <kkrugler_li...@transpac.com>
Subject Re: Downloading HTML frameset pages via HTTPClient
Date Tue, 25 Aug 2009 13:30:40 GMT
Hi Melroyr,

On Aug 25, 2009, at 3:19am, melroyr wrote:

> Ken, Thanks for your response.
> If you look at the source at
> http://flyer.harristeeter.com/HT_eVIC/ThisWeek/ReviewAllSpecials.jsp?ToCat=0
> thru 13, the page changes its content. However downloading the same  
> pages
> thru HTTPClient, I get a message that says the browser does not  
> support
> framesets and there is no content.

The use of the frameset tag isn't the issue.

Your problem is that this site sets a cookie (StoreNumberCK) with a  
store id. If that's set, then you get a page with full content.

If it's not set, you get the page that you sent to the list, which  
contains a link that, when clicked, will let you pick your local store.

You'll need to figure out what content to set in that cookie, and  
programmatically create it before making the HTTP GET request.

-- Ken

>
>
> melroyr wrote:
>>
>> I have written a program to download html pages from harristeeter.
>> However, when I run my program, I get the following
>>
>> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
>> "http://www.w3.org/TR/html4/frameset.dtd">
>> <html>
>> <head>
>> <title>Your Personal Shopping List</title>
>> <meta http-equiv="Content-Type" content="text/html;  
>> charset=iso-8859-1">
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>>
>> <script language='javascript'>
>>
>> if (top.location != self.location) {
>> 		top.location = self.location
>> }
>>
>> if ('null' == 'null')
>> {
>> 	var width = screen.width;
>> 	var height = screen.height;
>>
>> 	var myWidth = 640, myHeight = 480;
>> 	if( typeof( window.innerWidth ) == 'number' ) {
>> 		//Non-IE
>> 		myWidth = window.innerWidth;
>> 		myHeight = window.innerHeight;
>> 	}
>> 	else if( document.documentElement &&
>> 	  ( document.documentElement.clientWidth ||
>> document.documentElement.clientHeight ) )
>> 	{
>> 		//IE 6+ in 'standards compliant mode'
>> 		myWidth = document.documentElement.clientWidth;
>> 		myHeight = document.documentElement.clientHeight;
>> 	}
>> 	else if( document.body &&
>> 		 ( document.body.clientWidth || document.body.clientHeight ) )
>> 	{
>> 		//IE 4 compatible
>> 		myWidth = document.body.clientWidth;
>> 		myHeight = document.body.clientHeight;
>> 		height = screen.availHeight;
>> 		width = screen.availWidth;
>> 	}
>>
>> 	
>>
>> 	var x = 0;
>> 	var y = 0;
>>
>> 	
>>
>> 	var minWidth = (width < 960) ? width : 960;
>>
>> 	if (myWidth < minWidth && width >= minWidth && myWidth > 0 &&
>> 	    myHeight > 0)
>> 	{
>> 		if (navigator.appName=="Netscape") y = self.screenY;
>> 		else y = self.top;
>>
>> 		var w = 800;
>> 		var h = myHeight;
>> 		var new_y = y;
>> 		if (screen.width > 1024) w = 1024;
>> 		else if (screen.width > 960) w = 960;
>> 		if (myHeight < (0.80) * height)
>> 		{
>> 			h = (0.80)*height;
>> 			new_y = (height - h)/2;
>> 		}
>>
>> 		if (new_y < y) y = new_y;
>>
>> 		x = (width - w)/2;
>>
>> 		if (x < 0)
>> 		{
>> 			w += x;
>> 			x = 0;
>> 		}
>>
>> 		if (y < 0)
>> 		{
>> 			h += y;
>> 			y = 0;
>> 		}
>>
>> 		if (w > width) w = width;
>>
>> 		
>>
>> 		if (parseInt(navigator.appVersion)>3)
>> 		{
>> 		   if (navigator.appName=="Netscape")
>> 		   {
>> 				self.outerWidth=w;
>> 				self.outerHeight=h;
>> 				self.moveTo(x,y);
>> 		   }
>> 		   else
>> 		   {
>> 				self.resizeTo(w,h);
>> 				self.moveTo(0,0);
>> 		   }
>> 		}
>> 	}
>>
>>
>> location='index.jsp?screenwidth='+screen.width 
>> +'&default_screenwidth=1&rand='+Math.random();
>> }
>>
>> if ('false' == 'true')
>> {
>> 	top.location='index.jsp?ID'+Math.round(Math.random()*10000);
>> }
>>
>> </script>
>>
>> </head>
>>
>>
>> <frameset rows="*,0" cols="*" frameborder="no" border="0"
>> framespacing="0">
>> <frameset rows="132,*" cols="*" frameborder="no" border="0"
>> framespacing="0">
>>  <frame src="top.jsp" name="topFrame" scrolling="no" noresize>
>>  <frameset rows="*" cols="400,*" framespacing="0" frameborder="no"
>> border="0">
>> 	<frame src="ReviewAllSpecials.jsp" name="mainFrame" scrolling="YES">
>> 	<frame src="list.jsp" name="rightFrame" scrolling="YES" noresize>
>>  </frameset>
>> </frameset>
>> <frame src="actions.jsp" name="bottomFrame" scrolling="YES" noresize>
>> </frameset>
>>
>> <noframes><body>
>> This application requires the use of frames, which your browser  
>> does not
>> support.
>> </body></noframes>
>>
>> </html>
>>
>> The URL I am using to download the pages is
>> http://flyer.harristeeter.com/HT_eVIC/ThisWeek/ReviewAllSpecials.jsp
>>
>> Please advise if there is some setting that I need to set in  
>> HttpClient? I
>> have read about HtmlCleaner and stuff but I do not think they will  
>> help.
>>
>> Thanks,
>> Melroy
>>
>
> -- 
> View this message in context: http://www.nabble.com/Downloading-HTML-frameset-pages-via-HTTPClient-tp25121961p25131807.html
> Sent from the HttpClient-User mailing list archive at Nabble.com.
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: httpclient-users-unsubscribe@hc.apache.org
> For additional commands, e-mail: httpclient-users-help@hc.apache.org
>

--------------------------
Ken Krugler
TransPac Software, Inc.
<http://www.transpac.com>
+1 530-210-6378


---------------------------------------------------------------------
To unsubscribe, e-mail: httpclient-users-unsubscribe@hc.apache.org
For additional commands, e-mail: httpclient-users-help@hc.apache.org


Mime
View raw message