From: bigD on
Hello,

I am doing a project that involves downloading a few hundred web pages
with a dynamic url (see below) and then parsing out the necessary
information.

I am able to do this pretty well, but the downloaded files are pretty
big (8 megs each). When I view and save the page source in Firefox,
the file is only a few hundred KB. I assume that SAS is downloading
the jpgs and whatnot into the file. Is there a way to get just the
text elements out of the downloaded HTML file?
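To show what I mean, here is roughly the kind of post-processing I had
in mind -- just a sketch, reusing the fileref foo from the code below,
and the tag-stripping regex is a guess I have not tested against these
pages:

/* sketch only: read the downloaded records, strip anything that looks
   like an HTML tag, and keep the lines that still contain text */
data text_only;
infile foo length=len;
input record $varying5000. len;
record = prxchange('s/<[^>]*>//', -1, record); /* drop <...> tags */
if not missing(record); /* keep non-blank lines */
run;

That would trim the saved data set, though obviously not the size of
the download itself.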

Thanks.

PS: My attempts at doing this in Perl have been far more frustrating,
so please don't offer that solution.



%macro adds;
proc sql;
select address into:address separated by '-'
from new;
quit;

%do i = 1 %to &sqlobs;

/* %nrstr keeps the macro processor from treating &originRegion in the
   query string as a macro variable reference */
filename foo url
"http://www.stoplanner1.com/travelplanner/travelplanner?origin=%Qscan(&address.,&i,'-')%nrstr(&originRegion)=&b"
lrecl=5000;



/* try on a short list of two names first */
data _%sysfunc(translate(%Qscan(&address.,&i,'-'),"_","+")); /* need to get rid of the pluses */
infile foo length=len;
input record $varying5000. len;
run;

data _%sysfunc(translate(%Qscan(&address.,&i,'-'),"_","+"))_2;
set _%sysfunc(translate(%Qscan(&address.,&i,'-'),"_","+"));
informat duration 6.0;
format duration 6.0;
informat walk 6.0;
format walk 6.0;
*where id in (128,129);
if _N_ = 1 then
do;
retain ExpressionID;
retain ExpressionIDw;

/*change back to url parse as macro*/
pattern = '/Duration:\s\d\d?\sminutes/';
ExpressionID = prxparse(pattern);

patternw = '/Walking:\s\d\d?\sminutes/';
ExpressionIDw = prxparse(patternw);
end;
call prxsubstr(ExpressionID, record, position, length);
if position ^= 0 then
do;
match = substr(record, position, length);
/* skip "Duration: " (10 chars), drop the trailing " minutes" (8 chars);
   the *1 converts the extracted digits to numeric */
duration = substrn(record, position + 10, length - 10 - 8) * 1;
end;

etc.....