From: Patrick on
Hi

The "issue" is that hash tables use exact key matches but you have to
deal with a date range. The option multidata: 'y' in the declare
statement allow to load multiple observations per key into the hash
table.

The approach is to first match over the ID and then loop through the
subset to find the observation with the correct date range.

See example below.

Data Main(drop=j);
format EventDate date9.;
do personID=1 to 3;
do j=1 to 10;
EventDate='01jan2010'd+floor(ranuni(1)*365);
output;
end;
end;
run;

Data Lookup;
format FromDate ToDate date9.;
retain RateCode Cycle 'Example Value';
do personID=1 to 10;
do FromDate='01jan2010'd to '01Dec2010'd by 30;
ToDate=FromDate+10;
output;
end;
end;
run;

Data Main(drop=hit rc);
/* declare hash table */
attrib FromDate ToDate length=8 format=date9.;
attrib RateCode Cycle length=$32;
if _n_=1 then
do;
declare hash h(dataset: "Lookup", hashexp: 10,multidata: 'y');
h.defineKey('personID');
h.defineData('FromDate','ToDate','RateCode','Cycle');
h.defineDone();
call missing(FromDate,Todate,RateCode,Cycle); end;

set Main;

rc=h.find();
do while(rc = 0);
if EventDate>=FromDate and EventDate<=ToDate then
do;
hit='1';
leave;
end;
rc = h.find_next();
end;
if hit ne '1' then call missing(FromDate,Todate,RateCode,Cycle);
run;



In case your data is already sorted by ID and date then may be another
approach could perform bettter.

HTH
Patrick

From: MichelleZ on

MarySAS,

If you submit some sample data with your question, you may get a
quicker response.

Start simple, with sample data, use examples from real data. I made
some up....take a look at the following.
Check your results carefully and check against other methods to see if
another is better.


/*
The first table, JAN, is huge, 140million records,
distinct by PersonID, EventDate and
Hour.PersonIDEventDateHourEventAmt

*/
data JAN;
PersonId=1;
EventDate=today();
Hour=timepart(today());
EventAmt=100;
output;
PersonId=2;
EventDate=today()+24;
Hour=timepart(today())+3600;
EventAmt=200;
output;
PersonId=3;
EventDate=today()+48;
Hour=timepart(today())+7200;
Eventamt=300;
output;
run;


proc print u;
title 'JAN';
format eventdate mmddyy9. hour time11.2;

run;

/*The second table, MASTER, has 2 fields RateCode and Cycle that I
need
to lookup and put onto the first file. A person can have different
cycle and rate codes for different days or range of days. The Second
table has 1 million records, distinct by PersonID, FromDate and
ToDate:
PersonID
FromDate
ToDate
RateCode
Cycle
*/

data MASTER;
RateCode=300;
Cycle=.5;
PersonId=1;
FromDate='01mar2010'd;
Todate='31may2010'd;
output;
RateCode=400;
Cycle=.75;
PersonId=2;
FromDate='01apr2010'd;
Todate='31may2010'd;
output;
RateCode=500;
Cycle=.95;
PersonId=3;
FromDate='01may2010'd;
Todate='31may2010'd;
output;
run;

proc print u;
title 'MASTER';
format FromDate ToDate mmddyy10.;
run;

*****************************************************************************************/;
/*
Data JanNew;
If _n_ = 0 then set Master;
If _n_ = 1 then do;
Declare hash MAS (Dataset : ‘master’, multidata: ‘Y’, Ordered: ‘A’)

Rc=mas.definekey (‘PersonId’);

Rc=mas.definedata (‘fromdt’, ‘todate’, ‘ratecode’, ‘cycle’);

RC=mas.definedone();

Do until (eof);
Set Jan end=eof;
RC=mas.find ();
If RC = 0 then do;
If FromDate <= EventDate <= ToDate then output;
run;
*/

data JanNew;
declare hash MAS () ;
rc = MAS.DefineKey ( 'PersonId') ;
rc = MAS.DefineData ( 'EventDate','EventAmt','Hour') ;
rc = MAS.DefineDone () ;
do until ( eof1 ) ;
set Jan end = eof1 ;
rc = MAS.add () ;
end ;
do until ( eof2 ) ;
set MASTER end = eof2 ;
rc = MAS.find () ;
if rc = 0 and FromDate < EventDate < ToDate then output;
end ;
run ;

proc print data=JanNew;
title 'JanNew';
format EventDate FromDate ToDate mmddyy10. hour time11.2;
run;