Example 1
Using open code to do a many to many
merge - by Shelley Derksen.
* Example of many to many merge without using proc sql;
* file1 has 1 obs for id=1 and 2 obs
for id=2;
data file1;
infile cards;
input id $ dateadm $
datesep $;
cards;
1 950101 950102
2 950101 950201
2 950210 950220
;
* file2 has 2 obs for id=1 and 3 obs
for id=2;
data file2;
infile cards;
input id $ dateserv $;
cards;
1 950101
1 950105
2 950101
2 950110
2 950211
;
proc print data=file1;
title 'data=file1';
run;
proc print data=file2;
title 'data=file2';
run;
*make a new data set from file2 with
one observation per id and a start
pointer and an end pointer which
indicate the observation number of the
start and end for each id;
proc sort data=file2;
by id;
run;
data pointer;
set file2;
by id;
retain start end 0;
if first.id then start=_n_;
*observation number of start for id;
if last.id then do;
end=_n_;
* observation number of end for id;
output;
* output 1 observation per id;
end;
keep id start end; *
only keep the merge variable(s) and pointers;
run;
proc print data=pointer;
title 'Pointers to start
and end observations for each id in file2';
run;
* merge pointers to file1.
Since pointer only has 1 obs
per id there is no problem merging;
proc sort data=file1;
by id;
run;
data file1;
merge file1 (in=m1)
pointer (in=m2);
by id;
if m1 and m2;
run;
proc print data=file1;
title 'data=file1 with
pointers to file2 attached';
run;
* now use the pointers to merge the
observation in file2 to file1;
data mer1;
set file1;
do i=start to end;
set
file2 point=i;
output;
end;
run;
proc print data=mer1;
title 'Merge of file1
and file2 using pointer method';
run;
Using SAS SQL to do a many to many merge - by Shelley Derksen.
* Example of using proc sql;
* file1 has 1 obs for id=1 and 2 obs
for id=2;
data file1;
infile cards;
input id $ dateadm $
datesep $;
cards;
1 950101 950102
2 950101 950201
2 950210 950220
;
* file2 has 2 obs for id=1 and 3 obs
for id=2;
data file2;
infile cards;
input id $ dateserv $;
cards;
1 950101
1 950105
2 950101
2 950110
2 950211
;
proc print data=file1;
title 'data=file1';
run;
proc print data=file2;
title 'data=file2';
run;
* merge file1 and file2 so that 1*2=2
obs for id=1 and 2*3=6 obs for id=2;
proc sql;
create table mer1 as
select *
from file1 as h,
file2 as m
where h.id=m.id;
proc print data=mer1;
title 'Merge of file1
and file2 where file1.id=file2.id';
run;
Example from Charles Burchill, November 15, 2000
*** PCH information from statistical
data, can have multiple
records/PHIN. Selected for records
in a single fiscal year
using date by date of admission and
paid to date ;
data pch ;
set pch.pch9899
;
ndatepad=input(datepad,yymmdd8.)
;
ndatpaid=input(datepaid,yymmdd8.)
;
run;
*** Get hospital data keeping only
those records and
variables needed - keep data small
so SQL does not choke
to much ;
data hosp ;
set cpe.hsp9899l(keep=los
dateadm datesep phin91 transact
where=(transact='1')) ;
*** note adjust for yearcutoff=1920
;
ndtadm=input(dateadm,yymmdd6.)
;
ndtsep=input(datesep,yymmdd6.)
;
run;
*** Create new SAS data set selecting
necessary variables
out of hospital and PCH data.
Match records on PHIN (phindate)
with hospital admission dates within PCH stay
;
proc sql ;
create table pch2 as
select h.phin91,
h.ndtadm, h.ndtsep
p.phindata, p.ndatepad, p.ndatpaid
from hosp as h,
pch as p
where h.phin91=p.phindata
and
p.ndatpaid > h.ndtadm > p.ndatepad ;
Code modified from Pat Nicol.
* file1 has 1 obs for id=1 and 2 obs
for id=2;
data file1;
infile cards;
input id $ dateadm $
datesep $;
cards;
1 950101 950102
2 950101 950201
2 950210 950220
;
* file2 has 2 obs for id=1 and 3 obs
for id=2;
data file2;
infile cards;
input id $ dateserv $;
cards;
1 950101
1 950105
2 950101
2 950110
2 950211
;
*** Sort data sets by appropriate variables
so they will interleave appropriately;
proc sort data=file1 ;
by id dateadm descending
datesep;
run;
proc sort data=file2 ;
by id dateserv ;
run;
data final(drop=datesep) ;
set file1(in=a rename=(dateadm=dateserv))
file2(in=b)
;
by id dateserv ;
** retain variables so
admission and separation are carried onto
single
date (medical) records ;
retain adm sep ;
** initialize adm and
sep ;
if first.id then do ;
adm = ' ' ;
sep = ' ' ;
end ;
** identify dates to
carry over for each following record ;
if a then do ;
adm = dateserv;
sep=datesep ;
end ;
** Limit data to records
of interest - single date (medical)
records
that fall between admission and separation dates ;
if b & (adm <=
dateserv <= sep) then output ;
run;
proc print ;
run;