Many-to-Many: Examples

Date: March 26, 2002

Example 1
Using open code to do a many to many merge - by Shelley Derksen.

* Example of many to many merge without using proc sql;

* file1 has 1 obs for id=1 and 2 obs for id=2;
data file1;
   infile cards;
   input id $ dateadm $ datesep $;
cards;
1  950101  950102
2  950101  950201
2  950210  950220
;

* file2 has 2 obs for id=1 and 3 obs for id=2;
data file2;
   infile cards;
   input id $ dateserv $;
cards;
1 950101
1 950105
2 950101
2 950110
2 950211
;

proc print data=file1;
   title 'data=file1';
run;

proc print data=file2;
   title 'data=file2';
run;

*make a new data set from file2 with one observation per id and a start
 pointer and an end pointer which indicate the observation number of the
 start and end for each id;

proc sort data=file2;
   by id;
run;

data pointer;
   set file2;
   by id;
   retain start end 0;
   if first.id then start=_n_; *observation number of start for id;
   if last.id then do;
      end=_n_; * observation number of end for id;
      output;  * output 1 observation per id;
   end;
   keep id start end; * only keep the merge variable(s) and pointers;
run;

proc print data=pointer;
   title 'Pointers to start and end observations for each id in file2';
run;

* merge pointers to file1.
  Since pointer only has 1 obs per id there is no problem merging;

proc sort data=file1;
   by id;
run;

data file1;
   merge file1 (in=m1)
         pointer (in=m2);
   by id;
   if m1 and m2;
run;

proc print data=file1;
   title 'data=file1 with pointers to file2 attached';
run;

* now use the pointers to merge the observation in file2 to file1;
data mer1;
   set file1;
   do i=start to end;
      set file2 point=i;
      output;
   end;
run;

proc print data=mer1;
   title 'Merge of file1 and file2 using pointer method';
 run;
 

Example 2

Using SAS SQL to do a many to many merge - by Shelley Derksen.

* Example of using proc sql;

* file1 has 1 obs for id=1 and 2 obs for id=2;
data file1;
   infile cards;
   input id $ dateadm $ datesep $;
cards;
1  950101  950102
2  950101  950201
2  950210  950220
;

* file2 has 2 obs for id=1 and 3 obs for id=2;
data file2;
   infile cards;
   input id $ dateserv $;
cards;
1 950101
1 950105
2 950101
2 950110
2 950211
;

proc print data=file1;
   title 'data=file1';
run;

proc print data=file2;
   title 'data=file2';
run;

* merge file1 and file2 so that 1*2=2 obs for id=1 and 2*3=6 obs for id=2;
proc sql;
   create table mer1 as
   select *
   from file1 as h,
        file2 as m
   where h.id=m.id;

proc print data=mer1;
   title 'Merge of file1 and file2 where file1.id=file2.id';
 run;

Example 3

Example from Charles Burchill, November 15, 2000

*** PCH information from statistical data, can have multiple
records/PHIN.  Selected for records in a single fiscal year
using date by date of admission and paid to date ;
data pch ;
    set pch.pch9899 ;
    ndatepad=input(datepad,yymmdd8.) ;
    ndatpaid=input(datepaid,yymmdd8.) ;
    run;

*** Get hospital data keeping only those records and
variables needed - keep data small so SQL does not choke
to much ;
data hosp ;
   set cpe.hsp9899l(keep=los dateadm datesep phin91 transact
                    where=(transact='1')) ;
   *** note adjust for yearcutoff=1920 ;
   ndtadm=input(dateadm,yymmdd6.) ;
   ndtsep=input(datesep,yymmdd6.) ;
   run;

*** Create new SAS data set selecting necessary variables
out of hospital and PCH data.  Match records on PHIN (phindate)
with hospital admission dates within PCH stay ;
proc sql ;
   create table pch2 as
   select h.phin91,   h.ndtadm, h.ndtsep
          p.phindata, p.ndatepad, p.ndatpaid
   from hosp as h,
        pch as p
   where h.phin91=p.phindata and
         p.ndatpaid > h.ndtadm > p.ndatepad ;

Example 4

Code modified from Pat Nicol.

* file1 has 1 obs for id=1 and 2 obs for id=2;
data file1;
   infile cards;
   input id $ dateadm $ datesep $;
cards;
1  950101  950102
2  950101  950201
2  950210  950220
;

* file2 has 2 obs for id=1 and 3 obs for id=2;
data file2;
   infile cards;
   input id $ dateserv $;
cards;
1 950101
1 950105
2 950101
2 950110
2 950211
;

*** Sort data sets by appropriate variables so they will interleave appropriately;
proc sort data=file1 ;
   by id dateadm  descending datesep;
   run;
proc sort data=file2 ;
   by id dateserv ;
   run;

data final(drop=datesep) ;
   set file1(in=a rename=(dateadm=dateserv))
      file2(in=b) ;
   by id dateserv ;
   ** retain variables so admission and separation are carried onto
      single date (medical) records ;
   retain adm sep ;
   ** initialize adm and sep ;
   if first.id then do ;
       adm = ' ' ;
       sep = ' ' ;
       end ;
   ** identify dates to carry over for each following record ;
   if a then do ;
        adm = dateserv;
        sep=datesep ;
        end ;
   ** Limit data to records of interest - single date (medical)
      records that fall between admission and separation dates ;
   if b & (adm <= dateserv <= sep) then output ;
   run;

proc print ;
run;
 


©2003 Manitoba Centre for Health Policy (MCHP)