%macro iterated_relationships(cohort, keeplist); %put ; %put Identify Relationships from MH Registry - Randy Walld ; %put February 21, 2017. Original code T:\RandyW\maclib\registry_relationship.sas ; %put Revised November 23, 2020 ; %put Documentation: http://portal.cpe.umanitoba.ca/workunits/programmers/Meeting%20Minutes/2017/17feb22_Relationship_macro_rw.pptx ; %put ; data reg; set registry.mhmrs_regphin_19702020; length realphin 3; length covbrth $1; covyymm=substr(put(rieffdt,yymmddn8.),1,6); brthyymm=substr(put(ribirthdt,yymmddn8.),1,6); realphin=(substr(put(scrphin,15.),10,6)='000000'); covbrth=(covyymm=brthyymm); format rieffdt rienddt ribirthdt yymmdd8.; run; * File BD is birthdates for determination of twin pairs; * defined as full siblings born on the same or successive days; * Sort by SCRPHIN REGYEAR to take BIRTHDT from most recent registry; proc sort data=reg out=bd; by scrphin regyear; run; data bd; set bd; by scrphin regyear; if last.scrphin; run; * Single-day records with cancode of 6 or W dropped because they are zero-length periods; * which do not indicate any coverage duration. Note that you do not want to delete all; * single-day records because deaths can occur on day of birth; data reg; set reg; if rieffdt=rienddt and canccode in ('6','W') then delete; run; * Remove duplicates on REGNO/sex/birth year/month where one record has real PHIN and one does not; proc sort data=reg; by regno sex brthyymm descending realphin; run; data dups regtmp; set reg; by regno sex brthyymm descending realphin; if ^(first.brthyymm and last.brthyymm) then output dups; else output regtmp; run; data dups; set dups; retain keep; by regno sex brthyymm descending realphin; if first.brthyymm then do; if realphin=1 then keep=1; else keep=0; end; if keep=1 and realphin=0 then delete; run; data reg; set regtmp dups; drop realphin keep; run; proc sort data=reg; by scrphin rieffdt; run; * Some kids are born as grandchildren (RELCD=8) if mother is underage. Most of these records; * (about 85%) will ultimately be assigned to a new family head when mother reaches adulthood,; * at which point the RELCD changes to 5; * There are at least two situations in which a child is never assigned a dependent RELCD; * 1) Child remains registered as a grandchild up to adulthood (~15% of those registered as grandchild); * 2) Child is born to mother who is still underage at the time of the last registry.; * These of course may be found on future registries under their mothers REGNO; * Try flagging those who are RELCD=8 at birth and see how many of these are never assigned; * a dependent REGNO; data reg2; set reg; by scrphin rieffdt; length dr grchild 3 depreg01-depreg10 5 deprdt01-deprdt10 4 deprel01-deprel10 $1; array depreg(10) depreg01-depreg10; array deprdt(10) deprdt01-deprdt10; array deprel(10) deprel01-deprel10; retain dr grchild depreg01-depreg10 deprdt01-deprdt10 deprel01-deprel10; if first.scrphin then do; dr=0; grchild=0; do i=1 to 10; depreg{i}=.; deprdt{i}=.; deprel{i}=" "; end; end; * Has child ever been registered as a grandchild?; if rirelcd='8' then grchild=1; * RELCODE indicating Child, Stepchild, Incapacitated Child; if rirelcd in ('5','6','7') then do; dr=dr+1; depreg{dr}=regno; deprdt{dr}=rieffdt; deprel{dr}=rirelcd; end; if last.scrphin; run; * For first dependent REGNO, who were male and female family heads?; * Note: Allow for some difference between birth/registration of child and mother/father; * at that REGNO. It sometimes happens that mother is registered at the REGNO the; * child was registered under, several months later when she turns 18; proc sql; create table tst01 as select a.*, b.rieffdt, b.rienddt, b.sex as fhsex01, b.regyear as year, b.scrphin as fhphin01, (b.rieffdt<=a.deprdt01<=b.rienddt) as overlap from reg2 as a join reg as b on a.depreg01=b.regno and (b.rieffdt-1096)<=a.deprdt01<=b.rienddt and b.rirelcd in ('1','2','3'); run; * About 1% of the time we end up with >2 family heads, which should be impossible; * When there are two same-sex family heads, choose the one which overlaps; proc sort data=tst01; by scrphin fhsex01 descending overlap; run; data tst01; set tst01; by scrphin fhsex01 descending overlap; if first.fhsex01; drop overlap; run; proc sql; create table tst02 as select a.*, b.rieffdt, b.rienddt, b.sex as fhsex02, b.regyear as year, b.scrphin as fhphin02, (b.rieffdt<=a.deprdt02<=b.rienddt) as overlap from reg2 as a join reg as b on a.depreg02=b.regno and (b.rieffdt-1096)<=a.deprdt02<=b.rienddt and b.rirelcd in ('1','2','3'); run; proc sort data=tst02; by scrphin fhsex02 descending overlap; run; data tst02; set tst02; by scrphin fhsex02 descending overlap; if first.fhsex02; drop overlap; run; * Sort by YEAR to put later registries last. This will overwrite older registry fake PHINS with; * later registry real PHINs; proc sort data=tst01; by scrphin year; run; data tst01; set tst01; by scrphin year; retain malefh01 femalefh01; if first.scrphin then do; malefh01=.; femalefh01=.; end; if fhsex01='1' then malefh01=fhphin01; if fhsex01='2' then femalefh01=fhphin01; if last.scrphin; keep scrphin sex ribirthdt malefh01 femalefh01; run; proc sort data=tst02; by scrphin year; run; data tst02; set tst02; by scrphin year; retain malefh02 femalefh02; if first.scrphin then do; malefh02=.; femalefh02=.; end; if fhsex02='1' then malefh02=fhphin02; if fhsex02='2' then femalefh02=fhphin02; if last.scrphin; keep scrphin sex ribirthdt malefh02 femalefh02; run; data tst01; merge tst01(in=a) tst02(in=b); by scrphin; if a or b; run; * If first REGNO returns blank, take results from second REGNO (if any); * WHO ARE THESE SECOND REGNOS?? WHY DO WE NEED TO GO TO SECOND REGNO FOR 10% OR PEOPLE?; data tst01; set tst01; if malefh01=. and femalefh01=. then do; malefh01=malefh02; femalefh01=femalefh02; end; drop malefh02 femalefh02; run; proc sort data=tst01; by femalefh01 malefh01; run; data tst01; set tst01; by femalefh01 malefh01; retain fn; if _N_=1 then fn=0; if femalefh01=. then do; if first.malefh01 then fn=fn+1; end; else do; if first.femalefh01 then fn=fn+1; end; run; data tst01; set tst01; mfh=(malefh01>.); ffh=(femalefh01>.); run; * Who has ever been registered as spouse?; data fh; set reg; by scrphin rieffdt; length spreg 5 sprdt spedt spyear 4; if first.scrphin then do; sr=0; spreg=.; sprdt=.; spedt=.; spyear=.; end; * RELCODE indicating Family Head; if rirelcd='1' then do; spreg=regno; spyear=regyear; sprdt=rieffdt; spedt=rienddt; output; end; keep scrphin sex ribirthdt spyear rirelcd spreg sprdt spedt; run; * For each registration as family head, were there any spouses?; proc sql; create table spouse as select a.*, b.rieffdt, b.rienddt, b.scrphin as spphin, b.sex as spsex, b.ribirthdt as spbdt from fh as a join reg as b on a.spreg=b.regno and a.sprdt<=b.rieffdt<=a.spedt and b.rirelcd in ('2','3'); run; * Duration of joint registration is defined as max(startdate1,startdate2) to min(enddate1,enddate2); data spouse; set spouse; sprdt=max(sprdt,rieffdt); spedt=min(spedt,rienddt); drop spreg spyear rirelcd rieffdt rienddt; run; * Replicate each record exchanging spouses. This is done to put each member of the pair as; * SCRPHIN, because we want to count people, not marriages!; * Keep track of spouse sex and birthdate as well... previously this just kept family head info; * 1 SCRPHIN SPPHIN; * 2 SPPHIN SCRPHIN; data spouse; set spouse; length tmpphin tmpbdt 8 tmpsex $1; output; tmpphin=spphin; tmpbdt=spbdt; tmpsex=spsex; spphin=scrphin; spsex=sex; spbdt=ribirthdt; scrphin=tmpphin; sex=tmpsex; ribirthdt=tmpbdt; output; drop tmp:; run; data spouse; set spouse; spousephin=spphin; format spousephin z15.; run; proc sort data=spouse; by scrphin; run; * Population of siblings; proc sql; create table siblings as select a.*, b.scrphin as relphin, b.sex as relsex from tst01 a inner join tst01 b on a.fn=b.fn and a.scrphin ne b.scrphin; data siblings; set siblings; length relation $25 reltype $1; if relsex='2' then do; relation='Sibling - Sister'; reltype='S'; end; else if relsex='1' then do; relation='Sibling - Brother'; reltype='S'; end; keep scrphin relphin relation reltype; run; * Spouses; data spouse; set spouse; * Drop intervals of zero length; if spedt=sprdt then delete; format sprdt spedt yymmdd8.; run; * Parents; data parents; set tst01; keep scrphin malefh01 femalefh01; format malefh01 femalefh01 z15.; run; data parents; set parents; length relation $25 reltype $1; if malefh01>. then do; relation='Parent - Male'; relphin=malefh01; reltype='U'; output; end; if femalefh01>. then do; relation='Parent - Female'; relphin=femalefh01; reltype='U'; output; end; format relphin z15.; keep scrphin relphin relation reltype; run; * Children; * Sort TST01 by male and female family heads respectively and enumerate children; data children; set tst01; rename scrphin=relphin; format malefh01 femalefh01 z15.; keep scrphin sex malefh01 femalefh01; run; * Output one record per parent; data children; set children; length relation $25 reltype $1; if mod(malefh01,1E6)=0 then do; scrphin=malefh01; if sex='1' then relation='Child - Male'; else if sex='2' then relation='Child - Female'; reltype='D'; output; end; if mod(femalefh01,1E6)=0 then do; scrphin=femalefh01; if sex='1' then relation='Child - Male'; else if sex='2' then relation='Child - Female'; reltype='D'; output; end; format scrphin z15.; keep scrphin relphin relation reltype; run; * Spouse file will be limited to current and former spouses; * In the event of duplicates take later SPRDT; * Lags in the system make it more likely that the previouss marriage has ended but has not been; * recorded yet, and the nextmarriage has already started; proc sort data=spouse; by scrphin sprdt; run; data spouse; set spouse; by scrphin sprdt; length relation $25 reltype $1; if spsex='2' then do; relation='Spouse - Female'; reltype='X'; relphin=spousephin; end; else if spsex='1' then do; relation='Spouse - Male'; reltype='X'; relphin=spousephin; end; format relphin z15.; keep scrphin relphin relation reltype sprdt spedt; run; data relation; set siblings spouse parents children; run; proc sort data=relation; by scrphin relphin; run; * Duplicates at this point are two people involved in two separate relationships; * This should not be possible but it does happen.; * At this point Im not sure which is more correct, so Im going to choose one; * at random. The presence of these may indicate some refinements to the code above; * is needed; data dups; set relation; by scrphin relphin; if ^(first.relphin and last.relphin); run; data relation; set relation; by scrphin relphin; if first.relphin; run; proc format; value $ reltype 'S'='Sibling' 'D'='Child' 'U'='Parent' 'X'='Spouse' 'F'='Ex-Spouse' 'Z'='Soliton' 'UU'='Grandparent' 'DD'='Grandchild' 'US','USX'='Aunt/Uncle' 'SX','XS'='Brother/Sister-in-law' 'XU'='Father/Mother-in-law' 'DX'='Son/Daughter-in-law' 'SD','XSD'='Niece/Nephew' 'XD'='Step-Child' 'UD'='Half-Sibling' 'UX'='Step-Parent' 'UF'='Ex-Spouse of Parent' 'XF'='Ex-Spouse of Spouse' 'FD'='Child of Ex-Spouse' 'FS','SF','FSX'='Ex-Bro/Sis-in-law' 'FF'='Ex-Spouse of Ex-Spouse' 'FU'='Ex-Mother/Father-in-law' 'FX'='Spouse of Ex-Spouse' 'DF'='Ex-Spouse of Child' 'DU'='Parent of Child' 'DDD'='Great-Grandchild' 'DDX'='Grandchild-in-law' 'FXU'='Mother/Father-in-law of Ex-Spouse' 'SDD','XSDD'='Grand-Niece/Nephew' 'SDX'='Niece/Nephew-in-law' 'SXD','SDU'='Step-Niece/Nephew' 'SDF','FSD'='Ex-Niece/Nephew-in-law' 'SFS'='Sibling of Ex-Bro/Sis-in-law' 'SFU'='Parent of Ex-Bro/Sis-in-law' 'SXS'='Sibling of Bro/Sis-in-law' 'DBL'='Double First Cousin' 'DUS'='Aunt/Uncle of Child' 'DDU'='Parent of Grandchild' 'DUU'='Grandparent of Child' 'DUX'='Spouse of Parent of Child' 'DXS'='Sibling of Son/Dtr-in-law' 'SXU'='Parent of Bro/Sis-in law' 'DXU'='Parent of Son/Dtr-in-law' 'UDD'='Grandchild of Parent' 'UUD'='Child of Grandparent' 'UUS'='Grand-Uncle/Aunt' 'UUU'='Great-Grandparent' 'XUS'='Aunt/Uncle of Spouse' 'XUU'='Grandparent of Spouse' 'XDD'='Step-Grandchild' 'UUX'='Spouse of Grandparent' 'UUF'='Ex-Spouse of Grandparent' 'UFU'='Parent of Ex-Spouse of Parent' 'USF'='Ex-Spouse of Aunt/Uncle' 'UFS'='Sibling of Ex-Spouse of Parent' 'UXD'='Step-Sibling' 'UDX'='Spouse of Half-Sibling' 'UXS'='Step-Aunt/Uncle' 'UXU'='Step-Grandparent' 'UDU','SU'='Parent of Half-Sibling' 'USU'='Parent of Half-Aunt/Uncle' 'XSX'='Spouse of Bro/Sis-in-law' 'DXSD'='Niece/Nephew of Son/Dtr-in-law' 'DXUS'='Aunt/Uncle of Son/Dtr-in-law' 'DXUU'='Grandparent of Son/Dtr-in-law' 'DFSD'='Niece/Nephew of Ex-Son/Dtr-in-law' 'DXSX'='Bro/Sis-in-law of Son/Dtr-in-law' 'DDDD'='Great-Great-Grandchild' 'XSXS'='Sibling of Spouse of Bro/Sis-in-law' 'SXSX'='Spouse of Sibling of Bro/Sis-in-law' 'XSXU'='Mother/Father-in-law of Bro/Sis-in-law' 'USXU'='Mother/Father-in-law of Aunt/Uncle' 'USXS'='Sibling of Aunt/Uncle' 'UUSX'='Spouse of Grand-Uncle/Aunt' 'UUSF'='Ex-Spouse of Grand-Uncle/Aunt' 'UUFS'='Sibling of Ex-Spouse of Grandparent' 'USFS'='Sibling of Ex-Spouse of Aunt/Uncle' 'USFD'='Child of Ex-Spouse of Aunt/Uncle' 'USFU'='Ex-Mother/Father-in-law of Aunt/Uncle' 'USDX','XUSD'='Cousin-in-law' 'UFSD'='Niece/Nephew of Ex-Spouse of Parent' 'UFUS'='Aunt/Uncle of Ex-Spouse of Parent' 'UUXD'='Child of Spouse of Grandparent' 'UUFU'='Parent of Ex-Spouse of Grandparent' 'SXSD'='Niece/Nephew of Bro/Sis-in-law' 'SFSD'='Niece/Nephew of Ex-Bro/Sis-in-law' 'SDDD'='Great-Grand-Niece/Nephew' 'UUUU'='Great-Great-Grandparent' 'UUUD'='Child of Great-Grandparent' 'UUUS'='Great-Granduncle/Aunt' 'UUDD'='Grandchild of Grandparent' 'UDDD'='Great-Grandchild of Parent' 'DDDDD'='Great-Great-Great-Grandchild' 'UUDDD'='Great-Grandchild of Grandparent' 'UUUDD'='Grandchild of Great-Grandparent' 'SDDDD'='Great-Great-Grand-Niece/Nephew' 'UUUUS'='Great-Great-Granduncle/Aunt' 'UUUUU'='Great-Great-Great-Grandparent' /* The general rule for cousins is USD, Child of Sibling of Parent. Technically USXD, Child of Spouse of Sibling of Parent, is also a cousin, but this is likely to be a step-relationship. If the child was a blood relative of the parents sibling it would have been discovered as a Level 3 relationship (USD). Degree of cousinhood is determined by the number of generations since two people have a common ancestor. First Cousins USD share a grandparent, Second Cousins UUSDD share a great-grandparent, etc. The lesser of the number of U and D terms describes the degree of cousinhood. Removal describes the number of generations separating the cousins. Once Removed Cousins are separated by one generation, Twice Removed by two generations, etc. In my notation this is coded by the difference in the number of U and D terms. So the formula is: Nth Cousin Mth Removed, where N=min(# of U,# of D) M=diff(# of U, # of D) If M=0 the removal is not specified. Half-Cousins are children of a parents half-sibling: US(h)D This requires a distinction between full and half siblings */ 'USD'='First Cousin' 'UUSD', 'USDD'='First Cousin Once Removed' 'UUUSD', 'USDDD'='First Cousin Twice Removed' 'UUUUSD','USDDDD'='First Cousin Thrice Removed' 'UUUUUSD','USDDDDD'='First Cousin Fourth Removed' 'UUSDD'='Second Cousin' 'UUUSDD', 'UUSDDD'='Second Cousin Once Removed' 'UUUUSDD', 'UUSDDDD'='Second Cousin Twice Removed' 'UUUUUSDD','UUSDDDDD'='Second Cousin Thrice Removed' 'UUUSDDD'='Third Cousin' 'UUUUSDDD', 'UUUSDDDD'='Third Cousin Once Removed' 'UUUUUSDDD', 'UUUSDDDDD'='Third Cousin Twice Removed' 'UUUUUUSDDD','UUUSDDDDDD'='Third Cousin Thrice Removed'; value $ sibtypef 'FULL'='Full sibling' 'HMOM'='Half-Sibling, Same Mother' 'HDAD'='Half-Sibling, Same Father' 'UMOM'='Unknown Sibtype, Same Mother' 'UDAD'='Unknown Sibtype, Same Father'; run; * Take cohort and determine: ; * Parents; * Spouses; * Siblings; * Children; data _targetpop; set &cohort; keep scrphin &keeplist; run; * Iterate through the relation file pulling successive relationships; ********************************; ********** Level 1 *************; ********************************; proc sql; create table level1 as select a.*, b.relphin as scrphin_lev1, b.relation as relation_lev1, b.reltype as rel_lev1, b.sprdt as marcov_lev1, b.spedt as marend_lev1 from _targetpop a left join relation b on a.scrphin=b.scrphin order by scrphin, scrphin_lev1; * Anyone with no Level 1 relations is in a REGNO by themselves; data level1; set level1; if scrphin_lev1=. then do; rel_lev1='Z'; relation_lev1=put(rel_lev1,$reltype.); end; run; * Determine sib type: Full (confirmed), Half (confirmed) and Indeterminate (at least half); * Merge in birthdate to RELATION (name it _bd to avoid colliding with anything in KEEPLIST macro variable); data relation; merge relation(in=a) bd(in=b keep=scrphin ribirthdt rename=(ribirthdt=_bd)); by scrphin; if a; run; data l1_parents; set relation; by scrphin; if first.scrphin then do; l1mom=.; l1dad=.; end; retain l1mom l1dad; if relation='Parent - Female' then l1mom=relphin; else if relation='Parent - Male' then l1dad=relphin; if last.scrphin; keep scrphin _bd l1mom l1dad; format l1mom l1dad z15.; run; data sibs; set level1; if rel_lev1='S'; keep scrphin scrphin_lev1; run; data sibs; merge sibs(in=a) l1_parents(in=b); by scrphin; if a; run; proc sort data=sibs; by scrphin_lev1; run; data sibs; merge sibs(in=a) l1_parents(in=b rename=(scrphin=scrphin_lev1 _bd=sib_bd l1mom=l1_sibmom l1dad=l1_sibdad)); by scrphin_lev1; if a; length sibtype_lev1 $4 twin_lev1 $1; if l1mom>. and l1mom=l1_sibmom and l1dad>. and l1dad=l1_sibdad then sibtype_lev1='FULL'; else if (l1mom>. and l1mom=l1_sibmom and l1dad>. and l1_sibdad>. and l1dad ne l1_sibdad) then sibtype_lev1='HMOM'; else if (l1dad>. and l1dad=l1_sibdad and l1mom>. and l1_sibmom>. and l1mom ne l1_sibmom) then sibtype_lev1='HDAD'; else if (l1mom>. and l1mom=l1_sibmom) then sibtype_lev1='UMOM'; else sibtype_lev1='UDAD'; * Define twins as full siblings born +/- 1 day apart; twin_lev1='0'; if sibtype_lev1='FULL' and _bd>. and sib_bd>. and abs(_bd-sib_bd)<=1 then twin_lev1='1'; run; * Attach sibtype flag for Level 1; proc sort data=sibs; by scrphin scrphin_lev1; run; data level1; merge level1(in=a) sibs(in=b keep=scrphin scrphin_lev1 sibtype_lev1 twin_lev1); by scrphin scrphin_lev1; run; proc freq data=level1; tables rel_lev1; tables sibtype_lev1 twin_lev1; format rel_lev1 $reltype.; run; ********************************; ********** Level 2 *************; ********************************; * Level 2; * This query is almost the same as Level 1, i.e. pulls RELPHIN from relation by SCRPHIN; * but the b.relphin ne a.scrphin drops those matches that refer back to the original; * target person. This drops links such as Target(Parent)Mom(Child)Target; * It is also an INNER join rather than a LEFT join. INNER removes people who have no; * Level 2 relationships. We had to keep everyone at Level 1 with a LEFT join because; * we needed to know what happened to all SCRPHINs in the original target file. Once; * we know how many solitons (reltype Z) we have, we do not need to keep them around.; * Similarly for anyone who has no Level 2 links, we can drop them because they are; * already present in Level 1 (in general, drop those with no links at Level N; * because they are present at Level N-1); * The LENGTH statement increases the length of REL_LEV2 to $2 so that it can be expanded later; * to the 2-character Level 2 relationship code; proc sql; create table level2 as select a.*, b.relphin as scrphin_lev2, b.relation as relation_lev2, b.sprdt as marcov_lev2, b.spedt as marend_lev2, b.reltype as rel_lev2 length 2 from level1 a inner join relation b on a.scrphin_lev1=b.scrphin and b.relphin ne a.scrphin order by a.scrphin, scrphin_lev2; * Remove relatives already found at a lower level. Mother at this stage is UX, Parent - Male->Spouse, but she has; * also been found as U, Parent - Female.; * Since the interest in this iterative application is finding relationships to the target person,; * it is simpler to refer to each person once at the lowest level (i.e. the closest relationship); * that they appear in. So mother will always be a level 1 relation, not a level 2 (spouse of parent); data previous; set level1; rename scrphin_lev1=scrphin_lev2; keep scrphin scrphin_lev1; run; proc sort data=previous nodupkey; by scrphin scrphin_lev2; run; data level2; length rel_level2 $2 flag 3; merge level2(in=a) previous(in=b); by scrphin scrphin_lev2; if a; flag=0; if b then flag=1; rel_level2=rel_lev1||rel_lev2; run; * What was their relation at Level 1?; data l2dups; set level2; if flag=1; run; proc sort data=l2dups; by scrphin scrphin_lev2; run; proc sort data=l2dups out=s; by scrphin scrphin_lev1; run; data l2dups; merge l2dups(in=a) s(in=b keep=scrphin rel_lev1 scrphin_lev1 rename=(scrphin_lev1=scrphin_lev2 rel_lev1=rel_level1)); by scrphin scrphin_lev2; if a; run; proc freq data=l2dups; tables rel_level2*rel_level1; title1 'People found at Level 2 and Level 1: Level 1 relation vs pathway to Level 2'; run; * Drop these redundant Level 2 relationships; data level2; set level2; if flag=0; drop flag rel_level2; run; data level2; set level2; length reltarget_lev2 $2; * RELTARGET_LEVX is multi-level relationship to original target SCRPHIN; * These are different from REL_LEVX which are the single-level intermediate relationships; reltarget_lev2=trim(rel_lev1)||trim(rel_lev2); format scrphin: z15.; run; /*********/ * How many people are described by multiple L2 relationships; proc sort data=level2; by scrphin scrphin_lev2 reltarget_lev2; run; data dup2; set level2; by scrphin scrphin_lev2; if ^(first.scrphin_lev2 and last.scrphin_lev2); run; * Remove people found at Level 2 as both XD and DS, or UX and SU; * The XD/DS pairs are step-children found as either siblings of children (half-siblings) or children of spouse; * The UX/SU pairs are step-parents found as either spouses of parents or parents of siblings (half-siblings); data level2; set level2; length pref_xd pref_ux 3; * Flag preferred relationships to be kept; pref_xd=0; pref_ux=0; if reltarget_lev2='XD' then pref_xd=1; if reltarget_lev2='UX' then pref_ux=1; run; proc sort data=level2; by scrphin scrphin_lev2 descending pref_xd; run; data level2; set level2; by scrphin scrphin_lev2 descending pref_xd; retain has_xd; if pref_xd=1 then has_xd=1; if has_xd=1 and reltarget_lev2='DS' then delete; drop pref_xd has_xd; run; proc sort data=level2; by scrphin scrphin_lev2 descending pref_ux; run; data level2; set level2; by scrphin scrphin_lev2 descending pref_ux; retain has_ux; if pref_ux=1 then has_ux=1; if has_ux=1 and reltarget_lev2='SU' then delete; drop pref_ux has_ux; run; /********************/ * Flag Spouses who end relationship before target person birthdate; * These will be flagged as former spouses and no further relationships beyond this will be tracked; data level2; set level2; if reltarget_lev2 in ('UX','SX') and .. and l2mom=l2_sibmom and l2dad>. and l2dad=l2_sibdad then sibtype_lev2='FULL'; else if (l2mom>. and l2mom=l2_sibmom and l2dad>. and l2_sibdad>. and l2dad ne l2_sibdad) then sibtype_lev2='HMOM'; else if (l2dad>. and l2dad=l2_sibdad and l2mom>. and l2_sibmom>. and l2mom ne l2_sibmom) then sibtype_lev2='HDAD'; else if (l2mom>. and l2mom=l2_sibmom) then sibtype_lev2='UMOM'; else sibtype_lev2='UDAD'; * Define twins as full siblings born +/- 1 day apart; twin_lev2='0'; if sibtype_lev2='FULL' and _bd>. and sib_bd>. and abs(_bd-sib_bd)<=1 then twin_lev2='1'; run; * Attach sibtype flag for Level 2; proc sort data=sibs; by scrphin_lev1 scrphin_lev2; run; proc sort data=level2; by scrphin_lev1 scrphin_lev2; run; data level2; merge level2(in=a) sibs(in=b keep=scrphin_lev1 scrphin_lev2 sibtype_lev2 twin_lev2); by scrphin_lev1 scrphin_lev2; if a; run; proc freq data=level2; tables rel_lev2; tables sibtype_lev2 twin_lev2; format rel_lev2 $reltype.; run; ********************************; ********** Level 3 *************; ********************************; proc sql; create table level3 as select a.*, b.relphin as scrphin_lev3, b.relation as relation_lev3, b.sprdt as marcov_lev3, b.spedt as marend_lev3, b.reltype as rel_lev3 length 3 from level2 a inner join relation b on a.scrphin_lev2=b.scrphin and b.relphin ne a.scrphin order by a.scrphin, scrphin_lev3; * Remove relatives already found at a lower level.; data previous; set level1(in=a rename=(scrphin_lev1=scrphin_lev3)) level2(in=b rename=(scrphin_lev2=scrphin_lev3)); if a then prevlev=1; else if b then prevlev=2; keep scrphin prevlev scrphin_lev3; run; proc sort data=previous nodupkey; by scrphin scrphin_lev3; run; data level3; length rel_level3 $3 flag 3; merge level3(in=a) previous(in=b); by scrphin scrphin_lev3; if a; flag=0; if b then flag=1; rel_level3=trim(rel_lev1)||trim(rel_lev2)||trim(rel_lev3); run; * What was their relation at Level 1?; data l1dups; set level3; if flag=1 and prevlev=1; run; proc sort data=l1dups; by scrphin scrphin_lev3; run; proc sort data=l1dups out=s; by scrphin scrphin_lev1; run; data l1dups; merge l1dups(in=a) s(in=b keep=scrphin rel_lev1 scrphin_lev1 rename=(scrphin_lev1=scrphin_lev3 rel_lev1=rel_level1)); by scrphin scrphin_lev3; if a; run; /* proc freq data=l1dups; tables rel_level3*rel_level1; where flag=1 and prevlev=1; title1 'People found at Level 3 and Level 1: Level 1 relation vs pathway to Level 3'; run; */ * What was their relation at Level 2?; * These are relations like Grandparent being found as Target(Parent)Mom(Sibling)Uncle(Parent)Grandparent; * or Siblings as Target(Sibling)Sister(Child)Niece(Parent)Sibling; data l2dups; set level3; if flag=1 and prevlev=2; run; proc sort data=l2dups; by scrphin scrphin_lev3; run; proc sort data=l2dups out=s; by scrphin scrphin_lev2; run; data l2dups; merge l2dups(in=a) s(in=b keep=scrphin reltarget_lev2 scrphin_lev2 rename=(scrphin_lev2=scrphin_lev3 reltarget_lev2=rel_level2)); by scrphin scrphin_lev3; if a; run; /* proc freq data=l2dups order=freq; tables rel_level3*rel_level2; where flag=1 and prevlev=2; title1 'People found at Level 3 and Level 2: Level 2 relation vs pathway to Level 3'; run; * Use a picture format to truncate SCRPHIN so that it is not identifiable; proc format; picture p8r (round) 0-high = '99999999999999'; proc print data=level3 noobs; var rel_level3 scrphin scrphin_lev1 relation_lev1 scrphin_lev2 relation_lev2 scrphin_lev3 relation_lev3; where flag=1 and prevlev=2 and rel_level3 in: ('USX','S'); format scrphin: p8r.; run; */ * Drop some of these redundant Level 3 relationships; * The ones to keep at this level are spouse relationships which explain how Double Cousins; * are formed. These are people related through both pathways USX and US (i.e. one aunt/uncle; * from mothers side married to aunt/uncle from fathers side). Their children are Double Cousins; * to the target person; data l2dups; set l2dups; keep scrphin rel_level2 scrphin_lev3; run; * Add in redundant level 1 dups; data l2dups; set l2dups l1dups(keep=scrphin scrphin_lev3); run; proc sort data=l2dups nodupkey; by scrphin scrphin_lev3; run; proc sort data=level3; by scrphin scrphin_lev3; run; data level3; merge level3(in=a) l2dups(in=b); by scrphin scrphin_lev3; if a; if b then do; if not (rel_level3='USX' and rel_level2='US') then delete; end; run; data level3; set level3; drop flag rel_level2 rel_level3 prevlev; run; data level3; set level3; length reltarget_lev3 $3; * RELTARGET_LEVX is multi-level relationship to original target SCRPHIN; * These are different from REL_LEVX which are the single-level intermediate relationships; reltarget_lev3=trim(rel_lev1)||trim(rel_lev2)||trim(rel_lev3); format scrphin: z15.; run; * Duplicates at this point can result from double cousins; * It is only at Level 3 that double cousins can be detected, since at Level 2 we dont; * yet know that the cousin at Level 3 can be reached through a sibling of either parent; data dups nodups; set level3; by scrphin scrphin_lev3; if ^(first.scrphin_lev3 and last.scrphin_lev3) then output dups; else output nodups; run; proc sort data=dups; by scrphin scrphin_lev3 rel_lev3; run; * Bring down to one record per SCRPHIN-SCRPHIN_LEV3 combination; * Aside from Double Cousins, it is difficult to describe these multiple; * relationships. To avoid getting too complicated at this point, keep; * only one relationship (Double Cousins will be specified separately); data dups2; set dups; length r1 r2 r3 altreltarget_lev3 $3; by scrphin scrphin_lev3; retain r1 r2 r3 altscrphin_lev1 altscrphin_lev2 altrelation_lev1 altrelation_lev2 altsibtype_lev1 altsibtype_lev2 altrelation_lev3 altreltarget_lev3 c; if first.scrphin_lev3 then do; r1=' '; r2=' '; r3=' '; altscrphin_lev2=scrphin_lev2; altrelation_lev2=relation_lev2; altsibtype_lev1=sibtype_lev1; altsibtype_lev2=sibtype_lev2; altscrphin_lev1=scrphin_lev1; altrelation_lev1=relation_lev1; altrelation_lev3=relation_lev3; altreltarget_lev3=reltarget_lev3; c=0; end; c=c+1; if c=1 then r1=reltarget_lev3; if c=2 and reltarget_lev3 ne r1 then r2=reltarget_lev3; if c=3 and reltarget_lev3 ne r2 and reltarget_lev3 ne r1 then r3=reltarget_lev3; if last.scrphin_lev3; drop c; format altscrphin: z15.; run; * Double cousins are USD with no other relationships; data dups2; set dups2; if r1='USD' and r2=' ' and r3=' ' then double=1; run; /* proc print data=dups2 noobs; var r1 r2 r3 relation_lev1 relation_lev2 relation_lev3 altrelation_lev1 altrelation_lev2 altrelation_lev3; where double ne 1; format scrphin: altscrphin: p8r.; run; proc print data=dups2; var r1 r2 r3 scrphin relation_lev1 scrphin_lev1 relation_lev2 scrphin_lev2 relation_lev3 scrphin_lev3; where double ne 1; format scrphin: altscrphin: p8r. relation: $10.; run; proc print data=dups2; var r1 r2 r3 scrphin altrelation_lev1 altrelation_lev2 altrelation_lev3; where double ne 1; format scrphin: altscrphin: p8r.; run; */ data level3; set nodups dups2; drop r1 r2 r3; run; proc sort data=level3; by scrphin scrphin_lev3; run; /********************/ * Flag Spouses who end relationship before target person birthdate; * These will be flagged as former spouses and no further relationships beyond this will be tracked; data level3; set level3; if substr(reltarget_lev3,3,1)='X' and .. and l3mom=l3_sibmom and l3dad>. and l3dad=l3_sibdad then sibtype_lev3='FULL'; else if (l3mom>. and l3mom=l3_sibmom and l3dad>. and l3_sibdad>. and l3dad ne l3_sibdad) then sibtype_lev3='HMOM'; else if (l3dad>. and l3dad=l3_sibdad and l3mom>. and l3_sibmom>. and l3mom ne l3_sibmom) then sibtype_lev3='HDAD'; else if (l3mom>. and l3mom=l3_sibmom) then sibtype_lev3='UMOM'; else sibtype_lev3='UDAD'; * Define twins as full siblings born +/- 1 day apart; twin_lev3='0'; if sibtype_lev3='FULL' and _bd>. and sib_bd>. and abs(_bd-sib_bd)<=1 then twin_lev3='1'; run; * Attach sibtype flag for Level 3; proc sort data=sibs; by scrphin_lev2 scrphin_lev3; run; proc sort data=level3; by scrphin_lev2 scrphin_lev3; run; data level3; merge level3(in=a) sibs(in=b keep=scrphin_lev2 scrphin_lev3 sibtype_lev3 twin_lev3); by scrphin_lev2 scrphin_lev3; if a; run; proc freq data=level3; tables rel_lev3; tables sibtype_lev3 twin_lev3; format rel_lev3 $reltype.; run; data allrel; set level1(in=a) level2(in=b) level3(in=c); length reltype $8 level $1; if a then do; level='1'; reltype=rel_lev1; relphin=scrphin_lev1; relation=put(reltype,$reltype.); end; else if b then do; level='2'; reltype=reltarget_lev2; relphin=scrphin_lev2; relation=put(reltype,$reltype.); end; else if c then do; level='3'; reltype=reltarget_lev3; if double=1 then reltype='DBL'; relphin=scrphin_lev3; relation=put(reltype,$reltype.); end; format relphin z15.; run; proc sort data=allrel; by scrphin level scrphin_lev1-scrphin_lev3; run; ************/ %mend iterated_relationships;