/****************************************************************************** Project: Immigration Data Linkage Project (ircc_link) File: S:\ircc_link\prog\gillesd\concept_samples\retention.sas Programmer: Gilles R. Detillieux First Created: Sep. 9, 2022 Revisions: none Input datasets: registry.ircc_crosswalk_1985jan: IRCC linkage dataset registry.ircc_legacy_landing_1985jan: IRCC landing data from 1985 to 1999 registry.ircc_landing_2000jan: IRCC landing data from 2000 to 2017 registry.mhmrs_1970jun: MH insurance registry Description: This program finds landing date of immigrants living in Manitoba, for anyone listed in the IRCC immigration landing datasets and crosswalk, then gets all MB health insurance registry coverage episodes ending after landing, to calculate rates of retention of permanent residents in Manitoba. Note that retention rates are based on a snapshot of registry coverage after the period of time in question, and do not require continuous coverage for the entire period. An immigrant is considered to be retained if they are in the province at the end of that period of time beginning at the later of landing date or start of coverage, even if they left the province during that period and returned before the end of the period. For consistency with other migration dynamics analyses, we ignore coverage gaps of 360 days or less. 
******************************************************************************/

options nocenter;

/* ---------------------------------------------------------------------------
   Step 1: immigrant cohort from the IRCC crosswalk.
   Keeps all valid linkages (filephintype '0' or '1') from the 14may2019
   acquisition, one row per IDNO / PHIN pair, sorted by the IRCC IDNO
   identifier.  filephin is renamed to scrphin for the later registry steps.
   --------------------------------------------------------------------------- */
proc sort data=registry.ircc_crosswalk_1985jan
              (keep=filephin filephintype idno acqdt
               where=(filephintype in ('0','1') and acqdt = '14may2019'd))
          out=ircccw (drop=filephintype rename=(filephin=scrphin))
          nodupkey;
    by idno filephin;
run;

/* ---------------------------------------------------------------------------
   Step 2: arrival and landing dates from the legacy and current landing
   datasets, restricted to the same acquisition date as the crosswalk.
   --------------------------------------------------------------------------- */
data landing;
    set registry.ircc_legacy_landing_1985jan (keep=idno arrival_dt landing_dt acqdt)
        registry.ircc_landing_2000jan        (keep=idno arrival_dt landing_dt acqdt);
    where acqdt = '14may2019'd;
run;

*** sort landing data by IDNO identifier, earliest landing first;
proc sort data=landing;
    by idno landing_dt;
run;

/* Keep only the earliest landing record per IDNO before merging.
   The crosswalk can hold several PHINs for one IDNO, and the landing data can
   hold several records for one IDNO.  A match-merge with repeated BY values on
   both sides pairs rows positionally, so a second PHIN would be matched to a
   later landing record rather than the earliest one.  Reducing landing to one
   row per IDNO makes the merge many-to-one.  When no IDNO repeats on both
   sides the final result is unchanged, because only the earliest landing
   record per PHIN is kept further below anyway. */
data landing;
    set landing;
    by idno landing_dt;
    if first.idno;
run;

*** merge with crosswalk data, by IDNO identifier;
data landing;
    merge ircccw  (in=in_cw)
          landing (in=in_la);
    by idno;
    * keep all individuals with linkage and landing records;
    if in_cw & in_la;
run;

*** sort by PHIN for duplicate detection;
proc sort data=landing;
    by scrphin landing_dt;
run;

*** avoid duplicate PHINs and keep earliest landing record per person;
data landing;
    set landing;
    by scrphin landing_dt;
    * keep one record per valid (non-missing) PHIN;
    if first.scrphin & scrphin ^= .;
run;

/* ---------------------------------------------------------------------------
   Step 3: coverage episodes pulled from the registry using the regcov macro
   (defined outside this program).  Coverage gaps of 360 days or less are
   ignored.
   --------------------------------------------------------------------------- */
%regcov(dsname=landing, startdt=01jan1985, enddt=30nov2019, id=scrphin,
        outds=regcov, gapdays=360);

*** sort by person so we can merge with grouped landing data;
proc sort data=regcov;
    by scrphin covdt enddt;
run;

*** merge in landing data to find coverage episodes during or after landing;
data landing;
    merge landing (in=in_la)
          regcov  (in=in_co);
    by scrphin;
    if in_la and in_co and covdt > . and enddt >= landing_dt;
run;

/* ---------------------------------------------------------------------------
   Step 4: one record per person with 3- and 5-year retention flags.
   firststartdt is the later of landing date and the start of the first
   retained coverage episode.  A person is retained at N years if any of
   their episodes spans the snapshot date firststartdt + N years.
   --------------------------------------------------------------------------- */
data landing;
    set landing;
    by scrphin covdt enddt;
    retain firststartdt retent3y retent5y dropphin;

    if first.scrphin then do;
        firststartdt = max(covdt, landing_dt);
        retent3y = 0;
        retent5y = 0;
        dropphin = 0;
    end;

    * snapshot dates, computed once per row and dropped from the output;
    _snap3y = intnx('year', firststartdt, 3, 'sameday');
    _snap5y = intnx('year', firststartdt, 5, 'sameday');

    *** check for 3- & 5-year retention (at snapshot times) in any episode;
    if covdt <= _snap3y <= enddt then retent3y = 1;
    if covdt <= _snap5y <= enddt then retent5y = 1;

    *** drop dupl. or registered in error (may show up under another phin);
    if canccode in ('W','5','6') then dropphin = 1;

    * keep last episode if valid;
    if last.scrphin & ^dropphin;

    * constant 1 per kept person, used as the rate denominator in the tables;
    pop = 1;

    *** check that 3- & 5-year snapshot time is within study period;
    * nullify variable if beyond follow-up period;
    if _snap3y > '31dec2017'd then retent3y = .;
    if _snap5y > '31dec2017'd then retent5y = .;

    lyear = year(landing_dt);

    label lyear    = 'Landing Year';
    label retent3y = '3-Year retention';
    label retent5y = '5-Year retention';

    drop _snap3y _snap5y;
run;

/* ---------------------------------------------------------------------------
   Step 5: retention tables by landing year.
   Only people whose snapshot date falls within the study period are counted
   (retention flag not missing).  The rate uses pop as the PCTSUM denominator:
   pop is 1 on every row, so the rate is 100 * sum(retained) / sum(pop) within
   each landing year.  With no denominator, PCTSUM would instead report each
   year's share of all retained immigrants in the table.
   --------------------------------------------------------------------------- */

*** show 3-year retention rates for imm. by landing year;
proc tabulate data=landing;
    where retent3y > .;
    class lyear / order=formatted;
    var retent3y pop;
    table lyear,
          n='Immigrants'*f=6.0
          retent3y*(sum='Count'*f=6.0 pctsum<pop>='Rate %'*f=8.3)
          / nocellmerge;
    title 'Three-year retention rate for immigrants by landing year';
run;

*** show 5-year retention rates for imm. by landing year;
proc tabulate data=landing;
    where retent5y > .;
    class lyear / order=formatted;
    var retent5y pop;
    table lyear,
          n='Immigrants'*f=6.0
          retent5y*(sum='Count'*f=6.0 pctsum<pop>='Rate %'*f=8.3)
          / nocellmerge;
    title 'Five-year retention rate for immigrants by landing year';
run;

*** a little cleanup;
proc delete data=ircccw landing regcov;
run;