/******************************************************************************
Project:       Immigration Data Linkage Project (ircc_link)
File:          S:\ircc_link\prog\gillesd\concept_samples\return_migration.sas
Programmer:    Gilles R. Detillieux
First Created: Sep. 8, 2022
Revisions:     none

Input datasets:
    registry.ircc_crosswalk_1985jan:      IRCC linkage dataset
    registry.ircc_legacy_landing_1985jan: IRCC landing data from 1985 to 1999
    registry.ircc_landing_2000jan:        IRCC landing data from 2000 to 2017
    registry.mhmrs_1970jun:               MH insurance registry

Description:
    This program finds landing date of immigrants living in Manitoba, for
    anyone listed in the IRCC immigration landing datasets and crosswalk,
    then gets their MB health insurance registry coverage episodes ending
    after landing, to calculate rates of return migration to Manitoba.

Processing outline:
    1. Build the cohort from the crosswalk (filephintype 0 or 1, one
       acquisition date), renaming filephin to scrphin.
    2. Stack the legacy and current landing files and join them to the
       cohort by idno.
    3. Keep the earliest landing record per non-missing scrphin.
    4. Call the regcov macro for coverage episodes and keep episodes that
       end on or after the landing date.
    5. Attach the registry covcode that matches each episode start date.
    6. Collapse to one record per person, deriving the outmigration reason
       and the return migration flag.
    7. Tabulate by landing year and reason, then delete the work datasets.

Work datasets created and deleted: ircccw, landing, regcov.
******************************************************************************/

options nocenter;

*** Get immigrant cohort from IRCC crosswalk file, include all valid linkages;
*** and sort by the IRCC IDNO identifier;
* nodupkey leaves one row per idno/filephin pair;
proc sort data=registry.ircc_crosswalk_1985jan
        (keep=filephin filephintype idno acqdt
         where=(filephintype in ('0','1') and acqdt = '14may2019'd))
    out=ircccw (drop=filephintype rename=(filephin=scrphin))
    nodupkey;
    by idno filephin;
run;

*** Get arrival & landing dates from legacy and current landing datasets;
* only rows from the 14may2019 acquisition are read from either file;
data landing;
    set registry.ircc_legacy_landing_1985jan
            (keep=idno arrival_dt landing_dt acqdt)
        registry.ircc_landing_2000jan
            (keep=idno arrival_dt landing_dt acqdt);
    where acqdt = '14may2019'd;
run;

*** sort landing data, then merge with crosswalk data, by IDNO identifier;
proc sort data=landing;
    by idno landing_dt;
run;

/* NOTE(review): ircccw can hold several rows per idno (one per filephin) and
   landing can hold several rows per idno. If both happen for the same idno,
   this match-merge pairs rows positionally rather than producing every
   combination. TODO confirm that this cannot occur or is acceptable. */
data landing;
    merge ircccw  (in=in_cw)
          landing (in=in_la);
    by idno;
    * keep all individuals with linkage and landing records;
    if in_cw & in_la;
run;

*** sort by PHIN for duplicate detection;
proc sort data=landing;
    by scrphin landing_dt;
run;

*** avoid duplicate PHINs and keep earliest landing record per person;
data landing;
    set landing;
    by scrphin landing_dt;
    * keep one record per valid PHIN;
    if first.scrphin & scrphin ^= .;
run;

*** coverage episodes pulled from registry using regcov macro;
*** ignore coverage gaps of 360 days or less;
/* The regcov macro is defined outside this file. The steps below rely on its
   output dataset (regcov) carrying scrphin, covdt and enddt, and presumably
   canccode as well, since no other input in this program keeps canccode.
   TODO confirm against the macro documentation. */
%regcov(dsname=landing, startdt=01jan1985, enddt=30nov2019,
        id=scrphin, outds=regcov, gapdays=360);

*** sort by person so we can merge with grouped landing data;
proc sort data=regcov;
    by scrphin covdt enddt;
run;

*** merge in landing data to find coverage episodes during or after landing;
* landing has one row per scrphin here, so each episode row picks up landing_dt;
data landing;
    merge landing (in=in_la)
          regcov  (in=in_co);
    by scrphin;
    if in_la and in_co and covdt > . and enddt >= landing_dt;
run;

*** get covcode variable from registry to match our coverage episodes;
*** keep only covcode values we need to resolve returns after 'cannot locate';
* the work dataset regcov is reused (overwritten) for the covcode lookup;
proc sort data=registry.mhmrs_1970jun
        (keep=scrphin covdt acqdt rectype covcode)
    out=regcov;
    where acqdt <= '30nov2019'd
      and rectype in ('2','R')
      and '01jan1985'd <= covdt <= '31dec2019'd
      and ('2' <= covcode <= '9' or 'A' <= covcode <= 'G');
    by scrphin covdt acqdt;
run;

* keep most recent acqdt;
data regcov;
    set regcov;
    by scrphin covdt acqdt;
    if last.covdt;
run;

*** merge covcodes into landing data with matching coverage episode data;
* episodes without a matching registry row are kept, with covcode missing;
data landing;
    merge landing (in=in_la)
          regcov  (in=in_co keep=scrphin covdt covcode);
    by scrphin covdt;
    if in_la;
run;

*** keep one record per person, categorizing reason for leaving & returning;
data landing;
    set landing;
    by scrphin covdt enddt;

    * per-person accumulators carried across that person's episode rows;
    retain covcnt rcovcode firstcanccode dropphin;
    if first.scrphin then do;
        covcnt        = 0;
        rcovcode      = ' ';
        firstcanccode = canccode;
        dropphin      = 0;
    end;

    *** count coverage episodes and keep covcode from 2nd episode after landing;
    covcnt = covcnt + 1;
    if covcnt = 2 then rcovcode = covcode;

    *** drop dupl. or registered in error (may show up under another phin);
    * the flag is set by any episode of the person, not only the last one;
    if canccode in ('W','5','6') then dropphin = 1;

    * output only the final row per person, and only if not flagged for drop;
    if last.scrphin and ^dropphin;

    * classify on the cancellation code of the first episode after landing;
    canccode = firstcanccode;
    pop = 1;

    *** categorize outmigration;
    * Note: canccode A, C-R = Left Manitoba to some other known destination;
    length outmigration $18;
    if canccode = '0' then outmigration = 'Remained';
    else if canccode = 'V' then outmigration = 'Remained';
    else if canccode = '2' then outmigration = 'Remained';   * or 'Deceased';
    else if canccode = '7' then outmigration = 'Cannot Locate';
    else if canccode = 'A' or 'C' <= canccode <= 'R' then outmigration = 'Moved';
    else outmigration = 'Other';
    leftmb = (outmigration in ('Moved','Cannot Locate'));

    *** categorize return migration;
    * Note: covcode 2-9, A-G = New Resident from some other known location;
    * These count as return migration after cancellation due to 'cannot locate';
    returned = (covcnt >= 2);
    remigration = 0;
    * AND binds tighter than OR, so this is (cannot-locate case) OR (moved case);
    if returned and canccode = '7'
            and ('2' <= rcovcode <= '9' or 'A' <= rcovcode <= 'G')
       or returned and (canccode = 'A' or 'C' <= canccode <= 'R')
        then remigration = 1;

    lyear = year(landing_dt);

    label lyear        = 'Landing Year';
    label outmigration = 'Outmigration by Reason for Coverage Cancellation';
    label leftmb       = 'Status of outmigration (Moved or Cannot Locate)';
    label remigration  = 'Return Migration';
    label returned     = 'Person had 2+ coverage episodes';
run;

*** summarize out/remigration counts by landing year and reason;
/* NOTE(review): pctsum is requested without a denominator definition. Confirm
   that the percentage it reports is the intended rate (returns relative to
   out-migrants in the same landing year and reason). TODO confirm. */
proc tabulate data=landing;
    where leftmb;
    class lyear outmigration / order=formatted;
    var leftmb remigration;
    table lyear,
          leftmb='Outmigration'*outmigration=''*sum='N'*f=6.0
          remigration*outmigration=''*(sum='N'*f=6.0 pctsum='%')
          / nocellmerge;
    title 'Return Migration Rates for Immigrants to Manitoba, by landing year and reason';
run;

*** a little cleanup;
proc delete data=ircccw landing regcov;
run;