/*******************************************************************************
Description: This program runs a factor analysis to calculate the
             Social & Material Deprivation indices by DA using the 2011 Census.

Census variables included in social and material deprivation factor analysis:
  1. Average household income
  2. Unemployment rate age 15+
  3. Proportion of population age 15+ without high school graduation
  4. Proportion of population age 15+ separated, divorced or widowed
  5. Proportion of population living alone
  6. Proportion of population that moved in the past 5 years

Variables with missing values at the DA level were imputed with values at
CSD level where possible. Another imputation is done for First Nations
Reserves using the weighted mean for North and South First Nations
communities.

Input datasets read by this program (expected to exist in WORK or be
otherwise resolvable): census2011, conc_2011, pccf_2011, pccf_2011_ret.
Final output dataset: dep (one record per DA with social/material scores).
*******************************************************************************/

*****************************************************************;
title 'Social & Material Deprivation Calculated from 2011 Census';
*****************************************************************;

*** read in 2011 census data at DA level;
proc sort data = census2011
          out  = census (where=(geo_type='DA' and POPTOT ^= .)
                         keep=geo_type geo_id geo_name POPTOT
                              INC_PHH_AVEINC INC_PHH
                              LF_UNEMRATE15 LF_POPTOT15
                              EDHS_WOHS15 EDHS_POP15
                              PHHNFAM_LIVALONE PHH_TOTPOP
                              MOB5_MOVE MOB5_TOTPOP5
                              MSTAT_SEP MSTAT_DIV MSTAT_WID MSTAT_TOTPOP);
   by geo_id;
run;

*** calculate derived variables;
data census;
   length dauid $8;
   set census;

   dauid = geo_id;

   *** set zero income to missing (and then impute);
   zero_income = 0;
   if INC_PHH_AVEINC = 0 then do;
      INC_PHH_AVEINC = .;
      zero_income = 1;
   end;

   /* Each proportion is left missing when its denominator is missing or
      zero, so that it is picked up by the imputation steps below. */
   if EDHS_POP15   > 0 then nohigh    = EDHS_WOHS15/EDHS_POP15;
   if MSTAT_TOTPOP > 0 then sdw       = sum(MSTAT_SEP,MSTAT_DIV,MSTAT_WID)/MSTAT_TOTPOP;
   if PHH_TOTPOP   > 0 then alone     = PHHNFAM_LIVALONE/PHH_TOTPOP;
   if MOB5_TOTPOP5 > 0 then mobile5yr = MOB5_MOVE/MOB5_TOTPOP5;

   label POPTOT         = 'Total Population'
         INC_PHH_AVEINC = 'Average Household Income ($)'
         LF_UNEMRATE15  = 'Unemployment Rate for Labour Force Population aged 15 years and older'
         nohigh         = 'Proportion of population 15 years and older without High School Graduation'
         sdw            = 'Proportion of population 15 years and older Separated, Divorced or Widowed'
         alone          = 'Proportion of population living alone'
         mobile5yr      = 'Proportion of population that moved in the past 5 years';
run;

proc means data = census mean n nmiss;
   var POPTOT INC_PHH_AVEINC INC_PHH LF_UNEMRATE15 LF_POPTOT15
       nohigh EDHS_WOHS15 EDHS_POP15
       sdw MSTAT_SEP MSTAT_DIV MSTAT_WID MSTAT_TOTPOP
       alone PHHNFAM_LIVALONE PHH_TOTPOP
       mobile5yr MOB5_MOVE MOB5_TOTPOP5;
   title2 'Check for missing values on 2011 Census at DA level';
run;

proc print data = census;
   where LF_UNEMRATE15 = . | nohigh = . | sdw = . | alone = . | mobile5yr = .;
   var dauid geo_name POPTOT INC_PHH_AVEINC INC_PHH LF_UNEMRATE15 LF_POPTOT15
       nohigh EDHS_WOHS15 EDHS_POP15
       sdw MSTAT_SEP MSTAT_DIV MSTAT_WID MSTAT_TOTPOP
       alone PHHNFAM_LIVALONE PHH_TOTPOP
       mobile5yr MOB5_MOVE MOB5_TOTPOP5;
run;

proc freq data = census;
   tables zero_income;
run;

*** read in census data at CSD level;
proc sort data = census2011
          out  = census_csd (where=(geo_type='CSD' and POPTOT ^= .)
                             keep=geo_type geo_id geo_name POPTOT
                                  INC_PHH_AVEINC INC_PHH
                                  LF_UNEMRATE15 LF_POPTOT15
                                  EDHS_WOHS15 EDHS_POP15
                                  PHHNFAM_LIVALONE PHH_TOTPOP
                                  MOB5_MOVE MOB5_TOTPOP5
                                  MSTAT_SEP MSTAT_DIV MSTAT_WID MSTAT_TOTPOP);
   by geo_id;
run;

*** calculate derived variables;
/* The two variables read directly from the census file are renamed with a
   _csd suffix on output so they do not collide with the DA-level variables
   of the same name when the CSD values are merged onto the DA records. */
data census_csd (rename=(INC_PHH_AVEINC=INC_PHH_AVEINC_csd
                         LF_UNEMRATE15=LF_UNEMRATE15_csd));
   length csdid $7;
   set census_csd;

   csdid = geo_id;

   *** set zero income to missing (and then impute);
   zero_income_csd = 0;
   if INC_PHH_AVEINC = 0 then do;
      INC_PHH_AVEINC = .;
      zero_income_csd = 1;
   end;

   if EDHS_POP15   > 0 then nohigh_csd    = EDHS_WOHS15/EDHS_POP15;
   if MSTAT_TOTPOP > 0 then sdw_csd       = sum(MSTAT_SEP,MSTAT_DIV,MSTAT_WID)/MSTAT_TOTPOP;
   if PHH_TOTPOP   > 0 then alone_csd     = PHHNFAM_LIVALONE/PHH_TOTPOP;
   if MOB5_TOTPOP5 > 0 then mobile5yr_csd = MOB5_MOVE/MOB5_TOTPOP5;

   label INC_PHH_AVEINC = 'Average Household Income ($)'
         LF_UNEMRATE15  = 'Unemployment Rate for Labour Force Population aged 15 years and older'
         nohigh_csd     = 'Proportion of population 15 years and older without High School Graduation'
         sdw_csd        = 'Proportion of population 15 years and older Separated, Divorced or Widowed'
         alone_csd      = 'Proportion of population living alone'
         mobile5yr_csd  = 'Proportion of population that moved in the past 5 years';
run;

proc means data = census_csd mean n nmiss;
   var POPTOT INC_PHH_AVEINC_csd INC_PHH LF_UNEMRATE15_csd LF_POPTOT15
       nohigh_csd EDHS_WOHS15 EDHS_POP15
       sdw_csd MSTAT_SEP MSTAT_DIV MSTAT_WID MSTAT_TOTPOP
       alone_csd PHHNFAM_LIVALONE PHH_TOTPOP
       mobile5yr_csd MOB5_MOVE MOB5_TOTPOP5;
   title2 'Check for missing values on 2011 Census at CSD level';
run;

proc print data = census_csd;
   where LF_UNEMRATE15_csd = . | nohigh_csd = . | sdw_csd = . | alone_csd = . | mobile5yr_csd = .;
   var csdid geo_name POPTOT INC_PHH_AVEINC_csd INC_PHH LF_UNEMRATE15_csd LF_POPTOT15
       nohigh_csd EDHS_WOHS15 EDHS_POP15
       sdw_csd MSTAT_SEP MSTAT_DIV MSTAT_WID MSTAT_TOTPOP
       alone_csd PHHNFAM_LIVALONE PHH_TOTPOP
       mobile5yr_csd MOB5_MOVE MOB5_TOTPOP5;
run;

proc freq data = census_csd;
   tables zero_income_csd;
run;

*** use concordance file to merge DAs to CSDs;
*** keep one record per DA (there is one record per DA block);
proc sort data = conc_2011 out = conc(keep=pruid cduid csduid dauid) nodupkey;
   where pruid = '46';
   by dauid;
run;

proc sort data = conc;
   by csduid;
run;

proc sort data = census_csd;
   by csdid;
run;

proc print data = conc (obs=20);
   var cduid csduid dauid;
   title2 'Check CSD & DA codes in Concordance File';
run;

/* Attach the DA codes from the concordance to each CSD record. The result
   has one record per DA within each CSD that appears in the census file. */
data census_csd;
   merge conc       (in=m1 rename=(csduid=csdid cduid=cdid) drop=pruid)
         census_csd (in=m2);
   by csdid;
   if m2;
run;

proc sort data = census_csd;
   by dauid;
run;

proc sort data = census;
   by dauid;
run;

proc print data = census_csd (obs=20);
   var dauid;
   title2 'Check DA codes from Concordance File';
run;

proc print data = census (obs=20);
   var dauid;
   title2 'Check DA codes from Census Data';
run;

*** impute missing values at DA level with CSD values;
data census;
   merge census_csd (in=m1
                     keep=cdid csdid dauid INC_PHH_AVEINC_csd LF_UNEMRATE15_csd
                          nohigh_csd sdw_csd alone_csd mobile5yr_csd geo_name
                     rename=(geo_name=csd_name))
         census     (in=m2);
   by dauid;
   if m2;

   in_csd = m1;

   *** impute CSD value to DA if DA value is missing or zero;
   /* Zero DA income was already recoded to missing above, so testing for
      missing covers both cases. A CSD income is used only when positive. */
   if INC_PHH_AVEINC = . and INC_PHH_AVEINC_csd > 0 then INC_PHH_AVEINC = INC_PHH_AVEINC_csd;
   if LF_UNEMRATE15 = . then LF_UNEMRATE15 = LF_UNEMRATE15_csd;
   if nohigh        = . then nohigh        = nohigh_csd;
   if sdw           = . then sdw           = sdw_csd;
   if alone         = . then alone         = alone_csd;
   if mobile5yr     = . then mobile5yr     = mobile5yr_csd;
run;

proc freq data = census;
   tables in_csd;
run;

proc means data = census mean n nmiss;
   var POPTOT INC_PHH_AVEINC LF_UNEMRATE15 nohigh sdw alone mobile5yr;
   title2 'Check for missing values after imputation at CSD level';
run;

/* Note: a missing value compares as less than zero in SAS, so the income
   test below also lists DAs whose income is still missing. */
proc print data = census;
   where INC_PHH_AVEINC <= 0 | LF_UNEMRATE15 = . | nohigh = . | sdw = . | alone = . | mobile5yr = .;
   var csd_name geo_name dauid csdid POPTOT INC_PHH_AVEINC LF_UNEMRATE15 nohigh sdw alone mobile5yr;
run;

*** now impute weighted means of variables by north/south for FN Reserves;
*** use pccf to provide information on DAs such as FN Reserves (csdtype=IRI) and latitude;
proc freq data = pccf_2011;
   tables csdtype;
run;

/* Sorting by descending latitude within DA means the FIRST.dauid record
   kept below is the one with the highest latitude for that DA. */
proc sort data = pccf_2011 out = pccf(keep=dauid lat csdtype csdname) nodups;
   by dauid descending lat;
run;

*** add in postal codes retired before Jan 1, 2011 (160 DAs are not in PCCF but are in census data);
proc sort data = pccf_2011_ret out = pccf_ret(keep=dauid lat csdtype csdname) nodups;
   by dauid descending lat;
run;

data pccf;
   set pccf;
   by dauid;
   if first.dauid;
run;

data pccf_ret;
   set pccf_ret;
   by dauid;
   if first.dauid;
run;

/* Keep only the retired-file DAs that are absent from the current PCCF. */
data pccf_ret;
   merge pccf     (in=m1)
         pccf_ret (in=m2);
   by dauid;
   if m2 & ^m1;
run;

data pccf;
   set pccf pccf_ret;
   by dauid;
run;

proc sort data = census nodupkey;
   by dauid;
run;

/* census: every census DA with PCCF attributes attached where available.
   miss:   the census DAs that have no PCCF record at all. */
data census miss;
   merge census (in=m1)
         pccf   (in=m2);
   by dauid;
   if m1;

   in_pccf = m2;

   *** identify FN communities for imputation including Fairford (46180005), Hollow Water (46190058), Jackhead (46190083) and Cross Lake (46220033) which are missing from this pccf file;
   fnda = (csdtype = 'IRI'); **or (dauid in ('46180005','46190058','46190083','46220033'));
   label fnda = 'First Nations DA (1=Yes)';

   *** identify FN communities north of 53 parallel or remote (Poplar River, Berens River, Pauingassi, Little Grand Rapids, Bloodvein);
   if fnda = 1 then do;
      fnnorth = (lat >= 53); **or dauid in ('46190070','46190068','46190076','46190077','46190061','46190063');
      label fnnorth = 'First Nations DA North of 53rd Parallel (1=Yes)';
   end;

   if m1 & ^m2 then output miss;
   output census;
run;

proc freq data = census;
   tables in_pccf;
run;

proc print data = census;
   where in_pccf = 0;
   var csdtype csdname dauid;
run;

proc freq data = census;
   where fnda = 1;
   tables fnnorth*csdname*dauid /list missing;
   title2 'North and South FN DAs';
run;

*** generate weighted means for FN DAs by north/south;
/* wmean: weighted summary of one variable over First Nations DAs.
     var    - analysis variable
     wgt    - weight variable (the population base the variable refers to)
     output - name of the output dataset, one record per fnnorth level,
              holding <var>_Sum, <var>_SumWgt and <var>_Mean (AUTONAME). */
%macro wmean (var,wgt,output);
   proc summary data = census nway;
      where fnda = 1;
      class fnnorth;
      var &var;
      weight &wgt;
      output out=&output sum= sumwgt= mean=/autoname;
      title2 "Calculate Weighted Means for &var in North and South FN DAs";
   run;
%mend;

options mprint;

%wmean (var=INC_PHH_AVEINC,wgt=INC_PHH,output=impute_income);
%wmean (var=LF_UNEMRATE15,wgt=LF_POPTOT15,output=impute_umemp);
%wmean (var=nohigh,wgt=EDHS_POP15,output=impute_nohigh);
%wmean (var=sdw,wgt=MSTAT_TOTPOP,output=impute_sdw);
%wmean (var=alone,wgt=PHH_TOTPOP,output=impute_alone);
%wmean (var=mobile5yr,wgt=MOB5_TOTPOP5,output=impute_mobile);

proc sort data = census;
   by fnnorth;
run;

/* Impute the north/south First Nations weighted mean wherever a value is
   still missing. DAs that are not First Nations DAs have fnnorth missing,
   never match a summary record, and are therefore left untouched.

   Income is truncated to whole dollars. The unemployment rate and the four
   proportions are assigned as-is: applying INT() to them would discard the
   fractional part, which turns every imputed proportion (a value between
   0 and 1) into 0. */
data census;
   merge census        (in=m1)
         impute_income (in=m2 keep=fnnorth INC_PHH_AVEINC_mean)
         impute_umemp  (in=m3 keep=fnnorth LF_UNEMRATE15_mean)
         impute_nohigh (in=m4 keep=fnnorth nohigh_mean)
         impute_sdw    (in=m5 keep=fnnorth sdw_mean)
         impute_alone  (in=m6 keep=fnnorth alone_mean)
         impute_mobile (in=m7 keep=fnnorth mobile5yr_mean);
   by fnnorth;
   if m1;

   /* fix_* flags record which variables were imputed for this DA. */
   fix_inc    = 0;
   fix_unemp  = 0;
   fix_hs     = 0;
   fix_sdw    = 0;
   fix_alone  = 0;
   fix_mobile = 0;

   if m2 = 1 and INC_PHH_AVEINC = . and INC_PHH_AVEINC_mean ^= . then do;
      INC_PHH_AVEINC = int(INC_PHH_AVEINC_mean);
      fix_inc = 1;
   end;

   if m3 = 1 and LF_UNEMRATE15 = . and LF_UNEMRATE15_mean ^= . then do;
      LF_UNEMRATE15 = LF_UNEMRATE15_mean;
      fix_unemp = 1;
   end;

   if m4 = 1 and nohigh = . and nohigh_mean ^= . then do;
      nohigh = nohigh_mean;
      fix_hs = 1;
   end;

   if m5 = 1 and sdw = . and sdw_mean ^= . then do;
      sdw = sdw_mean;
      fix_sdw = 1;
   end;

   if m6 = 1 and alone = . and alone_mean ^= . then do;
      alone = alone_mean;
      fix_alone = 1;
   end;

   if m7 = 1 and mobile5yr = . and mobile5yr_mean ^= . then do;
      mobile5yr = mobile5yr_mean;
      fix_mobile = 1;
   end;
run;

proc freq data = census;
   tables fix_inc fix_unemp fix_hs fix_sdw fix_alone fix_mobile
          fix_inc*fix_unemp*fix_hs*fix_sdw*fix_alone*fix_mobile /list missing;
   title2 'Census Values based on DA level after imputation from FN communities';
run;

proc print data = census;
   where INC_PHH_AVEINC = . | LF_UNEMRATE15 = . | nohigh = . | sdw = . | alone = . | mobile5yr = .;
   var csd_name geo_name dauid csdid POPTOT INC_PHH_AVEINC LF_UNEMRATE15 nohigh sdw alone mobile5yr;
   title2 'No Imputed Values Found for at least one Deprivation variable';
run;

/* Only the six deprivation inputs are listed here. Naming a variable that
   does not exist in the dataset makes the step fail. */
proc means data = census n nmiss mean stddev median min max nolabels;
   var INC_PHH_AVEINC LF_UNEMRATE15 nohigh sdw alone mobile5yr;
   title2 'Mean and Median values for Census Variables';
run;

******************************************************;
title2 'Social & Material Deprivation Factor Analysis';
******************************************************;

/* Two-factor solution with varimax rotation. The factor scores written to
   the OUT= dataset are renamed: factor1 -> social, factor2 -> material. */
proc factor data = census rotate=varimax nfactors=2
            out=dep(rename=(factor1=social factor2=material));
   var INC_PHH_AVEINC LF_UNEMRATE15 nohigh sdw alone mobile5yr;
   title3 'Orthogonal Rotation';
run;

data dep;
   set dep;
   label social   = 'Social Deprivation Index, 2011 Census'
         material = 'Material Deprivation Index, 2011 Census';
   keep dauid social material POPTOT INC_PHH_AVEINC LF_UNEMRATE15 nohigh sdw alone mobile5yr;
run;

proc sort data = dep;
   by dauid;
run;

proc means data = dep n nmiss mean var median min max;
   var social material;
   title3 'Simple Statistics';
run;