/*-------------------------------------------------------------------------- © Copyright 2011-2013 University of Manitoba This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . ----------------------------------------------------------------------------- Author: Say Hong Validation Macro Please direct questions and comments to info@cpe.umanitoba.ca June 04, 2013 This macro is written and tested in SAS windows 9.3 (TS1M0), PC 64bit. My intention to write this macro is to fill in the gap of the VIMO macro. Note that VIMO macro can only perform a univariate invalid check, for example, out of range check and invalid value check. However, even if the data pass the univariate check, it does not imply that the data is free of error. There are situations in which it is useful to compare the values of two or more variables to determine if they are mutually consistent. For example, a pregnant man, hospital separation is occurred before admission, these all indicate data inconsistency. Note that, though this macro is designed to check for data inconsistency that involve two or more variables, it can be used to check for univariate inconsistency as well. This macro can perform the following 1. Cross-field or within record check. 2. Cross-table check (Fields from one table can be checked for consistency with other fields in another table). Macro parameters: primary = name of the primary dataset. pkey = space-delimited list of key variable(s) from the primary dataset. secondary = name of the secondary dataset (leave blank if it is not a cross-table check). skey = a space-delimited list of key variable(s) from secondary dataset (leave blank if key variable(s) from secondary dataset have the same name as primary dataset) validaterule = location and name of the TAB delimited text file that contains the rules to check for inconsistencies among variables. odsout = location and name to save the output (output can be saved with an extension of .rtf, .pdf, or .html). If blank, then output will be shown in sas output windows only. Note: If secondary is not blank, then pkey have to form unique keys for records in primary dataset. ---------------------------------------------------------------------------*/ %macro dq_validation(primary=, pkey=, secondary=, skey=, validaterule=, odsout=); %put NOTE: -----------------------; %put NOTE: Validation Macro; %put NOTE: -----------------------; %if &primary = %then %do; %put; %put ERROR: Macro parameter primary= cannot be blank; %return; %end; %if %sysfunc(exist(&primary, data)) = 0 %then %do; %put; %put ERROR: Dataset &primary does not exist; %return; %end; %if &validaterule = %then %do; %put; %put ERROR: Macro parameter validaterule= cannot be blank; %return; %end; %else %do; %if %index(&validaterule, .) = 0 %then %do; %put; %put WARNING: There is no data extension for macro parameter validaterule=; %put WARNING: Please include data extension and re-run the macro; %return; %end; %let qt = %qsubstr(&validaterule,1,1); %if &qt ^= %str(%') & &qt ^= %str(%") %then %let validaterule="&validaterule"; %end; %if &odsout ^= %then %do; %if %index(&odsout, .) = 0 %then %do; %put; %put WARNING: There is no data extension for macro parameter odsout=; %put WARNING: Please include data extension and re-run the macro; %return; %end; %let qt = %qsubstr(&odsout,1,1); %if &qt ^= %str(%') & &qt ^= %str(%") %then %let odsout = "&odsout"; %let ext = %qscan(&odsout, 2, .); %let ext = %substr(&ext, 1, %length(&ext) - 1); %end; data validaterule; infile &validaterule delimiter = '09'x lrecl=1000; input ErrorMsg :$250. condition :$250.; run; data _null_; set validaterule nobs = n; if _n_ = 1 then call symputx('nchks', n); call symputx(cats('ErrorMsg', _n_), ErrorMsg); call symputx(cats('Cond', _n_), condition); run; %local n_dup; %if &secondary ^= %then %do; proc sort data=&primary nodupkey out=p_dsn dupout=chkdup; by &pkey; run; %let dsid = %sysfunc(open(chkdup)); %let n_dup = %sysfunc(attrn(&dsid,nlobs)); %let rc = %sysfunc(close(&dsid)); %if &n_dup > 0 %then %do; proc datasets nolist lib=work; delete validaterule; quit; %put; %put WARNING: Dataset &primary contains duplicate key variable(s); %put WARNING: Cross-table validation requires the primary dataset to have unique key variable(s) value; %put WARNING: This macro stop without executing; %put; %return; %end; %if &skey = %then %let skey = &pkey; %let rename_k = %scan(&skey, 1) = %scan(&pkey, 1); %do i = 2 %to %sysfunc(countw(&skey)); %let rename_k = &rename_k %scan(&skey, &i) = %scan(&pkey, &i); %end; proc sort data=&secondary out=s_dsn; by &skey; run; data validate(keep=counter); merge p_dsn s_dsn(rename=(&rename_k)); by &pkey; %do i = 1 %to &nchks; if &&cond&i then do; counter = &i; output; end; %end; run; %end; %else %do; data validate(keep=counter); set &primary; %do i = 1 %to &nchks; if &&cond&i then do; counter = &i; output; end; %end; run; %end; %let dsid = %sysfunc(open(validate)); %let n_validate = %sysfunc(attrn(&dsid,nlobs)); %let rc = %sysfunc(close(&dsid)); %if &n_validate > 0 %then %do; proc freq data=validate noprint; tables counter/out=cnts(keep=counter count); run; data validate(drop=counter); set cnts; ErrorMsg = symget(cats('ErrorMsg', counter)); condition = symget(cats('cond', counter)); run; %if &odsout= %then %do; title 'Validation Check for Data Consistency'; proc print data=validate noobs label; label count = 'count' ErrorMsg = 'Error Message'; run; title; %end; %else %do; title h=11pt bold 'Validation Check for Data Consistency'; footnote; options nodate nonumber orientation=landscape leftmargin=0.25in rightmargin=0.25in topmargin=1in bottommargin=1in; %if %upcase(&ext) = RTF %then ods &ext file=&odsout style=minimal bodytitle%str(;); /*bodytitle only works for rtf destination*/ %else ods &ext file=&odsout style=minimal%str(;); proc print data=validate noobs label style(report)=[rules=groups frame=hsides borderspacing=0] style(header)=[font_size=10pt font_weight=bold] style(data)=[font_size=10pt]; var count/style=[background=#f0f0f0]; var ErrorMsg condition; label count = 'Count' ErrorMsg = 'Error Message' condition = 'Condition'; run; title; ods &ext close; %end; %end; %else %do; data _null_; file print; put; put; put '************ No data inconsistency was found based on the rules provided ************'; run; %end; proc datasets nolist lib=work; delete cnts validate validaterule; quit; %mend dq_validation;