/*** This macro will create a series of dummy variables on an input data set
     based on the values of a variable(s).

     Usage: _dumvar ;

     Options:
       data=   SAS data set name. [required]
       out=    SAS output data set name. If missing will default to the input
               data set name.
       dvar=   Variables to use when generating dummy variables. Multiple
               variables may be passed in a quoted string. There must be a
               matching prefix for each. [required]
       prefix= Prefix to use on dummy variable names. First character must be
               A-Z and it must be at least 1 character long. You must pass a
               separate prefix for each variable in the dvar= list. If multiple
               variables are used this string must be quoted [required]
       inter=  Generate interaction terms between the dvar variables. Use the
               format VAR1*VAR2. Currently you can only generate pairwise
               interactions (e.g. age*hsp). The program will generate an
               interaction variable based on the value in each interaction.
               If this option is used the combined total length of the prefix
               and value for each interaction must be less than 8 characters.
               You may pass multiple pairwise interactions in quotes
               (e.g. inter='age*hsp rvw*sex'). The global macro variable
               &inters contains a list of all the interaction terms, minus the
               dropped combinations.
       drop=   Drop one or more of the dummy variables. The dummy variables
               have the given prefix followed by the value you want to drop
               (e.g. hsp81, where hsp is the prefix, 81 is the value). If you
               want to drop multiple variables the list must be in a quoted
               string. Note: You can not drop interaction dummy variables.

     Output:
       A new variable for each value of the input variables (dvar=) will be
       generated along with all of the specified interactions. These variables
       will contain a 1 where the value of the input variable and the dummy
       variable name match, and 0 where they do not.
       Two global macro variables are generated (&dvars, &inters). These macro
       variables contain the list of dummy and interaction data set variables
       that the macro generated.
They may be used in a model statement of a subsequent procedure without having to enter the list of dummy variables. NOTES: - The total length of the prefix, and the value of the variable can not exceed 8 characters. - The values in DVAR must not have imbeded or leading spaces. - If DVAR is a numeric variable there should be no missing values. - If you are using character variables there should be no "." values. - Interaction dummy variables can not be dropped. Any interaction variables based on a dropped variable will not be created. - This macro has the potential to generate a huge number of variables. The macro is limited to creating 500 dummy variables. **/ %macro _dumvar(data= , dvar= , drop= , prefix= , out=, inter=, debug=nodebug) /stmt ; %put Dummy Variable Generation Macro ; %put Manitoba Centre for Health Policy and Evaluation ; %put Charles Burchill, Julie Horrocks ; %put $Id: _dumvar.mac,v 2.3 1996/03/14 21:12:40 burchil Exp burchil $ ; options nonotes nomprint ; %* Define global variable, and set it to missing ; %global dvars inters; %let dvars = ; %let inters= ; %* define bail out counter, and set it to 0 ; %let overall = 0 ; %* Test for required options ; %if &data= | &dvar= | &prefix= %then %goto err1 ; %* if the output data set is not defined then define it as the input ; %if &out= %then %let out=&data ; %* Set the debug options ; %if &debug = 1 %then %let debug=debug ; %if &debug = debug %then options mprint notes ; ; %* Check if the dvar string is quoted. Strip the quotes and determine the number of variables ; %let n = 1 ; %let qte = %qsubstr(&dvar,1,1) ; %if &qte = %str(%') | &qte = %str(%") %then %do ; %let rq= %length(&dvar) ; %let dvar = %substr(&dvar,2,%eval(&rq-2)) ; %let word=%qscan(&dvar,&n,%str( )) ; %do %while(&word^= ) ; %let n=%eval(&n+1) ; %let word=%qscan(&dvar,&n,%str( )) ; %end ; %end ; %if &n > 1 %then %let n = %eval(&n-1) ; %* Check if the prefix string is quoted. Strip the quotes. Count the number of names passed. 
Check if this number is The same as the DVAR number ; %let np = 1 ; %let qte = %qsubstr(&prefix,1,1) ; %if &qte = %str(%') | &qte = %str(%") %then %do ; %let rq=%length(&prefix) ; %let prefix=%substr(&prefix,2,%eval(&rq-2)); %let word=%qscan(&prefix,&np,%str( )) ; %do %while(&word^= ) ; %let np=%eval(&np+1) ; %let word=%qscan(&prefix,&np,%str( )) ; %end ; %end ; %if &np > 1 %then %let np = %eval(&np-1) ; %* Test to make sure that the same number of dvar and prefix vars passed ; %if &n ^= &np %then %goto err4 ; %* Determine if the drop variable is quoted. Strip the quotes and determine the number of variables ; %let nd = 1 ; %if &drop ^= %then %do ; %let qte = %qsubstr(&drop,1,1) ; %if &qte = %str(%') | &qte = %str(%") %then %do ; %let rq= %length(&drop) ; %let drop = %substr(&drop,2,%eval(&rq-2)) ; %let word=%qscan(&drop,&nd,%str( )) ; %do %while(&word^= ) ; %let nd=%eval(&nd+1) ; %let word=%qscan(&drop,&nd,%str( )) ; %end ; %end ; %if &nd > 1 %then %let nd = %eval(&nd-1) ; %end ; %* Determine if the inter variable is quoted. Strip the quotes and determine the number of variables ; %let ni = 1 ; %if %quote(&inter) ^= %quote( ) %then %do ; %let qte = %qsubstr(&inter,1,1) ; %if &qte = %str(%') | &qte = %str(%") %then %do ; %let rq= %length(&inter) ; %let inter = %substr(&inter,2,%eval(&rq-2)) ; %let word=%qscan(&inter,&ni,%str( )) ; %do %while(&word^= ) ; %let ni=%eval(&ni+1) ; %let word=%qscan(&inter,&ni,%str( )) ; %end ; %end ; %if &ni > 1 %then %let ni = %eval(&ni-1) ; %end ; %* Allow users to mix case (case insensitive). 
Since Variable names are uppercase force the dvar, prefix, inter, and drop to be uppercase ; %let dvar = %upcase(&dvar) ; %let prefix = %upcase(&prefix) ; %let drop = %upcase(&drop) ; %let inter = %upcase(&inter) ; %* Check if interaction terms match dvar variables If if there is any miss-match then get out will error notice ; %if %quote(&inter) ^= %quote( ) %then %do ; %do p = 1 %to &ni ; %let int1 = 0 ; %let int2 = 0 ; %let i1 = %scan(%scan(&inter,&p,%str( )),1,*) ; %let i2 = %scan(%scan(&inter,&p,%str( )),2,*) ; %do i = 1 %to &n ; %if &i1 = %scan(&dvar,&i,%str( )) %then %let int1 = %eval(&int1 + 1) ; %if &i2 = %scan(&dvar,&i,%str( )) %then %let int2 = %eval(&int2 + 1) ; %end ; %if &int1 = 0 | &int2 = 0 %then %goto err5 ; %end ; %end ; %* Create a data set that contains a single entry for each value of the DVAR variable. RESOLUTION NOTES: Note ampersand (&) replaced by percent (%). %%dvar%i will resolve the the DVAR variable in the correspinding i-th location. %%prefix%i will resolve to the PREFIX value in the i-th location ; proc freq data=&data ; %do i = 1 %to &n ; %let dvar&i = %qscan(&dvar,&i,%str( )) ; %let prefix&i = %qscan(&prefix,&i,%str( )) ; tables &&dvar&i / out=_temp&i.(keep=&&dvar&i) noprint ; %end ; run; %* Check for errors in the freq run; %if %eval(&SYSERR>0) %then %goto err6 ; %do i = 1 %to &n ; %* Generate a macro variable for each value of the DVAR variable, and a total count variable. ; data _null_ ; set _temp&i end=eof ; retain err3 "0" ; %* Check for missing values ; if _n_ = 1 & (&&dvar&i = "." 
| &&dvar&i = " ") then goto getout ; nvar=compress("&&prefix&i" || &&dvar&i) ; %* Generate variables for each value of dvar, and add the prefix ; call symput("nvar&i" || left(_n_),compress(upcase(nvar))) ; %* Generate variables for each value of dvar, with out prefix; call symput("cvar&i" || left(_n_),compress(upcase(&&dvar&i))) ; %* At the end of the file get the total number of values; if eof then call symput("total&i",compress(_n_)) ; goto exit ; getout: err3 = "1" ; call symput('err3',trim(err3)) ; stop ; exit : call symput('err3',trim(err3)) ; run; %* Warn user if there are more than 500 values in &dvar. ; %let overall = %eval(&&total&i + &overall) ; %if &overall > 500 %then %goto err8 ; %* Warn user that there are missing values of the variable &DVAR ; %if &err3 = 1 %then %goto err3 ; %if %eval(&SYSERR>0) %then %goto err7 ; %* RESOLUTION NOTES: %%total%i will resolve to the number of values for the i-th value in the DVAR variable %%cvar%i%k will resolve to the k-th value of the i-th variable passed in the DVAR variable. %%nvar%i%k will resolve to the k-th value of the i-th variable passed in the DVAR variable. The value will have the corresponding i-th prefix added. ; %* Test to make sure none of the new variable names will be longer than 8 characters - note interaction terms not tested.; %do k = 1 %to &&total&i ; %if %length(&&nvar&i&k) > 8 %then %goto err2 ; %end ; %end ; %* Generate an output data set with the Dummy variables. Generate the DVARS and INTERS global macro variables.; %* RESOLUTION NOTES - See above. ; data &out ; set &data ; %* Flag to determine if all drop variables exist in data ; %let ndrop = 0 ; %* Generate all of the dvar variables. 
&n = # of dvar parameters ; %do i= 1 %to &n ; %* Do for each value of DVAR variable &&total&i = number of values ; %do k= 1 %to &&total&i ; %* Generate the &dvars global variable ; %let addit = 0 ; %do j = 1 %to &nd ; %if &&nvar&i&k = %scan(&drop,&j,%str( )) %then %do ; %let addit = 1 ; %let ndrop = %eval(&ndrop+1) ; %end ; %end ; %if &addit = 0 %then %do ; %let dvars = &dvars &&nvar&i&k ; %* Make new variable 1/0. Note the upcase, and left will do an implicit Character conversion to numeric variables ; if upcase(left(&&dvar&i)) = "&&cvar&i&k" then &&nvar&i&k = 1 ; else &&nvar&i&k = 0 ; %end ; %end ; %end ; %* Generate all of the inter variables. Sorry about all the nested if and do statements. In the next version I will see if I can get rid of a few ; %if %quote(&inter) ^= %quote( ) %then %do ; %* do for each interaction term ; %do l = 1 %to &ni ; %* Get each variable from interaction term ; %let i1 = %scan(%scan(&inter,&l,%str( )),1,*) ; %let i2 = %scan(%scan(&inter,&l,%str( )),2,*) ; %* For each dvar test if it is part of the interaction term. Only the first term in the interaction is used ; %do i= 1 %to &n ; %if &i1 = &&dvar&i %then %do ; %* Test for each value of the first dvar variable ; %do k = 1 %to &&total&i ; %* check if for a match on the second interaction var. RESOLUTION NOTES: %%total%m is the total number of values in the second interaction variable %%dvar&m is the second interaction variable. 
%%nvar&m&v is the value (with prefix) of the second interaction variable ; %do m= 1 %to &n ; %if &i2 = &&dvar&m %then %do ; %* Do over each value of the second interaction var; %do v = 1 %to &&total&m ; %* Check if either of the interaction terms has been dropped, mark it for the &inters global macro variable ; %let addit = 0 ; %do fi1 = 1 %to &nd ; %if &&nvar&i&k = %scan(&drop,&fi1,%str( )) %then %let addit = 1 ; %end ; %do fi1 = 1 %to &nd ; %if &&nvar&m&v = %scan(&drop,&fi1,%str( )) %then %let addit = 1 ; %end ; %* finally build the inters macro variable and generate interaction dummy vars. ; %if &addit = 0 %then %do ; &&nvar&i&k.&&nvar&m&v = &&nvar&i&k * &&nvar&m&v ; %let inters = &inters &&nvar&i&k.&&nvar&m&v ; %end ; %end ; %end ; %end ; %end ; %end ; %end ; %end ; %end ; run; %goto exit ; %err1: options notes ; %put ERROR: Missing at least one option (data, dvar, prefix) ; %put ; %goto fini ; %err2: options notes ; %put ERROR: Combined length of the var %upcase(&dvar) and the prefix %upcase(&prefix) is greater than 8 ; %put ; %goto fini ; %err3: options notes ; %put ERROR: Some of the values of &DVAR are missing or periods ; %put ; %goto fini ; %err4: options notes ; %put ERROR: Number of DVAR variables does not match the number of PREFIX values; %put ; %goto fini ; %err5: options notes; %put ERROR: At least one of your interaction variables does not match the dvar list ; %put ERROR: Check for spelling, and make sure there is no spaces around the '*' in the interaction term ; %put ; %goto fini ; %err6: options notes; %put ERROR: There was an error while calculating the number of values. ; %put ERROR: This is generally caused by an incorrect variable name. ; %put ; %goto fini ; %err7: options notes: %put ERROR: There was an error in generating the dummy variable names ; %put ; %goto fini ; %err8: options notes ; %put ERROR: The total number of values in &dvar is than more 500. 
; %put ; %goto fini ; %exit: options nomprint notes ; %*if at least one dropped variable value does not exist on the data then warn the user ; %if &drop^= & &ndrop ^= &nd %then %put WARNING: At least one of the drop="&drop" values does not exist in the data ; %put Variable Information: ; %do i = 1 %to &n ; %* put a note in the log telling the user the number of classes in each DVAR variable ; %put &&dvar&i has &&total&i values. ; %end ; %* Tell the user which variables where dropped. Note that this currently includes any variables that do not exist ; %if &drop ^= %then %do ; %put ; %put &drop dummy variables dropped ; %end ; %* Count the number of variables in the &dvars global ; %let n= 1 ; %let word=%qscan(&dvars,&n,%str( )) ; %do %while(&word^= ) ; %let n=%eval(&n+1) ; %let word=%qscan(&dvars,&n,%str( )) ; %end ; %put ; %put %eval(&n-1) %NRSTR(%(&DVARS%)) dummy variables generated. ; %* Count the number of variables in &inters global ; %if &inters ^= %then %do ; %let n = 1 ; %let word=%qscan(&inters,&n,%str( )) ; %do %while(&word^= ) ; %let n=%eval(&n+1) ; %let word=%qscan(&inters,&n,%str( )) ; %end ; %put%eval(&n-1) %nrstr(%(&INTERS%)) interaction variables generated. ; %end ; %put ; %fini: ; %mend ; /** Example 1: *** Set up test data set for dummy variable creation ; data test ; input t1 $ t2 $ ; cards ; a g h t ; run; *** Run the macro with the multiple variables option ; _dumvar data=test out=temp dvar='t1 t2' prefix='t1 t2' drop='t1a t1h t2g' ; proc print data=temp ; var t1 t2 &dvars ; run; ** Test the macro with the single variable options ; _dumvar data=test out=temp dvar=t1 prefix=t1 drop=t1a ; proc print data=temp ; run; %put &dvars ; Example 2: ** This example provided by Julie Horrocks **; data acute(drop=mis sex); set dsd.acute(drop=phin91 ptid urbquint); mis=adacut+sex+agegrp+wpg+hosp+hospgrp; if mis=. then delete; if revw="." 
then delete; if sex=1 then female=0; if sex=2 then female=1; run; _dumvar data=acute dvar="female agegrp hospgrp revw" inter="female*agegrp" prefix="f age hspgrp rev" drop="f0 age1 hspgrp1 revDB"; ** Note the usage of the global vars here ; proc logistic DESCENDING data=acute; model adacut= wpg &dvars &inters /lackfit ; title "GLM model - hspgrp"; title2 "Baseline - male, non-wpg, agegrp1, hspgrp1, revwDB"; output out=mod1 pred=pred; run; ***/