/**
  This is a quick and simple bootstrap program
  for calculating confidence intervals around mean and median.
  The jackknife/bootstrap macros from SAS provides more capability and
  provides the ability to do jackknife analysis as well, it is also
  more confusing to use.

  This code was developed from several sources - mostly descriptions of
  the bootstrap method found on the WWW.

  Charles Burchill, October 13, 2000

Macro Call 
   _boot  ;

Options
data=   Input data set, this is required. 

repeat= Number of iterations to sample for boot strap.  Default is 50 

var=  Variable of interest for median/mean using boot strap

ci= Confidence interval (interger 1-100) default is 95


Output 
  Three printed tables: 1. Standard mean and CI for var.
2. Standard method bootstrap mean, median, CI for var
3. Bias reduced method bootstrap mean, median, CI for var.

The following datasets and macro variables are used internally
and not indicated in macro call.
   &i - iterations for bootstrap samples
   &upper, &lower - Points for selecting upper and lower bounds
   _test - itermediate data in bootstrap
   _itest - intermediate data in bootstrap
   final - bootstrap sampled database
   final2 - values for mean, median, CIs (standard method)
   final3 - values for mean, median, CIs (bias reduced method)
   btest  - Mean, Median for whole data set - not bootstraped
   btest2 - St. Deviation for whole dataset.

NOTE:
   This macro will read in the data set provided (only 1 variable though)
the number of times defined in repeat and run a proc univariate 
the same number of times.  This means for very large datasets
this can be a slow procedure.

   The number of iterations (repeat=) of sampling done is limited to 
less than 2000.  According to some literature you should not
need to run with more than 250 repeated samples.  Many other
sources suggest an limit of 1000 samples should be adequate.

   The macro is currently limited to median/mean calculations only but it
can be easily modified for other applications.  I would recommend
using the SAS jack/boot macro group instead because of the 
flexability.

Comment on another method:

KC Carrier has suggested the following technique for 
getting the confidence intervals of the median 
without using a bootstrap method.  Carolyn Decoster
has a nice writeup of the method.

The rank order position in the data set of the 
upper/lower bounds is given by:
Lower = floor(((nobs+1)/2) - ((nobs)**0.5)/2*1.96) ;
Upper = ceil(((nobs+1)/2) + ((nobs)**0.5)/2*1.96) ;

This gives you the record in the sorted database that
represents the upper and lower bound of the
confidence interval.  The following code
can be used to get the bounds:

*** get median and number of records ;
proc univariate data=testboot  noprint ;
      var los ;
      output out=itest  median=median n=n  ;
      run ;
*** Calculate upper and lower record numbers (bounds) ;
data itest ;
   set itest ;
   z = probit(0.05/2) ;  *** set alpha here, divide by 2 for two tailed test ;
   upper = ceil(((n+1)/2)+((n)**0.5)/2*z) ; 
   lower = floor(((n+1)/2)-((n)**0.5)/2*z) ;
   run;
*** Sort by the variable you are getting the median value for ;
proc sort data=testboot ;
   by los ;
   run;
*** Find the value of the variable for the record representing the bound ;
data test ;
   if _n_ = 1 then set itest ;
   set testboot ;
   if _n_ = lower then do ; ci='Lower' ; output ; end ;
   if _n_ = upper then do ; ci='Upper' ; output ; end  ;
   run;
*** print it out. ;
proc print data=test ;
   var ci los median ;
   run;

From testing both methods (KC, Bootstrap) they produce similar
results.  The mehtod KC suggests is far less computer
(resource) intensive.

**** Create sample data set to test code ;
libname dsd1 '/dsd1/burchil';

data dsd1.testboot ;
    set cpe.hsp9798l(keep=los where=(1<=los<=30) ) ;
    run;

_boot data=dsd1.testboot repeat=5000 var=los ci=90 ;
**/

*** The following macro randomly samples (with replacement) a data set the
    size of the  original and does a proc univariate to get the mean
    and median on the data ; 
%macro _boot(data=, repeat=50, var=, ci=95, debug= ) / stmt ;

%put Simple Boot Strap Macro for Calculating Median Confidence Intervals ;
%put Charles A. Burchill, Manitoba Centre for Health Policy and Evaluation ;
%put $Id: _boot.sas,v 1.1 2000/10/14 00:03:18 burchil Exp burchil $ ;

%if &debug = 1 %then %let debug=debug ;
%if &debug ^= debug %then options nonotes nomprint ;
%else options notes mprint ; ;

%* The differnce from 100 of the upper and lower bound used in the 
   proc univariate procedure is only 1/2 the alpha removed
   from the top and bottom ;
%let low=%sysevalf((100-&ci)/2) ;
%let upper=%sysevalf(100-&low) ;
%let ci = %sysevalf(&ci/100) ;

%* If request is over 2000 iterations bail out ;
%if %eval(&repeat>2000) %then %goto out0 ;

%*** Run a means to provide some base information for the variable ;
proc means data=&data mean clm alpha=%sysevalf(1-&ci) max min n ;
   var &var ;
   title "Mean and Confidence Interval (%sysevalf(&ci*100) %) using Proc Means for &var" ;
   run;

%** delete any existing final dataset ;
proc datasets nolist ;
   delete final ;
   run;

%*** Create random samples with replacement of provided data set ;
%do i = 1 %to &repeat ;
   data _test ;
      _choice = int(ranuni(500+&i)*n) + 1; 
      set &data(keep=&var) point=_choice nobs=n ; 
          * keep forces only variable used in boot strap to be kept - limit size ;
      j+1 ;
      if j > n then stop ;
      run;

   %if %eval(&SYSERR>0) %then %goto out1;

   %*** Procedure that you want to boot strap - in this case mean and median values. ;
   proc univariate data=_test  noprint ;
      var &var ;
      output out=_itest mean=l_Mean median=l_median ;
      run ;

   %if %eval(&SYSERR>0) %then %goto out1;

   %*** Build new dataset based on output from above procedure ;
   proc append base=final data=_itest ;

   %end ;

  ** Get the 95% confidence interval around the mean - standard method using percent points i
n boot strapped data ;
  proc univariate  data=final noprint ; ;
     var l_mean l_median ;
     output out=final2 pctlpts=&low &upper pctlpre=Mean_ Med_  ;
     run;

  *** Get mean and median of variable for reporting and bias reduced test below ;
  proc univariate data=&data  noprint ;
      var &var ;
      output out=btest mean=b_Mean median=b_median ;
      run ;

   %if %eval(&SYSERR>0) %then %goto out1;

  data final2 ;
    merge btest final2 ;
    label b_mean="Mean &var"
          b_median="Median &var" ;
    run;

  proc print data=final2 label ;
     title1 'Standard upper and lower confidence intervals for Mean and Median using Bootstra
p' ;
     title2 "Data = &data Variable=&var CI=%sysevalf(&ci*100)";;
     run;  

  *** Calculate the sample bias-reduced estimates 
    see:  http://www.dmstat1.com/bootstrap.html, and T method refered to in SAS bootstrap mac
ro ; 
  *** See following references for variations 
  E&T     Efron, B. and Tibshirani, R.J. (1993), An Introduction to the
        Bootstrap, New York: Chapman & Hall.

  S&T     Shao, J. and Tu, D. (1995), The Jackknife and Bootstrap, New York:
        Springer-Verlag. ;
  data final3 ;
    if _n_ = 1 then set btest ;
    set final ; ;
    s_mean = 2*b_mean - l_mean ;
    s_median = 2*b_median - l_median ;
    run;

  proc univariate data=final3 noprint ;
    var s_mean s_median ;
    output out=btest2 std=s_mean s_median ;
    run;

  data final3(drop=s_mean s_median alpha pro) ;
    merge btest btest2 ;
    alpha = (1-&ci)/2 ;  *** Set probability based on confidence interval e.g. (0.95)  ;
    pro = probit(alpha) ;   *** pro is the probability statistic based on the given alpha ;
    bl_mn = b_mean+pro*s_mean ;
    bu_mn = b_mean-pro*s_mean ;
    bl_md = b_median+pro*s_median ;
    bu_md = b_median-pro*s_median ;
    label b_mean="Mean &var "
          b_median="Median &var" 
          bl_mn = "Lower Mean" 
          bu_mn = "Upper Mean" 
          bl_md = "Lower Median" 
          bu_md = "Upper Median" ;

    run;

  proc print label ;
    title1 'Bias Reduced upper and lower confidence intervals for Mean and Median using Boots
trap' ;
    title2 "Data = &data Variable=&var CI=%sysevalf(&ci*100)";
    run;


  %goto exit;  
  %out0:
     option notes ;
     %put WARNING: This is an iterative procedure that will duplicate the data set &repeat ti
mes ;
     %put WARNING: There is a limit of 2000 iterations, generally boot strap methods need no 
more than 500 iterations ;
     %put WARNING: The macro did not execute ;
     %goto exit ;
  %out1:
      options notes ;
     %put WARNING: There was an ERROR in the boot macro. ;
     %put WARNING: The macro did not execute. ;
     %goto exit ;
  %exit:
   options notes ;
   title1 ;
   title2 ;

%mend ;

©2003 Manitoba Centre for Health Policy (MCHP)