* SAS code for Gini coefficient calculations;
*   1. Simulate skewed per-person costs (dataset PERSON);
*   2. Bin people into percentile groups and accumulate the share of;
*      population and of total cost per group (dataset CUMPCT);
*   3. Print the cumulative shares and plot them as a Lorenz curve;
*   4. Compute the Gini coefficient from the person-level data (dataset GINI);

* Display labels for the 13 percentile groups assigned in PCTILE2;
proc format;
  value grpfmt
     1='0-10 %ile'
     2='10-20 %ile'
     3='20-30 %ile'
     4='30-40 %ile'
     5='40-50 %ile'
     6='50-60 %ile'
     7='60-70 %ile'
     8='70-80 %ile'
     9='80-90 %ile'
    10='90-95 %ile'
    11='95-98 %ile'
    12='98-99 %ile'
    13='99-100 %ile';
run;

* Simulated cost data: 1000 individuals with the following breakdown:
*   600 with a mean cost of $100  (standard deviation 20)
*   300 with a mean cost of $500  (standard deviation 100)
*   100 with a mean cost of $2000 (standard deviation 400)
* This simulates a commonly observed pattern of health expenditures:
* most people low, and a small group very high;
data person;
  do i=1 to 1000;
    if i<=600 then avgcost=(rannor(128)*20)+100;
    else if i<=900 then avgcost=(rannor(128)*100)+500;
    else avgcost=(rannor(128)*400)+2000;
    output;
  end;
  drop i;
run;

* Sort sample by increasing avgcost. Both the percentile grouping and the;
* Gini calculation below depend on this ascending order;
proc sort data=person;
  by avgcost;
run;

* Assign GROUP based on the expenditure percentile each person falls into.;
* These are deciles up to 90 percent, and smaller groups after that (the;
* curve can become quite steep at the highest percentile values, so more;
* points are used for a better approximation).;
* COUNT is the 1-based rank of the person in the sorted data. The sum;
* statement initialises it to 0 and retains it, so no explicit reset is needed;
data pctile2;
  set person nobs=nobs;
  by avgcost;
  count+1;
  frac=(count/nobs);
  if frac<=.1 then group=1;
  else if frac<=.2 then group=2;
  else if frac<=.3 then group=3;
  else if frac<=.4 then group=4;
  else if frac<=.5 then group=5;
  else if frac<=.6 then group=6;
  else if frac<=.7 then group=7;
  else if frac<=.8 then group=8;
  else if frac<=.9 then group=9;
  else if frac<=.95 then group=10;
  else if frac<=.98 then group=11;
  else if frac<=.99 then group=12;
  else group=13;
run;

* Percent of people in each group (unweighted frequencies);
proc freq data=pctile2 noprint;
  tables group / out=outpop;
  format group grpfmt.;
run;

* Percent of total cost in each group (frequencies weighted by avgcost);
proc freq data=pctile2 noprint;
  tables group / missing out=outgin;
  weight avgcost;
  format group grpfmt.;
run;

* Put the population share and the cost share of each group side by side;
data cumpct;
  merge outpop(in=a keep=group percent rename=(percent=pctpop))
        outgin(in=b keep=group percent rename=(percent=pctcost));
  by group;
  if a;
run;

* Running totals of both shares, converted from percent to fractions.;
* The sum statements start each accumulator at 0 and retain it across rows;
data cumpct;
  set cumpct;
  cumcost+(pctcost/100);
  cumpop+(pctpop/100);
  label cumpop='Cumulative percentage of population'
        cumcost='Cumulative percentage of total cost';
run;

proc print data=cumpct label;
  var group cumpop cumcost;
  format cumcost cumpop percent7.2;
  title1 'Percent of population and total cost by percentile: AVGCOST';
run;

* Plot Lorenz curve.;
* CUMPCT starts at the end of the first group, so a (0,0) point is prepended;
* in a separate plotting dataset. This makes the joined line start at the;
* origin while leaving CUMPCT and its printed table unchanged;
data lorenz;
  if _n_=1 then do;
    cumpop=0;
    cumcost=0;
    output;
  end;
  set cumpct;
  output;
  keep cumpop cumcost;
run;

symbol1 i=j value=none line=1;
proc gplot data=lorenz;
  plot cumcost*cumpop;
  format cumcost cumpop percent7.0;
  title1 'Lorenz curve for simulated costs';
run;
quit;

* Calculate Gini using individual-level data.;
* This requires data to be sorted by the analysis variable in ascending order.;
* With x sorted ascending, i the 1-based rank and n the number of rows:;
*   gini = sum((2*i - (n+1)) * x) / ((n-1) * sum(x));
* IPT is the rank, SUM the running total of avgcost and GINISUM the running;
* numerator. A single row is written when the last observation is reached;
data gini;
  set person end=eof nobs=nobs;
  by avgcost;
  retain ginisum 0 sum 0 ipt 0;
  ipt=ipt+1;
  sum=sum+avgcost;
  ginisum=ginisum+(2*ipt-(nobs+1))*avgcost;
  if eof then do;
    * The ratio is undefined for a single observation or a zero cost total,;
    * so report a missing value instead of dividing by zero;
    if nobs>1 and sum ne 0 then gini=ginisum/((nobs-1)*sum);
    else gini=.;
    output;
  end;
run;

proc print data=gini;
  var gini;
  title1 'Gini coefficient';
  title2 'Dixon et al (Ecology 69, p. 1307, 1988.)';
run;