*************************************************
* file = manip_if.sas                           *
* The SAS program in this file creates new      *
* variables using IF-THEN statements and        *
* several assignment statements.                *
* The program assumes that the data set "htwt"  *
* has been created.                             *
*************************************************;

options linesize=min;

*------------------------------------------------*
* Create 2 grouping formats: $namef (character)  *
* and agefmt (numeric).  Also create 3 labelling *
* formats: $namel and $agelbl are character      *
* because they label character values, and       *
* namel is numeric because it labels the numeric *
* 0/1 variable "newnamex".                       *
* All 5 formats are used later in the program.   *
*------------------------------------------------*;

/* The values on the right of "=" have been
   enclosed in quotes so that when they are used
   to create new variables the new variables will
   be character (and take up less space) */

proc format;
   value $namef   /* requires "$" because values
                     on left of "=" are character */
      'Elizabeth','David','James' = '1'
      other                       = '0';

   /* Half-open ranges (upper end excluded) so that a
      fractional age such as 29.5 cannot fall into a gap
      between groups.  OTHER maps missing and negative
      ages to blank, matching the IF/THEN chain in the
      DATA step below. */
   value agefmt   /* does not require "$" because
                     values on left are numeric */
      0-<30   = '1'
      30-<40  = '2'
      40-<50  = '3'
      50-high = '4'
      other   = ' ';

   value $namel  '1' = '3 names'
                 '0' = 'all other names';

   value namel    1  = '3 names'
                  0  = 'all other names';

   value $agelbl '1' = '1: 0 to 29 yrs'
                 '2' = '2: 30 to 39 yrs'
                 '3' = '3: 40 to 49 yrs'
                 '4' = '4: 50+ years old';
run;

******************************************
* Create a new temporary SAS data set,   *
* same name, to add new variables        *
******************************************;

data htwt;
   set htwt;

   *------------------------------------------------*
   * 1. Create dichotomous variables by referencing *
   *    one category of values of one variable.     *
   *------------------------------------------------*;

   /* The original variable is "age" and the new
      variables are "age2grp" and "age2grpx".  Both
      new variables have identical values ("1" and "0")
      except the 1st approach results in a character
      variable and the 2nd approach results in a
      numeric value.

      SAS treats a missing numeric value as smaller than
      any number, so "age<50" is TRUE when age is missing.
      Missing ages are therefore handled first and left
      missing in both new variables. */

   if missing(age) then do;
      age2grp  = ' ';
      age2grpx = .;
   end;
   else do;
      if age<50 then age2grp='1';   /* using IF/THEN */
      else age2grp='0';

      age2grpx=(age<50);            /* assignment statement */
   end;

   *------------------------------------------------*
   * 2. Create dichotomous variable by referencing  *
   *    multiple values of one variable.            *
   *------------------------------------------------*;

   /* The original variable is "name" and the new
      variables are "newname", "newnamex" and "newnamey".
      All 3 new variables have identical values ("1" and
      "0") which differ re numeric vs character */

   if name in ('Elizabeth','David','James')
      then newname='1';
   else newname='0';     /* new variable is character
                            - use character format to label */

                         /* new variable is numeric
                            - use numeric format to label */
   newnamex=(name in ('Elizabeth','David','James'));

   newnamey=put(name,$namef.);   /* new variable is character */

   /* If the series of values were numeric, no quotes
      would be used, e.g., if region in (1,5,9)... */

   *-----------------------------------------------------*
   * 3. Create 2 multi-value variables called "agegroup" *
   *    and "agegrpx", referencing ranges of values of   *
   *    one variable ("age").                            *
   *-----------------------------------------------------*;

   /* Half-open comparisons mirror the agefmt ranges.  The
      final ELSE sets agegroup to blank for missing or
      negative ages; without it, a rerun against a data set
      that already contains agegroup would silently keep
      the value read by the SET statement. */

   if      0<=age<30  then agegroup='1';
   else if 30<=age<40 then agegroup='2';
   else if 40<=age<50 then agegroup='3';
   else if age>=50    then agegroup='4';
   else                    agegroup=' ';

   agegrpx=put(age,agefmt.);   /* use the agefmt format */

   /* label one of the new variables */
   label agegrpx = 'Age grouped into 4 categories';

run;


proc freq data=htwt;
   tables age  * age2grp  * age2grpx            /list missing;
   tables name * newname  * newnamex * newnamey /list missing;
   tables age  * agegroup * agegrpx             /list missing;

   /* add labels to the values of the new variables */
   format newname newnamey $namel. newnamex namel.
          agegroup agegrpx $agelbl.;
   title1 'The height/weight data set';
   title2 'Check new variables against original variables';
run;

proc contents data=htwt;
   title2;   /* remove 2nd title for remaining procs */
run;

proc print data=htwt (obs=10);
run;