/************************************************************************************ * program: char2fmt.mac * date: 20Aug2005 * moddate: 06Oct2005 * programmer: Dan Blanchette * comments: macro to encode character variables to numeric using * user-defined formats to store the character string. * - the format name will be the variable name so any format already * existing with the same name will be over written. * - variables with names that end in numbers are not valid format * names so these formats will be changed to end with an underscore * - sort order of data set will be maintained thus if data set is known * to be sorted by one or more character variables those variables * will not be converted. * - there is no option for specifying a varlist. * * * WARNING: This changes your data set by changing some/all character variables * to numeric variables. Make a back-up copy of your data set * before runing CHAR2FMT. * ************************************************************************************ * Options: ************************************************************************************ * dset= enter name of data set to be modified. Include libref if you want * to modify a non-work data set. * * maxlen= set to what maximum length the character variable can be and not * be converted. Not setting maxlen will convert all character variables. * * temp_dir= enter directory name you would like have the file _&tfns_longcharvar.sas * created in if you want to see the code generated by CHAR2FMT, * otherwise the work directory will be used and _&tfns_longcharvar.sas * will be deleted when your SAS session is over. * * tfns= is for a file name of your choice. If no set, it will be based on * the SAS &SYSJOBID and &SYSINDEX macro variables. * ************************************************************************************/ %macro char2fmt(dset= , maxlen= , temp_dir= , tfns= ); ** Save option settings so they can be restored at the end of this macro. **; %let notes=%sysfunc(getoption(notes)); %let obs=%sysfunc(getoption(obs)); options obs=MAX; *** Reason for maximizing it is because user could have * set it lower than the number of variables in the dataset. **; options nonotes; *** Shut off notes while program is running in order to reduce log size. **; %if &sysver >=9 %then %do; options noquotelenmax ;; %end; proc contents data=work.&dset. out=_conten2 noprint; run; %let toolong=0; %let maxfilewidth=32300; ** as wide as I am willing to make file **; %let maxvarlength=32200; ** this needs to be a little smaller than maxfilewidth **; %let hasvars=0; ** initialize hasvars **; data _conten2; set _conten2; where type=2 %if "&maxlen." ^= "" %then %do; and length > &maxlen. %end; ;;; call symput('hasvars',_n_); run; ** if hasvars then do rest of program **; %if (&hasvars. > 0) %then %do; %if "%nrbquote(&temp_dir)"="" %then %let temp_dir = %sysfunc(pathname(work)); %if "&tfns"="" %then %let tfns= &sysjobid.&sysindex.; %let csortedby =; data _null_; dsid=open("work.&dset","i"); csortedby=lowcase(attrc(dsid,"SORTEDBY")); call symput("csortedby",csortedby); rc=close(dsid); run; %if %length(&csortedby.) = 0 %then %do; data work.&dset.; set work.&dset.; ___ob___ = _n_; ** create var to preserve sort order **; run; %let csortedby =___ob___; %end; ** create formats from long character data **; data _null_; set _conten2 ; where lowcase(name) not in(%sortvars(&csortedby.)); ** end of where statement, do not want to convert vars needed for sort order *; file "%nrbquote(&temp_dir.)_&tfns._longcharvars.sas" ls=&maxfilewidth; name=left(name); if length(name)> &maxvarlength. then do; name=substr(name,1,&maxvarlength.); call symput('toolong',1); end; put " "; put " proc sort data=&dset;"; put " by " name ";"; put " run;"; put " "; put " data _conten2;"; put " set &dset.(keep= " name ");"; put " by " name ";"; put " if first." name ";"; put " run;"; put " "; put " data _conten2;"; put " set _conten2;"; invalidf=0; %let vallen=32; %if &sysver. < 9 %then %let vallen=8; ** check that variable name does not end in a number and if so then add "_" to name **; %do i=0 %to 9; if substr(compress(name),length(compress(name)),1)="&i." or name in(%invalid_formats) then do; invalidf=1; if length(name) < &vallen. then /** add an underscore and period **/ name_f = compress(name || "_"); else if length(name) = &vallen. or name in(%invalid_formats) then /** overwrite last character with underscore and add period **/ name_f = compress(substr(compress(name),1,length(compress(name))-1) || "_"); end; %end; if invalidf=1 then do; put " fmtname=compress(""" name_f """);"; name_p = compress(name_f || "."); end; else do; /** if no change required **/ put " fmtname=compress(""" name """);"; name_p = compress(name || "."); end; put " start=_n_;"; put " ___st___=_n_;"; put " end=_n_;"; put " label=" name ";"; ** SAS versions 8+ have the same max length for vars and formats *; put " run;"; put " "; put " proc format library=work cntlin=_conten2(keep=fmtname start end label);"; put " run;"; put " "; put " data &dset.(drop= " name " rename=(___st___=" name "));"; put " merge &dset. _conten2(keep=" name " ___st___);"; put " by " name ";"; put " label " name " = """%nrbquote(label)""";"; put " run;"; put " "; put " data &dset.;"; put " set &dset.;"; put " format " name " " name_p ";"; put " run;"; put " "; run; %include"&temp_dir._&tfns._longcharvars.sas"; ** return data to original sort oder **; proc sort data=&dset.; by &csortedby.; run; %if "&csortedby" = "___ob___" %then %do; data &dset; set &dset (drop=___ob___); run; %end; %end; ** of if hasvars to process **; %if &toolong=1 %then %do; %put WARNING: CHAR2FMT truncated at least one character variable because it was longer than &maxvarlength. *; %end; %goto done; %fail1:; %put ERROR: CHAR2FMT did not run because you have a least one character variable that is longer than &maxvarlength. *; %let savastata_err=1; %goto done; %done:; proc datasets nodetails nolist nowarn; delete _conten2; run; options obs=&obs. ¬es.; ** Restore options. **; %mend char2fmt; %macro invalid_formats; "best" , "binary" , "comma" , "commax" , "d" , "date" , "datetime" "dateampm" , "day" , "ddmmyy" , "dollar" , "dollarx" , "downame" , "e" "eurdfdd" , "eurdfde" , "eurdfdn" , "eurdfdt" , "eurdfdwn" , "eurdfmn" , "eurdfmy" "eurdfwdx" , "eurdfwkx" , "float" , "fract" , "hex" , "hhmm" , "hour" "ib" , "ibr" , "ieee" , "julday" , "julian" , "percent" , "minguo" "mmddyy" , "mmss" , "mmyy" , "monname" , "month" , "monyy" , "negparen" "nengo" , "numx" , "octal" , "pd" , "pdjulg" , "pdjuli" , "pib" "pibr" , "pk" , "pvalue" , "qtr" , "qtrr" , "rb" , "roman" "s370ff" , "s370fib" , "s370fibu" , "s370fpd" , "s370fpdu" , "s370fpib" , "s370frb" "s370fzd" , "s370fzdl" , "s370fzds" , "s370fzdt" , "s370fzdu" , "ssn" , "time" "timeampm" , "tod" , "weekdate" , "weekdatx" , "weekday" , "worddate" , "worddatx" "wordf" , "words" , "year" , "yen" , "yymm" , "yymmdd" , "yymon" "yyq" , "yyqr" , "z" , "zd" , "f" %mend invalid_formats; %macro sortvars (varlist) ; %let i=1; %do %while(%length(%scan(%cmpres(&varlist.),&i,%str( ))) GT 0); %global var&i.; %let var&i.= %scan(%cmpres(&varlist.),&i,%str( )); %let i=%eval(&i + 1); %end; %do j=1 %to %eval(&i-1); %sysfunc(compress(%str(%")&&var&j.%str(%"))) %end; %mend sortvars ;