/************************************************************************************ * program: char2fmt.sas * Programmer: Dan Blanchette dan_blanchette@unc.edu * The Carolina Population Center at The University of North Carolina at Chapel Hill * Developed at: * The Carolina Population Center at The University of North Carolina at Chapel Hill, * Research Computing, UNC-CH, and * Center for Entrepreneurship and Innovation Duke University's Fuqua School of Business * date: 20Aug2005 ** Modified: 09Mar2009 - renamed file to char2fmt.sas * - added varlist option ** Modified: 18Mar2008 - set local macro vars to be local. * - made &dset. be the syslast dataset. * - variable labels were not being kept for new numeric vars so * that is now fixed. quotes and macro vars inside new var labels * are also okay * - fixed test of invalid formats since the varname may not always * be in lowercase the test now does: lowcase(name). ** Modified: 26Sep2007 - fixed &sysver to be &isysver. since &sysver almost * always contains a decimal value it gets treated as text which SAS * always considers to be greater than any number. * * Disclaimer: This program is free to use and to distribute as long as credit is given to * Dan Blanchette * The Carolina Population Center * University of North Carolina at Chapel Hill * * There is no warranty on this software either expressed or implied. This program * is released under the terms and conditions of GNU General Public License. * * comments: macro to encode character variables to numeric using * user-defined formats to store the character string. * - the format name will be the variable name so any format already * existing with the same name will be over written. * - variables with names that end in numbers are not valid format * names so these formats will be changed to end with an underscore * - sort order of data set will be maintained thus if data set is known * to be sorted by one or more character variables those variables * will not be converted. * * * WARNING: This changes your data set by changing some/all character variables * to numeric variables. Make a back-up copy of your data set * before runing CHAR2FMT. * ************************************************************************************ * Options: ************************************************************************************ * dset= enter name of data set (no libref) in the WORK library in which variables are to be encoded. * * maxlen= set to what maximum length the character variable can be and not * be converted. Not setting maxlen will convert all character variables. * * temp_dir= enter directory name you would like have the file _&tfns_longcharvar.sas * created in if you want to see the code generated by CHAR2FMT, * otherwise the work directory will be used and _&tfns_longcharvar.sas * will be deleted when your SAS session is over. * * varlist= list of variables to be encoded to numeric variables. NOTE: maxlen= option * will overide this if both are specfied and variables specified in varlist * are not longer than what is specified in maxlen. * * tfns= is for a file name of your choice. If not set, it will be based on * the SAS &SYSJOBID and &SYSINDEX macro variables. * ************************************************************************************/ %macro char2fmt(dset= , maxlen= , temp_dir= , varlist= , tfns= ); %** local macro vars used by char2fmt: *; %local csortedby hasvars i isysver maxfilewidth maxvarlength notes obs savastata_err toolong temp_dir vallen varlist; %** Save option settings so they can be restored at the end of this macro. **; %let notes=%sysfunc(getoption(notes)); %let obs=%sysfunc(getoption(obs)); options obs=MAX; %*** Reason for maximizing it is because user could have **; %* set it lower than the number of variables in the dataset. **; options nonotes; %*** Shut off notes while program is running in order to reduce log size. **; %** make sysver an integer so it can be properly evaluated **; %let isysver = %sysevalf(&sysver.,integer); %if &isysver >=9 %then %do; options noquotelenmax ;; %end; proc contents data=work.&dset. %if &varlist. ^= %then %do; (keep= &varlist. ) %end; out=_conten2 noprint; run; %let toolong=0; %let maxfilewidth=32300; %** as wide as I am willing to make file **; %let maxvarlength=32200; %** this needs to be a little smaller than maxfilewidth **; %let hasvars=0; %** initialize hasvars **; data _conten2; set _conten2; where type=2 %if "&maxlen." ^= "" %then %do; and length > &maxlen. %end; ;;; call symput('hasvars',_n_); run; %** if hasvars then do rest of program **; %if (&hasvars. > 0) %then %do; %if "%nrbquote(&temp_dir)" = "" %then %let temp_dir = %sysfunc(pathname(work)); %if "&tfns" = "" %then %let tfns= &sysjobid.&sysindex.; %let csortedby =; data _null_; dsid=open("work.&dset","i"); csortedby=lowcase(attrc(dsid,"SORTEDBY")); call symput("csortedby",csortedby); rc=close(dsid); run; %if %length(&csortedby.) = 0 %then %do; data work.&dset.; set work.&dset.; ___ob___ = _n_; %** create var to preserve sort order **; run; %let csortedby =___ob___; %end; ** create formats from long character data **; data _null_; length label $300; %** increase length for single quotes normal max length is 256 for a var label **; set _conten2 ; where lowcase(name) not in(%sortvars(&csortedby.)); %** end of where statement, do not want to convert vars needed for sort order *; file "%nrbquote(&temp_dir.)/_&tfns._longcharvars.sas" ls=&maxfilewidth; name=left(name); if length(name)> &maxvarlength. then do; name=substr(name,1,&maxvarlength.); call symput('toolong',1); end; put " "; put " proc sort data=&dset;"; put " by " name ";"; put " run;"; put " "; put " data _conten2;"; put " set &dset.(keep= " name ");"; put " by " name ";"; put " if first." name ";"; put " run;"; put " "; put " data _conten2;"; put " set _conten2 (where=(" name "^=''));"; invalidf=0; %let vallen=32; %if &isysver. < 9 %then %let vallen=8; %** check that variable name does not end in a number and if so then add "_" to name **; %do i=0 %to 9; if substr(compress(name),length(compress(name)),1)="&i." or lowcase(name) in(%invalid_formats) then do; invalidf=1; if length(name) < &vallen. then name_f = compress(name || "_"); %** add an underscore and period **; else if length(name) = &vallen. or lowcase(name) in(%invalid_formats) then %** overwrite last character with underscore and add period **; name_f = compress(substr(compress(name),1,length(compress(name))-1) || "_"); end; %end; if invalidf=1 then do; put " fmtname=compress(""" name_f """);"; name_p = compress(name_f || "."); end; else do; %** if no change required **; put " fmtname=compress(""" name """);"; name_p = compress(name || "."); end; put " start=_n_;"; put " ___st___=_n_;"; put " end=_n_;"; put " label=" name ";"; %** SAS versions 8+ have the same max length for vars and formats *; put " run;"; put " "; put " proc format library=work cntlin=_conten2(keep=fmtname start end label);"; put " run;"; put " "; put " data &dset.(drop= " name " rename=(___st___=" name "));"; put " merge &dset. _conten2(keep=" name " ___st___);"; put " by " name ";"; if index(label,"'") then label=tranwrd(label,"'","''"); %** subinstr single quote with two single quotes **; put " label ___st___ = '" label "';"; put " run;"; put " "; put " data &dset.;"; put " set &dset.;"; put " format " name " " name_p ";"; put " run;"; put " "; run; %include"&temp_dir./_&tfns._longcharvars.sas"; %** return data to original sort oder **; proc sort data=&dset.; by &csortedby.; run; %if "&csortedby" = "___ob___" %then %do; data &dset; set &dset (drop=___ob___); run; %end; %end; %** of if hasvars to process **; %if &toolong=1 %then %do; %put WARNING: CHAR2FMT truncated at least one character variable because it was longer than &maxvarlength. *; %end; %goto done; %fail1:; %put ERROR: CHAR2FMT did not run because you have a least one character variable that is longer than &maxvarlength. *; %let savastata_err=1; %goto done; %done:; proc datasets nodetails nolist nowarn; delete _conten2; run; quit; %let syslast=&dset.; options obs=&obs. ¬es.; %** Restore options. **; %mend char2fmt; %macro invalid_formats; "best" , "binary" , "comma" , "commax" , "d" , "date" , "datetime" "dateampm" , "day" , "ddmmyy" , "dollar" , "dollarx" , "downame" , "e" "eurdfdd" , "eurdfde" , "eurdfdn" , "eurdfdt" , "eurdfdwn" , "eurdfmn" , "eurdfmy" "eurdfwdx" , "eurdfwkx" , "float" , "fract" , "hex" , "hhmm" , "hour" "ib" , "ibr" , "ieee" , "julday" , "julian" , "percent" , "minguo" "mmddyy" , "mmss" , "mmyy" , "monname" , "month" , "monyy" , "negparen" "nengo" , "numx" , "octal" , "pd" , "pdjulg" , "pdjuli" , "pib" "pibr" , "pk" , "pvalue" , "qtr" , "qtrr" , "rb" , "roman" "s370ff" , "s370fib" , "s370fibu" , "s370fpd" , "s370fpdu" , "s370fpib" , "s370frb" "s370fzd" , "s370fzdl" , "s370fzds" , "s370fzdt" , "s370fzdu" , "ssn" , "time" "timeampm" , "tod" , "weekdate" , "weekdatx" , "weekday" , "worddate" , "worddatx" "wordf" , "words" , "year" , "yen" , "yymm" , "yymmdd" , "yymon" "yyq" , "yyqr" , "z" , "zd" , "f" %mend invalid_formats; %macro sortvars (varlist) ; %* local macro vars used by sortvars: *; %local i j; %let i=1; %do %while(%length(%scan(%cmpres(&varlist.),&i.,%str( ))) GT 0); %local var&i.; %let var&i.= %scan(%cmpres(&varlist.),&i.,%str( )); %let i=%eval(&i. + 1); %end; %do j=1 %to %eval(&i.-1); %sysfunc(compress(%str(%")&&var&j..%str(%"))) %end; %mend sortvars ;