cap prog drop scale_transformation prog def scale_transformation version 13 syntax, Type(integer) score1(varname numeric) score2(varname numeric) /// [COMPGroup(varname numeric) /// CONTrols(varlist numeric) controls1(varlist numeric) controls2(varlist numeric) Weights(varname numeric) /// ITERations(integer 1000) MAXOPTITERations(integer 25) BOUNDDown(integer -1500) BOUNDUp(integer 1500) /// SINGHMEThod(integer 1) MONOtonicity(integer 1) monofile(string) timeroff save(string) seed(integer -987654321) /// robust(integer 0)] ** Initial checks if inlist(`type',1,2,7)==1 & "`compgroup'"=="" { di "" di as err "Error: Variable compgroup should be specified if type is 1, 2 or 7." e } if "`compgroup'"!="" { if !mod(`compgroup',1)==0 | `compgroup'<0 { di "" di as err "Error: Variable compgroup should only take on integers greater or equal to 0" exit } if inlist(`type',3,4,5,6)==1 { di "" di as err "Error: Variable compgroup should NOT be specified if type is 3, 4, 5 or 6." e } } if inlist(`type',1,2,3,4,5,6,7)==0 { di "" di as err "Error: Type should be 1 and 2 for Gap Growth max and min; 3 and 4 for Correlation max and min; 5 and 6 for R-squared max and min; or 7 for Controls Explanation Index max, respectively." e } if inlist(`type',7)==1 & ("`controls1'"=="" & "`controls2'"=="") { di "" di as err "Error: At least one control variable needed (i.e. controls1 or controls2) for Controls Explanation max" e } if inlist(`type',3,4,5,6)==1 & ("`controls'"!="" | "`controls1'"!="" | "`controls2'"!="") { di "" di as err "Error: Control variables not allowed for Correlation or R-squared max and min" e } if inlist(`singhmethod',1,2)==0 { di "" di as err "Error: Singular H Method should be 1 for `"`"hybrid"'"' (recommended), a mixture of steepest descent and Newton, or 2 for modified Marquardt algorithm" e } if `iterations'<=0 { di "" di as err "Error: Iterations should be greater than 0" e } if inlist(`monotonicity',1,2,3)==0 { di "" di as err "Error: Monotonicity check should be 1 for Standard, 2 for Sample, or 3 for External" e } if `monotonicity'==3 & "`monofile'"=="" { di "" di as err " Error: Monotonicity filename should be specified if monotonicity check is to use external file" e } if `monotonicity'!=3 & "`monofile'"!="" { di "" di as err "Error: Monotonicity filename specified when monotonicity check is NOT set to use external file" e } if `seed'!=-987654321 & (`seed'<0 | `seed'>2147483647) { di "" di as err "Error: Seed should be an integer greater or equal to 0 and less or equal to 2,147,483,647." e } if ( `score1'<0 | (`score1'>1 & `score1'!=.) ) | ( `score2'<0 | (`score2'>1 & `score2'!=.) ) { qui sum `score1' local s1min = r(min) local s1max = r(max) qui sum `score2' local s2min = r(min) local s2max = r(max) local max = ceil(max(`s1max',`s2max')) local min = floor(min(`s1min',`s2min')) qui replace `score1' = (`score1'-`min') / (`max'-`min') qui replace `score2' = (`score2'-`min') / (`max'-`min') if ( `score1'<0 | (`score1'>1 & `score1'!=.) ) | ( `score2'<0 | (`score2'>1 & `score2'!=.) ) { di "" di as err "Error: Re-scaling of variables `score1' and `score2' to be between 0 and 1 did not work." "Please re-scale them manually." e } } if `robust'!=0 & (`robust'<20 | `robust'>300 | mod(`robust',2)==1) { di "" di as err "Error: Robust option should only contain even, positive integers greater or equal to 20 and less than 300." di as err "This additional number of max/min iterations (half each) should allow for enough sign checks." e } if `robust'!=0 & (`robust'>=20 & `robust'<=300 & mod(`robust',2)==0) & inlist(`type',3,4,5,6,7)==1 { di "" di as err "Error: Robust option can only be specify with Gap Growth Max or Min (i.e. type is 1 or 2)." e } ** Save using DTA qui { set more off timer clear timer on 1 mata: mata clear marksample touse keep if `touse' tempfile theData save `theData', replace clear } ** Locals if `seed'==-987654321 { local seed = round(uniform()*100000000) } set seed `seed' local today_str = string(date("`c(current_date)'","DMY"),"%tdCCYYNNDD") + "_" + subinstr(substr("`c(current_time)'",1,5),":","h_",1) + "m" local curdir : pwd m: type = `type' if `type'==1 | `type'==2 { local type2 = "Gap_Growth" } else if `type'==3 | `type'==4 { local type2 = "Correlation" } else if `type'==5 | `type'==6 { local type2 = "R-squared" } else if `type'==7 { local type2 = "Explanation" } if `type'==1 | `type'==3 | `type'==5 | `type'==7 { local type = "max" } else if `type'==2 | `type'==4 | `type'==6 { local type = "min" } if `singhmethod'==1 { local singhmethod = "hybrid" } else if `singhmethod'==2 { local singhmethod = "m-marquardt" } if `monotonicity'==1 { local montype = "Standard" } else if `monotonicity'==2 { local montype = "Sample" } else if `monotonicity'==3 { local montype = "External" } local timer_status = "On" if "`timeroff'"=="timeroff" local timer_status = "Off" local n_controls: word count `controls' local n_controls1: word count `controls1' local n_controls2: word count `controls2' local weights_status = "No" if "`weights'"!="" local weights_status = "Yes" local robust_status = "No" if `robust'>0 local robust_status = "Yes (`robust' iterations)" di "" _newline(1) di in y "**************************************************************" di in y "Optimization Type: " proper("`type'") " " proper("`type2'") di in y "Seed Number (for replication): `seed'" di in y "Singular H Method: " proper("`singhmethod'") di in y "Monotonicity check: `montype'" di in y "Control variables: `n_controls'" di in y "Control t=1 variables: `n_controls1'" di in y "Control t=2 variables: `n_controls2'" di in y "Weights: `weights_status'" di in y "Timer: `timer_status'" di in y "Robust option: `robust_status'" ** Gen random starting parameters if `robust'>0 { local iterations = `iterations' + `robust' } qui set obs `iterations' foreach param in a b c d e f g { gen `param' = runiform(`bounddown',`boundup') } m: init_params=st_data(.,.) qui count local obs = `r(N)' * Restore data use `theData', clear * Create control for others not in comparisons groups if "`compgroup'"!="" { qui { sum `compgroup' local compgroupmin= `r(min)' local compgroupmax= `r(max)' levelsof `compgroup', local(theGroups) foreach x of local theGroups { gen temp_g`x' = (`compgroup'==`x') } local gmin = "temp_g`compgroupmin'" local gmax = "temp_g`compgroupmax'" gen temp_other_group_crtl = (`compgroup'!=`compgroupmin' & `compgroup'!=`compgroupmax') } } else { qui gen temp_other_group_crtl = . } * Create weights = 1 if no weights specified if "`weights_status'" == "No" { qui gen temp_weight = 1 local weights "temp_weight" } * Import external monotonicity data, if available preserve qui { if `monotonicity'==3 { if regexm("`monofile'","dta$") { use "`monofile'", clear } else if regexm("`monofile'","xls$") | regexm("`monofile'","xlsx$") { import excel "`monofile'", clear firstrow } else if regexm("`monofile'","csv$") { import delimited "`monofile'", clear varnames(1) case(lower) } ds tokenize `r(varlist)' local monovar = "`1'" di "`monovar'" keep `monovar' sort `monovar' drop if `monovar'==. m: mono_data = st_data(.,.,("`monovar'")) m: mono_data = uniqrows(mono_data) } else if `monotonicity'==2 { m: mono_data = ("a","a") } else if `monotonicity'==1 { m: st_view(irt1=.,.,("`score1'")) m: mono_data = J(rows(irt1),0,.) } } restore * Prepare for grid search m { st_view(sesgroup=.,.,("`gmax' temp_other_group_crtl")) st_view(irt1=.,.,("`score1'")) st_view(irt2=.,.,("`score2'")) st_view(w=.,.,("`weights'")) st_view(controls=.,.,("`controls'")) st_view(c1=.,.,("`controls1'")) st_view(c2=.,.,("`controls2'")) results = J(0,17,.) results_robust = J(0,17,.) } * Robustness test max/min N observations if `robust'>0 { forv row = 1/`robust' { local half = `robust'/2 local half_plus = `half'+1 if `row'<=`half' local rtype "max" if `row'>`half' local rtype "min" m: init_cond = init_params[`row',1..7] m: S=optimize_init() m: optimize_init_evaluator(S, &objf()) m: optimize_init_evaluatortype(S,"d0") m: optimize_init_which(S, "`rtype'") m: optimize_init_argument(S,1,sesgroup) m: optimize_init_argument(S,2,irt1) m: optimize_init_argument(S,3,irt2) m: optimize_init_argument(S,4,controls) m: optimize_init_argument(S,5,c1) m: optimize_init_argument(S,6,c2) m: optimize_init_argument(S,7,w) m: optimize_init_argument(S,8,mono_data) m: optimize_init_argument(S,9,type) m: optimize_init_singularHmethod(S,"`singhmethod'") m: optimize_init_trace_params(S,"on") m: optimize_init_conv_maxiter(S,`maxoptiterations') m: optimize_init_verbose(S,0) m: optimize_init_params(S, init_cond) di "" _newline(1) local RTYPE = upper("`rtype'") di "Optimizing - Iteration `row'/`robust' ROBUST `RTYPE'" _newline(1) cap noi m: _optimize(S) m: row = strtoreal("`row'") m: conditional(row,results_robust,S) if `c(rc)'==1 { qui cap drop temp_g* temp_other_group_crtl temp_weight error(1) e } } preserve clear getmata (obj b1 b2 b3 b4 b5 b6 c init_b1 init_b2 init_b3 init_b4 init_b5 init_b6 init_c gapgrowth nc_gapgrowth) = results_robust, double qui sum obj if _n<=`half' local max_avg = `r(mean)' qui sum obj if _n>`half' local min_avg = `r(mean)' if ("`type'"=="max" & `max_avg'>`min_avg') { local type "max" m: results = results_robust[1..`half',.] } if ("`type'"=="max" & `min_avg'>`max_avg') { local type "min" m: results = results_robust[`half_plus'..`robust',.] } if ("`type'"=="min" & `max_avg'>`min_avg') { local type "min" m: results = results_robust[`half_plus'..`robust',.] } if ("`type'"=="min" & `min_avg'>`max_avg') { local type "max" m: results = results_robust[1..`half',.] } local TYPE = upper("`type'") restore } * Run loop using random sets of initial parameters local start = 1 if `robust'>0 { local start = 1+`robust' } local counter = 0 forv row = `start'/`obs' { local ++counter local denom = `obs'-`robust' m { init_cond = init_params[`row',1..7] S=optimize_init() optimize_init_evaluator(S, &objf()) optimize_init_evaluatortype(S,"d0") optimize_init_which(S, "`type'") optimize_init_argument(S,1,sesgroup) optimize_init_argument(S,2,irt1) optimize_init_argument(S,3,irt2) optimize_init_argument(S,4,controls) optimize_init_argument(S,5,c1) optimize_init_argument(S,6,c2) optimize_init_argument(S,7,w) optimize_init_argument(S,8,mono_data) optimize_init_argument(S,9,type) optimize_init_singularHmethod(S,"`singhmethod'") optimize_init_trace_params(S,"on") optimize_init_conv_maxiter(S,`maxoptiterations') optimize_init_verbose(S,0) optimize_init_params(S, init_cond) } di "" _newline(1) di "Optimizing - Iteration `counter'/`denom'" _newline(1) cap noi m: _optimize(S) m { if (optimize_result_errorcode(S)==0) { p = optimize_result_params(S) max_min = optimize_result_value(S) init_vals = optimize_init_params(S) if ("`nc_gapgrowth'"!="") { nc_gapgrowth = strtoreal("`nc_gapgrowth'") } else { nc_gapgrowth= J(1,1,.) } if ("`gapgrowth'"!="") { gapgrowth = strtoreal("`gapgrowth'") } else { gapgrowth= J(1,1,.) } results = results \ (max_min, p , init_vals, nc_gapgrowth, gapgrowth) } if (optimize_result_errorcode(S)!=0) { p = J(1,7,.) max_min = J(1,1,.) nc_gapgrowth= J(1,1,.) gapgrowth = J(1,1,.) init_vals = optimize_init_params(S) results = results \ (max_min, p , init_vals, nc_gapgrowth, gapgrowth) } results S=optimize_init() } if `c(rc)'==1 { qui cap drop temp_g* temp_other_group_crtl temp_weight error(1) e } } * Save results into Stata clear getmata (obj b1 b2 b3 b4 b5 b6 c init_b1 init_b2 init_b3 init_b4 init_b5 init_b6 init_c gapgrowth nc_gapgrowth) = results, double qui foreach var in gapgrowth nc_gapgrowt { count if `var'!=. if r(N)==0 drop `var' } m: mata clear di "" di "**** Program (finally!) completed :) ****" if "`save'"!="" { save `save', replace } qui { timer off 1 timer list } if "`timer_status'"=="On" { di "" di in y "Total program time: " round(r(t1)/60,.01) " minutes" } end ** Define optimization functions m /*Optimization function*/ void function objf(todo, p, sesgroup, irt1, irt2, controls, c1, c2, w, mono_data, type, obj, g, H) { real vector s_irt1 real vector s_irt2 real vector b_irt1 real vector b_irt2 real vector nc_b_irt1 real vector nc_b_irt2 real vector scores real vector r real vector Z real vector V real vector s_irt1_pred real matrix X real matrix X1 real matrix X2 real scalar gap_irt1 real scalar gap_irt2 real scalar nc_gap_irt1 real scalar nc_gap_irt2 real scalar i real scalar b_corr real scalar gapgrowth real scalar nc_gapgrowth r = p s_irt1= r[1]:*(irt1:+r[7]) + r[2]:*(irt1:+r[7]):^2 + r[3]:*(irt1:+r[7]):^3 + r[4]:*(irt1:+r[7]):^4 + r[5]:*(irt1:+r[7]):^5 + r[6]:*(irt1:+r[7]):^6 s_irt2 = r[1]:*(irt2:+r[7]) + r[2]:*(irt2:+r[7]):^2 + r[3]:*(irt2:+r[7]):^3 + r[4]:*(irt2:+r[7]):^4 + r[5]:*(irt2:+r[7]):^5 + r[6]:*(irt2:+r[7]):^6 s_irt_comb = s_irt1 \ s_irt2 mean_s_irt_comb = mean(s_irt_comb) var_s_irt_comb = variance(s_irt_comb) s_irt_comb = (s_irt_comb:-mean_s_irt_comb):/(var_s_irt_comb^.5) s_irt1 = s_irt_comb[1..rows(s_irt1),.] s_irt2 = s_irt_comb[rows(s_irt1)+1..rows(s_irt_comb),.] /*Optimization Objects*/ if (type==1|type==2) { X1 = (sesgroup,controls,c1) X1 = (X1,J(rows(X1),1,1)) b_irt1 = invsym(cross(X1,w,X1))*cross(X1,w,s_irt1) X2 = (sesgroup,controls,c2) X2 = (X2,J(rows(X2),1,1)) b_irt2 = invsym(cross(X2,w,X2))*cross(X2,w,s_irt2) gap_irt1 = b_irt1[1] gap_irt2 = b_irt2[1] if (gap_irt1<0) { obj = -(gap_irt2 - gap_irt1) } else { obj = gap_irt2 - gap_irt1 } } else if (type==3|type==4) { Z = (s_irt2,J(rows(s_irt2),1,1)) b_corr = invsym(cross(Z,w,Z))*cross(Z,w,s_irt1) obj = b_corr[1] } else if (type==5|type==6) { V = (s_irt1,s_irt2) obj = correlation(V,w) obj = obj[1,2]^2 } else { X = (sesgroup,controls) X = (X,J(rows(X),1,1)) nc_b_irt1 = invsym(cross(X,w,X))*cross(X,w,s_irt1) nc_b_irt2 = invsym(cross(X,w,X))*cross(X,w,s_irt2) nc_gap_irt1 = nc_b_irt1[1] nc_gap_irt2 = nc_b_irt2[1] if (nc_gap_irt1<0) { nc_gapgrowth = -(nc_gap_irt2 - nc_gap_irt1) } else { nc_gapgrowth = nc_gap_irt2 - nc_gap_irt1 } st_local("nc_gapgrowth",strofreal(nc_gapgrowth)) X1 = (sesgroup,controls,c1) X1 = (X1,J(rows(X1),1,1)) b_irt1 = invsym(cross(X1,w,X1))*cross(X1,w,s_irt1) X2 = (sesgroup,controls,c2) X2 = (X2,J(rows(X2),1,1)) b_irt2 = invsym(cross(X2,w,X2))*cross(X2,w,s_irt2) gap_irt1 = b_irt1[1] gap_irt2 = b_irt2[1] if (gap_irt1<0) { gapgrowth = -(gap_irt2 - gap_irt1) } else { gapgrowth = gap_irt2 - gap_irt1 } st_local("gapgrowth",strofreal(gapgrowth)) obj = abs(nc_gapgrowth - gapgrowth) } /*Monotonicity Checks*/ if (cols(mono_data)==1) { scores = mono_data' } else if (cols(mono_data)==2) { scores = (irt2 \ irt1) scores = uniqrows(scores) scores = scores[1..rows(scores)-1,.] scores = scores' } else { scores = J(1,round(10000*max(irt2))-round(10000*min(irt1))+2,0) for (i=1; i<=cols(scores); i++) { scores[i]= round(10000*min(irt1)) + i -2 } scores = scores:/10000 } scores = r[1]:*(scores:+r[7]) + r[2]:*(scores:+r[7]):^2 + r[3]:*(scores:+r[7]):^3 + r[4]:*(scores:+r[7]):^4 + r[5]:*(scores:+r[7]):^5 + r[6]:*(scores:+r[7]):^6 scores = scores[| 1,2 \ .,. |]:-scores[| 1,1 \ .,cols(scores)-1 |] scores = rowmin(scores) if (type==2|type==4|type==6) { if (scores<0) obj = obj + 1 } else { if (scores<0) obj = obj - 1 } } end m /*Conditional Sub-routine (needed as the if-loop breaks without it)*/ void conditional(row,results_robust,S) { real scalar p real scalar nc_gapgrowth real scalar gapgrowth real matrix max_min real matrix init_vals if (optimize_result_errorcode(S)==0) { p = optimize_result_params(S) max_min = optimize_result_value(S) init_vals = optimize_init_params(S) nc_gapgrowth= J(1,1,.) gapgrowth= J(1,1,.) results_robust = results_robust \ (max_min, p , init_vals, nc_gapgrowth, gapgrowth) } else { p = J(1,7,.) max_min = J(1,1,.) nc_gapgrowth= J(1,1,.) gapgrowth = J(1,1,.) init_vals = optimize_init_params(S) results_robust = results_robust \ (max_min, p , init_vals, nc_gapgrowth, gapgrowth) } results_robust S=optimize_init() } end