/*version 1.0.0 3 March 2008*/
/*version 1.0.1 25 November 2008*/
/*fixes in 1.0.1
    - added seeding number as an argument to avoid getting the same results for simulation occuring on the same second
      thanks to ALlan Garland for noticing the issue
*/
program define skbim
	version 9.2
	/*Generates a dataset of values from bimodal distribution . The two distribution that make up the
    bimodal can be normal or skew-normal and are named d1 and d2 for easier reference below
	requires 6-11 arguments (last 5 are optional)
	the first is the probability of the 1st distribution
	for arguments 2 to 5 we have three different options (defined by argument no 6):
	   option1: mean of d1, var of d1, mean of d2, var of d2
	   option2: bimodal mean, bimodal var, var of d1, var of d2
       option3: bimodal mean, bimodal var, mean of d1, var of d1
    args 7 to 12 are optional: size, seed number, skewness d1, kurtosis d1, skewness d2, kurtosis d2. If they are
    omitted, skewness and kurtosis values for normal distributions are used (sk=0, ku=3) and the size of the dataset
    is used as default

    Uses Ramberg's method and tables - A probability Distribution and its uses in fitting data
    http://www.jstor.org/view/00401706/ap040083/04a00080/0
    The method uses a formula with 4 parameters and parameter values which generate data of various
    degrees of skewness and kurtosis have been provided in the paper. only a few of those have been included
    in the command, but it can easily be updated for specific values*/

    /*arguments*/
    args p_d1 v1 v2 v3 v4 optn bsize snum psk1 ku1 psk2 ku2
    /*VALIDATE INPUT*/
    if "`p_d1'"=="" | "`v1'"=="" | "`v2'"=="" | "`v3'"=="" | "`v4'"=="" | "`optn'"=="" {
        di "command requires at least six numeric arguments - please see help for details"
        error 197
    }
    if `p_d1'<=0 | `p_d1'>=1 {
        di "probability for 1st distribution (argument 1) needs to be within the (0,1) range"
        error 197
    }
    scalar p1 = `p_d1'
    /*for each option possibility verify input and caclulate the rest from bimodal equations*/
    if `optn'==1 {
        di "option 1 selected: m1, var1, m2 & var2"
        if `v2'<=0 | `v4'<=0 {
            di "variances of d1 and d2 (arguments 4&5) need to be positive!"
            error 459
        }
        scalar m1 = `v1'
        scalar var1 = `v2'
        scalar m2 = `v3'
        scalar var2 = `v4'
        scalar mb = p1*m1 + (1-p1)*m2
        scalar varb = p1*var1 + (1-p1)*var2 + p1*(1-p1)*(m1-m2)^2
    }
    else if `optn'==2 {
        di "option 2 selected: mb, varb, var1 & var2"
        if `v2'<=0 | `v3'<=0 | `v4'<=0 {
            di "variances of bimodal, d1 and d2 (arguments 3,4&5) need to be positive!"
            error 459
        }
        scalar mb = `v1'
        scalar varb = `v2'
        scalar var1 = `v3'
        scalar var2 = `v4'
        scalar temp = (varb-p1*var1-(1-p1)*var2) / (p1*(1-p1))
        /*break if there is a problem with the values*/
        if temp<=0 {
            di "cannot proceed with the provided values. it must hold that var(b)-p*var(d1)-(1-p)*var(d2)>=0"
            error 459
        }
        scalar m1 = mb + (1-p1)*sqrt(temp)
        scalar m2 = (mb-p1*m1)/(1-p1)
    }
    else if `optn'==3 {
        di "option 2 selected: mb, varb, m1 & var1"
        if `v2'<=0 | `v4'<=0 {
            di "variances of bimodal and d1 (arguments 3&5) need to be positive!"
            error 459
        }
        scalar mb = `v1'
        scalar varb = `v2'
        scalar m1 = `v3'
        scalar var1 = `v4'
        scalar m2 = (mb-p1*m1)/(1-p1)
        scalar var2 = (varb-p1*(1-p1)*(m1-m2)^2-p1*var1)/(1-p1)
        /*break if there is a problem with the values*/
        if var2<=0 {
            di "cannot proceed with the provided values. it must hold that var(b)>p*var(d1)+p(1-p)*(m1-m2)^2"
            error 459
        }
    }
    else {
        di "option (argument 6) needs to be either 1, 2 or 3"
        error 197
    }
    /*optional arguments skenwness and kurtosis of d1 and d2*/
	if "`psk1'"=="" local psk1 = 0
	if "`ku1'"=="" local ku1 = 3
	if "`psk2'"=="" local psk2 = 0
	if "`ku2'"=="" local ku2 = 3
	if "`bsize'"=="" {
        qui count
        local bsize = `r(N)'
    }
	if "`snum'"=="" {
        local snum = 777
    }
    /*just in case an invalid sample size has been provided*/
    if `bsize'<=0 {
        di "invalid sample size provided, current num of obs will be used instead"
        qui count
        local bsize = `r(N)'
    }
    if `bsize'!=int(`bsize') {
        di "invalid sample size provided, provided number will be truncated"
        local bsize = int(`bsize')
    }
    /*if negative skewness provided take into account*/
    local sk1 = abs(`psk1')
    local sk2 = abs(`psk2')
    /*define lamda values - 2 first columns are skewness and kurtosis for looking up*/
    matrix input L = (0, 1.75, 0 , 0.5943, 1.4501, 1.4501\ /*
    */ 0, 3, 0, 0.1974, 0.1349, 0.1349\ 0, 4, 0, 0.0262, 0.0148, 0.0148\ /*
    */ 0, 5, 0, -0.0870, -0.0433, -0.0433\ 0, 6, 0, -0.1686, -0.0802, -0.0802\ /*
    */ 0, 7, 0, -0.2306, -0.1045, -0.1045\ 0, 8, 0, -0.2800, -0.1223, -0.1223\ /*
    */ 0, 9, 0, -0.3203, -0.1359, -0.1359\ 0, 3.2, 0, 0.1563, 0.1016, 0.1016\ /*
    */ 0, 3.3, 0, 0.1371, 0.0872, 0.0872\ 0, 3.4, 0, 0.1191, 0.0742, 0.0742\ /*
    */ 0, 3.6, 0, 0.0852, 0.0512, 0.0512\ 0, 4.4, 0, -0.0241, -0.0130, -0.0130\ /*
    */ 0.5, 4, -0.290, 0.0604, 0.0259, 0.0447\ 0.5, 5, -0.188, -0.0546, -0.0236, -0.0333\ /*
    */ 0.5, 6, -0.142, -0.1398, -0.0591, -0.0764\ 0.5, 7, -0.117, -0.2050, -0.0845, -0.1042\ /*
    */ 0.5, 8, -0.101, -0.2567, -0.1035, -0.1238\ 0.5, 9, -0.089, -0.2986, -0.1181, -0.1384\ /*
    */ 1, 4, -0.886, 0.1333, 0.0193, 0.1588\ 1, 5, -0.533, 0.0340, 0.009695, 0.0285\ /*
    */ 1, 6, -0.379, -0.0562, -0.0187, -0.0388\ 1, 7, -0.215, -0.2356, -0.0844, -0.1249\ /*
    */ 1, 8, -0.248, -0.1878, -0.0670, -0.1058\ 1, 9, -0.215, -0.2356, -0.0844, -0.1249\ /*
    */ 1, 10, -0.191, -0.2752, -0.0985, -0.1393\ 0.5, 3, -0.639, 0.2006, 0.0630, 0.2307\ /*
    */ 1.5, 6, -0.957, 0.0622, 0.003907, 0.0677\ 1.5, 7, -0.684, -0.0115, -0.002088, -0.009875\ /*
    */ 1.5, 8, -0.536, -0.0767, -0.0184, -0.0565\ 1.5, 9, -0.443, -0.1330, -0.0362, -0.0880\ /*
    */ 1.5, 10, -0.382, -0.1803, -0.0524, -0.1104\ 1.5, 11, -0.338, -0.2202, -0.0665, -0.1271\ /*
    */ 1.5, 12, -0.305, -0.2544, -0.0786, -0.1401\ 2, 9, -0.993, -0.001081, -0.00000407, -0.001076\ /*
    */ 2, 10, -0.796, -0.0538, -0.005187, -0.0458\ 2, 11, -0.670, -0.1005, -0.0154, -0.0766\ /*
    */ 2, 12, -0.579, -0.1423, -0.0273, -0.0995\ 2, 13, -0.515, -0.1784, -0.0388, -0.1168\ /*
    */ 2, 14, -0.466, -0.2101, -0.0495, -0.1305\ 2, 15, -0.428, -0.2380, -0.0592, -0.1415)
    /*randomly set the seed number because it is set and not random*/
    local seednum = `snum'
    local time = c(current_time) /*get time to add to seed number*/
    local time : subinstr local time ":" "", all /*remove all the :s*/
    local seednum = `seednum' + mb*10 + varb*10^2 + `bsize' + `time'
    set seed `seednum'
    /*repeat for each of the 2 distributions*/
    forvalues dist=1(1)2 {
        /*look up the lambda values that correspond to the inputed sk and ku*/
        local rownum = rowsof(L)
        local rowhit = 0
        local i = 0
        while `rowhit'==0 & `i'<`rownum' {
            local i = `i' + 1
            if L[`i',1]==`sk`dist'' & L[`i',2]==`ku`dist'' {
                local rowhit = `i'
            }
        }
        if `rowhit'==0 {
            display "lambda matrix does not include skewness or kurtosis values you inputted"
            error 504
        }
        else {
            forvalues i=1(1)4 {
                scalar L`dist'_`i' = L[`rowhit',`i'+2]
            }
        }
    }
    /*set number of observations. if it smaller than the current one avoid error*/
    qui capture set obs `bsize'
    /*DRAW STUDY MEANS FROM POPULATION*/
    local mb = mb
    local m1 = m1
    local m2 = m2
    local varb = varb
    local var1 = var1
    local var2 = var2
    qui capture gen skewbim=.
    if _rc!=0 {
        di "There is already a variable with the same name - delete or rename it and repeat procedure"
        error 4
    }
    /*strings for label*/
    foreach x in mb varb m1 var1 m2 var2 psk1 ku1 psk2 ku2 {
        local `x's = string(``x'',"%2.1g")
    }
    local strlab = "bimodal of m=`mbs' var=`varbs' (m1=`m1s' v1=`var1s' sk1=`psk1s' ku1=`ku1s' m2=`m2s'"
    local strlab = "`strlab' v2=`var2s' sk2=`psk2s' ku2=`ku2s')"
    label var skewbim "`strlab'"
    /*generate the values*/
    forvalues j=1(1)`bsize' {
        /*toss a coin and decide on which distribution this case will follow*/
        local tcoin = uniform()
        if `tcoin' < `p_d1' {
            local dsel = 1
        }
        else {
            local dsel = 2
        }
        /*random uniform dist number that will be used in Ramberg's method*/
        scalar u = uniform()
        if u == 0 {
            scalar u = 10^-20
        }
        /*Ramberg's random num generator - num follow dist w m=0, var=1*/
        scalar Rb = L`dsel'_1+(u^L`dsel'_3 - (1-u)^L`dsel'_4)/L`dsel'_2
        /*rescale to selected distribution's mean and standard deviation*/
        scalar temp = `m`dsel'' + Rb*sqrt(`var`dsel'')
        /*if given negative skewness flip distribution*/
        if `psk`dsel''<0 {
            scalar temp = 2*`m`dsel'' - temp
        }
        qui replace skewbim = temp in `j'
    }
    di "generated `bsize' obs from bimodal distribution of mean=`mbs' & var=`varbs'"
    di "with m1=`m1s' v1=`var1s' sk1=`psk1s' ku1=`ku1s' & m2=`m2s' v2=`var2s' sk2=`psk2s' ku2=`ku2s'"
    di "note: numbers have been rounded to the first decimal for display purposes (in the label as well)"
end