----------------------------------------------------------------------------------------------- log: e:\analysis\stataboston\work\sug_groups_sim.log log type: text opened on: 20 Jul 2006, 09:56:04 . version 9 . . // sug_groups_sim.do - stata user's group - july 2006 . // simulation data illustrating invariance of predictions . // Scott Long - 20Jul2006 . . // create artificial data . clear . set seed 11020 . set obs 5000 obs was 0, now 5000 . local half = _N/2 . local half1 = `half'+1 . * indep variable is uniform . gen x = uniform() . label var x "articles" . * divide into two groups . gen f = _n < `half1' . label var f "is female?" . replace f = 0 if _n > `half' (0 real changes made) . gen m = 1 - f . label var m "is male?" . * group interactions . gen fx = f*x . label var fx "female*articles" . gen mx = m*x . label var fx "female*articles" . . // specify parameters - same slopes for men and women, different variances . local alpha = -1 . local beta = 2 . * sd for men and women differ . local f_sd = 2 // twice as large for women . local m_sd = 1 . . // generate errors, ystar and y . gen double e = invnorm(uniform()) . label var e "normal errors" . * rescale errors for each group . replace e = e*`f_sd' if f==1 (2500 real changes made) . replace e = e*`m_sd' if f==0 (0 real changes made) . * ystar . gen ystar = `alpha' + `beta'*x + e . label var ystar "latent y*" . * observed binary y . gen y = ystar>0 . label var y "observed y" . . // estimates based on female only sample . probit y x if f==1 , nolog Probit regression Number of obs = 2500 LR chi2(1) = 105.29 Prob > chi2 = 0.0000 Log likelihood = -1680.1948 Pseudo R2 = 0.0304 ------------------------------------------------------------------------------ y | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- x | .9210303 .090553 10.17 0.000 .7435497 1.098511 _cons | -.4752884 .0526805 -9.02 0.000 -.5785402 -.3720365 ------------------------------------------------------------------------------ . predict f_p (option p assumed; Pr(y)) . label var f_p "Pr(y) females only model" . local f_ll = e(ll) . estimates store f_only . . // estimates based on male only sample . probit y x if f==0 , nolog Probit regression Number of obs = 2500 LR chi2(1) = 467.17 Prob > chi2 = 0.0000 Log likelihood = -1498.9323 Pseudo R2 = 0.1348 ------------------------------------------------------------------------------ y | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- x | 2.008575 .0971009 20.69 0.000 1.81826 2.198889 _cons | -.9858719 .0554147 -17.79 0.000 -1.094483 -.8772612 ------------------------------------------------------------------------------ . predict m_p (option p assumed; Pr(y)) . label var m_p "Pr(y) males only model" . local m_ll = e(ll) . estimates store m_only . . // compare predictions for men and women . sort x . twoway line f_p m_p x . . // estimate model using interactions . probit y m mx f fx , nolog nocon Probit regression Number of obs = 5000 LR chi2(4) = . Log likelihood = -3179.1272 Prob > chi2 = . ------------------------------------------------------------------------------ y | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- m | -.9858719 .0554147 -17.79 0.000 -1.094483 -.8772612 mx | 2.008575 .0971009 20.69 0.000 1.81826 2.198889 f | -.4752884 .0526805 -9.02 0.000 -.5785403 -.3720365 fx | .9210303 .090553 10.17 0.000 .7435497 1.098511 ------------------------------------------------------------------------------ . mat b = e(b) . predict all_p (option p assumed; Pr(y)) . label var all_p "Pr(y) combined male/female model" . 
local all_ll = e(ll) . local m_f_ll = `f_ll' + `m_ll' . estimates store mf_inter . . // compare variables for men and women . sum all_p f_p y ystar x e if f==1 Variable | Obs Mean Std. Dev. Min Max -------------+-------------------------------------------------------- all_p | 2500 .4975967 .1019909 .3173448 .6721035 f_p | 2500 .4975967 .1019909 .3173448 .6721035 y | 2500 .4976 .5000943 0 1 ystar | 2500 .0075016 2.083321 -6.0852 6.657582 x | 2500 .5091124 .2833771 .0001647 .9999861 -------------+-------------------------------------------------------- e | 2500 -.0107232 2.01544 -6.351376 7.014125 . sum all_p m_p y ystar x e if f==0 Variable | Obs Mean Std. Dev. Min Max -------------+-------------------------------------------------------- all_p | 2500 .5082911 .2103069 .1623361 .8467247 m_p | 2500 .5082911 .2103069 .1623361 .8467247 y | 2500 .5084 .5000295 0 1 ystar | 2500 .0267689 1.142302 -4.088279 4.38683 x | 2500 .5027663 .2884878 .0004828 .9998926 -------------+-------------------------------------------------------- e | 2500 .0212363 .9946508 -3.422641 3.437464 . . // compare pr(y) from two models . * predictions for women . sum f_p all_p if f==1 Variable | Obs Mean Std. Dev. Min Max -------------+-------------------------------------------------------- f_p | 2500 .4975967 .1019909 .3173448 .6721035 all_p | 2500 .4975967 .1019909 .3173448 .6721035 . pwcorr f_p all_p if f==1 | f_p all_p -------------+------------------ f_p | 1.0000 all_p | 1.0000 1.0000 . * predictions for men . sum f_p all_p if f==0 Variable | Obs Mean Std. Dev. Min Max -------------+-------------------------------------------------------- f_p | 2500 .4953029 .1037993 .3174492 .6720724 all_p | 2500 .5082911 .2103069 .1623361 .8467247 . pwcorr m_p all_p if f==0 | m_p all_p -------------+------------------ m_p | 1.0000 all_p | 1.0000 1.0000 . . // compare model results . di "LL in interaction model: `all_ll'" LL in interaction model: -3179.12715826 . 
di "LL-male + LL-female: `m_f_ll'" LL-male + LL-female: -3179.12715826851 . . estimates table _all, stats(N ll) eform b(%9.3f) t(%6.2f) -------------------------------------------------- Variable | f_only m_only mf_inter -------------+------------------------------------ x | 2.512 7.453 | 10.17 20.69 m | 0.373 | -17.79 mx | 7.453 | 20.69 f | 0.622 | -9.02 fx | 2.512 | 10.17 _cons | 0.622 0.373 | -9.02 -17.79 -------------+------------------------------------ N | 2500.000 2500.000 5000.000 ll | -1680.195 -1498.932 -3179.127 -------------------------------------------------- legend: b/t . . log close log: e:\analysis\stataboston\work\sug_groups_sim.log log type: text closed on: 20 Jul 2006, 09:56:07 -----------------------------------------------------------------------------------------------