* version 4.3, DEC2013 * Artur Pokropek & Maciej Jakubowski * decriptive statistics with PIAAC data cap program drop piaacdes program define piaacdes, rclass syntax [varlist(numeric default=none)] [if] [in] /// , save(string) [ weight(varlist max=1) rep(varlist) /// countryid(varname) vemethodn(varname numeric) pv(string) stats(string) centile(string) /// over(string) round(integer 2)] version 10.0 tempvar probka cntid jkvar proflevel1 tempname b se over_values local oldover="`over'" local longlist=0 local overpv=0 if "`over'"=="pvlit" | "`over'"=="pvnum" | "`over'"=="PVLIT" | "`over'"=="PVNUM" { qui recode `over'1 (.=.) (0/175.9999=0) (176/225.9999=1) (226/275.9999=2) (276/325.9999=3) (326/375.9999=4) (376/999=5), gen(`proflevel1') local overpv=1 local over="`proflevel1'" } else if "`over'"=="pvpsl" | "`over'"=="PVPLS" { qui recode pvpsl1 (.=.) (0/240.9999=0) (241/290.9999=1) (291/340.9999=2) (341/999=3), gen(`proflevel1') local overpv=1 local over="`proflevel1'" } if "`countryid'"!="" { capture confirm string variable `countryid' if !_rc { qui gen `cntid'=`countryid' qui levelsof `cntid' `if' `in', local(cnt) } else { qui tostring(`countryid'), gen(`cntid') qui levelsof `cntid' `if' `in', local(cnt) } } else { cap confirm variable cntryid if !_rc { qui tostring(cntryid), gen(`cntid') qui local cnt="36 40 124 203 208 233 246 250 276 372 380 392 410 528 578 616 703 724 752 840 SUB 56 826 OECD 196 643" local longlist=1 } else { cap confirm variable CNTRYID if !_rc { qui tostring(CNTRYID), gen(`cntid') qui local cnt="36 40 124 203 208 233 246 250 276 372 380 392 410 528 578 616 703 724 752 840 SUB 56 826 OECD 196 643" local longlist=1 } else { di as error "Your dataset does not contain variable cntryid or CNTRYID. Please specify option countryid()" exit 100 } } } cap confirm variable vemethodn if !_rc { qui gen `jkvar'=vemethodn } else { cap confirm variable VEMETHODN if !_rc { qui gen `jkvar'=VEMETHODN } else { if "`vemethodn'"=="" { di as error "Your dataset does not contain variable vemethodn or VEMETHODN. Please specify vemethodn() option" exit 100 } else gen `jkvar'=`vemethodn' } } if "`weight'"=="" { cap confirm variable spfwt0 if !_rc local weight="spfwt0" else { cap confirm variable SPFWT0 if !_rc local weight="SPFWT0" else { di as error "Your dataset does not contain main survey weight spfwt0 or SPFWT0. Please specify weight() option" exit 100 } } } local ok_small=0 local ok_big=0 if "`rep'"=="" { forvalues i=1(1)80 { cap confirm variable spfwt`i' if !_rc local ok_small=`ok_small'+1 cap confirm variable SPFWT`i' if !_rc local ok_big=`ok_big'+1 } if `ok_small'==80 local rep="spfwt1-spfwt80" else if `ok_big'==80 local rep="SPFWT1-SPFWT80" else { di as error "Your dataset does not contain all replicate weights spfwt1-spfwt80 or SPFWT1-SPFWT80. Please specify rep() option" exit 100 } } if "`stats'"=="" & "`centile'"=="" { local stats="mean" local meanonly="meanonly" } else if "`stats'"=="mean" local meanonly="meanonly" if "`if'"=="" local if=" if " else local if=`"`if' "'+" & " local n_vars: word count `varlist' local n_pv: word count `pv' local n_stats: word count `stats' `centile' local n_cnt: word count `cnt' if `longlist'==1 local n_cnt=`n_cnt'-2 if "`over'"!="" { tab `over', nofreq matrow(over_values) local n_over=r(r) } else { local over="1" matrix over_values=(1,1) local n_over=1 } tempname tempfile file open `tempfile' using "`save'.html", write replace file write `tempfile' `""' " " * TABLE HEADER - OVER CATEGORIES - if over() or overpv() specified if `n_over'>1 { local col_span=`n_stats'*2*(`n_vars'+`n_pv') file write `tempfile' `""' forvalues i=1(1)`n_over' { local numerek=over_values[`i',1] if `overpv'!=1 { local labelka: label (`over') `numerek' file write `tempfile' `""' } else { if `numerek'==0 { file write `tempfile' `""' } else { file write `tempfile' `""' } } } } * SECOND ROW - predictors file write `tempfile' `"
Over categories:`labelka'Below Level 1Level `numerek'
"' forvalues i=1(1)`n_over' { foreach var in `pv' `varlist' { local colspan=2*`n_stats' file write `tempfile' `""' } } * third row file write `tempfile' "" forvalues i=1(1)`n_over' { foreach var in `pv' `varlist' { foreach stat in `stats' `centile' { file write `tempfile' `"" `"" local OECD_`var'_`stat'_`i'=0 local OECD_SE_`var'_`stat'_`i'=0 } } } local decimal=0.1^`round' forvalues i=1(1)`n_over' { local noc_`i'=0 * creating output matrices foreach var in `pv' `varlist' { mat b_`var'_over`i'=J(`n_cnt',`n_stats',.) mat se_`var'_over`i'=J(`n_cnt',`n_stats',.) } } local OECD_saved=0 local country_list="" local pvlist="" if "`pv'"!="" local pvlist="`pv'*" if `overpv'==1 local pvlist="`pvlist' `oldover'*" local cnt_i=0 foreach l of local cnt { if "`l'"=="OECD" | "`l'"=="SUB" { if "`l'"=="SUB" file write `tempfile' `""' else { local OECD_saved=1 local pvvar="`pv'"+" `varlist'" file write `tempfile' `""' forvalues i=1(1)`n_over' { foreach v of local pvvar { foreach stat in `stats' `centile' { local mean=string(round(`OECD_`v'_`stat'_`i''/`noc_`i'',`decimal'),"%20.`round'f") local se=string(round(sqrt(`OECD_SE_`v'_`stat'_`i'')/`noc_`i'',`decimal'),"%20.`round'f") file write `tempfile' `" "' } } } file write `tempfile' `""' } } else { _cnt `l' local name=r(name) local country_list="`country_list'`l' " local cnt_i=`cnt_i'+1 di "" di as result "`name'" _continue file write `tempfile' "" sum `jkvar' if `cntid'=="`l'" , meanonly if `r(N)'>0 { local jk=`r(mean)' if `jk'<1 | `jk'>2 { di as error "Please specify values 1 or 2 in `jkvar'" exit 100 } forvalues i=1(1)`n_over' { if 1!=`n_over' di as text " `i'" _c return clear cap drop `probka' if `overpv'==0 qui gen `probka'=1 `if' `cntid'=="`l'" & `over'==over_values[`i',1] `in' else qui gen `probka'=1 `if' `cntid'=="`l'" `in' preserve qui keep if `probka'==1 keep `pvlist' `varlist' `weight' `rep' qui count if r(N)>0 { local noc_`i'=`noc_`i''+1 if "`pv'"!="" { foreach v of local pv { local pvlist2="`v'1 `v'2 `v'3 `v'4 `v'5 `v'6 `v'7 `v'8 `v'9 `v'10" local temp=over_values[`i',1] _piaacdespv `pvlist2', overpv(`oldover') overval(`temp') weight(`weight') rep(`rep') jk(`jk') stats(`stats') centile(`centile') matrix b=r(est) matrix se=r(se) mat b_`v'_over`i'[`cnt_i',1]=b mat se_`v'_over`i'[`cnt_i',1]=se local col=1 foreach stat in `stats' `centile' { local b=b[1,`col'] local se=se[1,`col'] local col=`col'+1 if `b'==0 & `se'==0 file write `tempfile' `""' `""' else { local mean=string(round(`b',`decimal'),"%20.`round'f") local se=string(round(`se',`decimal'),"%20.`round'f") file write `tempfile' `""' `""' local OECD_`v'_`stat'_`i'=`OECD_`v'_`stat'_`i''+`mean' local OECD_SE_`v'_`stat'_`i'=`OECD_SE_`v'_`stat'_`i''+`se'^2 } } } } if "`varlist'"!="" & `overpv'==1 { foreach v in `varlist' { local pvlist2="`v' `v' `v' `v' `v' `v' `v' `v' `v' `v'" return clear local temp=over_values[`i',1] _piaacdespv `pvlist2', overpv(`oldover') overval(`temp') weight(`weight') rep(`rep') jk(`jk') stats(`stats') centile(`centile') matrix b=r(est) matrix se=r(se) mat b_`v'_over`i'[`cnt_i',1]=b mat se_`v'_over`i'[`cnt_i',1]=se local col=1 foreach stat in `stats' `centile' { local b=b[1,`col'] local se=se[1,`col'] local col=`col'+1 if `b'==0 & `se'==0 file write `tempfile' `""' `""' else { local mean=string(round(`b',`decimal'),"%20.`round'f") local se=string(round(`se',`decimal'),"%20.`round'f") file write `tempfile' `""' `""' local OECD_`v'_`stat'_`i'=`OECD_`v'_`stat'_`i''+`mean' local OECD_SE_`v'_`stat'_`i'=`OECD_SE_`v'_`stat'_`i''+`se'^2 } } } } if "`varlist'"!="" & `overpv'==0 { _piaacdes `varlist', weight(`weight') rep(`rep') jk(`jk') stats(`stats') centile(`centile') matrix b=r(coef) matrix se=r(se) local row=0 foreach var in `varlist' { local col=0 local row=`row'+1 mat b_`var'_over`i'[`cnt_i',1]=b[`row',1..`n_stats'] mat se_`var'_over`i'[`cnt_i',1]=se[`row',1..`n_stats'] foreach stat in `stats' `centile' { local col=`col'+1 local b=b[`row',`col'] local se=se[`row',`col'] if `b'==0 & `se'==0 file write `tempfile' `""' `""' else { local mean=string(round(`b',`decimal'),"%20.`round'f") local se=string(round(`se',`decimal'),"%20.`round'f") file write `tempfile' `""' `""' local OECD_`var'_`stat'_`i'=`OECD_`var'_`stat'_`i''+`mean' local OECD_SE_`var'_`stat'_`i'=`OECD_SE_`var'_`stat'_`i''+`se'^2 } } } } } else { di as error "no observations" _c foreach stat in `stats' `centile' { if "`pv'"!="" { file write `tempfile' `""' `""' } foreach var in `varlist' { file write `tempfile' `""' `""' } } } restore } } } } if `OECD_saved'==0 { local pvvar="`pv'"+" `varlist'" file write `tempfile' `""' forvalues i=1(1)`n_over' { foreach v of local pvvar { foreach stat in `stats' `centile' { local mean=string(round(`OECD_`v'_`stat'_`i''/`noc_`i'',`decimal'),"%20.`round'f") local se=string(round(sqrt(`OECD_SE_`v'_`stat'_`i'')/`noc_`i'',`decimal'),"%20.`round'f") file write `tempfile' `" "' } } } } file write `tempfile' _n "
Variable`var'
" "Country" ""' "`stat'" ""' "S.E." "
Sub-national entities
OECD Average `mean' `se'
Partners
" "`name'" " 0 0 `mean' `se' 0 0 `mean' `se' 0 0 `mean' `se'....
Average `mean' `se'
" file close `tempfile' di "" di "Results saved in the `save'.html file" * returning results matrices * separate matrix with estimates for each variable and over category * separate matrix with standard errors for each variable and over category foreach var in `pv' `varlist' { forvalues i=1(1)`n_over' { matrix colnames b_`var'_over`i' = `stats' `centile' matrix colnames se_`var'_over`i' = `stats' `centile' matrix rownames b_`var'_over`i' = `country_list' matrix rownames se_`var'_over`i' = `country_list' } if `n_over'==1 { return matrix b_`var'=b_`var'_over1 return matrix se_`var'=se_`var'_over1 } else { forvalues i=1(1)`n_over' { return matrix b_`var'_over`i'=b_`var'_over`i' return matrix se_`var'_over`i'=se_`var'_over`i' } } } end *********** cap program drop _cnt program define _cnt, rclass args l local name="`l'" if "`l'"=="36" local name="Australia" if "`l'"=="40" local name="Austria" if "`l'"=="124" local name="Canada" if "`l'"=="203" local name="Czech Republic" if "`l'"=="276" local name="Germany" if "`l'"=="208" local name="Denmark" if "`l'"=="724" local name="Spain" if "`l'"=="233" local name="Estonia" if "`l'"=="246" local name="Finland" if "`l'"=="250" local name="France" if "`l'"=="826" local name="England/N. Ireland (UK)" if "`l'"=="372" local name="Ireland" if "`l'"=="380" local name="Italy" if "`l'"=="392" local name="Japan" if "`l'"=="410" local name="Korea" if "`l'"=="528" local name="Netherlands" if "`l'"=="578" local name="Norway" if "`l'"=="616" local name="Poland" if "`l'"=="643" local name="Russian Federation" if "`l'"=="703" local name="Slovak Republic" if "`l'"=="752" local name="Sweden" if "`l'"=="840" local name="United States" if "`l'"=="56" local name="Flanders (Belgium)" if "`l'"=="196" local name="Cyprus" return local name "`name'" end ********************************************************** cap program drop _piaacdes program define _piaacdes, rclass syntax [varlist], weight(varlist max=1) /// rep(varlist) jk(integer) [stats(string)] [centile(string)] return clear * SETUP local num_rep : word count `rep' local n_centile: word count `centile' local n_stat: word count `stats' local n_stats: word count `stats' `centile' local n_vars: word count `varlist' if `n_centile' >0 { local namestats "`stats'" foreach numc of local centile { local namestats "`namestats' p`numc'" } } matrix coef = J(`n_vars',`n_stats',.) matrix colnames coef =`namestats' matrix rownames coef =`varlist' matrix se= J(`n_vars',`n_stats',.) matrix colnames se =`namestats' matrix rownames se =`varlist' local ithavar=0 foreach var of varlist `varlist' { local ++ithavar qui: sum `var' [iweight=`weight'], `meanonly' foreach stat of local stats { local fulstat_`stat'=r(`stat') local sumofsq_`stat'=0 } foreach replication of local rep { qui: sum `var' [iweight=`replication'], `meanonly' foreach stat of local stats { local sq_`stat' = (r(`stat') - `fulstat_`stat'')^2 local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat'' } } foreach stat of local stats { if `jk'==1 local sdr_`stat'=sqrt(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'') else local sdr_`stat'=sqrt(1*`sumofsq_`stat'') } local icol=0 foreach stat of local stats { local ++icol matrix coef[`ithavar',`icol'] = `fulstat_`stat'' matrix se[`ithavar',`icol'] = `sdr_`stat'' } } ***** centiles if `n_centile' >0 { local ithavar=0 foreach var of varlist `varlist' { local ++ithavar qui _pctile `var' [iweight=`weight'], p(`centile') forvalues stat =1(1)`n_centile' { local fulstat_`stat'=r(r`stat') local sumofsq_`stat'=0 } foreach replication of local rep { qui _pctile `var' [iweight=`replication'], p(`centile') forvalues stat =1(1)`n_centile' { local sq_`stat' = (r(r`stat') - `fulstat_`stat'')^2 local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat'' } } forvalues stat =1(1)`n_centile' { if `jk'==1 local sdr_`stat'=sqrt(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'') else local sdr_`stat'=sqrt(1*`sumofsq_`stat'') } local icol=`n_stat' forvalues stat =1(1)`n_centile' { local ++icol matrix coef[`ithavar',`icol'] = `fulstat_`stat'' matrix se[`ithavar',`icol'] = `sdr_`stat'' } } } return matrix coef =coef return matrix se =se end *********************************************** cap program drop _piaacdespv program define _piaacdespv, rclass syntax [varlist(numeric)], [overpv(string) overval(string)] weight(varlist max=1) /// rep(varlist) jk(integer) [stats(string)] [centile(string)] if "`overpv'"=="pvlit" | "`overpv'"=="pvnum" { tempvar proflevel1 proflevel2 proflevel3 proflevel4 proflevel5 proflevel6 proflevel7 proflevel8 proflevel9 proflevel10 forvalues i=1(1)10 { qui recode `overpv'`i' (.=0) (0/175.9999=1) (176/225.9999=2) (226/275.9999=3) /// (276/325.9999=4) (326/375.9999=5) (376/999=6), gen(`proflevel`i'') } local overpv=1 } else if "`overpv'"=="pvpsl" { tempvar proflevel1 proflevel2 proflevel3 proflevel4 proflevel5 proflevel6 proflevel7 proflevel8 proflevel9 proflevel10 forvalues i=1(1)10 { qui recode pvpsl`i' (.=0) (0/240.9999=2) (241/290.9999=3) (291/340.9999=4) /// (341/999=5), gen(`proflevel`i'') } local overpv=1 } else local overpv=0 tempvar probkaif local num_rep : word count `rep' local n_centile: word count `centile' local n_stat: word count `stats' local n_stats: word count `stats' `centile' local n_vars: word count `varlist' local num_pv : word count `varlist' local num_rep : word count `rep' if `n_centile' >0 { local namestats "`stats'" foreach numc of local centile { local namestats "`namestats' p`numc'" } } matrix pvstats = J(`num_pv',`n_stats',.) matrix rstats = J(`num_pv',`n_stats',.) matrix colnames pvstats =`namestats' ** PV loop local pvcount=0 foreach var of varlist `varlist' { di "." _c local ++pvcount cap drop `probkaif' if `overpv'==1 qui gen `probkaif'=1 if `proflevel`pvcount''==`overval' else local probkaif=1 qui sum `var' [iweight=`weight'] if `probkaif'==1, `meanonly' foreach stat of local stats { local fulstat_`stat'=r(`stat') local sumofsq_`stat'=0 } foreach replication of local rep { qui: sum `var' [iweight=`replication'] if `probkaif'==1, `meanonly' foreach stat of local stats { local sq_`stat' = (r(`stat') - `fulstat_`stat'')^2 local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat'' } } foreach stat of local stats { if `jk'==1 local rVAR_`stat'=(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'') else local rVAR_`stat'=(1*`sumofsq_`stat'') } local l=0 foreach stat of local stats { local ++l matrix rstats[`pvcount',`l'] = `rVAR_`stat'' matrix pvstats[`pvcount',`l'] = `fulstat_`stat'' } } **** centiles if `n_centile' >0 { local pvcount=0 `rVAR_`stat'' foreach var of varlist `varlist' { local ++pvcount cap drop `probkaif' if `overpv'==1 qui gen `probkaif'=1 if `proflevel`pvcount''==`overval' else local probkaif=1 qui _pctile `var' [iweight=`weight'] if `probkaif'==1, p(`centile') forvalues stat =1(1)`n_centile' { local fulstat_`stat'=r(r`stat') local sumofsq_`stat'=0 } foreach replication of local rep { qui _pctile `var' [iweight=`replication'] if `probkaif'==1, p(`centile') forvalues stat =1(1)`n_centile' { local sq_`stat' = (r(r`stat') - `fulstat_`stat'')^2 local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat'' } } forvalues stat =1(1)`n_centile' { if `jk'==1 local rVAR_`stat'=(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'') else local rVAR_`stat'=(1*`sumofsq_`stat'') } local l=`n_stat' forvalues stat =1(1)`n_centile' { local ++l matrix rstats[`pvcount',`l'] = `rVAR_`stat'' matrix pvstats[`pvcount',`l'] = `fulstat_`stat'' } } } qui: mata: compPVvar() matrix colnames meanPV =`namestats' matrix colnames SDR =`namestats' return matrix est =meanPV return matrix se =SDR end ************ version 10 clear mata mata: function compPVvar() { mata: pvs=st_matrix("pvstats") repVAR=mean(st_matrix("rstats")) pvdif=pvs:-mean(pvs) sumofsqpv=pvdif'pvdif VAR=1.1:*sumofsqpv:/9 pvVAR= diagonal(VAR)' totVAR=repVAR+pvVAR SDR=totVAR:^0.5 st_matrix("SDR", SDR) st_matrix("meanPV", mean(pvs)) } end