* version 4.3, DEC2013
* Artur Pokropek & Maciej Jakubowski
* decriptive statistics with PIAAC data
cap program drop piaacdes
program define piaacdes, rclass
syntax [varlist(numeric default=none)] [if] [in] ///
, save(string) [ weight(varlist max=1) rep(varlist) ///
countryid(varname) vemethodn(varname numeric) pv(string) stats(string) centile(string) ///
over(string) round(integer 2)]
version 10.0
tempvar probka cntid jkvar proflevel1
tempname b se over_values
local oldover="`over'"
local longlist=0
local overpv=0
if "`over'"=="pvlit" | "`over'"=="pvnum" | "`over'"=="PVLIT" | "`over'"=="PVNUM" {
qui recode `over'1 (.=.) (0/175.9999=0) (176/225.9999=1) (226/275.9999=2) (276/325.9999=3) (326/375.9999=4) (376/999=5), gen(`proflevel1')
local overpv=1
local over="`proflevel1'"
}
else if "`over'"=="pvpsl" | "`over'"=="PVPLS" {
qui recode pvpsl1 (.=.) (0/240.9999=0) (241/290.9999=1) (291/340.9999=2) (341/999=3), gen(`proflevel1')
local overpv=1
local over="`proflevel1'"
}
if "`countryid'"!="" {
capture confirm string variable `countryid'
if !_rc {
qui gen `cntid'=`countryid'
qui levelsof `cntid' `if' `in', local(cnt)
}
else {
qui tostring(`countryid'), gen(`cntid')
qui levelsof `cntid' `if' `in', local(cnt)
}
}
else {
cap confirm variable cntryid
if !_rc {
qui tostring(cntryid), gen(`cntid')
qui local cnt="36 40 124 203 208 233 246 250 276 372 380 392 410 528 578 616 703 724 752 840 SUB 56 826 OECD 196 643"
local longlist=1
}
else {
cap confirm variable CNTRYID
if !_rc {
qui tostring(CNTRYID), gen(`cntid')
qui local cnt="36 40 124 203 208 233 246 250 276 372 380 392 410 528 578 616 703 724 752 840 SUB 56 826 OECD 196 643"
local longlist=1
}
else {
di as error "Your dataset does not contain variable cntryid or CNTRYID. Please specify option countryid()"
exit 100
}
}
}
cap confirm variable vemethodn
if !_rc {
qui gen `jkvar'=vemethodn
}
else {
cap confirm variable VEMETHODN
if !_rc {
qui gen `jkvar'=VEMETHODN
}
else {
if "`vemethodn'"=="" {
di as error "Your dataset does not contain variable vemethodn or VEMETHODN. Please specify vemethodn() option"
exit 100
}
else gen `jkvar'=`vemethodn'
}
}
if "`weight'"=="" {
cap confirm variable spfwt0
if !_rc local weight="spfwt0"
else {
cap confirm variable SPFWT0
if !_rc local weight="SPFWT0"
else {
di as error "Your dataset does not contain main survey weight spfwt0 or SPFWT0. Please specify weight() option"
exit 100
}
}
}
local ok_small=0
local ok_big=0
if "`rep'"=="" {
forvalues i=1(1)80 {
cap confirm variable spfwt`i'
if !_rc local ok_small=`ok_small'+1
cap confirm variable SPFWT`i'
if !_rc local ok_big=`ok_big'+1
}
if `ok_small'==80 local rep="spfwt1-spfwt80"
else if `ok_big'==80 local rep="SPFWT1-SPFWT80"
else {
di as error "Your dataset does not contain all replicate weights spfwt1-spfwt80 or SPFWT1-SPFWT80. Please specify rep() option"
exit 100
}
}
if "`stats'"=="" & "`centile'"=="" {
local stats="mean"
local meanonly="meanonly"
}
else if "`stats'"=="mean" local meanonly="meanonly"
if "`if'"=="" local if=" if "
else local if=`"`if' "'+" & "
local n_vars: word count `varlist'
local n_pv: word count `pv'
local n_stats: word count `stats' `centile'
local n_cnt: word count `cnt'
if `longlist'==1 local n_cnt=`n_cnt'-2
if "`over'"!="" {
tab `over', nofreq matrow(over_values)
local n_over=r(r)
}
else {
local over="1"
matrix over_values=(1,1)
local n_over=1
}
tempname tempfile
file open `tempfile' using "`save'.html", write replace
file write `tempfile' `"
"' " | "
* TABLE HEADER - OVER CATEGORIES - if over() or overpv() specified
if `n_over'>1 {
local col_span=`n_stats'*2*(`n_vars'+`n_pv')
file write `tempfile' `" Over categories: | "'
forvalues i=1(1)`n_over' {
local numerek=over_values[`i',1]
if `overpv'!=1 {
local labelka: label (`over') `numerek'
file write `tempfile' `"`labelka' | "'
}
else {
if `numerek'==0 {
file write `tempfile' `"Below Level 1 | "'
}
else {
file write `tempfile' `"Level `numerek' | "'
}
}
}
}
* SECOND ROW - predictors
file write `tempfile' `" Variable | "'
forvalues i=1(1)`n_over' {
foreach var in `pv' `varlist' {
local colspan=2*`n_stats'
file write `tempfile' `"`var' | "'
}
}
* third row
file write `tempfile' "" "Country" " | "
forvalues i=1(1)`n_over' {
foreach var in `pv' `varlist' {
foreach stat in `stats' `centile' {
file write `tempfile' `""' "`stat'" " | " `""' "S.E." " | "
local OECD_`var'_`stat'_`i'=0
local OECD_SE_`var'_`stat'_`i'=0
}
}
}
local decimal=0.1^`round'
forvalues i=1(1)`n_over' {
local noc_`i'=0
* creating output matrices
foreach var in `pv' `varlist' {
mat b_`var'_over`i'=J(`n_cnt',`n_stats',.)
mat se_`var'_over`i'=J(`n_cnt',`n_stats',.)
}
}
local OECD_saved=0
local country_list=""
local pvlist=""
if "`pv'"!="" local pvlist="`pv'*"
if `overpv'==1 local pvlist="`pvlist' `oldover'*"
local cnt_i=0
foreach l of local cnt {
if "`l'"=="OECD" | "`l'"=="SUB" {
if "`l'"=="SUB" file write `tempfile' `"
Sub-national entities | "'
else {
local OECD_saved=1
local pvvar="`pv'"+" `varlist'"
file write `tempfile' `"
OECD Average | "'
forvalues i=1(1)`n_over' {
foreach v of local pvvar {
foreach stat in `stats' `centile' {
local mean=string(round(`OECD_`v'_`stat'_`i''/`noc_`i'',`decimal'),"%20.`round'f")
local se=string(round(sqrt(`OECD_SE_`v'_`stat'_`i'')/`noc_`i'',`decimal'),"%20.`round'f")
file write `tempfile' `" `mean' | `se' | "'
}
}
}
file write `tempfile' `"
Partners | "'
}
}
else {
_cnt `l'
local name=r(name)
local country_list="`country_list'`l' "
local cnt_i=`cnt_i'+1
di ""
di as result "`name'" _continue
file write `tempfile' "
" "`name'" " | "
sum `jkvar' if `cntid'=="`l'" , meanonly
if `r(N)'>0 {
local jk=`r(mean)'
if `jk'<1 | `jk'>2 {
di as error "Please specify values 1 or 2 in `jkvar'"
exit 100
}
forvalues i=1(1)`n_over' {
if 1!=`n_over' di as text " `i'" _c
return clear
cap drop `probka'
if `overpv'==0 qui gen `probka'=1 `if' `cntid'=="`l'" & `over'==over_values[`i',1] `in'
else qui gen `probka'=1 `if' `cntid'=="`l'" `in'
preserve
qui keep if `probka'==1
keep `pvlist' `varlist' `weight' `rep'
qui count
if r(N)>0 {
local noc_`i'=`noc_`i''+1
if "`pv'"!="" {
foreach v of local pv {
local pvlist2="`v'1 `v'2 `v'3 `v'4 `v'5 `v'6 `v'7 `v'8 `v'9 `v'10"
local temp=over_values[`i',1]
_piaacdespv `pvlist2', overpv(`oldover') overval(`temp') weight(`weight') rep(`rep') jk(`jk') stats(`stats') centile(`centile')
matrix b=r(est)
matrix se=r(se)
mat b_`v'_over`i'[`cnt_i',1]=b
mat se_`v'_over`i'[`cnt_i',1]=se
local col=1
foreach stat in `stats' `centile' {
local b=b[1,`col']
local se=se[1,`col']
local col=`col'+1
if `b'==0 & `se'==0 file write `tempfile' `" 0 | "' `" 0 | "'
else {
local mean=string(round(`b',`decimal'),"%20.`round'f")
local se=string(round(`se',`decimal'),"%20.`round'f")
file write `tempfile' `" `mean' | "' `"`se' | "'
local OECD_`v'_`stat'_`i'=`OECD_`v'_`stat'_`i''+`mean'
local OECD_SE_`v'_`stat'_`i'=`OECD_SE_`v'_`stat'_`i''+`se'^2
}
}
}
}
if "`varlist'"!="" & `overpv'==1 {
foreach v in `varlist' {
local pvlist2="`v' `v' `v' `v' `v' `v' `v' `v' `v' `v'"
return clear
local temp=over_values[`i',1]
_piaacdespv `pvlist2', overpv(`oldover') overval(`temp') weight(`weight') rep(`rep') jk(`jk') stats(`stats') centile(`centile')
matrix b=r(est)
matrix se=r(se)
mat b_`v'_over`i'[`cnt_i',1]=b
mat se_`v'_over`i'[`cnt_i',1]=se
local col=1
foreach stat in `stats' `centile' {
local b=b[1,`col']
local se=se[1,`col']
local col=`col'+1
if `b'==0 & `se'==0 file write `tempfile' `" 0 | "' `" 0 | "'
else {
local mean=string(round(`b',`decimal'),"%20.`round'f")
local se=string(round(`se',`decimal'),"%20.`round'f")
file write `tempfile' `" `mean' | "' `"`se' | "'
local OECD_`v'_`stat'_`i'=`OECD_`v'_`stat'_`i''+`mean'
local OECD_SE_`v'_`stat'_`i'=`OECD_SE_`v'_`stat'_`i''+`se'^2
}
}
}
}
if "`varlist'"!="" & `overpv'==0 {
_piaacdes `varlist', weight(`weight') rep(`rep') jk(`jk') stats(`stats') centile(`centile')
matrix b=r(coef)
matrix se=r(se)
local row=0
foreach var in `varlist' {
local col=0
local row=`row'+1
mat b_`var'_over`i'[`cnt_i',1]=b[`row',1..`n_stats']
mat se_`var'_over`i'[`cnt_i',1]=se[`row',1..`n_stats']
foreach stat in `stats' `centile' {
local col=`col'+1
local b=b[`row',`col']
local se=se[`row',`col']
if `b'==0 & `se'==0 file write `tempfile' `" 0 | "' `" 0 | "'
else {
local mean=string(round(`b',`decimal'),"%20.`round'f")
local se=string(round(`se',`decimal'),"%20.`round'f")
file write `tempfile' `" `mean' | "' `"`se' | "'
local OECD_`var'_`stat'_`i'=`OECD_`var'_`stat'_`i''+`mean'
local OECD_SE_`var'_`stat'_`i'=`OECD_SE_`var'_`stat'_`i''+`se'^2
}
}
}
}
}
else {
di as error "no observations" _c
foreach stat in `stats' `centile' {
if "`pv'"!="" {
file write `tempfile' `". | "' `". | "'
}
foreach var in `varlist' {
file write `tempfile' `". | "' `". | "'
}
}
}
restore
}
}
}
}
if `OECD_saved'==0 {
local pvvar="`pv'"+" `varlist'"
file write `tempfile' `"
Average | "'
forvalues i=1(1)`n_over' {
foreach v of local pvvar {
foreach stat in `stats' `centile' {
local mean=string(round(`OECD_`v'_`stat'_`i''/`noc_`i'',`decimal'),"%20.`round'f")
local se=string(round(sqrt(`OECD_SE_`v'_`stat'_`i'')/`noc_`i'',`decimal'),"%20.`round'f")
file write `tempfile' `" `mean' | `se' | "'
}
}
}
}
file write `tempfile' _n "
"
file close `tempfile'
di ""
di "Results saved in the `save'.html file"
* returning results matrices
* separate matrix with estimates for each variable and over category
* separate matrix with standard errors for each variable and over category
foreach var in `pv' `varlist' {
forvalues i=1(1)`n_over' {
matrix colnames b_`var'_over`i' = `stats' `centile'
matrix colnames se_`var'_over`i' = `stats' `centile'
matrix rownames b_`var'_over`i' = `country_list'
matrix rownames se_`var'_over`i' = `country_list'
}
if `n_over'==1 {
return matrix b_`var'=b_`var'_over1
return matrix se_`var'=se_`var'_over1
}
else {
forvalues i=1(1)`n_over' {
return matrix b_`var'_over`i'=b_`var'_over`i'
return matrix se_`var'_over`i'=se_`var'_over`i'
}
}
}
end
***********
cap program drop _cnt
program define _cnt, rclass
args l
local name="`l'"
if "`l'"=="36" local name="Australia"
if "`l'"=="40" local name="Austria"
if "`l'"=="124" local name="Canada"
if "`l'"=="203" local name="Czech Republic"
if "`l'"=="276" local name="Germany"
if "`l'"=="208" local name="Denmark"
if "`l'"=="724" local name="Spain"
if "`l'"=="233" local name="Estonia"
if "`l'"=="246" local name="Finland"
if "`l'"=="250" local name="France"
if "`l'"=="826" local name="England/N. Ireland (UK)"
if "`l'"=="372" local name="Ireland"
if "`l'"=="380" local name="Italy"
if "`l'"=="392" local name="Japan"
if "`l'"=="410" local name="Korea"
if "`l'"=="528" local name="Netherlands"
if "`l'"=="578" local name="Norway"
if "`l'"=="616" local name="Poland"
if "`l'"=="643" local name="Russian Federation"
if "`l'"=="703" local name="Slovak Republic"
if "`l'"=="752" local name="Sweden"
if "`l'"=="840" local name="United States"
if "`l'"=="56" local name="Flanders (Belgium)"
if "`l'"=="196" local name="Cyprus"
return local name "`name'"
end
**********************************************************
cap program drop _piaacdes
program define _piaacdes, rclass
syntax [varlist], weight(varlist max=1) ///
rep(varlist) jk(integer) [stats(string)] [centile(string)]
return clear
* SETUP
local num_rep : word count `rep'
local n_centile: word count `centile'
local n_stat: word count `stats'
local n_stats: word count `stats' `centile'
local n_vars: word count `varlist'
if `n_centile' >0 {
local namestats "`stats'"
foreach numc of local centile {
local namestats "`namestats' p`numc'"
}
}
matrix coef = J(`n_vars',`n_stats',.)
matrix colnames coef =`namestats'
matrix rownames coef =`varlist'
matrix se= J(`n_vars',`n_stats',.)
matrix colnames se =`namestats'
matrix rownames se =`varlist'
local ithavar=0
foreach var of varlist `varlist' {
local ++ithavar
qui: sum `var' [iweight=`weight'], `meanonly'
foreach stat of local stats {
local fulstat_`stat'=r(`stat')
local sumofsq_`stat'=0
}
foreach replication of local rep {
qui: sum `var' [iweight=`replication'], `meanonly'
foreach stat of local stats {
local sq_`stat' = (r(`stat') - `fulstat_`stat'')^2
local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat''
}
}
foreach stat of local stats {
if `jk'==1 local sdr_`stat'=sqrt(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'')
else local sdr_`stat'=sqrt(1*`sumofsq_`stat'')
}
local icol=0
foreach stat of local stats {
local ++icol
matrix coef[`ithavar',`icol'] = `fulstat_`stat''
matrix se[`ithavar',`icol'] = `sdr_`stat''
}
}
***** centiles
if `n_centile' >0 {
local ithavar=0
foreach var of varlist `varlist' {
local ++ithavar
qui _pctile `var' [iweight=`weight'], p(`centile')
forvalues stat =1(1)`n_centile' {
local fulstat_`stat'=r(r`stat')
local sumofsq_`stat'=0
}
foreach replication of local rep {
qui _pctile `var' [iweight=`replication'], p(`centile')
forvalues stat =1(1)`n_centile' {
local sq_`stat' = (r(r`stat') - `fulstat_`stat'')^2
local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat''
}
}
forvalues stat =1(1)`n_centile' {
if `jk'==1 local sdr_`stat'=sqrt(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'')
else local sdr_`stat'=sqrt(1*`sumofsq_`stat'')
}
local icol=`n_stat'
forvalues stat =1(1)`n_centile' {
local ++icol
matrix coef[`ithavar',`icol'] = `fulstat_`stat''
matrix se[`ithavar',`icol'] = `sdr_`stat''
}
}
}
return matrix coef =coef
return matrix se =se
end
***********************************************
cap program drop _piaacdespv
program define _piaacdespv, rclass
syntax [varlist(numeric)], [overpv(string) overval(string)] weight(varlist max=1) ///
rep(varlist) jk(integer) [stats(string)] [centile(string)]
if "`overpv'"=="pvlit" | "`overpv'"=="pvnum" {
tempvar proflevel1 proflevel2 proflevel3 proflevel4 proflevel5 proflevel6 proflevel7 proflevel8 proflevel9 proflevel10
forvalues i=1(1)10 {
qui recode `overpv'`i' (.=0) (0/175.9999=1) (176/225.9999=2) (226/275.9999=3) ///
(276/325.9999=4) (326/375.9999=5) (376/999=6), gen(`proflevel`i'')
}
local overpv=1
}
else if "`overpv'"=="pvpsl" {
tempvar proflevel1 proflevel2 proflevel3 proflevel4 proflevel5 proflevel6 proflevel7 proflevel8 proflevel9 proflevel10
forvalues i=1(1)10 {
qui recode pvpsl`i' (.=0) (0/240.9999=2) (241/290.9999=3) (291/340.9999=4) ///
(341/999=5), gen(`proflevel`i'')
}
local overpv=1
}
else local overpv=0
tempvar probkaif
local num_rep : word count `rep'
local n_centile: word count `centile'
local n_stat: word count `stats'
local n_stats: word count `stats' `centile'
local n_vars: word count `varlist'
local num_pv : word count `varlist'
local num_rep : word count `rep'
if `n_centile' >0 {
local namestats "`stats'"
foreach numc of local centile {
local namestats "`namestats' p`numc'"
}
}
matrix pvstats = J(`num_pv',`n_stats',.)
matrix rstats = J(`num_pv',`n_stats',.)
matrix colnames pvstats =`namestats'
** PV loop
local pvcount=0
foreach var of varlist `varlist' {
di "." _c
local ++pvcount
cap drop `probkaif'
if `overpv'==1 qui gen `probkaif'=1 if `proflevel`pvcount''==`overval'
else local probkaif=1
qui sum `var' [iweight=`weight'] if `probkaif'==1, `meanonly'
foreach stat of local stats {
local fulstat_`stat'=r(`stat')
local sumofsq_`stat'=0
}
foreach replication of local rep {
qui: sum `var' [iweight=`replication'] if `probkaif'==1, `meanonly'
foreach stat of local stats {
local sq_`stat' = (r(`stat') - `fulstat_`stat'')^2
local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat''
}
}
foreach stat of local stats {
if `jk'==1 local rVAR_`stat'=(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'')
else local rVAR_`stat'=(1*`sumofsq_`stat'')
}
local l=0
foreach stat of local stats {
local ++l
matrix rstats[`pvcount',`l'] = `rVAR_`stat''
matrix pvstats[`pvcount',`l'] = `fulstat_`stat''
}
}
**** centiles
if `n_centile' >0 {
local pvcount=0
`rVAR_`stat''
foreach var of varlist `varlist' {
local ++pvcount
cap drop `probkaif'
if `overpv'==1 qui gen `probkaif'=1 if `proflevel`pvcount''==`overval'
else local probkaif=1
qui _pctile `var' [iweight=`weight'] if `probkaif'==1, p(`centile')
forvalues stat =1(1)`n_centile' {
local fulstat_`stat'=r(r`stat')
local sumofsq_`stat'=0
}
foreach replication of local rep {
qui _pctile `var' [iweight=`replication'] if `probkaif'==1, p(`centile')
forvalues stat =1(1)`n_centile' {
local sq_`stat' = (r(r`stat') - `fulstat_`stat'')^2
local sumofsq_`stat'=`sumofsq_`stat''+ `sq_`stat''
}
}
forvalues stat =1(1)`n_centile' {
if `jk'==1 local rVAR_`stat'=(((`num_rep'-1)/`num_rep')*`sumofsq_`stat'')
else local rVAR_`stat'=(1*`sumofsq_`stat'')
}
local l=`n_stat'
forvalues stat =1(1)`n_centile' {
local ++l
matrix rstats[`pvcount',`l'] = `rVAR_`stat''
matrix pvstats[`pvcount',`l'] = `fulstat_`stat''
}
}
}
qui: mata: compPVvar()
matrix colnames meanPV =`namestats'
matrix colnames SDR =`namestats'
return matrix est =meanPV
return matrix se =SDR
end
************
version 10
clear mata
mata:
function compPVvar()
{
mata:
pvs=st_matrix("pvstats")
repVAR=mean(st_matrix("rstats"))
pvdif=pvs:-mean(pvs)
sumofsqpv=pvdif'pvdif
VAR=1.1:*sumofsqpv:/9
pvVAR= diagonal(VAR)'
totVAR=repVAR+pvVAR
SDR=totVAR:^0.5
st_matrix("SDR", SDR)
st_matrix("meanPV", mean(pvs))
}
end