* Version 2.2.2 - 20 October 2015 * By J.M.C. Santos Silva & Silvana Tenreyro * Thanks to Markus Baldauf for the help in developing the initial version of this code * Please email jmcss@surrey.ac.uk for help and support * The software is provided as is, without warranty of any kind, express or implied, including * but not limited to the warranties of merchantability, fitness for a particular purpose and * noninfringement. In no event shall the authors be liable for any claim, damages or other * liability, whether in an action of contract, tort or otherwise, arising from, out of or in * connection with the software or the use or other dealings in the software. program define ppml, eclass // Defines program name version 11.1 // Stata version // Command syntax syntax varlist(numeric min=2) [if] [in] [aweight pweight fweight iweight] [, CLuster(string) TECHnique(string) /// STrict mu(string) Keep ITERate(integer 5000) DIFficult search TRace GRADient showstep HESSian /// SHOWTOLerance fisher(integer 1) TOLerance(real 1e-6) LTOLerance(real 1e-7) NRTOLerance(real 1e-5) /// NONRTOLerance OFFset(string) from(string) NOCONstant CHECKonly] marksample touse markout `touse' `cluster', s // Defines observations to use tempname logy y _rhs _rhss _drop beta fit ll zeros yh // Temporary names used in the program gettoken y _rhs: varlist // Breaks varlist into y and x di di as txt "note: checking the existence of the estimates" qui gen `logy'=. if (`touse') // Creates regressand for first step qui replace `logy'=log(`y') if (`touse')&(`y'>0) // Modifies regressand for first step qui reg `logy' `_rhs' if (`touse')&(`y'>0) [`weight'`exp'], `noconstant' // Performs first step gen `zeros'=1 // Initialize observations selector local _drop "" // List of regressors to exclude local _rhss "" // List of regressors to include qui summarize `y' if (`touse') if r(max)>1000000 di as error "WARNING: `y' has very large values, consider rescaling" // Warn if there are large values foreach x of local _rhs{ // Start loop over all regressors: LOOP 1 if (_se[`x']==0) { // Try to include regressors dropped in the qui summarize `x' if (`touse') // first stage: LOOP 1.1 if r(max)>ln(1000000)|r(min)<-ln(1000000){ di as error "WARNING: `x' has very large values, consider rescaling or recentering" } qui summarize `x' if (`y'>0)&(`touse'), meanonly local _mean=r(mean) qui summarize `x' if (`y'==0)&(`touse') if (r(min)<`_mean')&(r(max)>`_mean')&("`strict'" == "") { // Include regressor if conditions met and local _rhss "`_rhss' `x'" // strict is off } else{ qui su `x' if `touse', d // Otherwise, drop regressor local _mad=r(p50) qui inspect `x' if `touse' qui replace `zeros'=0 if (`x'!=`_mad')&(r(N_unique)==2)&(`touse') // Mark observations to drop local _drop "`_drop' `x'" } } // End of LOOP 1.1 if _se[`x']>0 { // Include safe regressors: LOOP 1.2 qui summarize `x' if (`touse') if r(max)>ln(1000000)|r(min)<-ln(1000000){ // Warn if there are large values di as error "WARNING: `x' has very large values, consider rescaling or recentering" } local _rhss "`_rhss' `x'" } // End LOOP 1.2 } // End LOOP 1 qui su `touse' if `touse', mean // Summarize touse to obtain N local _enne=r(sum) // Save N qui replace `touse'=0 if (`zeros'==0)&("`keep'"=="")&(`y'==0)&(`touse') // Drop observations with perfect fit di // if keep is off local k_excluded : word count `_drop' // Number of variables causing perfect fit di as error "Number of regressors excluded to ensure that the estimates exist: `k_excluded'" if ("`_drop'" != "") di "Excluded regressors: `_drop'" // List dropped variables if any qui su `touse' if `touse', mean local _enne = `_enne' - r(sum) // Number of observations dropped di as error "Number of observations excluded: `_enne'" local _enne = r(sum) di _rmcoll `_rhss' [`weight'`exp'] if `touse', forcedrop `noconstant' // Remove collinear variables local _rhss "`r(varlist)'" if r(k_omitted) >0 di if ("`checkonly'" != "") { local w : word count `_rhss' ereturn clear ereturn post , esample(`touse') ereturn scalar N = `_enne' ereturn scalar k = `w'+("`noconstant'" == "") ereturn scalar k_omitted = r(k_omitted) // Post number of collinear variables ereturn scalar k_excluded = `k_excluded' // Post number of excluded variables ereturn local excluded(`_drop') // List of excluded regressors ereturn local included(`_rhss') // List of included regressors ereturn local cmd "ppml" // Post cmd on e exit } di as txt "note: starting ppml estimation" local k_omitted=r(k_omitted) local w : word count `cluster' // Counts arguments in option cluster if `w' >1 { // Do if number of arguments in cluster di as error "ERROR: Too many variables to cluster" // is valid exit } if `w'==0{ // Estimation with robust s.e.: LOOP 2 if ("`technique'" == "")|("`technique'" == "irls") glm `y' `_rhss' if `touse' [`weight'`exp'], family(poisson) /// // IRLS link(log) irls mu(`mu') noheader iterate (`iterate') notable vce(robust) `trace' `noconstant' offset(`offset') else glm `y' `_rhss' if `touse' [`weight'`exp'], family(poisson) link(log) technique(`technique') `difficult' /// // ML `search' noheader iterate (`iterate') notable vce(robust) `trace' `gradient' /// `showstep' `hessian' `showtolerance' fisher(`fisher') tolerance(`tolerance') offset(`offset') /// ltolerance(`ltolerance') nrtolerance(`nrtolerance') `nonrtolerance' from(`from') `noconstant' } // End of LOOP 2 if `w'==1{ // Estimation with cluster s.e.: LOOP 3 if ("`technique'" == "")|("`technique'" == "irls") glm `y' `_rhss' if `touse' [`weight'`exp'], family(poisson) /// // IRLS link(log) irls mu(`mu') noheader iterate (`iterate') notable vce(cluster `cluster') `trace' `noconstant' offset(`offset') else glm `y' `_rhss' if `touse' [`weight'`exp'], family(poisson) link(log) technique(`technique') `difficult' /// // ML `search' noheader iterate (`iterate') notable vce(cluster `cluster') `trace' `gradient' /// `showstep' `hessian' `showtolerance' fisher(`fisher') tolerance(`tolerance') offset(`offset') /// ltolerance(`ltolerance') nrtolerance(`nrtolerance') `nonrtolerance' from(`from') `noconstant' } // End of LOOP 3 di "Number of parameters: " e(k) // Display number of parameters di "Number of observations: " e(N) // Display number of observations matrix `beta' =e(b) // Start computation of predictions matrix score double `fit' = `beta' if `touse' // Get predictions if ("`technique'" == "")|("`technique'" == "irls"){ // If irls is used, compute ll: LOOP 4 qui gen double `ll' = e(N)*(-exp(`fit')+`y'*`fit'- lngamma(`y'+1)) if `touse' // N times individual log-likelihood qui summarize `ll' if `touse', meanonly // Get log-likelihood ereturn scalar ll = r(mean) // Post ll on e } // End of LOOP 4 di "Pseudo log-likelihood: " e(ll) // Display log-likelihood qui gen `yh'=exp(`fit') // Compute R2 qui corr `yh' `y' if (`touse') di "R-squared: " r(rho)^2 ereturn scalar r2 = r(rho)^2 // Post R2 on e if ("`strict'" == "") di "Option strict is: off" // Display option for strict else di "Option strict is: on qui su `y' if (`y'>0)&(`touse') // Start check for overfitting local _lbp=r(min) qui su `yh' if (`y'==0)&(`touse') if (r(min)<1e-6*`_lbp') di as error "WARNING: The model appears to overfit some observations with `y'=0" // End check for overfitting ereturn display // Display results table ereturn scalar k_omitted = `k_omitted' // Post number of collinear variables ereturn scalar k_excluded = `k_excluded' // Post number of excluded variables ereturn local excluded(`_drop') // List of excluded regressors ereturn local included(`_rhss') // List of included regressors ereturn local cmd "ppml" // Post cmd on e end // End program