*! version 1.0.5  C F Baum / R Sperling
* 0724: add lags numlist
* 1204: add trend option
* 1205: add Ghysels et al. crit vals for F1234, F234
* 1610: correct defn of macro aug
* 1812: add noTest and Size command line options and sequential t-test for optimal lags (version 7 now required)
* 1813: add case 6 for seasonal trends and fixed CVs for reversed pi3 and pi4
* 1813: add CVs for n=48 and n=200 and interpolation routine

program define hegy4, rclass
	version 7
* lags numlist rather than integer 0
	syntax varname(ts) [if] [in] [ , Lags(numlist >0) Det(string) GENerate(string) noTest LEVel(integer $S_level) ]

    local generat = cond("`generate'" == "", "", "`generate'") 
    	if "`generat'" != "" {
        capture confirm new variable `generat' 
        if _rc { 
                di in r "`generat' already exists: " _c  
                di in r "specify new variable with generate( ) option"
                exit 110 
        } 
        }

   	marksample touse
			/* get time variables */
	_ts timevar, sort
	markout `touse' `timevar'
	tsreport if `touse', report
	if r(N_gaps) {
		di in red "sample may not contain gaps"
		exit
	}
	qui tsset
	if r(unit1) != "q" {
		di in red "hegy4 must be used with quarterly data"
		exit
		}
	local tmin=r(tmins)
	local tmax=r(tmaxs)
	tempvar count trd patn seas1 x y1 y2 y3 y4 y5 et 
	tempname b v cv
* seas (seasonal dummies and constant) default
	local aux "err"
	if "`det'"=="none" {
		local aux ", noc"
		local daux "None"
		local case 1
		}	
	if "`det'"=="const" {
		local aux " "
		local daux "Constant"
		local case 2
		}
	if "`det'"=="" | "`det'" =="seas" {
		local aux "L(1/3).`seas1' " 
		local daux "Seasonal dummies + constant"
		local case 3	
		}
	if "`det'"=="trend" {	
		local aux "`trd'" 
		local daux "Constant + trend"
		local case 4
		}
	if "`det'"=="strend" {	
		local aux "L(1/3).`seas1' `trd'" 
		local daux "Seasonal dummies + constant + trend"
		local case 5
		}
	if "`det'"=="mult" {
		local aux "`st1' `st2' `st3' `trd'"
		local daux "Seasonal dummies + constant + seasonal trends"
		local case 6
		}
	if "`aux'"=="err" {
		di in red "Error: det must be none, const, seas, trend, strend or mult"
		exit
		}
	qui gen `count'=sum(`touse')
	local nobs=`count'[_N]
	local aug ""
	if "`lags'" != "" { 
		numlist "`lags'", int miss sort
		local lags="`r(numlist)'"
		tokenize `lags'
		local i 1
      	while "``i''" != "" {
			local numlags = `i'
			local maxlag = ``i''
			local aug  "`aug'  L``i''.`y5' "
			local i = `i' +1
		}
	}
	else {local lags = "none"}
	drop `count'
	qui gen `count'=sum(`touse')
	local nobs=`count'[_N]
* nobs reflects the max lag provided or calculated 
	gen double `trd' = _n
	egen `patn' = fill(1/4 1/4)
	qui gen `seas1'= (`patn'==1)
* calculate seasonal trends for case 6
	if "`det'"=="mult" {
		forv i = 1/3 {local st`i' = L`i'.`seas1'*`trd'}
	}
* generate y1..y5; reverse 3,4 per HEGY, Franses p64-65; also must switch CVs
	qui { gen double `x' = `varlist' if `touse'
	gen double `y1' =  L.`x' + L2.`x' + L3.`x' + L4.`x'
	gen double `y2' = -L.`x' + L2.`x' - L3.`x' + L4.`x'
	gen double `y3' = -L2.`x' + L4.`x'
	gen double `y4' = -L.`x' + L3.`x'
	gen double `y5' = `x' - L4.`x'
	}
* sequential t-test for largest lag
	local testflag "off"
	if "`aug'" !="" & "`test'" !="notest" {
		if `numlags' == `maxlag' {
			local testflag "on"
			local highlag 0
			forv i = `maxlag'(-1)1 {
				qui {
					reg `y5' `y1' `y2' `y3' `y4' `aug' `aux'
					test L`i'.`y5'
				}
				if `r(p)' <= (1 - `level'/100) {
					local highlag = `i'
					continue,break
				}
			}
* include only individually significant lags
			local userlags `lags'
			if `highlag'>0 {
				local aug ""
				local laglist ""
				forv j = 1/`highlag' {
					qui test L`j'.`y5'
					if `r(p)' <= (1 - `level'/100) {
						local aug "`aug' L`j'.`y5' "
						local laglist "`laglist' `j' "
					}
				}
* modify list of lags to include only individually significant lags
				numlist "`laglist'", int
				local lags="`r(numlist)'"
			}
* sequential t-test detected no significant lags
			else {
				local aug ""
				local lags="none"
			}
		}
	}
* run the basic regression to generate pi(1)..pi(4)
 	qui {
	reg `y5' `y1' `y2' `y3' `y4' `aug' `aux' 
	predict double `et',r
	mat `b'=e(b)
	mat `v'=e(V)
	local n=e(N)
	local j 1
	while `j'<5 {
		 return scalar pi`j' =`b'[1,`j']/sqrt(`v'[`j',`j'])
		 local j = `j'+1
		 } 
* joint (F) test on pi(3), pi(4)
	test `y3' `y4'
	return scalar F34 = `r(F)'
* joint (F) test on pi(2) - pi(4)
	test `y2' `y3' `y4'
	return scalar F234 = `r(F)'
* joint (F) test on pi(1) - pi(4)
	test `y1' `y2' `y3' `y4'
	return scalar F1234 = `r(F)'
	GetCritH `case' `n'
	mat `cv'= r(cv)
	return scalar N=`n'
	return local lags `lags'
	return local detvar `daux'
	return local varlist `varlist'
*	local n4=`n'+4+`lags'
      local n4=`n'
  	}
* run the auxiliary regression (for diagnostics) if generate specified
	if "`generat'" != "" {
		qui reg `et' L(1/4).`et' `y1' `y2' `y3' `y4' `aug' `aux' if `touse'
		qui predict `generat',r
		}
	di in gr " "
	di in gr "HEGY Quarterly seasonal unit root test for " in ye  "`varlist'"
*	summ `varlist' if e(sample)
*	summ `y5' `y1' `y2' `y3' `y4' `aug' `aux' if e(sample)
	di in gr " "
	di in gr "Number of observations  : `n4' " /* ( based on `tmin' - `tmax')*/  
	di in gr "Deterministic variables : `daux'"
*	di in gr "Lag length test: `testflag'"
	if "`testflag'"=="on" {di in gr "Lags tested: `userlags'"}
	di in gr "Augmented by lags : `lags'"
	di in gr " "
	di in gr _col(11) "Stat" _col(20) "5% critical" _col(34) "10% critical" 
	di in gr _dup(78) "-"
	di in ye "t[Pi1]" _col(7) %9.3f `return(pi1)' _col(19) %9.3f `cv'[1,1] /*
	*/ _col(33) %9.3f `cv'[1,2] 
	di "t[Pi2]" _col(7) %9.3f `return(pi2)' _col(19) %9.3f `cv'[1,3] /*
	*/ _col(33) %9.3f `cv'[1,4] 
	di "t[Pi3]" _col(7) %9.3f `return(pi3)' _col(19) %9.3f `cv'[1,5] /*
	*/ _col(33) %9.3f `cv'[1,6] 
	di "t[Pi4]" _col(7) %9.3f `return(pi4)' _col(19) %9.3f `cv'[1,7] /*
	*/ _col(33) %9.3f `cv'[1,8] 
	di " "
	di "F[3-4]" _col(7) %9.3f `return(F34)' _col(19) %9.3f `cv'[1,9] /*
	*/ _col(33) %9.3f `cv'[1,10]
	di "F[2-4]" _col(7) %9.3f `return(F234)' _col(19) %9.3f `cv'[1,11] /*
	*/ _col(33) %9.3f `cv'[1,12]
	di "F[1-4]" _col(7) %9.3f `return(F1234)' _col(19) %9.3f `cv'[1,13] /*
	*/ _col(33) %9.3f `cv'[1,14]

end

program define GetCritH, rclass
* values from HEGY Table 1 (blocks correspond to cases 1,2,3,4,5,6) for 48, 100 and 200 obs.
* concat. with Ghysels et al. CVs for F1234, F234, 100 obs. F stats: 5%, 10%
* CVs for case 6 from Smith and Taylor (1998).
	args c n
	tempname zt cv
	mat `zt'48 = (		-1.95,-1.59, -1.95,-1.60, -1.93,-1.52, -1.76,-1.35,  3.26, 2.45, 2.80, 2.21, 2.62, 2.11 \ /*
	*/			-2.96,-2.62, -1.95,-1.60, -1.90,-1.52, -1.72,-1.33,  3.04, 2.32, 2.63, 2.17, 3.47, 2.86 \ /*  
	*/			-3.08,-2.72, -3.04,-2.69, -3.61,-3.24, -1.98,-1.53,  6.60, 5.50, 6.06, 5.16, 5.96, 5.02 \ /*   
	*/			-3.56,-3.21, -1.91,-1.57, -1.92,-1.52, -1.70,-1.33,  2.95, 2.23, 2.67, 2.14, 4.28, 3.60 \ /*
	*/			-3.71,-3.37, -3.08,-2.73, -3.66,-3.28, -1.91,-1.48,  6.55, 5.37, 6.09, 5.13, 6.53, 5.71 \ /*
	*/			-3.39,-3.07, -3.38,-3.07, -4.21,-3.88, -1.76,-1.36,  9.98, 8.59, 9.57, 8.28, 9.38, 8.15)
        
	mat `zt'100 = (		-1.97,-1.61, -1.92,-1.57, -1.90,-1.53, -1.68,-1.32,  3.12, 2.39, 2.76, 2.21, 2.55, 2.11 \ /*
	*/			-2.88,-2.58, -1.95,-1.60, -1.90,-1.54, -1.68,-1.30,  3.08, 2.35, 2.74, 2.18, 3.37, 2.86 \ /*  
	*/			-2.95,-2.63, -2.94,-2.63, -3.44,-3.14, -1.96,-1.53,  6.57, 5.56, 6.05, 5.18, 5.74, 4.95 \ /*   
	*/			-3.47,-3.16, -1.94,-1.60, -1.89,-1.54, -1.65,-1.28,  2.98, 2.31, 2.76, 2.15, 4.26, 3.58 \ /*
	*/			-3.53,-3.22, -2.94,-2.63, -3.48,-3.14, -1.94,-1.51,  6.60, 5.52, 5.99, 5.13, 6.47, 5.68 \ /*
	*/			-3.39,-3.09, -3.38,-3.08, -4.16,-3.86, -1.93,-1.50,  9.77, 8.49, 9.13, 8.02, 8.74, 7.83)

	mat `zt'200 = (		-1.94,-1.62, -1.95,-1.61, -1.92,-1.55, -1.65,-1.30,  3.16, 2.42, 2.73, 2.22, 2.56, 2.12 \ /*
	*/			-2.87,-2.57, -1.92,-1.59, -1.90,-1.53, -1.66,-1.29,  3.12, 2.37, 2.75, 2.19, 3.32, 2.81 \ /*  
	*/			-2.91,-2.59, -2.89,-2.60, -3.38,-3.07, -1.96,-1.54,  6.61, 5.56, 5.99, 5.14, 5.58, 4.87 \ /*   
	*/			-3.44,-3.15, -1.95,-1.62, -1.92,-1.56, -1.66,-1.29,  3.07, 2.34, 2.72, 2.17, 4.12, 3.55 \ /*
	*/			-3.49,-3.18, -2.91,-2.60, -3.41,-3.10, -1.92,-1.48,  6.57, 5.56, 5.89, 5.10, 6.38, 5.61 \ /*
	*/			-3.40,-3.11, -3.40,-3.11, -4.17,-3.88, -2.00,-1.57,  9.75, 8.51, 8.97, 8.00, 8.55, 7.71)

	if `n'<=48 {matrix `cv'=`zt'48[`c',1...]}
	else if `n'>48 & `n'<100 {
		local n1 48
		local n2 100
		Interp `n1' `n2' `n' `zt'48[`c',1...] `zt'100[`c',1...]
		matrix `cv'=r(cvi)
	}
	else if `n'>101 & `n'<200 {
		local n1 100
		local n2 200
		Interp `n1' `n2' `n' `zt'100[`c',1...] `zt'200[`c',1...]
		matrix `cv'=r(cvi)
	}
	else if `n'>=200 {matrix `cv'=`zt'200[`c',1...]}
	else 	if `n'==100 {matrix `cv'=`zt'100[`c',1...]}
	return matrix cv `cv'
end

program define Interp, rclass
* Interpolates critical values
	args n1 n2 n lo hi
	tempname cvi

	matrix `cvi' = (0,0,0,0,0,0,0,0,0,0,0,0,0,0)
	matrix `cvi'=`lo' + (`n'-`n1')/(`n2'-`n1')*(`hi'-`lo')
	return matrix cvi `cvi'
end
exit