*! part of -mpitb- the MPI toolbox

cap program drop _mpitb_refsh
program define _mpitb_refsh 
	* Purpose: build or refresh a reference sheet (one row per id / sid found in
	* the micro data files) and save it to the file given in using.
	*
	* Options
	*   id(name)         id variable; compared as a string against update() entries
	*   clear            build the sheet from scratch (replaces the using file)
	*   newfiles         only add files whose name is not yet listed in fname
	*   update(namelist) re-process the files of the listed id values
	*                    (exactly one of clear, newfiles, update() is required)
	*   path(string)     directory that is scanned for dta files
	*   file(string)     explicit file list instead of path(); requires clear
	*   sid(name)        optional sub id variable; files lacking it are skipped
	*   keep(namelist)   variables to keep; each must be constant within a file,
	*                    observations with missing values in them are dropped
	*   char(namelist)   dataset characteristics turned into string variables
	*   depind(string)   indicator list handed to _mpitb_missvars
	*   gentvar(name)    variable (listed in keep() or char()) from which the
	*                    counters t and T are generated within id
	*
	* Exit codes: 198 for option conflicts, 197 if gentvar() is not listed,
	* 601 if path() does not exist, 119 if an update() id is unknown,
	* 9 if a keep() variable is not constant, 416 if gentvar() has missings,
	* 2000 if no file could be processed.
	syntax using/  , id(name) [clear Path(string) File(string) NEWFiles UPDate(namelist) /// 
		sid(name) Keep(namelist) Char(namelist) Depind(string) GENTvar(name)] 

	/* ToDo:   
		- subgroup / sid option  
		- varlist */

	* input checks 
	* exactly one of clear, update() and newfiles may be given
	if "`clear'" != "" & ("`newfiles'" != "" | "`update'" != "") | ("`newfiles'" != "" & "`update'" != "") {
		di as err "Please choose only one of options {bf:clear}, {bf:update}, or {bf:newfiles}!"
		exit 198
	}
	if "`clear'" == "" & "`update'" == "" & "`newfiles'" == "" {
		di as err "One option of {bf:clear}, {bf:update}, or {bf:newfiles} is required!"
		exit 198
	} 
	* gentvar() must be one of the variables that end up in the sheet
	if "`gentvar'" != "" {
		loc allvars `keep' `char'
		loc tinall : list gentvar in allvars 
		if `tinall' == 0 {
			di as err "tvar {bf:`gentvar'} neither found in {bf:keep()} nor in {bf:char()}"
			exit 197
		}
	}
	* exactly one of path() and file()
	if "`path'`file'" == "" | ("`path'" != "" & "`file'" != "") {
		di as err "Please specify one of {bf:path} or {bf:file} option."
		exit 198
	}
	if "`file'" != "" & "`update'`newfiles'" != "" {
		di as err "Option {bf:file} may not be combined with options {bf:update} or {bf:newfiles}."
		exit 198
	}
	
	
*	if inlist("`=substr("`path'",-1,. )'","/","\") {
*		di as err "`path' is not correctly specified! Please remove slash!"	// make sure path is directory
*		err 198
*	}

	if "`path'" != "" {
		* result goes into a local rather than a global scalar, so nothing is left behind
		m: st_local("direxists", strofreal(direxists(st_local("path"))))
		if `direxists' == 0 {
			di as err "directory {bf:`path'} does not exist!" 
			exit 601 
		}
		
		if "`clear'" == "" {				// => newfiles or update
			conf f "`using'"							// refsh exists? (quoted: path may contain blanks)
			use "`using'" , clear
			conf v `id'								// id var exists?
		}
		* assembling file lists 
		if "`update'" == "" {				// => newfiles or clear
			loc fl : dir "`path'" file "*.dta" , respectcase				// full file list		
		}

		if "`update'" != "" {
			tempvar upd 	
			qui gen `upd' = .
			foreach c in `update' {
				qui count if `id' == "`c'"
				if r(N) == 0 {
					di as err "Country {bf:`c'} not found in reference sheet."
					exit 119
				}
				qui replace `upd' = 1 if `id' == "`c'"
			}
			* one pass after all requested ids are flagged is sufficient
			qui levelsof fname if `upd' == 1 , l(nfl) c
			loc fl `nfl'
		}

		if "`newfiles'" != "" {
			* fname is stored without the .dta extension (see the file loop below),
			* so the extension has to be removed before both lists are compared;
			* otherwise no file is ever recognised as already present
			loc fl : subinstr loc fl ".dta" "" , all
			qui levelsof fname , l(ofl) 						
			loc fl : list fl - ofl
			if `"`fl'"' == "" {
				di as txt _n "No new files found. Exiting..."
				exit 
			}
			* di `" `fl' "'
		}
	}
	if "`file'" != "" {
		loc fl `file'
		loc path .
	}
	
	* process individual micro data files
	foreach f in `fl' {
		loc f = subinstr("`f'",".dta","",.)
		use "`path'/`f'" , clear
		di as txt "Note: processing " as res "`f'.dta" as txt " now."

		* check keep variables being non-missing and constant
		if "`keep'" != "" {
			foreach v of varlist `keep' {						
				qui count if mi(`v')						// MV in ID vars?
				if r(N) > 0 {
					di as txt "Note: {bf:`v'} has missing values being dropped now."	// report existence of MV
					drop if mi(`v')
				}
				sort `v'
				cap assert `v'[1] == `v'[_N]					// test for id vars to be constant
				if _rc != 0 {
					di as err "variable {bf:`v'} not constant for all obs!"
					exit 9 
				}
			}
		}
		
		* gen vars from chars
		if "`char'" != "" { 
			foreach c of loc char {
				qui gen `c' = "`_dta[`c']'" // di "`c'"
				if ("`_dta[`c']'" == "") di as txt "Note: char" as res " `c' " as txt "not found."
			}
			*loc keep `keep' `char'						// all chars are automatically kept
		}
		conf v `id' `keep' `char'   						// confirm vars exists
		if "`sid'" != "" {
			cap conf v `sid'
			if _rc != 0 {						// introduced for COT
				loc nosid `nosid' `f'
				di as txt "Skipping " as res " `f' " as txt " since sid var is missing." _n
				continue 
			}
		}
		* best place ?
		if "`depind'" != "" {
			_mpitb_missvars , ind(`depind') // sub(region agec4 area)		// make miss-var options accessible through -refsh- options
			loc Nind `r(NMind)'
			loc misind "`r(misind)'"
		}	
		* OLD PLACE FOR LOOP: check for missings and constants (over id originally)
		
	
		*loc cty "`_dta[ccty]'" 			// `id'[1]			// obtain country code (data comes sorted)
			// make above optional: (i) main id in data, (ii) recovered from char, (iii) exclusively provided by user
		* fixed macro name: naming the tempfile after the id variable could
		* overwrite working locals such as f, path or keep
		tempfile tmpsheet

		qui duplicates drop `id' `sid' , force				// reduce data
		keep `id' `sid' `keep' `char'
		
		if "`sid'" != "" {
			if "`: val lab `sid''" != "" {
				decode `sid' , gen(`sid'_name)			// only for cty that allow disaggregation
				lab var `sid'_name "name in c-data"
			}
			lab var `sid' "code in c-data"
		}
		
		gen fname = "`f'"
		* datetime values in milliseconds need double precision; clock() matches the %tc format
		gen double fdate = clock("`c(filedate)'","DMY hm")
		gen double adate = clock("`c(current_date)' `c(current_time)'","DMY hms")
		format fdate adate %tcdd_Mon_CCYY_HH:MM
		
		if "`depind'" != "" {
			gen Nind = `Nind'
			qui gen misind = "`misind'"
		}
		
		qui save "`tmpsheet'"
		loc slist `"`slist' "`tmpsheet'""'				// tempfiles saved (quoted: temp dir may contain blanks)
		* di "Note: processing " as res "`f'" as txt " completed." _n
		* di as txt  "Done." _n
	}

	* nothing to append: stop before an existing reference sheet is overwritten
	if `"`slist'"' == "" {
		di as err "No micro data file could be processed; reference sheet left unchanged."
		exit 2000
	}

	* assemble reference sheet
	if "`clear'" != "" {
		di as txt "Note: creating reference sheet now."
		clear				
		qui save "`using'" , empty replace
	}

	if "`clear'" ==  "" {							// => update OR newfiles
		qui use "`using'" , clear
	}

	tempvar appd
	qui append using `slist' , gen(`appd')				// dummy appended: 0=master, 1=first file, 2=second file, etc

	qui levelsof `id' if `appd' > 0 , c l(cappd)			// all countries finally appended
	loc Nappd : word count `cappd'

	if "`update'" == "" {								// =>  clear or new
		di as txt _n "Note: Countries added to reference sheet: " as res "`Nappd'" as txt "." _n "(`cappd')"
	}	
	if "`update'" != "" {
		* remove the outdated rows of every country that was just re-appended
		foreach c of loc cappd {
			qui drop if `id' == "`c'" & `appd' ==  0
		}
		di as txt _n "Note : Countries updated: " as res "`Nappd'" as txt " (`cappd')."
	}

	* COT:
	if "`gentvar'" != "" {
		conf var `gentvar'
		conf new v t		// allow option to change name?
		conf new v T
		qui count if mi(`gentvar')
		if r(N) != 0 {
			di as err "Encountered missings in `gentvar'!"
			exit 416									// signal the failure instead of returning 0
		}
		tempvar nid
		qui {
			bys `id' `gentvar' : gen `nid' = 1 if _n == 1		// flag first row of each id-by-gentvar cell
			bys `id' (`gentvar') : gen t = sum(`nid')			// running count of distinct gentvar values within id
			bys `id' : egen T = max(t)						// total number of distinct gentvar values within id
		}
	}
	* report countries skipped entirely
	if "`nosid'" != "" {
		loc Nnosid : word count `nosid'
		loc nosid : list sort nosid
		di as txt _n "Note: " as res "`Nnosid'" as txt " files not covered for lacking" /// 
			as res " `sid' " as txt " variable" _n as txt "(`nosid')." _n
	}
	
	* tidy up
	drop `appd'

	foreach v of varlist * {
		loc ilist: char `v'[]
		foreach i in `ilist' {
		    char `v'[`i']		// remove all characteristics attached to variables
		}
	}

	la drop _all				// remove all potential value labels


	lab var fname "file name of micro data"	
	lab var fdate "date of micro data (last save)"	
	lab var adate "date when added to reference sheet"
	/* infos to add as chars: 
	- path to micro data, 
	- country and sid ids */
	* wipe all dataset characteristics inherited from the micro data, then tag the sheet
	loc clist : char _dta[]
	foreach c of loc clist {
		char _dta[`c']
	}
	char _dta[type] "refsheet"
	label data `"GMPI reference sheet. Compiled on `c(current_date)'"'
	
	save "`using'" , replace

end

* make public tool, if needed
capture program drop _mpitb_missvars
program define _mpitb_missvars , rclass
	* Purpose: report which variables consist of missing values only.
	*
	* Options (at least one is required, otherwise error 197)
	*   indicator(varlist numeric)  variables counted as indicators
	*   other(varlist numeric)      further variables, described one by one
	*
	* Returns
	*   r(misind)   names of indicator() variables with no non-missing value
	*   r(NMind)    number of indicator() variables with at least one
	*               non-missing value
	*   r(mv_NAME)  text describing the missings of each other() variable
	syntax , [INDicator(varlist numeric) Other(varlist numeric)]
	
	if "`indicator'" == "" & "`other'" == "" { 
		di as err "At least one of {bf:indicator()} and {bf:other()} has to be specified"
		err 197
	}
	
	* the indicator report is produced only if indicator() was specified
	if "`indicator'" != "" {
		loc Nind : word count `indicator'
		foreach v of varlist `indicator' {
			qui count if !mi(`v')
			if (`r(N)' == 0) loc misind `misind' `v'	// collect all-missing indicators
		}
		loc Nmind : word count `misind'
		loc N = `Nind' - `Nmind'
		di as txt "# indicator: {bf:`N'}, missing indicators: {bf:`misind'}."
		ret loc misind "`misind'"
		ret sca NMind = `N'			// non-missing indicator
	}
	if "`other'" != "" {
		foreach v of varlist `other' {
			qui count if !mi(`v')
			if (`r(N)' == 0) loc mv_`v' "has only missings"
			else if (`r(N)' == _N) loc mv_`v' "has no missings"
			else loc mv_`v' "has some missings (`=`=_N'-`r(N)'')"
			di as txt "{bf:`v'} `mv_`v''."
			ret loc mv_`v' "`mv_`v''"
		}
		
	}
end


* stop reading this file here
exit