/********************************************************************************

	These are functions used across SCTO commands in the sctotools package


********************************************************************************/


**Funtion to test that a folder exist. If two folders are passed as arguments, then
* both are tested that they exist, in addition to testing that they are not the same folder.
cap program drop 	sctotestfolder
	program define 	sctotestfolder

	syntax , ///
		folder1(string)  /// The folder path
		name1(string) 	 /// The folder name to be used in the error message
		[folder2(string) name2(string)] // Same as above but for optional second folder

	di "sctotestfolder syntax ok"

	*Set number of folders in syntax
	local numFolds 1
	if "`folder1'" != "" local numFolds 2

	*Test that each folder exist
	forvalues fNum = 1/`numFolds' {

		*Forward and back slash means the same but are not the same in string comparison
		local folder`fNum'	= subinstr("`folder`fNum''" , "\", "/", .)

		*Test that the folder exists
		mata : st_numscalar("r(dirExist)", direxists("`folder`fNum''"))
		if `r(dirExist)' == 0 {

			noi di as error `"{phang}The folder in `name`fNum''(`folder`fNum'') does not exist. You must enter the full path. For example, on most Windows computers it starts with {it:C:} and on most Mac computers with {it:/user/}.{p_end}"'
			error 693
			exit
		}
	}

	*If two folders specified, thest that they are not the same
	if `numFolds' == 2 & trim(lower("`folder1'")) == trim(lower("`folder2'")) {

		di as error "{phang}The `name1'(`folder1') folder may not be the same folder as the `name2'(`folder2') folder.{p_end}"
		error 198
		exit
	}

end



cap program drop 	test_file_name
	program define 	test_file_name, rclass

	qui {
		syntax, filename(string)

		if (regexm("`filename'","[A-Z][A-Z]_[a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9]-[a-z0-9][a-z0-9][a-z0-9][a-z0-9]-[a-z0-9][a-z0-9][a-z0-9][a-z0-9]-[a-z0-9][a-z0-9][a-z0-9][a-z0-9]-[a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9][a-z0-9]_PERIOD_[a-z0-9]+.csv") != 1) {

			noi di as error `"{phang}The file name [`filename'] is not on the expected format. the format SurveyCTO's server gives to these filem names are [XX_xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx_PERIOD_0+.csv] where X is any upper case letter, x is any lower case letter or any digit, and 0+ any number of digitis.{p_end}"'
			error 693
			exit
		}
	}

end

cap program drop 	sctocalculatestats
	program define 	sctocalculatestats, rclass

		syntax, file(string) [ btwnstr(string) quiet still moving dta csv keyvar]

			noi di "sctocalculatestats syntax ok!"

			*Import file data
			import delimited "`file'", clear

			*Test that the sensor stream file is on acceptable format
			test_data_format, filename("`file'")

			*The list of expected variables each sensor_stream data set
			local expected_varlist second count mean min max sd fieldname

			*Test that the file has all the variables expected
			cap confirm variable `expected_varlist'
			if _rc {

				noi di as error "{phang}The file `filecsv' does not have all of the variables [`expected_varlist'] that are expected in a sensor_stream output file.p_end}"
				error 198
				exit
			}

			*Reversed file name (this is a work around to allow code to work Stata versions <14)
			local fileReverse = strreverse("`file'")
			local forwSlsh = strpos("`fileReverse'","/")
			local backSlsh = strpos("`fileReverse'","\")

			*If only one type of slash used the other is zero and will make
			*the min() function incorrectly pick the 0 index in next step
			if `backSlsh' == 0 local backSlsh = `forwSlsh'
			if `forwSlsh' == 0 local forwSlsh = `backSlsh'

			*Get only the file name out of the file path
			local lastslash_fromRight = min(`forwSlsh',`backSlsh')
			local lastslash_fromLeft = strlen("`file'") - `lastslash_fromRight' + 2
			local filename = substr("`file'",`lastslash_fromLeft',.)

			*Get the filename without sensor prefix and file extension
			local filename_noext = substr(subinstr("`filename'",".csv", "", .),4,.)

			*Find the first underscore that splits the key and period number
			local first_undscore = strpos("`filename_noext'","_")

			*Get submission key from file name and add "uuid:" to match format on server
			local key = "uuid:" + substr("`filename_noext'",1,`first_undscore' -1)	//-1 to not include underscore

			*Get period length from file name,
			local period_from_filename = substr("`filename_noext'",`first_undscore' + 1, .) 		//+1 to not include underscore

			*Find the first underscore that splits the key and period number
			local second_undscore = strpos("`period_from_filename'","_")
			local period = substr("`period_from_filename'",`second_undscore' + 1, .)

			*Period and key only relevant for dta file (key is in csv file name)
			if "`dta'" != "" {

				*Extract key and period indication from file name and generate variable with that info
				gen key 	= "`key'"
				gen period 	= `period'

				*Order these vars to the front
				order key period

			}

			*Key variable is option in csv output as it takes space
			if "`csv'" != "" & "`keyvar'" != "" {

				*Extract key from file name and generate variable with that info
				gen key 	= "`key'"

				*Order these vars to the front
				order key `periodvar'
			}

			**********************
			*Calculate pct_categories equivalences variables

			*If option quiet is used generate var quiet
			if "`quiet'"  != "" {
				gen quiet = (mean < 25) & !missing(mean) //No values should be missing given current sensor_stream output, but good to include if that changes
				//label variable quiet "Time period has sound level below 25dB" //Not needed as outputted in csv where labels are not included
				local catvars "`catvars' pct_quiet = quiet"
			}

			*If option quiet is used generate var quiet
			if "`still'"  != "" {
				gen still = (mean < 0.15) & !missing(mean) //No values should be missing given current sensor_stream output, but good to include if that changes
				//label variable still "Time period has movement less than 0.25 m/s^2" //Not needed as outputted in csv where labels are not included
				local catvars "`catvars' pct_still = still"
			}

			*If option quiet is used generate var quiet
			if "`moving'" != ""{
				gen moving = (mean > 1.5) & !missing(mean) //No values should be missing given current sensor_stream output, but good to include if that changes
				//label variable moving "Time period has movement greater than 2 m/s^2" //Not needed as outputted in csv where labels are not included
				local catvars "`catvars' pct_moving = moving"
			}



			**********************
			*Calculate pct_between equivalences variables
			local counter = 0

			*Loop over all btwn varaibles. If no between option, then the btwnstring is empty, then the loop is skipped
			while ("`btwnstr'" != "" & `counter' < 1000) {

				local openindex  = strpos("`btwnstr'","[")
				local closeindex = strpos("`btwnstr'","]")

				local betweenname 	= substr("`btwnstr'", 1 ,`openindex' - 1)
				local betweeneq 	= substr("`btwnstr'", `openindex' + 1,`closeindex' - `openindex' - 1)

				gen `betweenname' = (`betweeneq')

				local btwnstr = trim(substr("`btwnstr'", `closeindex' + 1 , .))
				local ++counter
			}

end

** Test that the file has the correct format
cap program drop 	test_data_format
	program define 	test_data_format, rclass

	syntax , filename(string)


	****************************
	* Test that all expected variables exist

	*Create a list of all variables in the choice sheet
	ds
	local vars_in_this_file `r(varlist)'

	local expected_vars "second count mean min max sd fieldname"

	*Test that all required vars are actually in the survey sheets
	if `: list expected_vars in vars_in_this_file' == 0 {

		*Generate a list of the vars missing and display error
		local missing_vars : list expected_vars - vars_in_this_file
		noi di as error "{phang}The file [`filename'] does not have all the variables required. The variable(s) [`missing_vars'] are missing. Either download the file again, or remove the file.{p_end}"
		error 688
	}

	****************************
	* Test that there are observations in the data set
	if _N == 0 {
		noi di as error "{phang}The file [`filename'] does not have any observations. Either download the file again, or remove the file.{p_end}"
		error 688
	}

end



**This function parse the strings that are passed as values in
* the options llbetween(), mvbetween(), slbetween() and spbetween()
* and turn them into code that makes sense to Stata.
cap program drop 	parsebetween
	program define 	parsebetween, rclass

	syntax , prefix(string) between_options(string) usednames(string)

	*Create a lower case version of the prefix local
	local lc_prefix	= lower("`prefix'")

	*Remove excessive white space
	local btwn_list = trim(itrim("`between_options'"))

	*Start the counter
	local counter = 0

	*Parse through btwn_list and stop when it is empty (with a break for the unlikely case of infinite loops)
	while ("`btwn_list'" != "" & `counter' < 1000) {

		*Count the number of categories
		local ++counter

		*Get the index of the first square and the first round open bracket
		local opensquare = strpos("`btwn_list'","[")
		local openround  = strpos("`btwn_list'","(")

		*If there is no more open square or round index, set it to an index larger than any possible real index
		if `opensquare' == 0 	local opensquare = strlen("`btwn_list'") + 1
		if `openround' == 0 	local openround = strlen("`btwn_list'") + 1

		*Get the index of the first square and the first round closed bracket
		local closesquare = strpos("`btwn_list'","]")
		local closeround  = strpos("`btwn_list'",")")

		*If there is no more closed square or round index, set it to an index larger than any possible real index
		if `closesquare' == 0 	local closesquare = strlen("`btwn_list'") + 1
		if `closeround' == 0 	local closeround = strlen("`btwn_list'") + 1

		*Set the index of the first open bracket and first closed bracket regardless of square or round
		local openindex 	= min(`opensquare' ,`openround')
		local closeindex 	= min(`closesquare',`closeround')

		*Parse the individual components of the string for this between var
		local catvarname 	= substr("`btwn_list'", 1, `openindex' -1)
		local minbracket 	= substr("`btwn_list'", `openindex', 1)
		local minmaxstr 	= substr("`btwn_list'", `openindex' +1, (`closeindex' - `openindex') - 1 )
		local min			: word 1 of `minmaxstr'
		local max			: word 2 of `minmaxstr'
		local maxbracket 	= substr("`btwn_list'", `closeindex', 1)

		*Test that catvarname is a valid new name
		cap confirm new variable `catvarname'
		if _rc {

			di as error "{phang}The varname {it:`catvarname'} used in {inp:`lc_prefix'between(`between_options')} is not a valid varname, already in use or the varname is not immideatly followed by a ( or a [ sign.{p_end}"
			error 198
			exit
		}

		*Test that names are not already used in any other options
		if `:list catvarname in usednames' != 0 {

			di as error "{phang}The varname {it:`catvarname'} in {inp:`catvarname'`minbracket'`minmaxstr'`maxbracket'}  was already used for this sensor_stream or in another option. Names may not be duplicates across sensor streams or across options as all sensor varaibles are merged into one data set. Names may also not be the same name as a variable name used in any option with the prefix pct_.{p_end}"
			error 198
			exit
		}

		* Add name to list of used names
		local usednames "`usednames' pct_`catvarname' `catvarname'"

		if `:word count `minmaxstr'' > 2 | `=strpos("`minmaxstr'","(")' != 0 | `=strpos("`minmaxstr'","[")' != 0 {

			di as error "{phang}The value in {inp:`catvarname'`minbracket'`minmaxstr'`maxbracket'} used in {inp:`lc_prefix'between(`between_options')} is not enclosed properly. It must be enclosed in round or square brackets, like (), (], [) or []{p_end}"
			error 198
			exit
		}

		*Test that min and max are valid numbers
		foreach value in min max {
			cap confirm number ``value''
			if _rc {

				*The value is not a valid number, test if it is ? that is also valid
				if "``value''" == "?" {
					*The value is valid input but not a number
					local `value'num = 0
				}
				else {
					*The value is not a valid input
					di as error "{phang}The `value'imum value in {inp:`catvarname'`minbracket'`minmaxstr'`maxbracket'} used in {inp:`lc_prefix'between(`between_options')} is not a valid value. It must be a number or a question mark.{p_end}"
					error 198
					exit
				}
			}
			else {

				*The value is a valid number
				local `value'num = 1

				*Prepare the sign to use in the conditional statement for the valid number
				if "`value'" == "min" {
					local sqrbracket "["
					local rndbracket "("
					local rndsign ">"
					local sqrsign ">="
				}
				else {
					local sqrbracket "]"
					local rndbracket ")"
					local rndsign "<"
					local sqrsign "<="
				}

				*Prepare the sign to use based on square or round bracket
				if "``value'bracket'" == "`rndbracket'" {
					local `value'sign "`rndsign'"
					local `value'lbl  "(excl)"
				}
				else if "``value'bracket'" == "`sqrbracket'" {
					local `value'sign "`sqrsign'"
					local `value'lbl  "(incl)"
				}
				else {
					di as error "{phang}The value in {inp:`catvarname'`minbracket'`minmaxstr'`maxbracket'} used in {inp:`lc_prefix'between(`between_options')} must be enclosed in round or square brackets, like (), (], [) or [].{p_end}"
					error 198
					exit
				}
			}
		}

		*Prepare the conditional equation to be returned
		if `minnum' + `maxnum' == 0 {

			*Both min and max are ? which is not allowed
			di as error "{phang}There is no valid value in {inp:`catvarname'`minbracket'`minmaxstr'`maxbracket'} used in {inp:`lc_prefix'between(`between_options')}. Both values may not be a question mark.{p_end}"
			error 198
			exit
		}
		else if `minnum' + `maxnum' == 2 {

			*Both min and max are valid numbers

			*Test that min is smaller than max
			if `min' >= `max' {
				di as error "{phang}In {inp:`catvarname'`minbracket'`minmaxstr'`maxbracket'} used in {inp:`lc_prefix'between(`between_options')} the miniumum value is greater or equal to the maximum value which is not allowed.{p_end}"
				error 198
				exit
			}

			*Prepare the equation
			local cond_equ "mean `minsign' `min' & mean `maxsign' `max'"

			*Prepare part of variable label
			local label_values "between `min' `minlbl' and `max' `maxlbl'"
		}
		else if `minnum' == 1 {
			*Min is a valid number and max is a question marks, prepare equation with only min
			local cond_equ "mean `minsign' `min'"

			*Prepare part of variable label
			local label_values "between `min' `minlbl' and `max' `maxlbl'"

			*Prepare part of variable label
			local label_values "greater than `min' `minlbl'"
		}
		else if `maxnum' == 1 {
			*Max is a valid number and min is a question marks, prepare equation with only max
			local cond_equ "mean `maxsign' `max'"

			*Prepare part of variable label
			local label_values "less than `max' `maxlbl'"
		}

		*Return the varname and the conditional equation
		local btwn	"`btwn' `catvarname'[`cond_equ' & !missing(mean)]"

		*For .dta file, return the labels and the names
		return local btwnname`counter'		"`catvarname'"
		return local btwnlabels`counter'	"% (in decimals) `label_values' in `prefix'"

		local catvarnames "`catvarnames' `catvarname'"

		*Remove the part of the btwn_list string just parsed and repeat the while loop if applicable
		local btwn_list = trim(substr("`btwn_list'", `closeindex' + 1 , .))
	}

	*Return a string to be parsed where each var is on format: varname[mean < 25 & mean > 35 & !missing(mean)]
	return local btwn "`btwn'"

	*Return the number of between categories and the updated list of used names
	return local btwncount `counter'
	return local btwnnames = itrim(trim("`catvarnames'"))
	return local usednames "`usednames'"

end