*! version 1.1 March 17, 2009 @ 10:23:49 UK
*! Generates Variable holding the frequency of respective sequence-type

* History
* version 1.0 Distributed on SSC
* version 1.1 All filenames embedded in comp. double quotes

// _gsqfreq: egen-style function. The first three tokens of the command line
// are the storage type, the name of the variable to create and the equal
// sign; the remainder is parsed with -syntax-.
//
// Options
//   so                 collapse runs of identical consecutive elements before
//                      comparing sequences
//   se                 keep one observation per (id, element) before
//                      comparing sequences
//   gapinclude         do not exclude sequences that contain internal gaps
//   subsequence(a,b)   restrict to positions with inrange(order-var, a, b)
//
// The data must carry the characteristics _dta[SQis] (element variable),
// _dta[SQiis] (identifier variable) and _dta[SQtis] (order variable).
program _gsqfreq
	version 9

	gettoken type 0 : 0
	gettoken h    0 : 0
	gettoken eqs  0 : 0

	syntax [varname(default=none)] [if] [in] ///
	  [, so se gapinclude SUBSEQuence(string) * ]

	// Sq-Data
	if "`_dta[SQis]'" == "" {
		di as error "data not declared as SQ-data; use -sqset-"
		exit 9
	}

	// if/in
	// Collect every token of the if-expression that is an existing variable;
	// those variables must be constant within each sequence.
	if "`if'" != "" {
		tokenize "`if'", parse(" =+-*/^~!|&<>(),.")
		while "`1'" != "" {
			capture confirm variable `1'
			if !_rc {
				local iflist  "`iflist' `1'"
			}
			macro shift
		}
	}
	if "`iflist'" != "" CheckConstant `iflist', stop

	marksample touse, novarlist
	if "`subsequence'" != "" quietly replace `touse' = 0 if !inrange(`_dta[SQtis]',`subsequence')

	quietly {

		// Keep a copy of the data as it is now; everything below works on a
		// reduced/reshaped copy and merges the result back into this file.
		tempfile orig reshaped
		save `"`orig'"'

		// Drop Sequences with Gaps
		if "`gapinclude'" == "" {
			tempvar lcensor rcensor gap
			// lcensor: running count of non-missing elements (0 while only
			// leading missings have been seen)
			by `_dta[SQiis]' (`_dta[SQtis]'), sort: gen `lcensor' = sum(!mi(`_dta[SQis]'))
			// rcensor: 1 if this and all following elements are missing
			by `_dta[SQiis]' (`_dta[SQtis]'): gen `rcensor' = sum(mi(`_dta[SQis]'))
			by `_dta[SQiis]' (`_dta[SQtis]'): ///
			  replace `rcensor' = ((_N-_n) == (`rcensor'[_N]-`rcensor'[_n])) & mi(`_dta[SQis]')
			// gap: running count of missings that are neither leading nor trailing
			by `_dta[SQiis]' (`_dta[SQtis]'): ///
			  gen `gap' = sum(mi(`_dta[SQis]') & `lcensor' & !`rcensor')
			by `_dta[SQiis]' (`_dta[SQtis]'): ///
			  replace `touse' = 0 if `gap'[_N]>0
		}

		keep if `touse'
		if _N == 0 {
			noi di as text "(No observations)"
			// The -keep- above has emptied the data in memory. Reload the
			// saved copy so the user's data are not lost, and create the
			// result variable as all-missing so that the caller finds the
			// variable it asked for.
			use `"`orig'"', clear
			gen `h' = .
			exit
		}

		if "`so'" == "so" {
			// Same order: keep only the first element of each run and renumber
			by `_dta[SQiis]' (`_dta[SQtis]'), sort: ///
			  keep if `_dta[SQis]' ~= `_dta[SQis]'[_n-1]
			by `_dta[SQiis]' (`_dta[SQtis]'): replace `_dta[SQtis]' = _n
		}

		if "`se'" == "se" {
			// Same elements: keep each element once per sequence and renumber
			// in element order
			by `_dta[SQiis]' `_dta[SQis]', sort: keep if _n == 1
			by `_dta[SQiis]' (`_dta[SQis]'): replace `_dta[SQtis]' = _n
		}

		// Reshape to Wide
		// ---------------

		keep `_dta[SQiis]' `_dta[SQtis]' `_dta[SQis]'
		reshape wide `_dta[SQis]', i(`_dta[SQiis]') j(`_dta[SQtis]')

		// Generate Variable
		// Sequences with identical wide-form elements form one by-group;
		// the group size is the frequency of that pattern.
		by `_dta[SQis]'*, sort: gen `h' = _N

		// Store and merge back
		// --------------------

		keep `_dta[SQiis]' `h'
		sort `_dta[SQiis]'
		save "`reshaped'"

		use `"`orig'"'
		tempvar sorter
		gen `sorter' = _n          // remember the current observation order
		sort `_dta[SQiis]'
		merge `_dta[SQiis]' using `"`reshaped'"'
		assert _merge != 2
		drop _merge
		sort `sorter'

		// Labels
		// The option is named -se-; the earlier code tested an undefined
		// macro here, so runs with -se- were labelled as plain patterns.
		if "`so'" == "" & "`se'" == "" {
			label variable `h' "Frequency of sequence-pattern"
		}
		if "`so'" != "" & "`se'" == "" {
			label variable `h' "Frequency of SO sequence-pattern"
		}
		if "`so'" == "" & "`se'" != "" {
			label variable `h' "Frequency of SE sequence-pattern"
		}

		// NOTE(review): this appends to the characteristic SQlength, not to
		// one named for frequency -- confirm against the code that reads it.
		char _dta[SQlength] "`_dta[SQlength]' $EGEN_Varname"
	}
end

// CheckConstant: verify that each variable in varlist is constant within
// each group of the identifier variable _dta[SQiis]. The data are sorted by
// that identifier as a side effect.
//   stop   exit with return code 9 on the first non-constant variable;
//          without it, the variable is reported and dropped from the list
// Returns r(checked): the list of variables that passed the check.
program CheckConstant, rclass
	syntax varlist(default=none) [, stop]
	sort `_dta[SQiis]'
	foreach var of local varlist {
		capture by `_dta[SQiis]': assert `var' == `var'[_n-1] if _n != 1
		if _rc & "`stop'" == "" {
			di as res "`var'" as text " is not constant over time; not used"
			local varlist: subinstr local varlist "`var'" "", word
		}
		if _rc & "`stop'" != "" {
			di as error "`var' is not constant over time"
			exit 9
		}
	}
	return local checked "`varlist'"
end