*! version 1.0 December 22, 2006 @ 13:10:36 *! Calculate Dijkstra and Taris measurs of sequence distance *1.0 Initial version program sqdt, rclass version 9.1 syntax [if] [in] [, /// Alpha Beta Gamma /// REFseqid(string) full ] // SQ-Data // ------- if "`_dta[SQis]'" == "" { di as error "data not declared as SQ-data; use -sqset-" exit 9 } // Store definition of sqom-sample for sqclusterdat // ------------------------------------------------ char _dta[SQomsample] `if' `in' // Declarations // ------------ tempvar gap epiid epicount SQom SQid cutter N tempfile SQorig SQrefseq noshrink results // Error Checks // ------------ if "`name'" != "" confirm new variable `name' if "`alpha'" == "" & "`beta'" == "" & "`gamma'" == "" local alpha "alpha" capture assert ("`alpha'" != "") + ("`beta'" != "") + ("`gamma'" != "") == 1 if _rc { di as error "Only one of -alpha-, -beta-, -gamma- allowed" exit _rc } // Defaults // -------- if "`alpha'" == "alpha" local method 1 if "`beta'" == "beta" local method 2 if "`gamma'" == "gamma" local method 3 local full = cond("`full'"=="full",1,0) if !`full' capture drop _SQdist quietly { // Shrink the Data -> Speed // ------------------------ preserve marksample touse // Drop Sequences with Gaps if "`gapinclude'" == "" { tempvar lcensor rcensor gap by `_dta[SQiis]' (`_dta[SQtis]'), sort: gen `lcensor' = sum(!mi(`_dta[SQis]')) by `_dta[SQiis]' (`_dta[SQtis]'): gen `rcensor' = sum(mi(`_dta[SQis]')) by `_dta[SQiis]' (`_dta[SQtis]'): /// replace `rcensor' = ((_N-_n) == (`rcensor'[_N]-`rcensor'[_n])) & mi(`_dta[SQis]') by `_dta[SQiis]' (`_dta[SQtis]'): /// gen `gap' = sum(mi(`_dta[SQis]') & `lcensor' & !`rcensor') by `_dta[SQiis]' (`_dta[SQtis]'): /// replace `touse' = 0 if `gap'[_N]>0 } keep if `touse' if _N == 0 { noi di as text "(No observations)" exit } drop `lcensor' `rcensor' // Reshape Wide keep `_dta[SQis]' `_dta[SQiis]' `_dta[SQtis]' reshape wide `_dta[SQis]', i(`_dta[SQiis]') j(`_dta[SQtis]') unab varlist: `_dta[SQis]'* // Store reference sequence if "`refseqid'" != "" { save `"`results'"' capture confirm numeric variable `_dta[SQiis]' if !_rc { count if `_dta[SQiis]' == `refseqid' capture assert r(N) > 0 if _rc { noi di as error "reference sequence does not exist" exit 198 } keep if `_dta[SQiis]' == `refseqid' } else { count if `_dta[SQiis]' == "`refseqid'" capture assert r(N) > 0 if _rc { noi di as error "reference sequence does not exist" exit 198 } keep if `_dta[SQiis]' == "`refseqid'" } save `"`SQrefseq'"' use `"`results'"', clear } // Store a copy by `varlist', sort: gen `SQid' = 1 if _n==1 replace `SQid' = sum(`SQid') sort `SQid' save `"`noshrink'"' // Keep only one Sequence of each type by `varlist', sort: gen `N' = _N by `SQid', sort: keep if _n==1 if "`refseqid'" != "" append using `SQrefseq' sort `SQid' // Call the Mata-Function from lsq.lib (Source-Code: lsq.mata) if !`full' { capture drop _SQdist mata: sqdtref("`varlist'","`method'") drop if `SQid'==. sort `SQid' save `"`results'"', replace use `"`noshrink'"', clear merge `SQid' using `results' assert _merge ==3 drop _merge keep `_dta[SQiis]' _SQdist label var _SQdist /// `"sqom with k(`k') indel(`indelcost') subcost(`subcost') refseqid(`refseqid') "' if "`name'" != "" ren _SQdist `name' sort `_dta[SQiis]' save `"`results'"', replace restore sort `_dta[SQiis]' merge `_dta[SQiis]' using `results' assert _merge!=2 drop _merge noi di as text `"Distance Variable saved as"' /// as res `" `=cond("`name'"=="","_SQdist","`name'")' "' } if `full' { noi di as text "Perform " /// as res _N*(_N-1)/2 /// as text " calculations of DT distances" mata: sqdtfull("`varlist'",`method') restore noi di as text "Distance matrix saved as " as res "SQdist" } } // Return // ------ return local name =cond("`name'"=="","_SQdist","`name'") end exit