*! v1 by Ryan Knight 10may2011 *! version 2.0.0 Matthew White 26aug2014 pr cfout, rclass vers 10.1 /* ---------------------------------------------------------------------- */ /* check input for errors */ cap cfout_syntax 2 `0' if _rc { cap cfout_syntax 1 `0' if !_rc { * Do not suppress warning messages. cfout_syntax 1 `0' } else { cfout_syntax 2 `0' /*NOTREACHED*/ } } * Check the ID in the master data. loc id : list uniq id check_id `id', data("the master data") * Check -lower- and -upper-. if "`lower'" != "" & "`upper'" != "" { * cscript 18 di as err "options lower and upper are mutually exclusive" ex 198 } * Check -strcomp()-. if `:length loc strcomp' /// parse_cmd_opt strcomp, syntax(, *): `strcomp' * Check -numcomp()-. if `:length loc numcomp' /// parse_cmd_opt numcomp, syntax(, *): `numcomp' * Define `cfvars', the list of variables to compare. loc cfvars : list uniq varlist * Remove `id' from `cfvars'. if "`:list cfvars & id'" == "" /// loc warnid 0 else { loc cfvars : list cfvars - id loc warnid 1 } * Parse -saving()-. if `:length loc saving' { tempfile propsdta parse_saving `using', id(`id') cfvars(`cfvars') /// propsdta(`propsdta'): `saving' loc keepmaster `s(keepmaster)' loc keepusing `s(keepusing)' loc saving_args "`s(save_diffs)'" } * Define `numvarsm'. qui ds `cfvars', has(t numeric) * "m" suffix for "master": "numvarsm" for "numeric variables master." loc numvarsm `r(varlist)' * Define ID locals. qui ds `id', has(t numeric) loc idnumm `r(varlist)' foreach var of loc id { loc idtypes `idtypes' `:type `var'' loc idformats `idformats' `:form `var'' loc idvallabs "`idvallabs' "`:val lab `var''"" loc varlab st_varlabel(st_local("var")) mata: st_local("idvarlabs", st_local("idvarlabs") + /// sprintf("%f:%s", strlen(`varlab'), `varlab')) } if "`nopreserve'" == "" /// preserve keep `id' `cfvars' `keepmaster' sort `id' qui lab dir loc labnames `r(names)' tempfile tempmaster qui sa `tempmaster', o qui u `using', clear * Check -id()-. foreach var of loc id { cap conf var `var', exact if _rc { * cscript 20 di as err "variable `var' not found in using data" _n /// "(error in option {bf:id()})" ex 111 } } check_id `id', data("the using data") * Check that each ID variable is numeric in both datasets or * string in both datasets. qui ds `id', has(t numeric) * "u" suffix for "using": "idnumu" for "ID numeric using." loc idnumu `r(varlist)' if !`:list idnumm === idnumu' { foreach var of loc id { if `:list var in idnumm' + `:list var in idnumu' == 1 { * cscript 21 loc typem : word `:list posof "`var'" in id' of `idtypes' loc typeu : type `var' di as err "option id(): variable `var' is " /// "`typem' in master but `typeu' in using data" ex 106 } } } /* check input for errors */ /* ---------------------------------------------------------------------- */ * Error messages stop here; warnings start. if `warnid' /// di as txt "note: ID variables will not be compared." * Variables not in the using data unab all : _all loc varonlym : list cfvars - all if "`varonlym'" != "" { p di "note: the following variables are not in the using data:" di as res "`varonlym' di "{p_end}" loc cfvars : list cfvars - varonlym loc numvarsm : list numvarsm - varonlym } * Return stored result. ret loc varonlym `varonlym' * Variables that are numeric in one dataset and string in the other qui ds `cfvars', has(t numeric) loc numvarsu `r(varlist)' loc numonlym : list numvarsm - numvarsu loc numonlyu : list numvarsu - numvarsm loc difftype : list numonlym | numonlyu if "`difftype'" != "" { p di "note: the following variables are numeric in one dataset and" di "string in the other and will not be compared:" di as res "`difftype' di "{p_end}" loc cfvars : list cfvars - difftype loc numvarsm : list numvarsm - difftype } loc numvars `numvarsm' * Return stored result. ret loc difftype `difftype' * Implement -nostring-. if "`nostring'" != "" /// loc cfvars `numvars' * Implement -nonumeric-. if "`nonumeric'" != "" /// loc cfvars : list cfvars - numvars keep `id' `cfvars' `keepusing' sort `id' if "`keepusing'" != "" { tempfile tempusing qui sa `tempusing' } * Use temporary variable names to prevent name conflicts with * `cfvars' in the master data. foreach var of loc cfvars { tempvar cftemp ren `var' `cftemp' loc cftemps : list cftemps | cftemp } * Merge, using the value labels and ID metadata from the master data. * Drop shared value labels, including orphans in the master. foreach lab of loc labnames { cap lab drop `lab' } * Remove ID characteristics from the using data. foreach var of loc id { loc chars : char `var'[] foreach char of loc chars { char `var'[`char'] } } * Merge. tempvar merge qui merge `id' using `tempmaster', uniq keep(`cfvars') _merge(`merge') * Use the ID metadata from the master data. foreach var of loc id { gettoken format idformats : idformats gettoken lab idvallabs : idvallabs form `var' `format' cap conf numeric var `var' if !_rc /// lab val `var' `lab' } mata: attach_varlabs("id", "idvarlabs") * Observations in only one dataset foreach data in master using { * "ab" for "abbreviation" loc ab = substr("`data'", 1, 1) loc result = cond("`data'" == "master", 2, 1) qui cou if `merge' == `result' * Return stored result. ret sca Nonly`ab' = r(N) if `return(Nonly`ab')' & "`nomatch'" == "" { di as txt "note: the following observations are only in " /// "the `data' data:" sort `id' li `id' if `merge' == `result', ab(32) noo di } } qui keep if `merge' == 3 loc nmerged = _N * Implement string comparison options. forv i = 1/`:list sizeof cfvars' { loc var : word `i' of `cfvars' loc temp : word `i' of `cftemps' cap conf str var `var' if !_rc { qui cfsetstr `var' `temp', /// `lower' `upper' `nopunct' /// strcomp(`strcomp') caller(`=_caller()') } } * Parse -numcomp()-. gettoken numcomp_cmd rest : numcomp, p(", ") gettoken comma numcomp_opts : rest, p(", ") if `:length loc comma' /// mata: assert(st_local("comma") == ",") else /// assert !`:length loc numcomp_opts' if !`:length loc saving' { mata: cfout("discrep", "alldiff", "cfvars", "cftemps", /// "numcomp_cmd", "numcomp_opts", `=_caller()', "`dropdiff'" != "") } else { save_diffs, tempmaster(`tempmaster') tempusing(`tempusing') id(`id') /// cfvars(`cfvars') cftemps(`cftemps') `saving_args' /// numcomp_cmd(`numcomp_cmd') numcomp_opts(`numcomp_opts') /// caller(`=_caller()') `dropdiff' loc discrep = r(discrep) loc alldiff `r(alldiff)' } * Variables different on every observation if "`alldiff'" != "" { p di "note: the following variables differ on every observation" _c if "`dropdiff'" != "" /// di " and will not be included" _c di ":" di as res "`alldiff'" } if "`dropdiff'" != "" /// loc cfvars : list cfvars - alldiff * Return stored result. ret loc alldiff `alldiff' * Return stored results. ret loc varlist `cfvars' ret sca N = `nmerged' * `:list sizeof cfvars' ret sca discrep = `discrep' * Display summary. display_summary `return(discrep)' `return(N)' * Display warning messages. if `warnid' | "`return(varonlym)'`return(difftype)'" != "" | /// "`dropdiff'" != "" & "`return(alldiff)'" != "" { di as txt "note: not all variables specified are included." } if "`nomatch'" == "" { if return(Nonlym) { p di "note: not all observations were compared;" di "there are observations only in the master data." di "{p_end}" } if return(Nonlyu) { p di "note: not all observations were compared;" di "there are observations only in the using data." di "{p_end}" } } * Ensure that if -saving()- and -nopreserve- are both specified, * the differences dataset is left in memory. if `:length loc saving' & "`nopreserve'" != "" { * Drop value label orphans, * which are not saved in the differences dataset. qui lab dir foreach lab in `r(names)' { qui ds, has(vallab `lab') if "`r(varlist)'" == "" /// lab drop `lab' } assert !c(changed) } end /* -------------------------------------------------------------------------- */ /* error message programs */ pr assert_is_opt mata: st_local("name", (regexm(st_local("0"), "^(.*)\(\)$") ? /// regexs(1) : st_local("0"))) cap conf name `name' if `:list sizeof name' > 1 | _rc /// err 198 end pr error_overlap syntax anything(name=overlap id=overlap), opt1(str) opt2(str) [what(str)] * Parse `overlap'. gettoken overlap rest : overlap if !`:length loc overlap' | `:length loc rest' /// err 198 * Parse -opt*()-. forv i = 1/2 { loc 0 "`opt`i''" syntax anything(name=opt`i'), [SUBopt] loc temp : subinstr loc opt`i' "(" "", cou(loc count) if !`count' /// loc opt`i' `opt`i''() loc sub`i' = "`subopt'" != "" } if "`what'" != "" /// di as err "`what' " _c loc options = cond(`sub1' & `sub2', "sub", "") + "options" di as err `"`overlap' cannot be specified to "' /// "both `options' `opt1' and `opt2'" if !(`sub1' & `sub2') /// ex 198 end pr error_saving syntax anything(name=rc id="return code"), [SUBopt(str)] if "`subopt'" != "" { assert_is_opt `subopt' di as err "invalid `subopt' suboption" } di as err "invalid saving() option" ex `rc' end pr warn_deprecated syntax anything(name=old), [new(str asis)] assert_is_opt `old' if !`:length loc new' /// di as txt "note: option {cmd:`old'} is deprecated and will be ignored." else { loc 0 "`new'" syntax anything(name=new), [SUBopt] gettoken new rest : new if `:length loc rest' /// err 198 loc option = cond("`subopt'" != "", "suboption", "option") di as txt "note: option {cmd:`old'} is deprecated; " /// "use `option' {cmd:`new'} instead." } end /* error message programs */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* parse user input */ pr cfout_syntax gettoken version 0 : 0 * Check that `0' satisfies version `version' syntax. if `version' == 1 { #d ; syntax [varlist] using, /* main */ id(varname) /* string comparison */ [Lower Upper NOPunct] /* other */ [NAme(str) Format(str) ALTid(varname) replace NOString NOMATch] ; #d cr di as txt "note: you are using old {cmd:cfout} syntax; " /// "see {helpb cfout} for new syntax." if `"`name'"' == "" /// loc name discrepancy report.csv else /// warn_deprecated name(), new("saving()") if "`replace'" != "" /// warn_deprecated replace, new("saving(,replace)", sub) loc saving "`"`name'"', csv `replace'" if "`format'" != "" /// warn_deprecated format() if "`altid'" != "" /// warn_deprecated altid() di as txt "note: option {cmd:dropdiff} is implied." loc dropdiff dropdiff } else if `version' == 2 { #d ; syntax [varlist] using, /* main */ id(varlist) /* string comparison */ [Lower Upper NOPunct STRComp(str asis)] /* other */ [SAving(str asis) NUMComp(str asis) NOString NONUMeric DROPDiff NOMATch NOPreserve] ; #d cr } else { err 198 } mata: st_local("names", invtokens(st_dir("local", "macro", "*")')) foreach name of loc names { c_local `name' "``name''" } end pr check_id syntax varlist, data(str) cap isid `varlist', missok if _rc { * cscript 15 * "nid" for "number of IDs" loc nid : list sizeof varlist di as err "option id(): " plural(`nid', "variable") " `varlist' " /// plural(`nid', "does", "do") " not uniquely identify " /// "the observations in `data'" ex 459 } if c(stata_version) >= 13 { qui ds `varlist', has(t strL) if "`r(varlist)'" != "" { * cscript 16 loc nothe = regexr("`data'", "^the ", "") di as err "option id(): `nothe':" _nostrl error : `r(varlist)' /*NOTREACHED*/ } } end * Syntax: parse_cmd_opt option_name, syntax(): command * Parse an option named option_name that takes a command as its argument, * checking that it matches the syntax specified to option -syntax()-. pr parse_cmd_opt _on_colon_parse `0' loc 0 "`s(before)'" loc command "`s(after)'" syntax name(name=opt), [syntax(str)] gettoken cmdname 0 : command, p(", ") cap conf name `cmdname' if `:list sizeof cmdname' > 1 | _rc { * cscript 62 di as err "invalid command name" di as err "(error in option {bf:`opt'()})" ex 198 } cap noi syntax `syntax' if _rc { * cscript 50 di as err "(error in option {bf:`opt'()})" ex `=_rc' } end pr notes_count, rclass syntax [varlist] loc N 0 foreach var of loc varlist { loc note0 : char `var'[note0] if "`note0'" != "" /// loc N = max(`N', `note0') } ret sca N = `N' end pr parse_saving, sclass _on_colon_parse `0' loc 0 "`s(before)'" syntax using, id(varlist) cfvars(varlist) propsdta(str) loc 0 "`s(after)'" loc temp `using' cap noi syntax anything(name=fn id=filename equalok everything), /// [Variable(name) MASterval(name) USingval(name) All(name) All2 /// KEEPMASter(varlist) KEEPUSing(str asis) Properties(str asis) LAbval /// csv replace] loc using `temp' if _rc { error_saving `=_rc' /*NOTREACHED*/ } * Parse `fn'. gettoken fn rest : fn if `:length loc rest' { * cscript 101 di as err "invalid filename" error_saving 198 /*NOTREACHED*/ } * Add a file extension to `fn' if necessary. mata: if (pathsuffix(st_local("fn")) == "") /// st_local("fn", st_local("fn") + /// (st_local("csv") != "" ? ".csv" : ".dta"));; * Check `fn' and -replace-. cap conf new f `"`fn'"' if ("`replace'" == "" & _rc) | ("`replace'" != "" & !inlist(_rc, 0, 602)) { * cscript 8 cap noi conf new f `"`fn'"' error_saving `=_rc' /*NOTREACHED*/ } * Check -all()- and -all-. if "`all'" != "" & "`all2'" != "" { * cscript 55 di as err "suboptions all() and all are mutually exclusive" error_saving 198 /*NOTREACHED*/ } if `:length loc properties' { notes_count `cfvars' loc notesN = r(N) } * Parse -keepusing()-. if `:length loc keepusing' | `:length loc properties' { preserve qui d `using' if r(N) /// qui u `using' in 1, clear else qui u `using', clear if `:length loc keepusing' { cap noi unab keepusing : `keepusing' if _rc { * cscript 73 error_saving `=_rc', sub(keepusing()) /*NOTREACHED*/ } } if `:length loc properties' { notes_count `cfvars' loc notesN = max(`notesN', r(N)) } restore } * Parse -keepmaster()- and -keepusing()-. foreach list in keepmaster keepusing { loc `list' : list uniq `list' loc `list' : list `list' - id } * Default variable names if "`variable'" == "" /// loc variable Question if "`masterval'" == "" /// loc masterval Master if "`usingval'" == "" /// loc usingval Using if "`all2'" != "" /// loc all diff * Parse -properties()-. if `:length loc properties' { parse_saving_properties, saving(`propsdta') cfvars(`cfvars') /// variable(`variable') notes_count(`notesN'): `properties' qui d using `propsdta', varl loc propvars `r(varlist)' loc propvars : list propvars - variable } * Check variable names. loc properties `propvars' loc opts variable masterval usingval all keepmaster keepusing properties while `:list sizeof opts' { gettoken opt1 opts : opts foreach opt2 of loc opts { loc overlap : list `opt1' & `opt2' if "`overlap'" != "" { * cscript 29 gettoken first : overlap error_overlap `first', what(variable) /// opt1(`opt1', sub) opt2(`opt2', sub) error_saving 198 /*NOTREACHED*/ } } loc overlap : list id & `opt1' if "`overlap'" != "" { * cscript 29 gettoken first : overlap error_overlap `first', what(variable) /// opt1(id) opt2("saving(,`opt1'())", sub) /*NOTREACHED*/ } } sret loc keepmaster `keepmaster' sret loc keepusing `keepusing' * Arguments for -save_diffs- if `:length loc properties' /// loc propsdta_opt propsdta(`propsdta') loc args fn(`"`fn'"') variable(`variable') /// masterval(`masterval') usingval(`usingval') all(`all') /// keepmaster(`keepmaster') keepusing(`keepusing') `propsdta_opt' /// `labval' `csv' `replace' sret loc save_diffs "`args'" end pr parse_saving_properties _on_colon_parse `0' loc 0 "`s(before)'" syntax, saving(str) cfvars(varlist) variable(name) notes_count(integer) loc 0 ", `s(after)'" cap noi syntax, [Type(name) Type2 Format(name) Format2 /// VALLabel(name) VALLabel2 VARLabel(name) VARLabel2 /// Char(namelist) CHARStub(name) Notes(str) NOTESStub(name)] loc sub sub(properties()) if _rc { error_saving `=_rc', `sub' /*NOTREACHED*/ } foreach opt in type format vallabel varlabel { if "``opt''" != "" & "``opt'2'" != "" { * cscript 83 di as err "suboptions `opt'() and `opt' are mutually exclusive" error_saving 198, `sub' /*NOTREACHED*/ } * Default names if "``opt'2'" != "" /// loc `opt' `opt' } * Parse -char()- and -charstub()-. if "`charstub'" == "" /// loc charstub char_ loc char : list uniq char foreach c of loc char { loc charvar `charstub'`c' loc charvars : list charvars | charvar cap conf name `charvar' if _rc { * cscript 85 di as err "suboptions char(), charstub(): `charvar' invalid name" error_saving `=_rc', `sub' /*NOTREACHED*/ } } * Parse -notes()- and -notesstub()-. if "`notesstub'" == "" /// loc notesstub note loc _all _all if `:list _all in notes' { if `notes_count' { numlist "1/`notes_count'" loc notes_all `r(numlist)' } while `:list _all in notes' { loc notes : list notes - _all } } if "`notes'" != "" { cap noi numlist "`notes'", min(1) int r(>0) if _rc { * cscript 88 di as err "suboption notes() invalid" error_saving `=_rc', `sub' /*NOTREACHED*/ } loc notes `r(numlist)' loc notes : list uniq notes } loc notes : list notes | notes_all if "`notes'" != "" { numlist "`notes'", sort loc notes `r(numlist)' loc notes : list retok notes loc notes " `notes'" loc notevars : subinstr loc notes " " " `notesstub'", all loc notes `notes' loc notevars `notevars' foreach var of loc notevars { cap conf name `var' if _rc { * cscript 88 di as err "suboptions notes(), notesstub(): `var' invalid name" error_saving `=_rc', `sub' /*NOTREACHED*/ } } } * Check variable names. loc tempchar `char' loc char `charvars' loc tempnotes `notes' loc notes `notevars' loc opts type format vallabel varlabel char notes while `:list sizeof opts' { gettoken opt1 opts : opts foreach opt2 of loc opts { loc overlap : list `opt1' & `opt2' if "`overlap'" != "" { * cscript 29 gettoken first : overlap error_overlap `first', what(variable) /// opt1(`opt1', sub) opt2(`opt2', sub) error_saving 198, `sub' /*NOTREACHED*/ } } if `:list variable in `opt1'' { * cscript 29 error_overlap `variable', what(variable) opt1(variable, sub) /// opt2("properties(`opt1'())", sub) error_saving 198 /*NOTREACHED*/ } } loc char `tempchar' loc notes `tempnotes' preserve mata: load_props("cfvars", "variable", "type", "format", "vallabel", /// "varlabel", "char", "charstub", "notes", "notesstub") sort `variable' qui sa `"`saving'"' end /* parse user input */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* display programs */ pr p di as txt "{p 0 4 2}" end pr display_summary args discrep N loc line1a "Number of differences: " loc line1b `discrep' loc line2a "Number of values compared: " loc line2b `N' loc line3a "Percent differences: " loc line3b = strofreal(100 * `discrep' / `N', "%9.3f") + "%" loc linelen = max(strlen("`line1a'`line1b'"), /// strlen("`line2a'`line2b'"), strlen("`line3a'`line3b'")) loc col _col(3) #d ; di _n `col' "{hline `linelen'}" _n `col' as txt "`line1a'" as res "`line1b'" _n `col' as txt "`line2a'" as res "`line2b'" _n `col' as txt "`line3a'" as res "`line3b'" _n `col' "{hline `linelen'}" ; #d cr end /* display programs */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* string comparison */ pr cfsetstr syntax varlist(min=2 max=2), caller(real) /// [lower upper NOPUNCT strcomp(str asis)] foreach var of loc varlist { if "`lower'`upper'" != "" { qui replace `var' = `lower'`upper'(`var') } if "`nopunct'" != "" { foreach c in ! ? "'" { qui replace `var' = subinstr(`var', "`c'", "", .) } foreach c in . , -- / ; : ( ) { qui replace `var' = subinstr(`var', "`c'", " ", .) } qui replace `var' = itrim(strtrim(`var')) } } if `:length loc strcomp' { gettoken cmd opts : strcomp, p(", ") cap noi vers `caller': `cmd' `varlist'`opts' if _rc { * cscript 52 di as err "(error in option {bf:strcomp()})" ex `=_rc' } } end /* string comparison */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* save differences dataset */ pr save_diffs, rclass #d ; syntax, /* main */ tempmaster(str) [tempusing(str)] id(varlist) [cfvars(varlist) cftemps(varlist)] /* -saving()- arguments */ fn(str) variable(name) masterval(name) usingval(name) [all(name) keepmaster(namelist) keepusing(namelist) propsdta(str) labval csv replace] /* other */ caller(real) [numcomp_cmd(name) numcomp_opts(str asis) dropdiff] ; #d cr * Index `id'. * "ididx" for "ID index" tempvar ididx gen double `ididx' = _n qui compress `ididx' preserve keep `id' `ididx' sort `ididx' tempfile idmap qui sa `idmap' restore drop `id' #d ; mata: cfout( /* output */ "discrep", "alldiff", /* comparison variables */ "cfvars", "cftemps", /* other */ "numcomp_cmd", "numcomp_opts", `caller', "`dropdiff'" != "", /* -id()- */ "ididx", /* new variable names */ "variable", "masterval", "usingval", "all", /* other */ "`labval'" != ""); #d cr tempvar order gen double `order' = _n ret sca discrep = `discrep' tempvar merge * Merge back in the ID variables. sort `ididx' qui merge `ididx' using `idmap', uniqus _merge(`merge') qui drop if `merge' == 2 drop `ididx' `merge' * -saving(, keepmaster() keepusing())- foreach data in master using { if "`keep`data''" != "" { sort `id' qui merge `id' using `temp`data'', uniqus /// keep(`keep`data'') _merge(`merge') qui drop if `merge' == 2 drop `merge' } } * -saving(, properties())- if !`:length loc propsdta' /// loc propvars `variable' else { qui d using `"`propsdta'"', varl loc propvars `r(varlist)' sort `variable' qui merge `variable' using `"`propsdta'"', uniqus _merge(`merge') qui drop if `merge' == 2 drop `merge' } * Sort so that within `id', Question remains sorted by * the original variable order. sort `id' `order' drop `order' order `id' `keepmaster' `keepusing' `propvars' `all' `masterval' `usingval' if "`csv'" == "" { * Remove the dataset's label and characteristics. lab data loc chars : char _dta[] foreach char of loc chars { char _dta[`char'] } qui compress qui sa `"`fn'"', `replace' } else { qui ds, has(t numeric) loc numvars `r(varlist)' if "`numvars'" != "" { foreach var of loc numvars { lab val `var' } form `numvars' %24.0g } qui outsheet using `"`fn'"', c `replace' } ret loc alldiff `alldiff' end /* save differences dataset */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* type definitions, etc. */ vers 10.1 * Convert real x to string using -strofreal(x, `RealFormat')-. loc RealFormat ""%24.0g"" loc RS real scalar loc RR real rowvector loc RC real colvector loc RM real matrix loc SS string scalar loc SR string rowvector loc SC string colvector loc SM string matrix loc TS transmorphic scalar loc TR transmorphic rowvector loc TC transmorphic colvector loc TM transmorphic matrix loc boolean `RS' loc True 1 loc False 0 * A local macro name loc lclname `SS' mata: /* type definitions, etc. */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* interface with Stata */ void st_sviewL(`SM' V, `RM' i, `TR' j) { `RS' n, ctr `boolean' any any = `False' ctr = 0 n = length(j) while (++ctr <= n & !any) any = st_vartype(j[ctr]) == "strL" if (any) V = st_sdata(i, j) else { pragma unset V st_sview(V, i, j) } } `SS' smallest_vartype(`TC' var) { `RS' min, max `SS' strpound if (eltype(var) == "real") { if (!all(var :== floor(var)) & length(var)) return("double") else { min = min(var) max = max(var) if (min >= -127 & max <= 100 | min >= .) return("byte") if (min >= -32767 & max <= 32740) return("int") if (min >= -9999999 & max <= 9999999) return("float") if (min >= -2147483647 & max <= 2147483620) return("long") return("double") } } else if (eltype(var) == "string") { max = max(strlen(var)) if (max >= .) max = 0 strpound = sprintf("str%f", min((max((max, 1)), c("maxstrvarlen")))) if (c("stata_version") < 13 | !max) return(strpound) return(max > c("maxstrvarlen") | any(strpos(var, char(0))) ? "strL" : strpound) } else { _error("invalid var") } /*NOTREACHED*/ } void st_store_new(`TC' vals, `SS' name, |`SS' varlab) { `RS' idx, nobs if (!anyof(("real", "string"), eltype(vals))) _error("invalid vals") nobs = rows(vals) if (nobs > st_nobs()) st_addobs(nobs - st_nobs()) idx = st_addvar(smallest_vartype(vals), name) if (nobs) { if (eltype(vals) == "real") st_store((1, nobs), idx, vals) else st_sstore((1, nobs), idx, vals) } st_varlabel(idx, varlab) } void attach_varlabs(`lclname' _varlist, `lclname' _varlabs) { `RS' pos, len, n, i `SS' varlabs `SR' vars vars = tokens(st_local(_varlist)) varlabs = st_local(_varlabs) n = length(vars) for (i = 1; i <= n; i++) { pos = strpos(varlabs, ":") assert(pos) len = strtoreal(substr(varlabs, 1, pos - 1)) assert(len == floor(len) & len >= 0 & len <= 80) st_varlabel(vars[i], substr(varlabs, pos + 1, len)) varlabs = substr(varlabs, pos + len + 1, .) } assert(varlabs == "") } /* interface with Stata */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* properties dataset */ // Create and load the properties dataset. void load_props(`lclname' _cfvars, `lclname' _variable, `lclname' _type, `lclname' _format, `lclname' _vallabel, `lclname' _varlabel, `lclname' _char, `lclname' _charstub, `lclname' _notes, `lclname' _notesstub) { `RS' nvars, nchars, ncharsall, i, j `SS' name, charstub, notesstub, lab `SC' var, type, format, vallab, varlab, chars, charsall `SM' charcols var = tokens(st_local(_cfvars))' nvars = length(var) type = format = vallab = varlab = J(nvars, 1, "") chars = tokens(st_local(_char)) charsall = chars, "note" :+ tokens(st_local(_notes)) nchars = length(chars) ncharsall = length(charsall) charcols = J(nvars, ncharsall, "") for (i = 1; i <= nvars; i++) { type[i] = st_vartype(var[i]) format[i] = st_varformat(var[i]) vallab[i] = st_varvaluelabel(var[i]) varlab[i] = st_varlabel(var[i]) for (j = 1; j <= ncharsall; j++) charcols[i, j] = st_global(sprintf("%s[%s]", var[i], charsall[j])) } st_dropvar(.) st_store_new(var, st_local(_variable)) name = st_local(_type) if (name != "") st_store_new(type, name, "Storage type") name = st_local(_format) if (name != "") st_store_new(format, name, "Display format") name = st_local(_vallabel) if (name != "") st_store_new(vallab, name, "Value label") name = st_local(_varlabel) if (name != "") st_store_new(varlab, name, "Variable label") charstub = st_local(_charstub) notesstub = st_local(_notesstub) for (i = 1; i <= ncharsall; i++) { if (i <= nchars) { name = charstub + charsall[i] lab = "Characteristic " + charsall[i] } else { name = subinstr(charsall[i], "note", notesstub, 1) lab = subinstr(charsall[i], "note", "Note ", 1) } st_store_new(charcols[,i], name, lab) } } /* properties dataset */ /* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */ /* compare datasets */ void resize_colvector(`TC' v, `RS' n) { `RS' ncur `SS' eltype `TS' miss ncur = length(v) if (n > ncur) { eltype = eltype(v) if (eltype == "real") miss = . else if (eltype == "string") miss = "" else _error("invalid eltype") v = v \ J(n - ncur, 1, miss) } else if (n < ncur) v = v[|1 \ n|] } void diff_dta_resize(pointer(`TC') rowvector v, `RS' n) { `RS' nv, i nv = length(v) for (i = 1; i <= nv; i++) resize_colvector(*v[i], n) } void diff_dta_post( /* differences dataset */ `RC' id_merge, `RC' id_diff, `SC' varname, `TC' master, `TC' usingval, `RC' all, `RS' nofill, pointer(`TC') rowvector diffdta, /* differences to post */ `SS' cfvar, `TM' mu, `RC' diff, `boolean' tostring, `boolean' labval, /* other */ `boolean' postall) { // "n" prefix for "number of": "ncomps" for "number of comparisons." `RS' lastrow, ncomps, i, j `RC' select, blankvals, idx `SS' format, vallab `SC' blanktext `TM' comps if (postall) { ncomps = st_nobs() select = J(ncomps, 1, 1) } else { select = diff ncomps = sum(diff) } if (!ncomps) return // Store the master and using values in comps. comps = select(mu, select) if (tostring) { if (!labval) comps = strofreal(comps, `RealFormat') else { // -saving(, labval)- format = st_varformat(cfvar) vallab = st_varvaluelabel(cfvar) if (vallab != "") if (!st_vlexists(vallab)) vallab = "" if (vallab == "") comps = strofreal(comps, format) else { // Values with blank value label text pragma unset blankvals pragma unset blanktext st_vlload(vallab, blankvals, blanktext) blankvals = select(blankvals, blanktext :== "") comps = st_vlmap(vallab, comps) idx = select(1::st_nobs(), select) for (i = 1; i <= ncomps; i++) for (j = 1; j <= 2; j++) if (comps[i, j] == "") if (!anyof(blankvals, mu[idx[i], j])) comps[i, j] = strofreal(mu[idx[i], j], format) } } } // Add observations to the dataset. lastrow = nofill + ncomps - 1 if (lastrow > length(id_diff)) diff_dta_resize(diffdta, 2 * lastrow) id_diff[|nofill \ lastrow|] = select(id_merge, select) varname[|nofill \ lastrow|] = J(ncomps, 1, cfvar) master[|nofill \ lastrow|] = comps[,1] usingval[|nofill \ lastrow|] = comps[,2] if (postall) all[|nofill \ lastrow|] = diff nofill = nofill + ncomps } void error_numcomp(`RS' rc, `SS' cmd) { errprintf("\ncommand line was:\n %s\n\n", cmd) errprintf("(error in option {bf:numcomp()})\n") exit(rc) } // Compare the master and using datasets, // optionally creating the differences dataset. void cfout( /* output */ `lclname' _discrep, `lclname' _alldiff, /* comparison variables */ `lclname' _cfvars, `lclname' _cftemps, /* other */ `lclname' _numcomp_cmd, `lclname' _numcomp_opts, `RS' _caller, `boolean' _dropdiff, /* ---------------------------------------------------------------------- */ /* -saving()- */ /* ---------------------------------------------------------------------- */ /* -id()- */ |`lclname' _id, /* new variable names */ `lclname' _variable, `lclname' _masterval, `lclname' _usingval, `lclname' _all, /* other */ `boolean' _labval) { // Constants `RS' N_ARGS, N_ARGS_SAVING `RS' nofill, vardiffs, nvars, ndiffs, i `RC' id_merge, id_diff, all, diff `SS' numcomp_cmd, numcomp_opts, id_name, all_name, numcomp_gen, cmd `SR' cfvars, cftemps, strvars, alldiff `SC' var `TC' master, usingval // "mu" for "master/using" `TM' mu `boolean' diffdta pointer(`TC') rowvector cols N_ARGS = 8 N_ARGS_SAVING = 6 assert(anyof((N_ARGS, N_ARGS + N_ARGS_SAVING), args())) diffdta = args() == N_ARGS + N_ARGS_SAVING cfvars = tokens(st_local(_cfvars)) cftemps = tokens(st_local(_cftemps)) nvars = length(cfvars) assert(nvars == length(cftemps)) numcomp_cmd = st_local(_numcomp_cmd) numcomp_opts = st_local(_numcomp_opts) // Prepare the differences dataset. if (diffdta) { // id_merge is a view onto the ID variable in the merged dataset. // It must be the first variable in the dataset so that // the view does not need to be updated. id_name = st_local(_id) stata("order " + id_name) pragma unset id_merge st_view(id_merge, ., id_name) assert(cols(id_merge) == 1) for (i = 1; i <= nvars; i++) { if (st_isstrvar(cfvars[i])) { pragma unset strvars strvars = strvars, cfvars[i] } } // Variables of the differences dataset id_diff = J(0, 1, .) var = J(0, 1, "") master = usingval = J(0, 1, (length(strvars) | _labval ? "" : .)) cols = &id_diff, &var, &master, &usingval all = J(0, 1, .) if ((all_name = st_local(_all)) != "") cols = cols, &all } ndiffs = 0 numcomp_gen = st_tempname() // Index of the first unfilled element of the differences dataset vectors nofill = 1 for (i = 1; i <= nvars; i++) { // Make mu a view onto cfvars[i] and cftemps[i]. pragma unset mu if (st_isnumvar(cfvars[i])) st_view(mu, ., (cfvars[i], cftemps[i])) else st_sviewL(mu = "", ., (cfvars[i], cftemps[i])) if (st_isstrvar(cfvars[i]) | numcomp_cmd == "") diff = mu[,1] :!= mu[,2] else { // -numcomp()- cmd = sprintf("%s %s %s, generate(%s) %s", numcomp_cmd, cfvars[i], cftemps[i], numcomp_gen, numcomp_opts) stata(sprintf("cap noi version %f: %s", _caller, cmd)) if (c("rc")) { // cscript 64 error_numcomp(c("rc"), cmd) /*NOTREACHED*/ } if (st_isstrvar(numcomp_gen)) { // cscript 68 errprintf(numcomp_cmd + " created string variable where " + "numeric variable expected\n") error_numcomp(109, cmd) /*NOTREACHED*/ } st_view(diff, ., numcomp_gen) diff = diff :!= 0 } vardiffs = sum(diff) if (vardiffs == st_nobs() & st_nobs()) { pragma unset alldiff alldiff = alldiff, cfvars[i] if (_dropdiff & diffdta & !_labval & anyof(strvars, cfvars[i])) { strvars = select(strvars, strvars :!= cfvars[i]) if (!length(strvars)) { // Convert master usingval to real. if (!length(master)) master = usingval = J(0, 1, .) else { master = strtoreal(master) usingval = strtoreal(usingval) } } } } if (!(_dropdiff & vardiffs == st_nobs())) { ndiffs = ndiffs + vardiffs if (diffdta) { diff_dta_post( /* differences dataset */ id_merge, id_diff, var, master, usingval, all, nofill, cols, /* differences to post */ cfvars[i], mu, diff, st_isnumvar(cfvars[i]) & (length(strvars) | _labval), _labval, /* other */ all_name != "") } } // This should require no view updates. if (st_isnumvar(cfvars[i]) & numcomp_cmd != "") st_dropvar(numcomp_gen) st_dropvar((cfvars[i], cftemps[i])) } // Load the differences dataset. if (diffdta) { diff_dta_resize(cols, nofill - 1) st_dropvar(.) st_store_new(id_diff, id_name) st_store_new(var, st_local(_variable), "Variable name") st_store_new(master, st_local(_masterval), "Master value") st_store_new(usingval, st_local(_usingval), "Using value") if (all_name != "") st_store_new(all, all_name, "Master and using values differ") } st_local(_discrep, strofreal(ndiffs, `RealFormat')) st_local(_alldiff, invtokens(alldiff)) } /* compare datasets */ /* -------------------------------------------------------------------------- */ end