*! 1.0.0 KNK 31 July 2025
************************************************************
/* sumbar: bar chart of column sums

   Sums each numeric variable in varlist (optionally within the
   categories of by()) and draws the sums, or their share of the
   grand total, with graph bar.

   Options
     by(varname)      break each sum down by category; varname must be
                      a string variable or a numeric variable with
                      value labels attached
     title(string)    graph title (default "Totals", or
                      "Percentage Distribution" with percent)
     percent          plot each sum as a percentage of the grand total
     n                show non-missing counts: in the subtitle when the
                      count is the same for every variable, otherwise
                      appended to each variable label
     total            show the grand total in the subtitle
     keepmiss         do not drop observations with missing values in
                      varlist or by(); missing by() values are shown
                      as a category labeled "Missing"
     recast(string)   graph subcommand to use in place of bar
     sort             sort bars in descending order
     saving(string)   export the graph; ".png" is appended when the
                      file name has no extension
     intensity(#)     bar fill intensity (default 50)
     overopts(), blabelopts(), legendopts(), graphopts()
                      passed through to the graph command

   Errors
     198   string variable in varlist, or numeric by() without labels
     459   percent requested but the grand total is zero
     2000  no observations after if/in (and missing-value) filtering

   The data in memory are preserved and restored on exit. */
************************************************************
program define sumbar
    version 12.0
    syntax varlist [if] [in], ///
        [By(varname)] ///
        [Title(string)] ///
        [Percent] ///
        [n] ///
        [TOtal] ///
        [KEEPmiss] ///
        [REcast(string)] ///
        [SOrt] ///
        [SAving(string)] ///
        [Intensity(integer 50)] ///
        [OVERopts(string asis)] ///
        [BLABELopts(string asis)] ///
        [LEGENDopts(string asis)] ///
        [GRAPHopts(string asis)]

    qui {

        ************************************************************
        /* SECTION 0: VALIDATE INPUTS
           syntax has already confirmed that every variable exists and
           has expanded the varlist, so only the type check remains */
        ************************************************************
        local nvars : word count `varlist'

        * Sums are only defined for numeric variables
        ds `varlist', has(type string)
        local string_vars `r(varlist)'
        if "`string_vars'" != "" {
            di as error "String variables not allowed in varlist: `string_vars'"
            di as error "sumbar calculates sums and so, only numeric variables are valid"
            exit 198
        }

        ************************************************************
        /* SECTION 1: PRESERVE DATA AND APPLY CONDITIONS */
        ************************************************************
        * Original data are restored automatically on any exit
        preserve

        * Sample indicator for the if/in conditions; without keepmiss,
        * observations with a missing value in any varlist variable are excluded
        if "`keepmiss'" != "" {
            marksample touse, strok novarlist
        }
        else {
            marksample touse, strok
        }

        * Without keepmiss, observations with a missing by() value are excluded too
        if "`by'" != "" & "`keepmiss'" == "" {
            replace `touse' = 0 if missing(`by')
        }

        keep if `touse' == 1

        count
        if r(N) == 0 {
            di as error "No observations remain after applying if/in conditions"
            exit 2000
        }

        if "`by'" != "" {
            * Keep the user's variable name; the local by is re-pointed at a
            * sequentially coded (1, 2, 3, ...) temporary copy below
            local original_by "`by'"

            capture confirm string variable `by'
            if !_rc {
                tempvar by_encoded
                encode `by', gen(`by_encoded')
                local by "`by_encoded'"
            }
            else {
                local vallbl : value label `by'
                if "`vallbl'" == "" {
                    di as error "Numeric by() variable must have value labels attached"
                    di as error "Either use a string variable or attach value labels to `original_by'"
                    exit 198
                }

                * decode/encode gives sequential codes that carry the label text
                tempvar by_sequential temp_decoded
                decode `by', gen(`temp_decoded')
                * Codes with no label decode to "" and would encode to missing,
                * leaving those rows in the grand total but in no bar; use the
                * numeric code as the category text instead
                replace `temp_decoded' = string(`by') if `temp_decoded' == "" & !missing(`by')
                encode `temp_decoded', gen(`by_sequential')
                local by "`by_sequential'"
            }

            if "`keepmiss'" != "" {
                count if missing(`original_by')
                if r(N) > 0 {
                    * Codes are sequential, so the next free code is (number of levels + 1)
                    levelsof `by', local(levels)
                    local max_level : word count `levels'
                    local missing_code = `max_level' + 1

                    replace `by' = `missing_code' if missing(`original_by')

                    local vallbl : value label `by'
                    if "`vallbl'" != "" {
                        label define `vallbl' `missing_code' "Missing", add
                    }
                    else {
                        * No label attached (e.g. every by() value was missing):
                        * create one so the category is still shown as "Missing"
                        tempname misslbl
                        label define `misslbl' `missing_code' "Missing"
                        label values `by' `misslbl'
                    }
                }
            }
        }

        * Graph command: recast() replaces the bar subcommand
        local graphcmd = cond("`recast'" != "", "graph `recast'", "graph bar")

        * Default title
        if "`title'" == "" {
            local title = cond("`percent'" != "", "Percentage Distribution", "Totals")
        }

        * Translate the sort flag into the over() suboptions
        if "`sort'" != "" {
            local sort "sort(1) descending"
        }

        * Default to PNG when the file name has no extension. Only a trailing
        * ".ext" counts, so dots in the directory part (./out, my.dir/out)
        * are not mistaken for an extension.
        if "`saving'" != "" {
            if !regexm(`"`saving'"', "\.[A-Za-z0-9]+$") {
                local saving "`saving'.png"
            }
        }

        ************************************************************
        /* SECTION 2: COMMON CALCULATIONS
           Totals, counts and labels needed with or without by() */
        ************************************************************
        local ytitle = cond("`percent'" != "", "Percentage", "Total")
        local graph_decimal = cond("`percent'" != "", "%11.1f", "%11.0fc")
        local yaxis_format = cond("`percent'" != "", "", "ylabel(, format(%15.0fc))")

        * Non-missing count per variable; flag whether it differs from the first
        local varying_n = 0
        if "`n'" != "" {
            forval i = 1/`nvars' {
                local thisvar : word `i' of `varlist'
                count if !missing(`thisvar')
                local n_`i' = r(N)
                if `i' > 1 & `n_`i'' != `n_1' {
                    local varying_n = 1
                }
            }
        }

        * Grand total across all variables
        local grand_total = 0
        forval i = 1/`nvars' {
            local thisvar : word `i' of `varlist'
            sum `thisvar'
            local grand_total = `grand_total' + r(sum)
        }

        * Percentages divide by the grand total, so a zero total is undefined
        if "`percent'" != "" & `grand_total' == 0 {
            di as error "Cannot compute percentages: the grand total of the variables is zero"
            exit 459
        }

        * Variable labels, falling back to the variable name
        forval i = 1/`nvars' {
            local thisvar : word `i' of `varlist'
            local varlabel`i' : variable label `thisvar'
            if "`varlabel`i''" == "" {
                local varlabel`i' "`thisvar'"
            }
            * Per-variable n only when it is not the same everywhere
            if "`n'" != "" & `varying_n' == 1 {
                local varlabel`i' "`varlabel`i'' (n = `n_`i'')"
            }
        }

        * Subtitle: constant N and/or grand total
        local subtitle ""
        local sub_parts ""
        if "`n'" != "" & `varying_n' == 0 {
            local sub_parts "N = `n_1'"
        }
        if "`total'" != "" {
            local gt_formatted : di %15.0fc `grand_total'
            local gt_formatted = trim("`gt_formatted'")
            if "`sub_parts'" != "" {
                local sub_parts "`sub_parts', Overall total = `gt_formatted'"
            }
            else {
                local sub_parts "Overall total = `gt_formatted'"
            }
        }
        if "`sub_parts'" != "" {
            local subtitle `"subtitle("`sub_parts'", size(small))"'
        }

        * Options shared by both graph calls (built in pieces to keep lines short)
        local common_opts `"title("`title'", size(medsmall)) `subtitle' ytitle("`ytitle'") `yaxis_format'"'
        local common_opts `"`common_opts' blabel(bar, format(`graph_decimal')) blabel(bar, `blabelopts')"'
        local common_opts `"`common_opts' legend(pos(6) row(1)) legend(`legendopts') intensity(`intensity') `graphopts'"'

        ************************************************************
        /* SECTION 3: CATEGORY BREAKDOWN (by() specified) */
        ************************************************************
        if "`by'" != "" {

            * Step 3A: capture category codes and labels before the data are cleared
            levelsof `by', local(byvals_numeric)

            local category_labels ""
            foreach val of local byvals_numeric {
                local catlab : label (`by') `val'
                local category_labels `"`category_labels' `val' "`catlab'""'
            }

            local order_labels ""
            forval i = 1/`nvars' {
                local order_labels `"`order_labels' `i' "`varlabel`i''""'
            }

            * Step 3B: sum of each variable within each category,
            * stored as sum_<variable position>_<category code>
            forval i = 1/`nvars' {
                local thisvar : word `i' of `varlist'
                foreach val of local byvals_numeric {
                    sum `thisvar' if `by' == `val'
                    local sum_`i'_`val' = r(sum)
                    if "`percent'" != "" {
                        local sum_`i'_`val' = (`sum_`i'_`val'' / `grand_total') * 100
                    }
                }
            }

            * Step 3C: one row per variable, one column per category.
            * Columns are double so large sums are not rounded to float precision.
            clear
            set obs `nvars'
            gen order = _n

            foreach val of local byvals_numeric {
                gen double total_cat`val' = .
            }

            forval i = 1/`nvars' {
                foreach val of local byvals_numeric {
                    replace total_cat`val' = `sum_`i'_`val'' in `i'
                }
            }

            * Step 3D: long format (order, category, total_cat) and graph
            reshape long total_cat, i(order) j(category)

            `graphcmd' total_cat, ///
                over(category, relabel(`category_labels')) ///
                over(order, `sort' relabel(`order_labels') `overopts') ///
                asyvars `common_opts'

            if "`saving'" != "" {
                graph export "`saving'", replace
            }
        }

        ************************************************************
        /* SECTION 4: SIMPLE CASE (no by() variable) */
        ************************************************************
        if "`by'" == "" {

            * Step 4A: sums (or percentages) and the relabel string
            local label_string ""
            forval i = 1/`nvars' {
                local thisvar : word `i' of `varlist'
                sum `thisvar'
                local val`i' = r(sum)
                if "`percent'" != "" {
                    local val`i' = (`val`i'' / `grand_total') * 100
                }
                local label_string `"`label_string' `i' "`varlabel`i''""'
            }

            * Step 4B: one row per variable; double keeps large sums exact
            clear
            set obs `nvars'
            gen double total = .
            gen order = _n

            forval i = 1/`nvars' {
                replace total = `val`i'' in `i'
            }

            `graphcmd' total, over(order, `sort' relabel(`label_string') `overopts') `common_opts'

            if "`saving'" != "" {
                graph export "`saving'", replace
            }
        }

        ************************************************************
        /* SECTION 5: DATA RESTORATION
           Explicit on success; preserve also restores on error exits */
        ************************************************************
        restore
    }
end