*! 1.0.0 KNK 31 July 2025

* sumbar: bar chart of column sums for one or more numeric variables,
* optionally broken down by the categories of a by() variable.
*
* Syntax
*   sumbar varlist [if] [in] [, options]
*
* Options
*   by(varname)     string variable, or numeric variable with value labels;
*                   one bar per category within each variable
*   title(string)   graph title; defaults to "Percentage Distribution" with
*                   percent and to "Totals" otherwise
*   percent         plot each sum as a percentage of the grand total taken
*                   over all variables in varlist
*   n               report non-missing counts: in the subtitle when the count
*                   is the same for every variable, otherwise appended to each
*                   variable label
*   total           add the overall total to the subtitle
*   keepmiss        keep observations with missing values in varlist or by();
*                   missing by() values are plotted as a "Missing" category
*   recast(string)  graph subcommand used in place of bar (graph <recast>)
*   sort            order the variable axis by bar height, descending
*   saving(string)  file passed to graph export, replace; ".png" is appended
*                   when the file name carries no extension
*   intensity(#)    passed to the graph intensity() option; default 50
*   overopts()      extra suboptions for the over() of the variable axis
*   blabelopts()    extra suboptions for blabel(bar, ...)
*   legendopts()    extra suboptions for legend()
*   graphopts()     any other graph options, appended last
*
* Errors
*   198   string variable in varlist, or numeric by() without value labels
*   459   percent requested but the overall total is zero
*   2000  no observations left after if/in and missing-value filtering
*
* The data in memory are preserved on entry and restored on every exit.
program define sumbar
  version 12.0
  syntax varlist [if] [in], ///
	[By(varname)] ///
	[Title(string)] ///
	[Percent] ///
	[n] ///
	[TOtal] ///
	[KEEPmiss] ///
	[REcast(string)] ///
	[SOrt] ///
	[SAving(string)] ///
	[Intensity(integer 50)] ///
	[OVERopts(string asis)] ///
	[BLABELopts(string asis)] ///
	[LEGENDopts(string asis)] ///
	[GRAPHopts(string asis)]

  qui {

  ************************************************************
  * SECTION 0: VALIDATE INPUTS
  ************************************************************

  * The syntax command has already confirmed that every variable exists
  * and has expanded abbreviations, so the varlist is final at this point
  local nvars : word count `varlist'

  * Sums are only defined for numeric variables
  ds `varlist', has(type string)
  local string_vars `r(varlist)'
  if "`string_vars'" != "" {
      di as error "String variables not allowed in varlist: `string_vars'"
      di as error "sumbar calculates sums and so, only numeric variables are valid"
      exit 198
  }

  ************************************************************
  * SECTION 1: PRESERVE DATA AND APPLY CONDITIONS
  ************************************************************

  * Original data are restored automatically on any exit from here on
  preserve

  * Sample marker for the if/in conditions. Without keepmiss, observations
  * with a missing value in any varlist variable are excluded as well
  if "`keepmiss'" != "" {
      marksample touse, strok novarlist
  }
  else {
      marksample touse, strok
  }

  * Without keepmiss, observations with a missing by() value are excluded
  if "`by'" != "" & "`keepmiss'" == "" {
      replace `touse' = 0 if missing(`by')
  }

  keep if `touse' == 1

  count
  if r(N) == 0 {
      di as error "No observations remain after applying if/in conditions"
      exit 2000
  }

  * Recode by() into consecutive integer codes carrying value labels
  if "`by'" != "" {
      * Remember the user-supplied variable; the by macro is re-pointed below
      local original_by "`by'"

      capture confirm string variable `by'
      if !_rc {
          * String variable: encode it directly
          tempvar by_encoded
          encode `by', gen(`by_encoded')
          local by "`by_encoded'"
      }
      else {
          * Numeric variable: value labels are required for the legend
          local vallbl : value label `by'
          if "`vallbl'" == "" {
              di as error "Numeric by() variable must have value labels attached"
              di as error "Either use a string variable or attach value labels to `original_by'"
              exit 198
          }

          * Round-trip through decode/encode to obtain consecutive codes.
          * NOTE(review): values that carry no label presumably decode to an
          * empty string and so end up missing in the recoded variable; confirm
          tempvar by_sequential
          tempvar temp_decoded
          decode `by', gen(`temp_decoded')
          encode `temp_decoded', gen(`by_sequential')
          local by "`by_sequential'"
      }

      if "`keepmiss'" != "" {
          * Give kept observations with a missing by() value their own category
          count if missing(`original_by')
          if r(N) > 0 {
              * Next free code is one past the number of existing categories
              qui levelsof `by', local(levels)
              local max_level : word count `levels'
              local missing_code = `max_level' + 1

              replace `by' = `missing_code' if missing(`original_by')

              local vallbl : value label `by'
              if "`vallbl'" != "" {
                  label define `vallbl' `missing_code' "Missing", add
              }
          }
      }
  }

  * Graph command: graph bar unless recast() names another subcommand
  local graphcmd = cond("`recast'" != "", "graph `recast'", "graph bar")

  * Default title
  if "`title'" == "" {
      local title = cond("`percent'" != "", "Percentage Distribution", "Totals")
  }

  * Translate the sort flag into the over() suboptions it stands for
  if "`sort'" != "" {
      local sort "sort(1) descending"
  }

  * Default to PNG when the file name has no extension. Only a trailing
  * .ext on the last path component counts, so dots in directory names
  * or a leading ./ do not suppress the default
  if `"`saving'"' != "" {
      if !regexm(`"`saving'"', "\.[A-Za-z0-9]+$") {
          local saving `"`saving'.png"'
      }
  }

  ************************************************************
  * SECTION 2: COMMON CALCULATIONS
  * Totals, counts and labels needed with or without by()
  ************************************************************

  * Axis title and number formats (fixed; they depend only on percent)
  local ytitle = cond("`percent'" != "", "Percentage", "Total")
  local graph_decimal = cond("`percent'" != "", "%11.1f", "%11.0fc")
  local yaxis_format =  cond("`percent'" != "", "", "ylabel(, format(%15.0fc))")

  * Non-missing count per variable; flag whether it differs from the first
  local varying_n = 0
  if "`n'" != "" {
      forval i = 1/`nvars' {
          local thisvar : word `i' of `varlist'
          count if !missing(`thisvar')
          local n_`i' = r(N)

          if `i' > 1 & `n_`i'' != `n_1' {
              local varying_n = 1
          }
      }
  }

  * Grand total over all variables
  local grand_total = 0
  forval i = 1/`nvars' {
      local thisvar : word `i' of `varlist'
      sum `thisvar'
      local grand_total = `grand_total' + r(sum)
  }

  * Percentages are shares of the grand total, so a zero total is undefined
  if "`percent'" != "" & `grand_total' == 0 {
      di as error "Overall total is zero; percentages cannot be calculated"
      exit 459
  }

  * Axis label per variable: variable label, falling back to the name
  forval i = 1/`nvars' {
      local thisvar : word `i' of `varlist'
      local varlabel`i' : variable label `thisvar'
      if "`varlabel`i''" == "" {
          local varlabel`i' "`thisvar'"
      }

      * Counts go on the individual labels only when they differ
      if "`n'" != "" & `varying_n' == 1 {
          local varlabel`i' "`varlabel`i'' (n = `n_`i'')"
      }
  }

  * Subtitle: common N and/or overall total
  local subtitle ""
  local sub_parts ""

  if "`n'" != "" & `varying_n' == 0 {
      local sub_parts "N = `n_1'"
  }

  if "`total'" != "" {
      local gt_formatted : di %15.0fc `grand_total'
      local gt_formatted = trim("`gt_formatted'")
      if "`sub_parts'" != "" {
          local sub_parts "`sub_parts', Overall total = `gt_formatted'"
      }
      else {
          local sub_parts "Overall total = `gt_formatted'"
      }
  }

  if "`sub_parts'" != "" {
      local subtitle `"subtitle("`sub_parts'", size(small))"'
  }

  * Options shared by both graph calls; user graphopts() come last
  local common_opts `"title("`title'", size(medsmall)) `subtitle' ytitle("`ytitle'") `yaxis_format' blabel(bar, format(`graph_decimal')) blabel(bar, `blabelopts') legend(pos(6) row(1)) legend(`legendopts') intensity(`intensity') `graphopts'"'

  if "`by'" != "" {

      ************************************************************
      * SECTION 3: CATEGORY BREAKDOWN (by() specified)
      ************************************************************

      * Step 3A: capture category codes and labels before the data are cleared
      levelsof `by', local(byvals_numeric)

      local category_labels ""
      foreach val of local byvals_numeric {
          local catlab : label (`by') `val'
          local category_labels `"`category_labels' `val' "`catlab'""'
      }

      * Relabel list for the variable axis, using the labels from Section 2
      local order_labels ""
      forval i = 1/`nvars' {
          local order_labels `"`order_labels' `i' "`varlabel`i''""'
      }

      * Step 3B: sum of every variable within every category, stored as
      * sum_<variable position>_<category code>
      forval i = 1/`nvars' {
          local thisvar : word `i' of `varlist'
          foreach val of local byvals_numeric {
              sum `thisvar' if `by' == `val'
              local sum_`i'_`val' = r(sum)

              if "`percent'" != "" {
                  local sum_`i'_`val' = (`sum_`i'_`val'' / `grand_total') * 100
              }
          }
      }

      * Step 3C: build a small dataset with one row per variable and one
      * column per category
      clear
      set obs `nvars'
      gen order = _n

      foreach val of local byvals_numeric {
          gen total_cat`val' = .
      }

      forval i = 1/`nvars' {
          foreach val of local byvals_numeric {
              replace total_cat`val' = `sum_`i'_`val'' in `i'
          }
      }

      * Step 3D: reshape to one row per variable x category and draw
      reshape long total_cat, i(order) j(category)

      `graphcmd' total_cat, ///
          over(category, relabel(`category_labels')) ///
          over(order, `sort' relabel(`order_labels') `overopts') ///
          asyvars `common_opts'
  }
  else {

      ************************************************************
      * SECTION 4: SIMPLE CASE (no by() variable)
      ************************************************************

      * Step 4A: sum each variable and build the relabel list
      local label_string ""

      forval i = 1/`nvars' {
          local thisvar : word `i' of `varlist'
          sum `thisvar'
          local val`i' = r(sum)

          if "`percent'" != "" {
              local val`i' = (`val`i'' / `grand_total') * 100
          }

          local label_string `"`label_string' `i' "`varlabel`i''""'
      }

      * Step 4B: one row per variable, then draw
      clear
      set obs `nvars'
      gen total = .
      gen order = _n

      forval i = 1/`nvars' {
          replace total = `val`i'' in `i'
      }

      `graphcmd' total, over(order, `sort' relabel(`label_string') `overopts') `common_opts'
  }

  * Export the graph just drawn, overwriting any existing file
  if `"`saving'"' != "" {
      graph export `"`saving'"', replace
  }

  ************************************************************
  * SECTION 5: RESTORE ORIGINAL DATA
  ************************************************************

  restore

  }

end