*! version 1.4.0 Philippe VAN KERM, February 1998
*! Simple Correspondence Analysis
*! Syntax: coranal <var1> <var2> [weight] [if <exp>] [in <range>] [, d(#) q(#) asymmetric]

capture program drop coranal
program define coranal

version 5.0

loc varlist "req ex min(2) max(2)"
loc in "opt"
loc if "opt"
loc weight "aweight fweight"
loc options "d(integer 0) q(real 0) Asymmetric"
parse "`*'"

if `d'<0 | `d'==1 {Alert1} 
if `q'<0 | `q'>1 {
	di in gr "Quality threshold mis-specified : option q(`q') rejected"
	loc q = 0
	} 
parse "`varlist'", parse (" ")
loc weight "[`weight'`exp']"
loc var1 `1'
loc var2 `2'
checkstr `var1' `var2'

tempname N IN JN tmp1 tmp2 RS CS RM CM P DR DC DR12 DC12 tmp3 A PrinIn U T /*
*/ Totin F G V OT UU VV DL1 DL2 FS GS DL1C DL2C RI CI SQC1 SQC2 FP tmp GP n overn


/*****************     Base Matrices Definitions     *****************/

preserve
cap qui tab `var1' `var2' `weight' `if' `in',matcell(`N') matrow(`IN') matcol(`JN')
if _result(1)==0 {
	di in blue "no observations"
	exit
	}
if _rc==134 {
	loc tmp = "`var1'"
	loc var1 = "`var2'"
	loc var2 = "`tmp'"
cap qui tab `var1' `var2' `weight' `if' `in',matcell(`N') matrow(`IN') matcol(`JN')
	if _rc==134 {Alert2}
	}
scalar n=_result(1)
loc i = rowsof(`N')
loc j = colsof(`N')
if `i'<`j' {
	mat `N'=`N'' 
	mat `tmp1'=`IN''
	mat `IN' = `JN''
	mat `JN'=`tmp1'
	loc tmp = `i'
	loc i=`j'
	loc j=`tmp'
	loc tmp = "`var1'"
	loc var1 = "`var2'"
	loc var2 = "`tmp'"
	}
if `j' <= 1 {Alert3}

mat `RS' = J(`i',1,0) 
mat `CS' = J(1,`j',0)
loc n1 1
while `n1' <= `i' {
	loc n2 1
	loc sum 0	
	while `n2' <= `j' {
		loc sum = `sum' + `N'[`n1',`n2']
		loc n2 = `n2' + 1
		}
	mat `RS'[`n1',1] = `sum'
	loc n1 = `n1' + 1
	}
loc n1 1
while `n1' <= `j' {
	loc n2 1
	loc sum 0	
	while `n2' <= `i' {
		loc sum = `sum' + `N'[`n2',`n1']
		loc n2 = `n2' + 1
		}
	mat `CS'[1,`n1'] = `sum'
	loc n1 = `n1' + 1
	}

scalar overn= 1/scalar(n)
mat `RM' = overn*`RS'
mat `CM' = overn*`CS'
mat `P' = overn*`N'
mat `DR' = diag(`RM')
mat `DC' = diag(`CM')

/***     Computation of the matrix of standardized residuals     ***/

mat `DR12'=J(`i',`i',0)
loc n1 1
while `n1'<=`i' {
	mat `DR12'[`n1',`n1'] = (1/(sqrt(`DR'[`n1',`n1']))) 
	loc n1 = `n1'+1
	}
mat `DC12'=J(`j',`j',0)
loc n1 1
while `n1'<=`j' {
	mat `DC12'[`n1',`n1'] = (1/(sqrt(`DC'[`n1',`n1']))) 
	loc n1 = `n1'+1
	}
mat `tmp1' = `RM'*`CM'
mat `tmp1' = `P'-`tmp1'
mat `tmp1' = `DR12'*`tmp1'
mat `A' = `tmp1'*`DC12'

 
/***     Singular Value Decomposition, Inertia and Coordinates     ***/

/* Singular value decomposition */

mat svd `U' `T' `V' = `A'

/* Ordering of U T and V */ 

mat `OT' = `T'
mat `UU' = `U'
mat `VV' = `V'
loc n1 1
while `n1' <= `j' {
	loc n2 1
	loc max =  -1
	loc posmax 0
	while `n2' <= `j' {
		if `T'[1,`n2'] > `max' {
			loc max = `T'[1,`n2'] 
			loc posmax = `n2'
			}
		loc n2 = `n2' + 1
		}
	mat `T'[1,`posmax'] = 0 - 1
	mat `OT'[1,`n1'] = `max'
	loc n3 1
	while `n3'<=`i' {
		mat `UU'[`n3',`n1'] = `U'[`n3',`posmax']
		loc n3 = `n3' + 1
		}
	loc n3 1
	while `n3'<=`j' {
		mat `VV'[`n3',`n1'] = `V'[`n3',`posmax']
		loc n3 = `n3' + 1
		}
	loc n1 = `n1' + 1
	}
mat `U' = `UU'
mat `V' = `VV'
mat `T' = diag(`OT')

/* Inertia  */	

loc sum  0
loc K = `j' - 1
mat `PrinIn' = J(3,`K',0)
loc n1 1
while `n1' <= `K' {
	mat `PrinIn'[1,`n1'] = (`T'[`n1',`n1'])^2
	loc sum = `sum' + ((`T'[`n1',`n1'])^2)
	loc n1 = `n1' + 1
	}
scalar Totin = `sum'
loc n1 1
loc sum 0
while `n1' <= `K' {
	mat `PrinIn'[2,`n1'] = `PrinIn'[1,`n1']/scalar(Totin)
	loc sum  = `sum' + `PrinIn'[2,`n1']
	mat `PrinIn'[3,`n1'] = `sum'
	loc n1 = `n1' + 1
	}

/* Row coordinates in F and decompositions of inertia by rows  */

mat `tmp1' = `DR12'*`U'
mat `tmp2' = `tmp1'*`T'
mat `F' = J(`i',`K',0) 
mat `FS' = J(`i',`K',0)
mat `DL1' = J(`i',`K',0)
mat `DL1C' = J(`i',`K',0)
loc n1 1
while `n1' <= `i' {
	loc n2 1
	while `n2' <= `K' {
		mat `F'[`n1',`n2'] = `tmp2'[`n1',`n2']
		mat `FS'[`n1',`n2'] = `F'[`n1',`n2']/`T'[`n2',`n2']
		mat `DL1'[`n1',`n2'] = ((`F'[`n1',`n2'])^2)*(`RM'[`n1',1])
		mat `DL1C'[`n1',`n2'] = `DL1'[`n1',`n2'] / `PrinIn'[1,`n2'] 
	 	loc n2 = `n2' + 1
		}
	loc n1 = `n1' + 1
	}

/* Column principal coordinates in G and decompositions of inertia by 
   columns */

mat `tmp1' = `DC12'*`V'
mat `tmp2' = `tmp1'*`T'
mat `G' = J(`j',`K',0)
mat `GS' = J(`j',`K',0)
mat `DL2' = J(`j',`K',0)
mat `DL2C' = J(`j',`K',0)
loc n1 1
while `n1' <= `j' {
	loc n2 1
	while `n2' <= `K' {
		mat `G'[`n1',`n2'] = `tmp2'[`n1',`n2']
		mat `GS'[`n1',`n2'] = `G'[`n1',`n2']/`T'[`n2',`n2']
		mat `DL2'[`n1',`n2'] = ((`G'[`n1',`n2'])^2)*(`CM'[1,`n1'])
		mat `DL2C'[`n1',`n2'] = `DL2'[`n1',`n2'] / `PrinIn'[1,`n2'] 
	 	loc n2 = `n2' + 1
		}
	loc n1 = `n1' + 1
	}
mat `RI' = J(`i',1,0)
loc n1 1
while `n1' <= `i' {
	loc n2 1
 	loc sum 0
	while `n2'<=`K' {
		loc sum = `sum' + `DL1'[`n1',`n2']
		loc n2 = `n2' + 1
		}
	mat `RI'[`n1',1] = `sum'
	loc n1 = `n1' + 1
	} 
mat `CI' = J(`j',1,0)
loc n1 1
while `n1' <= `j' {
	loc n2 1
 	loc sum 0
	while `n2'<=`K' {
		loc sum = `sum' + `DL2'[`n1',`n2']
		loc n2 = `n2' + 1
		}
	mat `CI'[`n1',1] = `sum'
	loc n1 = `n1' + 1
	} 
mat `SQC1'=J(`i',`K',0)
loc n1 1
while `n1' <=`i' {
	loc n2 1
	while `n2' <= `K' {
		mat `SQC1'[`n1',`n2'] = `DL1'[`n1',`n2'] / `RI'[`n1',1]
		loc n2 = `n2' + 1
		}
	loc n1 = `n1' + 1
	}
mat `SQC2'=J(`j',`K',0)
loc n1 1
while `n1' <=`j' {
	loc n2 1
	while `n2' <= `K' {
		mat `SQC2'[`n1',`n2'] = `DL2'[`n1',`n2'] / `CI'[`n1',1]
		loc n2 = `n2' + 1
		}
	loc n1 = `n1' + 1
	}
 
/**************     Output Display     **************/

title

/*** Table display ***/

loc n1 1
loc n2 1
loc rname1 " "
while `n1' <=`i' {
	loc nam = `IN'[`n1',1]
	loc rname`n2' = "`rname`n2''" + "`nam'" + " " 
	if length("`rname`n2''")>=50 {loc n2 = `n2' + 1}
	loc n1 =`n1' + 1
	}
loc n1 1
loc n2 1
loc cname1 " "
while `n1' <=`j' {
	loc nam = `JN'[1,`n1']
	loc cname`n2' = "`cname`n2''" + "`nam'" + " " 
	if length("`cname`n2''")>=50 {loc n2 = `n2' + 1}
	loc n1 =`n1' + 1
	}

if (`d'==0) | (`d'>`K') {loc dg = `K'}
else {loc dg = `d'}

loc n1 1
loc nam  "Dim"
while `n1' <=`dg' {
	loc name = "`nam'`n1'" 
	loc xname = "`xname'" + "`name'" + " "
	loc n1 =`n1' + 1
	}

PrtTI Totin

mat `tmp' = `PrinIn'[.,1..`dg']
mat `tmp' = `tmp''
mat colnames `tmp' = Inertia Share Cumul
mat rownames `tmp' = `xname'
di in green " Principal Inertias and Percentages :"
mat list `tmp', noheader f(%10.3f)

mat `F' = `F'[.,1..`dg']
mat `tmp' = `RI',`F'
mat `FP' = `RM',`tmp'
mat rownames `FP' = `rname1'`rname2'`rname3'`rname4'`rname5'`rname6' /*
                   */  `rname7'`rname8'`rname9'`rname10'
mat roweq `FP' = `var1'
mat colnames `FP' = Mass Inertia `xname'
di
di in green " " "`var1'" " coordinates : "
mat list `FP' , noheader f(%10.3f) 

mat `G' = `G'[.,1..`dg']
mat `tmp1' = `CI',`G'
mat `tmp2' = `CM''
mat `GP' = `tmp2',`tmp1'
mat rownames `GP' = `cname1'`cname2'`cname3'`cname4'`cname5'`cname6' /*
                   */ `cname7'`cname8'`cname9'`cname10'
mat roweq `GP' = `var2'
mat colnames `GP' = Mass Inertia `xname'
di
di in green " " "`var2'" " coordinates : "
mat list `GP' , noheader f(%10.3f) 

mat `DL1C' = `DL1C'[.,1..`dg']
mat rownames `DL1C' = `rname1'`rname2'`rname3'`rname4'`rname5'`rname6' /*
                   */ `rname7'`rname8'`rname9'`rname10'
mat roweq `DL1C' = `var1'
mat colnames `DL1C' = `xname'
di
di in green " Explained inertia of axis by " "`var1'" " :"
mat list `DL1C' , noheader f(%10.4f) 

mat `DL2C' = `DL2C'[.,1..`dg']
mat rownames `DL2C' = `cname1'`cname2'`cname3'`cname4'`cname5'`cname6' /*
                   */ `cname7'`cname8'`cname9'`cname10'
mat roweq `DL2C' = `var2'
mat colnames `DL2C' = `xname'
di
di in green " Explained inertia of axis by " "`var2'" " :"
mat list `DL2C' , noheader f(%10.4f) 

mat `SQC1' = `SQC1'[.,1..`dg']
mat rownames `SQC1' = `rname1'`rname2'`rname3'`rname4'`rname5'`rname6' /*
                    */ `rname7'`rname8'`rname9'`rname10'
mat roweq `SQC1' = `var1'
mat colnames `SQC1' = `xname'
di
di in green " Contributions of principal axes to " "`var1'" " :"
mat list `SQC1' , noheader f(%10.4f) 

mat `SQC2' = `SQC2'[.,1..`dg']
mat rownames `SQC2' = `cname1'`cname2'`cname3'`cname4'`cname5'`cname6' /*
                    */ `cname7'`cname8'`cname9'`cname10'
mat roweq `SQC2' = `var2'
mat colnames `SQC2' = `xname'
di
di in green " Contributions of principal axes to " "`var2'" " :"
mat list `SQC2' , noheader f(%10.4f) 

loc lb1 : value label `var1'
loc lb2 : value label `var2'

di
if "`lb1'" ~= "" {label list `lb1'}
if "`lb2'" ~= "" {label list `lb2'}

/*** Graphical display ***/

if `d' ~= 0 {
	if `K' < `d' {
di in red "Not enough principal dimensions for the representations requested"}
	else {
		mat F = `F'
		mat G = `G'
		mat colnames F = `xname'
		mat colnames G = `xname'
		svmat F ,names(matcol)
		svmat G ,names(matcol)
		if "`asymmetric'"~="" {
			mat FS = `FS'
			mat GS = `GS'
			mat FS = FS[.,1..`dg']
			mat GS = GS[.,1..`dg']
			mat colnames FS = `xname'
			mat colnames GS = `xname'
			svmat FS ,names(matcol)
			svmat GS ,names(matcol)
			}
		svmat `IN' ,names(Flab)
		mat `JN' = `JN''
		svmat `JN' ,names(Glab)
		label values Flab1 `lb1'
		label values Glab1 `lb2'
		if `q'~=0 {
			loc n1 1
			while `n1'<=`i' {
				loc n2 1
				loc qual 0
				while `n2' <=`dg' {
				     loc qual = `qual' + `SQC1'[`n1',`n2']
				     loc n2 = `n2' + 1
				     }
				if `qual' < `q' {qui replace Flab1=. in `n1'}
				loc n1 =`n1' + 1
				}
			loc n1 1
			while `n1'<=`j' {
				loc n2 1
				loc qual 0
				while `n2' <=`dg' {
				     loc qual = `qual' + `SQC2'[`n1',`n2']
				     loc n2 = `n2' + 1
					}
				if `qual' < `q' {qui replace Glab1=. in `n1'}
				loc n1 =`n1' + 1
				}
 			}
		loc n1 1 
		while `n1' <= `d'-1 {
			loc n2 = `n1' + 1
			while `n2'<=`d' {
   graph FDim`n2' FDim`n1', s([Flab1]) ti("`var1' scores") b2("Axis `n1'") /*
         */  l1(" ") l2("Axis `n2'") xla yla xline(0) yline(0)  border
				loc xma = _result(4)
				loc xmi = _result(3)
				loc yma = _result(2)
				loc ymi = _result(1)
				more
   graph GDim`n2' GDim`n1', s([Glab1]) ti("`var2' scores") b2("Axis `n1'") /*
         */  l1(" ") l2("Axis `n2'") xla yla xline(0) yline(0)  border
				if _result(4)>`xma' {loc xma = _result(4)}
				if _result(3)<`xmi' {loc xmi = _result(3)}
				if _result(2)>`yma' {loc yma = _result(2)}
				if _result(1)<`ymi' {loc ymi = _result(1)}
				more
				if "`asymmetric'" == "" {
					gph open
  graph GDim`n2' GDim`n1', xscale(`xmi',`xma') yscale(`ymi',`yma') s([Glab1]) /*
    */ ti("Joint `var1' and `var2' scores (symmetric map)") b2("Axis `n1'") /*
    */ l1(" ") l2("Axis `n2'") xla yla xline(0) yline(0)  border 
  graph FDim`n2' FDim`n1', xscale(`xmi',`xma') yscale(`ymi',`yma') s([Flab1]) /*
    */ ti(" ") b2(" ") l1(" ") l2(" ") xla yla xline(0) yline(0)  border     
					gph close
					more
					}
				else {
as_gr GSDim`n2' GSDim`n1' Glab1 `var1' `var2' `n1' `n2' FDim`n2' FDim`n1' Flab1
					more
as_gr FSDim`n2' FSDim`n1' Flab1 `var1' `var2' `n1' `n2' GDim`n2' GDim`n1' Glab1
					more
					}
				loc n2 = `n2' + 1
				}
			loc n1 = `n1' + 1
			}
		}
	}
restore
end


/* Subroutines */

capture program drop Alert1
program define Alert1
	di in red "Invalid number of dimensions"
	exit 198
end

capture program drop Alert2
program define Alert2
	di in red "Variables take on too many values"
	exit 134
end

capture program drop Alert3
program define Alert3
	di in red "A variable is uniquely valued"
	exit
end
	  
capture program drop checkstr
program define checkstr
	cap confirm string variable `1'
	if _rc==0 {
              di in red "variables in varlist must be numeric"
	      exit 109
	      }	
	cap confirm string variable `2'
        if _rc==0 {
              di in red "variables in varlist must be numeric"
	      exit 109
	      }	
end

capture program drop title
program define title
	di in green _d(78) "-"
	di 
	di in green _col(20) "SIMPLE CORRESPONDENCE ANALYSIS"
	di
	di in green _d(78) "-"
	di
end

capture program drop PrtTI
program define PrtTI
        di
        di in green " Total Inertia : " in yellow %10.3f scalar(`1')
        di
end

capture program drop as_gr 
program define as_gr
	gph open
	graph `1' `2' , s([`3']) /* 
 */ ti("Joint `4' and `5' scores (asymmetric map)") b2("Axis `6'") l1(" ") /*
 */ l2("Axis `7'") xla yla xline(0) yline(0)  border
	loc xma = _result(4)
	loc xmi = _result(3)
	loc yma = _result(2)
	loc ymi = _result(1)
	graph `8' `9', xscale(`xmi',`xma') yscale(`ymi',`yma') s([`10']) /*
 */ ti(" ") b2(" ") l1(" ") l2(" ") xla yla xline(0) yline(0)  border     
	gph close
end