# Linear discriminant analysis (LDA)

## TODO

• link to regressionLogistic, regressionOrdinal, regressionMultinom

## Install required packages

wants <- c("mvtnorm", "MASS")
has   <- wants %in% rownames(installed.packages())
if(any(!has)) install.packages(wants[!has])

## Conventional LDA

### Simulate data

set.seed(123)
library(mvtnorm)
Nj    <- c(15, 25, 20)
Sigma <- matrix(c(16,-2, -2,9), byrow=TRUE, ncol=2)
mu1   <- c(-4,  4)
mu2   <- c( 3,  3)
mu3   <- c( 1, -1)
Y1    <- rmvnorm(Nj[1], mean=mu1, sigma=Sigma)
Y2    <- rmvnorm(Nj[2], mean=mu2, sigma=Sigma)
Y3    <- rmvnorm(Nj[3], mean=mu3, sigma=Sigma)
Y     <- rbind(Y1, Y2, Y3)
IV    <- factor(rep(1:length(Nj), Nj))
Ydf   <- data.frame(IV, DV1=Y[ , 1], DV2=Y[ , 2])

### Run the analysis

library(MASS)
(ldaRes <- lda(IV ~ DV1 + DV2, data=Ydf))
Call:
lda(IV ~ DV1 + DV2, data = Ydf)

Prior probabilities of groups:
1         2         3
0.2500000 0.4166667 0.3333333

Group means:
DV1        DV2
1 -3.9300600  3.6858137
2  3.0763419  3.2682592
3  0.8326377 -0.8284297

Coefficients of linear discriminants:
LD1         LD2
DV1 0.30281673 -0.02978952
DV2 0.01135247 -0.34212141

Proportion of trace:
LD1    LD2
0.6019 0.3981 
ldaP <- lda(IV ~ DV1 + DV2, CV=TRUE, data=Ydf)
head(ldaP$posterior)  1 2 3 1 0.87692334 0.03892749 0.08414917 2 0.05948677 0.77916960 0.16134364 3 0.75783381 0.22929176 0.01287443 4 0.23218561 0.16280484 0.60500954 5 0.87531088 0.02860185 0.09608726 6 0.15145306 0.71535330 0.13319364 ldaPred <- predict(ldaRes, Ydf) ld <- ldaPred$x
head(ld)
          LD1         LD2
1 -2.02645465 -0.30063936
2  0.51103261 -0.64927925
3 -1.29829423 -2.28573892
4 -0.74102145  0.72661802
5 -2.16732369 -0.07970205
6  0.09253344 -0.93543324

cls <- ldaPred$class head(cls) [1] 1 2 1 3 1 2 Levels: 1 2 3 cTab <- table(IV, cls, dnn=c("IV", "ldaPred")) addmargins(cTab)  ldaPred IV 1 2 3 Sum 1 9 4 2 15 2 3 19 3 25 3 1 4 15 20 Sum 13 27 20 60 sum(diag(cTab)) / sum(cTab) [1] 0.7166667 anova(lm(ld[ , 1] ~ IV)) Analysis of Variance Table Response: ld[, 1] Df Sum Sq Mean Sq F value Pr(>F) IV 2 42.074 21.037 21.037 1.437e-07 *** Residuals 57 57.000 1.000 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 anova(lm(ld[ , 2] ~ IV)) Analysis of Variance Table Response: ld[, 2] Df Sum Sq Mean Sq F value Pr(>F) IV 2 27.831 13.916 13.916 1.198e-05 *** Residuals 57 57.000 1.000 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 priorP <- rep(1/nlevels(IV), nlevels(IV)) ldaEq <- lda(IV ~ DV1 + DV2, prior=priorP, data=Ydf) ## Robust LDA library(MASS) (ldaRob <- lda(IV ~ DV1 + DV2, method="mve", data=Ydf)) Call: lda(IV ~ DV1 + DV2, data = Ydf, method = "mve") Prior probabilities of groups: 1 2 3 0.2500000 0.4166667 0.3333333 Group means: DV1 DV2 1 -3.9300600 3.6858137 2 3.0763419 3.2682592 3 0.8326377 -0.8284297 Coefficients of linear discriminants: LD1 LD2 DV1 0.1597400 -0.2562351 DV2 -0.3714238 -0.2406715 Proportion of trace: LD1 LD2 0.5715 0.4285  predict(ldaRob)$class
 [1] 1 2 1 3 1 2 1 1 3 2 1 1 3 2 1 2 2 2 2 2 2 1 3 2 2 2 2 2 1 2 2 3 1 2 2 3 2 2
[39] 3 2 3 3 3 3 2 2 3 3 2 3 3 3 3 3 2 3 3 3 3 3
Levels: 1 2 3

library(MASS)
(qdaRes <- qda(IV ~ DV1 + DV2, data=Ydf))
Call:
qda(IV ~ DV1 + DV2, data = Ydf)

Prior probabilities of groups:
1         2         3
0.2500000 0.4166667 0.3333333

Group means:
DV1        DV2
1 -3.9300600  3.6858137
2  3.0763419  3.2682592
3  0.8326377 -0.8284297
predict(qdaRes)\$class
 [1] 1 2 1 3 1 2 1 1 3 1 1 1 3 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 3 1 2 2 3 2 2
[39] 3 2 3 3 3 2 2 2 3 2 2 3 3 3 3 3 3 3 3 3 3 3
Levels: 1 2 3

## Detach (automatically) loaded packages (if possible)

try(detach(package:MASS))
try(detach(package:mvtnorm))