Linear discriminant analysis (LDA)

TODO
Install required packages
Conventional LDA
Robust LDA
Quadratic Discriminant Analysis
Detach (automatically) loaded packages (if possible)
Get the article source from GitHub

TODO

Link to the related articles regressionLogistic, regressionOrdinal, and regressionMultinom.

Install required packages

mvtnorm, MASS

# Packages this article needs; install only the ones that are not present yet.
wants <- c("mvtnorm", "MASS")
has   <- wants %in% rownames(installed.packages())
if (!all(has)) {
    install.packages(wants[!has])
}

Conventional LDA

Simulate data

# Simulate bivariate normal data for three groups that share one covariance
# matrix but have different mean vectors, then collect them in a data frame.
set.seed(123)                          # fixed seed: the draws below are reproducible
library(mvtnorm)                       # provides rmvnorm()
Nj    <- c(15, 25, 20)                 # number of observations per group
Sigma <- matrix(c(16,-2, -2,9), byrow=TRUE, ncol=2)   # 2x2 covariance matrix used for every group
mu1   <- c(-4,  4)                     # mean vector, group 1
mu2   <- c( 3,  3)                     # mean vector, group 2
mu3   <- c( 1, -1)                     # mean vector, group 3
Y1    <- rmvnorm(Nj[1], mean=mu1, sigma=Sigma)
Y2    <- rmvnorm(Nj[2], mean=mu2, sigma=Sigma)
Y3    <- rmvnorm(Nj[3], mean=mu3, sigma=Sigma)
Y     <- rbind(Y1, Y2, Y3)             # stack the groups row-wise: sum(Nj) x 2 matrix
# Group membership factor: label j repeated Nj[j] times, in the same order as
# the rows of Y. seq_along() is used instead of 1:length() so that an empty Nj
# cannot produce the sequence c(1, 0).
IV    <- factor(rep(seq_along(Nj), Nj))
Ydf   <- data.frame(IV, DV1=Y[ , 1], DV2=Y[ , 2])

Run the analysis

# Fit a linear discriminant analysis of IV on DV1 and DV2, then show the fit.
library(MASS)
ldaRes <- lda(IV ~ DV1 + DV2, data=Ydf)
print(ldaRes)

Call:
lda(IV ~ DV1 + DV2, data = Ydf)

Prior probabilities of groups:
        1         2         3 
0.2500000 0.4166667 0.3333333 

Group means:
         DV1        DV2
1 -3.9300600  3.6858137
2  3.0763419  3.2682592
3  0.8326377 -0.8284297

Coefficients of linear discriminants:
           LD1         LD2
DV1 0.30281673 -0.02978952
DV2 0.01135247 -0.34212141

Proportion of trace:
   LD1    LD2 
0.6019 0.3981

# Refit with CV=TRUE (leave-one-out cross-validation in MASS::lda) and show
# the first rows of the resulting posterior class probabilities.
ldaP <- lda(IV ~ DV1 + DV2, data=Ydf, CV=TRUE)
head(ldaP[["posterior"]])

           1          2          3
1 0.87692334 0.03892749 0.08414917
2 0.05948677 0.77916960 0.16134364
3 0.75783381 0.22929176 0.01287443
4 0.23218561 0.16280484 0.60500954
5 0.87531088 0.02860185 0.09608726
6 0.15145306 0.71535330 0.13319364

# Apply the fitted LDA to the original data and pull out the matrix of
# discriminant scores (columns LD1, LD2).
ldaPred <- predict(ldaRes, newdata=Ydf)
ld      <- ldaPred[["x"]]
head(ld)

          LD1         LD2
1 -2.02645465 -0.30063936
2  0.51103261 -0.64927925
3 -1.29829423 -2.28573892
4 -0.74102145  0.72661802
5 -2.16732369 -0.07970205
6  0.09253344 -0.93543324

Predicted classification

# Predicted group membership for each observation (a factor)
cls <- ldaPred[["class"]]
head(cls)

[1] 1 2 1 3 1 2
Levels: 1 2 3

# Cross-tabulate actual group (rows) against predicted group (columns) and
# print the table with row and column totals added.
cTab <- table(IV=IV, ldaPred=cls)
addmargins(cTab)

     ldaPred
IV     1  2  3 Sum
  1    9  4  2  15
  2    3 19  3  25
  3    1  4 15  20
  Sum 13 27 20  60

# Proportion of correctly classified observations: the diagonal of cTab holds
# the cases where actual and predicted group agree.
sum(diag(cTab)) / sum(cTab)

[1] 0.7166667

# One-way ANOVA of the first discriminant (column 1 of ld) on the grouping factor IV
anova(lm(ld[ , 1] ~ IV))

Analysis of Variance Table

Response: ld[, 1]
          Df Sum Sq Mean Sq F value    Pr(>F)    
IV         2 42.074  21.037  21.037 1.437e-07 ***
Residuals 57 57.000   1.000                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# One-way ANOVA of the second discriminant (column 2 of ld) on the grouping factor IV
anova(lm(ld[ , 2] ~ IV))

Analysis of Variance Table

Response: ld[, 2]
          Df Sum Sq Mean Sq F value    Pr(>F)    
IV         2 27.831  13.916  13.916 1.198e-05 ***
Residuals 57 57.000   1.000                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Refit the LDA with equal prior probabilities for all groups
# (one entry of 1/k per level of IV, where k is the number of levels).
priorP <- rep(1/nlevels(IV), times=nlevels(IV))
ldaEq  <- lda(IV ~ DV1 + DV2, data=Ydf, prior=priorP)

Robust LDA

# Robust variant: same model, fitted with method="mve", then show the fit.
library(MASS)
ldaRob <- lda(IV ~ DV1 + DV2, data=Ydf, method="mve")
print(ldaRob)

Call:
lda(IV ~ DV1 + DV2, data = Ydf, method = "mve")

Prior probabilities of groups:
        1         2         3 
0.2500000 0.4166667 0.3333333 

Group means:
         DV1        DV2
1 -3.9300600  3.6858137
2  3.0763419  3.2682592
3  0.8326377 -0.8284297

Coefficients of linear discriminants:
           LD1        LD2
DV1  0.1597400 -0.2562351
DV2 -0.3714238 -0.2406715

Proportion of trace:
   LD1    LD2 
0.5715 0.4285

# Predicted group membership from the robust fit
predict(ldaRob)[["class"]]

 [1] 1 2 1 3 1 2 1 1 3 2 1 1 3 2 1 2 2 2 2 2 2 1 3 2 2 2 2 2 1 2 2 3 1 2 2
[36] 3 2 2 3 2 3 3 3 3 2 2 3 3 2 3 3 3 3 3 2 3 3 3 3 3
Levels: 1 2 3

Quadratic Discriminant Analysis

# Fit a quadratic discriminant analysis with the same model formula, then show the fit.
library(MASS)
qdaRes <- qda(IV ~ DV1 + DV2, data=Ydf)
print(qdaRes)

Call:
qda(IV ~ DV1 + DV2, data = Ydf)

Prior probabilities of groups:
        1         2         3 
0.2500000 0.4166667 0.3333333 

Group means:
         DV1        DV2
1 -3.9300600  3.6858137
2  3.0763419  3.2682592
3  0.8326377 -0.8284297

# Predicted group membership from the QDA fit
predict(qdaRes)[["class"]]

 [1] 1 2 1 3 1 2 1 1 3 1 1 1 3 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 3 1 2 2
[36] 3 2 2 3 2 3 3 3 2 2 2 3 2 2 3 3 3 3 3 3 3 3 3 3 3
Levels: 1 2 3

Detach (automatically) loaded packages (if possible)

# Best-effort cleanup: remove the packages attached above from the search path.
# try() keeps the script running if a package cannot be detached.
try(detach("package:MASS", character.only=TRUE))
try(detach("package:mvtnorm", character.only=TRUE))

Get the article source from GitHub

R markdown - markdown - R code - all posts