Diagrams for multivariate data

TODO

Install required packages

car, ellipse, lattice, mvtnorm, rgl

# Install whichever of the required packages are not available yet.
wants <- c("car", "ellipse", "lattice", "mvtnorm", "rgl")
has   <- is.element(wants, rownames(installed.packages()))
if (!all(has)) {
    install.packages(wants[!has])
}

3-D data

Contour plots

# Parameters of the bivariate normal distribution: mean vector and
# covariance matrix (unit variances, covariance 0.6).
mu    <- c(1, 3)
sigma <- matrix(c(1, 0.6, 0.6, 1), nrow = 2)

# Evaluation grid: N equally spaced points per axis, reaching rng standard
# deviations to either side of each mean. The diagonal of sigma holds
# variances, so the half-width is scaled by their square roots.
rng   <- 2.5
N     <- 50
sds   <- sqrt(diag(sigma))
X     <- seq(mu[1] - rng*sds[1], mu[1] + rng*sds[1], length.out = N)
Y     <- seq(mu[2] - rng*sds[2], mu[2] + rng*sds[2], length.out = N)
# Evaluate the bivariate normal density on the X-by-Y grid and draw its
# contour lines.
set.seed(123)
library(mvtnorm)

# Density of the normal distribution with mean mu and covariance sigma at the
# paired coordinates (x[i], y[i]); outer() calls it with the expanded grid.
genZ <- function(x, y) {
    xy <- cbind(x, y)
    dmvnorm(xy, mu, sigma)
}
matZ <- outer(X, Y, FUN = genZ)
contour(x = X, y = Y, z = matZ, main = "Contours for 2D-normal density")
plot of chunk rerDiagMultivariate01

plot of chunk rerDiagMultivariate01

# The same density matrix, shown as filled (colored) contours.
filled.contour(x = X, y = Y, z = matZ,
               main = "Colored contours for 2D-normal density")
plot of chunk rerDiagMultivariate01

plot of chunk rerDiagMultivariate01

Bubble plot etc.

# Simulated data: sport depends on age, weight depends on age and sport,
# each with added normal noise.
N      <- 10
age    <- rnorm(N, 30, 8)
sport  <- abs(-0.25*age + rnorm(N, 60, 40))
weight <- -0.3*age - 0.4*sport + 100 + rnorm(N, 0, 3)

# Rescale weight linearly to [0.2, 1] so that the lightest observation still
# gets a visible circle. diff(range()) is never negative, so no abs() needed.
wRange <- range(weight)
wScale <- (weight - wRange[1]) * (0.8 / diff(wRange)) + 0.2

# Bubble plot: circle size encodes the rescaled weight. The argument name
# 'inches' is spelled out instead of relying on partial matching ('inch').
symbols(age, sport, circles = wScale, inches = 0.6, fg = NULL,
        bg = rainbow(N), main = "Weight against age and sport")
plot of chunk rerDiagMultivariate02

plot of chunk rerDiagMultivariate02

See sunflowerplot() and stars() for alternative approaches.

3-D grid plot

# Perspective plot of the density matrix matZ over the X-by-Y grid.
# par() returns the previous settings; they are restored afterwards so that
# the enlarged title and reduced margins do not leak into later plots.
op <- par(cex.main = 1.4, mar = c(2, 2, 4, 2) + 0.1)
persp(X, Y, matZ, xlab = "x", ylab = "y", zlab = "Density", theta = 5,
      phi = 35, main = "2D-normal probability density")
par(op)
plot of chunk rerDiagMultivariate03

plot of chunk rerDiagMultivariate03

Interactive 3-D scatter plot

# Interactive 3-D scatter plot with rgl: a 10 x 10 grid of (x, y) points on
# [-10, 10] in both directions, with z = x * y.
library(rgl)
gridAxis <- seq(-10, 10, length.out = 10)
vecX <- rep(gridAxis, times = 10)
vecY <- rep(gridAxis, each = 10)
vecZ <- vecX * vecY
plot3d(x = vecX, y = vecY, z = vecZ, type = "h", col = "blue",
       aspect = TRUE, main = "3D Scatterplot")
spheres3d(x = vecX, y = vecY, z = vecZ, radius = 2, col = "red")
grid3d(side = c("x", "y+", "z"))
plot of chunk rerDiagMultivariate04

plot of chunk rerDiagMultivariate04

# Further rgl demonstrations; their output is not shown here.
demo("rgl")
example("persp3d")

Conditioning plots

# Simulated 2 x 2 design (IV1 x IV2) with Njk observations per cell.
Njk    <- 25
P      <- 2
Q      <- 2
nTotal <- P * Q * Njk
IQ     <- rnorm(nTotal, mean = 100, sd = 15)
height <- rnorm(nTotal, mean = 175, sd = 7)

# IV1 comes in two consecutive runs, IV2 alternates, so the factors are crossed.
IV1    <- gl(2, Q * Njk, labels = c("control", "treatment"))
IV2    <- gl(2, 1, length = 2 * P * Njk, labels = c("f", "m"))
myDf   <- data.frame(IV1 = IV1, IV2 = IV2, IQ = IQ, height = height)

# Scatter plots of IQ against height, conditioned on both factors.
coplot(IQ ~ height | IV1 * IV2, data = myDf, pch = 16)
plot of chunk rerDiagMultivariate06

plot of chunk rerDiagMultivariate06

# Histograms of height, one panel per IV1 x IV2 combination (lattice).
# A lattice histogram formula has the form  ~ x | conditioning.  The earlier
# 'IQ ~ height | ...' carried a left-hand side that histogram() does not use,
# so the formula now names only the variable that is actually binned.
# NOTE(review): if IQ was the intended variable, use  ~ IQ | IV1*IV2  instead.
library(lattice)
res <- histogram(~ height | IV1*IV2, data = myDf,
                 main = "Histograms per group")
print(res)
plot of chunk rerDiagMultivariate07

plot of chunk rerDiagMultivariate07

Scatterplot matrices

# Simulated data: two groups (CG, T) with N/P observations each.
N      <- 20
P      <- 2
# IV has to be a factor: since R 4.0 data.frame() keeps strings as character,
# and the point colors below are looked up through the integer group codes.
# Indexing c("red", "blue") with a character vector would give NA colors.
IV     <- factor(rep(c("CG", "T"), each = N/P))
age    <- sample(18:35, N, replace = TRUE)
IQ     <- round(rnorm(N, mean = rep(c(100, 115), each = N/P), sd = 15))
rating <- round(0.4*IQ - 30 + rnorm(N, 0, 10), 1)
score  <- round(-0.3*IQ + 0.7*age + rnorm(N, 0, 8), 1)
mvDf   <- data.frame(IV, age, IQ, rating, score)

# Scatter plot matrix of the numeric variables, colored by group.
pairs(mvDf[c("age", "IQ", "rating", "score")], main = "Scatter plot matrix",
      pch = 16, col = c("red", "blue")[as.integer(mvDf$IV)])
plot of chunk rerDiagMultivariate08

plot of chunk rerDiagMultivariate08

# Diagonal panel for pairs(): draws a histogram of x over the current panel.
# par(new=TRUE) is set so that hist() does not clear the existing plot; the
# histogram title is suppressed. Extra arguments are forwarded to hist().
myHist <- function(x, ...) {
    par(new = TRUE)
    hist(x, ..., main = "")
}
# Upper panel for pairs(): adds one 50% data ellipse per group to the
# current panel (CG in red, T in blue), without re-plotting the points.
# Group membership is read from the global data frame mvDf, so x and y are
# split by mvDf$IV. The arguments nSegments, rad and ... are accepted but
# not used.
myEll  <- function(x, y, nSegments=100, rad=1, ...) {
    library(car)
    byGroup <- split(data.frame(x, y), mvDf$IV)

    # Add the ellipse for one group's (x, y) pairs in the given color.
    addEll <- function(pts, colour) {
        dataEllipse(data.matrix(pts), level = 0.5, col = colour,
                    center.pch = 4, plot.points = FALSE, add = TRUE)
    }

    addEll(byGroup[["CG"]], "red")
    addEll(byGroup[["T"]], "blue")
}
# Scatter plot matrix with histograms on the diagonal (myHist) and per-group
# data ellipses in the upper panels (myEll).
# Group colors are looked up through integer group codes. factor() makes this
# work whether mvDf$IV is stored as character or as factor; indexing
# c("red", "blue") with a character vector would give NA colors.
pairs(mvDf[c("age", "IQ", "rating", "score")], diag.panel = myHist,
      upper.panel = myEll, main = "Scatter plot matrix", pch = 16,
      col = c("red", "blue")[as.integer(factor(mvDf$IV))])
plot of chunk rerDiagMultivariate09

plot of chunk rerDiagMultivariate09

Heatmap

Illustrating the correlation matrix of several variables.

# Simulate N observations of P variables that are linear combinations of
# Q uncorrelated standard-normal factors plus independent normal noise:
# X = FF %*% t(Lambda) + E.
library(mvtnorm)
N <- 200
P <- 8
Q <- 2
# Loadings: P x Q matrix of uniform random values in [-0.9, 0.9], rounded to
# one decimal.
Lambda <- matrix(round(runif(P*Q, min = -0.9, max = 0.9), 1), nrow = P)
# The mean vector follows Q instead of being hard-coded to length 2.
FF <- rmvnorm(N, mean = rep(0, Q), sigma = diag(Q))
E  <- rmvnorm(N, mean = rep(0, P), sigma = diag(P)*0.3)
X  <- FF %*% t(Lambda) + E

# Correlation matrix of the simulated variables, labeled X1 ... XP.
corMat   <- cor(X)
varNames <- paste0("X", seq_len(P))
dimnames(corMat) <- list(varNames, varNames)
round(corMat, 2)
      X1    X2    X3    X4    X5    X6    X7    X8
X1  1.00 -0.24  0.22  0.52  0.57 -0.60  0.25 -0.61
X2 -0.24  1.00 -0.34  0.08  0.04 -0.04 -0.31  0.19
X3  0.22 -0.34  1.00 -0.04  0.03 -0.10  0.60 -0.23
X4  0.52  0.08 -0.04  1.00  0.74 -0.72  0.01 -0.53
X5  0.57  0.04  0.03  0.74  1.00 -0.78  0.06 -0.63
X6 -0.60 -0.04 -0.10 -0.72 -0.78  1.00 -0.10  0.63
X7  0.25 -0.31  0.60  0.01  0.06 -0.10  1.00 -0.29
X8 -0.61  0.19 -0.23 -0.53 -0.63  0.63 -0.29  1.00
# Heatmap of corMat via image(). The default axes are suppressed and replaced
# by variable labels at P equally spaced positions on [0, 1].
tickPos <- seq(0, 1, length.out = P)
image(corMat, axes = FALSE,
      main = paste("Correlation matrix of", P, "variables"))
axis(side = 1, at = tickPos, labels = rownames(corMat))
axis(side = 2, at = tickPos, labels = colnames(corMat))
plot of chunk rerDiagMultivariate10

plot of chunk rerDiagMultivariate10

See heatmap() for a heatmap with dendrograms added to the plot sides, and correlation for an alternative approach to visualizing correlation matrices.

Correlation matrix plot

# Correlation matrix plot from package ellipse: lower triangle only,
# without the diagonal.
library(ellipse)
plotcorr(corr = corMat, diag = FALSE, type = "lower",
         main = "Bivariate correlations")
plot of chunk rerDiagMultivariate11

plot of chunk rerDiagMultivariate11

Useful packages

  • See package tourr for an alternative approach to visualizing high-dimensional data.
  • Packages ggplot2 and lattice provide their own graphics system and many functions for multi-panel plots.
  • Packages iplots, rggobi, and playwith also create interactive diagrams.

Detach (automatically) loaded packages (if possible)

# Detach the packages used above, in the same order as before. Only packages
# that are currently on the search path are detached, so no error is printed
# for packages that were never attached. try() keeps this best-effort: a
# failing detach() does not stop the remaining ones.
for (pkg in c("car", "ellipse", "nnet", "MASS", "mvtnorm", "rgl", "lattice")) {
    searchName <- paste0("package:", pkg)
    if (searchName %in% search()) {
        try(detach(searchName, character.only = TRUE))
    }
}

Get the article source from GitHub

R markdown - markdown - R code - all posts