# Descriptive statistics for data in matrices

## Sums and means

age    <- c(19, 19, 31, 19, 24, 17, 43, 28)
weight <- c(95, 76, 94, 76, 76, 68, 81, 65)
height <- c(197, 178, 189, 184, 173, 165, 181, 192)
(mat   <- cbind(age, weight, height))
     age weight height
[1,]  19     95    197
[2,]  19     76    178
[3,]  31     94    189
[4,]  19     76    184
[5,]  24     76    173
[6,]  17     68    165
[7,]  43     81    181
[8,]  28     65    192
sum(mat)
[1] 2290
rowSums(mat)
[1] 311 273 314 279 273 250 305 285
mean(mat)
[1] 95.41667
colMeans(mat)
    age  weight  height
 25.000  78.875 182.375 

## Apply any arithmetic function to rows or columns

apply(mat, 2, sum)
   age weight height
   200    631   1459 
apply(mat, 1, max)
[1] 197 178 189 184 173 165 181 192
apply(mat, 1, range)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,]   19   19   31   19   24   17   43   28
[2,]  197  178  189  184  173  165  181  192
apply(mat, 2, mean, trim=0.1)
    age  weight  height
 25.000  78.875 182.375 

## Center and scale a matrix

### Using scale()

(ctrMat <- scale(mat, center=TRUE, scale=FALSE))
     age  weight  height
[1,]  -6  16.125  14.625
[2,]  -6  -2.875  -4.375
[3,]   6  15.125   6.625
[4,]  -6  -2.875   1.625
[5,]  -1  -2.875  -9.375
[6,]  -8 -10.875 -17.375
[7,]  18   2.125  -1.375
[8,]   3 -13.875   9.625
attr(,"scaled:center")
    age  weight  height 
 25.000  78.875 182.375 
colMeans(ctrMat)
   age weight height
     0      0      0 
(sclMat <- scale(mat, center=TRUE, scale=TRUE))
            age     weight     height
[1,] -0.6818685  1.4818499  1.4000184
[2,] -0.6818685 -0.2642058 -0.4188089
[3,]  0.6818685  1.3899523  0.6341964
[4,] -0.6818685 -0.2642058  0.1555576
[5,] -0.1136447 -0.2642058 -0.8974477
[6,] -0.9091580 -0.9993872 -1.6632697
[7,]  2.0456055  0.1952825 -0.1316257
[8,]  0.3409342 -1.2750802  0.9213796
attr(,"scaled:center")
    age  weight  height 
 25.000  78.875 182.375 
attr(,"scaled:scale")
      age    weight    height 
 8.799351 10.881669 10.446291 
apply(sclMat, 2, sd)
   age weight height
     1      1      1 

### Using sweep()

Mj <- rowMeans(mat)
Mk <- colMeans(mat)
sweep(mat, 1, Mj, "-")
           age     weight   height
[1,] -84.66667  -8.666667 93.33333
[2,] -72.00000 -15.000000 87.00000
[3,] -73.66667 -10.666667 84.33333
[4,] -74.00000 -17.000000 91.00000
[5,] -67.00000 -15.000000 82.00000
[6,] -66.33333 -15.333333 81.66667
[7,] -58.66667 -20.666667 79.33333
[8,] -67.00000 -30.000000 97.00000
t(scale(t(mat), center=TRUE, scale=FALSE))
           age     weight   height
[1,] -84.66667  -8.666667 93.33333
[2,] -72.00000 -15.000000 87.00000
[3,] -73.66667 -10.666667 84.33333
[4,] -74.00000 -17.000000 91.00000
[5,] -67.00000 -15.000000 82.00000
[6,] -66.33333 -15.333333 81.66667
[7,] -58.66667 -20.666667 79.33333
[8,] -67.00000 -30.000000 97.00000
attr(,"scaled:center")
[1] 103.66667  91.00000 104.66667  93.00000  91.00000  83.33333 101.66667
[8]  95.00000
sweep(mat, 2, Mk, "-")
     age  weight  height
[1,]  -6  16.125  14.625
[2,]  -6  -2.875  -4.375
[3,]   6  15.125   6.625
[4,]  -6  -2.875   1.625
[5,]  -1  -2.875  -9.375
[6,]  -8 -10.875 -17.375
[7,]  18   2.125  -1.375
[8,]   3 -13.875   9.625

## Covariance and correlation matrices

### Covariance matrix

Corrected (unbiased estimator, divisor $n-1$)

cov(mat)
            age    weight    height
age    77.42857  16.42857  17.28571
weight 16.42857 118.41071  60.48214
height 17.28571  60.48214 109.12500
cor(mat)
             age    weight    height
age    1.0000000 0.1715749 0.1880505
weight 0.1715749 1.0000000 0.5320709
height 0.1880505 0.5320709 1.0000000

Extract the variances from the diagonal

diag(cov(mat))
      age    weight    height
 77.42857 118.41071 109.12500 

Uncorrected (maximum-likelihood estimator, divisor $n$)

(res <- cov.wt(mat, method="ML"))
$cov
          age    weight   height
age    67.750  14.37500 15.12500
weight 14.375 103.60938 52.92188
height 15.125  52.92188 95.48438

$center
    age  weight  height 
 25.000  78.875 182.375 

$n.obs
[1] 8

res$cov
          age    weight   height
age    67.750  14.37500 15.12500
weight 14.375 103.60938 52.92188
height 15.125  52.92188 95.48438

### Robust covariance estimator

library(robustbase)
covMcd(mat)
Minimum Covariance Determinant (MCD) estimator approximation.
Method: Fast MCD(alpha=0.5 ==> h=6); nsamp = 500; (n,k)mini = (300,5)
Call:
covMcd(x = mat)
Log(Det.):  10.38

Robust Estimate of Location:
   age  weight  height
 21.50   80.83  181.00
Robust Estimate of Covariance:
          age  weight  height
age     67.42   79.36   43.78
weight  79.36  302.91  286.58
height  43.78  286.58  327.37

### Correlation matrix

vec <- rnorm(nrow(mat))
cor(mat, vec)
             [,1]
age     0.6061347
weight -0.5364582
height -0.2328038
cor(vec, mat)
           age     weight     height
[1,] 0.6061347 -0.5364582 -0.2328038

### Robust correlation matrix

library(robustbase)
cov_rob <- covMcd(mat, cor=TRUE)
cov_rob$cor
             age    weight    height
age    1.0000000 0.5553181 0.2947135
weight 0.5553181 1.0000000 0.9100397
height 0.2947135 0.9100397 1.0000000
cov_rob$center
      age    weight    height 
 21.50000  80.83333 181.00000 

## Detach (automatically) loaded packages (if possible)

try(detach(package:robustbase))