Covariance, correlation, association measures for continuous variables

TODO

  • psych cor.plot()
  • link to diagScatter, diagMultivariate, association, associationOrder

Install required packages

coin, psych

# Packages this script depends on
wants <- c("coin", "psych")
# TRUE for every wanted package that is already installed
has   <- is.element(wants, rownames(installed.packages()))
# Install only the packages that are still missing
if(!all(has)) {
    install.packages(wants[!has])
}

Bivariate covariance and correlation

Covariance

Corrected (sample) covariance

# Six paired observations used for the bivariate examples
x <- c(17, 30, 30, 25, 23, 21)
y <- c( 1, 12,  8, 10,  5,  3)
# Sample covariance of the two vectors
cov(x=x, y=y)
[1] 19.2

Uncorrected (population) covariance

# cov.wt() with method="ML" gives the uncorrected covariance estimate;
# $cov extracts the matrix from the returned list, and the outer
# parentheses print the assigned value
(cmML <- cov.wt(cbind(x, y), method="ML")$cov)
         x        y
x 21.88889 16.00000
y 16.00000 14.91667
# The only element above the diagonal of the 2x2 matrix: the covariance of x and y
cmML[upper.tri(cmML)]
[1] 16

Correlation

Empirical correlation

# Correlation of x and y with cor()'s default method; stored in r and printed
(r <- cor(x, y))
[1] 0.8854667

Fisher’s \(Z\)-transformation

Used, e.g., for averaging correlations

# fisherz() from package psych transforms the correlation r to Fisher's Z
library(psych)
(rZ <- fisherz(r))
[1] 1.400533
# Back-transform Z to the correlation scale; this reproduces r
fisherz2r(rZ)
[1] 0.8854667

Partial and semi-partial correlation

# Fix the random number generator state so the simulated data are reproducible
set.seed(123)
N  <- 100

# Two uniformly distributed covariates
z1 <- runif(n=N)
z2 <- runif(n=N)

# x and y each depend linearly on z1 and z2, plus normal noise with sd 0.3
x  <- -0.3*z1 + 0.2*z2 + rnorm(n=N, mean=0, sd=0.3)
y  <-  0.3*z1 - 0.4*z2 + rnorm(n=N, mean=0, sd=0.3)

# Zero-order correlation of x and y, without controlling for z1 or z2
cor(x, y)
[1] -0.1620401

Partial correlation \(r_{(xy).z}\)

# Regress x and y separately on z1 and keep the residuals,
# i.e. the parts of x and y not linearly explained by z1
fitX.z1 <- lm(x ~ z1)
fitY.z1 <- lm(y ~ z1)
x.z1    <- residuals(fitX.z1)
y.z1    <- residuals(fitY.z1)
# Partial correlation of x and y given z1 = correlation of the residuals
cor(x.z1, y.z1)
[1] -0.05298174
# Same approach, now removing the linear effects of both z1 and z2
fitX.z12 <- lm(x ~ z1 + z2)
fitY.z12 <- lm(y ~ z1 + z2)
x.z12    <- residuals(fitX.z12)
y.z12    <- residuals(fitY.z12)
# Partial correlation of x and y given z1 and z2
cor(x.z12, y.z12)
[1] 0.02470899

Semi-partial correlation \(r_{(x.z)y}\)

# z1 is partialled out of x only (residuals x.z1); y enters unchanged
cor(x.z1, y)
[1] -0.04772153

Covariance matrix

# Three variables observed on the same five cases
X1 <- c( 19,  19,  31,  19,  24)
X2 <- c( 95,  76,  94,  76,  76)
X3 <- c(197, 178, 189, 184, 173)
# Bind the variables as columns of a 5x3 matrix and show it
X <- cbind(X1, X2, X3)
print(X)
     X1 X2  X3
[1,] 19 95 197
[2,] 19 76 178
[3,] 31 94 189
[4,] 19 76 184
[5,] 24 76 173
# Corrected (sample) covariance matrix of the columns of X; stored in covX and printed
(covX <- cov(X))
      X1     X2   X3
X1 27.80  22.55  0.4
X2 22.55 102.80 82.4
X3  0.40  82.40 87.7
# Uncorrected (ML) estimate; the result is a list with components
# cov, center and n.obs
(cML <- cov.wt(X, method="ML"))
$cov
      X1    X2    X3
X1 22.24 18.04  0.32
X2 18.04 82.24 65.92
X3  0.32 65.92 70.16

$center
   X1    X2    X3 
 22.4  83.4 184.2 

$n.obs
[1] 5
# Extract only the covariance matrix from the cov.wt() result
cML$cov
      X1    X2    X3
X1 22.24 18.04  0.32
X2 18.04 82.24 65.92
X3  0.32 65.92 70.16

Correlation matrix

# Correlation matrix for all pairs of columns of X
cor(X)
            X1        X2          X3
X1 1.000000000 0.4218204 0.008100984
X2 0.421820411 1.0000000 0.867822404
X3 0.008100984 0.8678224 1.000000000
# Rescale the covariance matrix covX to the corresponding correlation matrix
cov2cor(covX)
# Correlate one random vector (one value per row of X) with every column of X;
# the result is a 1 x ncol(X) matrix
vec <- rnorm(nrow(X))
cor(vec, X)
              X1         X2         X3
[1,] -0.04054191 -0.1729373 -0.4405556

Rank correlation for continuous ordinal variables

Spearman’s \(\rho\)

# Four variables observed on the same ten cases
DV1 <- c(97, 76, 56, 99, 50, 62, 36, 69, 55,  17)
DV2 <- c(42, 74, 22, 99, 73, 44, 10, 68, 19, -34)
DV3 <- c(61, 88, 21, 29, 56, 37, 21, 70, 46,  88)
DV4 <- c(58, 65, 38, 19, 55, 23, 26, 60, 50,  91)

# Column-bind into a 10x4 matrix; column names are taken from the variable names
DVmat <- cbind(DV1, DV2, DV3, DV4)

# Spearman rank correlation of the first two variables
cor(x=DV1, y=DV2, method="spearman")
[1] 0.7333333
# Spearman rank correlations for all pairs of columns of DVmat
cor(DVmat, method="spearman")
            DV1        DV2        DV3        DV4
DV1  1.00000000  0.7333333 0.05487907 -0.1878788
DV2  0.73333333  1.0000000 0.11585581 -0.1636364
DV3  0.05487907  0.1158558 1.00000000  0.8963581
DV4 -0.18787879 -0.1636364 0.89635813  1.0000000

Kendall’s \(\tau\)-b

# Kendall's tau for DV1 and DV2
cor(DV1, DV2, method="kendall")
[1] 0.6444444
# Kendall's tau for all pairs of columns of DVmat
cor(DVmat, method="kendall")
            DV1         DV2        DV3         DV4
DV1  1.00000000  0.64444444 0.02273314 -0.15555556
DV2  0.64444444  1.00000000 0.11366572 -0.06666667
DV3  0.02273314  0.11366572 1.00000000  0.79566006
DV4 -0.15555556 -0.06666667 0.79566006  1.00000000

Correlation tests

Pearson correlation

# Two-sided test of H0: correlation = 0 with cor.test()'s default (Pearson) method;
# the output also reports a 95% confidence interval
cor.test(DV1, DV2)

    Pearson's product-moment correlation

data:  DV1 and DV2
t = 3.4996, df = 8, p-value = 0.008084
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.2902442 0.9447410
sample estimates:
      cor 
0.7777418 
# corr.test() from package psych: correlations and p-values for all column pairs;
# the Bonferroni-adjusted p-values appear above the diagonal of the probability table
library(psych)
corr.test(DVmat, adjust="bonferroni")
Call:corr.test(x = DVmat, adjust = "bonferroni")
Correlation matrix 
      DV1   DV2   DV3   DV4
DV1  1.00  0.78 -0.09 -0.35
DV2  0.78  1.00 -0.07 -0.39
DV3 -0.09 -0.07  1.00  0.89
DV4 -0.35 -0.39  0.89  1.00
Sample Size 
[1] 10
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
     DV1  DV2 DV3 DV4
DV1 0.00 0.05   1   1
DV2 0.01 0.00   1   1
DV3 0.80 0.86   0   0
DV4 0.32 0.27   0   0

 To see confidence intervals of the correlations, print with the short=FALSE option

Spearman’s \(\rho\)

# Two-sided test of H0: rho = 0 based on Spearman's rank correlation
cor.test(DV1, DV2, method="spearman")

    Spearman's rank correlation rho

data:  DV1 and DV2
S = 44, p-value = 0.02117
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.7333333 
# Spearman correlation test from package coin using an approximative
# (resampling-based) reference distribution, so the p-value depends on the
# random number generator state
# NOTE(review): newer coin releases appear to rename approximate()'s argument
# B to nresample and deprecate B - confirm against the installed coin version
library(coin)
spearman_test(DV1 ~ DV2, distribution=approximate(B=9999))

    Approximative Spearman Correlation Test

data:  DV1 by DV2
Z = 2.2, p-value = 0.0207
alternative hypothesis: true mu is not equal to 0
# Same as the Pearson call to corr.test(), but based on Spearman rank
# correlations; p-values above the diagonal are Bonferroni-adjusted
library(psych)
corr.test(DVmat, method="spearman", adjust="bonferroni")
Call:corr.test(x = DVmat, method = "spearman", adjust = "bonferroni")
Correlation matrix 
      DV1   DV2  DV3   DV4
DV1  1.00  0.73 0.05 -0.19
DV2  0.73  1.00 0.12 -0.16
DV3  0.05  0.12 1.00  0.90
DV4 -0.19 -0.16 0.90  1.00
Sample Size 
[1] 10
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
     DV1  DV2 DV3 DV4
DV1 0.00 0.09   1   1
DV2 0.02 0.00   1   1
DV3 0.88 0.75   0   0
DV4 0.60 0.65   0   0

 To see confidence intervals of the correlations, print with the short=FALSE option

Kendall’s \(\tau\)-b

# Two-sided test of H0: tau = 0 based on Kendall's rank correlation
cor.test(DV1, DV2, method="kendall")

    Kendall's rank correlation tau

data:  DV1 and DV2
T = 37, p-value = 0.009148
alternative hypothesis: true tau is not equal to 0
sample estimates:
      tau 
0.6444444 
# corr.test() based on Kendall's tau for all column pairs;
# p-values above the diagonal are Bonferroni-adjusted
library(psych)
corr.test(DVmat, method="kendall", adjust="bonferroni")
Call:corr.test(x = DVmat, method = "kendall", adjust = "bonferroni")
Correlation matrix 
      DV1   DV2  DV3   DV4
DV1  1.00  0.64 0.02 -0.16
DV2  0.64  1.00 0.11 -0.07
DV3  0.02  0.11 1.00  0.80
DV4 -0.16 -0.07 0.80  1.00
Sample Size 
[1] 10
Probability values (Entries above the diagonal are adjusted for multiple tests.) 
     DV1  DV2  DV3  DV4
DV1 0.00 0.27 1.00 1.00
DV2 0.04 0.00 1.00 1.00
DV3 0.95 0.75 0.00 0.04
DV4 0.67 0.85 0.01 0.00

 To see confidence intervals of the correlations, print with the short=FALSE option

Difference between two independent correlations

# Number of observations per variable; reassigns N
N <- length(DV1)
library(psych)
# r.test() from psych: z-test for the difference between correlation r12
# (sample size n) and correlation r34 (sample size n2), treated as independent
# NOTE(review): all four DVs have the same length and may stem from the same
# cases, in which case the two correlations are not independent - confirm this
# is meant purely as a syntax illustration
r.test(n=N, n2=N, r12=cor(DV1, DV2), r34=cor(DV3, DV4))
Correlation tests 
Call:r.test(n = N, r12 = cor(DV1, DV2), r34 = cor(DV3, DV4), n2 = N)
Test of difference between two independent correlations 
 z value 0.73    with probability  0.46

Detach (automatically) loaded packages (if possible)

# Detach the packages attached by this script (psych, coin) as well as
# survival and splines (presumably attached as dependencies by some coin
# versions). A package is detached only if it is currently on the search
# path, so packages that were never attached no longer trigger an
# "invalid 'name' argument" error message. The order is kept so that coin is
# detached before the packages it may depend on. try() preserves the
# best-effort behaviour: a detach that fails is reported but does not abort
# the script.
pkgs <- paste0("package:", c("psych", "coin", "survival", "splines"))
for(pkg in intersect(pkgs, search())) {
    try(detach(pkg, character.only=TRUE))
}

Get the article source from GitHub

R markdown - markdown - R code - all posts