c("boot")
wants <- wants %in% rownames(installed.packages())
has <-if(any(!has)) install.packages(wants[!has])
set.seed(123)
100
N <- rnorm(N, 175, 7)
X1 <- rnorm(N, 30, 8)
X2 <- abs(rnorm(N, 60, 30))
X3 <- 0.5*X1 - 0.3*X2 - 0.4*X3 + 10 + rnorm(N, 0, 3)
Y <- data.frame(X1, X2, X3, Y) dfRegr <-
glm(Y ~ X1 + X2 + X3, data=dfRegr,
glmFit <-family=gaussian(link="identity"))
library(boot) # for cv.glm()
3
k <- cv.glm(data=dfRegr, glmfit=glmFit, K=k)
kfCV <-$delta kfCV
[1] 10.16324 10.03843
cv.glm(data=dfRegr, glmfit=glmFit, K=N) LOOCV <-
CVE = mean(PRESS)
$delta LOOCV
[1] 10.3501 10.3460
Generate data and fit logistic regression.
c(18, 16, 16, 15, 14, 20, 14, 21, 25, 11)
SSRIpre <- c(12, 0, 10, 9, 0, 11, 2, 4, 15, 10)
SSRIpost <- c(18, 16, 15, 14, 20, 25, 11, 25, 11, 22)
PlacPre <- c(11, 4, 19, 15, 3, 14, 10, 16, 10, 20)
PlacPost <- c(15, 19, 10, 29, 24, 15, 9, 18, 22, 13)
WLpre <- c(17, 25, 10, 22, 23, 10, 2, 10, 14, 7)
WLpost <- c(SSRIpre, PlacPre, WLpre)
DVpre <- c(SSRIpost, PlacPost, WLpost)
DVpost <- cut(DVpost, breaks=c(-Inf, median(DVpost), Inf),
postFac <-labels=c("lo", "hi"))
data.frame(DVpre, DVpost, postFac)
dfAncova <-
glm(postFac ~ DVpre, family=binomial(link="logit"), data=dfAncova) glmLR <-
\(k\)-fold crossvalidation using Brier score \(B\) as a strictly proper score.
# Brier score loss function - general version for several GLMs
function(y, pHat) {
brierA <-mean(((y == 1) * pHat)^2 + ((y == 0) * (1-pHat))^2)
}
library(boot) # for cv.glm()
cv.glm(data=dfAncova, glmfit=glmLR, cost=brierA, K=10)
B1 <-$delta B1
[1] 0.5560471 0.5544728
# Brier score loss function - simplified version for logistic regression only
function(y, pHat) {
brierB <-mean((y-pHat)^2)
}
cv.glm(data=dfAncova, glmfit=glmLR, cost=brierB, K=10)
B2 <-$delta B2
[1] 0.1841384 0.1824343
Function to fit logistic regression and calculate Brier score. Take into account that logistic regression might not converge for resample due to perfect separation: Turn warnings into errors and use try()
to handle errors.
function(dat, idx) {
getBSB <- options(warn=2)
op <-on.exit(options(op))
try(glm(postFac ~ DVpre, family=binomial(link="logit"), subset=idx, data=dat))
bsFit <- inherits(bsFit, "try-error")
fail <-if(fail || !bsFit$converged) {
return(NA)
else {
} brierB(bsFit$y, predict(bsFit, type="response"))
BbsTrn <- brierB(as.numeric(dat$postFac)-1, predict(bsFit, newdata=dat, type="response"))
BbsTst <-return(BbsTrn - BbsTst)
} }
Bootstrap logistic regression, calculate optimim in prediction error.
library(boot) # for boot()
999
nR <- boot(dfAncova, statistic=getBSB, R=nR)
bsRes <-
brierB(glmLR$y, predict(glmLR, type="response"))) (Btrain <-
[1] 0.1494605
mean(bsRes$t, na.rm=TRUE)) (optimism <-
[1] -0.01881823
Optimism-corrected bootstrap estimate of prediction error.
Btrain - optimism) (predErr <-
[1] 0.1682787
Packages caret
and ipred
provide more crossvalidation and bootstrapping options like the +.632 bootstrap.
try(detach(package:boot))
R markdown - markdown - R code - all posts