pROC
, regressionOrdinal, regressionMultinom, regressionDiag for outliers, collinearity, crossvalidation c("rms")
wants <- wants %in% rownames(installed.packages())
has <-if(any(!has)) install.packages(wants[!has])
set.seed(123)
c(18, 16, 16, 15, 14, 20, 14, 21, 25, 11)
SSRIpre <- c(12, 0, 10, 9, 0, 11, 2, 4, 15, 10)
SSRIpost <- c(18, 16, 15, 14, 20, 25, 11, 25, 11, 22)
PlacPre <- c(11, 4, 19, 15, 3, 14, 10, 16, 10, 20)
PlacPost <- c(15, 19, 10, 29, 24, 15, 9, 18, 22, 13)
WLpre <- c(17, 25, 10, 22, 23, 10, 2, 10, 14, 7)
WLpost <-
3
P <- rep(length(SSRIpre), times=P)
Nj <- factor(rep(1:P, Nj), labels=c("SSRI", "Placebo", "WL"))
IV <- c(SSRIpre, PlacPre, WLpre)
DVpre <- c(SSRIpost, PlacPost, WLpost)
DVpost <- cut(DVpost, breaks=c(-Inf, median(DVpost), Inf),
postFac <-labels=c("lo", "hi"))
data.frame(IV, DVpre, DVpost, postFac) dfAncova <-
cdplot(postFac ~ DVpre, data=dfAncova, subset=IV == "SSRI",
main="Estimated categ probs SSRI")
cdplot(postFac ~ DVpre, data=dfAncova, subset=IV == "Placebo",
main="Estimated categ probs placebo")
cdplot(postFac ~ DVpre, data=dfAncova, subset=IV == "WL",
main="Estimated categ probs WL")
glm(postFac ~ DVpre + IV, family=binomial(link="logit"), data=dfAncova)) (glmFit <-
Call: glm(formula = postFac ~ DVpre + IV, family = binomial(link = "logit"),
data = dfAncova)
Coefficients:
(Intercept) DVpre IVPlacebo IVWL
-8.4230 0.4258 1.7306 1.2027
Degrees of Freedom: 29 Total (i.e. Null); 26 Residual
Null Deviance: 41.46
Residual Deviance: 24.41 AIC: 32.41
Odds ratios
exp(coef(glmFit))
(Intercept) DVpre IVPlacebo IVWL
0.0002197532 1.5308001795 5.6440022784 3.3291484767
Profile likelihood based confidence intervals for odds ratios
exp(confint(glmFit))
2.5 % 97.5 %
(Intercept) 1.488482e-07 0.0251596
DVpre 1.193766e+00 2.2446549
IVPlacebo 5.343091e-01 95.1942030
IVWL 2.916673e-01 52.2883653
100
N <- rnorm(N, 100, 15)
x1 <- rnorm(N, 10, 3)
x2 <- sample(40:60, N, replace=TRUE)
total <- rbinom(N, total, prob=0.4)
hits <- cbind(hits, total-hits)
hitMat <-glm(hitMat ~ x1 + x2, family=binomial(link="logit"))
Call: glm(formula = hitMat ~ x1 + x2, family = binomial(link = "logit"))
Coefficients:
(Intercept) x1 x2
-0.102410 -0.003373 0.005638
Degrees of Freedom: 99 Total (i.e. Null); 97 Residual
Null Deviance: 99.35
Residual Deviance: 96.44 AIC: 532.5
hits/total
relHits <-glm(relHits ~ x1 + x2, weights=total, family=binomial(link="logit"))
Call: glm(formula = relHits ~ x1 + x2, family = binomial(link = "logit"),
weights = total)
Coefficients:
(Intercept) x1 x2
-0.102410 -0.003373 0.005638
Degrees of Freedom: 99 Total (i.e. Null); 97 Residual
Null Deviance: 99.35
Residual Deviance: 96.44 AIC: 532.5
predict(glmFit, type="link")
logitHat <-plot(logitHat, pch=16, col=c("red", "blue")[unclass(dfAncova$postFac)])
abline(h=0)
fitted(glmFit)
Phat <- predict(glmFit, type="response")
Phat <-head(Phat)
1 2 3 4 5 6
0.31891231 0.16653918 0.16653918 0.11545968 0.07856997 0.52318493
mean(Phat)
[1] 0.4666667
prop.table(xtabs(~ postFac, data=dfAncova))
postFac
lo hi
0.5333333 0.4666667
0.5
thresh <- cut(Phat, breaks=c(-Inf, thresh, Inf), labels=c("lo", "hi"))
facHat <- xtabs(~ postFac + facHat, data=dfAncova)
cTab <-addmargins(cTab)
facHat
postFac lo hi Sum
lo 12 4 16
hi 4 10 14
Sum 16 14 30
Correct classification rate
sum(diag(cTab)) / sum(cTab)) (CCR <-
[1] 0.7333333
Deviance, log-likelihood and AIC
deviance(glmFit)
[1] 24.40857
logLik(glmFit)
'log Lik.' -12.20428 (df=4)
AIC(glmFit)
[1] 32.40857
Nagelkerke’s pseudo-\(R^{2}\) (R2), area under the ROC-Kurve (C), Somers’ \(D_{xy}\) (Dxy), Goodman & Kruskal’s \(\gamma\) (Gamma), Kendall’s \(\tau\) (Tau-a)
library(rms)
lrm(postFac ~ DVpre + IV, data=dfAncova)
Logistic Regression Model
lrm(formula = postFac ~ DVpre + IV, data = dfAncova)
Model Likelihood Discrimination Rank Discrim.
Ratio Test Indexes Indexes
Obs 30 LR chi2 17.05 R2 0.579 C 0.900
lo 16 d.f. 3 g 2.686 Dxy 0.799
hi 14 Pr(> chi2) 0.0007 gr 14.672 gamma 0.803
max |deriv| 2e-06 gp 0.404 tau-a 0.411
Brier 0.139
Coef S.E. Wald Z Pr(>|Z|)
Intercept -8.4230 2.9502 -2.86 0.0043
DVpre 0.4258 0.1553 2.74 0.0061
IV=Placebo 1.7306 1.2733 1.36 0.1741
IV=WL 1.2027 1.2735 0.94 0.3450
For plotting the ROC-curve, see pROC
in associationOrder
Log-likelihoods for full model and 0-model without predictors X1, X2
nobs(glmFit)
N <- update(glmFit, . ~ 1)
glm0 <- logLik(glmFit)
LLf <- logLik(glm0) LL0 <-
McFadden pseudo-\(R^2\)
as.vector(1 - (LLf / LL0))
[1] 0.411209
Cox & Snell
as.vector(1 - exp((2/N) * (LL0 - LLf)))
[1] 0.4334714
Nagelkerke
as.vector((1 - exp((2/N) * (LL0 - LLf))) / (1 - exp(LL0)^(2/N)))
[1] 0.578822
cv.glm()
function from package boot
, see crossvalidation
3
Nnew <- data.frame(DVpre=rnorm(Nnew, 20, sd=7),
dfNew <-IV=factor(rep("SSRI", Nnew), levels=levels(dfAncova$IV)))
predict(glmFit, newdata=dfNew, type="response")
1 2 3
0.11516886 0.10427434 0.06270597
Wald-tests for parameters
summary(glmFit)
Call:
glm(formula = postFac ~ DVpre + IV, family = binomial(link = "logit"),
data = dfAncova)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.9865 -0.5629 -0.2372 0.4660 1.5455
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -8.4230 2.9502 -2.855 0.0043 **
DVpre 0.4258 0.1553 2.742 0.0061 **
IVPlacebo 1.7306 1.2733 1.359 0.1741
IVWL 1.2027 1.2735 0.944 0.3450
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 41.455 on 29 degrees of freedom
Residual deviance: 24.409 on 26 degrees of freedom
AIC: 32.409
Number of Fisher Scoring iterations: 5
Or see lrm()
above
anova(glm0, glmFit, test="Chisq")
Analysis of Deviance Table
Model 1: postFac ~ 1
Model 2: postFac ~ DVpre + IV
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 29 41.455
2 26 24.409 3 17.047 0.0006912 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(glmFit, test="Chi")
Single term deletions
Model:
postFac ~ DVpre + IV
Df Deviance AIC LRT Pr(>Chi)
<none> 24.409 32.409
DVpre 1 39.540 45.540 15.1319 0.0001003 ***
IV 2 26.566 30.566 2.1572 0.3400666
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Or see lrm()
above
update(glmFit, . ~ . - IV) # no IV factor
glmPre <-anova(glmPre, glmFit, test="Chisq")
Analysis of Deviance Table
Model 1: postFac ~ DVpre
Model 2: postFac ~ DVpre + IV
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 28 26.566
2 26 24.409 2 2.1572 0.3401
anova(glm0, glmPre, test="Chisq")
Analysis of Deviance Table
Model 1: postFac ~ 1
Model 2: postFac ~ DVpre
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 29 41.455
2 28 26.566 1 14.89 0.000114 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For penalized logistic regression, see packages logistf
(using Firth’s penalized likelihood) and glmnet
. An example using glmnet
for linear regression is in regressionRobPen.
try(detach(package:rms))
try(detach(package:Hmisc))
try(detach(package:grid))
try(detach(package:lattice))
try(detach(package:survival))
try(detach(package:splines))
try(detach(package:Formula))
R markdown - markdown - R code - all posts