c("survival")
wants <- wants %in% rownames(installed.packages())
has <-if(any(!has)) install.packages(wants[!has])
Time independent covariates with proportional hazards
\(T = (-\ln(U) \, b \, e^{-\bf{X} \bf{\beta}})^{\frac{1}{a}}\), where \(U \sim \mathcal{U}(0, 1)\)
set.seed(123)
180
N <- 3
P <- factor(sample(c("f", "m"), N, replace=TRUE))
sex <- rnorm(N, 0, 1)
X <- factor(rep(LETTERS[1:P], each=N/P))
IV <- c(0, -1, 1.5)
IVeff <- 0.7*X + IVeff[unclass(IV)] + rnorm(N, 0, 2)
Xbeta <- 1.5
weibA <- 100
weibB <- runif(N, 0, 1)
U <- ceiling((-log(U)*weibB*exp(-Xbeta))^(1/weibA))
eventT <- 120
obsLen <- rep(obsLen, N)
censT <- pmin(eventT, censT)
obsT <- eventT <= censT
status <- data.frame(obsT, status, sex, X, IV) dfSurv <-
Survival data in counting process (start-stop) notation.
library(survival)
survSplit(dfSurv, cut=seq(30, 90, by=30), end="obsT",
dfSurvCP <-event="status", start="start", id="ID", zero=0)
Assumptions
\[ \begin{equation*} \begin{array}{rclcl} \ln \lambda(t) &=& \ln \lambda_{0}(t) + \beta_{1} X_{1} + \dots + \beta_{p} X_{p} &=& \ln \lambda_{0}(t) + \bf{X} \bf{\beta}\\ \lambda(t) &=& \lambda_{0}(t) \, e^{\beta_{1} X_{1} + \dots + \beta_{p} X_{p}} &=& \lambda_{0}(t) \, e^{\bf{X} \bf{\beta}}\\ S(t) &=& S_{0}(t)^{\exp(\bf{X} \bf{\beta})} &=& \exp\left(-\Lambda_{0}(t) \, e^{\bf{X} \bf{\beta}}\right)\\ \Lambda(t) &=& \Lambda_{0}(t) \, e^{\bf{X} \bf{\beta}} & & \end{array} \end{equation*} \]
library(survival)
coxph(Surv(obsT, status) ~ X + IV, data=dfSurv)) (fitCPH <-
Call:
coxph(formula = Surv(obsT, status) ~ X + IV, data = dfSurv)
coef exp(coef) se(coef) z p
X 0.493 1.637 0.0937 5.26 1.4e-07
IVB -0.822 0.439 0.2108 -3.90 9.6e-05
IVC 0.377 1.457 0.1934 1.95 5.1e-02
Likelihood ratio test=51.6 on 3 df, p=3.62e-11 n= 180, number of events= 157
Use counting process data format.
coxph(Surv(start, obsT, status) ~ X + IV, data=dfSurvCP)
summary(fitCPH)
# not shown
library(survival)
extractAIC(fitCPH)
[1] 3.000 1367.667
Log-likelihoods for full model and 0-model without predictors X1, X2
fitCPH$loglik[2]
LLf <- fitCPH$loglik[1] LL0 <-
McFadden pseudo-\(R^2\)
as.vector(1 - (LLf / LL0))
[1] 0.0365218
Cox & Snell
as.vector(1 - exp((2/N) * (LL0 - LLf)))
[1] 0.2493033
Nagelkerke
as.vector((1 - exp((2/N) * (LL0 - LLf))) / (1 - exp(LL0)^(2/N)))
[1] 0.2494003
Restricted model without factor IV
library(survival)
coxph(Surv(obsT, status) ~ X, data=dfSurv)
fitCPH1 <-anova(fitCPH1, fitCPH) # model comparison
Analysis of Deviance Table
Cox model: response is Surv(obsT, status)
Model 1: ~ X
Model 2: ~ X + IV
loglik Chisq Df P(>|Chi|)
1 -698.77
2 -680.83 35.867 2 1.628e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For average pseudo-observation with covariate values equal to the sample means. For factors: means of dummy-coded indicator variables.
library(survival) # for survfit()
survfit(fitCPH)) (CPH <-
Call: survfit(formula = fitCPH)
records n.max n.start events median 0.95LCL 0.95UCL
180 180 180 157 14 11 19
quantile(CPH, probs=c(0.25, 0.5, 0.75), conf.int=FALSE)
25 50 75
4 14 39
More meaningful: Estimated survival function for new specific data
data.frame(sex=factor(c("f", "f"), levels=levels(dfSurv$sex)),
dfNew <-X=c(-2, -2),
IV=factor(c("A", "C"), levels=levels(dfSurv$IV)))
library(survival)
survfit(fitCPH, newdata=dfNew)
CPHnew <-
par(mar=c(5, 4.5, 4, 2)+0.1, cex.lab=1.4, cex.main=1.4)
plot(CPH, main=expression(paste("Cox PH-estimate ", hat(S)(t), " with CI")),
xlab="t", ylab="Survival", lwd=2)
lines(CPHnew$time, CPHnew$surv[ , 1], lwd=2, col="blue")
lines(CPHnew$time, CPHnew$surv[ , 2], lwd=2, col="red")
legend(x="topright", lwd=2, col=c("black", "blue", "red"),
legend=c("pseudo-observation", "sex=f, X=-2, IV=A", "sex=f, X=-2, IV=C"))
library(survival) # for basehaz()
exp(coef(fitCPH))
expCoef <- basehaz(fitCPH, centered=FALSE)
Lambda0A <- expCoef[2]*Lambda0A$hazard
Lambda0B <- expCoef[3]*Lambda0A$hazard
Lambda0C <-plot(hazard ~ time, main=expression(paste("Cox PH-estimate ", hat(Lambda)[g](t), " per group")),
type="s", ylim=c(0, 5), xlab="t", ylab="cumulative hazard", lwd=2, data=Lambda0A)
lines(Lambda0A$time, Lambda0B, lwd=2, col="red")
lines(Lambda0A$time, Lambda0C, lwd=2, col="green")
legend(x="bottomright", lwd=2, col=1:3, legend=LETTERS[1:3])
log-log plot for categorized predictor X
.
library(survival) # for survfit()
transform(dfSurv, Xcut=cut(X, breaks=c(-Inf, median(X), Inf),
dfSurv <-labels=c("lo", "hi")))
survfit(Surv(obsT, status) ~ IV, type="kaplan-meier", data=dfSurv)
KMiv <- survfit(Surv(obsT, status) ~ Xcut, type="kaplan-meier", data=dfSurv)
KMxcut <-
plot(KMiv, fun="cloglog", main="cloglog-Plot for IV1", xlab="ln t",
ylab=expression(ln(-ln(hat(S)[g](t)))), col=c("black", "blue", "red"), lty=1:3)
legend(x="topleft", col=c("black", "blue", "red"), lwd=2, lty=1:3, legend=LETTERS[1:3])
plot(KMxcut, fun="cloglog", main="cloglog-Plot for Xcut", xlab="ln t",
ylab=expression(ln(-ln(hat(S)[g](t)))), col=c("black", "blue"), lty=1:2)
legend(x="topleft", col=c("black", "blue"), lwd=2, lty=1:2, legend=c("lo", "hi"))
Test based on scaled Schoenfeld-residuals
library(survival) # for cox.zph()
cox.zph(fitCPH)) (czph <-
rho chisq p
X -0.1039 2.073 0.1500
IVB 0.1559 4.006 0.0453
IVC 0.0406 0.265 0.6069
GLOBAL NA 5.053 0.1680
Plot scaled Schoenfeld-residuals against covariates
par(mfrow=c(2, 2), cex.main=1.4, cex.lab=1.4)
plot(czph)
residuals(fitCPH, type="dfbetas")
dfbetas <-
par(mfrow=c(2, 2), cex.main=1.4, cex.lab=1.4)
plot(dfbetas[ , 1], type="h", main="DfBETAS for X", ylab="DfBETAS", lwd=2)
plot(dfbetas[ , 2], type="h", main="DfBETAS for IV-B", ylab="DfBETAS", lwd=2)
plot(dfbetas[ , 3], type="h", main="DfBETAS for IV-C", ylab="DfBETAS", lwd=2)
Plot martingale-residuals against continuous covariate with a nonparametric LOESS-smoother
residuals(fitCPH, type="martingale")
resMart <-plot(dfSurv$X, resMart, main="Martingale-residuals for X",
xlab="X", ylab="Residuen", pch=20)
lines(loess.smooth(dfSurv$X, resMart), lwd=2, col="blue")
legend(x="bottomleft", col="blue", lwd=2, legend="LOESS fit", cex=1.4)
With respect to an average pseudo-observation with covariate values equal to the sample means. For factors: means of dummy-coded indicator variables.
library(survival)
predict(fitCPH, type="risk")
predRes <-head(predRes, n=10)
[1] 1.9166403 1.5389801 1.3210722 0.8617067 2.2969713 0.8735328 3.4527637
[8] 2.5002058 1.0455378 0.7079919
Estimated global survival function.
library(survival)
survexp(~ 1, ratetable=fitCPH, data=dfSurv)
Shat1 <-with(Shat1, head(data.frame(time, surv), n=4))
time surv
1 1 0.9269729
2 2 0.8643413
3 3 0.7681378
4 4 0.7173365
Estimated survival function per group.
library(survival)
survexp(~ IV, ratetable=fitCPH, data=dfSurv)
Shat2 <-with(Shat2, head(data.frame(time, surv), n=4))
time IV.A IV.B IV.C
1 1 0.9265656 0.9633824 0.8909708
2 2 0.8626937 0.9299972 0.8003328
3 3 0.7630677 0.8746553 0.6666905
4 4 0.7097748 0.8431836 0.5990511
For Cox PH models with Firth’s penalized likelihood, see package coxphf
.
try(detach(package:survival))
try(detach(package:splines))
R markdown - markdown - R code - all posts