wants <- c("mlogit", "nnet", "VGAM", "DescTools")
has <- wants %in% rownames(installed.packages())
if(any(!has)) install.packages(wants[!has])
set.seed(123)
N <- 100
X1 <- rnorm(N, 175, 7)
X2 <- rnorm(N, 30, 8)
Ycont <- 0.5*X1 - 0.3*X2 + 10 + rnorm(N, 0, 6)
Ycateg <- cut(Ycont, breaks=quantile(Ycont), include.lowest=TRUE,
labels=c("1", "2", "3", "4"), ordered=FALSE)
dfMN <- data.frame(X1, X2, Ycateg)
vglm()
from package VGAM
Estimator based on likelihood-inference
library(VGAM)
vglmFitMN <- vglm(Ycateg ~ X1 + X2, family=multinomial(refLevel=1),
model=TRUE, data=dfMN)
Odds ratios
(Intercept):1 (Intercept):2 (Intercept):3 X1:1 X1:2
1.183772e-10 4.131415e-14 4.061779e-16 1.164906e+00 1.234359e+00
X1:3 X2:1 X2:2 X2:3
1.261100e+00 8.907694e-01 8.206984e-01 8.462248e-01
multinom()
from package nnet
Estimator based on neural networks -> slightly different results than vglm()
, mlogit()
mlogit()
from package mlogit
Uses person-choice (long) format, so data frame has to be reshaped with mlogit.data()
library(mlogit)
dfMNL <- mlogit.data(dfMN, choice="Ycateg", shape="wide", varying=NULL)
(mlogitFit <- mlogit(Ycateg ~ 0 | X1 + X2, reflevel="1", data=dfMNL))
# not shown
1 2 3 4
1 0.19037931 0.2966176 0.2834440 0.2295591
2 0.33362746 0.3022576 0.1751682 0.1889468
3 0.02114807 0.2064772 0.3437437 0.4286311
4 0.13548641 0.2961053 0.2880800 0.2803283
5 0.06174357 0.2512944 0.3720063 0.3149558
6 0.02202100 0.2108382 0.3276685 0.4394723
[1] "2" "1" "4" "2" "3" "4"
facHat <- factor(categHat, levels=levels(dfMN$Ycateg))
cTab <- xtabs(~ Ycateg + facHat, data=dfMN)
addmargins(cTab)
facHat
Ycateg 1 2 3 4 Sum
1 17 2 6 0 25
2 6 8 7 4 25
3 3 8 8 6 25
4 4 6 5 10 25
Sum 30 24 26 20 100
Correct classification rate
[1] 0.43
[1] 240.6621
[1] -120.3311
[1] 258.6621
McFadden CoxSnell Nagelkerke
0.1319948 0.3064745 0.3269061
predict.mlogit()
requires a new data frame in long format. Therefore also add new (irrelevant) categorical responses to enable reshaping the data frame with mlogit.data()
.
Nnew <- 3
dfNew <- data.frame(X1=rnorm(Nnew, 175, 7),
X2=rnorm(Nnew, 30, 8),
Ycateg=factor(sample(c("1", "2", "3", "4"), Nnew, TRUE),
levels=c("1", "2", "3", "4")))
1 2 3 4
1 0.007734798 0.1456354 0.4577509 0.3888789
2 0.420616161 0.2888815 0.1281280 0.1623743
3 0.177018139 0.3050724 0.2601631 0.2577464
predict(mnFit, dfNew, type="probs")
dfNewL <- mlogit.data(dfNew, choice="Ycateg", shape="wide", varying=NULL)
predict(mlogitFit, dfNewL)
# not shown
Estimated standard deviations, z-values, and p-values for parameters based on normality on assumption that z-values are asymptotically \(N(0, 1)\) distributed.
Estimate Std. Error z value Pr(>|z|)
(Intercept):1 -22.8571447 10.27166961 -2.225261 0.0260637291
(Intercept):2 -30.8175714 10.93523124 -2.818191 0.0048295056
(Intercept):3 -35.4397403 11.06665231 -3.202390 0.0013629219
X1:1 0.1526408 0.06112749 2.497090 0.0125217285
X1:2 0.2105521 0.06512916 3.232839 0.0012256681
X1:3 0.2319840 0.06565479 3.533391 0.0004102658
X2:1 -0.1156697 0.04695990 -2.463160 0.0137718475
X2:2 -0.1975995 0.05414592 -3.649389 0.0002628643
X2:3 -0.1669702 0.05258129 -3.175468 0.0014959522
Profile likelihood based confidence intervals
2.5 % 97.5 %
(Intercept):1 -44.64504851 -3.88007164
(Intercept):2 -53.97866708 -10.71542914
(Intercept):3 -58.91839639 -15.18638093
X1:1 0.04085011 0.28363601
X1:2 0.09215660 0.34985077
X1:3 0.11301172 0.37253474
X2:1 -0.21617246 -0.03045939
X2:2 -0.31247895 -0.09906949
X2:3 -0.27852083 -0.07104205
Tests for other models.
Likelihood-ratio-test for predictor X2
vglmFitR <- vglm(Ycateg ~ X1, family=multinomial(refLevel=1), data=dfMN)
anova(vglmFitMN, vglmFitR, type="I")
Analysis of Deviance Table
Model 1: Ycateg ~ X1 + X2
Model 2: Ycateg ~ X1
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 291 240.66
2 294 261.00 -3 -20.332 0.0001448 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Likelihood-ratio-test for the full model against the 0-model without predictors (just intercept)
vglm0 <- vglm(Ycateg ~ 1, family=multinomial(refLevel=1), data=dfMN)
anova(vglmFitMN, vglm0, type="I")
Analysis of Deviance Table
Model 1: Ycateg ~ X1 + X2
Model 2: Ycateg ~ 1
Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1 291 240.66
2 297 277.26 -6 -36.597 2.11e-06 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
try(detach(package:mlogit))
try(detach(package:nnet))
try(detach(package:VGAM))
try(detach(package:splines))
try(detach(package:stats4))
try(detach(package:dfidx))
try(detach(package:DescTools))
R markdown - markdown - R code - all posts