is.na()
In R, missing values are coded as NA (not available).
vec <- c(2, 5, 7)
vec[5]
[1] NA
(vec1 <- c(10, 20, NA, 40, 50, NA))
[1] 10 20 NA 40 50 NA
length(vec1)
[1] 6
Identify missing values with is.na()
is.na(vec1)
[1] FALSE FALSE  TRUE FALSE FALSE  TRUE
any(is.na(vec1))
[1] TRUE
which(is.na(vec1))
[1] 3 6
sum(is.na(vec1))
[1] 2
vec2 <- c(NA, 7, 9, 10, 1, 8)
(matNA <- rbind(vec1, vec2))
     [,1] [,2] [,3] [,4] [,5] [,6]
vec1   10   20   NA   40   50   NA
vec2   NA    7    9   10    1    8
is.na(matNA)
      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]
vec1 FALSE FALSE  TRUE FALSE FALSE  TRUE
vec2  TRUE FALSE FALSE FALSE FALSE FALSE
NA in different situations
LETTERS[c(1, NA, 3)]
[1] "A" NA  "C"
factor(LETTERS[c(1, NA, 3)])
[1] A    <NA> C
Levels: A C
factor(LETTERS[c(1, NA, 3)], exclude=NULL)
[1] A    <NA> C
Levels: A C <NA>
NA & TRUE
[1] NA
TRUE | NA
[1] TRUE
vecNA <- c(-3, 2, 0, NA, -7, 5)
(logIdx <- vecNA > 0)
[1] FALSE  TRUE FALSE    NA FALSE  TRUE
vecNA[logIdx]
[1]  2 NA  5
vecNA[which(logIdx)]
[1] 2 5
NA
When data is entered in other applications (spreadsheets, SPSS, etc.), missing values are often coded as a reserved numeric value, e.g., 99 or 9999. These values need to be replaced with NA.
vec <- c(30, 25, 23, 21, -999, 999)
is.na(vec) <- vec %in% c(-999, 999)
vec
[1] 30 25 23 21 NA NA
(mat <- matrix(c(30, 25, 23, 21, -999, 999), nrow=2, ncol=3))
     [,1] [,2] [,3]
[1,]   30   23 -999
[2,]   25   21  999
is.na(mat) <- mat %in% c(-999, 999)
mat
     [,1] [,2] [,3]
[1,]   30   23   NA
[2,]   25   21   NA
vecNA <- c(-3, 2, 0, NA, -7, 5)
mean(vecNA)
[1] NA
goodIdx <- !is.na(vecNA)
mean(vecNA[goodIdx])
[1] -0.6
sd(na.omit(vecNA))
[1] 4.615192
sum(vecNA, na.rm=TRUE)
[1] -3
ageNA <- c(18, NA, 27, 22)
DV1 <- c(NA, 1, 5, -3)
DV2 <- c(9, 4, 2, 7)
(matNA <- cbind(ageNA, DV1, DV2))
     ageNA DV1 DV2
[1,]    18  NA   9
[2,]    NA   1   4
[3,]    27   5   2
[4,]    22  -3   7
apply(matNA, 1, FUN=mean)
[1]        NA        NA 11.333333  8.666667
apply(matNA, 1, FUN=mean, na.rm=TRUE)
[1] 13.500000  2.500000 11.333333  8.666667
(rowNAidx <- apply(is.na(matNA), 1, any))
[1]  TRUE  TRUE FALSE FALSE
matNA[!rowNAidx, ]
     ageNA DV1 DV2
[1,]    27   5   2
[2,]    22  -3   7
na.omit(matNA)
     ageNA DV1 DV2
[1,]    27   5   2
[2,]    22  -3   7
attr(,"na.action")
[1] 2 1
attr(,"class")
[1] "omit"
colMeans(na.omit(matNA))
ageNA   DV1   DV2
 24.5   1.0   4.5
cov(matNA, use="complete.obs")
      ageNA DV1   DV2
ageNA  12.5  20 -12.5
DV1    20.0  32 -20.0
DV2   -12.5 -20  12.5
all(cov(matNA, use="complete.obs") == cov(na.omit(matNA)))
[1] TRUE
Set casewise deletion as a permanent option for statistical functions (another choice is "na.fail")
options(na.action="na.omit")
rowMeans(matNA)
[1]        NA        NA 11.333333  8.666667
rowMeans(mat, na.rm=TRUE)
[1] 26.5 23.0
cov(matNA, use="pairwise.complete.obs")
          ageNA DV1        DV2
ageNA  20.33333  20 -16.000000
DV1    20.00000  16 -10.000000
DV2   -16.00000 -10   9.666667
Multiple imputation is supported by functions in packages Hmisc, Amelia II, and mice.
R markdown - markdown - R code - all posts