# Factors: Representing categorical data

## TODO

- Link to recode for transforming continuous variables into factors

## Install required packages

forcats

wants <- c("forcats")
has   <- wants %in% rownames(installed.packages())
if(any(!has)) install.packages(wants[!has])

## Unordered factors

### Create factors from existing variables

sex     <- c("m", "f", "f", "m", "m", "m", "f", "f")
(sexFac <- factor(sex))
[1] m f f m m m f f
Levels: f m
factor(c(1, 1, 3, 3, 4, 4), levels=1:5)
[1] 1 1 3 3 4 4
Levels: 1 2 3 4 5
(sexNum <- rbinom(10, size=1, prob=0.5))
 [1] 0 1 1 1 1 1 0 0 0 0
factor(sexNum, labels=c("man", "woman"))
 [1] man   woman woman woman woman woman man   man   man   man
Levels: man woman
levels(sexFac) <- c("female", "male")
sexFac
[1] male   female female male   male   male   female female
Levels: female male

### Generate factors

(fac1 <- factor(rep(c("A", "B"), c(5, 5))))
 [1] A A A A A B B B B B
Levels: A B
(fac2 <- gl(2, 5, labels=c("less", "more"), ordered=TRUE))
 [1] less less less less less more more more more more
Levels: less < more
sample(fac2, length(fac2), replace=FALSE)
 [1] more less more more less less less more less more
Levels: less < more
expand.grid(IV1=gl(2, 2, labels=c("a", "b")), IV2=gl(3, 1))
   IV1 IV2
1    a   1
2    a   1
3    b   1
4    b   1
5    a   2
6    a   2
7    b   2
8    b   2
9    a   3
10   a   3
11   b   3
12   b   3

nlevels(sexFac)
[1] 2
summary(sexFac)
female   male 
     4      4 
levels(sexFac)
[1] "female" "male"  
str(sexFac)
 Factor w/ 2 levels "female","male": 2 1 1 2 2 2 1 1
unclass(sexFac)
[1] 2 1 1 2 2 2 1 1
attr(,"levels")
[1] "female" "male"  
unclass(factor(10:15))
[1] 1 2 3 4 5 6
attr(,"levels")
[1] "10" "11" "12" "13" "14" "15"
as.character(sexFac)
[1] "male"   "female" "female" "male"   "male"   "male"   "female" "female"

## Joining factors

### Concatenating factors

library(forcats)
(fac1 <- factor(sample(LETTERS[1:5], 4), levels=LETTERS[1:5]))
[1] A B E D
Levels: A B C D E
(fac2 <- factor(sample(letters[1:5], 3), levels=letters[1:5]))
[1] e b d
Levels: a b c d e
fct_c(fac1, fac2)
[1] A B E D e b d
Levels: A B C D E a b c d e

### Repeating factors

rep(fac1, times=2)
[1] A B E D A B E D
Levels: A B C D E

### Crossing two factors

Njk  <- 2
P    <- 2
Q    <- 3
(IV1 <- factor(rep(c("lo", "hi"), each=Njk*Q)))
 [1] lo lo lo lo lo lo hi hi hi hi hi hi
Levels: hi lo
(IV2 <- factor(rep(1:Q, times=Njk*P)))
 [1] 1 2 3 1 2 3 1 2 3 1 2 3
Levels: 1 2 3
interaction(IV1, IV2)
 [1] lo.1 lo.2 lo.3 lo.1 lo.2 lo.3 hi.1 hi.2 hi.3 hi.1 hi.2 hi.3
Levels: hi.1 lo.1 hi.2 lo.2 hi.3 lo.3

## Ordered factors

(status <- factor(c("hi", "lo", "hi", "mid")))
[1] hi  lo  hi  mid
Levels: hi lo mid
(ordStat <- ordered(status, levels=c("lo", "mid", "hi")))
[1] hi  lo  hi  mid
Levels: lo < mid < hi
ordStat[1] > ordStat[2]
[1] TRUE

## Control the order of factor levels

### Free ordering of group levels

(chars <- rep(LETTERS[1:3], each=5))
 [1] "A" "A" "A" "A" "A" "B" "B" "B" "B" "B" "C" "C" "C" "C" "C"
(fac1  <- factor(chars))
 [1] A A A A A B B B B B C C C C C
Levels: A B C
factor(chars, levels=c("C", "A", "B"))
 [1] A A A A A B B B B B C C C C C
Levels: C A B

#### Using fct_relevel() from package forcats

(facGrp <- factor(rep(LETTERS[1:3], each=5)))
 [1] A A A A A B B B B B C C C C C
Levels: A B C
library(forcats)
fct_relevel(facGrp, "A", after=1)
 [1] A A A A A B B B B B C C C C C
Levels: B A C
fct_relevel(fac1, "A", after=Inf)
 [1] A A A A A B B B B B C C C C C
Levels: B C A

### Reorder group levels according to group statistics

vec <- rnorm(15, rep(c(10, 5, 15), each=5), 3)
tapply(vec, fac1, FUN=mean)
        A         B         C
10.571183  3.775138 13.249144 
reorder(fac1, vec, FUN=mean)
 [1] A A A A A B B B B B C C C C C
attr(,"scores")
        A         B         C 
10.571183  3.775138 13.249144 
Levels: B A C

### Relevance of level order for sorting factors

(fac2 <- factor(sample(1:2, 10, replace=TRUE), labels=c("B", "A")))
 [1] B A B A B B A B A A
Levels: B A
sort(fac2)
 [1] B B B B B A A A A A
Levels: B A
sort(as.character(fac2))
 [1] "A" "A" "A" "A" "A" "B" "B" "B" "B" "B"

## Add, combine and remove factor levels

(status <- factor(c("hi", "lo", "hi")))
[1] hi lo hi
Levels: hi lo
status[4] <- "mid"
status
[1] hi   lo   hi   <NA>
Levels: hi lo
levels(status) <- c(levels(status), "mid")
status[4] <- "mid"
status
[1] hi  lo  hi  mid
Levels: hi lo mid

#### Using package forcats

library(forcats)
fct_expand(status, "new_level")
[1] hi  lo  hi  mid
Levels: hi lo mid new_level

### Combine factor levels

hiNotHi <- status
levels(hiNotHi) <- list(hi="hi", notHi=c("mid", "lo"))
hiNotHi
[1] hi    notHi hi    notHi
Levels: hi notHi

#### Using package forcats

fct_collapse(status, notHi=c("mid", "lo"))
[1] hi    notHi hi    notHi
Levels: hi notHi

### Remove factor levels

status[1:2]
[1] hi lo
Levels: hi lo mid
(newStatus <- droplevels(status[1:2]))
[1] hi lo
Levels: hi lo

#### Using package forcats

fct_drop(status[1:2], "mid")
[1] hi lo
Levels: hi lo

## Detach (automatically) loaded packages (if possible)

try(detach(package:forcats))