# Packages used by the examples; install whichever of them is not yet available.
# NOTE(review): nothing in view attaches these packages (library(dplyr),
# library(forcats)) -- presumably done elsewhere; confirm.
wants <- c("dplyr", "forcats")
has   <- wants %in% rownames(installed.packages())
if (!all(has)) {
    install.packages(wants[!has])
}
grep(), dfSplitMerge, dfReshape
set.seed(123)
# Simulate a small example data frame with N observations.
# The order of the random draws below determines the simulated values,
# so it must stay unchanged for results to be reproducible under a fixed seed.
N <- 12
# sex: factor with levels "f" and "m", drawn with replacement
sex <- factor(sample(c("f", "m"), N, replace=TRUE), levels=c("f", "m"))
# group: random permutation of 4 x ("CG", "WL", "T"), so each level occurs 4 times
group <- factor(sample(rep(c("CG", "WL", "T"), 4), N, replace=FALSE), levels=c("CG", "WL", "T"))
# age: integers from 18 to 35, drawn with replacement
age <- sample(18:35, N, replace=TRUE)
# IQ: normal draws (mean 100, sd 15) rounded to whole numbers
IQ <- round(rnorm(N, mean=100, sd=15))
# rating: uniform draws on [0, 6] rounded to whole numbers
rating <- round(runif(N, min=0, max=6))
# The outer parentheses print the data frame in addition to assigning it
(myDf1 <- data.frame(id=1:N, sex, group, age, IQ, rating))
id sex group age IQ rating
1 1 f T 25 95 5
2 2 f T 24 84 5
3 3 f CG 27 99 3
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
id sex fac age IQ score
1 1 f T 25 95 5
2 2 f T 24 84 5
3 3 f CG 27 99 3
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
Make all names that start with ‘i’ or ‘I’ lower case
id sex group age iq rating
1 1 f T 25 95 5
2 2 f T 24 84 5
3 3 f CG 27 99 3
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
Prefix all character variables with fac_ (sex and group are factors, not character variables, so no name changes here)
id sex group age IQ rating
1 1 f T 25 95 5
2 2 f T 24 84 5
3 3 f CG 27 99 3
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
Make all variable names upper case
ID SEX GROUP AGE IQ RATING
1 1 f T 25 95 5
2 2 f T 24 84 5
3 3 f CG 27 99 3
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
group IQ
1 T 95
2 T 84
3 CG 99
4 WL 116
5 T 98
6 WL 83
7 CG 88
8 CG 110
9 T 95
10 WL 80
11 CG 91
12 WL 98
id IQ
1 1 95
2 2 84
3 3 99
4 4 116
5 5 98
6 6 83
7 7 88
8 8 110
9 9 95
10 10 80
11 11 91
12 12 98
id age IQ rating
1 1 25 95 5
2 2 24 84 5
3 3 27 99 3
4 4 26 116 5
5 5 21 98 4
6 6 31 83 4
7 7 34 88 0
8 8 28 110 3
9 9 24 95 1
10 10 29 80 2
11 11 32 91 4
12 12 27 98 2
group
1 T
2 T
3 CG
4 WL
5 T
6 WL
7 CG
8 CG
9 T
10 WL
11 CG
12 WL
id group IQ
1 1 T 95
2 2 T 84
3 3 CG 99
4 4 WL 116
5 5 T 98
6 6 WL 83
7 7 CG 88
8 8 CG 110
9 9 T 95
10 10 WL 80
11 11 CG 91
12 12 WL 98
id sex rating
1 1 f 5
2 2 f 5
3 3 f 3
4 4 m 5
5 5 f 4
6 6 m 4
7 7 m 0
8 8 m 3
9 9 f 1
10 10 f 2
11 11 m 4
12 12 m 2
Matching all criteria simultaneously - logical AND
id sex group age IQ rating
1 8 m CG 28 110 3
2 11 m CG 32 91 4
3 12 m WL 27 98 2
Matching some criteria - logical OR
id sex group age IQ rating
1 2 f T 24 84 5
2 4 m WL 26 116 5
3 6 m WL 31 83 4
4 7 m CG 34 88 0
5 10 f WL 29 80 2
By row number
id sex group age IQ rating
1 5 f T 21 98 4
2 6 m WL 31 83 4
3 7 m CG 34 88 0
Drop duplicate rows
sex group
1 f T
2 f CG
3 m WL
4 m CG
5 f WL
sex group
1 f T
2 f CG
3 m WL
4 m CG
5 f WL
Count distinct rows
[1] 12
Add some missing values to data frame first
# Randomly replace elements of a vector with missing values.
#   x:    vector (or factor) whose elements may be set to NA
#   prob: probabilities for c(keep element, replace by NA), passed to sample()
# Returns a vector like x with some elements replaced by NA.
getNA <- function(x, prob=c(0.7, 0.3)) {
    # Indexing one position past the end yields an NA of the same type as x,
    # which if_else() needs because both branches must be type-compatible
    typed_na <- x[length(x) + 1]
    keep     <- sample(c(TRUE, FALSE), length(x), replace = TRUE, prob = prob)
    if_else(keep, x, typed_na)
}
# Fix the seed so the positions of the inserted NAs are reproducible
set.seed(123)
# Insert missing values into four variables; the getNA() calls run in this
# order, which determines the random draws each variable receives.
# The outer parentheses print the result in addition to assigning it.
(myDf1NA <- myDf1 %>%
    mutate(
        group  = getNA(group),
        age    = getNA(age),
        IQ     = getNA(IQ),
        rating = getNA(rating)
    ))
id sex group age IQ rating
1 1 f T 25 95 NA
2 2 f <NA> 24 NA 5
3 3 f CG 27 99 3
4 4 m <NA> NA 116 5
5 5 f <NA> 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 NA 0
8 8 m <NA> NA NA 3
9 9 f T NA 95 1
10 10 f WL 29 NA 2
11 11 m <NA> 32 91 4
12 12 m WL NA 98 2
Show only cases with all missings in variables “from group to IQ”
id sex group age IQ rating
1 8 m <NA> NA NA 3
Show only cases with odd values on all numeric variables
id sex group age IQ rating
1 3 f CG 27 99 3
Add new variables or overwrite existing variables in mutate()
id sex group age IQ rating married
1 1 f T 25 95 5 TRUE
2 2 f T 24 84 5 TRUE
3 3 f CG 27 99 3 FALSE
4 4 m WL 26 116 5 TRUE
5 5 f T 21 98 4 TRUE
6 6 m WL 31 83 4 TRUE
7 7 m CG 34 88 0 TRUE
8 8 m CG 28 110 3 FALSE
9 9 f T 24 95 1 FALSE
10 10 f WL 29 80 2 TRUE
11 11 m CG 32 91 4 FALSE
12 12 m WL 27 98 2 TRUE
# Overwrite one variable and add two new ones in a single mutate() call.
# Later arguments may refer to variables created by earlier ones.
myDf1 %>%
    mutate(
        # merge factor levels "CG" and "WL" into one level "CG_WL"
        group     = fct_collapse(group, CG_WL = c("CG", "WL")),
        # squared rating
        ratingSq  = rating^2,
        # z-standardized squared rating; scale() returns a one-column matrix
        ratingSqZ = scale(ratingSq)
    )
id sex group age IQ rating ratingSq ratingSqZ
1 1 f T 25 95 5 25 1.3359593
2 2 f T 24 84 5 25 1.3359593
3 3 f CG_WL 27 99 3 9 -0.3740686
4 4 m CG_WL 26 116 5 25 1.3359593
5 5 f T 21 98 4 16 0.3740686
6 6 m CG_WL 31 83 4 16 0.3740686
7 7 m CG_WL 34 88 0 0 -1.3359593
8 8 m CG_WL 28 110 3 9 -0.3740686
9 9 f T 24 95 1 1 -1.2290825
10 10 f CG_WL 29 80 2 4 -0.9084523
11 11 m CG_WL 32 91 4 16 0.3740686
12 12 m CG_WL 27 98 2 4 -0.9084523
Conditional changes to variables
# Derive two variables from conditions on existing ones:
#   age_even - TRUE when age is divisible by 2
#   sex_IQ   - label combining sex with IQ below 100 vs. 100 and above
myDf1 %>%
    mutate(
        age_even = if_else(age %% 2 == 0, TRUE, FALSE),
        sex_IQ   = case_when(
            (sex == "f" & IQ <  100) ~ "female_lo",
            (sex == "f" & IQ >= 100) ~ "female_hi",
            (sex == "m" & IQ <  100) ~ "male_lo",
            (sex == "m" & IQ >= 100) ~ "male_hi",
            # fallback for rows where none of the conditions above is TRUE
            TRUE ~ "other"
        )
    )
id sex group age IQ rating age_even sex_IQ
1 1 f T 25 95 5 FALSE female_lo
2 2 f T 24 84 5 TRUE female_lo
3 3 f CG 27 99 3 FALSE female_lo
4 4 m WL 26 116 5 TRUE male_hi
5 5 f T 21 98 4 FALSE female_lo
6 6 m WL 31 83 4 FALSE male_lo
7 7 m CG 34 88 0 TRUE male_lo
8 8 m CG 28 110 3 TRUE male_hi
9 9 f T 24 95 1 TRUE female_lo
10 10 f WL 29 80 2 FALSE female_lo
11 11 m CG 32 91 4 TRUE male_lo
12 12 m WL 27 98 2 FALSE male_lo
Recode special values to missing (NA)
# Work on a copy and plant the placeholder code 9999 in two cells
myDf9999 <- myDf1
myDf9999$IQ[2]     <- 9999
myDf9999$rating[3] <- 9999

# Turn every 9999 in IQ and rating into NA; the outer parentheses
# print the result in addition to assigning it
(myDfNA <- myDf9999 %>%
    mutate(
        IQ     = na_if(IQ, 9999),
        rating = na_if(rating, 9999)
    ))
id sex group age IQ rating
1 1 f T 25 95 5
2 2 f T 24 NA 5
3 3 f CG 27 99 NA
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
Drop missing values
id sex group age IQ rating
1 1 f T 25 95 5
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
id sex group age IQ rating
1 -5.5 f T -2.3333333 0.25 1.8333333
2 -4.5 f T -3.3333333 -10.75 1.8333333
3 -3.5 f CG -0.3333333 4.25 -0.1666667
4 -2.5 m WL -1.3333333 21.25 1.8333333
5 -1.5 f T -6.3333333 3.25 0.8333333
6 -0.5 m WL 3.6666667 -11.75 0.8333333
7 0.5 m CG 6.6666667 -6.75 -3.1666667
8 1.5 m CG 0.6666667 15.25 -0.1666667
9 2.5 f T -3.3333333 0.25 -2.1666667
10 3.5 f WL 1.6666667 -14.75 -1.1666667
11 4.5 m CG 4.6666667 -3.75 0.8333333
12 5.5 m WL -0.3333333 3.25 -1.1666667
# Apply two transformations to every variable from age to IQ.
# The list names become suffixes of the new columns (age_ctr, age_scl, ...).
myDf1 %>%
    mutate(
        across(
            age:IQ,
            list(
                # subtract the column mean, leave the spread unchanged
                ctr = ~scale(., center = TRUE, scale = FALSE),
                # no centering: scale() then divides by the root mean square
                # of the column, not by its standard deviation
                scl = ~scale(., center = FALSE, scale = TRUE)
            )
        )
    )
id sex group age IQ rating age_ctr age_scl IQ_ctr IQ_scl
1 1 f T 25 95 5 -2.3333333 0.8683335 0.25 0.9544135
2 2 f T 24 84 5 -3.3333333 0.8336002 -10.75 0.8439025
3 3 f CG 27 99 3 -0.3333333 0.9378002 4.25 0.9945994
4 4 m WL 26 116 5 -1.3333333 0.9030668 21.25 1.1653891
5 5 f T 21 98 4 -6.3333333 0.7294001 3.25 0.9845529
6 6 m WL 31 83 4 3.6666667 1.0767335 -11.75 0.8338560
7 7 m CG 34 88 0 6.6666667 1.1809336 -6.75 0.8840883
8 8 m CG 28 110 3 0.6666667 0.9725335 15.25 1.1051104
9 9 f T 24 95 1 -3.3333333 0.8336002 0.25 0.9544135
10 10 f WL 29 80 2 1.6666667 1.0072669 -14.75 0.8037166
11 11 m CG 32 91 4 4.6666667 1.1114669 -3.75 0.9142277
12 12 m WL 27 98 2 -0.3333333 0.9378002 3.25 0.9845529
id sex group age IQ rating
1 1 <NA> T NA 95 5
2 2 f T 24 84 5
3 3 f <NA> 27 NA 3
4 4 m <NA> 26 116 5
5 NA f <NA> 21 98 4
6 6 m WL 31 NA NA
7 NA m CG 34 NA 0
8 NA m CG NA 110 3
9 NA f T 24 95 1
10 10 f WL NA NA 2
11 NA m CG NA 91 4
12 12 <NA> WL 27 98 NA
Ascending
id sex group age IQ rating
1 7 m CG 34 88 0
2 9 f T 24 95 1
3 10 f WL 29 80 2
4 12 m WL 27 98 2
5 3 f CG 27 99 3
6 8 m CG 28 110 3
7 5 f T 21 98 4
8 6 m WL 31 83 4
9 11 m CG 32 91 4
10 1 f T 25 95 5
11 2 f T 24 84 5
12 4 m WL 26 116 5
Descending
id sex group age IQ rating
1 8 m CG 28 110 3
2 3 f CG 27 99 3
3 11 m CG 32 91 4
4 7 m CG 34 88 0
5 4 m WL 26 116 5
6 12 m WL 27 98 2
7 6 m WL 31 83 4
8 10 f WL 29 80 2
9 5 f T 21 98 4
10 1 f T 25 95 5
11 9 f T 24 95 1
12 2 f T 24 84 5
Move to front
group age id sex IQ rating
1 T 25 1 f 95 5
2 T 24 2 f 84 5
3 CG 27 3 f 99 3
4 WL 26 4 m 116 5
5 T 21 5 f 98 4
6 WL 31 6 m 83 4
7 CG 34 7 m 88 0
8 CG 28 8 m 110 3
9 T 24 9 f 95 1
10 WL 29 10 f 80 2
11 CG 32 11 m 91 4
12 WL 27 12 m 98 2
Specify position directly
id sex age IQ rating group
1 1 f 25 95 5 T
2 2 f 24 84 5 T
3 3 f 27 99 3 CG
4 4 m 26 116 5 WL
5 5 f 21 98 4 T
6 6 m 31 83 4 WL
7 7 m 34 88 0 CG
8 8 m 28 110 3 CG
9 9 f 24 95 1 T
10 10 f 29 80 2 WL
11 11 m 32 91 4 CG
12 12 m 27 98 2 WL
R markdown - markdown - R code - all posts