# Packages this script depends on; any that are not yet installed locally
# are installed before the script continues.
wants <- c("dplyr")
has <- is.element(wants, rownames(installed.packages()))
if (!all(has)) {
  install.packages(wants[!has])
}
# Simulate a small example data set. The seed fixes the random number
# stream, so the draws below must stay in this order to be reproducible.
set.seed(123)
N <- 12

# Two-level factor, drawn with replacement.
sex <- factor(sample(c("f", "m"), size = N, replace = TRUE),
              levels = c("f", "m"))

# Balanced three-level factor: four copies of each label, shuffled.
group <- factor(sample(rep(c("CG", "WL", "T"), times = 4), size = N,
                       replace = FALSE),
                levels = c("CG", "WL", "T"))

age    <- sample(18:35, size = N, replace = TRUE)
IQ     <- round(rnorm(N, mean = 100, sd = 15))
rating <- round(runif(N, min = 0, max = 6))

# Assemble the data frame, then print it.
myDf1 <- data.frame(id = seq_len(N), sex = sex, group = group, age = age,
                    IQ = IQ, rating = rating)
myDf1
id sex group age IQ rating
1 1 f T 25 95 5
2 2 f T 24 84 5
3 3 f CG 27 99 3
4 4 m WL 26 116 5
5 5 f T 21 98 4
6 6 m WL 31 83 4
7 7 m CG 34 88 0
8 8 m CG 28 110 3
9 9 f T 24 95 1
10 10 f WL 29 80 2
11 11 m CG 32 91 4
12 12 m WL 27 98 2
Group-wise rank added to original data.
# Attach dplyr for the pipe and the data-manipulation verbs used below.
library(dplyr)

# Rank IQ separately within each sex (ties share the average rank), then
# order the rows by sex and IQ. The result stays grouped by sex.
myDf1_grp <- myDf1 %>%
  group_by(sex) %>%
  mutate(IQ_rank = rank(IQ, ties.method = "average")) %>%
  arrange(sex, IQ)

myDf1_grp
# A tibble: 12 x 7
# Groups: sex [2]
id sex group age IQ rating IQ_rank
<int> <fct> <fct> <int> <dbl> <dbl> <dbl>
1 10 f WL 29 80 2 1
2 2 f T 24 84 5 2
3 1 f T 25 95 5 3.5
4 9 f T 24 95 1 3.5
5 5 f T 21 98 4 5
6 3 f CG 27 99 3 6
7 6 m WL 31 83 4 1
8 7 m CG 34 88 0 2
9 11 m CG 32 91 4 3
10 12 m WL 27 98 2 4
11 8 m CG 28 110 3 5
12 4 m WL 26 116 5 6
A grouped data frame retains its grouping, which is used automatically by other
dplyr functions, but not by base R functions. Grouping is removed with ungroup().
# A tibble: 4 x 7
# Groups: sex [2]
id sex group age IQ rating IQ_rank
<int> <fct> <fct> <int> <dbl> <dbl> <dbl>
1 10 f WL 29 80 2 1
2 2 f T 24 84 5 2
3 6 m WL 31 83 4 1
4 7 m CG 34 88 0 2
# A tibble: 2 x 7
# Groups: sex [1]
id sex group age IQ rating IQ_rank
<int> <fct> <fct> <int> <dbl> <dbl> <dbl>
1 10 f WL 29 80 2 1
2 2 f T 24 84 5 2
# A tibble: 12 x 7
id sex group age IQ rating IQ_rank
<int> <fct> <fct> <int> <dbl> <dbl> <dbl>
1 10 f WL 29 80 2 1
2 2 f T 24 84 5 2
3 1 f T 25 95 5 3.5
4 9 f T 24 95 1 3.5
5 5 f T 21 98 4 5
6 3 f CG 27 99 3 6
7 6 m WL 31 83 4 1
8 7 m CG 34 88 0 2
9 11 m CG 32 91 4 3
10 12 m WL 27 98 2 4
11 8 m CG 28 110 3 5
12 4 m WL 26 116 5 6
# Per-group descriptives: mean and standard deviation of age and IQ,
# plus the number of observations in each group.
myDf1 %>%
  group_by(group) %>%
  summarise(
    age_M  = mean(age),
    age_SD = sd(age),
    IQ_M   = mean(IQ),
    IQ_SD  = sd(IQ),
    n      = n()
  )
# A tibble: 3 x 6
group age_M age_SD IQ_M IQ_SD n
<fct> <dbl> <dbl> <dbl> <dbl> <int>
1 CG 30.2 3.30 97 9.83 4
2 WL 28.2 2.22 94.2 16.5 4
3 T 23.5 1.73 93 6.16 4
If grouping is done by multiple factors, summarise() removes the last one
from the active grouping variables.
# A tibble: 5 x 3
# Groups: sex [2]
sex group rating_M
<fct> <fct> <dbl>
1 f CG 3
2 f WL 2
3 f T 3.75
4 m CG 2.33
5 m WL 3.67
# A tibble: 2 x 2
sex n
<fct> <int>
1 f 3
2 m 2
# A tibble: 2 x 2
sex n
<fct> <int>
1 f 6
2 m 6
across()
# A tibble: 2 x 2
sex M_IQ
<fct> <dbl>
1 f 91.8
2 m 97.7
# A tibble: 1 x 1
M_IQ
<dbl>
1 94.8
# A tibble: 3 x 3
group sex rating
<fct> <int> <int>
1 CG 2 3
2 WL 2 3
3 T 1 3
# A tibble: 3 x 5
group id age IQ rating
<fct> <dbl> <dbl> <dbl> <dbl>
1 CG 7.25 30.2 97 2.5
2 WL 8 28.2 94.2 3.25
3 T 4.25 23.5 93 3.75
# Median of every numeric column, computed separately for each sex.
# The name in the function list becomes the suffix of each output column
# (e.g. IQ_median).
myDf1 %>%
  group_by(sex) %>%
  summarise(
    across(
      where(is.numeric),
      list(median = function(x) median(x, na.rm = TRUE))
    )
  )
# A tibble: 2 x 5
sex id_median age_median IQ_median rating_median
<fct> <dbl> <dbl> <dbl> <dbl>
1 f 4 24.5 95 3.5
2 m 7.5 29.5 94.5 3.5
R markdown - markdown - R code - all posts