# Packages this script needs; install any that are not yet available locally.
wants <- c("DescTools")
# TRUE for each wanted package that is already installed.
has   <- wants %in% rownames(installed.packages())
if (any(!has)) install.packages(wants[!has])
# Fix the RNG seed so the random draws below are reproducible.
set.seed(123)
# Draw 12 letters from A-E with replacement; the outer parentheses print
# the assigned value.
(myLetters <- sample(LETTERS[1:5], 12, replace=TRUE))
[1] "B" "D" "C" "E" "E" "A" "C" "E" "C" "C" "E" "C"
# Absolute frequencies: count how often each letter occurs; the outer
# parentheses print the table.
(tab <- table(myLetters))
myLetters
A B C D E
1 1 5 1 4
# Category labels of the frequency table (the distinct values that were counted).
names(tab)
[1] "A" "B" "C" "D" "E"
# Look up the count of a single category by its name.
tab["B"]
B
1
# Bar chart of the absolute frequencies in tab, titled "Counts".
barplot(tab, main="Counts")
# Relative frequencies: each count divided by the total number of
# observations; the outer parentheses print the result.
(relFreq <- prop.table(tab))
myLetters
A B C D E
0.08333333 0.08333333 0.41666667 0.08333333 0.33333333
# Cumulative relative frequencies: running sum over the categories, ending at 1.
cumsum(relFreq)
A B C D E
0.08333333 0.16666667 0.58333333 0.66666667 1.00000000
# Convert to a factor whose level set also contains "Q", a category that
# does not occur in the data.
letFac <- factor(myLetters, levels=c(LETTERS[1:5], "Q"))
# Print the factor together with its levels.
letFac
[1] B D C E E A C E C C E C
Levels: A B C D E Q
# Tabulating a factor counts every declared level, so the unused level "Q"
# is listed with a count of 0.
table(letFac)
letFac
A B C D E Q
1 1 5 1 4 0
# Build a vector of alternating runs of "f" and "m" with run lengths
# 1, 3, 2, 4, 1, 2; the outer parentheses print it.
(vec <- rep(rep(c("f", "m"), 3), c(1, 3, 2, 4, 1, 2)))
[1] "f" "m" "m" "m" "f" "f" "m" "m" "m" "m" "f" "m" "m"
# Run-length encoding: the length and value of each run of identical
# neighbouring elements; the outer parentheses print it.
(res <- rle(vec))
Run Length Encoding
lengths: int [1:6] 1 3 2 4 1 2
values : chr [1:6] "f" "m" "f" "m" "f" "m"
# Number of runs found by rle() (res$lengths holds one entry per run).
length(res$lengths)
[1] 6
# Reconstruct the original vector from its run-length encoding.
inverse.rle(res)
[1] "f" "m" "m" "m" "f" "f" "m" "m" "m" "m" "f" "m" "m"
Using table()
# Number of simulated persons.
N <- 10
# Randomly assign a sex to each person; the outer parentheses print the factor.
(sex <- factor(sample(c("f", "m"), N, replace=TRUE)))
[1] m m f m f f f m m m
Levels: f m
# Randomly assign a workplace to each of the N persons; the outer
# parentheses print the factor.
(work <- factor(sample(c("home", "office"), N, replace=TRUE)))
[1] office office office office office office home home office office
Levels: home office
# Two-way contingency table: sex in the rows, work in the columns; the
# outer parentheses print it.
(cTab <- table(sex, work))
work
sex home office
f 1 3
m 1 5
# Summary of the contingency table: number of cases and factors, plus a
# chi-squared test for independence of the factors.
summary(cTab)
Number of cases in table: 10
Number of factors: 2
Test for independence of all factors:
Chisq = 0.10417, df = 1, p-value = 0.7469
Chi-squared approximation may be incorrect
# Grouped bar chart of the cell counts: beside=TRUE draws the bars side by
# side instead of stacked; the legend uses the row names of cTab (levels of sex).
barplot(cTab, beside=TRUE, legend.text=rownames(cTab), ylab="absolute frequency")
Using xtabs()
# Random integer count between 0 and 5 for each of the N persons.
counts <- sample(0:5, N, replace=TRUE)
# Combine the per-person variables into one data frame; the outer
# parentheses print it.
(persons <- data.frame(sex, work, counts))
sex work counts
1 m office 4
2 m office 4
3 f office 0
4 m office 2
5 f office 4
6 f office 1
7 f home 1
8 m home 1
9 m office 0
10 m office 2
# One-sided formula: count the rows of persons for each sex x work
# combination (the same cell counts as table(sex, work)).
xtabs(~ sex + work, data=persons)
work
sex home office
f 1 3
m 1 5
# With a left-hand side, xtabs() sums the counts column within each
# sex x work cell instead of counting rows.
xtabs(counts ~ sex + work, data=persons)
work
sex home office
f 1 5
m 1 12
# Row totals: apply sum() over dimension 1, giving one total per level of sex.
apply(cTab, MARGIN=1, FUN=sum)
f m
4 6
# Column means: the mean cell count for each level of work.
colMeans(cTab)
home office
1 4
# Append a margin to both dimensions, filled with means rather than the
# default sums.
addmargins(cTab, c(1, 2), FUN=mean)
Margins computed over dimensions
in the following order:
1: sex
2: work
work
sex home office mean
f 1.0 3.0 2.0
m 1.0 5.0 3.0
mean 1.0 4.0 2.5
# Joint relative frequencies: each cell as a share of all cases. This
# overwrites the one-variable relFreq computed earlier; the outer
# parentheses print the result.
(relFreq <- prop.table(cTab))
work
sex home office
f 0.1 0.3
m 0.1 0.5
# Conditional relative frequencies within each row (level of sex); every
# row sums to 1.
prop.table(cTab, margin=1)
work
sex home office
f 0.2500000 0.7500000
m 0.1666667 0.8333333
# Conditional relative frequencies within each column (level of work);
# every column sums to 1.
prop.table(cTab, margin=2)
work
sex home office
f 0.500 0.375
m 0.500 0.625
# Third factor: randomly assign each of the 10 persons to group A or B;
# the outer parentheses print the factor.
(group <- factor(sample(c("A", "B"), 10, replace=TRUE)))
[1] A A A A A A A B A A
Levels: A B
# Flat contingency table of three factors: work in the rows, the
# sex x group combinations in the columns.
ftable(work, sex, group, row.vars="work", col.vars=c("sex", "group"))
sex f m
group A B A B
work
home 1 0 0 1
office 3 0 5 0
Individual-level data frame
# Untable() comes from the DescTools package.
library(DescTools)
# Expand the contingency table back into a data frame with one row per
# counted case.
Untable(cTab)
sex work
1 f home
2 m home
3 f office
4 f office
5 f office
6 m office
7 m office
8 m office
9 m office
10 m office
Group-level data frame
# One row per cell of the table, with the cell count in column Freq.
as.data.frame(cTab, stringsAsFactors=TRUE)
sex work Freq
1 f home 1
2 m home 1
3 f office 3
4 m office 5
# Ten standard-normal draws rounded to two decimals; the outer
# parentheses print the vector.
(vec <- round(rnorm(10), 2))
[1] 0.84 0.15 -1.14 1.25 0.43 -0.30 0.90 0.88 0.82 0.69
# Empirical cumulative distribution function of vec; ecdf() returns a
# function that can be evaluated at arbitrary points.
Fn <- ecdf(vec)
# Cumulative relative frequency of each observation (its percentile rank
# as a proportion).
Fn(vec)
[1] 0.7 0.3 0.1 1.0 0.4 0.2 0.9 0.8 0.6 0.5
# Percentage of observations that are less than or equal to 0.1.
100 * Fn(0.1)
[1] 20
# Evaluated at the sorted data, the ECDF increases monotonically up to 1.
Fn(sort(vec))
[1] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
# Jump points of the step function: the observed values in increasing order.
knots(Fn)
[1] -1.14 -0.30 0.15 0.43 0.69 0.82 0.84 0.88 0.90 1.25
# Plot the empirical cumulative distribution function.
plot(Fn, main="cumulative frequencies")
# Detach DescTools again; try() lets the script continue if detaching fails
# (e.g. the package is not attached).
try(detach(package:DescTools))
R markdown - markdown - R code - all posts