Document complémentaire au module 8 du cours SDD I de 2025-2026. Distribué sous licence CC BY-NC-SA 4.0.
Veuillez vous référer au cours en ligne pour les explications et les interprétations de cette analyse.
Installer un environnement R adéquat pour reproduire cette analyse.
# Right-tail probability beyond the quantile 8.5 for t(n = 9; mu = 8, sd = 2/3)
mu <- 8
s <- 2 / 3
# Standardise 8.5 with mu and s, then take the upper tail of a t distribution with 8 df
pt(q = (8.5 - mu) / s, df = 8, lower.tail = FALSE)
## [1] 0.2373656
# Quantile leaving a left-tail area of 5% for t(n = 9; mu = 8, sd = 2/3)
mu <- 8
s <- 2 / 3
# qt() returns the quantile on the standard t scale; rescale it by s and shift it by mu
qt(p = 0.05, df = 8, lower.tail = TRUE) * s + mu
## [1] 6.760301
# Solve the two previous questions with a dist_student_t() distribution object
student_t <- dist_student_t(df = 8, mu = 8, sigma = 2 / 3)
# Right-tail area beyond the quantile 8.5 (1 - x because cdf() always returns the left-tail area)
1 - cdf(student_t, 8.5)
## [1] 0.2373656
# Quantile leaving a left-tail area of 5%
# NOTE(review): this call was missing although its output was present; restored with
# quantile(), assuming the distribution object follows the distributional API — confirm.
quantile(student_t, 0.05)
## [1] 6.760301
# Area to the left of 8 - 0.5 plus area to the right of 8 + 0.5
student_t <- dist_student_t(df = 8, mu = 8, sigma = 2/3)
# The outer parentheses print the value while assigning it to left_area
(left_area <- cdf(student_t, 7.5))## [1] 0.2373656
# NOTE(review): the statements that produced the five outputs below are not in this
# extract. Presumably: the right-tail area, the sum of both tails (shown twice), and
# the 2.5% and 97.5% quantiles — confirm against the course material.
## [1] 0.2373656
## [1] 0.4747312
## [1] 0.4747312
## [1] 6.462664
## [1] 9.537336
| Name | crabs |
| Number of rows | 200 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| species | 0 | 1 | FALSE | 2 | B: 100, O: 100 |
| sex | 0 | 1 | FALSE | 2 | F: 100, M: 100 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| index | 0 | 1 | 25.50 | 14.47 | 1.0 | 13.00 | 25.50 | 38.00 | 50.0 | ▇▇▇▇▇ |
| front | 0 | 1 | 15.58 | 3.50 | 7.2 | 12.90 | 15.55 | 18.05 | 23.1 | ▂▆▇▆▃ |
| rear | 0 | 1 | 12.74 | 2.57 | 6.5 | 11.00 | 12.80 | 14.30 | 20.2 | ▂▆▇▃▁ |
| length | 0 | 1 | 32.11 | 7.12 | 14.7 | 27.28 | 32.10 | 37.23 | 47.6 | ▂▆▇▇▃ |
| width | 0 | 1 | 36.41 | 7.87 | 17.1 | 31.50 | 36.80 | 42.00 | 54.6 | ▂▆▇▇▂ |
| depth | 0 | 1 | 14.03 | 3.42 | 6.1 | 11.40 | 13.90 | 16.60 | 21.6 | ▂▅▇▆▂ |
# Student's t-test: mean rear width of the carapace by sex (equal variances)
t.test(rear ~ sex, data = crabs,
  var.equal = TRUE, alternative = "two.sided", conf.level = 0.95)
##
## Two Sample t-test
##
## data: rear by sex
## t = 4.2896, df = 198, p-value = 2.797e-05
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## 0.8087907 2.1852093
## sample estimates:
## mean in group F mean in group M
## 13.487 11.990
# Welch variant of Student's t-test, for unequal variances
t.test(rear ~ sex, data = crabs,
  var.equal = FALSE, alternative = "two.sided", conf.level = 0.95)
##
## Welch Two Sample t-test
##
## data: rear by sex
## t = 4.2896, df = 187.76, p-value = 2.862e-05
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## 0.8085599 2.1854401
## sample estimates:
## mean in group F mean in group M
## 13.487 11.990
# Right-tailed (one-sided) Student's t-test with unequal variances
t.test(rear ~ sex, data = crabs,
  var.equal = FALSE, alternative = "greater", conf.level = 0.95)
##
## Welch Two Sample t-test
##
## data: rear by sex
## t = 4.2896, df = 187.76, p-value = 1.431e-05
## alternative hypothesis: true difference in means between group F and group M is greater than 0
## 95 percent confidence interval:
## 0.9201205 Inf
## sample estimates:
## mean in group F mean in group M
## 13.487 11.990
# Compute delta_f_r (front - rear) and t_obs for the paired t-test
crabs %>.%
smutate(., delta_f_r = front - rear) %>.%
ssummarise(.,
# Mean of the differences and its standard error
# (presumably fsd() / sqrt(fnobs()) is sd / sqrt(n) — confirm the helpers' semantics)
mean_f_r = fmean(delta_f_r),
se_f_r = fsd(delta_f_r) / sqrt(fnobs(delta_f_r))) %>.%
# Observed statistic: mean difference divided by its standard error
smutate(., t_obs = mean_f_r / se_f_r)## # A data.trame: [1 × 3]
## mean_f_r se_f_r t_obs
## <dbl> <dbl> <dbl>
## 1 2.84 0.112 25.3
# NOTE(review): the statement that produced the output below is not in this extract.
## [1] 3.667686e-64
# Plot of rear width against front width, with the identity line (slope 1, intercept 0)
chart(data = crabs, rear ~ front) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1)
# Paired Student's t-test between front and rear
t.test(crabs$front, crabs$rear,
  paired = TRUE, alternative = "two.sided", conf.level = 0.95)
##
## Paired t-test
##
## data: crabs$front and crabs$rear
## t = 25.324, df = 199, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 2.623004 3.065996
## sample estimates:
## mean difference
## 2.8445
# Example of an incorrectly encoded dataset
sleep <- read("sleep", package = "datasets")
# The rendered table header was fused onto this call; it now sits on its own line
tabularise(sleep)
extra | group | ID |
|---|---|---|
0.7 | 1 | 1 |
-1.6 | 1 | 2 |
-0.2 | 1 | 3 |
-1.2 | 1 | 4 |
-0.1 | 1 | 5 |
3.4 | 1 | 6 |
3.7 | 1 | 7 |
0.8 | 1 | 8 |
0.0 | 1 | 9 |
2.0 | 1 | 10 |
1.9 | 2 | 1 |
0.8 | 2 | 2 |
1.1 | 2 | 3 |
0.1 | 2 | 4 |
-0.1 | 2 | 5 |
4.4 | 2 | 6 |
5.5 | 2 | 7 |
1.6 | 2 | 8 |
4.6 | 2 | 9 |
3.4 | 2 | 10 |
# Restructure the table for a paired t-test: one column of `extra` values per `group`
sleep2 <- spivot_wider(sleep, names_from = group, values_from = extra)
names(sleep2) <- c("id", "med1", "med2")
# The rendered table header was fused onto this call; it now sits on its own line
tabularise(sleep2)
id | med1 | med2 |
|---|---|---|
1 | 0.7 | 1.9 |
2 | -1.6 | 0.8 |
3 | -0.2 | 1.1 |
4 | -1.2 | 0.1 |
5 | -0.1 | -0.1 |
6 | 3.4 | 4.4 |
7 | 3.7 | 5.5 |
8 | 0.8 | 1.6 |
9 | 0.0 | 4.6 |
10 | 2.0 | 3.4 |
# Plot of these data (med2 against med1), with the identity line (slope 1, intercept 0)
chart(data = sleep2, med2 ~ med1) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1)
# Paired Student's t-test between med1 and med2
t.test(sleep2$med1, sleep2$med2,
  paired = TRUE, alternative = "two.sided", conf.level = 0.95)
##
## Paired t-test
##
## data: sleep2$med1 and sleep2$med2
## t = -4.0621, df = 9, p-value = 0.002833
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -2.4598858 -0.7001142
## sample estimates:
## mean difference
## -1.58
# Boxplot of med1 with a red horizontal reference line at 0
chart(data = sleep2, med1 ~ "") +
  geom_boxplot() +
  geom_hline(col = "red", yintercept = 0) +
  xlab("") +
  ylab("Sommeil supplémentaire avec med1 [h]")
# One-sample Student's t-test on med1 against mu = 0
t.test(sleep2$med1,
  mu = 0, alternative = "two.sided", conf.level = 0.95)
##
## One Sample t-test
##
## data: sleep2$med1
## t = 1.3257, df = 9, p-value = 0.2176
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.5297804 2.0297804
## sample estimates:
## mean of x
## 0.75
# One-sample Wilcoxon test on med1 against a location of 0
wilcox.test(sleep2$med1,
  mu = 0, alternative = "two.sided", conf.level = 0.95)
##
## Wilcoxon signed rank test with continuity correction
##
## data: sleep2$med1
## V = 31, p-value = 0.3433
## alternative hypothesis: true location is not equal to 0
# Right-tailed Wilcoxon-Mann-Whitney test for independent samples
wilcox.test(rear ~ sex, data = crabs,
  conf.level = 0.95, alternative = "greater")
##
## Wilcoxon rank sum test with continuity correction
##
## data: rear by sex
## W = 6710, p-value = 1.473e-05
## alternative hypothesis: true location shift is greater than 0
# Power calculation for a Student's t-test
pwr::pwr.t.test(type = "one.sample", alternative = "two.sided",
  n = 10, d = 1.3, sig.level = 0.05)
##
## One-sample t test power calculation
##
## n = 10
## d = 1.3
## sig.level = 0.05
## power = 0.9538774
## alternative = two.sided
# Graphical representations of the data for a Student's t-test or a Wilcoxon test
# a: columns at the group means, with 95% confidence-interval error bars
a <- chart(data = crabs, rear ~ sex) +
  stat_summary(fun = "mean", geom = "col") +
  stat_summary(fun.data = "mean_cl_normal", fun.args = list(conf.int = 0.95),
    geom = "errorbar", width = 0.1)
# b: jittered observations, then the group means and 95% confidence-interval error bars
b <- chart(data = crabs, rear ~ sex) +
  geom_jitter(width = 0.2, alpha = 0.3) +
  stat_summary(fun = "mean", geom = "point", size = 2) +
  stat_summary(fun.data = "mean_cl_normal", fun.args = list(conf.int = 0.95),
    geom = "errorbar", width = 0.1, linewidth = 1)
combine_charts(list(a, b))
# Better graphical representation for a Wilcoxon-Mann-Whitney test
# Boxplot of rear by sex
chart(data = crabs, rear ~ sex) +
  geom_boxplot()
# Better graphical representation for a Student's t-test
chart(data = crabs, rear ~ sex) +
  geom_jitter(width = 0.2, alpha = 0.3) +
  stat_summary(fun = "mean", geom = "point") +
  stat_summary(fun.data = "mean_cl_normal", fun.args = list(conf.int = 0.95),
    geom = "errorbar", width = 0.1)
# Four plots with different error bars (95% CI, standard deviation, 2 * standard deviation and standard error)
# Shared base plot: jittered observations, group means and a fixed y range
p <- chart(data = crabs, rear ~ sex) +
  geom_jitter(width = 0.2, alpha = 0.1) +
  stat_summary(fun = "mean", geom = "point") +
  scale_y_continuous(limits = c(5, 22))
# a: 95% confidence interval
a <- p +
  stat_summary(fun.data = "mean_cl_normal", fun.args = list(conf.int = 0.95),
    geom = "errorbar", width = 0.1)
# b: mean_sdl with mult = 1
b <- p +
  stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 1),
    geom = "errorbar", width = 0.1)
# c: mean_sdl with mult = 2
c <- p +
  stat_summary(fun.data = "mean_sdl", fun.args = list(mult = 2),
    geom = "errorbar", width = 0.1)
# d: mean_se with mult = 1
d <- p +
  stat_summary(fun.data = "mean_se", fun.args = list(mult = 1),
    geom = "errorbar", width = 0.1)
combine_charts(list(a, b, c, d))