Document complémentaire au module 6 du cours SDD I de 2025-2026. Distribué sous licence CC BY-NC-SA 4.0.

Veuillez vous référer au cours en ligne pour les explications et les interprétations de cette analyse.

Installer un environnement R adéquat pour reproduire cette analyse.

# Start the SciViews::R dialect together with its inference ("infer") module.
# lang = "fr" presumably selects French for labels and messages - confirm in
# the SciViews documentation.
SciViews::R("infer", lang = "fr")

## En statistique, nous appelons cela un **tirage au sort sans remise**. Le résultat serait très différent si le premier individu tiré au hasard était remis dans la population et pouvait éventuellement être pris à nouveau au second ou au troisième tirage (**tirage au sort avec remise**). Notez aussi que, pour une population de taille infinie ou très grande, le tirage sans remise devient équivalent au tirage **avec** remise, car enlever un individu d'une population infinie ne change pas fondamentalement son effectif, ni donc les probabilités des tirages ultérieurs.

Distribution uniforme

# Discrete uniform distribution: the four values 1 to 4 each get the same
# probability (1/4), drawn as one vertical segment per value
chart(dtx(Portée = 1:4, Probabilité = 1/4), aes()) +
  geom_segment(aes(x = Portée, xend = Portée, y = 0, yend = Probabilité)) +
  ylab("Probabilité")

## Dans le cas de probabilités continues, la probabilité d'un évènement en particulier est **toujours nulle**. Nous pouvons seulement calculer la probabilité que l'un parmi plusieurs évènements se produise (c'est-à-dire que la valeur soit comprise dans un intervalle).

# Cumulative probability P(X <= 2.5) for the continuous uniform
# distribution on [0, 4]
punif(2.5, min = 0, max = 4)
## [1] 0.625
# Probability of falling between 2 and 2.5: difference between the two
# cumulative probabilities
diff(punif(c(2, 2.5), min = 0, max = 4))
## [1] 0.125
# Quantile below which one third of the probability lies, same distribution
qunif(1/3, min = 0, max = 4)
## [1] 1.333333
# Continuous uniform distribution on [0, 4] stored as a distribution object
U <- dist_uniform(min = 0, max = 4)
U
## <distribution[1]>
## [1] U(0, 4)
# Each call on the distribution object below is followed by its base R
# counterpart (*unif functions); both print the same result.
# Cumulative probability at q = 3
cdf(U, q = 3)
punif(q = 3, min = 0, max = 4)
## [1] 0.75
## [1] 0.75
# Quantile for a probability of 1/3
quantile(U, p = 1/3)
qunif(p = 1/3, min = 0, max = 4)
## [1] 1.333333
## [1] 1.333333
# Probability density at 1, 2 and 3
density(U, 1:3)[[1]]
dunif(1:3, min = 0, max = 4)
## [1] 0.25 0.25 0.25
## [1] 0.25 0.25 0.25
# Five pseudo-random values: the same seed is set before each call, and both
# generators then return the same series
set.seed(575)
generate(U, 5)[[1]]
## [1] 3.5515874 3.6412791 1.4006474 0.2001286 3.8390097
set.seed(575)
runif(5, min = 0, max = 4)
## [1] 3.5515874 3.6412791 1.4006474 0.2001286 3.8390097
# Seed the pseudo-random number generator so that the series is reproducible
set.seed(946)
# Draw 10 numbers from the uniform distribution on [0, 1] (the default range
# of runif()); the same 10 numbers come back at every run
runif(10)
##  [1] 0.6378020 0.7524999 0.5593599 0.6688387 0.8989262 0.5300384 0.1520689 0.9031163 0.2693327 0.6738862
# Information about the distribution object U
# Text label of the distribution
format(U)
## [1] "U(0, 4)"
# Its parameters (printed as l and u, here 0 and 4)
parameters(U)
##   l u
## 1 0 4
# Mean and variance of the distribution
mean(U)
## [1] 2
variance(U)
## [1] 1.333333
# Interval over which the distribution is defined
support(U)
## <support_region[1]>
## [1] [0,4]
# Chart of the distribution U, with the region from 1 to 3 filled and a red
# "P[1, 3]" text label placed at x = 2, y = 0.10
chart(U) +
  geom_funfill(fun = dfun(U), from = 1, to = 3) +
  annotate("text", x = 2, y = 0.10, label = "P[1, 3]", col = "red")

# Cumulative probability chart of U (its distribution function rather than
# its density), with the same region from 1 to 3 filled
chart$cumulative(U) +
  geom_funfill(fun = cdfun(U), from = 1, to = 3)

Distribution normale

# Normal distribution with mean 12 and standard deviation 1.5, i.e.
# N(12, 1.5^2)
N1 <- dist_normal(mu = 12, sigma = 1.5) # Arguments are mu = and sigma =
N1 # Caution: printed as N(mu, variance); 1.5^2 = 2.25, shown rounded as 2.2
## <distribution[1]>
## [1] N(12, 2.2)
# Quantile for a probability of 0.95, from the distribution object...
quantile(N1, p = 0.95)
## [1] 14.46728
# ... and from base R
qnorm(p = 0.95, mean = 12, sd = 1.5) # Note the argument names: mean = and sd =
## [1] 14.46728

Corrélation

# Artificial data set: y1 is x plus a little noise, y2 is drawn independently
# of x, and y3 is -x plus a little noise. The seed makes the draws
# reproducible.
set.seed(653643)
df <- dtx(
  x  = rnorm(100),
  y1 = x + rnorm(100, sd = 0.2),
  y2 = rnorm(100),
  y3 = -x + rnorm(100, sd = 0.2)
)

# One scatterplot of each y variable against x, combined in three columns
pl <- list(
  chart(df, y1 ~ x) + geom_point(),
  chart(df, y2 ~ x) + geom_point(),
  chart(df, y3 ~ x) + geom_point()
)
combine_charts(pl, ncol = 3L)

# Covariance between x and each y variable of df
cov(df[["x"]], df[["y1"]])
## [1] 0.9336631
cov(df[["x"]], df[["y2"]])
## [1] 0.004064163
cov(df[["x"]], df[["y3"]])
## [1] -0.9230542
# Same covariances once every value is multiplied by 10 (df2): the results
# are 100 times larger, so covariance depends on the scale of the data
df2 <- df * 10
cov(df2[["x"]], df2[["y1"]])
## [1] 93.36631
cov(df2[["x"]], df2[["y2"]])
## [1] 0.4064163
cov(df2[["x"]], df2[["y3"]])
## [1] -92.30542
# Correlation, computed on df and then on df2 for each pair: the coefficient
# is the same for both, so it does not depend on the scale
cor(df[["x"]], df[["y1"]])
## [1] 0.9781819
cor(df2[["x"]], df2[["y1"]])
## [1] 0.9781819
cor(df[["x"]], df[["y2"]])
## [1] 0.003939552
cor(df2[["x"]], df2[["y2"]])
## [1] 0.003939552
cor(df[["x"]], df[["y3"]])
## [1] -0.979511
cor(df2[["x"]], df2[["y3"]])
## [1] -0.979511
# trees data set, read from the datasets package, then displayed as a
# formatted table showing its first and last rows
trees <- read("trees", package = "datasets")
tabularise$headtail(trees)

Diamètre à 1,4m [m]

Hauteur [m]

Volume de bois [m^3]

0.211

21.3

0.292

0.218

19.8

0.292

0.224

19.2

0.289

0.267

21.9

0.464

0.272

24.7

0.532

...

...

...

0.444

25.0

1.577

0.455

24.4

1.651

0.457

24.4

1.458

0.457

24.4

1.444

0.523

26.5

2.180

Premières et dernières 5 lignes d'un total de 31

# Correlation matrix of trees (Pearson by default), shown as a formatted table
trees_cor <- correlation(trees)
tabularise(trees_cor)
## Warning in set2(resolve(...)): The object is read-only and cannot be modified. If you have to modify it for a legitimate reason, call the method
## $lock(FALSE) on the object before $set(). Using $lock(FALSE) to modify the object will be enforced in future versions of knitr and this warning
## will become an error.

Matrice de coefficients de corrélation de Pearson r

diameter

height

volume

diameter

1.000

0.519

0.967

height

0.519

1.000

0.597

volume

0.967

0.597

1.000

# Summary of the trees correlation matrix: each coefficient is replaced by a
# symbol, and the legend line gives the thresholds matching each symbol
summary(trees_cor)
## Matrix of Pearson's product-moment correlation:
## (calculation uses everything)
##          d h v
## diameter 1    
## height   . 1  
## volume   B . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
# Graphical display of the trees correlation matrix
plot(trees_cor)
## Warning in rep(col, length = length(corr)): partial argument match of 'length' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'

# Second correlation example, on the zooplankton data set: keep the columns
# from size to density, then compute their correlation matrix
zoo <- read("zooplankton", package = "data.io")
zoo_cor <- correlation(sselect(zoo, size:density))
plot(zoo_cor)
## Warning in rep(col, length = length(corr)): partial argument match of 'length' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'
## Warning in rep(col, length = length(corr)): partial argument match of 'len' to 'length.out'

# Graphical display of the zoo correlation matrix; type = "lower" presumably
# restricts the display to the lower triangle - confirm in the plot method's
# documentation
plot(zoo_cor, type = "lower")
## Warning in rep(col, length = length(corr)): partial argument match of 'length' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'
## Warning in seq.default(0, 2 * pi, len = npoints): partial argument match of 'len' to 'length.out'

# Anscombe data set, read from the datasets package; head() shows its first
# six rows
anscombe <- read("anscombe", package = "datasets")
head(anscombe)
## # A data.trame: [6 × 8]
##      x1    x2    x3    x4    y1    y2    y3    y4
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1    10    10    10     8  8.04  9.14  7.46  6.58
## 2     8     8     8     8  6.95  8.14  6.77  5.76
## 3    13    13    13     8  7.58  8.74 12.7   7.71
## 4     9     9     9     8  8.81  8.77  7.11  8.84
## 5    11    11    11     8  8.33  9.26  7.81  8.47
## 6    14    14    14     8  9.96  8.1   8.84  7.04
# Split the table into the four x columns (1 to 4) and the four y columns
# (5 to 8)
ans_x <- anscombe[, 1:4]
ans_y <- anscombe[, 5:8]

# Descriptive statistics of the x columns: mean, variance and standard
# deviation of each column (identical for the four of them)
fmean(ans_x)
## x1 x2 x3 x4 
##  9  9  9  9
fvar(ans_x)
## x1 x2 x3 x4 
## 11 11 11 11
fsd(ans_x)
##       x1       x2       x3       x4 
## 3.316625 3.316625 3.316625 3.316625
# Same descriptive statistics for the y columns (nearly identical)
fmean(ans_y)
##       y1       y2       y3       y4 
## 7.500909 7.500909 7.500000 7.500909
fvar(ans_y)
##       y1       y2       y3       y4 
## 4.127269 4.127629 4.122620 4.123249
fsd(ans_y)
##       y1       y2       y3       y4 
## 2.031568 2.031657 2.030424 2.030579
# Correlation coefficients between x1 and y1, x2 and y2, ...: diag() keeps
# only the matching pairs from the x-by-y correlation matrix
diag(correlation(ans_x, ans_y))
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
# Anscombe charts: one scatterplot for each (x, y) pair, combined in a single
# figure
pl <- list(
  chart(anscombe, y1 ~ x1) + geom_point(),
  chart(anscombe, y2 ~ x2) + geom_point(),
  chart(anscombe, y3 ~ x3) + geom_point(),
  chart(anscombe, y4 ~ x4) + geom_point()
)
combine_charts(pl)

# Scatterplot matrix of the first three columns of trees
GGally::ggscatmat(trees, columns = 1:3)

# Variance-covariance matrix of trees, shown as a formatted table
tabularise(cov(trees))

diameter

height

volume

diameter

0.00634

0.0805

0.0358

height

0.08051

3.8001

0.5414

volume

0.03584

0.5414

0.2166

# Spearman rank correlation matrix for trees
correlation(trees, method = "spearman")
## Matrix of Spearman's rank correlation rho:
## (calculation uses everything)
##          diameter height volume
## diameter 1.000    0.441  0.955 
## height   0.441    1.000  0.579 
## volume   0.955    0.579  1.000
# Kendall rank correlation matrix for trees
correlation(trees, method = "kendall")
## Matrix of Kendall's rank correlation tau:
## (calculation uses everything)
##          diameter height volume
## diameter 1.000    0.317  0.830 
## height   0.317    1.000  0.450 
## volume   0.830    0.450  1.000
# Pearson correlation matrix for trees
correlation(trees) # Same as method = "pearson"
## Matrix of Pearson's product-moment correlation:
## (calculation uses everything)
##          diameter height volume
## diameter 1.000    0.519  0.967 
## height   0.519    1.000  0.597 
## volume   0.967    0.597  1.000
# One-sided Pearson correlation test between diameter and volume in trees
# (alternative hypothesis: the true correlation is greater than 0)
cor.test(~ diameter + volume, data = trees, alternative = "greater")
## 
##  Pearson's product-moment correlation
## 
## data:  diameter and volume
## t = 20.44, df = 29, p-value < 2.2e-16
## alternative hypothesis: true correlation is greater than 0
## 95 percent confidence interval:
##  0.9394172 1.0000000
## sample estimates:
##       cor 
## 0.9670023
# Same test, with the result formatted as a table by tabularise()
tabularise(cor.test(~ diameter + volume, data = trees, alternative = "greater"))

Coefficient de Pearson r (IC:95%)

Valeur de tobs.

Ddl

Valeur sous H0

Valeur de p

1.0 (0.9-1.0)

20.4

29

0

4.55·10⁻¹⁹

***

0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

# One-sided Pearson correlation test between diameter and height in trees,
# formatted as a table
tabularise(cor.test(~ diameter + height, data = trees, alternative = "greater"))

Coefficient de Pearson r (IC:95%)

Valeur de tobs.

Ddl

Valeur sous H0

Valeur de p

0.5 (0.3-1.0)

3.27

29

0

0.0014

**

0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

# Same one-sided test between diameter and height, using Spearman's rank
# correlation
trees_cor_test <- cor.test(~ diameter + height, data = trees,
  alternative = "greater", method = "spearman")
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute exact p-value with ties
# Format the result as a table with tabularise()
tabularise(trees_cor_test)

Coefficient de Spearman ρ

Valeur de Sobs.

Valeur sous H0

Valeur de p

0.441

2773

0

0.00653

**

0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

# Same one-sided test between diameter and height, using Kendall's rank
# correlation
trees_cor_test <- cor.test(~ diameter + height, data = trees,
  alternative = "greater", method = "kendall")
## Warning in cor.test.default(x = mf[[1L]], y = mf[[2L]], ...): Cannot compute exact p-value with ties
# Format the result as a table with tabularise()
tabularise(trees_cor_test)

Coefficient de Kendall τ

Valeur de Zobs.

Valeur sous H0

Valeur de p

0.317

2.46

0

0.007

**

0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05