Document complémentaire au module 9 du cours SDD II de 2025-2026. Distribué sous licence CC BY-NC-SA 4.0.
Veuillez vous référer au cours en ligne pour les explications et les interprétations de cette analyse.
Installer un environnement R adéquat pour reproduire cette analyse.
# Installer le dialecte SciViews::R avec le module "explore"
SciViews::R("explore")
# Charger les packages nécessaires à l'accès à la base de données DuckDB
library(DBI)
library(duckdb)
# Créer la base de données dans un fichier
drv <- duckdb(dbdir = "duckdb_test.db")
# Se connecter à la base de données
con <- dbConnect(drv)
# Lecture des données who
who <- read("who", package = "tidyr")
# Structure du jeu de données who
str(who)## dtrm [7,240 Ă— 60] (S3: data.trame/data.frame)
## $ country : chr [1:7240] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ iso2 : chr [1:7240] "AF" "AF" "AF" "AF" ...
## $ iso3 : chr [1:7240] "AFG" "AFG" "AFG" "AFG" ...
## $ year : num [1:7240] 1980 1981 1982 1983 1984 ...
## $ new_sp_m014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_m1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_m2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_m3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_m4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_m5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_m65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sp_f65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_m65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_sn_f65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_m65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ new_ep_f65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_m65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f014 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f1524: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f2534: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f3544: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f4554: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f5564: num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## $ newrel_f65 : num [1:7240] NA NA NA NA NA NA NA NA NA NA ...
## - attr(*, "comment")= chr ""
## ..- attr(*, "lang")= chr "fr"
## ..- attr(*, "lang_encoding")= chr "UTF-8"
## ..- attr(*, "src")= chr "tidyr::who"
## - attr(*, ".internal.selfref")=<externalptr>
# Lecture du jeu de données pop
pop <- read("population", package = "tidyr")
# Structure du jeu de données pop
str(pop)## dtrm [4,060 Ă— 3] (S3: data.trame/data.frame)
## $ country : chr [1:4060] "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ year : num [1:4060] 1995 1996 1997 1998 1999 ...
## $ population: num [1:4060] 17586073 18415307 19021226 19496836 19987071 ...
## - attr(*, "comment")= chr ""
## ..- attr(*, "lang")= chr "fr"
## ..- attr(*, "lang_encoding")= chr "UTF-8"
## ..- attr(*, "src")= chr "tidyr::population"
## - attr(*, ".internal.selfref")=<externalptr>
# Pivoter le tableau who on long
whol <- pivot_longer(who, starts_with("new"), names_to = "type", values_to = "new_cases")
dim(whol)## [1] 405440 6
## # A tibble: 6 Ă— 6
## country iso2 iso3 year type new_cases
## <chr> <chr> <chr> <dbl> <chr> <dbl>
## 1 Afghanistan AF AFG 1980 new_sp_m014 NA
## 2 Afghanistan AF AFG 1980 new_sp_m1524 NA
## 3 Afghanistan AF AFG 1980 new_sp_m2534 NA
## 4 Afghanistan AF AFG 1980 new_sp_m3544 NA
## 5 Afghanistan AF AFG 1980 new_sp_m4554 NA
## 6 Afghanistan AF AFG 1980 new_sp_m5564 NA
# Découpage des trois champs à partir de la chaine de caractères "new_ep_f2534"
substring("new_ep_f2534", 1, 6) # method## [1] "new_ep"
## [1] "f"
## [1] "2534"
# Extractions des trois champs depuis type
whol %>.%
mutate(.,
method = substring(type, 1, 6),
sex = substring(type, 8, 8),
age = substring(type, 9, 12)) %>.%
select(., -type) ->
whol
head(whol)## # A tibble: 6 Ă— 8
## country iso2 iso3 year new_cases method sex age
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Afghanistan AF AFG 1980 NA new_sp m 014
## 2 Afghanistan AF AFG 1980 NA new_sp m 1524
## 3 Afghanistan AF AFG 1980 NA new_sp m 2534
## 4 Afghanistan AF AFG 1980 NA new_sp m 3544
## 5 Afghanistan AF AFG 1980 NA new_sp m 4554
## 6 Afghanistan AF AFG 1980 NA new_sp m 5564
# Traitement des données pour répondre à la question
whol %>.%
left_join(., pop) %>.% # étape 1 fusion
filter(., year >= 2000 & year < 2010 & sex == "f" & age %in% c("2534", "3544", "4554")) %>.% # étape 2 filtrage
group_by(., country, year) %>.% # étape 3 regroupement par pays et année
summarise(., total = first(population), all_new = sum(new_cases, na.rm = TRUE)) %>.% # étape 4 somme des cas
mutate(., prevalence = all_new / total) %>.% # étape 5, calcul de la prévalence
group_by(., country) %>.% # étape 6 regroupement par pays
summarise(., mean_prev = mean(prevalence, na.rm = TRUE)) %>.% # étape 7 prévalence moyenne
slice_max(., mean_prev, n = 10) # étape 8 ne garder que les 10 plus élevés## Joining with `by = join_by(country, year)`
## `summarise()` has grouped output by 'country'. You can override using the `.groups` argument.
## # A tibble: 10 Ă— 2
## country mean_prev
## <chr> <dbl>
## 1 South Africa 0.000971
## 2 Swaziland 0.000807
## 3 Namibia 0.000726
## 4 Botswana 0.000617
## 5 Lesotho 0.000606
## 6 Zimbabwe 0.000502
## 7 Cambodia 0.000382
## 8 Kiribati 0.000353
## 9 Djibouti 0.000344
## 10 Zambia 0.000338
## [1] 0
## [1] NA
# MĂªme traitement, mais avec fsum()
whol %>.%
left_join(., pop) %>.% # étape 1 fusion
filter(., year >= 2000 & year < 2010 & sex == "f" & age %in% c("2534", "3544", "4554")) %>.% # étape 2 filtrage
group_by(., country, year) %>.% # étape 3 regroupement par pays et année
summarise(., total = first(population), all_new = fsum(new_cases, na.rm = TRUE)) %>.% # étape 4 somme des cas AVEC fsum()
mutate(., prevalence = all_new / total) %>.% # étape 5, calcul de la prévalence
group_by(., country) %>.% # étape 6 regroupement par pays
summarise(., mean_prev = mean(prevalence, na.rm = TRUE)) %>.% # étape 7 prévalence moyenne
slice_max(., mean_prev, n = 10) # étape 8 ne garder que les 10 plus élevés## Joining with `by = join_by(country, year)`
## `summarise()` has grouped output by 'country'. You can override using the `.groups` argument.
## # A tibble: 10 Ă— 2
## country mean_prev
## <chr> <dbl>
## 1 South Africa 0.000971
## 2 Swaziland 0.000807
## 3 Namibia 0.000726
## 4 Lesotho 0.000674
## 5 Zimbabwe 0.000627
## 6 Botswana 0.000617
## 7 Cambodia 0.000382
## 8 Zambia 0.000375
## 9 Kiribati 0.000353
## 10 Djibouti 0.000344
library(DBI)
library(duckdb)
# création et connexion à la base de données
drv <- duckdb(dbdir = "duckdb_test.db")
con <- dbConnect(drv)
# Injection de deux data frames comme tables
# notez bien que nous utilisons le data frame long et correctement nettoyé
# `whol` pour notre table "who",... pas l'horrible data frame `who` initial !
dbWriteTable(con, "who", as.data.frame(whol))
dbWriteTable(con, "pop", as.data.frame(pop))
# Chargement du package dm
library(dm)
# Création d'un objet dm qui reprend le schéma de la base
# nous indiquons learn_keys = FALSE car nous n'avons pas encore de clés
# primaires et de toutes façons, {dm} ne les détecte pas depuis DuckDB
dm_from_con(con, learn_keys = FALSE) %>.%
dm_set_colors(., red = who, darkgreen = pop) -> # Couleurs pour les tables (pour le schéma)
who_dm
# Graphique du schéma de la base
dm_draw(who_dm, view_type = "all")## # A tibble: 3 Ă— 3
## columns candidate why
## <keys> <lgl> <chr>
## 1 country FALSE has duplicate values: Afghanistan (19), Albania (19), Algeria (19), American Samoa (19), Andorra (19)…
## 2 year FALSE has duplicate values: 2011 (217), 2012 (217), 2013 (217), 2010 (216), 2005 (214), …
## 3 population FALSE has duplicate values: 20186 (2), 52161 (2), 59117 (2), 108511 (2), 109373 (2)
# Création des clés primaires
who_dm %>.%
dm_add_pk(., pop, c(country, year), force = TRUE) %>.%
dm_add_pk(., who, c(country, year, method, sex, age), force = TRUE) ->
who_dm
# Graphique du schéma de la base
dm_draw(who_dm, view_type = "all")# Ajout d'un clé étrangère de who vers pop
who_dm <- dm_add_fk(who_dm, who, c(country, year), pop)
# Graphique du schéma de la base avec relation entre les tables
dm_draw(who_dm, view_type = "all")try({
# Vérification de la cardinalité 0 -> n
check_cardinality_0_n(pop, c(country, year), whol, c(country, year))
})## # A tibble: 10 Ă— 2
## country year
## <chr> <dbl>
## 1 Afghanistan 1980
## 2 Afghanistan 1980
## 3 Afghanistan 1980
## 4 Afghanistan 1980
## 5 Afghanistan 1980
## 6 Afghanistan 1980
## 7 Afghanistan 1980
## 8 Afghanistan 1980
## 9 Afghanistan 1980
## 10 Afghanistan 1980
## Error in abort_not_subset_of(x_label, colnames(x), y_label, colnames(y)) :
## Columns (`country`, `year`) of table `whol` contain values (see examples above) that are not present in columns (`country`, `year`) of table `pop`.
## [1] 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
## [1] 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
## [25] 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
# Élimination des années antérieures à 1995 dans whol
whol1995 <- filter(whol, year >= 1995)
# Vérification des apys entre les deux tables
whol_countries <- unique(whol1995$country)
pop_countries <- unique(pop$country)
all(whol_countries %in% pop_countries)## [1] FALSE
## [1] "Cote d'Ivoire" "Curacao"
# Pays dont le nom commence par 'C' dans pop_countries
pop_countries[substring(pop_countries, 1, 1) == "C"]## [1] "Cabo Verde" "Cambodia" "Cameroon" "Canada"
## [5] "Cayman Islands" "Central African Republic" "Chad" "Chile"
## [9] "China" "China, Hong Kong SAR" "China, Macao SAR" "Colombia"
## [13] "Comoros" "Congo" "Cook Islands" "Costa Rica"
## [17] "Côte d'Ivoire" "Croatia" "Cuba" "Curaçao"
## [21] "Cyprus" "Czech Republic"
# Correction des noms de pays mal orthographiés
whol1995$country[whol1995$country == "Cote d'Ivoire"] <- "CĂ´te d'Ivoire"
whol1995$country[whol1995$country == "Curacao"] <- "Curaçao"
# Vérification
all(unique(whol1995$country) %in% pop_countries)## [1] TRUE
# Vérification de la cardinalité 0 -> n après correction
check_cardinality_0_n(pop, c(country, year), whol1995, c(country, year))
# Effacement de l'ancienne table "whol" et remplacement par la version corrigée
dbRemoveTable(con, "who")
dbWriteTable(con, "who", whol1995)
# Nouvel objet data model
dm_from_con(con, learn_keys = FALSE) %>.%
dm_set_colors(., red = who, darkgreen = pop) %>.% # Couleurs (optionel)
dm_add_pk(., pop, c(country, year)) %>.% # Clé primaire pop
dm_add_pk(., who, c(country, year, method, sex, age)) %>.% # Clé primaire who
dm_add_fk(., who, c(country, year), pop) -> # Clé étrangère who -> pop
who_dm
# Graphique du schéma de la base
dm_draw(who_dm, view_type = "all")# Type de cardinalité entre pop et whol1995
examine_cardinality(pop, c(country, year), whol1995, c(country, year))## [1] "surjective mapping (child: 1 to n -> parent: 1)"
# Création du data frame countries
whol1995 %>.%
select(., country, iso2, iso3) %>.%
distinct(.) ->
countries
head(countries)## # A tibble: 6 Ă— 3
## country iso2 iso3
## <chr> <chr> <chr>
## 1 Afghanistan AF AFG
## 2 Albania AL ALB
## 3 Algeria DZ DZA
## 4 American Samoa AS ASM
## 5 Andorra AD AND
## 6 Angola AO AGO
# Élimination des colonnes iso2 et iso3 de la table who (avec langage SQL)
dbExecute(con, 'ALTER TABLE "who" DROP COLUMN "iso2";')## [1] 0
## [1] 0
## [1] "country" "year" "new_cases" "method" "sex" "age"
# Ajout de la table countries
dbWriteTable(con, "countries", countries)
# Vérification
dbListTables(con)## [1] "countries" "pop" "who"
# Nouveau dm qui tient compte de la table countries également
dm_from_con(con, learn_keys = FALSE) %>.%
dm_set_colors(., red = who, darkgreen = pop, blue = countries) %>.% # Couleurs (optionel)
dm_add_pk(., pop, c(country, year)) %>.% # Clé primaire pop
dm_add_pk(., who, c(country, year, method, sex, age)) %>.% # Clé primaire who
dm_add_pk(., countries, country) %>.% # Clé primaire countries
dm_add_fk(., who, c(country, year), pop) %>.% # Clé étrangère who -> pop
dm_add_fk(., pop, country, countries) %>.% # Clé étrangère pop -> countries
dm_add_fk(., who, country, countries) -> # Clé étrangère who -> countries
who_dm
# Graphique du schéma de la base
dm_draw(who_dm, view_type = "all")# Version plus simple de la dm sans relation cyclique
dm_from_con(con, learn_keys = FALSE) %>.%
dm_set_colors(., red = who, darkgreen = pop, blue = countries) %>.% # Couleurs (optionel)
dm_add_pk(., pop, c(country, year)) %>.% # Clé primaire pop
dm_add_pk(., who, c(country, year, method, sex, age)) %>.% # Clé primaire who
dm_add_pk(., countries, country) %>.% # Clé primaire countries
dm_add_fk(., who, c(country, year), pop) %>.% # Clé étrangère who -> pop
# Nous n'utilisons pas cette clé étrangère pour éviter une relation cyclique
#dm_add_fk(., pop, country, countries) %>.% # Clé étrangère pop -> countries
dm_add_fk(., who, country, countries) -> # Clé étrangère who -> countries
who_dm
# Graphique du schéma de la base
dm_draw(who_dm, view_type = "all")# RequĂªte de base de donnĂ©es avec dm
who_dm %>.%
dm_filter(., who = year >= 2000 & year < 2010 & sex == "f" & age %in% c("2534", "3544", "4554")) %>.% # étape 2 filtrage
dm_flatten_to_tbl(., who) ->
who_flat
head(who_flat)## # Source: SQL [?? x 9]
## # Database: DuckDB v1.2.1 [root@Darwin 25.3.0:R 4.4.3//Users/phgrosjean/Documents/GitHub/BioDataScience-Course/sdd-umons2-2025/R/duckdb_test.db]
## country year new_cases method sex age iso2 iso3 population
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Afghanistan 2008 NA newrel f 4554 AF AFG 27032197
## 2 Albania 2002 NA newrel f 4554 AL ALB 3263596
## 3 Albania 2005 NA newrel f 4554 AL ALB 3196130
## 4 Angola 2003 NA newrel f 4554 AO AGO 15421075
## 5 Antigua and Barbuda 2002 NA newrel f 4554 AG ATG 80030
## 6 Antigua and Barbuda 2005 NA newrel f 4554 AG ATG 82565
# Notre requĂªte d'eemple en utilisant dm
who_dm %>.% # étape 1 de jointure inutile avec un objet dm
dm_filter(., who = year >= 2000 & year < 2010 & sex == "f" & age %in% c("2534", "3544", "4554")) %>.% # étape 2 filtrage
dm_flatten_to_tbl(., who) %>.% # Réduction en une seule table
# Le traitement ci-dessous est identique Ă celui dans R,
# sauf pour la gestion des pays ne rencontrant aucun cas !
group_by(., country, year) %>.% # étape 3 regroupement par pays et année
summarise(., total = first(population), all_new = sum(new_cases, na.rm = TRUE)) %>.% # étape 4 somme des cas
mutate(., prevalence = all_new / total) %>.% # étape 5, calcul de la prévalence
group_by(., country) %>.% # étape 6 regroupement par pays
summarise(., mean_prev = mean(prevalence, na.rm = TRUE)) %>.% # étape 7 prévalence moyenne
# Ă©tape supplĂ©mentaire nĂ©cessaire ici : Ă©liminer les pays oĂ¹ mean_prev est NA
filter(., !is.na(mean_prev)) %>.% # drop_na() ne fonctionne pas ici
slice_max(., mean_prev, n = 10) # étape 8 garder les 10 plus élevés## `summarise()` has grouped output by "country". You can override using the `.groups` argument.
## # Source: SQL [?? x 2]
## # Database: DuckDB v1.2.1 [root@Darwin 25.3.0:R 4.4.3//Users/phgrosjean/Documents/GitHub/BioDataScience-Course/sdd-umons2-2025/R/duckdb_test.db]
## country mean_prev
## <chr> <dbl>
## 1 South Africa 0.000971
## 2 Swaziland 0.000807
## 3 Namibia 0.000726
## 4 Lesotho 0.000674
## 5 Zimbabwe 0.000627
## 6 Botswana 0.000617
## 7 Cambodia 0.000382
## 8 Zambia 0.000375
## 9 Kiribati 0.000353
## 10 Djibouti 0.000344
# Mise en place du dialecte SciViews::R avec le module "explore"
SciViews::R("explore", lang = "fr")
# Lecture des données et renommage de la première colonne en station
read("varespec", package = "vegan") %>.%
srename(., station = .rownames) ->
veg
veg## # A data.trame: [24 Ă— 45]
## station Callvulg Empenigr Rhodtome Vaccmyrt Vaccviti Pinusylv Descflex Betupube Vacculig Diphcomp Dicrsp Dicrfusc Dicrpoly
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 18 0.55 11.1 0 0 17.8 0.07 0 0 1.6 2.07 0 1.62 0
## 2 15 0.67 0.17 0 0.35 12.1 0.12 0 0 0 0 0.33 10.9 0.02
## 3 24 0.1 1.55 0 0 13.5 0.25 0 0 0 0 23.4 0 1.68
## 4 27 0 15.1 2.42 5.92 16.0 0 3.7 0 1.12 0 0 3.63 0
## 5 23 0 12.7 0 0 23.7 0.03 0 0 0 0 0 3.42 0.02
## 6 19 0 8.92 0 2.42 10.3 0.12 0.02 0 0 0 0 0.32 0.02
## 7 22 4.73 5.12 1.55 6.05 12.4 0.1 0.78 0.02 2 0 0.03 37.1 0
## 8 16 4.47 7.33 0 2.15 4.33 0.1 0 0 0 0 1.02 25.8 0.23
## 9 28 0 1.63 0.35 18.3 7.13 0.05 0.4 0 0.2 0 0.3 0.52 0.2
## 10 13 24.1 1.9 0.07 0.22 5.3 0.12 0 0 0 0.07 0.02 2.5 0
## # ℹ 14 more rows
## # ℹ 31 more variables: Hylosple <dbl>, Pleuschr <dbl>, Polypili <dbl>, Polyjuni <dbl>, Polycomm <dbl>, Pohlnuta <dbl>,
## # Ptilcili <dbl>, Barbhatc <dbl>, Cladarbu <dbl>, Cladrang <dbl>, Cladstel <dbl>, Cladunci <dbl>, Cladcocc <dbl>,
## # Cladcorn <dbl>, Cladgrac <dbl>, Cladfimb <dbl>, Cladcris <dbl>, Cladchlo <dbl>, Cladbotr <dbl>, Cladamau <dbl>,
## # Cladsp <dbl>, Cetreric <dbl>, Cetrisla <dbl>, Flavniva <dbl>, Nepharct <dbl>, Stersp <dbl>, Peltapht <dbl>,
## # Icmaeric <dbl>, Cladcerv <dbl>, Claddefo <dbl>, Cladphyl <dbl>
# Graphique d'abondances des différentes espèces
veg %>.%
sselect(., -station) %>.% # Colonne 'station' pas utile ici
spivot_longer(., everything(), names_to = "espèce", values_to = "couverture") %>.%
chart(., espèce ~ couverture) +
geom_boxplot() + # Boites de dispersion
labs(x = "Espèce", y = "Couverture [%]")# MĂªme graphqiue, mais après transformation log(x + 1)
veg %>.%
sselect(., -station) %>.%
spivot_longer(., everything(), names_to = "espèce", values_to = "couverture") %>.%
chart(., espèce ~ log1p(couverture)) + # Transformation log(couverture + 1)
geom_boxplot() +
labs(x = "Espèce", y = "Couverture [%]")# Matrice de dissimilarité de Bray-Curtis sur données transformées log(x + 1)
veg %>.%
sselect(., -station) %>.%
log1p(.) %>.%
dissimilarity(., method = "bray") ->
veg_dist
# Initialisation du générateur de nombres pseudo-aléatoires
set.seed(9)
# MDS métrique sur la matrice de dissimilarité
veg_mds <- mds$metric(veg_dist)
# Graphique de la MDS métrique
chart(veg_mds, labels = veg$station, col = veg$station)## # A tibble: 1 Ă— 2
## GOF1 GOF2
## <dbl> <dbl>
## 1 0.527 0.554
# Initialisation du générateur de nombres pseudo-aléatoires
set.seed(295)
# MDS non métrique
veg_nmds <- mds$nonmetric(veg_dist)## Run 0 stress 0.1256617
## Run 1 stress 0.1262346
## Run 2 stress 0.1262346
## Run 3 stress 0.1262346
## Run 4 stress 0.1256617
## ... New best solution
## ... Procrustes: rmse 2.394457e-06 max resid 9.288965e-06
## ... Similar to previous best
## Run 5 stress 0.1256617
## ... New best solution
## ... Procrustes: rmse 4.541188e-07 max resid 1.210067e-06
## ... Similar to previous best
## Run 6 stress 0.1256617
## ... New best solution
## ... Procrustes: rmse 5.986824e-06 max resid 2.004336e-05
## ... Similar to previous best
## Run 7 stress 0.1262346
## Run 8 stress 0.1262346
## Run 9 stress 0.1256617
## ... Procrustes: rmse 2.28195e-06 max resid 8.031862e-06
## ... Similar to previous best
## Run 10 stress 0.1262346
## Run 11 stress 0.1912665
## Run 12 stress 0.1262346
## Run 13 stress 0.1256617
## ... Procrustes: rmse 2.43849e-06 max resid 8.559077e-06
## ... Similar to previous best
## Run 14 stress 0.1256617
## ... Procrustes: rmse 5.831956e-06 max resid 2.038821e-05
## ... Similar to previous best
## Run 15 stress 0.200448
## Run 16 stress 0.1256617
## ... Procrustes: rmse 3.740821e-06 max resid 1.04062e-05
## ... Similar to previous best
## Run 17 stress 0.1262346
## Run 18 stress 0.1262346
## Run 19 stress 0.2250299
## Run 20 stress 0.2105864
## *** Best solution repeated 5 times
## # A tibble: 1 Ă— 2
## linear_R2 nonmetric_R2
## <dbl> <dbl>
## 1 0.919 0.984
# Diagramme de Shepard pour visualiser la fonction de stress
veg_sh <- shepard(veg_dist, veg_nmds)
chart(veg_sh) +
labs(x = "Dissimilarité observée", y = "Distance sur l'ordination")