<- "Z:/07-Données BD/Database/contaminants-rlavoie-eccc.sqlite"
db_path
<- stringr::str_replace(
db_bak
db_path, pattern = ".sqlite",
replacement = glue::glue("-{format(Sys.Date(), '%Y%m%d')}.bak.sqlite")
)
file.copy(db_path, db_bak)
Lab results importation example
Data importation procedure
To maintain the referential integrity of the data, the data injection must be done in a certain order. For example, all sites must be entered first before the associated samples can be declared.
Here is the expected injection order according to the database structure:
- Projects
- Reports
- Sites
- Species
- Field samples
- Lab samples
- Lab measurement
All the data intended to populate the tables was consolidated during the previous steps. We will now proceed with importing them into the database.
This document allows the import of laboratory measurements for the MET-Thg-23-19 report.
Database backup
The first step is to ensure that we can restore the database if the import introduces errors into the database.
Loading library dependancy
if(!require(devtools)) install.packages("devtools")
if(!require(toxbox)) devtools::install_github("ECCC-lavoie-ecotox/toxbox.git")
Loading the data report within the R environment
<- "Z:/03-Labo/Results Reports/À entrer dans la BD/MET-THg-23-19 - RALA01-2023.xlsx"
target_path_lab_results
# Informations sur les échantillons
<- readxl::read_xlsx(target_path_lab_results, "SampleInfo")
sampleInfoLab
# Informations sur les mesures
<- readxl::read_xlsx(target_path_lab_results, "SampleData") measurementsLab
Initialize the database connection
<- toxbox::init_con() con
Data injection
Report and project metadata importation
<- data.frame(
report id_report = "MET-Thg-23-19",
id_project = "RALA01-2023",
report_date = "2024-07-31",
report_access_path = stringr::str_replace("Z:/03-Labo/Results Reports/À entrer dans la BD/MET-THg-23-19 - RALA01-2023.xlsx", "À entrer dans la BD", "Saisie dans la BD")
)
We first validate if the project ID already exists in the database.
::search_tbl(con, "project", id_project = "RALA01-2023") toxbox
It already exists in the database. Therefore, we can import the report metadata that will be attached to this project.
::dbWriteTable(con, "report", report, append = TRUE) DBI
Data preparation
Gather field sample informations
<- sampleInfoLab |> dplyr::select(
field_sample id_lab_sample = SampleID,
id_field_sample = ClientID,
collection_date = CollectionDate,
id_site = Location,
id_species = Species,
tissue = Tissue,
age = Age
)
Gather lab meaurements
<- measurementsLab |> dplyr::select(
measurements id_lab_sample = SampleID,
id_field_sample = ClientID,
pmoisture = `% Moisture`,
value = `Total Mercury (µg/g (dry))`
|> dplyr::mutate(
) id_analyte = "thg_dw"
)
Site importation
We must establish correspondance with existing sites in database and create the ones that are not already present in the database.
First, we isolate the sites present in the results sent by the laboratory.
<- dplyr::distinct(sampleInfoLab, Location, Latitude, Longitude)) (sitesLab
We search the database for the existing sites using keywords.
<- toxbox::search_tbl(con, "sites", id_site = "%betchouanes%")
betchouanes_site_db <- toxbox::search_tbl(con, "sites", id_site = "%mingan%") mingan_site_db
After searching the database for sites, we can see that the Betchouanes site is already recorded in the database, but the Longue-Pointe-de-Mingan site is not. Therefore, we add this site.
<- sitesLab |>
add_site ::filter(Location == "Longue-Pointe-de-Mingan") |>
dplyr::select(
dplyrid_site = Location,
lat = Latitude,
lon = Longitude
|>
) ::mutate(
dplyrprovince = 'Québec',
srid = 4326
)
::dbWriteTable(con, "sites", add_site, append = TRUE) DBI
We verify that the site has indeed been added to the database.
::search_tbl(con, "sites", id_site = "Longue-Pointe-de-Mingan") toxbox
<- field_sample |>
field_sample ::mutate(
dplyrid_site = dplyr::case_when(
::str_detect(id_site, "Ile a Calculot des Betchouanes") == TRUE ~ betchouanes_site_db$id_site,
stringr.default = id_site
) )
Species importation
We list the species, starting with those present in the laboratory results.
unique(field_sample$id_species)
We compare it with the list of species present in the database.
::dbReadTable(con, "species") DBI
We declare a data.frame
with the missing species
<- tibble::tribble(
new_species ~org_species, ~id_species, ~organism, ~genus, ~species, ~vernacular_fr, ~vernacular_en,
"Alose sp.", "SHSP", "Fish", "Alosa", "Alosa sp.", "Alose", "Shad",
"Gadidae sp.", "COSP", "Fish", "Gadidae", "Gadidae sp.", "Morue", "Cod",
"Lompenie tachete", "DASH", "Fish", "Gadidae", "Leptoclinus maculatus", "Lompénie tachetée", "Daubed shanny"
)
We add those missing species in the database
::dbWriteTable(con, "species", dplyr::select(new_species, -org_species)
DBIappend = TRUE) ,
We visually confirm that the new species are indeed present in the database.
::dbReadTable(con, "species") DBI
We match the species codes from the laboratory report with those in the database.
<- field_sample |> dplyr::mutate(
field_sample id_species = dplyr::case_when(
== "Alose sp." ~ "SHSP",
id_species == "Capelan" ~ "CAPE",
id_species == "Gadidae sp." ~ "COSP",
id_species == "Lancon" ~ "SAND",
id_species == "Lompenie tachete" ~ "DASH",
id_species .default = NA
)|> dplyr::filter(!is.na(id_species)) )
Lab measurements injection
The previous steps have updated the reference tables for sites and species.
We can now proceed to import the data on samples and measurements.
Field samples
::select(field_sample, id_field_sample, collection_date, id_site, id_species, age, tissue) |>
dplyr::distinct() |>
dplyr::dbWriteTable(con, "field_sample", data, append = TRUE))() (\(data) DBI
Lab samples
::select(field_sample, id_lab_sample) |>
dplyr::distinct() |>
dplyr::dbWriteTable(con, "lab_sample", data, append = TRUE))() (\(data) DBI
Lab field samples
This table links the laboratory samples with the field samples. This join table is necessary as lab samples can hold several field sample (often the case with eggs).
::select(field_sample, id_lab_sample, id_field_sample) |>
dplyr::distinct() |>
dplyr::dbWriteTable(con, "lab_field_sample", data, append = TRUE))() (\(data) DBI
Lab measurements
Finaly, all the parent table (field_sample
, lab_sample
, lab_field_sample
) have been recorded in the database. We can now import the lab measurements record. We have to manualy declare the MDL/LOD value in order to assess if the measurement value is cencored or not.
# Set method detection limit
<- 0.0001
MDL
::select(measurements, id_lab_sample, id_analyte, value, percent_moisture = pmoisture) |>
dplyr# The line below assess if the value is censored or not
::mutate(is_censored = ifelse(value > MDL, 0, 1)) |>
dplyr::mutate(id_report = report$id_report) |>
dplyr::dbWriteTable(con, "lab_measurement", data, append = TRUE))() (\(data) DBI