Validate 'riskintro' Datasets
validate_table_content.Rd
Validates the datasets with 'riskintro' rules. The function checks the required and optional columns, and validates the data using the rules defined in the specifications.
Arguments
- x
A data frame or an 'sf' object to be validated.
- table_name
A character string specifying the name of the dataset. It accepts one of the following values:
"animal_mobility"
"epi_units"
"entry_points"
"emission_risk_factors"
- ...
Additional arguments to be passed to the function. They are expected to be named arguments giving the columns to be renamed in the dataset, for example `col1 = "new_col1", col2 = "new_col2"`.
Value
A list containing the validation status of the dataset. The list contains the following elements:
- `required_columns`: A list with the status of required columns.
- `optional_columns`: A list with the status of optional columns.
- `validate_rules`: A list with the status of validation rules.
- `dataset`: The dataset after renaming and selecting the specified columns. If validation fails, this element will be `NULL` (not available).
Details
The function checks whether the dataset contains the required and optional columns as specified in the specifications. It also validates the data using the rules defined in the specifications. If errors are encountered or validity rules are not satisfied, the function returns a list with the status of the validation.
Examples
## ---- read-epi-units-tunisia ----
tun_epi_files <-
system.file(
package = "riskintrodata",
"samples",
"tunisia",
"epi_units", "tunisia_adm2_raw.gpkg"
)
tun_epi_unit <- read_geo_file(tun_epi_files)
DATA_EPI_UNITS <- validate_table_content(
x = tun_epi_unit,
table_name = "epi_units",
eu_name = "shapeName",
user_id = "fid"
)
DATA_EPI_UNITS
#> $table_name
#> [1] "epi_units"
#>
#> $required_columns
#> $chk
#> [1] FALSE
#>
#> $msg
#> The following required columns are missing: `eu_name` and `geometry`
#>
#> $details
#> [1] "eu_name" "geometry"
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $optional_columns
#> $chk
#> [1] TRUE
#>
#> $msg
#> [1] "Optional columns selected are available."
#>
#> $details
#> character(0)
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $validate_rules
#> $chk
#> [1] FALSE
#>
#> $msg
#> [1] "Found invalidities while checking dataset."
#>
#> $details
#> # A tibble: 4 × 8
#> colname valid required column_found n index value msg
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <glue>
#> 1 eu_id TRUE FALSE TRUE NA NA NA "eu_id" has been valid…
#> 2 eu_id TRUE FALSE TRUE NA NA NA "eu_id" has been valid…
#> 3 eu_name FALSE TRUE FALSE NA NA NA Column: "eu_name" is m…
#> 4 geometry FALSE TRUE FALSE NA NA NA Column: "geometry" is …
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $dataset
#> Simple feature collection with 268 features and 0 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: 7.530076 ymin: 30.23681 xmax: 11.59826 ymax: 37.55986
#> Geodetic CRS: WGS 84
#> # A tibble: 268 × 1
#> geom
#> <MULTIPOLYGON [°]>
#> 1 (((10.13861 36.89453, 10.14495 36.89476, 10.15127 36.89476, 10.1576 36.89235…
#> 2 (((10.05585 36.84308, 10.06575 36.85019, 10.07327 36.8544, 10.07366 36.85451…
#> 3 (((10.13862 36.89416, 10.1329 36.88994, 10.13283 36.88892, 10.1326 36.88572,…
#> 4 (((10.1317 36.88428, 10.1317 36.88271, 10.1317 36.8797, 10.12929 36.87579, 1…
#> 5 (((10.16651 36.88694, 10.16422 36.88874, 10.1576 36.89235, 10.15127 36.89476…
#> 6 (((10.27118 36.88874, 10.26842 36.88874, 10.26149 36.88783, 10.25577 36.8863…
#> 7 (((10.01018 37.00285, 10.0102 37.00285, 10.01045 37.00283, 10.01063 37.00281…
#> 8 (((10.19313 36.85656, 10.19313 36.85892, 10.19313 36.86404, 10.19313 36.8667…
#> 9 (((9.141866 36.86897, 9.140129 36.86767, 9.137473 36.86604, 9.133329 36.8623…
#> 10 (((9.086732 36.70221, 9.082556 36.70772, 9.078131 36.71146, 9.075724 36.7131…
#> # ℹ 258 more rows
#>
#> attr(,"class")
#> [1] "table_validation_status"
## ---- read-animal-mobility-tunisia ----
tun_animal_mobility <-
system.file(
package = "riskintrodata",
"samples",
"tunisia",
"animal_mobility", "ANIMAL_MOBILITY_raw.csv"
)
x <- readr::read_csv(
tun_animal_mobility,
)
#> Rows: 112 Columns: 11
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): ORIGIN_NAME, ORIGIN_COUNTRY, ORIGIN_ISO3, DESTINATION_NAME, DESTINA...
#> dbl (5): ORIGIN_LONGITUDE_X, ORIGIN_LATITUDE_Y, DESTINATION_LONGITUDE_X, DES...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DATA_ANIMAL_MOBILITY <- apply_mapping(
x,
mapping = mapping_animal_mobility(
o_name = "ORIGIN_NAME",
o_lng = "ORIGIN_LONGITUDE_X",
o_lat = "ORIGIN_LATITUDE_Y",
d_name = "DESTINATION_NAME",
d_lng = "DESTINATION_LONGITUDE_X",
d_lat = "DESTINATION_LATITUDE_Y",
quantity = "HEADCOUNT"
),
validate = TRUE
)
#> ✔ All data in "animal_mobility" valided.
DATA_ANIMAL_MOBILITY
#> # A tibble: 112 × 12
#> animal_mobility_id o_name o_lng o_lat d_name d_lng d_lat quantity o_country
#> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <chr>
#> 1 am-00001 AEROP… -4.08 14.5 AEROP… 10.8 33.9 2217 Mali
#> 2 am-00002 AEROP… -17.5 14.7 AEROP… 10.7 34.7 5343 Senegal
#> 3 am-00003 SENGH… -13.2 14.6 POINT… 7.66 33.5 7798 Senegal
#> 4 am-00004 TESSA… 0.706 20.4 POINT… 10.7 32.0 9519 Mali
#> 5 am-00005 PERUV… -77.1 -12.0 PORT … 9.89 37.3 6133 Peru
#> 6 am-00006 BAHIA… -62.3 48.9 PORT … 10.1 33.9 1785 Canada
#> 7 am-00007 AINZE… 8.26 35.6 BOUJA… 8.49 35.7 2693 Algeria
#> 8 am-00008 AINZE… 8.26 35.6 BOUJA… 8.49 35.7 6307 Algeria
#> 9 am-00009 AINZE… 8.26 35.6 BOUJA… 8.49 35.7 4791 Algeria
#> 10 am-00010 BOUGO… 8.36 36.7 HALIMA 10.0 36.4 7299 Algeria
#> # ℹ 102 more rows
#> # ℹ 3 more variables: o_iso3 <chr>, d_country <chr>, d_iso3 <chr>
# read Tunisia emission risk factors dataset ----
tun_erf_file <-
system.file(
package = "riskintrodata",
"samples",
"tunisia",
"emission_risk_factor",
"emission_risk_factors.csv"
)
x <- read_emission_risk_factor_file(tun_erf_file)
DATA_ERF <- validate_table_content(x, table_name = "emission_risk_factors")
DATA_ERF
#> $table_name
#> [1] "emission_risk_factors"
#>
#> $required_columns
#> $chk
#> [1] TRUE
#>
#> $msg
#> [1] "All required columns selected."
#>
#> $details
#> character(0)
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $optional_columns
#> $chk
#> [1] TRUE
#>
#> $msg
#> [1] "No optional columns selected."
#>
#> $details
#> character(0)
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $validate_rules
#> $chk
#> [1] TRUE
#>
#> $msg
#> 19 valid rules checking dataset.
#>
#> $details
#> # A tibble: 19 × 8
#> colname valid required column_found n index value msg
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <glu>
#> 1 iso3 TRUE TRUE TRUE NA NA NA "iso…
#> 2 iso3 TRUE TRUE TRUE NA NA NA "iso…
#> 3 country TRUE TRUE TRUE NA NA NA "cou…
#> 4 animal_category TRUE TRUE TRUE NA NA NA "ani…
#> 5 animal_category TRUE TRUE TRUE NA NA NA "ani…
#> 6 species TRUE TRUE TRUE NA NA NA "spe…
#> 7 species TRUE TRUE TRUE NA NA NA "spe…
#> 8 disease_notification TRUE TRUE TRUE NA NA NA "dis…
#> 9 targeted_surveillance TRUE TRUE TRUE NA NA NA "tar…
#> 10 general_surveillance TRUE TRUE TRUE NA NA NA "gen…
#> 11 screening TRUE TRUE TRUE NA NA NA "scr…
#> 12 precautions_at_the_borde… TRUE TRUE TRUE NA NA NA "pre…
#> 13 slaughter TRUE TRUE TRUE NA NA NA "sla…
#> 14 selective_killing_and_di… TRUE TRUE TRUE NA NA NA "sel…
#> 15 zoning TRUE TRUE TRUE NA NA NA "zon…
#> 16 official_vaccination TRUE TRUE TRUE NA NA NA "off…
#> 17 last_outbreak_end_date TRUE TRUE TRUE NA NA NA "las…
#> 18 commerce_illegal TRUE TRUE TRUE NA NA NA "com…
#> 19 commerce_legal TRUE TRUE TRUE NA NA NA "com…
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $dataset
#> # A tibble: 65 × 18
#> iso3 country disease animal_category species disease_notification
#> <chr> <chr> <chr> <chr> <chr> <int>
#> 1 ALB Albania Brucel… Domestic Cattle 0
#> 2 BWA Botswana Brucel… Domestic Cattle 1
#> 3 CYM Cayman Islands Brucel… Domestic Cattle 0
#> 4 JPN Japan Brucel… Domestic Cattle 0
#> 5 SYC Seychelles Brucel… Domestic Cattle 0
#> 6 SYR Syria Brucel… Domestic Cattle 0
#> 7 PSE Palestine Brucel… Domestic Cattle 0
#> 8 USA United States of … Brucel… Domestic Cattle 0
#> 9 ARM Armenia Brucel… Domestic Cattle 0
#> 10 AZE Azerbaijan Brucel… Domestic Cattle 0
#> # ℹ 55 more rows
#> # ℹ 12 more variables: targeted_surveillance <int>, general_surveillance <int>,
#> # screening <int>, precautions_at_the_borders <int>, slaughter <int>,
#> # selective_killing_and_disposal <int>, zoning <int>,
#> # official_vaccination <int>, last_outbreak_end_date <date>,
#> # commerce_illegal <int>, commerce_legal <int>, data_source <chr>
#>
#> $dataset_changes
#> character(0)
#>
#> attr(,"class")
#> [1] "table_validation_status"
# read entry points data from Tunisia ----
tun_entry_points <- system.file(
package = "riskintrodata",
"samples",
"tunisia",
"entry_points",
"BORDER_CROSSING_POINTS.csv"
)
x <- readr::read_delim(
tun_entry_points
)
#> Rows: 110 Columns: 6
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (4): NAME, TYPE, MODE, SOURCES
#> dbl (2): LONGITUDE_X, LATITUDE_Y
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
DATA_ENTRY_POINTS <- validate_table_content(
x = x,
table_name = "entry_points",
point_name = "NAME",
lng = "LONGITUDE_X",
lat = "LATITUDE_Y",
mode = "MODE",
type = "TYPE"
)
DATA_ENTRY_POINTS
#> $table_name
#> [1] "entry_points"
#>
#> $required_columns
#> $chk
#> [1] TRUE
#>
#> $msg
#> [1] "All required columns selected."
#>
#> $details
#> character(0)
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $optional_columns
#> $chk
#> [1] TRUE
#>
#> $msg
#> [1] "Optional columns selected are available."
#>
#> $details
#> character(0)
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $validate_rules
#> $chk
#> [1] TRUE
#>
#> $msg
#> 10 valid rules checking dataset.
#>
#> $details
#> # A tibble: 10 × 8
#> colname valid required column_found n index value msg
#> <chr> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <glue>
#> 1 point_name TRUE TRUE TRUE NA NA NA "point_name" has be…
#> 2 point_name TRUE TRUE TRUE NA NA NA "point_name" has be…
#> 3 lng TRUE TRUE TRUE NA NA NA "lng" has been vali…
#> 4 lng TRUE TRUE TRUE NA NA NA "lng" has been vali…
#> 5 lng TRUE TRUE TRUE NA NA NA "lng" has been vali…
#> 6 lat TRUE TRUE TRUE NA NA NA "lat" has been vali…
#> 7 lat TRUE TRUE TRUE NA NA NA "lat" has been vali…
#> 8 lat TRUE TRUE TRUE NA NA NA "lat" has been vali…
#> 9 mode TRUE FALSE TRUE NA NA NA "mode" has been val…
#> 10 type TRUE FALSE TRUE NA NA NA "type" has been val…
#>
#> attr(,"class")
#> [1] "validation_status"
#>
#> $dataset
#> Simple feature collection with 110 features and 4 fields
#> Geometry type: POINT
#> Dimension: XY
#> Bounding box: xmin: 7.572541 ymin: 31.94455 xmax: 11.59319 ymax: 37.26487
#> Geodetic CRS: WGS 84
#> # A tibble: 110 × 5
#> point_id point_name mode type geometry
#> * <chr> <chr> <chr> <chr> <POINT [°]>
#> 1 ep-00001 aeroport Djerba C AIR (10.77592 33.87149)
#> 2 ep-00002 aeroport enfidha C AIR (10.43123 36.07011)
#> 3 ep-00003 aeroport monastir C AIR (10.75472 35.75806)
#> 4 ep-00004 aeroport sfax C AIR (10.68861 34.72056)
#> 5 ep-00005 aeroport tabarka C AIR (8.87528 36.98028)
#> 6 ep-00006 Aeroport tozeur C AIR (8.10139 33.93889)
#> 7 ep-00007 aeroport tunis cathage C AIR (10.22694 36.85111)
#> 8 ep-00001 aeroport Djerba C AIR (10.77592 33.87149)
#> 9 ep-00002 aeroport enfidha C AIR (10.43123 36.07011)
#> 10 ep-00003 aeroport monastir C AIR (10.75472 35.75806)
#> # ℹ 100 more rows
#>
#> $dataset_changes
#> [1] "\"Entry points\" `lat` and `lng` table converted to type POINT_sf"
#>
#> attr(,"class")
#> [1] "table_validation_status"