2018-02-09

Getting started

Generate a report

View points on a map and check missing fields, event dates, depth values, points on land, …

report(abra, file="abra_report.html", dir=".")

abra_report.html

Check required fields

check_fields(abra)
##              field level                                    message
## 1 occurrenceStatus error Required field occurrenceStatus is missing

occurrenceStatus: A statement about the presence or absence of a Taxon at a Location.
http://rs.tdwg.org/dwc/terms/#occurrenceStatus

Map column names to Darwin Core terms

data <- data.frame(
  id = c("cruise_1", "station_1", "station_2", "sample_1", "sample_2", "sample_3", "sample_4", "subsample_1", "subsample_2"),
  date = c(NA, NA, NA, "2017-01-01", "2017-01-02", "2017-01-03", "2017-01-04", NA, NA),
  locality = rep("North Sea", 9),
  lon = c(NA, 2.9, 4.7, NA, NA, NA, NA, NA, NA),
  lat = c(NA, 54.1, 55.8, NA, NA, NA, NA, NA, NA),
  stringsAsFactors = FALSE)

mapping <- list(
  decimalLongitude = "lon", decimalLatitude = "lat",
  datasetName = "dataset", eventID = "id", eventDate = "date")

Issue 56: detect_fields

Map column names to Darwin Core terms

map_fields(data, mapping)
##       eventID  eventDate  locality decimalLongitude decimalLatitude
## 1    cruise_1       <NA> North Sea               NA              NA
## 2   station_1       <NA> North Sea              2.9            54.1
## 3   station_2       <NA> North Sea              4.7            55.8
## 4    sample_1 2017-01-01 North Sea               NA              NA
## 5    sample_2 2017-01-02 North Sea               NA              NA
## 6    sample_3 2017-01-03 North Sea               NA              NA
## 7    sample_4 2017-01-04 North Sea               NA              NA
## 8 subsample_1       <NA> North Sea               NA              NA
## 9 subsample_2       <NA> North Sea               NA              NA

Taxon matching

Interactive taxon matching with the World Register of Marine Species (WoRMS)

taxa <- c("Abra alva", "Buccinum fusiforme", "Buccinum fusiforme", "hlqsdkf")
matched_taxa <- match_taxa(taxa)
matched_taxa
3 names, 1 without matches, 1 with multiple matches
Proceed to resolve names (y/n/p)? y

  AphiaID     scientificname      authority     status match_type
1  531014 Buccinum fusiforme   Kiener, 1834 unaccepted      exact
2  510389 Buccinum fusiforme Broderip, 1830 unaccepted      exact

Multiple matches, pick a number or leave empty to skip: 2
##       scientificName                          scientificNameID  match_type
## 1          Abra alba urn:lsid:marinespecies.org:taxname:141433      near_1
## 2 Buccinum fusiforme urn:lsid:marinespecies.org:taxname:510389       exact
## 3 Buccinum fusiforme urn:lsid:marinespecies.org:taxname:510389       exact
## 4               <NA>                                      <NA>        <NA>

Checking points on land

onland <- check_onland(abra)
onland[,1:5]
##           id decimalLongitude decimalLatitude basisOfRecord
## 31 365512845       -0.9092748        54.57467    Occurrence
##              eventDate
## 31 2011-09-03 10:00:00
plot_map(onland, zoom = TRUE)

robis::leafletmap(onland)

Checking depth values

depthreport <- check_depth(abra, report = TRUE)
depthreport[1:10, ]
##                   field   level row
## 1  minimumDepthInMeters warning  34
## 2  minimumDepthInMeters warning  35
## 3  minimumDepthInMeters warning  36
## 4  minimumDepthInMeters warning  37
## 5  minimumDepthInMeters warning  47
## 6  minimumDepthInMeters warning  51
## 7  minimumDepthInMeters warning  52
## 8  minimumDepthInMeters warning  54
## 9  minimumDepthInMeters warning  55
## 10 minimumDepthInMeters warning  56
##                                                                                               message
## 1  Depth value (15.9) is greater than the value found in the bathymetry raster (depth=14.6, margin=0)
## 2  Depth value (15.9) is greater than the value found in the bathymetry raster (depth=14.6, margin=0)
## 3  Depth value (23.7) is greater than the value found in the bathymetry raster (depth=20.2, margin=0)
## 4  Depth value (24.8) is greater than the value found in the bathymetry raster (depth=20.2, margin=0)
## 5  Depth value (19.4) is greater than the value found in the bathymetry raster (depth=18.6, margin=0)
## 6  Depth value (23.4) is greater than the value found in the bathymetry raster (depth=18.6, margin=0)
## 7  Depth value (23.8) is greater than the value found in the bathymetry raster (depth=18.6, margin=0)
## 8    Depth value (8.5) is greater than the value found in the bathymetry raster (depth=7.2, margin=0)
## 9   Depth value (8.75) is greater than the value found in the bathymetry raster (depth=7.2, margin=0)
## 10   Depth value (8.9) is greater than the value found in the bathymetry raster (depth=7.2, margin=0)

Checking depth values

robis::leafletmap(abra[depthreport$row,])

Checking depth values

Extra parameters for check_depth:

  • depthmargin: How much can the given depth deviate from the bathymetry in the rasters (in meters).
  • shoremargin: How far offshore (in meters) should a record be to have a bathymetry greater than 0. If NA (default) then this test is ignored.
  • bathymetry: Raster* object that you want to use to check the depth against. If NULL (default) then the bathymetry from the xylookup service is used.

Lookup XY

Data for the on-land and depth checks is provided by the xylookup service, which can be called through the lookup_xy function.

env <- lookup_xy(abra)
head(env)
##   shoredistance sstemperature sssalinity bathymetry
## 1            30       10.2863    34.7627       -4.0
## 2          1080       10.3324    34.9062       61.4
## 3          1184       10.7220    34.8890      122.2
## 4           290       10.7920    34.2934       20.6
## 5           259       10.7220    34.8890       51.0
## 6           506       10.7710    34.3070       32.4

Check event date

data_nodate <- data.frame(
  scientificName = c("Abra alba", "Lanice conchilega"),
  stringsAsFactors = FALSE)

check_eventdate(data_nodate)
##   level                  message
## 1 error Column eventDate missing

Check event date

data_goodformats <- data.frame(
  eventDate = c(
    "2016",
    "2016-01",
    "2016-01-02",
    "2016-01-02 13:00",
    "2016-01-02T13:00",
    "2016-01-02 13:00:00/2016-01-02 14:00:00",
    "2016-01-02 13:00:00/14:00:00"), 
  stringsAsFactors = FALSE)

check_eventdate(data_goodformats)
## data frame with 0 columns and 0 rows

Check event date

data_badformats <- data.frame(
  eventDate = c(
    "2016/01/02",
    "2016-01-02 13h00"),
  stringsAsFactors = FALSE)

check_eventdate(data_badformats)
##   level row     field
## 1 error   1 eventDate
## 2 error   2 eventDate
##                                                       message
## 1       eventDate 2016/01/02 does not seem to be a valid date
## 2 eventDate 2016-01-02 13h00 does not seem to be a valid date

Dataset structure

archive <- hyperbenthos$data
tree <- treeStructure(archive$event.txt, archive$occurrence.txt, 
                      archive$extendedmeasurementorfact.txt)
exportTree(tree, "hyperbenthos_tree.html")

hyperbenthos_tree.html

Check eventID and parentEventID

check_eventids() checks if both eventID and parentEventID fields are present in an event table, and if all parentEventIDs have a corresponding eventID.

data <- data.frame(
  eventID = c("a", "b", "c", "d", "e", "f"),
  parentEventID = c("", "", "a", "a", "z", "b"),
  stringsAsFactors = FALSE
)
check_eventids(data)
##           field level row                                      message
## 1 parentEventID error   5 parentEventID z has no corresponding eventID

Check eventID in an extension

check_extension_eventids() checks if all eventIDs in an extension have matching eventIDs in the core table.

event <- data.frame(
  eventID = c("cruise_1", "station_1", "station_2", 
              "sample_1", "sample_2", "sample_3", "sub_1", "sub_2"),
  parentEventID = c(NA, "cruise_1", "cruise_1", "station_1", "station_1", "station_2", 
                    "sample_3", "sample_3"),
  eventDate = c(NA, NA, NA, "2017-01-01", "2017-01-02", "2017-01-03", NA, NA),
  decimalLongitude = c(NA, 2.9, 4.7, NA, NA, NA, NA, NA),
  decimalLatitude = c(NA, 54.1, 55.8, NA, NA, NA, NA, NA),
  stringsAsFactors = FALSE)

event
##     eventID parentEventID  eventDate decimalLongitude decimalLatitude
## 1  cruise_1          <NA>       <NA>               NA              NA
## 2 station_1      cruise_1       <NA>              2.9            54.1
## 3 station_2      cruise_1       <NA>              4.7            55.8
## 4  sample_1     station_1 2017-01-01               NA              NA
## 5  sample_2     station_1 2017-01-02               NA              NA
## 6  sample_3     station_2 2017-01-03               NA              NA
## 7     sub_1      sample_3       <NA>               NA              NA
## 8     sub_2      sample_3       <NA>               NA              NA

Check eventID in an extension

occurrence <- data.frame(
  eventID = c("sample_1","sample_1","sample_28","sample_2","sample_3","sub_1"),
  scientificName = c("Abra alba", "Lanice conchilega", "Pectinaria koreni", 
                     "Nephtys hombergii", "Pectinaria koreni", "Abra alba"),
  stringsAsFactors = FALSE)

occurrence
##     eventID    scientificName
## 1  sample_1         Abra alba
## 2  sample_1 Lanice conchilega
## 3 sample_28 Pectinaria koreni
## 4  sample_2 Nephtys hombergii
## 5  sample_3 Pectinaria koreni
## 6     sub_1         Abra alba

Check eventID in an extension

check_extension_eventids(event, occurrence)
##     field level row
## 1 eventID error   3
##                                                      message
## 1 eventID sample_28 has no corresponding eventID in the core

Flatten event records

flatten_event() recursively adds event information from parent to child events.

event
##     eventID parentEventID  eventDate decimalLongitude decimalLatitude
## 1  cruise_1          <NA>       <NA>               NA              NA
## 2 station_1      cruise_1       <NA>              2.9            54.1
## 3 station_2      cruise_1       <NA>              4.7            55.8
## 4  sample_1     station_1 2017-01-01               NA              NA
## 5  sample_2     station_1 2017-01-02               NA              NA
## 6  sample_3     station_2 2017-01-03               NA              NA
## 7     sub_1      sample_3       <NA>               NA              NA
## 8     sub_2      sample_3       <NA>               NA              NA
flatten_event(event)
##     eventID parentEventID  eventDate decimalLongitude decimalLatitude
## 1  cruise_1          <NA>       <NA>               NA              NA
## 2 station_1      cruise_1       <NA>              2.9            54.1
## 3 station_2      cruise_1       <NA>              4.7            55.8
## 4  sample_1     station_1 2017-01-01              2.9            54.1
## 5  sample_2     station_1 2017-01-02              2.9            54.1
## 6  sample_3     station_2 2017-01-03              4.7            55.8
## 7     sub_1      sample_3 2017-01-03              4.7            55.8
## 8     sub_2      sample_3 2017-01-03              4.7            55.8

Flatten occurrence and event records

flatten_occurrence() will add event information to occurrence records.

occurrence
##     eventID    scientificName
## 1  sample_1         Abra alba
## 2  sample_1 Lanice conchilega
## 3 sample_28 Pectinaria koreni
## 4  sample_2 Nephtys hombergii
## 5  sample_3 Pectinaria koreni
## 6     sub_1         Abra alba
occurrence$eventID[occurrence$eventID == "sample_28"] <- "sample_2"
flatten_occurrence(event, occurrence)
##    eventID    scientificName  eventDate decimalLatitude decimalLongitude
## 1 sample_1         Abra alba 2017-01-01            54.1              2.9
## 2 sample_1 Lanice conchilega 2017-01-01            54.1              2.9
## 3 sample_2 Pectinaria koreni 2017-01-02            54.1              2.9
## 4 sample_2 Nephtys hombergii 2017-01-02            54.1              2.9
## 5 sample_3 Pectinaria koreni 2017-01-03            55.8              4.7
## 6    sub_1         Abra alba 2017-01-03            55.8              4.7

Calculate centroid and radius for WKT geometries

WKT: text representation for geometries https://en.wikipedia.org/wiki/Well-known_text

POINT (30 10)
LINESTRING (30 10, 10 30, 40 40)
POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))

calculate_centroid() calculates a centroid and radius for WKT strings. This is useful for populating decimalLongitude, decimalLatitude and coordinateUncertaintyInMeters.

wkt <- c(
  "POLYGON ((2.5378 51.1242, 2.9937 51.3203, 3.3453 51.3957, 2.2741 51.6998, 2.5378 51.1242))",
  "POLYGON ((3.1558 42.2356, 3.1324 42.1420, 3.2203 42.1124, 3.2601 42.2153, 3.1558 42.2356))"
)
centroid <- calculate_centroid(wkt)
centroid
##   decimalLongitude decimalLatitude coordinateUncertaintyInMeters
## 1         2.719067        51.40717                     44860.608
## 2         3.193506        42.17714                      7524.069

Centroid and radius for WKT

if(!requireNamespace("sf")){
  install.packages("sf")
}
features <- sf::st_as_sfc(wkt)
map <- robis::leafletmap(centroid)
leaflet::addPolygons(map, data = features)