2018-02-09

  • In R
  • In Python
  • API
  • Web

In R

Getting started

Additional libraries

Occurrence

occurrence(scientificname = NULL, year = NULL, obisid = NULL, aphiaid = NULL,
  groupid = NULL, resourceid = NULL, nodeid = NULL, areaid = NULL, startdate = NULL,
  enddate = NULL, startdepth = NULL, enddepth = NULL, geometry = NULL, qc = NULL,
  fields = NULL, verbose = FALSE)
ptevol <- occurrence("Pterois volitans") # red lionfish
## 
Retrieved 2000 records of 2436 (82%)
Retrieved 2436 records of 2436 (100%)
colnames(ptevol)
##  [1] "id"                            "decimalLongitude"             
##  [3] "decimalLatitude"               "depth"                        
##  [5] "institutionCode"               "collectionCode"               
##  [7] "catalogNumber"                 "individualCount"              
##  [9] "datasetName"                   "phylum"                       
## [11] "order"                         "family"                       
## [13] "genus"                         "scientificName"               
## [15] "originalScientificName"        "scientificNameAuthorship"     
## [17] "obisID"                        "resourceID"                   
## [19] "species"                       "qc"                           
## [21] "aphiaID"                       "speciesID"                    
## [23] "scientificNameID"              "class"                        
## [25] "eventDate"                     "yearcollected"                
## [27] "basisOfRecord"                 "locality"                     
## [29] "accessRights"                  "collectionID"                 
## [31] "habitat"                       "higherClassification"         
## [33] "higherGeography"               "language"                     
## [35] "locationRemarks"               "modified"                     
## [37] "occurrenceID"                  "recordedBy"                   
## [39] "recordNumber"                  "rights"                       
## [41] "rightsHolder"                  "specificEpithet"              
## [43] "stateProvince"                 "type"                         
## [45] "identifiedBy"                  "geodeticDatum"                
## [47] "bibliographicCitation"         "minimumDepthInMeters"         
## [49] "maximumDepthInMeters"          "coordinateUncertaintyInMeters"
## [51] "eventID"                       "footprintWKT"                 
## [53] "occurrenceRemarks"             "occurrenceStatus"             
## [55] "waterBody"                     "fieldNumber"                  
## [57] "footprintSRS"                  "references"                   
## [59] "taxonRank"                     "county"                       
## [61] "lifestage"                     "dynamicProperties"            
## [63] "dateIdentified"                "vernacularName"               
## [65] "coordinatePrecision"           "eventTime"                    
## [67] "associatedMedia"               "datasetID"                    
## [69] "identificationRemarks"         "eventRemarks"                 
## [71] "ownerInstitutionCode"          "taxonomicStatus"              
## [73] "island"                        "islandGroup"                  
## [75] "materialSampleID"              "continent"

Filter by year

occurrence("Pterois volitans", year = 1991, 
           fields = c("id", "decimalLongitude", "decimalLatitude", "year"))
## 
Retrieved 11 records of 11 (100%)
##           id decimalLongitude decimalLatitude year
## 1    8317355         153.6300       -27.60000   NA
## 2   10918348          55.6000       -20.86667   NA
## 3  702259173         153.6670       -28.40000   NA
## 4  702263624         153.0330       -30.80000   NA
## 5  702428002         145.8500        -5.16667   NA
## 6  703286139         153.6330       -28.39166   NA
## 7  703690415         117.7808       -19.99333   NA
## 8  703690793         116.9650       -19.76167   NA
## 9  703690899         116.7983       -19.62833   NA
## 10 703691118         118.7017       -18.94334   NA
## 11 703691119         117.7317       -20.01917   NA

Filter by date

occurrence("Pterois volitans", startdate = '1980-03-01', enddate = '1980-10-30', 
           fields = c("id", "decimalLongitude", "decimalLatitude", "eventDate"))
## 
Retrieved 3 records of 3 (100%)
##          id decimalLongitude decimalLatitude           eventDate
## 1    868932         158.1986        7.008333 1980-09-05 07:00:00
## 2 399697683        -177.8700      -29.240000 1980-08-06 10:00:00
## 3 702370348         116.0660      -20.283000 1980-05-31 10:00:00

Filter by depth

occurrence("Pterois volitans", startdepth = 100, enddepth = 200,
           fields = c("id", "decimalLongitude", "decimalLatitude", "depth"))
## 
Retrieved 10 records of 10 (100%)
##           id decimalLongitude decimalLatitude depth
## 1  703597136         127.6836      -12.391670 116.0
## 2  703597267         127.9425      -12.925975 100.0
## 3  703597268         127.9511      -12.750695 102.5
## 4  703597370         134.9856       -8.843335 107.0
## 5  703598330         113.3336      -23.551390 120.0
## 6  703690903         114.9292      -20.414165 122.5
## 7  703691121         117.5433      -19.020835 131.0
## 8  703691470         118.1525      -18.963335 119.5
## 9  703691925         117.7925      -19.168335 101.5
## 10 732957007         120.1400       13.880000 131.5

Filter by polygon

With the OBIS maptool (http://iobis.org/maptool) we can create a WKT polygon for the Perhentian and Redang Islands and use it to query all occurrences within that area.

islands <- occurrence(geometry = 
      paste("POLYGON ((102.68921 6.05862","102.57111 5.95346",
                      "103.07785 5.49980","103.25226 5.62555",
                      "103.07648 5.87970","102.68921 6.05862))",sep=", "))
## 
Retrieved 532 records of 532 (100%)

Filter by polygon

leafletmap(islands)

Visualize data through time

islands$year <- as.numeric(format(as.Date(islands$eventDate), "%Y"))
ggplot(islands[!is.na(islands$year),], aes(x = year, fill = phylum)) +
  geom_histogram(binwidth = 5) +
  scale_fill_brewer(palette = "Paired")

Visualize multiple facets

lag <- occurrence("Lagis", resourceid=c(4312, 222))
ggplot() +
 geom_histogram(data = lag, aes(x = yearcollected), binwidth = 2) +
 facet_grid(resourceID ~ species)

## 
Retrieved 668 records of 668 (100%)

Filter by dataset

datasets <- dataset(q="Adriatic-INTERREG")
## 
Retrieved 6 records of 6 (100%)
datasets$name
## [1] "Meiobenthos North Adriatic-INTERREG-FVG-Projects" 
## [2] "Mesozooplankton North Adriatic-INTERREG Project"  
## [3] "Microzooplankton North Adriatic-INTERREG Project" 
## [4] "Microphytobenthos North Adriatic-INTERREG Project"
## [5] "Macrobenthos North Adriatic-INTERREG-FVG Project" 
## [6] "Phytoplankton North Adriatic-INTERREG-FVG Project"

Filter by dataset

occ <- occurrence(resourceid = datasets$id, year = 1998,
           fields = c("id", "decimalLongitude", "decimalLatitude", "resourceID"))
## 
Retrieved 1553 records of 1553 (100%)
head(occ)
##          id decimalLongitude decimalLatitude resourceID
## 1 719561828         13.56467        45.69350       3494
## 2 719561980         13.59617        45.66383       3494
## 3 719561996         13.56467        45.69350       3494
## 4 719561997         13.56467        45.69350       3494
## 5 719561998         13.56467        45.69350       3494
## 6 719561999         13.56467        45.69350       3494

Filter by quality control

Vandepitte L., Bosch S., Tyberghein L., Waumans F., Vanhoorne B., Hernandez F., De Clerck O., & Mees J. (2015) Fishing for data and sorting the catch: assessing the data quality, completeness and fitness for use of data in marine biogeographic databases. Database, 2015. http://dx.doi.org/10.1093/database/bau125

?qcflags

Categories of quality control flags:

  • Data format
  • Taxonomy
  • Completeness
  • Geography
  • Depth
  • Outliers

Some filters: quality control

  • 3. Taxonomy: is the taxon level genus or lower?
  • 4. Geography (lat/lon): are the latitude/longitude values different from zero?
  • 5. Geography (lat/lon): are the latitude/longitude values within their possible boundaries? (world coordinates)
  • 7. Completeness (date/time): is the sampling year (start/end) completed and valid?
qc3457 <- occurrence(resourceid = datasets$id, qc = c(3,4,5,7), year = 1998)
## 
Retrieved 1005 records of 1005 (100%)

Visualize QC

  • 28. Species outliers (geography): is the observation within three IQRs from the first & third quartile distance to the geographic centroid of this taxon?
acistu <- occurrence("Acipenser sturio")
## 
Retrieved 66 records of 66 (100%)
acistu$qcnum <- qcflags(acistu$qc, c(28))
colors <- c("#ee3300", "#86b300")[acistu$qcnum + 1]
popup <- paste0(acistu$datasetName, "<br/>", acistu$catalogNumber, 
                "<br/><a href=\"http://www.iobis.org/explore/#/dataset/", 
                acistu$resourceID, "\">OBIS dataset page</a>")

Visualize QC

leaflet() %>% addProviderTiles("CartoDB.Positron") %>%
  addCircleMarkers(
    popup = popup, radius = 3.5, weight = 0, fillColor = colors, fillOpacity = 1,
    lat = acistu$decimalLatitude, lng = acistu$decimalLongitude)

Visualize QC

acistu$qctxt <- c("Not ok", "Ok")[acistu$qcnum + 1]
ggplot() +
  geom_polygon(data=map_data("world"),aes(x=long,y=lat,group=group),fill="#dddddd")+
  geom_point(data=acistu, aes(x=decimalLongitude, y=decimalLatitude, color=qctxt))

Visualize environmental data

env <- obistools::lookup_xy(acistu)
envacistu <- cbind(acistu[complete.cases(env),], env[complete.cases(env),])
ggplot(envacistu, aes(x=sssalinity, y=sstemperature, color=qctxt)) +
  geom_point()

checklist: Generate checklists

checklist(scientificname = NULL, year = NULL, obisid = NULL, aphiaid = NULL,
  groupid = NULL, resourceid = NULL, areaid = NULL, startdate = NULL, enddate = NULL,
  startdepth = NULL, enddepth = NULL, geometry = NULL, qc = NULL, fields = NULL,
  verbose = FALSE)

checklist: Generate checklists

checklist(year=1800)[1:3,]
## 
Retrieved 16 records of 16 (100%)
##       id valid_id parent_id  rank_name                 tname
## 1 402913   402913    402912    Species     Anguilla anguilla
## 2 408772   408772    408771 Subspecies Auxis thazard thazard
## 3 424201   424201    424193    Species       Clupea harengus
##            tauthor worms_id redlist status records datasets   phylum
## 1 (Linnaeus, 1758)   126281    TRUE     CR       4        1 Chordata
## 2 (Lacepède, 1800)   293552      NA   <NA>       1        1 Chordata
## 3   Linnaeus, 1758   126417    TRUE     LC       2        1 Chordata
##            order      family    genus           species          class
## 1 Anguilliformes Anguillidae Anguilla Anguilla anguilla Actinopterygii
## 2    Perciformes  Scombridae    Auxis     Auxis thazard Actinopterygii
## 3   Clupeiformes   Clupeidae   Clupea   Clupea harengus Actinopterygii

dataset: Dataset metadata

dataset(id = NULL, scientificname = NULL, q = NULL, obisid = NULL, 
        aphiaid = NULL, groupid = NULL, areaid = NULL, nodeid = NULL, 
        startdate = NULL, enddate = NULL, startdepth = NULL, enddepth = NULL, 
        geometry = NULL, verbose = FALSE)

dataset: Dataset metadata

as.data.frame(dataset(id = 1))
## 
Retrieved 1 records of 1 (100%)
##   id digirname                                  website
## 1  1      BATS http://www.vims.edu/bio/zooplankton/BATS
##                                                                                       citation
## 1 Steinberg, D.K. and L.P. Madin (2003) Zooplankton Census. Bermuda Atlantic Time-series Study
##   species_cnt taxon_cnt record_cnt imis_dasid
## 1          23        25        635       3802
##                                                    name
## 1 Bermuda Atlantic Time-series Study Zooplankton Census

taxon and taxon_common

taxon(obisid = NULL, aphiaid = NULL, scientificname = NULL, verbose = FALSE)
taxon_common(obisid, verbose = FALSE)
tax <- taxon(scientificname = "Pterois volitans")
tax
##       id valid_id parent_id rank_name            tname          tauthor
## 1 501083   501083    501069   Species Pterois volitans (Linnaeus, 1758)
##   worms_id gisd records datasets   phylum           order       family
## 1   159559 TRUE    2436       51 Chordata Scorpaeniformes Scorpaenidae
##     genus          species          class
## 1 Pterois Pterois volitans Actinopterygii
taxon_common(tax$valid_id)
##           name language
## 1     lionfish  English
## 2 red lionfish  English
## 3   turkeyfish  English

area: OBIS areas

head(area())
## 
Retrieved 535 records of 535 (100%)
##    id                                                     name type
## 1 309                                           The Sundarbans prot
## 2  24                                   Bosnia and Herzegovina  eez
## 3 322                                                Shiretoko prot
## 4 478                         EBSA No 2:  Ua puakaoa seamounts ebsa
## 5 371 Rivercess and Sinoe Sea Turtle Breeding Ground (Liberia) ebsa
## 6 127                                                 Malaysia  eez

node: OBIS nodes

node()[1:5,]
## 
Retrieved 29 records of 29 (100%)
##   id    description           name                                  url
## 1  1 OBIS Argentina        AR-OBIS http://arobis.cenpat-conicet.gob.ar/
## 2  2        AfrOBIS        AfrOBIS           http://afrobis.csir.co.za/
## 3  3 Antarctic OBIS Antarctic OBIS            http://www.scarmarbin.be/
## 4 15          ArCOD          ArCOD              http://www.arcodiv.org/
## 5 28    Arctic OBIS    Arctic OBIS                                 <NA>
##                    theme
## 1              Argentina
## 2 Sub-Saharan (S-Africa)
## 3             Antarctica
## 4                 Arctic
## 5                 Arctic

group: Taxonomic groups

head(group())
## 
Retrieved 85 records of 85 (100%)
##   id       name
## 1  1    Acarina
## 2  2  Amoebozoa
## 3  3  Amphipoda
## 4  4   Apusozoa
## 5  5 Ascidiacea
## 6  6       Aves

In Python

Getting started

Installation

# from pypi
pip install pyobis

# dev
pip install git+git://github.com/sckott/pyobis.git#egg=pyobis

Help and issues: https://github.com/sckott/pyobis

Occurrence

from pyobis import occurrences
occurrences.search(scientificname = 'Mola mola')
occurrences.search(scientificname = 'Mola mola', offset=0, limit=10)
occurrences.search(geometry='POLYGON((30.1 10.1, 10 20, 20 40, 40 40, 30.1 10.1))', 
                   limit=20)
occurrences.search(aphiaid=key, year="2013", limit=20)
res = occurrences.download(year = 2001, scientificname = 'Orcinus')
res.uuid
res.status()
res.fetch()

Other methods

  • checklist - Create a checklist for a geometry, time period, taxonomic group, …
  • taxa - Look up taxonomic names
  • groups - List taxonomic groups
  • resources - Look up dataset metadata
  • nodes - Look up OBIS node metadata

API

Documentation

Web

Explore Portal

Mapper