From 0f7d7d491eb86a98c62d5fbd9dcd0cf73498c3fd Mon Sep 17 00:00:00 2001 From: John Waller Date: Wed, 2 Oct 2024 16:49:43 +0200 Subject: [PATCH] adding support for download_describe #142 (#159) --- pygbif/occurrences/__init__.py | 1 + pygbif/occurrences/download.py | 32 ++++++++++- test/test-occurrences-download_describe.py | 18 +++++++ .../vcr_cassettes/test_download_describe.yaml | 53 +++++++++++++++++++ 4 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 test/test-occurrences-download_describe.py create mode 100644 test/vcr_cassettes/test_download_describe.yaml diff --git a/pygbif/occurrences/__init__.py b/pygbif/occurrences/__init__.py index 9fb93a1..d2366e2 100644 --- a/pygbif/occurrences/__init__.py +++ b/pygbif/occurrences/__init__.py @@ -37,5 +37,6 @@ download_list, download_get, download_cancel, + download_describe ) from .citation import citation \ No newline at end of file diff --git a/pygbif/occurrences/download.py b/pygbif/occurrences/download.py index 7161f06..1ebfd1b 100644 --- a/pygbif/occurrences/download.py +++ b/pygbif/occurrences/download.py @@ -17,13 +17,14 @@ gbif_GET, gbif_GET_write, gbif_DELETE, + gbif_baseurl ) # how to parse arguments/predicates def _parse_args(x): x = x.replace("'", '"') - tmp = re.split("\s", x) + tmp = re.split(r"\s", x) key = key_lkup.get(tmp[0]) # check special predicates if re.search(r"Null|NULL|null", x): @@ -623,6 +624,35 @@ def download_get(key, path=".", **kwargs): logging.info("On disk at " + path) return {"path": path, "size": meta["size"], "key": key} +def download_describe(format, **kwargs): + """ + Get a description of the download format. This is useful for understanding + what fields are available in a given download format without having to run a download. + + :param format: [str] A format to describe. 
One of "simpleCsv", "simpleParquet", "dwca", "speciesList", "simpleAvro", "sql" + :param **kwargs: Further named arguments passed on to ``requests.get`` + + :return: A dictionary of results + + Usage:: + + from pygbif import occurrences as occ + + occ.download_describe("dwca") + occ.download_describe("simpleCsv") + occ.download_describe("simpleParquet") + occ.download_describe("speciesList") + occ.download_describe("simpleAvro") + occ.download_describe("sql") + + """ + camel_formats = ["simpleCsv", "simpleParquet", "dwca", "speciesList", "simpleAvro","sql"] + if format in camel_formats: + url = gbif_baseurl + "occurrence/download/describe/" + str(format) + return gbif_GET(url,{}, **kwargs) + else: + raise ValueError("format not in list of acceptable formats") + operators = [ "equals", diff --git a/test/test-occurrences-download_describe.py b/test/test-occurrences-download_describe.py new file mode 100644 index 0000000..d386f31 --- /dev/null +++ b/test/test-occurrences-download_describe.py @@ -0,0 +1,20 @@ +"""Tests for occurrences module - download_describe""" +from pygbif import occurrences as occ +import vcr + +@vcr.use_cassette("test/vcr_cassettes/test_download_describe.yaml") +def test_download_describe(): + "occurrences.download_describe - basic usage" + res=occ.download_describe("simpleCsv") + assert dict == res.__class__ + assert len(res["fields"]) >= 50 # unlikely to get smaller + assert "gbifID" == res["fields"][0]["name"] + +def test_download_describe_fails_well(): + "occurrences.download_describe - fail test" + try: + occ.download_describe("dog") + except ValueError as e: + assert str(e) == "format not in list of acceptable formats" + else: + raise AssertionError("download_describe accepted an unsupported format") diff --git a/test/vcr_cassettes/test_download_describe.yaml b/test/vcr_cassettes/test_download_describe.yaml new file mode 100644 index 0000000..45bd31a --- /dev/null +++ b/test/vcr_cassettes/test_download_describe.yaml @@ -0,0 +1,53 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' +
Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + user-agent: + - python-requests/2.32.3,pygbif/0.6.4 + method: GET + uri: https://api.gbif.org/v1/occurrence/download/describe/simpleCsv + response: + body: + string: '{"fields":[{"name":"gbifID","type":"STRING","term":"http://rs.gbif.org/terms/1.0/gbifID","nullable":false},{"name":"datasetKey","type":"STRING","term":"http://rs.gbif.org/terms/1.0/datasetKey","nullable":false},{"name":"occurrenceID","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/occurrenceID","nullable":true},{"name":"kingdom","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/kingdom","nullable":true},{"name":"phylum","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/phylum","nullable":true},{"name":"class","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/class","nullable":true},{"name":"order","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/order","nullable":true},{"name":"family","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/family","nullable":true},{"name":"genus","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/genus","nullable":true},{"name":"species","type":"STRING","term":"http://rs.gbif.org/terms/1.0/species","nullable":true},{"name":"infraspecificEpithet","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/infraspecificEpithet","nullable":true},{"name":"taxonRank","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/taxonRank","nullable":true},{"name":"scientificName","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/scientificName","nullable":true},{"name":"verbatimScientificName","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/scientificName","nullable":true},{"name":"verbatimScientificNameAuthorship","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/scientificNameAuthorship","nullable":true},{"name":"countryCode","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/countryCode","nullable":true},{"name":"locality","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/locality","nulla
ble":true},{"name":"stateProvince","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/stateProvince","nullable":true},{"name":"occurrenceStatus","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/occurrenceStatus","nullable":true},{"name":"individualCount","type":"INT","term":"http://rs.tdwg.org/dwc/terms/individualCount","nullable":true},{"name":"publishingOrgKey","type":"STRING","term":"http://rs.gbif.org/terms/internal/publishingOrgKey","nullable":true},{"name":"decimalLatitude","type":"DOUBLE","term":"http://rs.tdwg.org/dwc/terms/decimalLatitude","nullable":true},{"name":"decimalLongitude","type":"DOUBLE","term":"http://rs.tdwg.org/dwc/terms/decimalLongitude","nullable":true},{"name":"coordinateUncertaintyInMeters","type":"DOUBLE","term":"http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters","nullable":true},{"name":"coordinatePrecision","type":"DOUBLE","term":"http://rs.tdwg.org/dwc/terms/coordinatePrecision","nullable":true},{"name":"elevation","type":"DOUBLE","term":"http://rs.gbif.org/terms/1.0/elevation","nullable":true},{"name":"elevationAccuracy","type":"DOUBLE","term":"http://rs.gbif.org/terms/1.0/elevationAccuracy","nullable":true},{"name":"depth","type":"DOUBLE","term":"http://rs.gbif.org/terms/1.0/depth","nullable":true},{"name":"depthAccuracy","type":"DOUBLE","term":"http://rs.gbif.org/terms/1.0/depthAccuracy","nullable":true},{"name":"eventDate","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/eventDate","nullable":true},{"name":"day","type":"INT","term":"http://rs.tdwg.org/dwc/terms/day","nullable":true},{"name":"month","type":"INT","term":"http://rs.tdwg.org/dwc/terms/month","nullable":true},{"name":"year","type":"INT","term":"http://rs.tdwg.org/dwc/terms/year","nullable":true},{"name":"taxonKey","type":"INT","term":"http://rs.gbif.org/terms/1.0/taxonKey","nullable":true},{"name":"speciesKey","type":"INT","term":"http://rs.gbif.org/terms/1.0/speciesKey","nullable":true},{"name":"basisOfRecord","type":"STRING","term":"http://rs.tdwg.o
rg/dwc/terms/basisOfRecord","nullable":true},{"name":"institutionCode","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/institutionCode","nullable":true},{"name":"collectionCode","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/collectionCode","nullable":true},{"name":"catalogNumber","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/catalogNumber","nullable":true},{"name":"recordNumber","type":"STRING","term":"http://rs.tdwg.org/dwc/terms/recordNumber","nullable":true},{"name":"identifiedBy","type":"ARRAY","delimiter":";","term":"http://rs.tdwg.org/dwc/terms/identifiedBy","nullable":true},{"name":"dateIdentified","type":"DATE","typeFormat":"yyyy-MM-ddTHH:mm:ss","term":"http://rs.tdwg.org/dwc/terms/dateIdentified","nullable":true},{"name":"license","type":"STRING","term":"http://purl.org/dc/terms/license","nullable":true},{"name":"rightsHolder","type":"STRING","term":"http://purl.org/dc/terms/rightsHolder","nullable":true},{"name":"recordedBy","type":"ARRAY","delimiter":";","term":"http://rs.tdwg.org/dwc/terms/recordedBy","nullable":true},{"name":"typeStatus","type":"ARRAY","delimiter":";","term":"http://rs.tdwg.org/dwc/terms/typeStatus","nullable":true},{"name":"establishmentMeans","type":"STRUCT>","term":"http://rs.tdwg.org/dwc/terms/establishmentMeans","nullable":true},{"name":"lastInterpreted","type":"DATE","typeFormat":"yyyy-MM-ddTHH:mm:ss.SSSZ","term":"http://rs.gbif.org/terms/1.0/lastInterpreted","nullable":true},{"name":"mediaType","type":"ARRAY","delimiter":";","term":"http://rs.gbif.org/terms/1.0/mediaType","nullable":true},{"name":"issue","type":"ARRAY","delimiter":";","term":"http://rs.gbif.org/terms/1.0/issue","nullable":true}]}' + headers: + Accept-Ranges: + - bytes + Age: + - '0' + Cache-Control: + - public, max-age=3601 + Connection: + - keep-alive + Content-Length: + - '5525' + Content-Type: + - application/json + Date: + - Wed, 02 Oct 2024 14:04:55 GMT + Expires: + - '0' + Pragma: + - no-cache + Vary: + - Origin, 
Access-Control-Request-Method, Access-Control-Request-Headers + Via: + - 1.1 varnish (Varnish/6.0) + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - DENY + X-Varnish: + - '766683002' + X-XSS-Protection: + - 1; mode=block + status: + code: 200 + message: OK +version: 1