diff --git a/pygbif/occurrences/__init__.py b/pygbif/occurrences/__init__.py
index d2366e2..c496208 100644
--- a/pygbif/occurrences/__init__.py
+++ b/pygbif/occurrences/__init__.py
@@ -37,6 +37,7 @@
     download_list,
     download_get,
     download_cancel,
-    download_describe
+    download_describe,
+    download_sql
 )
 from .citation import citation
\ No newline at end of file
diff --git a/pygbif/occurrences/download.py b/pygbif/occurrences/download.py
index 1ebfd1b..d284dff 100644
--- a/pygbif/occurrences/download.py
+++ b/pygbif/occurrences/download.py
@@ -653,6 +653,79 @@ def download_describe(format, **kwargs):
     else:
         raise ValueError("format not in list of acceptable formats")
 
+def download_sql(
+    sql,
+    format="SQL_TSV_ZIP",
+    user=None,
+    pwd=None,
+    email=None,
+):
+    """
+    Download data using a SQL query.
+
+    This is an experimental feature, and the implementation may change throughout 2024.
+    The feature is currently only available for preview by invited users. Contact helpdesk@gbif.org
+    to request access.
+
+    :param sql: [str] A SQL query
+    :param format: [str] The format to download the data in. Only ``SQL_TSV_ZIP`` is currently supported.
+    :param user: [str] A user name, will look at env var ``GBIF_USER`` first.
+    :param pwd: [str] Your password, will look at env var ``GBIF_PWD`` first.
+    :param email: [str] Your email, will look at env var ``GBIF_EMAIL`` first.
+
+    :return: A string, the request id
+    :raises Exception: if the API responds with a status code above 203
+
+    Usage::
+
+        from pygbif import occurrences as occ
+
+        occ.download_sql("SELECT gbifid,publishingCountry FROM occurrence WHERE publishingCountry='GB'")
+
+    """
+    url = "https://api.gbif.org/v1/occurrence/download/request"
+    user = _check_environ("GBIF_USER", user)
+    pwd = _check_environ("GBIF_PWD", pwd)
+    # Resolve email the same way as user and pwd so the documented GBIF_EMAIL fallback is honoured.
+    email = _check_environ("GBIF_EMAIL", email)
+
+    header = {
+        "accept": "application/json",
+        "content-type": "application/json",
+        "user-agent": "".join(
+            [
+                "python-requests/",
+                requests.__version__,
+                ",pygbif/",
+                package_metadata.__version__,
+            ]
+        ),
+    }
+    payload = {
+        "sendNotification": True,
+        "notificationAddresses": [email],
+        "format": format,
+        "sql": sql,
+    }
+
+    r = requests.post(
+        url,
+        auth=requests.auth.HTTPBasicAuth(user, pwd),
+        data=json.dumps(payload),
+        headers=header,
+    )
+    if r.status_code > 203:
+        raise Exception(
+            "error: "
+            + r.text
+            + ", with error status code "
+            + str(r.status_code)
+            + ", check your number of active downloads."
+        )
+    # On success the response body itself is the download key.
+    request_id = r.text
+    logging.info("Your sql download key is %s", request_id)
+    return request_id
 
 operators = [
     "equals",
diff --git a/test/test-occurrences-download_sql.py b/test/test-occurrences-download_sql.py
new file mode 100644
index 0000000..5dd5382
--- /dev/null
+++ b/test/test-occurrences-download_sql.py
@@ -0,0 +1,13 @@
+from pygbif import occurrences
+import vcr
+
+
+@vcr.use_cassette(
+    "test/vcr_cassettes/test-occurrences-download_sql.yaml",
+    filter_headers=["authorization"],
+)
+def test_download_sql():
+    """basic test of the download_sql function"""
+    out = occurrences.download_sql("SELECT gbifid,publishingCountry FROM occurrence WHERE publishingCountry='BE'")
+    assert isinstance(out, str)
+    assert len(out) == 23
diff --git a/test/vcr_cassettes/test-occurrences-download_sql.yaml b/test/vcr_cassettes/test-occurrences-download_sql.yaml
new file mode 100644
index 0000000..c293852
--- /dev/null
+++ b/test/vcr_cassettes/test-occurrences-download_sql.yaml
@@ -0,0 +1,55 @@
+interactions:
+- request:
+    body: '{"sendNotification": true, "notificationAddresses": [null], "format": "SQL_TSV_ZIP",
+      "sql": "SELECT gbifid,publishingCountry FROM occurrence WHERE publishingCountry=''BE''"}'
+    headers:
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '171'
+      accept:
+      - application/json
+      content-type:
+      - application/json
+      user-agent:
+      - python-requests/2.32.3,pygbif/0.6.4
+    method: POST
+    uri: https://api.gbif.org/v1/occurrence/download/request
+  response:
+    body:
+      string: 0039132-240906103802322
+    headers:
+      Age:
+      - '0'
+      Cache-Control:
+      - public, max-age=3601
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '23'
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 03 Oct 2024 11:44:25 GMT
+      Expires:
+      - '0'
+      Pragma:
+      - no-cache
+      Vary:
+      - Origin, Access-Control-Request-Method, Access-Control-Request-Headers
+      Via:
+      - 1.1 varnish (Varnish/6.0)
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - DENY
+      X-Varnish:
+      - '475432033'
+      X-XSS-Protection:
+      - 1; mode=block
+    status:
+      code: 201
+      message: Created
+version: 1