From 6b05e0c1f86f09704c84c23e5c2c3bd71735e077 Mon Sep 17 00:00:00 2001 From: Simon Boehm Date: Sun, 22 Nov 2020 17:58:07 +0100 Subject: [PATCH 1/4] Add GoogleCloudStorage --- README.rst | 5 ++ storefact/__init__.py | 20 +++++++ storefact/_hstores.py | 5 ++ storefact/_store_creation.py | 16 +++++ storefact/_urls.py | 13 +++++ tests/gcstore_cred_example.json | 12 ++++ tests/test_gcstore_creation.py | 100 ++++++++++++++++++++++++++++++++ 7 files changed, 171 insertions(+) create mode 100644 tests/gcstore_cred_example.json create mode 100644 tests/test_gcstore_creation.py diff --git a/README.rst b/README.rst index 01cc683..7ac3bb0 100644 --- a/README.rst +++ b/README.rst @@ -20,6 +20,7 @@ The following simplekv_-Stores are supported in storefact: * FilesystemStore * BotoStore (Amazon S3) * AzureBlockBlobStorage +* GoogleCloudStore Storefact is released as open source under the 3-clause BSD license. @@ -73,6 +74,10 @@ URL and store types: * with storage account key: :code:`azure://account_name:account_key@container[?create_if_missing=true][?max_connections=2]` * with SAS token: :code:`azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)]` * with SAS and additional parameters: :code:`azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024]` +* Google Cloud Store: (:code:`gcs://` and :code:`hgcs://`): + * :code:`hgcs://@bucket_name[?create_if_missing=true&bucket_creation_location=EUROPE-WEST3]` + to create + Storage URLs starting with a :code:`h` indicate extended allowed characters. This allows the usage of slashes and spaces in blob names. URL options with :code:`[]` are optional and the :code:`[]` need to be removed. 
diff --git a/storefact/__init__.py b/storefact/__init__.py index f68bae1..f5a32c5 100644 --- a/storefact/__init__.py +++ b/storefact/__init__.py @@ -41,6 +41,15 @@ def get_store_from_url(url): * AzureBlockBlockStorage (SAS): ``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false`` * AzureBlockBlockStorage (SAS): ``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)]`` * AzureBlockBlockStorage (SAS): ``azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024]`` + * GoogleCloudStorage: ``gcs://<base64_encoded_credentials_json>@bucket_name[?create_if_missing=true][&bucket_creation_location=EUROPE-WEST1]`` + Get the encoded credentials as a string like so: + + .. code-block:: python + + from pathlib import Path + import base64 + json_as_bytes = Path("path/to/credentials.json").read_bytes() + json_b64_encoded = base64.urlsafe_b64encode(json_as_bytes).decode() """ return get_store(**url2dict(url)) @@ -71,6 +80,17 @@ def get_store(type, create_if_missing=True, **params): otherwise, try to retrieve the bucket and fail with an ``IOError``. * ``"hs3"`` returns a variant of ``simplekv.net.botostore.BotoStore`` that allows "/" in the key name. The parameters are the same as for ``"s3"`` + * ``"gcs"``: Returns a ``simplekv.net.gcstore.GoogleCloudStore``. Parameters are + ``"credentials"``, ``"bucket_name"``, ``"bucket_creation_location"``, ``"project"`` and ``"create_if_missing"`` (default: ``True``). + + - ``"credentials"``: either the path to a credentials.json file or a *google.auth.credentials.Credentials* object + - ``"bucket_name"``: Name of the bucket the blobs are stored in. + - ``"project"``: The name of the GCStorage project. If a credentials JSON is passed then it contains the project name + and this parameter will be ignored.
+ - ``"create_if_missing"``: [optional] Create new bucket to store blobs in if ``"bucket_name"`` doesn't exist yet. (default: ``True``). + - ``"bucket_creation_location"``: [optional] If a new bucket is created (create_if_missing=True), the location it will be created in. + If ``None`` then GCloud uses a default location. + * ``"hgcs"``: Like ``"gcs"`` but "/" are allowed in the keynames. * ``"fs"``: Returns a ``simplekv.fs.FilesystemStore``. Specify the base path as "path" parameter. * ``"hfs"`` returns a variant of ``simplekv.fs.FilesystemStore`` that allows "/" in the key name. The parameters are the same as for ``"file"``. diff --git a/storefact/_hstores.py b/storefact/_hstores.py index d8f080a..0e2d7c3 100644 --- a/storefact/_hstores.py +++ b/storefact/_hstores.py @@ -10,6 +10,7 @@ from simplekv.memory.redisstore import RedisStore from simplekv.net.azurestore import AzureBlockBlobStore from simplekv.net.botostore import BotoStore +from simplekv.net.gcstore import GoogleCloudStore class HDictStore(ExtendedKeyspaceMixin, DictStore): @@ -30,6 +31,10 @@ def size(self, key): return k.size +class HGoogleCloudStore(ExtendedKeyspaceMixin, GoogleCloudStore): + pass + + class HFilesystemStore(ExtendedKeyspaceMixin, FilesystemStore): def size(self, key): return os.path.getsize(self._build_filename(key)) diff --git a/storefact/_store_creation.py b/storefact/_store_creation.py index b7211aa..336da74 100644 --- a/storefact/_store_creation.py +++ b/storefact/_store_creation.py @@ -16,6 +16,8 @@ def create_store(type, params): return _create_store_hs3(type, params) if type in ('s3'): return _create_store_s3(type, params) + if type in ('gcs', 'hgcs'): + return _create_store_gcs(type, params) if type in ('hfs', 'hfile', 'filesystem'): return _create_store_hfs(type, params) if type in ('fs', 'file'): @@ -29,6 +31,20 @@ def create_store(type, params): raise ValueError('Unknown store type: ' + str(type)) +def _create_store_gcs(store_type, params): + from simplekv.net.gcstore 
import GoogleCloudStore + from google.oauth2.service_account import Credentials + from ._hstores import HGoogleCloudStore + import json + + if type(params['credentials']) == bytes: + account_info = json.loads(params['credentials'].decode()) + params['credentials'] = Credentials.from_service_account_info(account_info) + params['project'] = account_info['project_id'] + + return GoogleCloudStore(**params) if store_type == 'gcs' else HGoogleCloudStore(**params) + + def _create_store_azure(type, params): from simplekv.net.azurestore import AzureBlockBlobStore from ._hstores import HAzureBlockBlobStore diff --git a/storefact/_urls.py b/storefact/_urls.py index 2b4b6c2..03ac694 100644 --- a/storefact/_urls.py +++ b/storefact/_urls.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- from uritools import urisplit +import base64 +import json +import io TRUEVALUES = (u'true',) @@ -19,6 +22,7 @@ def url2dict(url, raise_on_extra_params=False): azure://account_name:account_key@container[?create_if_missing=true][?max_connections=2] azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)] azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024] + gcs://@bucket_name[?create_if_missing=true][?bucket_creation_location=EUROPE-WEST1] """ u = urisplit(url) parsed = dict( @@ -64,6 +68,15 @@ def extract_params(scheme, host, port, path, query, userinfo): if path: params['db'] = int(path) return params + if scheme in ('gcs', 'hgcs'): + credentials_b64 = userinfo + params = {'type': scheme, 'bucket_name': host} + params['credentials'] = base64.urlsafe_b64decode( + credentials_b64.encode() + ) + if 'bucket_creation_location' in query: + params[u'bucket_creation_location'] = query.pop(u'bucket_creation_location')[0] + return params if scheme in ('fs', 'hfs'): return {'type': scheme, 'path': host 
+ path} if scheme in ('s3', 'hs3'): diff --git a/tests/gcstore_cred_example.json b/tests/gcstore_cred_example.json new file mode 100644 index 0000000..bb41c5b --- /dev/null +++ b/tests/gcstore_cred_example.json @@ -0,0 +1,12 @@ +{ + "type": "service_account", + "project_id": "central-splice-296415", + "private_key_id": "df9971f2b0399b3b09ea2cd4194ef8cc3d07183d", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDaCnuop7/PuaAa\n66OMLEj81zIrJTYrCOs2MsZ6zFFfZxipH9FH/Hd0+sWH4fSGFBGdop6fkJ9prRYy\nZUMFb+H5IAiOBzNzEh2Ls22wJ3mATHy0DcV8d2ip+/jCauelEKdHllMTa2VkBhrh\nnarYwYLI65QaMjiqIwyi40/3EtbjBROwblFkKMx0I+hVdfMn2yY698jiqjaGI65E\naw3bL4yLftSylTGHnZMJJTUNzgwVsdbITib8BLdqNMFX07hCErACsjNGVQexEQhs\nbCg4TTn7oEEepa/cgezxidM48saupZGp/BOh8+ZNGBSFYW9TZIm6/C7y7+loj42e\nASqL44nRAgMBAAECggEAELbGgWhwgewlC4M+K3fWlXkqiZel5hkeLK6qO84PO8yz\nnX1IpefCwXcarr6hp6iArDCDz8/+hFOGFNFh2JgZvqbXuQlwxnSgSq3zrdHs1Kqo\nUYYPuimOHp+n00FuJd9Xr1tk3WN5Z3g3Vc2oA8sbqyqx1Os+ID6lEJRztAkf61F7\nVugQupmQLyDtOUu/0X7xIc2ubV3IBAayAUWA1NIHFhti5Is0w0P55++ilVo14rOj\n16C3EfgF0BMYf2nT0Tu+FfPvKXX3sPrHLLbPKgjNMHb2PfP8F/eDvO7f+PhQLpvR\nvEpNkrvqeAi9Oot+c3fQfqlmYEVBW6DlB/TbbPfgBQKBgQDszWkbDdrZda3DV53k\nvS5HA9pg21qdM2lTt5uESQRkMPiVUa2ojX7JaB4krf92eh965D/KK6+3wipkU42e\nvbZCXKLTq4WwI/avkXtCe5WdWstsDxOps7Tz7x21oSNrxOaNd3GNOvgj3G4m+kmY\ntxL125g2c270tnRkwNyJZwlVlQKBgQDrt7K+HnCPhuvW4x4ubmhNLsrx1EQTYJPn\ng6AGzyJcAV/3JjIidr0N5EO3NhH208OolOcaI4DfMPTXuRTgElOsOlI5dI80I1qf\naCv6A3/kAbuiwcWDwqzo4kmfMyDD9EGUH2EDlB64Fd8gmflnZZbLDG4SOR/FwbL3\nzIYUbracTQKBgFKUw+3KZCLoFiQEfyzRCL19S3/xn2UpE7c9Vb/JUv250kcIh/46\n8hc89x4li+x+ZFIQHWrc11fHH8+DD+VnFZOJCMO2klW+BP5vRsQFo0SP8FcL3viU\nkitrnTnFKzwfG6QEHpZ5FSY0bjmrmVTnq3YQEcITeYNfybGFTKOiSjANAoGAIi0K\nL2J+c3DedNONJrI/USF9rz5VeHHCQAjnhNU3aBuS7GSPVEve8UqtPr0KMb1WWvTr\n5PBM2Q9iEHz3N92wYAI8tbPep3KQyxfxeFhVjJtrtxddKdNp+oIEpp5lG8QoVe+O\nw1QQeCRDq5hnfsRN7raopt+9aZDCAAeZUREUTOkCgYEAnVc+NrSVsEB34dDtLYnF\nU0e7WNLzqI6a7/Q2SERLm0ivYOgJF533xdoVXGIp4YMwIRxzgNX0WbTf7ECvl5vy\nNSWf
LMTJsPk66LLfnV4bv7RxBnpaiZQkhPBcl2aZ/e0XkhP7otV34UFMPA3tbFDu\nFAmi+q542aj9wW/MIGRyxqU=\n-----END PRIVATE KEY-----\n", + "client_email": "storefact-testing@central-splice-296415.iam.gserviceaccount.com", + "client_id": "100559070598539809356", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/storefact-testing%40central-splice-296415.iam.gserviceaccount.com" +} diff --git a/tests/test_gcstore_creation.py b/tests/test_gcstore_creation.py new file mode 100644 index 0000000..f171b88 --- /dev/null +++ b/tests/test_gcstore_creation.py @@ -0,0 +1,100 @@ +import base64 +import json +import pathlib + +import pytest + +import storefact +from storefact._store_creation import create_store + +storage = pytest.importorskip("google.cloud.storage") +from google.auth.credentials import AnonymousCredentials +from google.auth.exceptions import RefreshError + + +def test_create_store_gcstore(mocker): + mock_hgcstore = mocker.patch("storefact._hstores.HGoogleCloudStore") + mock_gcstore = mocker.patch("simplekv.net.gcstore.GoogleCloudStore") + + anon_credentials = AnonymousCredentials() + create_store( + "gcs", + { + "credentials": anon_credentials, + "bucket_name": "test_bucket", + "create_if_missing": True, + "bucket_creation_location": "EUROPE-WEST1", + }, + ) + mock_gcstore.assert_called_once_with( + credentials=anon_credentials, + bucket_name="test_bucket", + create_if_missing=True, + bucket_creation_location="EUROPE-WEST1", + ) + mock_hgcstore.assert_not_called() + + +def test_create_store_hgcstore(mocker): + mock_hgcstore = mocker.patch("storefact._hstores.HGoogleCloudStore") + + anon_credentials = AnonymousCredentials() + create_store( + "hgcs", + { + "credentials": anon_credentials, + "bucket_name": "test_bucket", + "create_if_missing": True, + 
"bucket_creation_location": "EUROPE-WEST1", + }, + ) + mock_hgcstore.assert_called_once_with( + credentials=anon_credentials, + bucket_name="test_bucket", + create_if_missing=True, + bucket_creation_location="EUROPE-WEST1", + ) + + +SHORT_URL = ( + f"gcs://{base64.urlsafe_b64encode(b'some bytes+/=asdf').decode()}" + f"@bucket_name?create_if_missing=true&bucket_creation_location=WESTINDIES", + { + "type": "gcs", + "credentials": b"some bytes+/=asdf", + "bucket_name": "bucket_name", + "create_if_missing": True, + "bucket_creation_location": "WESTINDIES", + }, +) +ACTUAL_URL = ( + f"gcs://{base64.urlsafe_b64encode(pathlib.Path('tests/gcstore_cred_example.json').read_bytes()).decode()}" + f"@default_bucket?create_if_missing=false", + { + "type": "gcs", + "credentials": pathlib.Path("tests/gcstore_cred_example.json").read_bytes(), + "bucket_name": "default_bucket", + "create_if_missing": False, + }, +) + + +@pytest.mark.parametrize("url, expected", [SHORT_URL, ACTUAL_URL]) +def test_url2dict(url, expected): + assert storefact.url2dict(url) == expected + + +def test_json_decode(): + url, _ = ACTUAL_URL + creds = storefact.url2dict(url)["credentials"] + with open("tests/gcstore_cred_example.json") as file: + assert json.loads(creds) == json.load(file) + + +def test_complete(): + url, expected = ACTUAL_URL + store = storefact.get_store_from_url(url) + assert store.bucket_name == expected["bucket_name"] + assert store._client.project == 'central-splice-296415' + with pytest.raises(RefreshError): + store.get("somekey") From cd2e06c4517cff48fd5c6aa53a5a8f2ac34f31c9 Mon Sep 17 00:00:00 2001 From: Simon Boehm Date: Sun, 22 Nov 2020 18:02:08 +0100 Subject: [PATCH 2/4] README: Show how to create GCloud base64 encoding --- README.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 7ac3bb0..172c832 100644 --- a/README.rst +++ b/README.rst @@ -76,7 +76,14 @@ URL and store types: * with SAS and additional parameters: 
:code:`azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024]` * Google Cloud Store: (:code:`gcs://` and :code:`hgcs://`): * :code:`hgcs://@bucket_name[?create_if_missing=true&bucket_creation_location=EUROPE-WEST3]` - to create + Get the encoded credentials as string like so: + + .. code-block:: python + + from pathlib import Path + import base64 + json_as_bytes = Path("path/to/credentials.json").read_bytes() + json_b64_encoded = base64.urlsafe_b64encode(b).decode() Storage URLs starting with a :code:`h` indicate extended allowed characters. This allows the usage of slashes and spaces in blob names. From c188ae5a8f37c941d158b2e56571994160e6a290 Mon Sep 17 00:00:00 2001 From: Simon Boehm Date: Tue, 1 Dec 2020 08:52:05 +0100 Subject: [PATCH 3/4] Small fix in README --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 172c832..580478e 100644 --- a/README.rst +++ b/README.rst @@ -83,7 +83,7 @@ URL and store types: from pathlib import Path import base64 json_as_bytes = Path("path/to/credentials.json").read_bytes() - json_b64_encoded = base64.urlsafe_b64encode(b).decode() + json_b64 = base64.urlsafe_b64encode(json_as_bytes).decode() Storage URLs starting with a :code:`h` indicate extended allowed characters. This allows the usage of slashes and spaces in blob names.
From b84431ee292b93a8513c67b9dec43f08ea73132b Mon Sep 17 00:00:00 2001 From: Simon Boehm Date: Thu, 15 Apr 2021 15:42:12 +0200 Subject: [PATCH 4/4] Small change for Pipeline rerun --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 580478e..f723409 100644 --- a/README.rst +++ b/README.rst @@ -76,7 +76,7 @@ URL and store types: * with SAS and additional parameters: :code:`azure://account_name:shared_access_signature@container?use_sas&create_if_missing=false[?max_connections=2&socket_timeout=(20,100)][?max_block_size=4*1024*1024&max_single_put_size=64*1024*1024]` * Google Cloud Store: (:code:`gcs://` and :code:`hgcs://`): * :code:`hgcs://@bucket_name[?create_if_missing=true&bucket_creation_location=EUROPE-WEST3]` - Get the encoded credentials as string like so: + Get the encoded credentials as a string like so: .. code-block:: python