From fcaf2a29e56c1723b71489e722a22ea0987baf26 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Wed, 27 Nov 2024 10:34:25 -0500 Subject: [PATCH] Add public API endpoint for public collections (#2174) Fixes #1051 If the org with the provided slug doesn't exist or its public profile is not enabled, return the same 404 response with a detail of "public_profile_not_found" to prevent people from using the public endpoint to determine whether an org exists. An org with an enabled public profile but no public collections returns 200 with an empty collections list. Endpoint is `GET /api/public-collections/{org_slug}` (no auth needed) to avoid collisions with existing org and collection endpoints. --- backend/btrixcloud/colls.py | 35 +++++++++ backend/btrixcloud/models.py | 34 +++++++-- backend/btrixcloud/orgs.py | 31 +++++--- backend/test/test_collections.py | 121 ++++++++++++++++++++++++++++++- backend/test/test_org.py | 20 +---- 5 files changed, 207 insertions(+), 34 deletions(-) diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py index 23640404bd..411e659ac9 100644 --- a/backend/btrixcloud/colls.py +++ b/backend/btrixcloud/colls.py @@ -30,6 +30,9 @@ UpdatedResponse, SuccessResponse, CollectionSearchValuesResponse, + OrgPublicCollections, + PublicOrgDetails, + CollAccessType, ) from .utils import dt_now @@ -395,6 +398,30 @@ async def add_successful_crawl_to_collections(self, crawl_id: str, cid: UUID): ) await self.update_crawl_collections(crawl_id) + async def get_org_public_collections(self, org_slug: str): + """List public collections for org""" + try: + org = await self.orgs.get_org_by_slug(org_slug) + # pylint: disable=broad-exception-caught + except Exception: + # pylint: disable=raise-missing-from + raise HTTPException(status_code=404, detail="public_profile_not_found") + + if not org.enablePublicProfile: + raise HTTPException(status_code=404, detail="public_profile_not_found") + + collections, _ = await self.list_collections( + org.id, access=CollAccessType.PUBLIC + ) + + public_org_details = PublicOrgDetails( + name=org.name, + description=org.publicDescription or "", + 
url=org.publicUrl or "", + ) + + return OrgPublicCollections(org=public_org_details, collections=collections) + # ============================================================================ # pylint: disable=too-many-locals @@ -582,4 +609,12 @@ async def download_collection( ): return await colls.download_collection(coll_id, org) + @app.get( + "/public-collections/{org_slug}", + tags=["collections"], + response_model=OrgPublicCollections, + ) + async def get_org_public_collections(org_slug: str): + return await colls.get_org_public_collections(org_slug) + return colls diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index 06c0ff46ec..48d36b0dc0 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -1144,6 +1144,24 @@ class RenameOrg(BaseModel): slug: Optional[str] = None +# ============================================================================ +class PublicOrgDetails(BaseModel): + """Model for org details that are available in public profile""" + + name: str + description: str = "" + url: str = "" + + +# ============================================================================ +class OrgPublicCollections(BaseModel): + """Model for listing public collections in org""" + + org: PublicOrgDetails + + collections: List[CollOut] = [] + + # ============================================================================ class OrgStorageRefs(BaseModel): """Input model for setting primary storage + optional replicas""" @@ -1374,10 +1392,12 @@ class OrgReadOnlyUpdate(BaseModel): # ============================================================================ -class OrgListPublicCollectionsUpdate(BaseModel): - """Organization listPublicCollections update""" +class OrgPublicProfileUpdate(BaseModel): + """Organization enablePublicProfile update""" - listPublicCollections: bool + enablePublicProfile: Optional[bool] = None + publicDescription: Optional[str] = None + publicUrl: Optional[str] = None # 
============================================================================ @@ -1448,7 +1468,9 @@ class OrgOut(BaseMongoModel): allowedProxies: list[str] = [] crawlingDefaults: Optional[CrawlConfigDefaults] = None - listPublicCollections: bool = False + enablePublicProfile: bool = False + publicDescription: str = "" + publicUrl: str = "" # ============================================================================ @@ -1505,7 +1527,9 @@ class Organization(BaseMongoModel): allowedProxies: list[str] = [] crawlingDefaults: Optional[CrawlConfigDefaults] = None - listPublicCollections: bool = False + enablePublicProfile: bool = False + publicDescription: Optional[str] = None + publicUrl: Optional[str] = None def is_owner(self, user): """Check if user is owner""" diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 64c3fbb275..8ff7d0fd40 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -78,7 +78,7 @@ RemovedResponse, OrgSlugsResponse, OrgImportResponse, - OrgListPublicCollectionsUpdate, + OrgPublicProfileUpdate, ) from .pagination import DEFAULT_PAGE_SIZE, paginated_format from .utils import ( @@ -285,6 +285,14 @@ async def get_org_by_id(self, oid: UUID) -> Organization: return Organization.from_dict(res) + async def get_org_by_slug(self, slug: str) -> Organization: + """Get an org by slug""" + res = await self.orgs.find_one({"slug": slug}) + if not res: + raise HTTPException(status_code=400, detail="invalid_org_slug") + + return Organization.from_dict(res) + async def get_default_org(self) -> Organization: """Get default organization""" res = await self.orgs.find_one({"default": True}) @@ -988,13 +996,18 @@ async def update_read_only_on_cancel( ) return res is not None - async def update_list_public_collections( - self, org: Organization, list_public_collections: bool + async def update_public_profile( + self, org: Organization, update: OrgPublicProfileUpdate ): - """Update listPublicCollections field on organization""" + 
"""Update or enable/disable organization's public profile""" + query = update.dict(exclude_unset=True) + + if len(query) == 0: + raise HTTPException(status_code=400, detail="no_update_data") + res = await self.orgs.find_one_and_update( {"_id": org.id}, - {"$set": {"listPublicCollections": list_public_collections}}, + {"$set": query}, ) return res is not None @@ -1555,15 +1568,15 @@ async def update_read_only_on_cancel( return {"updated": True} @router.post( - "/list-public-collections", + "/public-profile", tags=["organizations", "collections"], response_model=UpdatedResponse, ) - async def update_list_public_collections( - update: OrgListPublicCollectionsUpdate, + async def update_public_profile( + update: OrgPublicProfileUpdate, org: Organization = Depends(org_owner_dep), ): - await ops.update_list_public_collections(org, update.listPublicCollections) + await ops.update_public_profile(org, update) return {"updated": True} diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py index 9be94f7249..4faf42540d 100644 --- a/backend/test/test_collections.py +++ b/backend/test/test_collections.py @@ -4,7 +4,7 @@ from zipfile import ZipFile, ZIP_STORED from tempfile import TemporaryFile -from .conftest import API_PREFIX +from .conftest import API_PREFIX, NON_DEFAULT_ORG_NAME, NON_DEFAULT_ORG_SLUG from .utils import read_in_chunks COLLECTION_NAME = "Test collection" @@ -15,6 +15,7 @@ _coll_id = None _second_coll_id = None +_public_coll_id = None upload_id = None modified = None @@ -66,6 +67,7 @@ def test_create_public_collection( assert data["added"] assert data["name"] == PUBLIC_COLLECTION_NAME + global _public_coll_id _public_coll_id = data["id"] # Verify that it is public @@ -725,6 +727,123 @@ def test_filter_sort_collections( assert r.json()["detail"] == "invalid_sort_direction" +def test_list_public_collections( + crawler_auth_headers, + admin_auth_headers, + default_org_id, + non_default_org_id, + crawler_crawl_id, + admin_crawl_id, +): + # 
Create new public collection + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/collections", + headers=crawler_auth_headers, + json={ + "crawlIds": [crawler_crawl_id], + "name": "Second public collection", + "access": "public", + }, + ) + assert r.status_code == 200 + second_public_coll_id = r.json()["id"] + + # Get default org slug + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}", + headers=crawler_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + org_slug = data["slug"] + org_name = data["name"] + + # Verify that public profile isn't enabled + assert data["enablePublicProfile"] is False + assert data["publicDescription"] == "" + assert data["publicUrl"] == "" + + # Try listing public collections without org public profile enabled + r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + assert r.status_code == 404 + assert r.json()["detail"] == "public_profile_not_found" + + # Enable public profile on org + public_description = "This is a test public org!" 
+ public_url = "https://example.com" + + r = requests.post( + f"{API_PREFIX}/orgs/{default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": True, + "publicDescription": public_description, + "publicUrl": public_url, + }, + ) + assert r.status_code == 200 + assert r.json()["updated"] + + r = requests.get( + f"{API_PREFIX}/orgs/{default_org_id}", + headers=admin_auth_headers, + ) + assert r.status_code == 200 + data = r.json() + assert data["enablePublicProfile"] + assert data["publicDescription"] == public_description + assert data["publicUrl"] == public_url + + # List public collections with no auth (public profile enabled) + r = requests.get(f"{API_PREFIX}/public-collections/{org_slug}") + assert r.status_code == 200 + data = r.json() + + org_data = data["org"] + assert org_data["name"] == org_name + assert org_data["description"] == public_description + assert org_data["url"] == public_url + + collections = data["collections"] + assert len(collections) == 2 + for collection in collections: + assert collection["id"] in (_public_coll_id, second_public_coll_id) + assert collection["access"] == "public" + + # Test non-existing slug - it should return a 404 but not reveal + # whether or not an org exists with that slug + r = requests.get(f"{API_PREFIX}/public-collections/nonexistentslug") + assert r.status_code == 404 + assert r.json()["detail"] == "public_profile_not_found" + + +def test_list_public_collections_no_colls(non_default_org_id, admin_auth_headers): + # Test existing org that's not public - should return same 404 as + # if org doesn't exist + r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}") + assert r.status_code == 404 + assert r.json()["detail"] == "public_profile_not_found" + + # Enable public profile on org with zero public collections + r = requests.post( + f"{API_PREFIX}/orgs/{non_default_org_id}/public-profile", + headers=admin_auth_headers, + json={ + "enablePublicProfile": True, + }, + 
) + assert r.status_code == 200 + assert r.json()["updated"] + + # List public collections with no auth - should still get profile even + # with no public collections + r = requests.get(f"{API_PREFIX}/public-collections/{NON_DEFAULT_ORG_SLUG}") + assert r.status_code == 200 + data = r.json() + assert data["org"]["name"] == NON_DEFAULT_ORG_NAME + assert data["collections"] == [] + + def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id): # Delete second collection r = requests.delete( diff --git a/backend/test/test_org.py b/backend/test/test_org.py index 7ef3d0ae9b..a3de79c5cd 100644 --- a/backend/test/test_org.py +++ b/backend/test/test_org.py @@ -17,7 +17,7 @@ def test_ensure_only_one_default_org(admin_auth_headers): r = requests.get(f"{API_PREFIX}/orgs", headers=admin_auth_headers) data = r.json() - assert data["total"] == 1 + assert data["total"] == 2 orgs = data["items"] default_orgs = [org for org in orgs if org["default"]] @@ -697,24 +697,6 @@ def test_update_read_only(admin_auth_headers, default_org_id): assert data["readOnlyReason"] == "" -def test_update_list_public_collections(admin_auth_headers, default_org_id): - # Test that default is false - r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers) - assert r.json()["listPublicCollections"] is False - - # Update - r = requests.post( - f"{API_PREFIX}/orgs/{default_org_id}/list-public-collections", - headers=admin_auth_headers, - json={"listPublicCollections": True}, - ) - assert r.json()["updated"] - - # Test update is reflected in GET response - r = requests.get(f"{API_PREFIX}/orgs/{default_org_id}", headers=admin_auth_headers) - assert r.json()["listPublicCollections"] - - def test_sort_orgs(admin_auth_headers): # Create a few new orgs for testing r = requests.post(