From 4509830c320e6de1a0b733c183ef9475c074947e Mon Sep 17 00:00:00 2001 From: Bernard Hamlin <948122+blueo@users.noreply.github.com> Date: Wed, 17 Apr 2024 15:00:44 +1200 Subject: [PATCH] Force live object when adding to the index (#91) This corrects behaviour that potentially allows indexing of draft content. Queues are normally run via a "dev task" which is run in the DevelopmentAdmin context. This controller will set the reading mode to DRAFT. The upshot of this is that an Index job will unserialise a DataObject in DRAFT reading mode (saved as a Data Query param) and the subsequent `toArray` call could get draft content. This is often seen as a link indexed with `?stage=Stage` parameters. The impact is likely limited because an IndexJob is usually queued by a publish action and run shortly afterwards. In these cases it is unlikely that draft changes are present - but is isn't impossible. I checked the ReIndexJob run and it doesn't appear to have this issue because it sets the reading mode in the setup method. I have instead added this to the unserialise function as it seemed a safer method to ensure the reading mode was as expected. --- src/DataObject/DataObjectDocument.php | 51 +++++++--- tests/DataObject/DataObjectDocumentTest.php | 104 +++++++++++++++++++- 2 files changed, 138 insertions(+), 17 deletions(-) diff --git a/src/DataObject/DataObjectDocument.php b/src/DataObject/DataObjectDocument.php index 3d99458..b35deef 100644 --- a/src/DataObject/DataObjectDocument.php +++ b/src/DataObject/DataObjectDocument.php @@ -19,6 +19,7 @@ use SilverStripe\ORM\RelationList; use SilverStripe\ORM\UnsavedRelationList; use SilverStripe\SearchService\Exception\IndexConfigurationException; +use SilverStripe\SearchService\Exception\IndexingServiceException; use SilverStripe\SearchService\Extensions\DBFieldExtension; use SilverStripe\SearchService\Extensions\SearchServiceExtension; use SilverStripe\SearchService\Interfaces\DataObjectDocumentInterface; @@ -147,8 +148,10 @@ public function shouldIndex(): bool return false; } - // Dataobject is only in draft - if ($dataObject->hasExtension(Versioned::class) && !$dataObject->isLiveVersion()) { + // DataObject has no published version (or draft changes could cause a doc to be removed) + if ($dataObject->hasExtension(Versioned::class) && !$dataObject->isPublished()) { + // note even if we pass a draft object to the indexer onAddToSearchIndexes will + // set the version to live before adding return false; } @@ -204,24 +207,20 @@ public function getIndexes(): array /** * Generates a map of all the fields and values which will be sent * + * This will always use the current DataObject so you must ensure + * it is in the correct state (eg Live) prior to calling toArray. + * For example the onAddToSearchIndexes method will set the data + * object to LIVE when adding to the index + * + * @see DataObjectDocument::onAddToSearchIndexes() * @throws IndexConfigurationException */ public function toArray(): array { $pageContentField = $this->config()->get('page_content_field'); - if ($this->getDataObject()->hasExtension(Versioned::class)) { - $dataObject = Versioned::withVersionedMode(function () { - Versioned::set_stage(Versioned::LIVE); - - return DataObject::get_by_id($this->getSourceClass(), $this->getDataObject()->ID); - }); - } else { - $dataObject = DataObject::get_by_id( - $this->getSourceClass(), - $this->getDataObject()->ID - ); - } + // assume shouldIndex is called before this + $dataObject = $this->getDataObject(); if (!$dataObject || !$dataObject->exists()) { throw new IndexConfigurationException( @@ -628,8 +627,32 @@ public function __unserialize(array $data): void } } + /** + * Add to index event handler + * + * @throws IndexingServiceException + * @return void + */ public function onAddToSearchIndexes(string $event): void { + if ($event === DocumentAddHandler::BEFORE_ADD) { + // make sure DataObject is always live on adding to the index + Versioned::withVersionedMode(function (): void { + Versioned::set_stage(Versioned::LIVE); + + $currentDataObject = $this->getDataObject(); + + $liveDataObject = DataObject::get($currentDataObject->ClassName)->byID($currentDataObject->ID); + + if (!$liveDataObject) { + // unlikely case as indexer calls 'shouldIndex' immediately prior to this + throw new IndexingServiceException('Only published DataObjects may be added to the index'); + } + + $this->setDataObject($liveDataObject); + }); + } + if ($event === DocumentAddHandler::AFTER_ADD) { $this->markIndexed(); } diff --git a/tests/DataObject/DataObjectDocumentTest.php b/tests/DataObject/DataObjectDocumentTest.php index 0d0a729..e2b87b7 100644 --- a/tests/DataObject/DataObjectDocumentTest.php +++ b/tests/DataObject/DataObjectDocumentTest.php @@ -11,16 +11,20 @@ use SilverStripe\SearchService\Interfaces\DocumentAddHandler; use SilverStripe\SearchService\Interfaces\DocumentRemoveHandler; use SilverStripe\SearchService\Schema\Field; +use SilverStripe\SearchService\Service\Indexer; use SilverStripe\SearchService\Tests\Fake\DataObjectFake; use SilverStripe\SearchService\Tests\Fake\DataObjectFakePrivate; use SilverStripe\SearchService\Tests\Fake\DataObjectFakeVersioned; use SilverStripe\SearchService\Tests\Fake\DataObjectSubclassFake; use SilverStripe\SearchService\Tests\Fake\ImageFake; use SilverStripe\SearchService\Tests\Fake\PageFake; +use SilverStripe\SearchService\Tests\Fake\ServiceFake; use SilverStripe\SearchService\Tests\Fake\TagFake; use SilverStripe\SearchService\Tests\SearchServiceTest; use SilverStripe\Security\Member; use SilverStripe\Subsites\Model\Subsite; +use SilverStripe\Versioned\ReadingMode; +use SilverStripe\Versioned\Versioned; class DataObjectDocumentTest extends SearchServiceTest { @@ -623,21 +627,58 @@ public function testExtensionRequired(): void $this->assertEquals($fake, $doc->getDataObject()); } - public function testEvents(): void + public function testMarkIndexedOnEvents(): void { + $dataObject = $this->objFromFixture(DataObjectFakeVersioned::class, 'one'); $mock = $this->getMockBuilder(DataObjectDocument::class) - ->onlyMethods(['markIndexed']) + ->onlyMethods(['markIndexed', 'getDataObject']) ->disableOriginalConstructor() ->getMock(); $mock->expects($this->exactly(2)) ->method('markIndexed'); - $mock->onAddToSearchIndexes(DocumentAddHandler::BEFORE_ADD); + $mock->method('getDataObject') + ->willReturn($dataObject); + $mock->onAddToSearchIndexes(DocumentAddHandler::AFTER_ADD); + // currenlty BEFORE_REMOVE is a noop $mock->onRemoveFromSearchIndexes(DocumentRemoveHandler::BEFORE_REMOVE); $mock->onRemoveFromSearchIndexes(DocumentRemoveHandler::AFTER_REMOVE); } + public function testOnAddToSearchIndexes(): void + { + Versioned::withVersionedMode(function (): void { + // set DRAFT to match state of queue runners which generally + // run through DevelopmentAdmin controller + Versioned::set_stage(Versioned::DRAFT); + + $dataObject = $this->objFromFixture(DataObjectFakeVersioned::class, 'one'); + $dataObject->publishRecursive(); + $document = DataObjectDocument::create($dataObject); + + $queryParams = $document->getDataObject()->getSourceQueryParams(); + $readingMode = ReadingMode::fromDataQueryParams($queryParams); + + $this->assertEquals('Stage.' . Versioned::DRAFT, $readingMode); + + $document->onAddToSearchIndexes(DocumentAddHandler::BEFORE_ADD); + + $queryParams = $document->getDataObject()->getSourceQueryParams(); + $readingMode = ReadingMode::fromDataQueryParams($queryParams); + + $this->assertEquals('Stage.' . Versioned::LIVE, $readingMode); + + $dataObject->doArchive(); + + $this->expectExceptionMessage( + sprintf('Only published DataObjects may be added to the index') + ); + + $document->onAddToSearchIndexes(DocumentAddHandler::BEFORE_ADD); + }); + } + public function testDeletedDataObject(): void { $dataObject = $this->objFromFixture(DataObjectFakeVersioned::class, 'one'); @@ -660,4 +701,61 @@ public function testDeletedDataObject(): void unserialize(serialize($doc)); } + public function testIndexDataObjectDocument(): void + { + $config = $this->mockConfig(); + $config->set('getSearchableClasses', [ + PageFake::class, + ]); + $config->set('getFieldsForClass', [ + PageFake::class => [ + new Field('title'), + new Field('page_link', 'Link'), + ], + ]); + + Versioned::withVersionedMode(function () use ($config): void { + // Reading mode as if run by job - development Admin sets Draft reading mode + // usually via /dev/tasks/ProcessJobQueueTask + // @see SilverStripe\Dev\DevelopmentAdmin + Versioned::set_stage(Versioned::DRAFT); + + $dataObject = PageFake::create(); + $dataObject->Title = 'Draft'; + $dataObject->write(); + + $doc = DataObjectDocument::create($dataObject); + + $config->set( + 'getIndexesForDocument', + [ + $doc->getIdentifier() => [ + 'index' => 'data', + ], + ] + ); + + $indexer = new Indexer([$doc]); + $indexer->setIndexService($service = new ServiceFake()); + $indexer->setBatchSize(1); + $indexer->processNode(); + + $this->assertCount(0, $service->documents, 'Draft documents should not be indexed'); + + $dataObject->Title = 'Published'; + $dataObject->write(); + $published = $dataObject->publishRecursive(); + $this->assertTrue($published); + $dataObject->flushCache(); + $doc = DataObjectDocument::create($dataObject); + + $indexer = new Indexer([$doc]); + $indexer->setIndexService($service = new ServiceFake()); + $indexer->setBatchSize(1); + $indexer->processNode(); + + $this->assertCount(1, $service->documents, 'Published documents should be indexed'); + }); + } + }