Skip to content

Commit

Permalink
fix(FeedParser): scrape out content from rss content:encoded (#4178)
Browse files Browse the repository at this point in the history
* fix(FeedParser): parse content module from rss2

* refactor
  • Loading branch information
dvikan authored Jul 31, 2024
1 parent e55e9b8 commit b8a9f34
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 20 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,16 @@ See `formats/PlaintextFormat.php` for an example.
These commands require that you have installed the dev dependencies in `composer.json`.
Run all tests:
./vendor/bin/phpunit
Run a single test class:
./vendor/bin/phpunit --filter UrlTest
Run linter:
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
https://github.com/squizlabs/PHP_CodeSniffer/wiki
Expand Down
19 changes: 10 additions & 9 deletions actions/DisplayAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,6 @@ private function createResponse(Request $request, BridgeAbstract $bridge, string
$input = array_diff_key($requestArray, array_fill_keys($remove, ''));
$bridge->setInput($input);
$bridge->collectData();
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = FeedItem::fromArray($item);
}
$items = $feedItems;
}
$feed = $bridge->getFeed();
} catch (\Exception $e) {
// Probably an exception inside a bridge
if ($e instanceof HttpException) {
Expand Down Expand Up @@ -154,6 +145,16 @@ private function createResponse(Request $request, BridgeAbstract $bridge, string
}
}

$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = FeedItem::fromArray($item);
}
$items = $feedItems;
}
$feed = $bridge->getFeed();

$formatFactory = new FormatFactory();
$format = $formatFactory->create($format);

Expand Down
9 changes: 7 additions & 2 deletions lib/FeedItem.php
Original file line number Diff line number Diff line change
Expand Up @@ -186,21 +186,26 @@ public function getContent(): ?string
}

/**
 * Set the item content, normalising the supported input types to a string.
 *
 * The stored content is always reset to null first, so a value that cannot
 * be converted leaves the item without content instead of keeping a
 * previously set value.
 *
 * Accepted inputs:
 *  - string: stored as-is.
 *  - simple_html_dom / simple_html_dom_node: cast to string.
 *  - array: treated as the parsed rss2.0 content module; its 'encoded'
 *    entry is used, or an empty string when that key is absent.
 *
 * Any other type is rejected: the failure is logged once via Debug::log()
 * and the content stays null.
 *
 * @param string|array|\simple_html_dom|\simple_html_dom_node $content The item content
 */
public function setContent($content)
{
    $this->content = null;

    if (
        $content instanceof simple_html_dom
        || $content instanceof simple_html_dom_node
    ) {
        $content = (string) $content;
    } elseif (is_array($content)) {
        // Assuming this is the rss2.0 content module
        $content = $content['encoded'] ?? '';
    }

    if (is_string($content)) {
        $this->content = $content;
    } else {
        // Log the failure exactly once; the type name helps trace the offending bridge.
        Debug::log(sprintf('Unable to convert feed content to string: %s', gettype($content)));
    }
}

Expand Down
3 changes: 2 additions & 1 deletion lib/FeedParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,9 @@ public function parseRss2Item(\SimpleXMLElement $feedItem): array
if (isset($namespaces['media'])) {
$media = $feedItem->children($namespaces['media']);
}

foreach ($namespaces as $namespaceName => $namespaceUrl) {
if (in_array($namespaceName, ['', 'content', 'media'])) {
if (in_array($namespaceName, ['', 'media'])) {
continue;
}
$item[$namespaceName] = $this->parseModule($feedItem, $namespaceName, $namespaceUrl);
Expand Down
19 changes: 11 additions & 8 deletions tests/FeedParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@

class FeedParserTest extends TestCase
{
private \FeedParser $sut;

/**
 * PHPUnit fixture hook: assigns a new \FeedParser to $this->sut, the
 * parser instance the test methods call parseFeed() on.
 */
public function setUp(): void
{
$this->sut = new \FeedParser();
}

public function testRss1()
{
$xml = <<<XML
Expand Down Expand Up @@ -37,8 +44,7 @@ public function testRss1()
</rdf:RDF>
XML;

$sut = new \FeedParser();
$feed = $sut->parseFeed($xml);
$feed = $this->sut->parseFeed($xml);

$this->assertSame('hello feed', $feed['title']);
$this->assertSame('http://meerkat.oreillynet.com', $feed['uri']);
Expand Down Expand Up @@ -74,8 +80,7 @@ public function testRss2()
</rss>
XML;

$sut = new \FeedParser();
$feed = $sut->parseFeed($xml);
$feed = $this->sut->parseFeed($xml);

$this->assertSame('hello feed', $feed['title']);
$this->assertSame('https://example.com/', $feed['uri']);
Expand Down Expand Up @@ -111,8 +116,7 @@ public function testAtom()
</feed>
XML;

$sut = new \FeedParser();
$feed = $sut->parseFeed($xml);
$feed = $this->sut->parseFeed($xml);

$this->assertSame('hello feed', $feed['title']);
$this->assertSame('https://example.com/1', $feed['uri']);
Expand Down Expand Up @@ -151,8 +155,7 @@ public function testAppleItunesModule()
</rss>
XML;

$sut = new \FeedParser();
$feed = $sut->parseFeed($xml);
$feed = $this->sut->parseFeed($xml);
$expected = [
'title' => '',
'uri' => '',
Expand Down

0 comments on commit b8a9f34

Please sign in to comment.