Skip to content

Commit

Permalink
[ArsTechnicaBridge] Refactor to website restructuring
Browse files Browse the repository at this point in the history
  • Loading branch information
Phantop committed Oct 5, 2024
1 parent 80c43f1 commit 9ffd89f
Showing 1 changed file with 27 additions and 43 deletions.
70 changes: 27 additions & 43 deletions bridges/ArsTechnicaBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,24 @@ class ArsTechnicaBridge extends FeedExpander
const URI = 'https://arstechnica.com/';
const DESCRIPTION = 'Returns the latest articles from Ars Technica';
const PARAMETERS = [[
'section' => [
'name' => 'Site section',
'type' => 'list',
'defaultValue' => 'index',
'values' => [
'All' => 'index',
'Apple' => 'apple',
'Board Games' => 'cardboard',
'Cars' => 'cars',
'Features' => 'features',
'Gaming' => 'gaming',
'Information Technology' => 'technology-lab',
'Science' => 'science',
'Staff Blogs' => 'staff-blogs',
'Tech Policy' => 'tech-policy',
'Tech' => 'gadgets',
]
'section' => [
'name' => 'Site section',
'type' => 'list',
'defaultValue' => 'index',
'values' => [
'All' => 'index',
'Apple' => 'apple',
'Board Games' => 'cardboard',
'Cars' => 'cars',
'Features' => 'features',
'Gaming' => 'gaming',
'Information Technology' => 'technology-lab',
'Science' => 'science',
'Staff Blogs' => 'staff-blogs',
'Tech Policy' => 'tech-policy',
'Tech' => 'gadgets',
]
]
]];

public function collectData()
Expand All @@ -36,39 +36,23 @@ public function collectData()
protected function parseItem(array $item)
{
$item_html = getSimpleHTMLDOMCached($item['uri']);
$item_html = defaultLinkTo($item_html, self::URI);
$item['content'] = $item_html->find('.article-content', 0);
$parsely = $item_html->find('[name="parsely-page"]', 0)->content;
$parsely_json = Json::decode(html_entity_decode($parsely));

$parsely = $item_html->find('[name="parsely-page"]', 0);
$parsely_json = json_decode(html_entity_decode($parsely->content), true);
$item['categories'] = $parsely_json['tags'];

$pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
if (null !== $pages) {
for ($i = 2; $i <= $pages->innertext; $i++) {
$page_url = $item['uri'] . '&page=' . $i;
$page_html = getSimpleHTMLDOMCached($page_url);
$page_html = defaultLinkTo($page_html, self::URI);
$item['content'] .= $page_html->find('.article-content', 0);
}
$item['content'] = str_get_html($item['content']);
$item['comments'] = $item_html->find('#comments a', 0)->href;
$item['content'] = '';
foreach ($item_html->find('.post-content') as $content) {
$item['content'] .= $content;
}

$item['content'] = backgroundToImg($item['content']);

// remove various ars advertising
$item['content']->find('#social-left', 0)->remove();
foreach ($item['content']->find('.ars-component-buy-box') as $ad) {
$ad->remove();
}
foreach ($item['content']->find('.ad_wrapper') as $ad) {
$sel = '#social-left, .ars-component-buy-box, .ad_wrapper, .sidebar, .toc-container, .ars-gallery-caption-arrow';
foreach ($item['content']->find($sel) as $ad) {
$ad->remove();
}
foreach ($item['content']->find('.sidebar') as $ad) {
$ad->remove();
}

$item['content'] = backgroundToImg($item['content']);

$item['uid'] = explode('=', $item['uri'])[1];

return $item;
}
Expand Down

0 comments on commit 9ffd89f

Please sign in to comment.