diff --git a/bridges/OLXBridge.php b/bridges/OLXBridge.php index 1a8be901358..31f05eaaa7c 100644 --- a/bridges/OLXBridge.php +++ b/bridges/OLXBridge.php @@ -103,20 +103,15 @@ public function collectData() continue; } - $shippingOffered = $post->find('.css-1c0ed4l svg', 0)->outertext ?? false; - if ($this->getInput('shippingOfferedOnly') && !$shippingOffered) { - continue; - } - $negotiable = $post->find('p[data-testid="ad-price"] span.css-e2218f', 0)->plaintext ?? false; if ($negotiable) { $price = trim(str_replace($negotiable, '', $price)); $negotiable = '(' . $negotiable . ')'; } - if ($post->find('h6', 0)->plaintext != '') { + if ($post->find('h4', 0)->plaintext != '') { $item['uri'] = $post->find('a', 0)->href; - $item['title'] = $post->find('h6', 0)->plaintext; + $item['title'] = $post->find('h4', 0)->plaintext; } # ignore the date component, as it is too convoluted — use the deep-crawled one; see below @@ -128,6 +123,12 @@ public function collectData() # Given that, do deep-crawl *all* the results, which allows to aso obtain the ID, the simplified location # and date strings, as well as the detailed description. $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); + $articleHTMLContent = defaultLinkTo($articleHTMLContent, $this->getHostname()); + + $shippingOffered = $articleHTMLContent->find('img[alt="Safety Badge"]', 0)->src ?? false; + if ($this->getInput('shippingOfferedOnly') && !$shippingOffered) { + continue; + } # Extract a clean ID without resorting to the convoluted CSS class or sibling selectors. Should be always present. $refreshLink = $articleHTMLContent->find('a[data-testid=refresh-link]', 0)->href ?? false; @@ -195,7 +196,7 @@ public function collectData()
$location
-$price $negotiable $shippingOffered
+$price $negotiable