diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 20ee9c6..e78d00d 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -4,75 +4,84 @@ * An check external links job * */ -class CheckExternalLinksJob extends AbstractQueuedJob { +class CheckExternalLinksJob extends AbstractQueuedJob +{ - public static $regenerate_time = 43200; + public static $regenerate_time = 43200; - public function __construct() { - $this->pagesToProcess = DB::query('SELECT "ID" FROM "SiteTree_Live" WHERE "ShowInSearch"=1')->column(); - $this->currentStep = 0; - $this->totalSteps = count($this->pagesToProcess); - } + public function __construct() + { + $this->pagesToProcess = DB::query('SELECT "ID" FROM "SiteTree_Live" WHERE "ShowInSearch"=1')->column(); + $this->currentStep = 0; + $this->totalSteps = count($this->pagesToProcess); + } - /** - * Sitemap job is going to run for a while... - */ - public function getJobType() { - return QueuedJob::QUEUED; - } + /** + * Sitemap job is going to run for a while... + */ + public function getJobType() + { + return QueuedJob::QUEUED; + } - /** - * @return string - */ - public function getTitle() { - return 'Checking external links'; - } + /** + * @return string + */ + public function getTitle() + { + return 'Checking external links'; + } - /** - * Return a signature for this queued job - * - * For the generate sitemap job, we only ever want one instance running, so just use the class name - * - * @return String - */ - public function getSignature() { - return md5(get_class($this)); - } + /** + * Return a signature for this queued job + * + * For the generate sitemap job, we only ever want one instance running, so just use the class name + * + * @return String + */ + public function getSignature() + { + return md5(get_class($this)); + } - /** - * Note that this is duplicated for backwards compatibility purposes... - */ - public function setup() { - parent::setup(); - increase_time_limit_to(); + /** + * Note that this is duplicated for backwards compatibility purposes... + */ + public function setup() + { + parent::setup(); + increase_time_limit_to(); - $restart = $this->currentStep == 0; + $restart = $this->currentStep == 0; - if ($restart) { - $this->pagesToProcess = DB::query('SELECT "ID" FROM SiteTree_Live WHERE ShowInSearch=1')->column(); - } - } + if ($restart) { + $this->pagesToProcess = DB::query('SELECT "ID" FROM SiteTree_Live WHERE ShowInSearch=1')->column(); + } + } - /** - * On any restart, make sure to check that our temporary file is being created still. - */ - public function prepareForRestart() { - parent::prepareForRestart(); - } + /** + * On any restart, make sure to check that our temporary file is being created still. + */ + public function prepareForRestart() + { + parent::prepareForRestart(); + } - public function process() { - $task = new CheckExternalLinks(); - $task->run(); - $this->completeJob(); - } + public function process() + { + $task = new CheckExternalLinks(); + $task->run(); + $this->completeJob(); + } - /** - * Outputs the completed file to the site's webroot - */ - protected function completeJob() { - $this->isComplete = 1; - $nextgeneration = new CheckExternalLinksJob(); - singleton('QueuedJobService')->queueJob($nextgeneration, - date('Y-m-d H:i:s', time() + self::$regenerate_time)); - } -} \ No newline at end of file + /** + * Outputs the completed file to the site's webroot + */ + protected function completeJob() + { + $this->isComplete = 1; + $nextgeneration = new CheckExternalLinksJob(); + singleton('QueuedJobService')->queueJob($nextgeneration, + date('Y-m-d H:i:s', time() + self::$regenerate_time)); + } +} diff --git a/code/model/BrokenExternalLinks.php b/code/model/BrokenExternalLinks.php index 4e73c1d..f96c63d 100644 --- a/code/model/BrokenExternalLinks.php +++ b/code/model/BrokenExternalLinks.php @@ -1,44 +1,46 @@ 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. - 'HTTPCode' =>'Int' - ); - - private static $has_one = array( - 'Page' => 'Page' - ); - - public static $summary_fields = array( - 'Page.Title' => 'Page', - 'HTTPCode' => 'HTTP Code', - 'Created' => 'Created' - ); - - public static $searchable_fields = array( - 'HTTPCode' => array('title' => 'HTTP Code') - ); - - function canEdit($member = false) { - return false; - } - +class BrokenExternalLinks extends DataObject +{ + + private static $db = array( + 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. + 'HTTPCode' =>'Int' + ); + + private static $has_one = array( + 'Page' => 'Page' + ); + + public static $summary_fields = array( + 'Page.Title' => 'Page', + 'HTTPCode' => 'HTTP Code', + 'Created' => 'Created' + ); + + public static $searchable_fields = array( + 'HTTPCode' => array('title' => 'HTTP Code') + ); + + public function canEdit($member = false) + { + return false; + } } -class BrokenExternalLinksAdmin extends ModelAdmin { - - public static $url_segment = 'broken-external-links-admin'; +class BrokenExternalLinksAdmin extends ModelAdmin +{ - public static $managed_models = array( - 'BrokenExternalLinks' - ); + public static $url_segment = 'broken-external-links-admin'; - public static $menu_title = 'Broken Ext. links'; + public static $managed_models = array( + 'BrokenExternalLinks' + ); - public function init() { - parent::init(); - } + public static $menu_title = 'Broken Ext. links'; + public function init() + { + parent::init(); + } } diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 1232acf..499c1ab 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -1,75 +1,81 @@ clearTable($table); - else DB::query("TRUNCATE \"$table\""); - $pages = SiteTree::get(); - foreach ($pages as $page) { - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + public function run($request) + { + // clear broken external link table + $table = 'BrokenExternalLinks'; + if (method_exists(DB::getConn(), 'clearTable')) { + DB::getConn()->clearTable($table); + } else { + DB::query("TRUNCATE \"$table\""); + } + $pages = SiteTree::get(); + foreach ($pages as $page) { + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); - // Populate link tracking for internal links & links to asset files. - if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { - $href = Director::makeRelative($link->getAttribute('href')); - if ($href == 'admin/') continue; + // Populate link tracking for internal links & links to asset files. + if ($links = $htmlValue->getElementsByTagName('a')) { + foreach ($links as $link) { + $href = Director::makeRelative($link->getAttribute('href')); + if ($href == 'admin/') { + continue; + } - // ignore SiteTree and assets links as they will be caught by SiteTreeLinkTracking - if(preg_match('/\[sitetree_link,id=([0-9]+)\]/i', $href, $matches)) { - continue; - } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { - continue; - } - if($href && function_exists('curl_init')) { - $handle = curl_init($href); - curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); - $response = curl_exec($handle); - $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); - curl_close($handle); - if (($httpCode < 200 || $httpCode > 302) - || ($href == '' || $href[0] == '/')) - { - $brokenLink = new BrokenExternalLinks(); - $brokenLink->PageID = $page->ID; - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->write(); + // ignore SiteTree and assets links as they will be caught by SiteTreeLinkTracking + if (preg_match('/\[sitetree_link,id=([0-9]+)\]/i', $href, $matches)) { + continue; + } elseif (substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { + continue; + } + if ($href && function_exists('curl_init')) { + $handle = curl_init($href); + curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); + $response = curl_exec($handle); + $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); + curl_close($handle); + if (($httpCode < 200 || $httpCode > 302) + || ($href == '' || $href[0] == '/')) { + $brokenLink = new BrokenExternalLinks(); + $brokenLink->PageID = $page->ID; + $brokenLink->Link = $href; + $brokenLink->HTTPCode = $httpCode; + $brokenLink->write(); - // TODO set the broken link class - /* - $class = $link->getAttribute('class'); - $class = ($class) ? $class . 'ss-broken' : 'ss-broken'; - $link->setAttribute('class', ($class ? "$class ss-broken" : 'ss-broken')); - */ + // TODO set the broken link class + /* + $class = $link->getAttribute('class'); + $class = ($class) ? $class . 'ss-broken' : 'ss-broken'; + $link->setAttribute('class', ($class ? "$class ss-broken" : 'ss-broken')); + */ - // use raw sql query to set broken link as calling the dataobject write - // method will reset the links if no broken internal links are found - $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; - $query .= "WHERE \"ID\" = " . (int)$page->ID; - $result = DB::query($query); - if (!$result) { - // error updating hasBrokenLink - } + // use raw sql query to set broken link as calling the dataobject write + // method will reset the links if no broken internal links are found + $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; + $query .= "WHERE \"ID\" = " . (int)$page->ID; + $result = DB::query($query); + if (!$result) { + // error updating hasBrokenLink + } + } + } + } + } + } - } - } - } - } - - // run this again if queued jobs exists and is a valid int - $queuedJob = Config::inst()->get('CheckExternalLinks', 'QueuedJob'); - if (isset($queuedJob) && is_int($queuedJob) && class_exists('QueuedJobService')) { - $checkLinks = new CheckExternalLinksJob(); - singleton('QueuedJobService') - ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + $queuedJob)); - } - - } + // run this again if queued jobs exists and is a valid int + $queuedJob = Config::inst()->get('CheckExternalLinks', 'QueuedJob'); + if (isset($queuedJob) && is_int($queuedJob) && class_exists('QueuedJobService')) { + $checkLinks = new CheckExternalLinksJob(); + singleton('QueuedJobService') + ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + $queuedJob)); + } + } }