From 7167b24fadd3ddb953a159720b45eca2e89e7dbc Mon Sep 17 00:00:00 2001 From: Ben Fenster Date: Wed, 6 Nov 2024 09:40:31 -0700 Subject: [PATCH] Enhancements to check-redirects script: have an explicit list of exclusions, and skip anything that's a 'redirect' and not a 'redirect_asis' as those are not so easy to check --- tools/check-redirects.php | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/check-redirects.php b/tools/check-redirects.php index f4eeed7..ba77133 100644 --- a/tools/check-redirects.php +++ b/tools/check-redirects.php @@ -20,6 +20,15 @@ $sites = parse( file( dirname(__DIR__) . '/maps/sites.map' ) ); $checks = []; +$exclude = [ + '_/rsvp', + '_/google', + '_/google/calendar', + '_/google/docs', + '_/google/drive', + '_/google/sites' +]; + $start = time(); $i = 0; @@ -29,12 +38,15 @@ date_default_timezone_set( 'America/New_York' ); echo 'Checking all URLs. This will take about ' . $estimate . '. It is ' . date('H:i') . ' now (Eastern). No changes are made during these checks. Redirects to Check: ' . count($redirects) . "\n"; foreach( $redirects as $key => $url ) { + if ( in_array( $key, $exclude ) ) { + $checks[ $key ] = null; + continue; // Excluded keys must not be fetched: without this, check_url() below still runs and overwrites the null. + } $checks[ $key ] = check_url( $url ); if ( $i++ % 20 == 19 ) echo " " . $i . " done (" . (time() - $start) . " seconds)\n"; } - $i = 0; // Note: we do not iterate over sites. This reporting is explicitly for redirects. @@ -43,6 +55,11 @@ $i++; $counter = '[' . str_pad( $i, 3, ' ', STR_PAD_LEFT ) . '/' . str_pad( count($redirects), 3, ' ', STR_PAD_LEFT ) . '] '; + if ( in_array( $key, $exclude ) ) { + echo $counter . $key . " - Excluded from checking\n"; + continue; + } + if ( !isset( $sites[ $key ] ) ) { echo $counter . $key . "\n"; echo " Found in redirects.map but not in sites.map\n"; @@ -66,6 +83,11 @@ $check = $checks[ $key ]; if ( $check ) { + if ( $sites[ $key ] === 'redirect' ) { + echo $counter . $key . " - This is a 'redirect' not a 'redirect_asis'. 
Skipping.\n"; + continue; + } + if ( preg_match( '/Second order redirect: (.*)$/', $check, $match ) ) { if ( $match[1] == $url . '/' || str_replace( 'http://', 'https://', $url ) == $match[1] ) { // Automatically replace when it's just about a trailing slash or HTTPS @@ -159,6 +181,8 @@ function check_url( $url ) { return null; // Second-order redirects are expected for these URLs. if ( stristr( $redirect, '/wp-app/shibboleth/' ) ) return null; // Second-order redirects are expected for these URLs. + if ( stristr( $redirect, '/Shibboleth.sso/Login/' ) ) + return null; // Second-order redirects are expected for these URLs. return 'Second order redirect: ' . $redirect; }