From 95684d33890fe5779b48cedb77713e863d0db785 Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Mon, 13 Oct 2025 19:05:44 -0700 Subject: [PATCH 1/2] Mark bad `climate.nasa.gov` redirects as 404 NASA went through a year-and-a-half-long transition of pages from `climate.nasa.gov` to `science.nasa.gov/climate-change` that concluded a couple of weeks ago. Unfortunately, when they finished, they started redirecting `climate.nasa.gov/*` to the new climate change home page instead of to the matching page on the new site, effectively turning a bunch of URLs into 404s. --- app/models/version.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/models/version.rb b/app/models/version.rb index b2662ceb..9b99ce11 100644 --- a/app/models/version.rb +++ b/app/models/version.rb @@ -220,6 +220,12 @@ def effective_status # rubocop:disable Metrics/PerceivedComplexity # of climate-related pages to instead of giving them 4xx status codes. return 404 if redirected_to.ends_with?('epa.gov/sites/production/files/signpost/cc.html') + # Special case for climate.nasa.gov getting moved with bad redirects for all the sub-pages. + return 404 if ( + /^https?:\/\/climate.nasa.gov\/.+$/i.match?(url) && + redirected_to.ends_with?('://science.nasa.gov/climate-change/') + ) + # We see a lot of redirects to the root of the same domain when a page is removed. 
parsed_url = Addressable::URI.parse(url) return 404 if parsed_url.path != '/' && Surt.surt(parsed_url.join('/')) == Surt.surt(redirected_to) From 2625620020a58031905e36ea1b4d6a0af0f701ae Mon Sep 17 00:00:00 2001 From: Rob Brackett Date: Mon, 13 Oct 2025 19:12:45 -0700 Subject: [PATCH 2/2] OBEY RUBOCOP --- app/models/version.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/models/version.rb b/app/models/version.rb index 9b99ce11..09d10528 100644 --- a/app/models/version.rb +++ b/app/models/version.rb @@ -221,10 +221,8 @@ def effective_status # rubocop:disable Metrics/PerceivedComplexity return 404 if redirected_to.ends_with?('epa.gov/sites/production/files/signpost/cc.html') # Special case for climate.nasa.gov getting moved with bad redirects for all the sub-pages. - return 404 if ( - /^https?:\/\/climate.nasa.gov\/.+$/i.match?(url) && - redirected_to.ends_with?('://science.nasa.gov/climate-change/') - ) + return 404 if /^https?:\/\/climate.nasa.gov\/.+$/i.match?(url) && + redirected_to.ends_with?('://science.nasa.gov/climate-change/') # We see a lot of redirects to the root of the same domain when a page is removed. parsed_url = Addressable::URI.parse(url)