Replace urlparse with urlsplit

Mr0grog · Mr0grog · commit d7a08a1bcb30 · 2025-10-10T12:06:06.000-07:00
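I learned recently that `urlsplit` is generally more correct/up-to-date than `urlparse`: it follows the newer URL syntax and does not split a `;params` component off the path. Note that this means `.path` now keeps any `;parameters` on the last path segment, where `urlparse` moved them into `.params`. It's also much faster, although URL parsing is really not the bottleneck here.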
diff --git a/analyst_sheets/analyze.py b/analyst_sheets/analyze.py
@@ -25,7 +25,7 @@
 import os.path
 import re
 import sys
-from urllib.parse import urljoin, urlparse
+from urllib.parse import urljoin, urlsplit
 from web_monitoring_diff import (html_source_diff, html_text_diff,
                                  links_diff_json)
 
@@ -102,7 +102,7 @@ def is_fetchable(url):
 
 
 def is_allowed_extension(url):
-    extension = os.path.splitext(urlparse(url).path)[1]
+    extension = os.path.splitext(urlsplit(url).path)[1]
     return not extension or extension not in DISALLOWED_EXTENSIONS
 
 
@@ -341,7 +341,7 @@ def get_version_status(version: dict) -> int:
     redirects, _, _ = get_redirects(version)
     if (
         redirects
-        and urlparse(url).path != '/'
+        and urlsplit(url).path != '/'
         and surt(urljoin(url, '/')) == surt(redirects[-1])
     ):
         return 404
@@ -526,7 +526,7 @@ def analyze_redirects(page, a, b):
 
 
 def is_home_page(page):
-    url_path = urlparse(page['url']).path
+    url_path = urlsplit(page['url']).path
     return True if ROOT_PAGE_PATTERN.match(url_path) else False