Skip to content

Commit d7a08a1

Browse files
committed
Replace urlparse with urlsplit
I recently learned that `urlsplit` is generally more correct and up-to-date than `urlparse` (it does not split the obsolete `;params` component off the path). It's also much faster, although URL parsing is not really the bottleneck here.
1 parent 03ff0c4 commit d7a08a1

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

analyst_sheets/analyze.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
import os.path
2626
import re
2727
import sys
28-
from urllib.parse import urljoin, urlparse
28+
from urllib.parse import urljoin, urlsplit
2929
from web_monitoring_diff import (html_source_diff, html_text_diff,
3030
links_diff_json)
3131

@@ -102,7 +102,7 @@ def is_fetchable(url):
102102

103103

104104
def is_allowed_extension(url):
105-
extension = os.path.splitext(urlparse(url).path)[1]
105+
extension = os.path.splitext(urlsplit(url).path)[1]
106106
return not extension or extension not in DISALLOWED_EXTENSIONS
107107

108108

@@ -341,7 +341,7 @@ def get_version_status(version: dict) -> int:
341341
redirects, _, _ = get_redirects(version)
342342
if (
343343
redirects
344-
and urlparse(url).path != '/'
344+
and urlsplit(url).path != '/'
345345
and surt(urljoin(url, '/')) == surt(redirects[-1])
346346
):
347347
return 404
@@ -526,7 +526,7 @@ def analyze_redirects(page, a, b):
526526

527527

528528
def is_home_page(page):
529-
url_path = urlparse(page['url']).path
529+
url_path = urlsplit(page['url']).path
530530
return True if ROOT_PAGE_PATTERN.match(url_path) else False
531531

532532

0 commit comments

Comments
 (0)