Skip to content

Commit 710e43a

Browse files
committed
Bugfix: path normalization applied to URLs
The `-r` and `-c` path normalization logic did not check if the requested requirements or constraint files were remote files being loaded, e.g., via HTTPS. As a result, a usage like `-c https://example.com/constraints.txt` incorrectly got converted to a path and normalized, stripping one of the two slashes which separates the scheme from the netloc. A regression test is added which reproduces the bug, and the gap in detection is closed by checking explicitly if inputs can parse as URIs. Any given URI (including file URIs) will be exempted from the path rewrites. The check is implemented by calling `urllib.parse.urlparse()` and checking the `scheme` attribute of the parse result.
1 parent 6e0ef33 commit 710e43a

File tree

3 files changed

+106
-0
lines changed

3 files changed

+106
-0
lines changed

changelog.d/2223.bugfix.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fixed a bug which removed slashes from URLs in ``-r`` and ``-c`` in the output
2+
of ``pip-compile`` -- by {user}`sirosen`.

piptools/_compat/pip_compat.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import optparse
44
import pathlib
5+
import urllib.parse
56
from dataclasses import dataclass
67
from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Set, cast
78

@@ -140,6 +141,10 @@ def _relativize_comes_from_location(original_comes_from: str, /) -> str:
140141
# split on the space
141142
prefix, space_sep, suffix = original_comes_from.partition(" ")
142143

144+
# if the value part is a URI, return the original
145+
if _is_uri(suffix):
146+
return original_comes_from
147+
143148
file_path = pathlib.Path(suffix)
144149

145150
# if the path was not absolute, normalize to posix-style and finish processing
@@ -169,11 +174,34 @@ def _normalize_comes_from_location(original_comes_from: str, /) -> str:
169174
# split on the space
170175
prefix, space_sep, suffix = original_comes_from.partition(" ")
171176

177+
# if the value part is a URI, return the original
178+
if _is_uri(suffix):
179+
return original_comes_from
180+
172181
# convert to a posix-style path
173182
suffix = pathlib.Path(suffix).as_posix()
174183
return f"{prefix}{space_sep}{suffix}"
175184

176185

186+
def _is_uri(value: str) -> bool:
187+
"""
188+
Test a string to see if it is a URI.
189+
190+
The test is performed by trying a URL parse and seeing is a scheme is populated.
191+
192+
This means that according to this rule, valid URLs such as
193+
``example.com/data/my_pip_constraints.txt`` may fail to count as URIs.
194+
However, we cannot safely distinguish such strings from real filesystem paths.
195+
e.g., ``./example.com/`` may be a directory.
196+
197+
Importantly, realistic usage such as
198+
``-c https://example.com/constraints.txt``
199+
is properly detected by this technique.
200+
"""
201+
parse_result = urllib.parse.urlparse(value)
202+
return parse_result.scheme != ""
203+
204+
177205
def create_wheel_cache(cache_dir: str, format_control: str | None = None) -> WheelCache:
178206
kwargs: dict[str, str | None] = {"cache_dir": cache_dir}
179207
if PIP_VERSION[:2] <= (23, 0):

tests/test_cli_compile.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from unittest.mock import MagicMock
1515

1616
import pytest
17+
from pip._internal.network.session import PipSession
1718
from pip._internal.req.constructors import install_req_from_line
1819
from pip._internal.utils.hashes import FAVORITE_HASH
1920
from pip._internal.utils.urls import path_to_url
@@ -4044,3 +4045,78 @@ def test_second_order_requirements_relative_path_in_separate_dir(
40444045
# via -r {output_path}
40454046
"""
40464047
)
4048+
4049+
4050+
@pytest.mark.parametrize(
4051+
"input_path_absolute", (True, False), ids=("absolute-input", "relative-input")
4052+
)
4053+
def test_url_constraints_are_not_treated_as_file_paths(
4054+
pip_conf,
4055+
make_package,
4056+
runner,
4057+
tmp_path,
4058+
monkeypatch,
4059+
input_path_absolute,
4060+
):
4061+
"""
4062+
Test normalization of ``-c`` constraints when the constraints are HTTPS URLs.
4063+
The constraints should be preserved verbatim.
4064+
4065+
This is a regression test for
4066+
https://github.com/jazzband/pip-tools/issues/2223
4067+
"""
4068+
reqs_in = tmp_path / "requirements.in"
4069+
constraints_url = "https://example.com/files/common_constraints.txt"
4070+
reqs_in.write_text(
4071+
f"""
4072+
small-fake-a
4073+
-c {constraints_url}
4074+
"""
4075+
)
4076+
4077+
input_dir_path = tmp_path if input_path_absolute else pathlib.Path(".")
4078+
input_path = (input_dir_path / "requirements.in").as_posix()
4079+
4080+
# TODO: find a better way of mocking the callout to get the constraints
4081+
# file (use `responses`?)
4082+
#
4083+
# we need a mock response for `GET https://...` as fetched by pip
4084+
# although this is fragile, it can be adapted if pip changes
4085+
def fake_url_get(url):
4086+
response = mock.Mock()
4087+
response.reason = "Ok"
4088+
response.status_code = 200
4089+
response.url = url
4090+
response.text = "small-fake-a==0.2"
4091+
return response
4092+
4093+
mock_get = mock.Mock(side_effect=fake_url_get)
4094+
4095+
with monkeypatch.context() as revertable_ctx:
4096+
revertable_ctx.chdir(tmp_path)
4097+
with mock.patch.object(PipSession, "get", mock_get):
4098+
out = runner.invoke(
4099+
cli,
4100+
[
4101+
"--output-file",
4102+
"-",
4103+
"--quiet",
4104+
"--no-header",
4105+
"--no-emit-options",
4106+
"-r",
4107+
input_path,
4108+
],
4109+
)
4110+
4111+
# sanity check, pip should have tried to fetch the constraints
4112+
mock_get.assert_called_once_with(constraints_url)
4113+
4114+
assert out.exit_code == 0
4115+
assert out.stdout == dedent(
4116+
f"""\
4117+
small-fake-a==0.2
4118+
# via
4119+
# -c {constraints_url}
4120+
# -r {input_path}
4121+
"""
4122+
)

0 commit comments

Comments
 (0)