Skip to content

Commit f628f6a

Browse files
authored
Implement IPFS URL & subdomain validation (#12)
1 parent 9d0c639 commit f628f6a

File tree

9 files changed

+179
-54
lines changed

9 files changed

+179
-54
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@ repos:
1010
- id: end-of-file-fixer
1111

1212
- repo: https://github.com/psf/black
13-
rev: 22.1.0
13+
rev: 22.3.0
1414
hooks:
1515
- id: black

README.MD

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# py-is_ipfs
2-
[![Latest release](https://img.shields.io/pypi/v/py-is_ipfs?color=blue&label=release)](https://github.com/Barabazs/py-is_ipfs/releases/latest)
3-
![GitHub Workflow Status](https://img.shields.io/github/workflow/status/Barabazs/py-is_ipfs/Test?label=Test&logo=github)
2+
[![Latest release](https://img.shields.io/pypi/v/py-is_ipfs?color=blue&label=release)](https://github.com/Barabazs/py-is_ipfs/releases/latest) ![GitHub Workflow Status](https://img.shields.io/github/workflow/status/Barabazs/py-is_ipfs/Test?label=Test&logo=github)
43

54
py-is_ipfs is a Python library to identify valid IPFS resources.
65
This project is a work in progress.
@@ -9,7 +8,7 @@ This project is a work in progress.
98
### Installation
109
py-is_ipfs is available on PyPI:
1110

12-
`python -m pip install py_is-ipfs`
11+
`python -m pip install py-is_ipfs`
1312

1413
### Usage
1514

@@ -23,13 +22,17 @@ print(Validator("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o").is_ipfs())
2322
* [x] v0
2423
* [x] v1
2524
* [ ] URL
26-
* [ ] IPFS
25+
* [x] IPFS
2726
* [ ] IPNS
2827
* [ ] Subdomain
29-
* [ ] IPFS
28+
* [x] IPFS
3029
* [ ] IPNS
3130
* [ ] Path
3231
* [ ] ...
3332

3433
## License
3534
[MIT](https://github.com/Barabazs/py-is_ipfs/blob/main/LICENSE)
35+
36+
## Acknowledgments
37+
* [Protocol Labs](https://protocol.ai/) for funding this project with a micro grant.
38+
* [ipfs-shipyard/is-ipfs](https://github.com/ipfs-shipyard/is-ipfs) for the heuristics implemented in JavaScript that this project used as a starting point.

is_ipfs/is_ipfs.py

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
import typing
23

34
import cid
@@ -11,16 +12,22 @@ class Validator:
1112

1213
def __init__(self, input: typing.Any):
1314
self.input = input
15+
self.pathGatewayPattern = re.compile(
16+
r"^https?://[^/]+/(?P<protocol>ip[fn]s)/(?P<hash>[^/?#]+)"
17+
)
18+
self.subdomainGatewayPattern = re.compile(
19+
r"^https?://(?P<hash>[^/]+)\.(?P<protocol>ip[fn]s)\.[^/?]+"
20+
)
1421

1522
def is_ipfs(self) -> bool:
1623
"""
17-
Returns True if the provided input is a valid IPFS CID or False otherwise.
24+
Returns True if the provided input is a valid IPFS resource/object or False otherwise.
1825
"""
19-
return self._is_CID()
26+
return self._is_CID() or self._is_ipfs_url()
2027

2128
def _is_CID(self) -> bool:
2229
"""
23-
Returns true if the provided string or CID object represents a valid CID or false otherwise.
30+
Returns True if the provided string or CID object represents a valid CID or False otherwise.
2431
"""
2532
if type(self.input) == str:
2633
return cid.is_cid(self.input)
@@ -36,3 +43,49 @@ def _is_CID(self) -> bool:
3643
except:
3744
return False
3845
return False
46+
47+
def _is_ipfs_url(self) -> bool:
48+
"""
49+
Returns True if the provided string is a valid IPFS url or False otherwise.
50+
"""
51+
return self._ipfs_path_url() or self._ipfs_subdomain_url()
52+
53+
def _is_integral_ipfs_url(
54+
self,
55+
pattern: re.Pattern,
56+
) -> bool:
57+
58+
formatted = str(self.input)
59+
if not formatted:
60+
return False
61+
62+
match = re.match(pattern, formatted)
63+
if not match:
64+
return False
65+
66+
if match["protocol"] != "ipfs":
67+
return False
68+
69+
_hash = match["hash"]
70+
71+
if pattern == self.subdomainGatewayPattern:
72+
_hash = _hash.lower()
73+
74+
return Validator(_hash)._is_CID()
75+
76+
def _ipfs_subdomain_url(self) -> bool:
77+
"""
78+
Returns True if the provided url string includes a valid IPFS subdomain (case-insensitive CIDv1) or False otherwise.
79+
"""
80+
return self._is_integral_ipfs_url(
81+
self.subdomainGatewayPattern,
82+
)
83+
84+
def _ipfs_path_url(self) -> bool:
85+
"""
86+
Returns True if the provided url string is a valid IPFS URL or False otherwise.
87+
"""
88+
89+
return self._is_integral_ipfs_url(
90+
self.pathGatewayPattern,
91+
)

requirements-dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
pre-commit==2.17.0
2-
black==22.1.0
2+
black==22.3.0

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
setuptools.setup(
88
name="py-is_ipfs",
9-
version="0.0.3",
9+
version="0.0.4",
1010
description="Python library to identify valid IPFS resources",
1111
long_description=long_description,
1212
long_description_content_type="text/markdown",

tests/integration/test_is_ipfs.py

Lines changed: 32 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,38 @@
11
from is_ipfs import Validator
2-
import cid
32
import unittest
3+
import tests.testing_data as testing_data
44

55

66
class TestCase(unittest.TestCase):
7-
def test_is_ipfs(self):
8-
# Test valid CIDv0 strings
9-
self.assertTrue(
10-
Validator("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o").is_ipfs()
11-
)
12-
# Test invalid CIDv0 strings
13-
self.assertFalse(
14-
Validator("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE70").is_ipfs()
15-
)
16-
17-
# Test valid CIDv1 strings
18-
self.assertTrue(
19-
Validator(
20-
"bafybeie2reiz2q6rbcuwpy2etyztjnceolu4rdi7rp3th2lsky4r5ckeey"
21-
).is_ipfs()
22-
)
23-
24-
# Test invalid CIDv1 strings
25-
self.assertFalse(
26-
Validator(
27-
"bafybeie2reiz2q6rbcuwpy2etyztjnceolu4rdi7rp3th2lsky4r5ckee"
28-
).is_ipfs()
29-
)
30-
31-
# Test invalid misc input
32-
self.assertFalse(Validator("").is_ipfs())
33-
self.assertFalse(Validator(1345).is_ipfs())
34-
35-
self.assertTrue(
36-
Validator(
37-
cid.from_string("QmYjtig7VJQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o")
38-
).is_ipfs()
39-
)
40-
self.assertFalse(
41-
Validator(
42-
cid.CIDv0("QmYjtig7VQ6XsnUjqqJvj7QaMcCAwtrgNdahSiFofrE7o")
43-
).is_ipfs()
44-
)
45-
self.assertFalse(Validator(cid.CIDv0("dfmqjdmfkjqdm")).is_ipfs())
46-
47-
48-
if __name__ == "__main__": # pragma: no cover
7+
def test_all(self):
    """
    Runs every fixture category through the public ``Validator.is_ipfs`` entry point.

    Each fixture entry gets its own subTest, so a failure reports the offending
    entry and does not stop the remaining entries from being checked.
    """
    valid = testing_data.valid_entries
    invalid = testing_data.invalid_entries

    # The CID fixtures are grouped under sub-keys; only the grouped lists matter here.
    for entries in valid["cid"].values():
        for entry in entries:
            with self.subTest("Test valid CID entries from fixtures", entry=entry):
                self.assertTrue(Validator(entry).is_ipfs())

    for entries in invalid["cid"].values():
        for entry in entries:
            with self.subTest(
                "Test invalid CID entries from fixtures", entry=entry
            ):
                self.assertFalse(Validator(entry).is_ipfs())

    # Valid URL fixtures are checked for every protocol key present in the fixtures.
    for entries in valid["url"].values():
        for entry in entries:
            with self.subTest(
                "Test valid IPFS URL entries from fixtures", entry=entry
            ):
                self.assertTrue(Validator(entry).is_ipfs())

    for entry in invalid["url"]["ipfs"]:
        with self.subTest(
            "Test invalid IPFS URL entries from fixtures", entry=entry
        ):
            self.assertFalse(Validator(entry).is_ipfs())

    for entry in valid["subdomain"]["ipfs"]:
        with self.subTest(
            "Test valid IPFS subdomain entries from fixtures", entry=entry
        ):
            self.assertTrue(Validator(entry).is_ipfs())

    for entry in invalid["subdomain"]["ipfs"]:
        with self.subTest(
            "Test invalid IPFS subdomain entries from fixtures", entry=entry
        ):
            self.assertFalse(Validator(entry).is_ipfs())
35+
36+
37+
if __name__ == "__main__":
4938
unittest.main()

tests/testing_data.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,23 @@
4747
"f01701220c3c4733ec8affd06cf9e9ff50ffc6bcd2ec85a6170004bb709669c31de94391a",
4848
],
4949
},
50+
"url": {
51+
"ipfs": [
52+
"http://ipfs.io/ipfs/QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm?arg=val#hash",
53+
"http://ipfs.alexandria.media/ipfs/QmeWz9YZEeNFXQhHg4PnR5ZiNr5isttgi5n1tc1eD5EfGU/content/index.html?arg=val#hash",
54+
"http://ipfs.io/ipfs/QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm",
55+
"https://gateway.pinata.cloud/ipfs/Qmb4sw3sqA7AZsaRZ7vtMwxCduk1ExJL5gVDPcpnP8kxFK/",
56+
"https://gateway.pinata.cloud/ipfs/bafybeif5dwlk2sdx5yge4azff2ovsnar63cu37ncw4n24vnqixwamwhxui/",
57+
"https://bafybeif5dwlk2sdx5yge4azff2ovsnar63cu37ncw4n24vnqixwamwhxui.ipfs.dweb.link/",
58+
],
59+
},
60+
"subdomain": {
61+
"ipfs": [
62+
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.dweb.link",
63+
"http://bafybeidvtwx54qr44kidymvhfzefzxhgkieigwth6oswk75zhlzjdmunoy.ipfs.dweb.link/linkify-demo.html",
64+
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.localhost:8080",
65+
],
66+
},
5067
}
5168
invalid_entries = {
5269
"cid": {
@@ -67,4 +84,30 @@
6784
encode("base16", "QmNQuBJ8g4QN6mSLXHekxBbcToPwKxamWNrDdEugxMTDd"),
6885
]
6986
},
87+
"url": {
88+
"ipfs": [
89+
"http://ipfs.io/ipns/github.com/",
90+
"https://Qmb4sw3sqA7AZsaRZ7vtMwxCduk1ExJL5gVDPcpnP8kxFK.ipfs.dweb.link/",
91+
"http://ipfs.io/ipns/QmYHNYAaYK5hm3ZhZFx5W9H6xydKDGimjdgJMrMSdnctEm",
92+
"https://github.com/ipfs/js-ipfs/blob/master/README.md",
93+
"https://google.com",
94+
"http://ipfs.io/ipns/github.com/",
95+
"https://github.com/ipfs/js-ipfs/blob/master/README.md",
96+
"http://ipfs.io/ipns/github.com/",
97+
"https://github.com/ipfs/js-ipfs/blob/master/README.md",
98+
],
99+
},
100+
"subdomain": {
101+
"ipfs": [
102+
"http://bafybeiabc2xofh6tdi6vutusorpumwcikw3hf3st4ecjugo6j52f6xwc6q.ipns.dweb.link",
103+
"http://not-a-cid.ipfs.dweb.link",
104+
"http://QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR.ipfs.dweb.link",
105+
"http://bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.dweb.link",
106+
"http://QmcNioXSC1bfJj1dcFErhUfyjFzoX2HodkRccsFFVJJvg8.ipns.dweb.link",
107+
"http://bafybeiabc2xofh6tdi6vutusorpumwcikw3hf3st4ecjugo6j52f6xwc6q.dweb.link",
108+
"http://invalid-hostname-.ipns.dweb.link",
109+
"http://www.bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va.ipfs.dweb.link",
110+
"http://not-a-cid-or-valid-hostname-.ipns.dweb.link",
111+
],
112+
},
70113
}

tests/unit/test_subdomain.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from is_ipfs import Validator
2+
import unittest
3+
import tests.testing_data as testing_data
4+
5+
6+
class TestCase(unittest.TestCase):
    """Unit tests for ``Validator._ipfs_subdomain_url`` driven by the shared fixtures."""

    def test_ipfs_subdomain(self):
        """Valid subdomain fixtures must be accepted and invalid ones rejected."""
        # One subTest per entry: a failure names the offending URL and the
        # remaining fixtures are still exercised.
        for entry in testing_data.valid_entries["subdomain"]["ipfs"]:
            with self.subTest(
                "Test valid IPFS subdomain entries from fixtures", entry=entry
            ):
                self.assertTrue(Validator(entry)._ipfs_subdomain_url())

        for entry in testing_data.invalid_entries["subdomain"]["ipfs"]:
            with self.subTest(
                "Test invalid IPFS subdomain entries from fixtures", entry=entry
            ):
                self.assertFalse(Validator(entry)._ipfs_subdomain_url())
15+
16+
17+
if __name__ == "__main__": # pragma: no cover
18+
unittest.main()

tests/unit/test_url.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from is_ipfs import Validator
2+
import unittest
3+
import tests.testing_data as testing_data
4+
5+
6+
class TestCase(unittest.TestCase):
    """Unit tests for ``Validator._is_ipfs_url`` driven by the shared fixtures."""

    def test_ipfs_url(self):
        """Valid URL fixtures must be accepted and invalid ones rejected."""
        # One subTest per entry: a failure names the offending URL (so no debug
        # print is needed) and the remaining fixtures are still exercised.
        for entry in testing_data.valid_entries["url"]["ipfs"]:
            with self.subTest(
                "Test valid IPFS URL entries from fixtures", entry=entry
            ):
                self.assertTrue(Validator(entry)._is_ipfs_url())

        for entry in testing_data.invalid_entries["url"]["ipfs"]:
            with self.subTest(
                "Test invalid IPFS URL entries from fixtures", entry=entry
            ):
                self.assertFalse(Validator(entry)._is_ipfs_url())
16+
17+
18+
if __name__ == "__main__": # pragma: no cover
19+
unittest.main()

0 commit comments

Comments
 (0)