11import re
22import typing
3+ from urllib .parse import urlparse
34
45import cid
56from multibase import decode
@@ -13,21 +14,27 @@ class Validator:
1314
1415 def __init__ (self , input : typing .Any ):
1516 self .input = input
16- self .pathGatewayPattern = re .compile (
17+ self .path_gateway_pattern = re .compile (
1718 r"^https?://[^/]+/(?P<protocol>ip[fn]s)/(?P<hash>[^/?#]+)"
1819 )
19- self .subdomainGatewayPattern = re .compile (
20+ self .subdomain_gateway_pattern = re .compile (
2021 r"^https?://(?P<hash>[^/]+)\.(?P<protocol>ip[fn]s)\.[^/?]+"
2122 )
22- self .pathPattern = re .compile (r"^/(?P<protocol>ip[fn]s)/(?P<hash>[^/?#]+)" )
23+ self .path_pattern = re .compile (r"^/(?P<protocol>ip[fn]s)/(?P<hash>[^/?#]+)" )
2324
def is_ipfs(self) -> bool:
    """
    Returns True if the provided input is a valid IPFS/IPNS resource or False otherwise.

    The individual checks run in a fixed order and evaluation stops at the first one that succeeds.
    """
    # Order matters only for short-circuiting: the CID check runs first, path checks last.
    ordered_checks = (
        self._is_cid,
        self._is_ipfs_url,
        self._is_ipns_url,
        self._is_ipfs_path,
        self._is_ipns_path,
    )
    for check in ordered_checks:
        outcome = check()
        if outcome:
            return outcome
    # Every check was falsy: hand back the last result, as a chained `or` would.
    return outcome
2936
30- def _is_CID (self ) -> bool :
37+ def _is_cid (self ) -> bool :
3138 """
3239 Returns True if the provided string or CID object represents a valid CID or False otherwise.
3340 """
@@ -36,27 +43,25 @@ def _is_CID(self) -> bool:
3643 elif type (self .input ) == bytes :
3744 try :
3845 return cid .is_cid (decode (self .input ))
39- except :
46+ except Exception as error :
47+ print (f"Unexpected { type (error )} , { error } " )
4048 return False
4149 else :
4250 try :
4351 if isinstance (self .input , (cid .CIDv0 , cid .CIDv1 )):
4452 return cid .is_cid (str (self .input ))
45- except :
53+ except Exception as error :
54+ print (f"Unexpected { type (error )} , { error } " )
4655 return False
4756 return False
4857
49- def _is_ipfs_url (self ) -> bool :
50- """
51- Returns True if the provided string is a valid IPFS url or False otherwise.
52- """
53- return self ._ipfs_path_url () or self ._ipfs_subdomain_url ()
54-
5558 def _is_integral_ipfs_url (
5659 self ,
5760 pattern : re .Pattern ,
5861 ) -> bool :
59-
62+ """
63+ Main logic for IPFS URL validation.
64+ """
6065 formatted = str (self .input )
6166 if not formatted :
6267 return False
@@ -70,14 +75,15 @@ def _is_integral_ipfs_url(
7075
7176 _hash = match ["hash" ]
7277
73- if pattern == self .subdomainGatewayPattern :
78+ if pattern == self .subdomain_gateway_pattern :
7479 _hash = _hash .lower ()
7580 try :
7681 if get_codec (_hash ).encoding not in ["base32" , "base36" ]:
7782 return False
78- except :
83+ except Exception as error :
84+ print (f"Unexpected { type (error )} , { error } " )
7985 return False
80- elif pattern == self .pathGatewayPattern :
86+ elif pattern == self .path_gateway_pattern :
8187 if not str (_hash ).startswith ("Qm" ):
8288 try :
8389 if get_codec (_hash ).encoding not in [
@@ -90,34 +96,141 @@ def _is_integral_ipfs_url(
9096 "base58flickr" ,
9197 "base58btc" ,
9298 "base64url" ,
93- "base32" ,
94- "base36" ,
9599 ]:
96100 return False
97- except :
98- pass
101+ except Exception as error :
102+ print (f"Unexpected { type (error )} , { error } " )
103+ return False
99104
100- return Validator (_hash )._is_CID ()
105+ return Validator (_hash )._is_cid ()
101106
102107 def _ipfs_subdomain_url (self ) -> bool :
103108 """
104109 Returns True if the provided url string includes a valid IPFS subdomain (case-insensitive CIDv1) or False otherwise.
105110 """
106111 return self ._is_integral_ipfs_url (
107- self .subdomainGatewayPattern ,
112+ self .subdomain_gateway_pattern ,
108113 )
109114
110115 def _ipfs_path_url (self ) -> bool :
111116 """
112117 Returns True if the provided url string is a valid IPFS URL or False otherwise.
113118 """
114-
115119 return self ._is_integral_ipfs_url (
116- self .pathGatewayPattern ,
120+ self .path_gateway_pattern ,
117121 )
118122
123+ def _is_ipfs_url (self ) -> bool :
124+ """
125+ Returns True if the provided string is a valid IPFS url or False otherwise.
126+ """
127+ return self ._ipfs_path_url () or self ._ipfs_subdomain_url ()
128+
119129 def _is_ipfs_path (self ) -> bool :
120130 """
121- Returns true if the provided string is a valid IPFS path or false otherwise.
131+ Returns True if the provided string is a valid IPFS path or False otherwise.
122132 """
123- return self ._is_integral_ipfs_url (self .pathPattern )
133+ return self ._is_integral_ipfs_url (self .path_pattern )
134+
135+ def _is_integral_ipns_url (
136+ self ,
137+ pattern : re .Pattern ,
138+ ) -> bool :
139+ """
140+ Main logic for IPNS URL validation.
141+ """
142+ formatted = str (self .input )
143+ if not formatted :
144+ return False
145+
146+ match = re .match (pattern , formatted )
147+ if not match :
148+ return False
149+
150+ if match ["protocol" ] != "ipns" :
151+ return False
152+
153+ ipns_id = match ["hash" ]
154+
155+ if ipns_id and pattern == self .subdomain_gateway_pattern :
156+ ipns_id = ipns_id .lower ()
157+
158+ if Validator (ipns_id )._is_cid ():
159+ try :
160+ if get_codec (ipns_id ).encoding == "base36" :
161+ return True
162+ except Exception as error :
163+ print (f"Unexpected { type (error )} , { error } " )
164+ return False
165+ try :
166+ if "." not in ipns_id and "-" in ipns_id :
167+ ipns_id = (
168+ ipns_id .replace ("--" , "@" ).replace ("-" , "." ).replace ("@" , "-" )
169+ )
170+
171+ return self ._id_is_explicit_tld (ipns_id )
172+ except Exception as error :
173+ print (f"Unexpected { type (error )} , { error } " )
174+ return False
175+
176+ elif pattern == self .path_gateway_pattern or pattern == self .path_pattern :
177+ if not str (ipns_id ).startswith ("Qm" ):
178+ if self ._id_is_explicit_tld (ipns_id ):
179+ return True
180+
181+ try :
182+ if get_codec (ipns_id ).encoding not in [
183+ "base2" ,
184+ "base16" ,
185+ "base32" ,
186+ "base32hex" ,
187+ "base36" ,
188+ "base36upper" ,
189+ "base58flickr" ,
190+ "base58btc" ,
191+ "base64url" ,
192+ ]:
193+ return False
194+ except Exception as error :
195+ print (f"Unexpected { type (error )} , { error } " )
196+ return False
197+
198+ return Validator (ipns_id )._is_cid ()
199+
200+ def _ipns_subdomain_url (self ) -> bool :
201+ """
202+ Returns True if the provided url string includes a valid IPFS subdomain (case-insensitive CIDv1) or False otherwise.
203+ """
204+ return self ._is_integral_ipns_url (
205+ self .subdomain_gateway_pattern ,
206+ )
207+
208+ def _ipns_path_url (self ) -> bool :
209+ """
210+ Returns True if the provided url string is a valid IPFS URL or False otherwise.
211+ """
212+ return self ._is_integral_ipns_url (
213+ self .path_gateway_pattern ,
214+ )
215+
216+ def _is_ipns_url (self ) -> bool :
217+ """
218+ Returns True if the provided string is a valid IPFS url or False otherwise.
219+ """
220+ return self ._ipns_path_url () or self ._ipns_subdomain_url ()
221+
222+ def _is_ipns_path (self ) -> bool :
223+ """
224+ Returns True if the provided string is a valid IPNS path or False otherwise.
225+ """
226+ return self ._is_integral_ipns_url (self .path_pattern )
227+
228+ def _id_is_explicit_tld (self , input_string : str ) -> bool :
229+ """
230+ Returns True if the provided url string has an explicit TLD, False otherwise.
231+ """
232+ fqdn_with_tld = re .compile (
233+ r"(?=^.{4,253}\.?$)(^((?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+[a-zA-Z]{2,63}$)"
234+ )
235+ hostname = urlparse (f"http://{ input_string } " ).hostname
236+ return bool (re .search (fqdn_with_tld , hostname ))
0 commit comments