@@ -170,6 +170,7 @@ class ID3File(File):
170170 'TSOT' : 'titlesort' ,
171171 'WCOP' : 'license' ,
172172 'WOAR' : 'website' ,
173+ 'WXXX' : 'user_website' ,
173174 'COMM' : 'comment' ,
174175 'TOAL' : 'originalalbum' ,
175176 'TOPE' : 'originalartist' ,
@@ -300,6 +301,8 @@ def _load(self, filename):
300301 for frame in tags .values ():
301302 self ._process_frame (frame , metadata , config_params )
302303
304+ self ._process_link_frames_on_load (tags , metadata )
305+
303306 if 'date' in metadata :
304307 self ._sanitize_date (metadata )
305308
@@ -614,6 +617,12 @@ def _get_tags(self, filename):
614617
615618 def _save_tags (self , tags , filename ):
616619 config = get_config ()
620+
621+ try :
622+ self ._sanitize_id3_frames (tags )
623+ except Exception as e :
624+ log .error ("Error sanitizing ID3 frames: %s" , e )
625+
617626 if config .setting ['write_id3v1' ]:
618627 v1 = 2
619628 else :
@@ -622,11 +631,153 @@ def _save_tags(self, tags, filename):
622631 if config .setting ['write_id3v23' ]:
623632 tags .update_to_v23 ()
624633 separator = config .setting ['id3v23_join_with' ]
625- tags .save (filename , v2_version = 3 , v1 = v1 , v23_sep = separator )
634+ try :
635+ tags .save (filename , v2_version = 3 , v1 = v1 , v23_sep = separator )
636+ except ValueError as e :
637+ if str (e ) == "Invalid frame ID" :
638+ log .warning ("Invalid frame ID error when saving. Removing all LINK frames as fallback..." )
639+ for frame_id in list (tags .keys ()):
640+ if frame_id .startswith ('LINK' ):
641+ del tags [frame_id ]
642+ tags .save (filename , v2_version = 3 , v1 = v1 , v23_sep = separator )
643+ else :
644+ raise
626645 else :
627646 tags .update_to_v24 ()
628647 tags .save (filename , v2_version = 4 , v1 = v1 )
629648
649+ def _sanitize_id3_frames (self , tags ):
650+ """This method attempts to fix various issues with ID3 frames:
651+ 1. Handle invalid LINK frames (convert to WXXX if possible)
652+ 2. Remove null bytes and other illegal characters from frame IDs
653+ Note: In the future, we might consider prompting users about invalid frames."""
654+ to_remove = []
655+ to_add = []
656+ confirmed_problematic_frames = []
657+ extracted_urls = []
658+ for frame_id , frame in list (tags .items ()):
659+ try :
660+ invalid_id = False
661+ if '\x00 ' in frame_id :
662+ invalid_id = True
663+ if frame_id .startswith ('LINK' ):
664+ is_malformed = False
665+ if ':' in frame_id and any (term in frame_id for term in ('http' , 'www.' , '://' )):
666+ is_malformed = True
667+
668+ if hasattr (frame , 'data' ) and frame .data :
669+ frame_data = frame .data .decode ('latin1' , errors = 'ignore' )
670+ # A valid frame should have: URL text string, null byte, then optional data
671+ has_url_pattern = any (term in frame_data for term in ('http' , 'www.' , '://' ))
672+ if frame_data .startswith ('\x00 ' ) and any (term in frame_data [1 :] for term in ('http' , 'www.' , '://' )):
673+ is_malformed = True
674+ elif has_url_pattern and '\x00 ' not in frame_data :
675+ is_malformed = True
676+ elif '\x00 ' in frame_data :
677+ parts = frame_data .split ('\x00 ' , 1 )
678+ if len (parts ) > 1 and any (term in parts [1 ] for term in ('http' , 'www.' , '://' )):
679+ is_malformed = True
680+
681+ if is_malformed :
682+ invalid_id = True
683+ if self ._extract_and_convert_link_frame (frame , frame_id , to_add ):
684+ log .debug ("Successfully converted malformed LINK frame %r to WXXX" , frame_id )
685+ for item in to_add :
686+ if isinstance (item , mutagen .id3 .WXXX ):
687+ extracted_urls .append (item .url )
688+
689+ if invalid_id :
690+ confirmed_problematic_frames .append (frame_id )
691+ to_remove .append (frame_id )
692+
693+ except Exception as e :
694+ log .error ("Error processing frame %r: %s" , frame_id , e )
695+
696+ for frame_id in to_remove :
697+ del tags [frame_id ]
698+ for frame in to_add :
699+ tags .add (frame )
700+
701+ if extracted_urls and hasattr (self , 'metadata' ) and hasattr (self , 'orig_metadata' ):
702+ if 'user_website' not in self .metadata :
703+ self .metadata ['user_website' ] = extracted_urls
704+ if 'user_website' not in self .orig_metadata :
705+ self .orig_metadata ['user_website' ] = extracted_urls
706+
707+ return confirmed_problematic_frames
708+
709+ def _extract_and_convert_link_frame (self , frame , frame_id , to_add ):
710+ """Extract URL from a malformed LINK frame and convert it to a WXXX frame. Handles cases where URLs
711+ are split between frame ID and data or separated by null bytes (common for archive.org)"""
712+
713+ try :
714+ url_parts = []
715+ if hasattr (frame , 'data' ) and frame .data :
716+ frame_data = frame .data .decode ('latin1' , errors = 'ignore' )
717+ if '\x00 ' in frame_data :
718+ parts = frame_data .split ('\x00 ' , 1 )
719+ # Likely valid LINK frame if the first part looks like a URL and second part doesn't contain URL patterns
720+ if (parts [0 ] and any (term in parts [0 ] for term in ('http' , 'www.' , '://' ))
721+ and len (parts ) > 1 and not any (term in parts [1 ] for term in ('http' , 'www.' , '://' ))):
722+ log .debug ("Skipping valid LINK frame with URL %r" , parts [0 ])
723+ return False
724+
725+ if ':' in frame_id :
726+ frame_id_parts = frame_id .split (':' , 1 )
727+ if frame_id_parts [0 ] == 'LINK' :
728+ # Standard case where everything after LINK: is part of the URL
729+ url_parts .append (frame_id_parts [1 ])
730+ elif any (proto in frame_id_parts [0 ] for proto in ('LINKhttp' , 'LINKhtt' , 'LINKwww' )):
731+ # Case where part of the URL protocol is in the frame_id
732+ prefix = 'LINK'
733+ protocol_part = frame_id_parts [0 ][len (prefix ):]
734+ url_parts .append (protocol_part + ':' + frame_id_parts [1 ])
735+
736+ if hasattr (frame , 'data' ) and frame .data :
737+ frame_data = frame .data .decode ('latin1' , errors = 'ignore' ).strip ()
738+ if frame_data :
739+ if '\x00 ' in frame_data :
740+ parts = frame_data .split ('\x00 ' )
741+ for part in parts :
742+ if part .strip () and any (term in part for term in ('http' , 'www.' , '://' )):
743+ url_parts .append (part .strip ())
744+ elif any (term in frame_data for term in ('http' , 'www.' , '://' )):
745+ url_parts .append (frame_data )
746+
747+ combined_url = '' .join (url_parts ).strip (':;,. \t \n \r ' )
748+
749+ if combined_url :
750+ url_match = re .search (r'(?:https?:?(?:/+|\\+)|www\.)[a-zA-Z0-9][-a-zA-Z0-9\.]+\.[a-zA-Z]{2,}(?:/[^\s:;,]*)?' , combined_url )
751+ if url_match :
752+ raw_url = url_match .group (0 )
753+
754+ if raw_url .startswith ('www.' ):
755+ final_url = 'http://' + raw_url
756+ elif raw_url .startswith (('http://' , 'https://' )):
757+ final_url = raw_url
758+ else :
759+ final_url = re .sub (r'^htt:?p' , 'http' , raw_url )
760+ if not final_url .startswith (('http://' , 'https://' )):
761+ if final_url .startswith (':' ):
762+ final_url = 'http' + final_url
763+ elif final_url .startswith ('//' ):
764+ final_url = 'http:' + final_url
765+ elif final_url .startswith ('p://' ):
766+ final_url = 'http://' + final_url [4 :]
767+ else :
768+ final_url = 'http://' + final_url
769+
770+ wxxx_frame = mutagen .id3 .WXXX (encoding = Id3Encoding .LATIN1 , desc = "URL from malformed LINK frame" , url = final_url )
771+ to_add .append (wxxx_frame )
772+ return True
773+
774+ log .warning ("Could not extract URL from malformed LINK frame %r" , frame_id )
775+ return False
776+
777+ except Exception as e :
778+ log .error ("Failed to process LINK frame %r: %s" , frame_id , e )
779+ return False
780+
630781 def format_specific_metadata (self , metadata , tag , settings = None ):
631782 if not settings :
632783 settings = get_config ().setting
@@ -1035,6 +1186,49 @@ def _remove_other_supported_tag(self, tags, real_name):
10351186 """Remove other supported tag from ID3 frames."""
10361187 del tags [real_name ]
10371188
1189+ def _process_link_frames_on_load (self , tags , metadata ):
1190+ """Process malformed LINK frames during initial file load and add them as website URLs."""
1191+
1192+ link_frames = []
1193+ for frame_id , frame in list (tags .items ()):
1194+ if frame_id .startswith ('LINK' ):
1195+ is_potentially_malformed = False
1196+
1197+ if ':' in frame_id and any (term in frame_id for term in ('http' , 'www.' , '://' )):
1198+ is_potentially_malformed = True
1199+
1200+ if hasattr (frame , 'data' ) and frame .data :
1201+ try :
1202+ frame_data = frame .data .decode ('latin1' , errors = 'ignore' )
1203+
1204+ if frame_data .startswith ('\x00 ' ) and any (term in frame_data [1 :] for term in ('http' , 'www.' , '://' )):
1205+ is_potentially_malformed = True
1206+
1207+ elif '\x00 ' in frame_data :
1208+ parts = frame_data .split ('\x00 ' , 1 )
1209+ if len (parts ) > 1 and any (term in parts [1 ] for term in ('http' , 'www.' , '://' )):
1210+ is_potentially_malformed = True
1211+
1212+ elif any (term in frame_data for term in ('http' , 'www.' , '://' )) and '\x00 ' not in frame_data :
1213+ is_potentially_malformed = True
1214+ except Exception :
1215+ is_potentially_malformed = True
1216+
1217+ if is_potentially_malformed :
1218+ link_frames .append ((frame_id , frame ))
1219+
1220+ if not link_frames :
1221+ return
1222+
1223+ to_add = []
1224+ for frame_id , frame in link_frames :
1225+ if self ._extract_and_convert_link_frame (frame , frame_id , to_add ):
1226+ log .debug ("Extracted URL from malformed LINK frame %r during load" , frame_id )
1227+
1228+ for frame in to_add :
1229+ if isinstance (frame , mutagen .id3 .WXXX ):
1230+ url = frame .url
1231+ metadata .add ('user_website' , url )
10381232
10391233class MP3File (ID3File ):
10401234
@@ -1080,10 +1274,26 @@ def _get_tags(self, filename):
10801274
10811275 def _save_tags (self , tags , filename ):
10821276 config = get_config ()
1277+
1278+ try :
1279+ self ._sanitize_id3_frames (tags )
1280+ except Exception as e :
1281+ log .error ("Error sanitizing ID3 frames: %s" , e )
1282+
10831283 if config .setting ['write_id3v23' ]:
10841284 compatid3 .update_to_v23 (tags )
10851285 separator = config .setting ['id3v23_join_with' ]
1086- tags .save (filename , v2_version = 3 , v23_sep = separator )
1286+ try :
1287+ tags .save (filename , v2_version = 3 , v23_sep = separator )
1288+ except ValueError as e :
1289+ if str (e ) == "Invalid frame ID" :
1290+ log .debug ("Invalid frame ID error when saving. Removing all LINK frames as fallback..." )
1291+ for frame_id in list (tags .keys ()):
1292+ if frame_id .startswith ('LINK' ):
1293+ del tags [frame_id ]
1294+ tags .save (filename , v2_version = 3 , v23_sep = separator )
1295+ else :
1296+ raise
10871297 else :
10881298 tags .update_to_v24 ()
10891299 tags .save (filename , v2_version = 4 )
0 commit comments