@@ -21,31 +21,46 @@ def __init__(self, parse_options=None):
2121
def read(self, file):
    """Reads the captions file.

    The content lines are loaded from the path given in ``file``, then
    validated and parsed. Returns this same instance so calls can be
    chained.
    """
    lines = self._get_content_from_file(file_path=file)
    self._validate(lines)
    self._parse(lines)
    return self
2929
30- def _read_content (self , file ):
def read_from_buffer(self, buffer):
    """Reads the captions from an already opened buffer.

    ``buffer`` is handed to ``_read_content_lines`` to obtain the
    content lines, which are then validated and parsed. Returns this
    same instance so calls can be chained.
    """
    lines = self._read_content_lines(buffer)
    self._validate(lines)
    self._parse(lines)
    return self
3136
32- first_bytes = min (32 , os .path .getsize (file ))
33- with open (file , 'rb' ) as f :
37+ def _get_content_from_file (self , file_path ):
38+ encoding = self ._read_file_encoding (file_path )
39+ with open (file_path , encoding = encoding ) as f :
40+ return self ._read_content_lines (f )
41+
42+ def _read_file_encoding (self , file_path ):
43+ first_bytes = min (32 , os .path .getsize (file_path ))
44+ with open (file_path , 'rb' ) as f :
3445 raw = f .read (first_bytes )
3546
3647 if raw .startswith (codecs .BOM_UTF8 ):
37- encoding = 'utf-8-sig'
48+ return 'utf-8-sig'
3849 else :
39- encoding = 'utf-8'
50+ return 'utf-8'
51+
52+ def _read_content_lines (self , file_obj ):
4053
41- with open (file , encoding = encoding ) as f :
42- lines = [line .rstrip ('\n ' ) for line in f .readlines ()]
54+ lines = [line .rstrip ('\n ' ) for line in file_obj .readlines ()]
4355
4456 if not lines :
4557 raise MalformedFileError ('The file is empty.' )
4658
4759 return lines
4860
61+ def _read_content (self , file ):
62+ return self ._get_content_from_file (file_path = file )
63+
4964 def _parse_timeframe_line (self , line ):
5065 """Parse timeframe line and return start and end timestamps."""
5166 tf = self ._validate_timeframe_line (line )
0 commit comments