Skip to content

Commit 8f6a4ea

Browse files
authored
Merge pull request #23 from omerholz/read_from_memory
let the parser read content either from a file or from a file-like object
2 parents 2e7418f + 730f358 commit 8f6a4ea

File tree

4 files changed

+55
-8
lines changed

4 files changed

+55
-8
lines changed

tests/webvtt.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import webvtt
66
from webvtt.structures import Caption, Style
77
from .generic import GenericParserTestCase
8+
from webvtt.errors import MalformedFileError
89

910

1011
BASE_DIR = os.path.dirname(__file__)
@@ -222,6 +223,28 @@ def test_manipulate_lines(self):
222223
'Caption line #1 updated'
223224
)
224225

226+
def test_read_file_buffer(self):
    """read_buffer accepts an already-open text-mode file handle."""
    sample_path = self._get_file('sample.vtt')
    with open(sample_path, 'r', encoding='utf-8') as handle:
        parsed = webvtt.read_buffer(handle)
        self.assertIsInstance(parsed.captions, list)
230+
231+
def test_read_memory_buffer(self):
    """read_buffer accepts an in-memory io.StringIO buffer."""
    # The sample is loaded into a plain string first so the parser is
    # exercised against a buffer that is not backed by a file on disk.
    with open(self._get_file('sample.vtt'), 'r', encoding='utf-8') as f:
        payload = f.read()

    buffer = io.StringIO(payload)
    vtt = webvtt.read_buffer(buffer)
    self.assertIsInstance(vtt.captions, list)
239+
240+
def test_read_malformed_buffer(self):
    """read_buffer raises MalformedFileError for each malformed payload."""
    malformed_payloads = ['', 'MOCK MELFORMED CONTENT']
    for payload in malformed_payloads:
        # subTest keeps the loop going after a failure and names the
        # offending payload in the report, instead of stopping at the
        # first payload that does not raise.
        with self.subTest(payload=payload):
            buffer = io.StringIO(payload)
            with self.assertRaises(MalformedFileError):
                webvtt.read_buffer(buffer)
246+
247+
225248
def test_captions(self):
226249
vtt = webvtt.read(self._get_file('sample.vtt'))
227250
self.assertIsInstance(vtt.captions, list)

webvtt/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
__all__ = webvtt.__all__ + segmenter.__all__ + structures.__all__ + errors.__all__
99

1010
read = WebVTT.read
11+
read_buffer = WebVTT.read_buffer
1112
from_srt = WebVTT.from_srt
1213
from_sbv = WebVTT.from_sbv
1314
list_formats = WebVTT.list_formats

webvtt/parsers.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,31 +21,46 @@ def __init__(self, parse_options=None):
2121

2222
def read(self, file):
    """Read, validate and parse the captions file at ``file``.

    Returns the parser itself so calls can be chained.
    """
    lines = self._get_content_from_file(file_path=file)
    self._validate(lines)
    self._parse(lines)
    return self
2929

30-
def _read_content(self, file):
30+
def read_from_buffer(self, buffer):
    """Read, validate and parse captions from a file-like object.

    ``buffer`` must provide ``readlines()``. Returns the parser itself so
    calls can be chained.
    """
    lines = self._read_content_lines(buffer)
    self._validate(lines)
    self._parse(lines)
    return self
3136

32-
first_bytes = min(32, os.path.getsize(file))
33-
with open(file, 'rb') as f:
37+
def _get_content_from_file(self, file_path):
    """Open ``file_path`` with its detected encoding and return its lines."""
    detected_encoding = self._read_file_encoding(file_path)
    with open(file_path, encoding=detected_encoding) as handle:
        content_lines = self._read_content_lines(handle)
    return content_lines
41+
42+
def _read_file_encoding(self, file_path):
    """Return the codec name to use when decoding ``file_path``.

    Only the first bytes of the file are inspected: if they are the UTF-8
    byte order mark the result is 'utf-8-sig' (which drops the BOM while
    decoding), otherwise it is 'utf-8'.

    Raises OSError if the file cannot be opened.
    """
    # read(n) simply returns fewer bytes for a shorter (or empty) file, so
    # there is no need to stat the file first; skipping the stat also
    # removes the gap between the size check and the open.
    with open(file_path, 'rb') as f:
        raw = f.read(len(codecs.BOM_UTF8))

    if raw.startswith(codecs.BOM_UTF8):
        return 'utf-8-sig'
    return 'utf-8'
51+
52+
def _read_content_lines(self, file_obj):
    """Return the lines of ``file_obj`` with line terminators removed.

    ``file_obj`` is any object providing ``readlines()``: a file opened in
    text mode, an ``io.StringIO``, or a binary buffer whose lines are
    UTF-8 encoded bytes.

    Raises MalformedFileError if the object yields no lines at all.
    """
    lines = []
    for raw_line in file_obj.readlines():
        if isinstance(raw_line, bytes):
            # Binary buffers (e.g. a default-mode tempfile.TemporaryFile)
            # yield bytes; WebVTT content is UTF-8 encoded.
            raw_line = raw_line.decode('utf-8')
        # In-memory text buffers do no newline translation, so CRLF
        # content would otherwise keep a trailing '\r' on every line.
        lines.append(raw_line.rstrip('\r\n'))

    if not lines:
        raise MalformedFileError('The file is empty.')

    # A caller-supplied buffer does not go through the BOM-aware open()
    # used for file paths, so drop a leading byte order mark here.
    if lines[0].startswith('\ufeff'):
        lines[0] = lines[0][1:]

    return lines
4860

61+
def _read_content(self, file):
    """Return the content lines of the captions file at ``file``.

    Thin wrapper that forwards to ``_get_content_from_file``.
    NOTE(review): presumably retained so callers of the previous method
    name keep working - confirm before removing.
    """
    return self._get_content_from_file(file)
63+
4964
def _parse_timeframe_line(self, line):
5065
"""Parse timeframe line and return start and end timestamps."""
5166
tf = self._validate_timeframe_line(line)

webvtt/webvtt.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@ def read(cls, file):
6060
parser = WebVTTParser().read(file)
6161
return cls(file=file, captions=parser.captions, styles=parser.styles)
6262

63+
@classmethod
def read_buffer(cls, buffer):
    """Read WebVTT captions from a file-like object.

    Such a file-like object may be the return value of an io.open call,
    an io.StringIO object, a tempfile.TemporaryFile object, etc.

    No ``file`` argument is passed to the constructor, since the content
    does not come from a path.
    """
    parsed = WebVTTParser().read_from_buffer(buffer)
    return cls(captions=parsed.captions, styles=parsed.styles)
70+
6371
def _get_output_file(self, output, extension='vtt'):
6472
if not output:
6573
if not self.file:

0 commit comments

Comments
 (0)