Skip to content

Commit 207bfb9

Browse files
Merge pull request #451 from TeamMsgExtractor/next-release
Version 0.53.1
2 parents 373f6c1 + efd2a7a commit 207bfb9

File tree

10 files changed

+131
-20
lines changed

10 files changed

+131
-20
lines changed

.github/workflows/python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
strategy:
1515
fail-fast: false
1616
matrix:
17-
python-version: ["3.8", "3.9", "3.10", "3.11"]
17+
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
1818

1919
steps:
2020
- uses: actions/checkout@v4

CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,17 @@
1+
**v0.53.1**
2+
* Expanded allowable range for `red-black-tree-mod`.
3+
* Fix issue with `MessageBase.asEmailMessage()` that prevented embedded MSG files from being attached.
4+
* Expand allowable versions of `BeautifulSoup4`.
5+
6+
**v0.53.0**
7+
* Added tests for many functions in `extract_msg.utils`.
8+
* Fix an issue in `extract_msg.utils.msgPathToString()` that prevented backslashes from being replaced with forward slashes.
9+
* Change the behavior of `extract_msg.utils.minutesToDurationStr()` to properly use plurals.
10+
* Fixed issue in `extract_msg.utils.unwrapMsg()` that would prevent it from working on signed messages due to an API change.
11+
* Added new exception `MimetypeFailureError`.
12+
* Modified the logic of `MessageBase.asEmailMessage()` to use `AttachmentBase/SignedAttachment.name` instead of `getFilename()`, which only exists on `AttachmentBase`.
13+
* Modified the logic of `MessageBase.htmlBodyPrepared()` to properly put the mimetype in image tags to ensure rendering. Logic was also modified to use `encode` instead of `prettify` to reduce computation and output size.
14+
115
**v0.52.0**
216
* [[TeamMsgExtractor #444](https://github.com/TeamMsgExtractor/msg-extractor/issues/444)] Fix typo in string that prevented HTML body from generating from the plain text body properly.
317
* Adjusted the behavior of `MSGFile.areStringsUnicode` to prioritize the property specified by the parent MSG files for MSG files that are embedded. Additionally, added a fallback to rely on whether or not there is a stream using the `001F` type to determine the property value if it is entirely missing.

README.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,8 @@ your access to the newest major version of extract-msg.
260260
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
261261
:target: LICENSE.txt
262262

263-
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.52.0-blue.svg
264-
:target: https://pypi.org/project/extract-msg/0.52.0/
263+
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.53.1-blue.svg
264+
:target: https://pypi.org/project/extract-msg/0.53.1/
265265

266266
.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
267267
:target: https://www.python.org/downloads/release/python-3810/

extract_msg/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
# along with this program. If not, see <http://www.gnu.org/licenses/>.
2828

2929
__author__ = 'Destiny Peterson & Matthew Walker'
30-
__date__ = '2024-10-22'
31-
__version__ = '0.52.0'
30+
__date__ = '2025-02-05'
31+
__version__ = '0.53.1'
3232

3333
__all__ = [
3434
# Modules:

extract_msg/exceptions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ class InvalidPropertyIdError(ExMsgBaseException):
9292
The provided property ID was invalid.
9393
"""
9494

95+
class MimetypeFailureError(ExMsgBaseException):
96+
"""
97+
The mimetype could not be determined in a situation where it is required.
98+
"""
99+
95100
class NotWritableError(ExMsgBaseException):
96101
"""
97102
Modification was attempted on an instance that is not writable.

extract_msg/msg_classes/message_base.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@
3939
)
4040
from ..exceptions import (
4141
ConversionError, DataNotFoundError, DeencapMalformedData,
42-
DeencapNotEncapsulated, IncompatibleOptionsError, WKError
42+
DeencapNotEncapsulated, IncompatibleOptionsError, MimetypeFailureError,
43+
WKError
4344
)
4445
from .msg import MSGFile
4546
from ..structures.report_tag import ReportTag
@@ -178,13 +179,10 @@ def asEmailMessage(self) -> EmailMessage:
178179
if att.dataType:
179180
if hasattr(att.dataType, 'asEmailMessage'):
180181
# Replace the extension with '.eml'.
181-
filename = att.getFilename()
182+
filename = att.name or ''
182183
if filename.lower().endswith('.msg'):
183184
filename = filename[:-4] + '.eml'
184-
msgMain.add_attachment(
185-
att.data.asEmailMessage(),
186-
filename = filename,
187-
cid = att.contentId)
185+
msgMain.attach(att.data.asEmailMessage())
188186
else:
189187
if issubclass(att.dataType, bytes):
190188
data = att.data
@@ -1198,12 +1196,35 @@ def htmlBodyPrepared(self) -> Optional[bytes]:
11981196
for tag in tags:
11991197
# Iterate through the attachments until we get the right one.
12001198
cid = tag['src'][4:]
1201-
data = next((attachment.data for attachment in self.attachments if attachment.cid == cid), None)
1199+
att = next((attachment for attachment in self.attachments if hasattr(attachment, 'cid') and attachment.cid == cid), None)
12021200
# If we found anything, inject it.
1203-
if data:
1204-
tag['src'] = (b'data:image;base64,' + base64.b64encode(data)).decode('utf-8')
1201+
if att and isinstance(att.data, bytes):
1202+
# Try to get the mimetype. If we can't, see if the item has an
1203+
# extension and guess the mimetype for a few known ones.
1204+
mime = att.mimetype
1205+
if not mime:
1206+
ext = (att.name or '').split('.')[-1].lower()
1207+
if ext == 'png':
1208+
mime = 'image/png'
1209+
elif ext == 'jpg' or ext == 'jpeg':
1210+
mime = 'image/jpeg'
1211+
elif ext == 'gif':
1212+
mime = 'image/gif'
1213+
elif ext == 'tiff' or ext == 'tif':
1214+
mime = 'image/tif'
1215+
elif ext == 'bmp':
1216+
mime = 'image/bmp'
1217+
elif ext == 'svg':
1218+
mime = 'image/svg+xml'
1219+
# Final check.
1220+
if mime:
1221+
tag['src'] = (b'data:' + mime.encode() + b';base64,' + base64.b64encode(att.data)).decode('utf-8')
1222+
else:
1223+
# We don't know what to actually put for this item, and we
1224+
# really should never end up here, so throw an error.
1225+
raise MimetypeFailureError('Could not get the mimetype to use for htmlBodyPrepared.')
12051226

1206-
return soup.prettify('utf-8')
1227+
return soup.encode('utf-8')
12071228

12081229
@functools.cached_property
12091230
def htmlInjectableHeader(self) -> str:

extract_msg/utils.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -696,8 +696,17 @@ def minutesToDurationStr(minutes: int) -> str:
696696
return '1 minute'
697697
elif minutes < 60:
698698
return f'{minutes} minutes'
699+
elif minutes == 60:
700+
return '1 hour'
699701
elif minutes % 60 == 0:
700702
return f'{minutes // 60} hours'
703+
elif minutes < 120:
704+
if minutes == 61:
705+
return f'1 hour 1 minute'
706+
else:
707+
return f'1 hour {minutes - 60} minutes'
708+
elif minutes % 60 == 1:
709+
return f'{minutes // 60} hours 1 minute'
701710
else:
702711
return f'{minutes // 60} hours {minutes % 60} minutes'
703712

@@ -709,8 +718,7 @@ def msgPathToString(inp: Union[str, Iterable[str]]) -> str:
709718
"""
710719
if not isinstance(inp, str):
711720
inp = '/'.join(inp)
712-
inp.replace('\\', '/')
713-
return inp
721+
return inp.replace('\\', '/')
714722

715723

716724
def parseType(_type: int, stream: Union[int, bytes], encoding: str, extras: Sequence[bytes]):
@@ -1094,7 +1102,7 @@ def unwrapMsg(msg: MSGFile) -> Dict[str, List]:
10941102
msgFiles.append(att.data)
10951103
toProcess.append(att.data)
10961104
if isinstance(currentItem, MessageSignedBase):
1097-
raw += currentItem._rawAttachments
1105+
raw += currentItem.rawAttachments
10981106

10991107
return {
11001108
'attachments': attachments,

extract_msg_tests/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
'OleWriterEditingTests',
55
'OleWriterExportTests',
66
'PropTests',
7+
'UtilTests',
78
'ValidationTests',
89
]
910

1011
from .attachment_tests import AttachmentTests
1112
from .cmd_line_tests import CommandLineTests
1213
from .ole_writer_tests import OleWriterEditingTests, OleWriterExportTests
1314
from .prop_tests import PropTests
15+
from .util_tests import UtilTests
1416
from .validation_tests import ValidationTests

extract_msg_tests/util_tests.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
__all__ = [
2+
'UtilTests',
3+
]
4+
5+
6+
import unittest
7+
8+
from extract_msg import utils
9+
10+
11+
class UtilTests(unittest.TestCase):
12+
def test_dictGetCasedKey(self):
13+
caseDict = {'hello': 1, 'HeUtQjWkW': 2}
14+
15+
self.assertEqual(utils.dictGetCasedKey(caseDict, 'Hello'), 'hello')
16+
self.assertEqual(utils.dictGetCasedKey(caseDict, 'heutqjwkw'), 'HeUtQjWkW')
17+
with self.assertRaises(KeyError):
18+
utils.dictGetCasedKey(caseDict, 'jjjjj')
19+
20+
def test_divide(self):
21+
inputString = '12345678901234567890'
22+
expectedOutputs = {
23+
1: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'],
24+
2: ['12', '34', '56', '78', '90', '12', '34', '56', '78', '90'],
25+
3: ['123', '456', '789', '012', '345', '678', '90'],
26+
4: ['1234', '5678', '9012', '3456', '7890'],
27+
5: ['12345', '67890', '12345', '67890'],
28+
6: ['123456', '789012', '345678', '90'],
29+
7: ['1234567', '8901234', '567890'],
30+
8: ['12345678', '90123456', '7890'],
31+
9: ['123456789', '012345678', '90'],
32+
10: ['1234567890', '1234567890'],
33+
11: ['12345678901', '234567890'],
34+
}
35+
36+
for divideBy, expectedResult in expectedOutputs.items():
37+
self.assertListEqual(utils.divide(inputString, divideBy), expectedResult)
38+
39+
def test_makeWeakRef(self):
40+
self.assertIsNone(utils.makeWeakRef(None))
41+
class TestClass:
42+
pass
43+
self.assertIsNotNone(utils.makeWeakRef(TestClass()))
44+
45+
def test_minutesToDurationStr(self):
46+
self.assertEqual(utils.minutesToDurationStr(0), '0 hours')
47+
self.assertEqual(utils.minutesToDurationStr(1), '1 minute')
48+
self.assertEqual(utils.minutesToDurationStr(2), '2 minutes')
49+
self.assertEqual(utils.minutesToDurationStr(59), '59 minutes')
50+
self.assertEqual(utils.minutesToDurationStr(60), '1 hour')
51+
self.assertEqual(utils.minutesToDurationStr(61), '1 hour 1 minute')
52+
self.assertEqual(utils.minutesToDurationStr(62), '1 hour 2 minutes')
53+
self.assertEqual(utils.minutesToDurationStr(120), '2 hours')
54+
self.assertEqual(utils.minutesToDurationStr(121), '2 hours 1 minute')
55+
self.assertEqual(utils.minutesToDurationStr(122), '2 hours 2 minutes')
56+
57+
def test_msgPathToStr(self):
58+
self.assertEqual(utils.msgPathToString('hello/world/one'), 'hello/world/one')
59+
self.assertEqual(utils.msgPathToString('hello/world\\one'), 'hello/world/one')
60+
self.assertEqual(utils.msgPathToString(['hello', 'world', 'one']), 'hello/world/one')
61+
self.assertEqual(utils.msgPathToString(['hello\\world', 'one']), 'hello/world/one')

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ olefile==0.47
66
tzlocal>=4.2,<6
77
compressed-rtf>=1.0.6,<2
88
ebcdic>=1.1.1,<2
9-
beautifulsoup4>=4.11.1,<4.13
9+
beautifulsoup4>=4.11.1,<4.14
1010
RTFDE>=0.1.1,<0.2
11-
red-black-tree-mod==1.20
11+
red-black-tree-mod>=1.20, <=1.23

0 commit comments

Comments
 (0)