Skip to content

Commit f0df93d

Browse files
authored
perf: use pull architecture and leaner streams (#95)
1 parent b80a5a7 commit f0df93d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+2845
-1217
lines changed

README.md

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,9 @@ const fileStream = createWriteStream('./project.tar');
156156
await pipeline(tarStream, fileStream);
157157

158158
// Extract a tar file to a directory
159-
const tarReadStream = createReadStream('./project.tar');
159+
const tarReadStream = createReadStream('./project.tar', {
160+
highWaterMark: 256 * 1024 // 256 KB for optimal performance
161+
});
160162
const extractStream = unpackTar('./output/directory');
161163
await pipeline(tarReadStream, extractStream);
162164
```
@@ -176,7 +178,9 @@ const packStream = packTar('./my/project', {
176178
});
177179

178180
// Unpack with advanced options
179-
const sourceStream = createReadStream('./archive.tar');
181+
const sourceStream = createReadStream('./archive.tar', {
182+
highWaterMark: 256 * 1024 // 256 KB for optimal performance
183+
});
180184
const extractStream = unpackTar('./output', {
181185
// Core options
182186
strip: 1, // Remove first directory level
@@ -187,8 +191,7 @@ const extractStream = unpackTar('./output', {
187191
fmode: 0o644, // Override file permissions
188192
dmode: 0o755, // Override directory permissions
189193
maxDepth: 50, // Limit extraction depth for security (default: 1024)
190-
concurrency: 8, // Limit concurrent filesystem operations (default: CPU cores)
191-
streamTimeout: 10000 // Timeout after 10 seconds of inactivity (default: 5000ms)
194+
concurrency: 8 // Limit concurrent filesystem operations (default: CPU cores)
192195
});
193196

194197
await pipeline(sourceStream, extractStream);
@@ -205,7 +208,6 @@ import { pipeline } from 'node:stream/promises';
205208
const sources: TarSource[] = [
206209
{ type: 'file', source: './package.json', target: 'project/package.json' },
207210
{ type: 'directory', source: './src', target: 'project/src' },
208-
209211
{ type: 'content', content: 'Hello World!', target: 'project/hello.txt' },
210212
{ type: 'content', content: '#!/bin/bash\necho "Executable"', target: 'bin/script.sh', mode: 0o755 },
211213
{ type: 'stream', content: createReadStream('./large-file.bin'), target: 'project/data.bin', size: 1048576 },
@@ -229,14 +231,22 @@ const tarStream = packTar('./my/project');
229231
await pipeline(tarStream, createGzip(), createWriteStream('./project.tar.gz'));
230232

231233
// Decompress and extract .tar.gz
232-
const gzipStream = createReadStream('./project.tar.gz');
234+
const gzipStream = createReadStream('./project.tar.gz', {
235+
highWaterMark: 256 * 1024 // 256 KB for optimal performance
236+
});
233237
await pipeline(gzipStream, createGunzip(), unpackTar('./output'));
234238
```
235239

236240
## API Reference
237241

238242
See the [API Reference](./REFERENCE.md).
239243

244+
# Benchmarks
245+
246+
Current benchmarks indicate we're much faster than other popular tar libraries for small-file archives (both packing and unpacking). For larger files, however, all libraries hit the same I/O bottleneck, so their performance is roughly equivalent.
247+
248+
See the [Results](./benchmarks/README.md).
249+
240250
## Compatibility
241251

242252
The core library uses the [Web Streams API](https://caniuse.com/streams) and requires:

REFERENCE.md

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,13 @@ import { unpackTar } from 'modern-tar/fs';
149149
import { createReadStream } from 'node:fs';
150150
import { pipeline } from 'node:stream/promises';
151151

152-
const tarStream = createReadStream('backup.tar');
152+
const tarStream = createReadStream('backup.tar', {
153+
highWaterMark: 256 * 1024 // 256 KB for optimal performance
154+
});
153155
const extractStream = unpackTar('/restore/location', {
154156
strip: 1,
155157
fmode: 0o644, // Set consistent file permissions
156158
strict: true, // Enable strict validation
157-
streamTimeout: 10000, // Timeout after 10 seconds of inactivity
158159
});
159160
await pipeline(tarStream, extractStream);
160161
```
@@ -215,12 +216,6 @@ interface UnpackOptions extends DecoderOptions {
215216
filter?: (header: TarHeader) => boolean;
216217
/** Transform function to modify tar headers before extraction */
217218
map?: (header: TarHeader) => TarHeader;
218-
/**
219-
* The number of milliseconds of inactivity before a stream is considered stalled.
220-
* Prevents hangs when processing corrupted or incomplete archives.
221-
* @default 5000
222-
*/
223-
streamTimeout?: number;
224219
}
225220
```
226221

@@ -291,8 +286,7 @@ interface UnpackOptionsFS extends UnpackOptions {
291286
filter?: (header: TarHeader) => boolean;
292287
/** Transform function to modify headers before extraction */
293288
map?: (header: TarHeader) => TarHeader;
294-
/** Stream timeout in milliseconds for detecting stalled streams */
295-
streamTimeout?: number;
289+
296290

297291
// Filesystem-specific options:
298292
/** Default mode for created directories (e.g., 0o755). Overrides tar header mode */

benchmarks/README.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -68,28 +68,28 @@ These benchmarks were run on an Apple M3 Pro and can include a lot of noise. Res
6868
┌─────────┬────────────────────────────────────────────────────┬─────────────────────┬────────────────────────┬────────────────────────┬────────────────────────┬─────────┐
6969
│ (index) │ Task name │ Latency avg (ns) │ Latency med (ns) │ Throughput avg (ops/s) │ Throughput med (ops/s) │ Samples │
7070
├─────────┼────────────────────────────────────────────────────┼─────────────────────┼────────────────────────┼────────────────────────┼────────────────────────┼─────────┤
71-
│ 0 │ 'modern-tar: Unpack Many Small Files (2500 x 1KB)''231053373 ± 3.03%''228506875 ± 23751375' '4 ± 3.03%''4 ± 0'65
72-
│ 1 │ 'node-tar: Unpack Many Small Files (2500 x 1KB)''232562313 ± 3.26%''222701583 ± 17677541' '4 ± 3.17%''4 ± 0'65
73-
│ 2 │ 'tar-fs: Unpack Many Small Files (2500 x 1KB)''524874342 ± 2.59%''518207771 ± 22493479''2 ± 2.45%''2 ± 0' │ 30 │
71+
│ 0 │ 'modern-tar: Unpack Many Small Files (2500 x 1KB)''135197180 ± 1.09%''134176333 ± 4318458' '7 ± 1.04%''7 ± 0'111
72+
│ 1 │ 'node-tar: Unpack Many Small Files (2500 x 1KB)''194336629 ± 1.10%''192998833 ± 7543895' '5 ± 1.09%''5 ± 0'78
73+
│ 2 │ 'tar-fs: Unpack Many Small Files (2500 x 1KB)''524907015 ± 2.02%''536602646 ± 18800583''2 ± 2.09%''2 ± 0' │ 30 │
7474
└─────────┴────────────────────────────────────────────────────┴─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴─────────┘
7575

7676
--- Many Small Nested Files (2500 x 1KB) ---
7777
┌─────────┬───────────────────────────────────────────────────────────┬─────────────────────┬────────────────────────┬────────────────────────┬────────────────────────┬─────────┐
7878
│ (index) │ Task name │ Latency avg (ns) │ Latency med (ns) │ Throughput avg (ops/s) │ Throughput med (ops/s) │ Samples │
7979
├─────────┼───────────────────────────────────────────────────────────┼─────────────────────┼────────────────────────┼────────────────────────┼────────────────────────┼─────────┤
80-
│ 0 │ 'modern-tar: Unpack Many Small Nested Files (2500 x 1KB)''210424280 ± 1.32%''209016354 ± 8327583''5 ± 1.30%''5 ± 0'72
81-
│ 1 │ 'node-tar: Unpack Many Small Nested Files (2500 x 1KB)''375473636 ± 2.64%''365691875 ± 16068167''3 ± 2.50%''3 ± 0'41
82-
│ 2 │ 'tar-fs: Unpack Many Small Nested Files (2500 x 1KB)''656863267 ± 1.94%''654005562 ± 25563875''2 ± 1.94%''2 ± 0' │ 30 │
80+
│ 0 │ 'modern-tar: Unpack Many Small Nested Files (2500 x 1KB)''166566736 ± 0.76%''165969875 ± 4058125''6 ± 0.75%''6 ± 0'91
81+
│ 1 │ 'node-tar: Unpack Many Small Nested Files (2500 x 1KB)''364342735 ± 1.64%''359822521 ± 12994563''3 ± 1.60%''3 ± 0'42
82+
│ 2 │ 'tar-fs: Unpack Many Small Nested Files (2500 x 1KB)''567124578 ± 1.46%''566608812 ± 14051833''2 ± 1.47%''2 ± 0' │ 30 │
8383
└─────────┴───────────────────────────────────────────────────────────┴─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴─────────┘
8484

8585
--- Few Large Files (5 x 20MB) ---
8686
┌─────────┬─────────────────────────────────────────────────┬────────────────────┬──────────────────────┬────────────────────────┬────────────────────────┬─────────┐
8787
│ (index) │ Task name │ Latency avg (ns) │ Latency med (ns) │ Throughput avg (ops/s) │ Throughput med (ops/s) │ Samples │
8888
├─────────┼─────────────────────────────────────────────────┼────────────────────┼──────────────────────┼────────────────────────┼────────────────────────┼─────────┤
89-
│ 0 │ 'modern-tar: Unpack Few Large Files (5 x 20MB)''47089232 ± 4.02%''44317458 ± 1487584''22 ± 1.20%''23 ± 1'319
90-
│ 1 │ 'node-tar: Unpack Few Large Files (5 x 20MB)''29503568 ± 7.99%''22717625 ± 2264667' '40 ± 2.29%''44 ± 5'509
91-
│ 2 │ 'tar-fs: Unpack Few Large Files (5 x 20MB)''47116490 ± 8.18%''39027917 ± 2082917''24 ± 2.30%''26 ± 1'319
89+
│ 0 │ 'modern-tar: Unpack Few Large Files (5 x 20MB)''35877987 ± 4.11%''29541167 ± 2477751''31 ± 2.45%''34 ± 3'419
90+
│ 1 │ 'node-tar: Unpack Few Large Files (5 x 20MB)''39709622 ± 4.50%''34362438 ± 904791' '27 ± 1.97%''29 ± 1'378
91+
│ 2 │ 'tar-fs: Unpack Few Large Files (5 x 20MB)''66414937 ± 7.42%''59902979 ± 8376146''16 ± 2.78%''17 ± 2'226
9292
└─────────┴─────────────────────────────────────────────────┴────────────────────┴──────────────────────┴────────────────────────┴────────────────────────┴─────────┘
9393
```
9494

95-
`modern-tar` is expected to be slower than alternatives due to its focus on zero dependencies, but there is still plenty of room to improve performance, and it is generally comparable for many use cases because the work is heavily I/O bound.
95+
For large files, `modern-tar` and `node-tar` perform almost identically, since at that size the bottleneck is I/O. For many small files, however, `modern-tar` shows much better results than the other libraries.

benchmarks/unpack.bench.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,12 @@ export async function runUnpackingBenchmarks() {
6565
.add(
6666
`modern-tar: Unpack ${testCase.name}`,
6767
async () => {
68-
const readStream = fs.createReadStream(tarballPath);
69-
const extractStream = unpackTar(extractDir);
70-
await pipeline(readStream, extractStream);
68+
const readStream = fs.createReadStream(tarballPath, {
69+
// Recommended tuning params for large files.
70+
highWaterMark: 256 * 1024, // 256 KB
71+
});
72+
const unpackStream = unpackTar(extractDir);
73+
await pipeline(readStream, unpackStream);
7174
},
7275
{
7376
async beforeEach() {

biome.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"$schema": "https://biomejs.dev/schemas/2.2.5/schema.json",
2+
"$schema": "https://biomejs.dev/schemas/2.3.5/schema.json",
33
"files": {
44
"includes": [
55
"**",
@@ -14,7 +14,12 @@
1414
"enabled": true
1515
},
1616
"linter": {
17-
"enabled": true
17+
"enabled": true,
18+
"rules": {
19+
"suspicious": {
20+
"noAssignInExpressions": "off"
21+
}
22+
}
1823
},
1924
"assist": {
2025
"enabled": true

0 commit comments

Comments
 (0)