Skip to content

Commit 0b221ac

Browse files
committed
[WIP] bring back bailout, and cleanly separate launch from results
Starting the tapParser right away and inside the attempt, means when we throw an exception for browser connect timeout, we have no way to produce a finalResult / clientresult event. This is why bailout was separate, but this is messy to deal with in reporters and also makes partial results hard to transfer. Instead, handle our bailout the same way as TAP bailout: As part of the clientresult event, not as a separate event. Upstream tap-parser emits finalResult#bailout, we can do the same in clientresult. For this to work we need to make tap-finished trigger on bailout, which it didn't until now. The benefit of this is also that there is no longer any risk of multiple browser attempts communicating over the same TAP stream and producing duplicate or conflicting results because we do not attach the TAP stream until after the browser has connected. For additional disambiguation we also mint the clientId during the attempt, instead of sharing it across retries.
1 parent 38f7441 commit 0b221ac

File tree

2 files changed

+158
-112
lines changed

2 files changed

+158
-112
lines changed

src/server.js

Lines changed: 141 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class ControlServer {
9292
this.connectTimeout = options.connectTimeout;
9393
this.debugMode = options.debugMode;
9494

95+
this.launchingBrowsers = new Set();
9596
this.browsers = new Map();
9697
// Optimization: Prefetch test file in parallel with server creation and browser launching.
9798
//
@@ -263,15 +264,9 @@ class ControlServer {
263264
const clientId = url.searchParams.get('qtap_clientId');
264265
if (clientId !== null) {
265266
// Serve the testfile from any URL path, as chosen by launchBrowser()
266-
const browser = this.browsers.get(clientId);
267-
if (browser) {
268-
browser.logger.debug('browser_connected', `${browser.getDisplayName()} connected! Serving test file.`);
269-
this.eventbus.emit('clientonline', {
270-
clientId,
271-
testFile: this.testFile + this.testFileQueryString,
272-
browserName: browser.browserName,
273-
displayName: browser.getDisplayName(),
274-
});
267+
if (this.launchingBrowsers.has(clientId)) {
268+
this.launchingBrowsers.delete(clientId);
269+
this.eventbus.emit('clientonline', { clientId });
275270
} else if (this.debugMode) {
276271
// Allow users to reload the page when in --debug mode.
277272
// Note that the results of this reload will not be reported, because
@@ -296,11 +291,6 @@ class ControlServer {
296291
resp.writeHead(200);
297292
resp.write(testFileResp.body);
298293
resp.end();
299-
300-
// Count proxying the test file toward connectTimeout, not idleTimeout.
301-
if (browser) {
302-
browser.clientIdleActive = performance.now();
303-
}
304294
return;
305295
}
306296

@@ -382,9 +372,95 @@ class ControlServer {
382372
}
383373

384374
async launchBrowser (browserFn, browserName, globalSignal) {
385-
const clientId = 'client_S' + this.serverId + '_C' + this.nextClientId++;
386-
const logger = this.logger.channel(`qtap_browser_${clientId}_${browserName}`);
375+
const maxTries = (browserFn.allowRetries === false || this.debugMode) ? 1 : 3;
376+
let i = 1;
377+
let browserPromise;
378+
while (true) {
379+
const clientId = 'client_S' + this.serverId + '_C' + this.nextClientId++;
380+
const logger = this.logger.channel(`qtap_browser_${clientId}_${browserName}`);
381+
const controller = new AbortController();
382+
const signals = {
383+
// NOTE: The browser signal tracks both "browser" and "global" controllers,
384+
// so that if qtap.run() bails out (e.g. uncaught error from a reporter, or
385+
// test server fails in fetchTestFile due to file not found), and if for
386+
// some reason the natural shutdown fails (i.e. we don't call
387+
// server.stopBrowsers or don't await browerPromise), then we have one
388+
// last chance during shared_cleanup to stop dangling browser processes.
389+
browser: AbortSignal.any([controller.signal, globalSignal]),
390+
global: globalSignal
391+
};
392+
if (this.debugMode) {
393+
// Replace with a dummy signal that we never invoke
394+
//
395+
// TODO: Refactor reporting and process mgmt so that results are reported
396+
// on the CLI as normal. Right now, once the test finishes, the CLI doesn't
397+
// report it because it waits to report until the browser exits. We need
398+
// to create a way to keep report the results yet keep the process open.
399+
signals.browser = (new AbortController()).signal;
400+
signals.browser.addEventListener('abort', () => {
401+
browser.logger.warning('browser_signal_debugging', 'Keeping browser open for debugging');
402+
});
403+
}
404+
405+
// TODO: rename browser=>client
406+
const browser = {
407+
clientId,
408+
logger,
409+
browserName,
410+
getDisplayName () {
411+
return browserFn.getDisplayName();
412+
},
413+
/**
414+
* Reasons to stop a browser, whichever comes first:
415+
* 1. tap-finished (client has sent us the test results).
416+
* 2. tap-parser 'bailout' event (client knows it crashed).
417+
* 3. connect timeout
418+
* 4. idle timeout (client idle and presumed lost, or a silent crash).
419+
*
420+
* @param {any} reason
421+
*/
422+
stop: async (reason) => {
423+
if (!this.browsers.has(clientId)) {
424+
// Ignore any duplicate or late reasons to stop
425+
return;
426+
}
427+
428+
this.browsers.delete(clientId);
429+
controller.abort(reason);
430+
}
431+
};
432+
try {
433+
browserPromise = await this.launchBrowserAttempt(browserFn, browser, signals);
434+
break;
435+
} catch (e) {
436+
// Do not retry uncaught errors from browserFn or client-side bailout,
437+
// which are expected to be deterministic. Only retry BrowserConnectTimeout.
438+
if (e instanceof util.BrowserConnectTimeout && i < maxTries) {
439+
i++;
440+
this.logger.debug('browser_connect_retry', `Retrying, attempt ${i} of ${maxTries}`);
441+
} else {
442+
if (e instanceof util.QTapError) {
443+
e.qtapClient = {
444+
browser: browserFn.displayName,
445+
testFile: this.testFile
446+
};
447+
}
448+
throw e;
449+
}
450+
}
451+
}
452+
453+
await this.getClientResult(browserPromise);
454+
}
387455

456+
/**
457+
* Launch a browser once and throw if it takes too long to connect.
458+
*
459+
* @param {Function} browserFn
460+
* @param {Object} browser
461+
* @param {Object<string,AbortSignal>} signals
462+
*/
463+
async launchBrowserAttempt (browserFn, browser, signals) {
388464
const proxyBase = this.getProxyBase();
389465
// Serve the a test file from URL that looks like the original path when possible.
390466
//
@@ -402,97 +478,63 @@ class ControlServer {
402478
// (e.g. www.mysite.test/mysite/). That test case depends on the real path.
403479
// https://github.com/WordPress/wordpress-develop/blob/6.7.1/tests/qunit/wp-admin/js/password-strength-meter.js#L100
404480
const tmpUrl = new URL(this.testFile + this.testFileQueryString, proxyBase);
405-
tmpUrl.searchParams.set('qtap_clientId', clientId);
481+
tmpUrl.searchParams.set('qtap_clientId', browser.clientId);
406482
const url = proxyBase + tmpUrl.pathname + tmpUrl.search;
407483

408-
const maxTries = (browserFn.allowRetries === false || this.debugMode) ? 1 : 3;
409-
let i = 1;
410-
while (true) {
411-
try {
412-
const browserPromise = this.launchBrowserAttempt(browserFn, browserName, globalSignal, clientId, url, logger);
413-
const result = await browserPromise;
414-
logger.debug('event_clientresult', result);
415-
this.eventbus.emit('clientresult', result);
416-
return;
417-
} catch (e) {
418-
// Do not retry uncaught errors from browserFn or client-side bailout,
419-
// which are expected to be deterministic. Only retry BrowserConnectTimeout.
420-
if (i >= maxTries || !(e instanceof util.BrowserConnectTimeout)) {
421-
if (e instanceof util.QTapError) {
422-
e.qtapClient = {
423-
browser: browserFn.displayName,
424-
testFile: this.testFile
425-
};
426-
}
427-
throw e;
428-
}
484+
const connectTimeoutTimer = setTimeout(() => {
485+
const reason = `Browser did not start within ${this.connectTimeout}s`;
486+
this.launchingBrowsers.delete(browser.clientId);
487+
browser.stop(new util.BrowserConnectTimeout(reason));
488+
browser.logger.warning('browser_connect_timeout', reason);
489+
}, this.connectTimeout * 1000);
490+
491+
this.on('clientonline', (event) => {
492+
if (event.clientId === browser.clientId) {
493+
browser.logger.debug('browser_connected', `${browserFn.getDisplayName()} connected! Serving test file.`);
494+
this.launchingBrowsers.delete(browser.clientId);
495+
clearTimeout(connectTimeoutTimer);
496+
}
497+
});
498+
499+
this.launchingBrowsers.add(browser.clientId);
500+
browser.logger.debug('browser_launch_call');
501+
502+
try {
503+
await browserFn(url, signals, browser.logger, this.debugMode);
429504

430-
i++;
431-
logger.debug('browser_connect_retry', `Retrying, attempt ${i} of ${maxTries}`);
505+
browser.logger.debug('browser_launch_exit');
506+
} catch (/** @type {Error|Object|string} */ e) {
507+
// Silence any errors from browserFn that happen after we called browser.stop().
508+
if (signals.browser.aborted) {
509+
browser.logger.debug('browser_launch_stopped', String(e.cause || e));
510+
} else {
511+
browser.logger.warning('browser_launch_error', e);
512+
throw e;
432513
}
433514
}
434-
}
435515

436-
async launchBrowserAttempt (browserFn, browserName, globalSignal, clientId, url, logger) {
437-
const controller = new AbortController();
438-
439-
const signals = {
440-
// NOTE: The browser signal tracks both "browser" and "global" controllers,
441-
// so that if qtap.run() bails out (e.g. uncaught error from a reporter, or
442-
// test server fails in fetchTestFile due to file not found), and if for
443-
// some reason the natural shutdown fails (i.e. we don't call
444-
// server.stopBrowsers or don't await browerPromise), then we have one
445-
// last chance during shared_cleanup to stop dangling browser processes.
446-
browser: AbortSignal.any([controller.signal, globalSignal]),
447-
global: globalSignal
448-
};
449-
if (this.debugMode) {
450-
// Replace with a dummy signal that we never invoke
451-
//
452-
// TODO: Refactor reporting and process mgmt so that results are reported
453-
// on the CLI as normal. Right now, once the test finishes, the CLI doesn't
454-
// report it because it waits to report until the browser exits. We need
455-
// to create a way to keep report the results yet keep the process open.
456-
signals.browser = (new AbortController()).signal;
457-
signals.browser.addEventListener('abort', () => {
458-
logger.warning('browser_signal_debugging', 'Keeping browser open for debugging');
459-
});
516+
if (signals.browser.aborted) {
517+
// Throw BrowserConnectTimeout for retry purposes.
518+
throw signals.browser.reason;
460519
}
520+
}
461521

522+
/**
523+
* Await a browser process and consume its results.
524+
*
525+
* @param {Function} browserFn
526+
* @param {Object} browser
527+
* @param {Object<string,AbortSignal>} signals
528+
*/
529+
async getClientResult (browserPromise, browser, signals, clientId, url, logger) {
462530
let clientIdleTimer;
463531

464-
const browser = {
465-
clientId,
466-
logger,
467-
clientIdleActive: null,
468-
browserName,
469-
getDisplayName () {
470-
return browserFn.getDisplayName();
471-
},
472-
/**
473-
* Reasons to stop a browser, whichever comes first:
474-
* 1. tap-finished (client has sent us the test results).
475-
* 2. tap-parser 'bailout' event (client knows it crashed).
476-
* 3. timeout (client didn't connect, client idle and presumed lost, or a silent crash).
477-
*
478-
* @param {any} reason
479-
*/
480-
stop: async (reason) => {
481-
if (!this.browsers.has(clientId)) {
482-
// Ignore any duplicate or late reasons to stop
483-
return;
484-
}
485-
486-
clearTimeout(clientIdleTimer);
487-
this.browsers.delete(clientId);
488-
controller.abort(reason);
489-
}
490-
};
491-
492532
let result;
493533
const tapParser = tapFinished({ wait: 0 }, (finalResult) => {
534+
clearTimeout(clientIdleTimer);
535+
494536
result = {
495-
clientId,
537+
clientId: browser.clientId,
496538
ok: finalResult.ok,
497539
total: finalResult.count,
498540
// avoid precomputed `finalResult.todo` because that
@@ -557,16 +599,8 @@ class ControlServer {
557599
// Node.js/V8 natively allocating many timers when processing large batches of test results.
558600
// Instead, merely store performance.now() and check that periodically.
559601
const TIMEOUT_CHECK_MS = 100;
560-
const browserStart = performance.now();
561602
const qtapCheckTimeout = () => {
562-
if (!browser.clientIdleActive) {
563-
if ((performance.now() - browserStart) > (this.connectTimeout * 1000)) {
564-
const reason = `Browser did not start within ${this.connectTimeout}s`;
565-
logger.warning('browser_connect_timeout', reason);
566-
browser.stop(new util.BrowserConnectTimeout(reason));
567-
return;
568-
}
569-
} else {
603+
if (browser.connected) {
570604
if ((performance.now() - browser.clientIdleActive) > (this.idleTimeout * 1000)) {
571605
const reason = `Browser idle for ${this.idleTimeout}s`;
572606
logger.warning('browser_idle_timeout', reason);
@@ -579,12 +613,10 @@ class ControlServer {
579613
clientIdleTimer = setTimeout(qtapCheckTimeout, TIMEOUT_CHECK_MS);
580614

581615
try {
582-
logger.debug('browser_launch_call');
583-
584-
await browserFn(url, signals, logger, this.debugMode);
616+
await browserPromise;
585617

586618
// Usually browserFn() will return because we asked via browser.stop(), e.g. tests finished,
587-
// bailed, or timed out. In case the browser ended by itself, we call browser.stop() here,
619+
// or timed out. In case the browser ended by itself, we call browser.stop() here,
588620
// so that if we didn't called it before, this will report an error.
589621
// Also, this ensures the signal can clean up any resources created by browserFn.
590622
logger.debug('browser_launch_exit');
@@ -606,6 +638,8 @@ class ControlServer {
606638
}
607639

608640
return result;
641+
logger.debug('event_clientresult', result);
642+
this.eventbus.emit('clientresult', result);
609643
}
610644

611645
/** @return {string} */

src/tap-finished.js

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,18 @@
2828
* Upgrade from tap-parser@5 to tap-parser@18, so that QTap doesn't pull in the
2929
* 35 dependencies that tap-parser@5 carried to support old Node.js versions.
3030
*
31-
* - Consider bailout as finished to avoid a hanging process.
31+
* - Add bailout as 'finished' trigger, to avoid a hanging process.
32+
*
33+
* - Fix hang when calling p.end() to close unfinished stream,
34+
* due to unreachable inner 'complete' event when calling finish()
35+
* from the outer 'complete' event (no memory, not fired twice).
36+
*
37+
* This appears to be caused by [1] when switching finish() from `cb(p.results)`,
38+
* to `p.on('complete', cb)`.
39+
*
40+
* - Fix duplicate p.end() when wait=0 due to missing !ended check.
41+
*
42+
* [1]: https://github.com/tapjs/tap-finished/commit/35b653d778a4f387c269f6dfbe95066455636410
3243
*/
3344
'use strict';
3445

@@ -60,7 +71,7 @@ export default function tapFinished (opts, cb) {
6071
});
6172
if (opts.wait && !ended) {
6273
setTimeout(function () { p.end(); }, opts.wait);
63-
} else { p.end(); }
74+
} else if (!ended) { p.end(); }
6475
}
6576

6677
function check () {
@@ -81,14 +92,15 @@ export default function tapFinished (opts, cb) {
8192
check();
8293
});
8394

84-
p.on('complete', function () {
95+
p.on('bailout', function () {
8596
if (finished) { return; }
8697
finish();
8798
});
8899

89-
p.on('bailout', function () {
100+
p.on('complete', function () {
90101
if (finished) { return; }
91-
finish();
102+
finished = true;
103+
cb(p.results);
92104
});
93105

94106
return p;

0 commit comments

Comments
 (0)