diff --git a/__tests__/brackets.test.js b/__tests__/brackets.test.js
new file mode 100644
index 000000000..356eba004
--- /dev/null
+++ b/__tests__/brackets.test.js
@@ -0,0 +1,43 @@
+import { mdast, md } from '../index';
+
+// Covers how square brackets are handled by mdast()/md(), including a shortcut reference that is
+// directly followed by an unmatched "[" (for example "[bar][bar").
+describe('square brackets behavior', () => {
+  it('retains unmatched open brackets after a closed bracket (link reference)', () => {
+    const markdown = '[bar][bar';
+    expect(md(mdast(markdown))).toMatchInlineSnapshot(`
+      "[bar][bar
+      "
+    `);
+  });
+
+  it('leaves normal shortcut references followed by text untouched', () => {
+    const markdown = '[bar]bar';
+    expect(md(mdast(markdown))).toMatchInlineSnapshot(`
+      "[bar]bar
+      "
+    `);
+  });
+
+  it('parses consecutive square brackets as link references', () => {
+    const markdown = '[first][second]';
+    const tree = mdast(markdown);
+    const paragraph = tree.children[0];
+
+    expect(paragraph.children).toHaveLength(1);
+    expect(paragraph.children[0].type).toBe('linkReference');
+    expect(paragraph.children[0].label).toBe('second');
+    expect(paragraph.children[0].children).toHaveLength(1);
+    expect(paragraph.children[0].children[0].value).toBe('first');
+  });
+
+  it('does not affect code blocks', () => {
+    const markdown = '```\n[bar][bar\n```';
+    const tree = mdast(markdown);
+    expect(tree.children).toHaveLength(1);
+    expect(tree.children[0]).toMatchObject({
+      type: 'code',
+      value: '[bar][bar',
+    });
+  });
+});
diff --git a/index.js b/index.js
index a3e90c8c0..372c1f59a 100644
--- a/index.js
+++ b/index.js
@@ -28,6 +28,7 @@ const customCompilers = Object.values(require('./processor/compile'));
 const registerCustomComponents = require('./lib/registerCustomComponents');
 const { options, parseOptions } = require('./options');
 const { icons: calloutIcons } = require('./processor/parse/flavored/callout');
+const fixDanglingShortcutReferences = require('./processor/plugin/fix-dangling-shortcut-references');
 const toPlainText = require('./processor/plugin/plain-text');
 const sectionAnchorId = require('./processor/plugin/section-anchor-id');
 const tableFlattening = require('./processor/plugin/table-flattening');
@@ -117,6 +118,7 @@ export function processor(userOpts = {}) {
     .data('reusableContent', reusableContent)
     .use(!opts.correctnewlines ? remarkBreaks : () => {})
     .use(CustomParsers.map(parser => parser.sanitize?.(sanitize) || parser))
+    .use(fixDanglingShortcutReferences)
     .use(remarkTransformers)
     .use(remarkSlug)
     .use(remarkDisableTokenizers, opts.disableTokenizers);
diff --git a/processor/compile/dangling-shortcut-literal.js b/processor/compile/dangling-shortcut-literal.js
new file mode 100644
index 000000000..e511622e3
--- /dev/null
+++ b/processor/compile/dangling-shortcut-literal.js
@@ -0,0 +1,16 @@
+/**
+ * Compiler plugin that wraps the `text` visitor so text nodes flagged with
+ * `data.danglingShortcutLiteral` are emitted exactly as stored in `node.value`.
+ * Every other text node is delegated to the previously registered `text` visitor.
+ */
+module.exports = function DanglingShortcutLiteralCompiler() {
+  const { Compiler } = this;
+  const { visitors } = Compiler.prototype;
+  const originalText = visitors.text;
+
+  visitors.text = function compileText(node, ...rest) {
+    // Flagged literals bypass the original visitor and are returned as-is.
+    if (node?.data?.danglingShortcutLiteral) return node.value;
+    return originalText.call(this, node, ...rest);
+  };
+};
diff --git a/processor/compile/index.js b/processor/compile/index.js
index fd111b06f..8a519b8e1 100644
--- a/processor/compile/index.js
+++ b/processor/compile/index.js
@@ -3,6 +3,7 @@ export { default as codeTabsCompiler } from './code-tabs';
 export { default as divCompiler } from './div';
 export { default as escapeCompiler } from './escape';
 export { default as figureCompiler } from './figure';
+export { default as danglingShortcutLiteralCompiler } from './dangling-shortcut-literal';
 export { default as gemojiCompiler } from './gemoji';
 export { default as htmlBlockCompiler } from './html-block';
 export { default as iconCompiler } from './i';
diff --git a/processor/plugin/fix-dangling-shortcut-references.js b/processor/plugin/fix-dangling-shortcut-references.js
new file mode 100644
index 000000000..c18672816
--- /dev/null
+++ b/processor/plugin/fix-dangling-shortcut-references.js
@@ -0,0 +1,60 @@
+const { visit } = require('unist-util-visit');
+
+// Parent node types under which a link reference is never rewritten.
+// NOTE(review): code, inlineCode and html are normally childless literal nodes, so this guard
+// looks purely defensive - confirm whether `jsx` nodes can actually contain link references.
+const PROTECTED_PARENTS = new Set(['code', 'inlineCode', 'html', 'jsx']);
+
+/**
+ * Resolve the readable label for a node: its `label`, else its `identifier`, else its string
+ * `value`, else the concatenated labels of its children (an empty string when it has none).
+ *
+ * @param {object} node - AST node to read the label from.
+ * @returns {string} The resolved label text.
+ */
+const labelFrom = node =>
+  node?.label ??
+  node?.identifier ??
+  (typeof node?.value === 'string' ? node.value : (node?.children || []).map(labelFrom).join(''));
+
+/**
+ * Remark drops extra "[" characters when a shortcut link is immediately followed by another "["
+ * (e.g. "[foo][bar"). For each shortcut link reference that is followed by a text node, this
+ * plugin compares the length of the node's positional span with the length of its bracketed
+ * label; any surplus is treated as dropped "[" characters and the pair is rewritten as text.
+ *
+ * @returns {Function} Transformer that mutates the given tree in place.
+ */
+module.exports = function fixDanglingShortcutReferences() {
+  return tree => {
+    visit(tree, 'linkReference', (node, index, parent) => {
+      // Skip contexts where we should never mutate literals (code, inline code, raw HTML/JSX),
+      // and anything that is not a shortcut reference.
+      if (!parent?.children || PROTECTED_PARENTS.has(parent.type) || node.referenceType !== 'shortcut') return;
+
+      const next = parent.children[index + 1];
+      if (!next || next.type !== 'text') return; // Need the text that followed the swallowed "[".
+
+      const start = node.position?.start?.offset;
+      const end = node.position?.end?.offset;
+      if (typeof start !== 'number' || typeof end !== 'number') return;
+
+      const label = labelFrom(node);
+      // A well-formed shortcut spans exactly "[" + label + "]"; anything beyond that is surplus.
+      const extraChars = end - start - (label.length + 2); // surrounding brackets
+      if (extraChars <= 0) return;
+
+      // Collapse the broken linkReference + following text into a plain text node that mirrors
+      // the original markdown literal, tagging it so the compiler can output it verbatim.
+      parent.children.splice(index, 2, {
+        type: 'text',
+        value: `[${label}]${'['.repeat(extraChars)}${next.value}`,
+        data: { danglingShortcutLiteral: true },
+        position: {
+          start: node.position.start,
+          end: next.position?.end || node.position.end,
+        },
+      });
+    });
+  };
+};