Skip to content

Commit 7924225

Browse files
committed
DomQuery: uses PHP 8.4 HTML DOM
1 parent 6306e73 commit 7924225

File tree

3 files changed

+168
-27
lines changed

3 files changed

+168
-27
lines changed

src/Framework/DomQuery.php

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99

1010
namespace Tester;
1111

12+
use Dom;
13+
1214

1315
/**
14-
* DomQuery simplifies querying (X)HTML documents.
16+
* Simplifies querying and traversing HTML documents using CSS selectors.
1517
*/
1618
class DomQuery extends \SimpleXMLElement
1719
{
@@ -20,24 +22,33 @@ class DomQuery extends \SimpleXMLElement
2022
*/
2123
public static function fromHtml(string $html): self
2224
{
23-
if (!str_contains($html, '<')) {
24-
$html = '<body>' . $html;
25-
}
25+
$old = libxml_use_internal_errors(true);
26+
libxml_clear_errors();
2627

27-
// parse these elements as void
28-
$html = preg_replace('#<(keygen|source|track|wbr)(?=\s|>)((?:"[^"]*"|\'[^\']*\'|[^"\'>])*+)(?<!/)>#', '<$1$2 />', $html);
28+
if (PHP_VERSION_ID < 80400) {
29+
if (!str_contains($html, '<')) {
30+
$html = '<body>' . $html;
31+
}
2932

30-
// fix parsing of </ inside scripts
31-
$html = preg_replace_callback(
32-
'#(<script(?=\s|>)(?:"[^"]*"|\'[^\']*\'|[^"\'>])*+>)(.*?)(</script>)#s',
33-
fn(array $m): string => $m[1] . str_replace('</', '<\/', $m[2]) . $m[3],
34-
$html,
35-
);
33+
// parse these elements as void
34+
$html = preg_replace('#<(keygen|source|track|wbr)(?=\s|>)((?:"[^"]*"|\'[^\']*\'|[^"\'>])*+)(?<!/)>#', '<$1$2 />', $html);
35+
36+
// fix parsing of </ inside scripts
37+
$html = preg_replace_callback(
38+
'#(<script(?=\s|>)(?:"[^"]*"|\'[^\']*\'|[^"\'>])*+>)(.*?)(</script>)#s',
39+
fn(array $m): string => $m[1] . str_replace('</', '<\/', $m[2]) . $m[3],
40+
$html,
41+
);
42+
43+
$dom = new \DOMDocument;
44+
$dom->loadHTML($html);
45+
} else {
46+
if (!preg_match('~<!DOCTYPE~i', $html)) {
47+
$html = '<!DOCTYPE html>' . $html;
48+
}
49+
$dom = Dom\HTMLDocument::createFromString($html, Dom\HTML_NO_DEFAULT_NS);
50+
}
3651

37-
$dom = new \DOMDocument;
38-
$old = libxml_use_internal_errors(true);
39-
libxml_clear_errors();
40-
$dom->loadHTML($html);
4152
$errors = libxml_get_errors();
4253
libxml_use_internal_errors($old);
4354

@@ -61,32 +72,43 @@ public static function fromXml(string $xml): self
6172

6273

6374
/**
64-
* Finds descendants of current element that match the given CSS selector.
75+
* Returns array of elements matching CSS selector.
6576
* @return DomQuery[]
6677
*/
6778
public function find(string $selector): array
6879
{
69-
return str_starts_with($selector, ':scope')
70-
? $this->xpath('self::' . self::css2xpath(substr($selector, 6)))
71-
: $this->xpath('descendant::' . self::css2xpath($selector));
80+
if (PHP_VERSION_ID < 80400) {
81+
return str_starts_with($selector, ':scope')
82+
? $this->xpath('self::' . self::css2xpath(substr($selector, 6)))
83+
: $this->xpath('descendant::' . self::css2xpath($selector));
84+
}
85+
86+
return array_map(
87+
fn($el) => simplexml_import_dom($el, self::class),
88+
iterator_to_array(Dom\import_simplexml($this)->querySelectorAll($selector)),
89+
);
7290
}
7391

7492

7593
/**
76-
* Checks if any descendant of current element matches the given selector.
94+
* Checks if any descendant matches CSS selector.
7795
*/
7896
public function has(string $selector): bool
7997
{
80-
return (bool) $this->find($selector);
98+
return PHP_VERSION_ID < 80400
99+
? (bool) $this->find($selector)
100+
: (bool) Dom\import_simplexml($this)->querySelector($selector);
81101
}
82102

83103

84104
/**
85-
* Determines if the current element matches the specified CSS selector.
105+
* Checks if element matches CSS selector.
86106
*/
87107
public function matches(string $selector): bool
88108
{
89-
return (bool) $this->xpath('self::' . self::css2xpath($selector));
109+
return PHP_VERSION_ID < 80400
110+
? (bool) $this->xpath('self::' . self::css2xpath($selector))
111+
: Dom\import_simplexml($this)->matches($selector);
90112
}
91113

92114

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
<?php
2+
3+
/**
4+
* @phpVersion 8.4
5+
*/
6+
7+
declare(strict_types=1);
8+
9+
use Tester\Assert;
10+
use Tester\DomQuery;
11+
12+
require __DIR__ . '/../bootstrap.php';
13+
14+
15+
test('fromHtml() creates DomQuery from HTML string', function () {
16+
$dom = DomQuery::fromHtml('<div class="test"><p>Hello</p></div>');
17+
Assert::type(DomQuery::class, $dom);
18+
Assert::true($dom->has('div'));
19+
});
20+
21+
test('fromHtml() handles HTML without root element', function () {
22+
$dom = DomQuery::fromHtml('Hello world');
23+
Assert::type(DomQuery::class, $dom);
24+
Assert::contains('Hello world', (string) $dom->find('body')[0]);
25+
});
26+
27+
test('fromHtml() handles void elements correctly', function () {
28+
$dom = DomQuery::fromHtml('<div><source src="test.mp3"><wbr>test</div>');
29+
Assert::true($dom->has('source'));
30+
Assert::true($dom->has('wbr'));
31+
});
32+
33+
test('fromHtml() handles script tags with </ inside', function () {
34+
$dom = DomQuery::fromHtml('<script>if (a</b) { alert("test"); }</script>');
35+
Assert::true($dom->has('script'));
36+
});
37+
38+
test('find() returns matching elements', function () {
39+
$dom = DomQuery::fromHtml('
40+
<div class="container">
41+
<p class="first">First paragraph</p>
42+
<p class="second">Second paragraph</p>
43+
<span>Test span</span>
44+
</div>
45+
');
46+
47+
$paragraphs = $dom->find('p');
48+
Assert::count(2, $paragraphs);
49+
Assert::contains('First paragraph', (string) $paragraphs[0]);
50+
51+
$spans = $dom->find('span');
52+
Assert::count(1, $spans);
53+
Assert::contains('Test span', (string) $spans[0]);
54+
});
55+
56+
test('find() supports complex CSS selectors', function () {
57+
$dom = DomQuery::fromHtml('
58+
<div class="container">
59+
<p class="first">First</p>
60+
<div class="wrapper">
61+
<p class="second">Second</p>
62+
<p class="third">Third</p>
63+
</div>
64+
</div>
65+
');
66+
67+
$results = $dom->find('div.wrapper p');
68+
Assert::count(2, $results);
69+
Assert::contains('Second', (string) $results[0]);
70+
71+
$results = $dom->find('p.first + div');
72+
Assert::count(1, $results);
73+
Assert::true($results[0]->has('p.second'));
74+
});
75+
76+
test('has() checks for existence of elements', function () {
77+
$dom = DomQuery::fromHtml('
78+
<div class="test">
79+
<span class="inner">Test</span>
80+
</div>
81+
');
82+
83+
Assert::true($dom->has('span.inner'));
84+
Assert::true($dom->has('div.test'));
85+
Assert::false($dom->has('p'));
86+
Assert::false($dom->has('.nonexistent'));
87+
});
88+
89+
test('matches() checks if element matches selector', function () {
90+
$dom = DomQuery::fromHtml('<div class="test"><p class="para">Test</p></div>');
91+
$para = $dom->find('p')[0];
92+
93+
Assert::true($para->matches('p'));
94+
Assert::true($para->matches('.para'));
95+
Assert::true($para->matches('p.para'));
96+
Assert::false($para->matches('div'));
97+
Assert::false($para->matches('.test'));
98+
});
99+
100+
test('find() returns empty array for no matches', function () {
101+
$dom = DomQuery::fromHtml('<div></div>');
102+
Assert::same([], $dom->find('nonexistent'));
103+
});
104+
105+
test('handles malformed HTML gracefully', function () {
106+
Assert::error(function () use (&$dom) {
107+
$dom = DomQuery::fromHtml('<div><p>Unclosed paragraph<span>Test</div>');
108+
}, E_USER_WARNING, 'Tester\DomQuery::fromHtml: tree error unexpected-element-in-open-elements-stack%a%');
109+
Assert::true($dom->has('div'));
110+
Assert::true($dom->has('p'));
111+
Assert::true($dom->has('span'));
112+
});
113+
114+
test('handles HTML entities in attributes', function () {
115+
$dom = DomQuery::fromHtml('<div data-test="&quot;quoted&quot;">Test</div>');
116+
Assert::true($dom->find('div')[0]->matches('[data-test="\\"quoted\\""]'));
117+
});

tests/Framework/DomQuery.fromXml.phpt

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ Assert::count(2, $results);
3131
Assert::type(DomQuery::class, $results[0]);
3232
Assert::type(DomQuery::class, $results[1]);
3333

34-
// children
35-
$results = $dom->find(':scope > item');
36-
Assert::count(2, $results);
34+
if (PHP_VERSION_ID < 80400) { // TODO: ':scope > item' is not yet supported by Lexbor
35+
// children
36+
$results = $dom->find(':scope > item');
37+
Assert::count(2, $results);
38+
}
3739

3840
// has
3941
Assert::true($dom->has('#test1'));

0 commit comments

Comments
 (0)