Skip to content

Commit d53a8e5

Browse files
authored
Merge pull request #4761 from oleibman/htmlwarn
Suppress Libxml Warnings in Reader/Html
2 parents 1a01a22 + 2a43cb4 commit d53a8e5

File tree

3 files changed

+102
-13
lines changed

3 files changed

+102
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org). Thia is a
1111

1212
- Passthrough support (with some restrictions) for otherwise unsupported drawing elements. [Issue #4037](https://github.com/PHPOffice/PhpSpreadsheet/issues/4037) [Issue #4704](https://github.com/PHPOffice/PhpSpreadsheet/issues/4704) [PR #4712](https://github.com/PHPOffice/PhpSpreadsheet/pull/4712)
1313
- Set all locale variables at once in a threadsafe manner. [Issue #954](https://github.com/PHPOffice/PhpSpreadsheet/issues/954) [PR #4760](https://github.com/PHPOffice/PhpSpreadsheet/pull/4760)
14+
- Reader/Html add ability to suppress warning messages from loadhtml. [Issue #647](https://github.com/PHPOffice/PhpSpreadsheet/issues/647) [Issue #849](https://github.com/PHPOffice/PhpSpreadsheet/issues/849) [PR #4761](https://github.com/PHPOffice/PhpSpreadsheet/pull/4761)
1415

1516
### Removed
1617

src/PhpSpreadsheet/Reader/Html.php

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use DOMElement;
88
use DOMNode;
99
use DOMText;
10+
use LibXMLError;
1011
use PhpOffice\PhpSpreadsheet\Calculation\Calculation;
1112
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
1213
use PhpOffice\PhpSpreadsheet\Cell\DataType;
@@ -132,6 +133,32 @@ class Html extends BaseReader
132133
/** @var array<string, bool> */
133134
protected array $rowspan = [];
134135

136+
/**
137+
* Default setting uses current setting of libxml_use_internal_errors.
138+
* It will probably change to 'true' in a future release.
139+
*/
140+
protected ?bool $suppressLoadWarnings = null;
141+
142+
/** @var LibXMLError[] */
143+
protected array $libxmlMessages = [];
144+
145+
/**
146+
* Suppress load warning messages, keeping them available
147+
* in $this->libxmlMessages().
148+
*/
149+
public function setSuppressLoadWarnings(?bool $suppressLoadWarnings): self
150+
{
151+
$this->suppressLoadWarnings = $suppressLoadWarnings;
152+
153+
return $this;
154+
}
155+
156+
/** @return LibXMLError[] */
157+
public function getLibxmlMessages(): array
158+
{
159+
return $this->libxmlMessages;
160+
}
161+
135162
/**
136163
* Create a new HTML Reader instance.
137164
*/
@@ -305,9 +332,11 @@ protected function flushCell(Worksheet $sheet, string $column, int|string $row,
305332
$this->dataArray[$row][$column] = $cellContent; // @phpstan-ignore-line
306333
}
307334
} else {
308-
// We have a Rich Text run
335+
// We have a Rich Text run.
336+
// I don't actually see any way to reach this line.
309337
// TODO
310-
$this->dataArray[$row][$column] = 'RICH TEXT: ' . StringHelper::convertToString($cellContent); // @phpstan-ignore-line
338+
// @phpstan-ignore-next-line
339+
$this->dataArray[$row][$column] = 'RICH TEXT: ' . StringHelper::convertToString($cellContent); // @codeCoverageIgnore
311340
}
312341
$cellContent = (string) '';
313342
}
@@ -732,12 +761,23 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp
732761
$dom = new DOMDocument();
733762

734763
// Reload the HTML file into the DOM object
764+
if (is_bool($this->suppressLoadWarnings)) {
765+
$useErrors = libxml_use_internal_errors($this->suppressLoadWarnings);
766+
} else {
767+
$useErrors = null;
768+
}
769+
735770
try {
736771
$convert = $this->getSecurityScannerOrThrow()->scanFile($filename);
737772
$convert = static::replaceNonAsciiIfNeeded($convert);
738773
$loaded = ($convert === null) ? false : $dom->loadHTML($convert);
739774
} catch (Throwable $e) {
740775
$loaded = false;
776+
} finally {
777+
$this->libxmlMessages = libxml_get_errors();
778+
if (is_bool($useErrors)) {
779+
libxml_use_internal_errors($useErrors);
780+
}
741781
}
742782
if ($loaded === false) {
743783
throw new Exception('Failed to load file ' . $filename . ' as a DOM Document', 0, $e ?? null);
@@ -852,12 +892,23 @@ public function loadFromString(string $content, ?Spreadsheet $spreadsheet = null
852892
$dom = new DOMDocument();
853893

854894
// Reload the HTML file into the DOM object
895+
if (is_bool($this->suppressLoadWarnings)) {
896+
$useErrors = libxml_use_internal_errors($this->suppressLoadWarnings);
897+
} else {
898+
$useErrors = null;
899+
}
900+
855901
try {
856902
$convert = $this->getSecurityScannerOrThrow()->scan($content);
857903
$convert = static::replaceNonAsciiIfNeeded($convert);
858904
$loaded = ($convert === null) ? false : $dom->loadHTML($convert);
859905
} catch (Throwable $e) {
860906
$loaded = false;
907+
} finally {
908+
$this->libxmlMessages = libxml_get_errors();
909+
if (is_bool($useErrors)) {
910+
libxml_use_internal_errors($useErrors);
911+
}
861912
}
862913
if ($loaded === false) {
863914
throw new Exception('Failed to load content as a DOM Document', 0, $e ?? null);

tests/PhpSpreadsheetTests/Reader/Html/HtmlLibxmlTest.php

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
namespace PhpOffice\PhpSpreadsheetTests\Reader\Html;
66

77
use PhpOffice\PhpSpreadsheet\Reader\Html;
8+
use PhpOffice\PhpSpreadsheet\Spreadsheet;
89
use PHPUnit\Framework\TestCase;
910

1011
/**
@@ -15,29 +16,65 @@
1516
*/
1617
class HtmlLibxmlTest extends TestCase
1718
{
18-
private bool $useErrors;
19-
20-
protected function setUp(): void
21-
{
22-
$this->useErrors = libxml_use_internal_errors(true);
23-
}
19+
private Spreadsheet $spreadsheet;
2420

2521
protected function tearDown(): void
2622
{
27-
libxml_use_internal_errors($this->useErrors);
23+
$this->spreadsheet->disconnectWorksheets();
24+
unset($this->spreadsheet);
2825
}
2926

3027
public function testLoadInvalidString(): void
3128
{
29+
$libxml = libxml_use_internal_errors();
3230
$html = '<table<>';
33-
(new Html())->loadFromString($html);
34-
self::assertNotEmpty(libxml_get_errors());
31+
$reader = new Html();
32+
$reader->setSuppressLoadWarnings(true);
33+
$this->spreadsheet = $reader->loadFromString($html);
34+
self::assertNotEmpty($reader->getLibxmlMessages());
35+
self::assertSame($libxml, libxml_use_internal_errors());
3536
}
3637

3738
public function testLoadValidString(): void
3839
{
40+
$libxml = libxml_use_internal_errors();
41+
$html = '<table>';
42+
$reader = new Html();
43+
$reader->setSuppressLoadWarnings(true);
44+
$this->spreadsheet = $reader->loadFromString($html);
45+
self::assertEmpty($reader->getLibxmlMessages());
46+
self::assertSame($libxml, libxml_use_internal_errors());
47+
}
48+
49+
public function testLoadValidStringNoSuppress(): void
50+
{
51+
$libxml = libxml_use_internal_errors();
3952
$html = '<table>';
40-
(new Html())->loadFromString($html);
41-
self::assertEmpty(libxml_get_errors());
53+
$reader = new Html();
54+
$this->spreadsheet = $reader->loadFromString($html);
55+
self::assertEmpty($reader->getLibxmlMessages());
56+
self::assertSame($libxml, libxml_use_internal_errors());
57+
}
58+
59+
public function testLoadValidFile(): void
60+
{
61+
$libxml = libxml_use_internal_errors();
62+
$reader = new Html();
63+
$reader->setSuppressLoadWarnings(true);
64+
$file = 'tests/data/Reader/HTML/charset.ISO-8859-1.html4.html';
65+
$this->spreadsheet = $reader->load($file);
66+
self::assertEmpty($reader->getLibxmlMessages());
67+
self::assertSame($libxml, libxml_use_internal_errors());
68+
}
69+
70+
public function testLoadInvalidFile(): void
71+
{
72+
$libxml = libxml_use_internal_errors();
73+
$reader = new Html();
74+
$reader->setSuppressLoadWarnings(true);
75+
$file = 'tests/data/Reader/HTML/badhtml.html';
76+
$this->spreadsheet = $reader->load($file);
77+
self::assertNotEmpty($reader->getLibxmlMessages());
78+
self::assertSame($libxml, libxml_use_internal_errors());
4279
}
4380
}

0 commit comments

Comments
 (0)