|
7 | 7 | use DOMElement; |
8 | 8 | use DOMNode; |
9 | 9 | use DOMText; |
| 10 | +use LibXMLError; |
10 | 11 | use PhpOffice\PhpSpreadsheet\Calculation\Calculation; |
11 | 12 | use PhpOffice\PhpSpreadsheet\Cell\Coordinate; |
12 | 13 | use PhpOffice\PhpSpreadsheet\Cell\DataType; |
@@ -132,6 +133,32 @@ class Html extends BaseReader |
132 | 133 | /** @var array<string, bool> */ |
133 | 134 | protected array $rowspan = []; |
134 | 135 |
|
| 136 | + /** |
| 137 | + * Null (the default) leaves the current libxml_use_internal_errors setting unchanged. |
| 138 | + * The default will probably change to 'true' in a future release. |
| 139 | + */ |
| 140 | + protected ?bool $suppressLoadWarnings = null; |
| 141 | + |
| 142 | + /** @var LibXMLError[] */ |
| 143 | + protected array $libxmlMessages = []; |
| 144 | + |
| 145 | + /** |
| 146 | + * Suppress load warning messages, keeping them available |
| 147 | + * via getLibxmlMessages(). Null leaves the libxml setting untouched. |
| 148 | + */ |
| 149 | + public function setSuppressLoadWarnings(?bool $suppressLoadWarnings): self |
| 150 | + { |
| 151 | + $this->suppressLoadWarnings = $suppressLoadWarnings; |
| 152 | + |
| 153 | + return $this; |
| 154 | + } |
| 155 | + |
| 156 | + /** @return LibXMLError[] Messages returned by libxml_get_errors() at the end of the most recent load attempt. */ |
| 157 | + public function getLibxmlMessages(): array |
| 158 | + { |
| 159 | + return $this->libxmlMessages; |
| 160 | + } |
| 161 | + |
135 | 162 | /** |
136 | 163 | * Create a new HTML Reader instance. |
137 | 164 | */ |
@@ -305,9 +332,11 @@ protected function flushCell(Worksheet $sheet, string $column, int|string $row, |
305 | 332 | $this->dataArray[$row][$column] = $cellContent; // @phpstan-ignore-line |
306 | 333 | } |
307 | 334 | } else { |
308 | | - // We have a Rich Text run |
| 335 | + // We have a Rich Text run. |
| 336 | + // I don't actually see any way to reach this line. |
309 | 337 | // TODO |
310 | | - $this->dataArray[$row][$column] = 'RICH TEXT: ' . StringHelper::convertToString($cellContent); // @phpstan-ignore-line |
| 338 | + // @phpstan-ignore-next-line |
| 339 | + $this->dataArray[$row][$column] = 'RICH TEXT: ' . StringHelper::convertToString($cellContent); // @codeCoverageIgnore |
311 | 340 | } |
312 | 341 | $cellContent = (string) ''; |
313 | 342 | } |
@@ -732,12 +761,23 @@ public function loadIntoExisting(string $filename, Spreadsheet $spreadsheet): Sp |
732 | 761 | $dom = new DOMDocument(); |
733 | 762 |
|
734 | 763 | // Reload the HTML file into the DOM object |
| 764 | + if (is_bool($this->suppressLoadWarnings)) { |
| 765 | + $useErrors = libxml_use_internal_errors($this->suppressLoadWarnings); |
| 766 | + } else { |
| 767 | + $useErrors = null; |
| 768 | + } |
| 769 | + |
735 | 770 | try { |
736 | 771 | $convert = $this->getSecurityScannerOrThrow()->scanFile($filename); |
737 | 772 | $convert = static::replaceNonAsciiIfNeeded($convert); |
738 | 773 | $loaded = ($convert === null) ? false : $dom->loadHTML($convert); |
739 | 774 | } catch (Throwable $e) { |
740 | 775 | $loaded = false; |
| 776 | + } finally { |
| 777 | + $this->libxmlMessages = libxml_get_errors(); |
| 778 | + if (is_bool($useErrors)) { |
| 779 | + libxml_use_internal_errors($useErrors); |
| 780 | + } |
741 | 781 | } |
742 | 782 | if ($loaded === false) { |
743 | 783 | throw new Exception('Failed to load file ' . $filename . ' as a DOM Document', 0, $e ?? null); |
@@ -852,12 +892,23 @@ public function loadFromString(string $content, ?Spreadsheet $spreadsheet = null |
852 | 892 | $dom = new DOMDocument(); |
853 | 893 |
|
854 | 894 | // Reload the HTML file into the DOM object |
| 895 | + if (is_bool($this->suppressLoadWarnings)) { |
| 896 | + $useErrors = libxml_use_internal_errors($this->suppressLoadWarnings); |
| 897 | + } else { |
| 898 | + $useErrors = null; |
| 899 | + } |
| 900 | + |
855 | 901 | try { |
856 | 902 | $convert = $this->getSecurityScannerOrThrow()->scan($content); |
857 | 903 | $convert = static::replaceNonAsciiIfNeeded($convert); |
858 | 904 | $loaded = ($convert === null) ? false : $dom->loadHTML($convert); |
859 | 905 | } catch (Throwable $e) { |
860 | 906 | $loaded = false; |
| 907 | + } finally { |
| 908 | + $this->libxmlMessages = libxml_get_errors(); |
| 909 | + if (is_bool($useErrors)) { |
| 910 | + libxml_use_internal_errors($useErrors); |
| 911 | + } |
861 | 912 | } |
862 | 913 | if ($loaded === false) { |
863 | 914 | throw new Exception('Failed to load content as a DOM Document', 0, $e ?? null); |
|
0 commit comments