@@ -15,42 +15,62 @@ protected function setUp(): void {
/**
 * Convert a fixture file to UTF-8 twice: first with the default detection order, then with
 * the fixture's own encoding installed as the mb_detect_order value.
 *
 * With the default order the conversion may fail for encodings that isSupportedEncoding()
 * rejects, so the result is only checked when a conversion was produced or the encoding is
 * supported. With the fixture's encoding as the detection order, the conversion must succeed.
 *
 * @dataProvider dataFileEncodings
 *
 * @param string $file     Path of the fixture file to convert.
 * @param string $encoding Encoding name of the fixture, as passed to mb_detect_order().
 */
public function testEncoding(string $file, string $encoding): void {
    // Read the fixture once and fail loudly if it is missing, instead of converting `false`.
    $content = file_get_contents($file);
    $this->assertNotFalse($content, "Unable to read fixture file: {$file}");

    $utf8String = $this->encodingService->encodeToUtf8($content);

    // Encoding detection is not precise: an unsupported encoding may still happen to convert,
    // so the result is verified whenever there is one, and required for supported encodings.
    if ($utf8String || $this->isSupportedEncoding($encoding)) {
        $this->assertNotNull($utf8String);
        $this->assertNotFalse(mb_detect_encoding($utf8String, 'UTF-8', true));
    }

    $originalOrder = mb_detect_order();
    try {
        $this->assertNotFalse(mb_detect_order($encoding));

        $utf8String = $this->encodingService->encodeToUtf8($content);
        $this->assertNotNull($utf8String);
        $this->assertNotFalse(mb_detect_encoding($utf8String, 'UTF-8', true));
    } finally {
        // mb_detect_order is process-wide state: restore it even when an assertion above
        // throws, otherwise the custom order leaks into every test that runs afterwards.
        mb_detect_order($originalOrder);
    }
}
3240
/**
 * Detect the encoding of a fixture file twice: first with the default detection order, then
 * with the fixture's own encoding installed as the mb_detect_order value.
 *
 * With the default order, an encoding accepted by isSupportedEncoding() must yield some
 * detected encoding (not necessarily the correct one, because detection is inaccurate).
 * With the fixture's encoding as the detection order, exactly that encoding must be detected.
 *
 * @dataProvider dataFileEncodings
 *
 * @param string $file     Path of the fixture file to inspect.
 * @param string $encoding Encoding name of the fixture, as passed to mb_detect_order().
 */
public function testDetection(string $file, string $encoding): void {
    // Read the fixture once and fail loudly if it is missing, instead of inspecting `false`.
    $content = file_get_contents($file);
    $this->assertNotFalse($content, "Unable to read fixture file: {$file}");

    $detectedEncoding = $this->encodingService->detectEncoding($content);
    if ($this->isSupportedEncoding($encoding)) {
        $this->assertNotNull($detectedEncoding);
    }

    $originalOrder = mb_detect_order();
    try {
        $this->assertNotFalse(mb_detect_order($encoding));

        $detectedEncoding = $this->encodingService->detectEncoding($content);
        $this->assertEquals($encoding, $detectedEncoding);
    } finally {
        // mb_detect_order is process-wide state: restore it even when an assertion above
        // throws, otherwise the custom order leaks into every test that runs afterwards.
        mb_detect_order($originalOrder);
    }
}
4860
61+
/**
 * Fixture files paired with the encoding each one is stored in.
 *
 * Declared static so the provider also works on PHPUnit versions that require static data
 * providers. Datasets are keyed by encoding name so a failure report names the fixture.
 *
 * @return array<string, array{0: string, 1: string}> Map of dataset name to [file path, encoding name].
 */
public static function dataFileEncodings(): array {
    return [
        'ISO-8859-15' => ['./tests/data/iso-8859-15.txt', 'ISO-8859-15'],
        'BIG-5' => ['./tests/data/big5.txt', 'BIG-5'],
        'CP936' => ['./tests/data/cp936.txt', 'CP936'],
        'UTF-16LE' => ['./tests/data/utf-16.txt', 'UTF-16LE'],
        'ISO-8859-5' => ['./tests/data/iso-8859-5.txt', 'ISO-8859-5'],
    ];
}
71+
/**
 * Tell whether an encoding is one the tests treat as supported without changing the
 * detection order: it is listed in EncodingService::COMMON_ENCODINGS or has an entry
 * in EncodingService::UTF_BOMs.
 *
 * @param string $encoding Encoding name to look up.
 *
 * @return bool True when the encoding is found in either constant.
 */
private function isSupportedEncoding(string $encoding): bool {
    if (in_array($encoding, EncodingService::COMMON_ENCODINGS, true)) {
        return true;
    }

    return isset(EncodingService::UTF_BOMs[$encoding]);
}
5676}
0 commit comments