From 15416583a14325efe0905f0d21e81a099afa2ba5 Mon Sep 17 00:00:00 2001 From: Edwin Date: Thu, 27 Feb 2020 23:17:47 +0000 Subject: [PATCH] use TextIOWrapper to read from bufferedreader instead --- database_sanitizer/dump/mysql.py | 3 +- database_sanitizer/tests/test_dump_mysql.py | 34 +++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/database_sanitizer/dump/mysql.py b/database_sanitizer/dump/mysql.py index 8b4ebc1..8a8f63c 100644 --- a/database_sanitizer/dump/mysql.py +++ b/database_sanitizer/dump/mysql.py @@ -5,6 +5,7 @@ import codecs import re import subprocess +import io from ..utils.mysql import ( decode_mysql_literal, @@ -83,7 +84,7 @@ def sanitize_from_stream(stream, config): of the values stored in the database. :type config: database_sanitizer.config.Configuration|None """ - for line in codecs.getreader("utf-8")(stream): + for line in io.TextIOWrapper(stream, encoding="utf-8"): # Eat the trailing new line. line = line.rstrip("\n") diff --git a/database_sanitizer/tests/test_dump_mysql.py b/database_sanitizer/tests/test_dump_mysql.py index c95cb26..071e79b 100644 --- a/database_sanitizer/tests/test_dump_mysql.py +++ b/database_sanitizer/tests/test_dump_mysql.py @@ -35,6 +35,26 @@ --- Final line after `INSERT INTO` statement. """ +MOCK_MYSQLDUMP_OUTPUT_WITH_U2028 = b""" +--- Fake MySQL database dump + +DROP TABLE IF EXISTS `test`; + +CREATE TABLE `test` ( +`id` int(11) NOT NULL AUTO_INCREMENT, +`created_at` date NOT NULL, +`notes` varchar(255) NOT NULL, +PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; + +INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES \ +(1,'2018-01-01','Test \xe2\x80\xa8 data 1'),\ +(2,'2018-01-02','Test data 2'),\ +(3,'2018-01-03','Test data 3'); + +--- Final line after `INSERT INTO` statement. +""" + INVALID_MOCK_MYSQLDUMP_OUTPUT = b""" --- Fake MySQL database dump @@ -67,6 +87,20 @@ def test_sanitize_from_stream(): (3,'2018-01-03','Sanitized');\ """ in dump_output_lines +def test_sanitize_with_u2028_from_stream(): + stream = io.BytesIO(MOCK_MYSQLDUMP_OUTPUT_WITH_U2028) + config = Configuration() + config.sanitizers["test.notes"] = lambda value: "Sanitized" + dump_output_lines = list(sanitize_from_stream(stream, config)) + + assert "--- Fake MySQL database dump" in dump_output_lines + assert "--- Final line after `INSERT INTO` statement." in dump_output_lines + assert """INSERT INTO `test` (`id`, `created_at`, `notes`) VALUES \ +(1,'2018-01-01','Sanitized'),\ +(2,'2018-01-02','Sanitized'),\ +(3,'2018-01-03','Sanitized');\ +""" in dump_output_lines + def test_skip_table_rows(): stream = io.BytesIO(MOCK_MYSQLDUMP_OUTPUT)