From 99c2633cef310bec8c61bde305abd57898707cb3 Mon Sep 17 00:00:00 2001
From: Oleksandr Bazarnov
Date: Thu, 20 Feb 2025 18:40:29 +0200
Subject: [PATCH] fix

---
 .../declarative/decoders/composite_raw_decoder.py  | 16 +++++++++++++++-
 .../declarative/decoders/test_composite_decoder.py |  4 +++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
index 389679406..2cb618175 100644
--- a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
+++ b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
@@ -107,6 +107,20 @@ class CsvParser(Parser):
     encoding: Optional[str] = "utf-8"
     delimiter: Optional[str] = ","
 
+    def _get_delimiter(self) -> Optional[str]:
+        """
+        Return the configured delimiter, decoding an escaped form (a backslash followed
+        by an escape code, e.g. the two characters that spell a tab) into the real character.
+
+        A lone backslash is a valid one-character delimiter and is returned as is.
+        The configured value on the instance is left untouched.
+        """
+        delimiter = self.delimiter
+        if delimiter is not None and len(delimiter) > 1 and delimiter.startswith("\\"):
+            return delimiter.encode("utf-8").decode("unicode_escape")
+
+        return delimiter
+
     def parse(
         self,
         data: BufferedIOBase,
@@ -115,7 +129,7 @@ def parse(
         Parse CSV data from decompressed bytes.
         """
         text_data = TextIOWrapper(data, encoding=self.encoding)  # type: ignore
-        reader = csv.DictReader(text_data, delimiter=self.delimiter or ",")
+        reader = csv.DictReader(text_data, delimiter=self._get_delimiter() or ",")
         yield from reader
 
 
diff --git a/unit_tests/sources/declarative/decoders/test_composite_decoder.py b/unit_tests/sources/declarative/decoders/test_composite_decoder.py
index 524593b56..745113925 100644
--- a/unit_tests/sources/declarative/decoders/test_composite_decoder.py
+++ b/unit_tests/sources/declarative/decoders/test_composite_decoder.py
@@ -62,7 +62,9 @@ def test_composite_raw_decoder_gzip_csv_parser(requests_mock, encoding: str):
     )
     response = requests.get("https://airbyte.io/", stream=True)
 
-    parser = GzipParser(inner_parser=CsvParser(encoding=encoding, delimiter="\t"))
+    # the delimiter is set to `\\t` intentionally to test the parsing logic here
+    parser = GzipParser(inner_parser=CsvParser(encoding=encoding, delimiter="\\t"))
+
     composite_raw_decoder = CompositeRawDecoder(parser=parser)
     counter = 0
     for _ in composite_raw_decoder.decode(response):