From f2d644f472b0f391df458d1fc36c7c7f9d0ccaee Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Fri, 3 Jan 2025 14:23:35 -0800 Subject: [PATCH 01/25] feat: add field_path attribute to Requestoption --- .../declarative/requesters/request_option.py | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index d13d20566..6779c8c76 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -4,7 +4,7 @@ from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any, Mapping, Union +from typing import Any, Literal, List, Mapping, Optional, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString @@ -20,19 +20,50 @@ class RequestOptionType(Enum): body_json = "body_json" +@dataclass +class FieldName: + """Represents a direct field name reference""" + value: Union[InterpolatedString, str] + type: Literal["field_name"] = "field_name" + +@dataclass +class FieldPath: + """Represents a path to a nested field""" + value: List[Union[InterpolatedString, str]] + type: Literal["field_path"] = "field_path" + @dataclass class RequestOption: """ Describes an option to set on a request - + Attributes: - field_name (str): Describes the name of the parameter to inject - inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter + field_name: Describes the name of the parameter to inject. Mutually exclusive with field_path. + field_path: Describes the path to a nested field as a list of field names. Mutually exclusive with field_name. + inject_into: Describes where in the HTTP request to inject the parameter """ - - field_name: Union[InterpolatedString, str] inject_into: RequestOptionType parameters: InitVar[Mapping[str, Any]] + field_name: Optional[Union[InterpolatedString, str]] = None + field_path: Optional[List[Union[InterpolatedString, str]]] = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: - self.field_name = InterpolatedString.create(self.field_name, parameters=parameters) + # Validate inputs + if self.field_name is None and self.field_path is None: + raise ValueError("RequestOption requires either a field_name or field_path") + if self.field_name is not None and self.field_path is not None: + raise ValueError("Only one of field_name or field_path can be provided to RequestOption") + + # Handle interpolation + if self.field_name is not None: + self.field_name = InterpolatedString.create(self.field_name, parameters=parameters) + if self.field_path is not None: + self.field_path = [ + InterpolatedString.create(segment, parameters=parameters) + for segment in self.field_path + ] + + @property + def is_field_path(self) -> bool: + """Returns whether this option uses a field path""" + return self.field_path is not None From 00d203658367c60ec7ab5ed566a85af77fe0a2d0 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Fri, 3 Jan 2025 16:07:00 -0800 Subject: [PATCH 02/25] task: update combine_mappings to handle nested paths --- airbyte_cdk/utils/mapping_helpers.py | 103 +++++++++++++++++------ unit_tests/utils/test_mapping_helpers.py | 26 +++++- 2 files changed, 100 insertions(+), 29 deletions(-) diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index 469fb5e0a..1e3b821a9 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -3,43 +3,90 @@ # -from typing import Any, List, Mapping, Optional, Set, Union +from typing import Any, Dict, List, Mapping, Optional, Union +def has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: Any) -> bool: + """ + Check if two paths conflict with each other. + e.g. ["a", "b"] conflicts with ["a", "b"] if values differ + e.g. ["a"] conflicts with ["a", "b"] (can't have both a value and a nested structure) + """ + # If one path is a prefix of the other, they conflict + shorter, longer = sorted([path1, path2], key=len) + if longer[:len(shorter)] == shorter: + return True + + # If paths are the same but values differ, they conflict + if path1 == path2 and value1 != value2: + return True + + return False + +def flatten_mapping(mapping: Mapping[str, Any], prefix: List[str] = None) -> List[tuple[List[str], Any]]: + """ + Convert a nested mapping into a list of (path, value) pairs + e.g. {"a": {"b": 1}} -> [(["a", "b"], 1)] + """ + prefix = prefix or [] + result = [] + + for key, value in mapping.items(): + current_path = prefix + [key] + if isinstance(value, Mapping): + result.extend(flatten_mapping(value, current_path)) + else: + result.append((current_path, value)) + + return result + def combine_mappings( mappings: List[Optional[Union[Mapping[str, Any], str]]], ) -> Union[Mapping[str, Any], str]: """ - Combine multiple mappings into a single mapping. If any of the mappings are a string, return - that string. Raise errors in the following cases: - * If there are duplicate keys across mappings + Combine multiple mappings into a single mapping, supporting nested structures. + If any of the mappings are a string, return that string. Raise errors in the following cases: + * If there are conflicting paths across mappings (including nested conflicts) * If there are multiple string mappings * If there are multiple mappings containing keys and one of them is a string """ - all_keys: List[Set[str]] = [] - for part in mappings: - if part is None: - continue - keys = set(part.keys()) if not isinstance(part, str) else set() - all_keys.append(keys) - - string_options = sum(isinstance(mapping, str) for mapping in mappings) - # If more than one mapping is a string, raise a ValueError + # Handle string cases first (maintaining existing behavior) + string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None) if string_options > 1: raise ValueError("Cannot combine multiple string options") - - if string_options == 1 and sum(len(keys) for keys in all_keys) > 0: - raise ValueError("Cannot combine multiple options if one is a string") - - # If any mapping is a string, return it + + non_empty_mappings = [m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m)] + + if string_options == 1: + if len(non_empty_mappings) > 1: + raise ValueError("Cannot combine multiple options if one is a string") + return next(m for m in non_empty_mappings if isinstance(m, str)) + + # Convert all mappings to flat (path, value) pairs for conflict detection + all_paths: List[List[tuple[List[str], Any]]] = [] for mapping in mappings: - if isinstance(mapping, str): - return mapping - - # If there are duplicate keys across mappings, raise a ValueError - intersection = set().union(*all_keys) - if len(intersection) < sum(len(keys) for keys in all_keys): - raise ValueError(f"Duplicate keys found: {intersection}") - - # Return the combined mappings - return {key: value for mapping in mappings if mapping for key, value in mapping.items()} # type: ignore # mapping can't be string here + if mapping is None or not isinstance(mapping, Mapping): + continue + all_paths.append(flatten_mapping(mapping)) + + # Check for conflicts + for i, paths1 in enumerate(all_paths): + for path1, value1 in paths1: + for paths2 in all_paths[i+1:]: + for path2, value2 in paths2: + if has_nested_conflict(path1, value1, path2, value2): + raise ValueError(f"Duplicate keys or nested path conflict found: {'.'.join(path1)} conflicts with {'.'.join(path2)}") + + # If no conflicts, merge all mappings + result: Dict[str, Any] = {} + for mapping in mappings: + if mapping is None or not isinstance(mapping, Mapping): + continue + for path, value in flatten_mapping(mapping): + current = result + *prefix, last = path + for key in prefix: + current = current.setdefault(key, {}) + current[last] = value + + return result diff --git a/unit_tests/utils/test_mapping_helpers.py b/unit_tests/utils/test_mapping_helpers.py index 272ce9b7a..f972a94fe 100644 --- a/unit_tests/utils/test_mapping_helpers.py +++ b/unit_tests/utils/test_mapping_helpers.py @@ -21,7 +21,7 @@ def test_combine_with_string(): def test_overlapping_keys(): mappings = [{"a": 1, "b": 2}, {"b": 3}] - with pytest.raises(ValueError, match="Duplicate keys found"): + with pytest.raises(ValueError, match="Duplicate keys"): combine_mappings(mappings) @@ -53,3 +53,27 @@ def test_combine_with_string_and_empty_mappings(): mappings = ["option", {}] result = combine_mappings(mappings) assert result == "option" + + +def test_nested_merge(): + mappings = [{"a": {"b": 1}}, {"c": {"d": 2}}] + result = combine_mappings(mappings) + assert result == {"a": {"b": 1}, "c": {"d": 2}} + + +def test_nested_conflict(): + mappings = [{"a": {"b": 1}}, {"a": {"b": 2}}] + with pytest.raises(ValueError, match="nested path conflict"): + combine_mappings(mappings) + + +def test_nested_path_conflict(): + mappings = [{"a": 1}, {"a": {"b": 2}}] + with pytest.raises(ValueError, match="nested path conflict"): + combine_mappings(mappings) + + +def test_deep_nested_merge(): + mappings = [{"a": {"b": {"c": 1}}}, {"d": {"e": {"f": 2}}}] + result = combine_mappings(mappings) + assert result == {"a": {"b": {"c": 1}}, "d": {"e": {"f": 2}}} From 9c98313fc8e2dc8a7679490168846750204b1e04 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Mon, 6 Jan 2025 12:15:24 -0800 Subject: [PATCH 03/25] feat: add reusable method to inject value into target dict --- .../declarative/requesters/request_option.py | 46 +++++++++++++++++-- airbyte_cdk/utils/mapping_helpers.py | 39 ++++++++++------ 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index 6779c8c76..f64447443 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -4,9 +4,10 @@ from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any, Literal, List, Mapping, Optional, Union +from typing import Any, List, Literal, Mapping, MutableMapping, Optional, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +from airbyte_cdk.sources.types import Config class RequestOptionType(Enum): @@ -23,25 +24,30 @@ class RequestOptionType(Enum): @dataclass class FieldName: """Represents a direct field name reference""" + value: Union[InterpolatedString, str] type: Literal["field_name"] = "field_name" + @dataclass class FieldPath: """Represents a path to a nested field""" + value: List[Union[InterpolatedString, str]] type: Literal["field_path"] = "field_path" + @dataclass class RequestOption: """ Describes an option to set on a request - + Attributes: field_name: Describes the name of the parameter to inject. Mutually exclusive with field_path. field_path: Describes the path to a nested field as a list of field names. Mutually exclusive with field_name. inject_into: Describes where in the HTTP request to inject the parameter """ + inject_into: RequestOptionType parameters: InitVar[Mapping[str, Any]] field_name: Optional[Union[InterpolatedString, str]] = None @@ -52,8 +58,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: if self.field_name is None and self.field_path is None: raise ValueError("RequestOption requires either a field_name or field_path") if self.field_name is not None and self.field_path is not None: - raise ValueError("Only one of field_name or field_path can be provided to RequestOption") - + raise ValueError( + "Only one of field_name or field_path can be provided to RequestOption" + ) + # Handle interpolation if self.field_name is not None: self.field_name = InterpolatedString.create(self.field_name, parameters=parameters) @@ -67,3 +75,33 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def is_field_path(self) -> bool: """Returns whether this option uses a field path""" return self.field_path is not None + + def inject_into_dict( + self, + target: MutableMapping[str, Any], + value: Any, + config: Config, + ) -> None: + """ + Inject a value into a target dict using either field_name or field_path + + Args: + target: The dict to inject the value into + value: The value to inject + config: The config object to use for interpolation + """ + + if self.is_field_path: + assert self.field_path is not None + current = target + *path_parts, final_key = [ + segment.eval(config=config) if isinstance(segment, InterpolatedString) else segment + for segment in self.field_path + ] + + for part in path_parts: + current = current.setdefault(part, {}) + current[final_key] = value + else: + assert self.field_name is not None + target[self.field_name.eval(config=config) if isinstance(self.field_name, InterpolatedString) else self.field_name] = value diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index 1e3b821a9..e8013119e 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -14,32 +14,37 @@ def has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: """ # If one path is a prefix of the other, they conflict shorter, longer = sorted([path1, path2], key=len) - if longer[:len(shorter)] == shorter: + if longer[: len(shorter)] == shorter: return True - + # If paths are the same but values differ, they conflict if path1 == path2 and value1 != value2: return True - + return False -def flatten_mapping(mapping: Mapping[str, Any], prefix: List[str] = None) -> List[tuple[List[str], Any]]: + +def flatten_mapping( + mapping: Mapping[str, Any], + prefix: Optional[List[str]] = None +) -> List[tuple[List[str], Any]]: """ Convert a nested mapping into a list of (path, value) pairs e.g. {"a": {"b": 1}} -> [(["a", "b"], 1)] """ prefix = prefix or [] result = [] - + for key, value in mapping.items(): current_path = prefix + [key] if isinstance(value, Mapping): result.extend(flatten_mapping(value, current_path)) else: result.append((current_path, value)) - + return result + def combine_mappings( mappings: List[Optional[Union[Mapping[str, Any], str]]], ) -> Union[Mapping[str, Any], str]: @@ -54,29 +59,33 @@ def combine_mappings( string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None) if string_options > 1: raise ValueError("Cannot combine multiple string options") - - non_empty_mappings = [m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m)] - + + non_empty_mappings = [ + m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m) + ] + if string_options == 1: if len(non_empty_mappings) > 1: raise ValueError("Cannot combine multiple options if one is a string") return next(m for m in non_empty_mappings if isinstance(m, str)) - + # Convert all mappings to flat (path, value) pairs for conflict detection all_paths: List[List[tuple[List[str], Any]]] = [] for mapping in mappings: if mapping is None or not isinstance(mapping, Mapping): continue all_paths.append(flatten_mapping(mapping)) - + # Check for conflicts for i, paths1 in enumerate(all_paths): for path1, value1 in paths1: - for paths2 in all_paths[i+1:]: + for paths2 in all_paths[i + 1 :]: for path2, value2 in paths2: if has_nested_conflict(path1, value1, path2, value2): - raise ValueError(f"Duplicate keys or nested path conflict found: {'.'.join(path1)} conflicts with {'.'.join(path2)}") - + raise ValueError( + f"Duplicate keys or nested path conflict found: {'.'.join(path1)} conflicts with {'.'.join(path2)}" + ) + # If no conflicts, merge all mappings result: Dict[str, Any] = {} for mapping in mappings: @@ -88,5 +97,5 @@ def combine_mappings( for key in prefix: current = current.setdefault(key, {}) current[last] = value - + return result From a524f3e8d3b67b2610e90a6d2e440d212abca0b1 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Mon, 6 Jan 2025 14:48:19 -0800 Subject: [PATCH 04/25] chore: fix test and format --- .../declarative/requesters/request_option.py | 16 ++++++++++------ airbyte_cdk/utils/mapping_helpers.py | 3 +-- .../paginators/test_default_paginator.py | 4 +++- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index f64447443..13957b3a0 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -75,12 +75,12 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def is_field_path(self) -> bool: """Returns whether this option uses a field path""" return self.field_path is not None - + def inject_into_dict( - self, - target: MutableMapping[str, Any], - value: Any, - config: Config, + self, + target: MutableMapping[str, Any], + value: Any, + config: Config, ) -> None: """ Inject a value into a target dict using either field_name or field_path @@ -104,4 +104,8 @@ def inject_into_dict( current[final_key] = value else: assert self.field_name is not None - target[self.field_name.eval(config=config) if isinstance(self.field_name, InterpolatedString) else self.field_name] = value + target[ + self.field_name.eval(config=config) + if isinstance(self.field_name, InterpolatedString) + else self.field_name + ] = value diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index e8013119e..d9b31ce53 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -25,8 +25,7 @@ def has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: def flatten_mapping( - mapping: Mapping[str, Any], - prefix: Optional[List[str]] = None + mapping: Mapping[str, Any], prefix: Optional[List[str]] = None ) -> List[tuple[List[str], Any]]: """ Convert a nested mapping into a list of (path, value) pairs diff --git a/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py b/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py index 1cd34c42f..996b8abc4 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py @@ -409,7 +409,9 @@ def test_paginator_with_page_option_no_page_size(): DefaultPaginator( page_size_option=MagicMock(), page_token_option=RequestOption( - "limit", RequestOptionType.request_parameter, parameters={} + field_name="limit", + inject_into=RequestOptionType.request_parameter, + parameters={}, ), pagination_strategy=pagination_strategy, config=MagicMock(), From fcaf110b50bfcd0282db99ae72e2fdc0715128c0 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Mon, 6 Jan 2025 15:38:10 -0800 Subject: [PATCH 05/25] feat: update ApiKeyAuthenticator to handle field_path in JSON body --- airbyte_cdk/sources/declarative/auth/token.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/auth/token.py b/airbyte_cdk/sources/declarative/auth/token.py index 12fb899b9..3e34ef274 100644 --- a/airbyte_cdk/sources/declarative/auth/token.py +++ b/airbyte_cdk/sources/declarative/auth/token.py @@ -5,7 +5,7 @@ import base64 import logging from dataclasses import InitVar, dataclass -from typing import Any, Mapping, Union +from typing import Any, Mapping, MutableMapping, Union import requests from cachetools import TTLCache, cached @@ -46,9 +46,19 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator): parameters: InitVar[Mapping[str, Any]] def __post_init__(self, parameters: Mapping[str, Any]) -> None: - self._field_name = InterpolatedString.create( - self.request_option.field_name, parameters=parameters - ) + # Only body_json requests can specify a field_path. All other request types must use a top-level field_name + if ( + self.request_option.field_name is None + and self.request_option.inject_into != RequestOptionType.body_json + ): + raise ValueError( + "ApiKeyAuthenticator requires a top-level field_name for non-body-json requests" + ) + + if self.request_option.field_name is not None: + self._field_name = InterpolatedString.create( + self.request_option.field_name, parameters=parameters + ) @property def auth_header(self) -> str: @@ -60,9 +70,15 @@ def token(self) -> str: return self.token_provider.get_token() def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]: - options = {} + options: MutableMapping[str, Any] = {} if self.request_option.inject_into == option_type: - options[self._field_name.eval(self.config)] = self.token + if option_type == RequestOptionType.body_json: + # For JSON body injection, we support both field_name and field_path injection. + # This allows nested structures to be injected like {"data": {"auth": {"token": "value"}}} + self.request_option.inject_into_dict(options, self.token, self.config) + else: + assert self._field_name is not None + options[self._field_name.eval(self.config)] = self.token return options def get_request_params(self) -> Mapping[str, Any]: From fd2de58e020f8270b48b55ca4bfc7f51619b7377 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 7 Jan 2025 11:53:39 -0800 Subject: [PATCH 06/25] feat: update DatetimeBasedCursor injection logic --- .../incremental/datetime_based_cursor.py | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index d6d329aec..96a5cfdd3 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -76,6 +76,11 @@ class DatetimeBasedCursor(DeclarativeCursor): cursor_datetime_formats: List[str] = field(default_factory=lambda: []) def __post_init__(self, parameters: Mapping[str, Any]) -> None: + + for option, name in [(self.start_time_option, "start_time_option"), (self.end_time_option, "end_time_option")]: + if option and option.field_name is None and option.inject_into != RequestOptionType.body_json: + raise ValueError(f"DatetimeBasedCursor requires a top-level field_name for non-body-json requests in {name}.") + if (self.step and not self.cursor_granularity) or ( not self.step and self.cursor_granularity ): @@ -365,14 +370,30 @@ def _get_request_options( options: MutableMapping[str, Any] = {} if not stream_slice: return options + if self.start_time_option and self.start_time_option.inject_into == option_type: - options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string - self._partition_field_start.eval(self.config) - ) + start_time_value = stream_slice.get(self._partition_field_start.eval(self.config)) + # Support for injecting nested fields when injecting into body_json + if option_type == RequestOptionType.body_json: + self.start_time_option.inject_into_dict(options, start_time_value, self.config) + else: + assert self.start_time_option.field_name is not None + field_name = self.start_time_option.field_name + key = field_name.eval(self.config) if isinstance(field_name, InterpolatedString) else field_name + options[key] = start_time_value + if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] - self._partition_field_end.eval(self.config) - ) + end_time_value = stream_slice.get(self._partition_field_end.eval(self.config)) + if option_type == RequestOptionType.body_json: + # For JSON bodies, support both field_name and field_path to enable nested structures + self.end_time_option.inject_into_dict(options, end_time_value, self.config) + else: + # For non-JSON requests, only support top-level field names + assert self.end_time_option.field_name is not None + field_name = self.end_time_option.field_name + key = field_name.eval(self.config) if isinstance(field_name, InterpolatedString) else field_name + options[key] = end_time_value + return options def should_be_synced(self, record: Record) -> bool: From 20cc5d6a21913cacdf2939ca62a94466ca2702aa Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 7 Jan 2025 12:54:18 -0800 Subject: [PATCH 07/25] chore: update declarative schema/generate RequestOption model --- .../declarative_component_schema.yaml | 16 ++- .../models/declarative_component_schema.py | 102 ++++++++++++------ .../declarative/requesters/request_option.py | 16 --- 3 files changed, 80 insertions(+), 54 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 89c731075..4d6e8a39b 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -2616,18 +2616,17 @@ definitions: enum: [RequestPath] RequestOption: title: Request Option - description: Specifies the key field and where in the request a component's value should be injected. + description: Specifies the key field or path and where in the request a component's value should be injected. type: object required: - type - - field_name - inject_into properties: type: type: string enum: [RequestOption] field_name: - title: Request Option + title: Field Name description: Configures which key should be used in the location that the descriptor is being injected into type: string examples: @@ -2635,6 +2634,17 @@ definitions: interpolation_context: - config - parameters + field_path: + title: Field Path + description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries) + type: array + items: + type: string + examples: + - ["data", "viewer", "id"] + interpolation_context: + - config + - parameters inject_into: title: Inject Into description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated. diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 5823b34c1..3f27dc811 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -534,7 +534,9 @@ class OAuthAuthenticator(BaseModel): scopes: Optional[List[str]] = Field( None, description="List of scopes that should be granted to the access token.", - examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], + examples=[ + ["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"] + ], title="Scopes", ) token_expiry_date: Optional[str] = Field( @@ -833,7 +835,9 @@ class Config: access_token_headers: Optional[Dict[str, Any]] = Field( None, description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.", - examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}], + examples=[ + {"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"} + ], title="Access Token Headers", ) access_token_params: Optional[Dict[str, Any]] = Field( @@ -902,24 +906,28 @@ class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( - None, - description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", - examples=[ - {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, - { - "app_id": { - "type": "string", - "path_in_connector_config": ["info", "app_id"], - } - }, - ], - title="OAuth user input", + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = ( + Field( + None, + description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + examples=[ + {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, + { + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"], + } + }, + ], + title="OAuth user input", + ) ) - oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field( - None, - description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }', - title="DeclarativeOAuth Connector Specification", + oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = ( + Field( + None, + description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }', + title="DeclarativeOAuth Connector Specification", + ) ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, @@ -937,7 +945,9 @@ class Config: complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", - examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], + examples=[ + {"client_id": {"type": "string"}, "client_secret": {"type": "string"}} + ], title="OAuth input specification", ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( @@ -1057,11 +1067,17 @@ class InjectInto(Enum): class RequestOption(BaseModel): type: Literal["RequestOption"] - field_name: str = Field( - ..., + field_name: Optional[str] = Field( + None, description="Configures which key should be used in the location that the descriptor is being injected into", examples=["segment_id"], - title="Request Option", + title="Field Name", + ) + field_path: Optional[List[str]] = Field( + None, + description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)", + examples=[["data", "viewer", "id"]], + title="Field Path", ) inject_into: InjectInto = Field( ..., @@ -1671,12 +1687,16 @@ class Config: description="Component used to coordinate how records are extracted across stream slices and request pages.", title="Retriever", ) - incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( - None, - description="Component used to fetch data incrementally based on a time field in the data.", - title="Incremental Sync", + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = ( + Field( + None, + description="Component used to fetch data incrementally based on a time field in the data.", + title="Incremental Sync", + ) + ) + name: Optional[str] = Field( + "", description="The stream name.", example=["Users"], title="Name" ) - name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") primary_key: Optional[PrimaryKey] = Field( "", description="The primary key of the stream.", title="Primary Key" ) @@ -1944,7 +1964,11 @@ class SimpleRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], + List[ + Union[ + CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter + ] + ], ] ] = Field( [], @@ -1987,7 +2011,9 @@ class AsyncRetriever(BaseModel): ) download_extractor: Optional[ Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor] - ] = Field(None, description="Responsible for fetching the records from provided urls.") + ] = Field( + None, description="Responsible for fetching the records from provided urls." + ) creation_requester: Union[CustomRequester, HttpRequester] = Field( ..., description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", @@ -2017,7 +2043,11 @@ class AsyncRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], + List[ + Union[ + CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter + ] + ], ] ] = Field( [], @@ -2081,10 +2111,12 @@ class DynamicDeclarativeStream(BaseModel): stream_template: DeclarativeStream = Field( ..., description="Reference to the stream template.", title="Stream Template" ) - components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = Field( - ..., - description="Component resolve and populates stream templates with components values.", - title="Components Resolver", + components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = ( + Field( + ..., + description="Component resolve and populates stream templates with components values.", + title="Components Resolver", + ) ) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index 13957b3a0..e8235b47c 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -21,22 +21,6 @@ class RequestOptionType(Enum): body_json = "body_json" -@dataclass -class FieldName: - """Represents a direct field name reference""" - - value: Union[InterpolatedString, str] - type: Literal["field_name"] = "field_name" - - -@dataclass -class FieldPath: - """Represents a path to a nested field""" - - value: List[Union[InterpolatedString, str]] - type: Literal["field_path"] = "field_path" - - @dataclass class RequestOption: """ From 7292b5777f67af8c24c44eede3cc1e52a3d99292 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 7 Jan 2025 13:48:07 -0800 Subject: [PATCH 08/25] fix: add type validations and unit tests --- .../declarative/requesters/request_option.py | 16 +- .../paginators/test_request_option.py | 222 ++++++++++++++++++ 2 files changed, 237 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index e8235b47c..7d00ce8d4 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -41,10 +41,24 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: # Validate inputs if self.field_name is None and self.field_path is None: raise ValueError("RequestOption requires either a field_name or field_path") + if self.field_name is not None and self.field_path is not None: raise ValueError( "Only one of field_name or field_path can be provided to RequestOption" ) + + if self.field_name is not None and not isinstance(self.field_name, (str, InterpolatedString)): + raise TypeError(f"field_name expects a string, but got {type(self.field_name)}") + + if self.field_path is not None: + if not isinstance(self.field_path, list): + raise TypeError(f"field_path expects a list, but got {type(self.field_path)}") + for value in self.field_path: + if not isinstance(value, (str, InterpolatedString)): + raise TypeError(f"field_path values must be strings, got {type(value)}") + + if self.field_path is not None and self.inject_into != RequestOptionType.body_json: + raise ValueError("Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types.") # Handle interpolation if self.field_name is not None: @@ -79,7 +93,7 @@ def inject_into_dict( assert self.field_path is not None current = target *path_parts, final_key = [ - segment.eval(config=config) if isinstance(segment, InterpolatedString) else segment + str(segment.eval(config=config) if isinstance(segment, InterpolatedString) else segment) for segment in self.field_path ] diff --git a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 69866773c..9e1b25ce8 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -3,6 +3,7 @@ # import pytest +from typing import Any, Dict, List, Optional from airbyte_cdk.sources.declarative.requesters.request_option import ( RequestOption, @@ -55,3 +56,224 @@ def test_request_option(option_type: RequestOptionType, field_name: str, expecte ) assert request_option.field_name.eval({"cursor_field": "created_at"}) == expected_field_name assert request_option.inject_into == option_type + + +def test_request_option_validation(): + """Test that RequestOption properly validates its inputs""" + # Should raise when neither field_name nor field_path is provided + with pytest.raises(ValueError, match="RequestOption requires either a field_name or field_path"): + RequestOption(inject_into=RequestOptionType.body_json, parameters={}) + + # Should raise when both field_name and field_path are provided + with pytest.raises(ValueError, match="Only one of field_name or field_path can be provided"): + RequestOption( + field_name="field", + field_path=["data", "field"], + inject_into=RequestOptionType.body_json, + parameters={} + ) + + # Should raise when field_path is used with non-body-json request type + with pytest.raises(ValueError, match="Nested field injection is only supported for body JSON injection."): + RequestOption( + field_path=["data", "field"], + inject_into=RequestOptionType.header, + parameters={} + ) + + +def test_inject_into_dict(): + """Test the inject_into_dict functionality""" + config = {"base_field": "value"} + + # Test with field_name + request_option = RequestOption( + field_name="test_{{ config['base_field'] }}", + inject_into=RequestOptionType.body_json, + parameters={} + ) + target: Dict[str, Any] = {} + request_option.inject_into_dict(target, "test_value", config) + assert target == {"test_value": "test_value"} + + # Test with field_path + request_option = RequestOption( + field_path=["data", "nested_{{ config['base_field'] }}", "field"], + inject_into=RequestOptionType.body_json, + parameters={} + ) + target = {} + request_option.inject_into_dict(target, "test_value", config) + assert target == {"data": {"nested_value": {"field": "test_value"}}} + +@pytest.mark.parametrize( + "field_name, field_path, inject_into, expected_is_field_path", + [ + ("field", None, RequestOptionType.body_json, False), + (None, ["data", "field"], RequestOptionType.body_json, True), + ] +) +def test_is_field_path( + field_name: Optional[str], + field_path: Optional[List[str]], + inject_into: RequestOptionType, + expected_is_field_path: bool +): + """Test the is_field_path property""" + request_option = RequestOption( + field_name=field_name, + field_path=field_path, + inject_into=inject_into, + parameters={} + ) + assert request_option.is_field_path == expected_is_field_path + +def test_interpolation_in_field_path(): + """Test that interpolation works in field paths""" + config = {"nested": "user"} + parameters = {"type": "profile"} + + request_option = RequestOption( + field_path=["data", "{{ config['nested'] }}", "{{ parameters['type'] }}"], + inject_into=RequestOptionType.body_json, + parameters=parameters + ) + + target = {} + request_option.inject_into_dict(target, "test_value", config) + assert target == {"data": {"user": {"profile": "test_value"}}} + + +def test_inject_into_dict_multiple_injections(): + """Test injecting multiple values into the same target dict""" + config = {"base": "test"} + + # Create target with existing data + target = {"existing": "value"} + + # First injection with field_name + option1 = RequestOption( + field_name="field1", + inject_into=RequestOptionType.body_json, + parameters={} + ) + option1.inject_into_dict(target, "value1", config) + + # Second injection with nested path + option2 = RequestOption( + field_path=["data", "nested", "field2"], + inject_into=RequestOptionType.body_json, + parameters={} + ) + option2.inject_into_dict(target, "value2", config) + + assert target == { + "existing": "value", + "field1": "value1", + "data": { + "nested": { + "field2": "value2" + } + } + } + +def test_inject_into_dict_deep_nesting(): + """Test injecting values into deeply nested structures""" + config = {} + target = {} + + request_option = RequestOption( + field_path=["level1", "level2", "level3", "level4", "field"], + inject_into=RequestOptionType.body_json, + parameters={} + ) + request_option.inject_into_dict(target, "deep_value", config) + + assert target == { + "level1": { + "level2": { + "level3": { + "level4": { + "field": "deep_value" + } + } + } + } + } + +def test_inject_into_dict_various_value_types(): + """Test injecting different types of values""" + config = {} + test_cases = [ + (42, "integer"), + (3.14, "float"), + (True, "boolean"), + (["a", "b", "c"], "list"), + ({"key": "value"}, "dict"), + (None, "none") + ] + + for value, name in test_cases: + target = {} + request_option = RequestOption( + field_path=["data", name], + inject_into=RequestOptionType.body_json, + parameters={} + ) + request_option.inject_into_dict(target, value, config) + assert target["data"][name] == value + +def test_inject_into_dict_with_interpolation_combinations(): + """Test various combinations of static and interpolated path segments""" + config = {"user_type": "admin", "section": "profile"} + parameters = {"id": "12345"} + + request_option = RequestOption( + field_path=[ + "data", + "{{ config['user_type'] }}", + "{{ parameters['id'] }}", + "{{ config['section'] }}", + "details" + ], + inject_into=RequestOptionType.body_json, + parameters=parameters + ) + + target = {} + request_option.inject_into_dict(target, "test_value", config) + + assert target == { + "data": { + "admin": { + "12345": { + "profile": { + "details": "test_value" + } + } + } + } + } + +def test_inject_into_dict_error_handling(): + """Test error cases for inject_into_dict""" + config = {} + + # Test with invalid target type + with pytest.raises(TypeError): + request_option = RequestOption( + field_name="test", + inject_into=RequestOptionType.body_json, + parameters={} + ) + request_option.inject_into_dict(None, "value", config) # type: ignore + + # Test with invalid path segments + with pytest.raises(TypeError): + request_option = RequestOption( + field_path={"this": "should", "be": "a list"}, # type: ignore + inject_into=RequestOptionType.body_json, + parameters={} + ) + target = {} + request_option.inject_into_dict(target, "value", config) From ebb1791741af335d999fe0432e30930394edb97b Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 7 Jan 2025 13:49:00 -0800 Subject: [PATCH 09/25] chore: format --- .../incremental/datetime_based_cursor.py | 30 ++++- .../models/declarative_component_schema.py | 90 +++++-------- .../declarative/requesters/request_option.py | 20 ++- .../paginators/test_request_option.py | 122 +++++++----------- 4 files changed, 118 insertions(+), 144 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 96a5cfdd3..93a7fbb7d 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -76,10 +76,18 @@ class DatetimeBasedCursor(DeclarativeCursor): cursor_datetime_formats: List[str] = field(default_factory=lambda: []) def __post_init__(self, parameters: Mapping[str, Any]) -> None: - - for option, name in [(self.start_time_option, "start_time_option"), (self.end_time_option, "end_time_option")]: - if option and option.field_name is None and option.inject_into != RequestOptionType.body_json: - raise ValueError(f"DatetimeBasedCursor requires a top-level field_name for non-body-json requests in {name}.") + for option, name in [ + (self.start_time_option, "start_time_option"), + (self.end_time_option, "end_time_option"), + ]: + if ( + option + and option.field_name is None + and option.inject_into != RequestOptionType.body_json + ): + raise ValueError( + f"DatetimeBasedCursor requires a top-level field_name for non-body-json requests in {name}." + ) if (self.step and not self.cursor_granularity) or ( not self.step and self.cursor_granularity @@ -370,7 +378,7 @@ def _get_request_options( options: MutableMapping[str, Any] = {} if not stream_slice: return options - + if self.start_time_option and self.start_time_option.inject_into == option_type: start_time_value = stream_slice.get(self._partition_field_start.eval(self.config)) # Support for injecting nested fields when injecting into body_json @@ -379,7 +387,11 @@ def _get_request_options( else: assert self.start_time_option.field_name is not None field_name = self.start_time_option.field_name - key = field_name.eval(self.config) if isinstance(field_name, InterpolatedString) else field_name + key = ( + field_name.eval(self.config) + if isinstance(field_name, InterpolatedString) + else field_name + ) options[key] = start_time_value if self.end_time_option and self.end_time_option.inject_into == option_type: @@ -391,7 +403,11 @@ def _get_request_options( # For non-JSON requests, only support top-level field names assert self.end_time_option.field_name is not None field_name = self.end_time_option.field_name - key = field_name.eval(self.config) if isinstance(field_name, InterpolatedString) else field_name + key = ( + field_name.eval(self.config) + if isinstance(field_name, InterpolatedString) + else field_name + ) options[key] = end_time_value return options diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 3f27dc811..c8c3f7c7e 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -534,9 +534,7 @@ class OAuthAuthenticator(BaseModel): scopes: Optional[List[str]] = Field( None, description="List of scopes that should be granted to the access token.", - examples=[ - ["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"] - ], + examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], title="Scopes", ) token_expiry_date: Optional[str] = Field( @@ -835,9 +833,7 @@ class Config: access_token_headers: Optional[Dict[str, Any]] = Field( None, description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.", - examples=[ - {"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"} - ], + examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}], title="Access Token Headers", ) access_token_params: Optional[Dict[str, Any]] = Field( @@ -906,28 +902,24 @@ class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = ( - Field( - None, - description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", - examples=[ - {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, - { - "app_id": { - "type": "string", - "path_in_connector_config": ["info", "app_id"], - } - }, - ], - title="OAuth user input", - ) + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( + None, + description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + examples=[ + {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, + { + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"], + } + }, + ], + title="OAuth user input", ) - oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = ( - Field( - None, - description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }', - title="DeclarativeOAuth Connector Specification", - ) + oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field( + None, + description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }', + title="DeclarativeOAuth Connector Specification", ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, @@ -945,9 +937,7 @@ class Config: complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", - examples=[ - {"client_id": {"type": "string"}, "client_secret": {"type": "string"}} - ], + examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], title="OAuth input specification", ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( @@ -1687,16 +1677,12 @@ class Config: description="Component used to coordinate how records are extracted across stream slices and request pages.", title="Retriever", ) - incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = ( - Field( - None, - description="Component used to fetch data incrementally based on a time field in the data.", - title="Incremental Sync", - ) - ) - name: Optional[str] = Field( - "", description="The stream name.", example=["Users"], title="Name" + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( + None, + description="Component used to fetch data incrementally based on a time field in the data.", + title="Incremental Sync", ) + name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") primary_key: Optional[PrimaryKey] = Field( "", description="The primary key of the stream.", title="Primary Key" ) @@ -1964,11 +1950,7 @@ class SimpleRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], @@ -2011,9 +1993,7 @@ class AsyncRetriever(BaseModel): ) download_extractor: Optional[ Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor] - ] = Field( - None, description="Responsible for fetching the records from provided urls." - ) + ] = Field(None, description="Responsible for fetching the records from provided urls.") creation_requester: Union[CustomRequester, HttpRequester] = Field( ..., description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", @@ -2043,11 +2023,7 @@ class AsyncRetriever(BaseModel): CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], @@ -2111,12 +2087,10 @@ class DynamicDeclarativeStream(BaseModel): stream_template: DeclarativeStream = Field( ..., description="Reference to the stream template.", title="Stream Template" ) - components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = ( - Field( - ..., - description="Component resolve and populates stream templates with components values.", - title="Components Resolver", - ) + components_resolver: Union[HttpComponentsResolver, ConfigComponentsResolver] = Field( + ..., + description="Component resolve and populates stream templates with components values.", + title="Components Resolver", ) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index 7d00ce8d4..d8e37bfc6 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -41,15 +41,17 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: # Validate inputs if self.field_name is None and self.field_path is None: raise ValueError("RequestOption requires either a field_name or field_path") - + if self.field_name is not None and self.field_path is not None: raise ValueError( "Only one of field_name or field_path can be provided to RequestOption" ) - - if self.field_name is not None and not isinstance(self.field_name, (str, InterpolatedString)): + + if self.field_name is not None and not isinstance( + self.field_name, (str, InterpolatedString) + ): raise TypeError(f"field_name expects a string, but got {type(self.field_name)}") - + if self.field_path is not None: if not isinstance(self.field_path, list): raise TypeError(f"field_path expects a list, but got {type(self.field_path)}") @@ -58,7 +60,9 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: raise TypeError(f"field_path values must be strings, got {type(value)}") if self.field_path is not None and self.inject_into != RequestOptionType.body_json: - raise ValueError("Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types.") + raise ValueError( + "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types." + ) # Handle interpolation if self.field_name is not None: @@ -93,7 +97,11 @@ def inject_into_dict( assert self.field_path is not None current = target *path_parts, final_key = [ - str(segment.eval(config=config) if isinstance(segment, InterpolatedString) else segment) + str( + segment.eval(config=config) + if isinstance(segment, InterpolatedString) + else segment + ) for segment in self.field_path ] diff --git a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 9e1b25ce8..7881ccaf5 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -2,9 +2,10 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import pytest from typing import Any, Dict, List, Optional +import pytest + from airbyte_cdk.sources.declarative.requesters.request_option import ( RequestOption, RequestOptionType, @@ -61,7 +62,9 @@ def test_request_option(option_type: RequestOptionType, field_name: str, expecte def test_request_option_validation(): """Test that RequestOption properly validates its inputs""" # Should raise when neither field_name nor field_path is provided - with pytest.raises(ValueError, match="RequestOption requires either a field_name or field_path"): + with pytest.raises( + ValueError, match="RequestOption requires either a field_name or field_path" + ): RequestOption(inject_into=RequestOptionType.body_json, parameters={}) # Should raise when both field_name and field_path are provided @@ -70,27 +73,27 @@ def test_request_option_validation(): field_name="field", field_path=["data", "field"], inject_into=RequestOptionType.body_json, - parameters={} + parameters={}, ) # Should raise when field_path is used with non-body-json request type - with pytest.raises(ValueError, match="Nested field injection is only supported for body JSON injection."): + with pytest.raises( + ValueError, match="Nested field injection is only supported for body JSON injection." + ): RequestOption( - field_path=["data", "field"], - inject_into=RequestOptionType.header, - parameters={} + field_path=["data", "field"], inject_into=RequestOptionType.header, parameters={} ) def test_inject_into_dict(): """Test the inject_into_dict functionality""" config = {"base_field": "value"} - + # Test with field_name request_option = RequestOption( field_name="test_{{ config['base_field'] }}", inject_into=RequestOptionType.body_json, - parameters={} + parameters={}, ) target: Dict[str, Any] = {} request_option.inject_into_dict(target, "test_value", config) @@ -100,45 +103,44 @@ def test_inject_into_dict(): request_option = RequestOption( field_path=["data", "nested_{{ config['base_field'] }}", "field"], inject_into=RequestOptionType.body_json, - parameters={} + parameters={}, ) target = {} request_option.inject_into_dict(target, "test_value", config) assert target == {"data": {"nested_value": {"field": "test_value"}}} + @pytest.mark.parametrize( "field_name, field_path, inject_into, expected_is_field_path", [ ("field", None, RequestOptionType.body_json, False), (None, ["data", "field"], RequestOptionType.body_json, True), - ] + ], ) def test_is_field_path( field_name: Optional[str], field_path: Optional[List[str]], inject_into: RequestOptionType, - expected_is_field_path: bool + expected_is_field_path: bool, ): """Test the is_field_path property""" request_option = RequestOption( - field_name=field_name, - field_path=field_path, - inject_into=inject_into, - parameters={} + field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} ) assert request_option.is_field_path == expected_is_field_path + def test_interpolation_in_field_path(): """Test that interpolation works in field paths""" config = {"nested": "user"} parameters = {"type": "profile"} - + request_option = RequestOption( field_path=["data", "{{ config['nested'] }}", "{{ parameters['type'] }}"], inject_into=RequestOptionType.body_json, - parameters=parameters + parameters=parameters, ) - + target = {} request_option.inject_into_dict(target, "test_value", config) assert target == {"data": {"user": {"profile": "test_value"}}} @@ -147,59 +149,45 @@ def test_interpolation_in_field_path(): def test_inject_into_dict_multiple_injections(): """Test injecting multiple values into the same target dict""" config = {"base": "test"} - + # Create target with existing data target = {"existing": "value"} - + # First injection with field_name option1 = RequestOption( - field_name="field1", - inject_into=RequestOptionType.body_json, - parameters={} + field_name="field1", inject_into=RequestOptionType.body_json, parameters={} ) option1.inject_into_dict(target, "value1", config) - + # Second injection with nested path option2 = RequestOption( field_path=["data", "nested", "field2"], inject_into=RequestOptionType.body_json, - parameters={} + parameters={}, ) option2.inject_into_dict(target, "value2", config) - + assert target == { "existing": "value", "field1": "value1", - "data": { - "nested": { - "field2": "value2" - } - } + "data": {"nested": {"field2": "value2"}}, } + def test_inject_into_dict_deep_nesting(): """Test injecting values into deeply nested structures""" config = {} target = {} - + request_option = RequestOption( field_path=["level1", "level2", "level3", "level4", "field"], inject_into=RequestOptionType.body_json, - parameters={} + parameters={}, ) request_option.inject_into_dict(target, "deep_value", config) - - assert target == { - "level1": { - "level2": { - "level3": { - "level4": { - "field": "deep_value" - } - } - } - } - } + + assert target == {"level1": {"level2": {"level3": {"level4": {"field": "deep_value"}}}}} + def test_inject_into_dict_various_value_types(): """Test injecting different types of values""" @@ -210,70 +198,58 @@ def test_inject_into_dict_various_value_types(): (True, "boolean"), (["a", "b", "c"], "list"), ({"key": "value"}, "dict"), - (None, "none") + (None, "none"), ] - + for value, name in test_cases: target = {} request_option = RequestOption( - field_path=["data", name], - inject_into=RequestOptionType.body_json, - parameters={} + field_path=["data", name], inject_into=RequestOptionType.body_json, parameters={} ) request_option.inject_into_dict(target, value, config) assert target["data"][name] == value + def test_inject_into_dict_with_interpolation_combinations(): """Test various combinations of static and interpolated path segments""" config = {"user_type": "admin", "section": "profile"} parameters = {"id": "12345"} - + request_option = RequestOption( field_path=[ "data", "{{ config['user_type'] }}", "{{ parameters['id'] }}", "{{ config['section'] }}", - "details" + "details", ], inject_into=RequestOptionType.body_json, - parameters=parameters + parameters=parameters, ) - + target = {} request_option.inject_into_dict(target, "test_value", config) - - assert target == { - "data": { - "admin": { - "12345": { - "profile": { - "details": "test_value" - } - } - } - } - } + + assert target == {"data": {"admin": {"12345": {"profile": {"details": "test_value"}}}}} + def test_inject_into_dict_error_handling(): """Test error cases for inject_into_dict""" config = {} - + # Test with invalid target type with pytest.raises(TypeError): request_option = RequestOption( - field_name="test", - inject_into=RequestOptionType.body_json, - parameters={} + field_name="test", inject_into=RequestOptionType.body_json, parameters={} ) request_option.inject_into_dict(None, "value", config) # type: ignore - + # Test with invalid path segments with pytest.raises(TypeError): request_option = RequestOption( field_path={"this": "should", "be": "a list"}, # type: ignore inject_into=RequestOptionType.body_json, - parameters={} + parameters={}, ) target = {} request_option.inject_into_dict(target, "value", config) From 5051d0db9d6bd8ca944f0764d869ec518d28bb7a Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 7 Jan 2025 14:05:39 -0800 Subject: [PATCH 10/25] refactor: update tests --- .../paginators/test_request_option.py | 299 +++++++++--------- 1 file changed, 143 insertions(+), 156 deletions(-) diff --git a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 7881ccaf5..0feb4b5d0 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -1,8 +1,4 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Type, Union import pytest @@ -36,22 +32,11 @@ (RequestOptionType.body_data, "since_{{ config['cursor_field'] }}", "since_created_at"), (RequestOptionType.body_json, "since_{{ config['cursor_field'] }}", "since_created_at"), ], - ids=[ - "test_limit_param_with_field_name", - "test_limit_header_with_field_name", - "test_limit_data_with_field_name", - "test_limit_json_with_field_name", - "test_limit_param_with_parameters_interpolation", - "test_limit_header_with_parameters_interpolation", - "test_limit_data_with_parameters_interpolation", - "test_limit_json_with_parameters_interpolation", - "test_limit_param_with_config_interpolation", - "test_limit_header_with_config_interpolation", - "test_limit_data_with_config_interpolation", - "test_limit_json_with_config_interpolation", - ], ) -def test_request_option(option_type: RequestOptionType, field_name: str, expected_field_name: str): +def test_request_option_field_name( + option_type: RequestOptionType, field_name: str, expected_field_name: str +): + """Test the original field_name functionality""" request_option = RequestOption( inject_into=option_type, field_name=field_name, parameters={"cursor_field": "updated_at"} ) @@ -59,55 +44,156 @@ def test_request_option(option_type: RequestOptionType, field_name: str, expecte assert request_option.inject_into == option_type -def test_request_option_validation(): - """Test that RequestOption properly validates its inputs""" - # Should raise when neither field_name nor field_path is provided - with pytest.raises( - ValueError, match="RequestOption requires either a field_name or field_path" - ): - RequestOption(inject_into=RequestOptionType.body_json, parameters={}) - - # Should raise when both field_name and field_path are provided - with pytest.raises(ValueError, match="Only one of field_name or field_path can be provided"): - RequestOption( - field_name="field", - field_path=["data", "field"], - inject_into=RequestOptionType.body_json, - parameters={}, - ) - - # Should raise when field_path is used with non-body-json request type - with pytest.raises( - ValueError, match="Nested field injection is only supported for body JSON injection." - ): +@pytest.mark.parametrize( + "field_name, field_path, inject_into, error_type, error_message", + [ + ( + None, + None, + RequestOptionType.body_json, + ValueError, + "RequestOption requires either a field_name or field_path", + ), + ( + "field", + ["data", "field"], + RequestOptionType.body_json, + ValueError, + "Only one of field_name or field_path can be provided", + ), + ( + None, + ["data", "field"], + RequestOptionType.header, + ValueError, + "Nested field injection is only supported for body JSON injection.", + ), + ( + None, + {"this": "should", "be": "a list"}, + RequestOptionType.body_json, + TypeError, + "field_path expects a list", + ), + ], +) +def test_request_option_validation( + field_name: Optional[str], + field_path: Any, + inject_into: RequestOptionType, + error_type: Type[Exception], + error_message: str, +): + """Test various validation cases for RequestOption""" + with pytest.raises(error_type, match=error_message): RequestOption( - field_path=["data", "field"], inject_into=RequestOptionType.header, parameters={} + field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} ) -def test_inject_into_dict(): - """Test the inject_into_dict functionality""" +@pytest.mark.parametrize( + "request_option_args, value, expected_result", + [ + # Basic field_name test + ( + { + "field_name": "test_{{ config['base_field'] }}", + "inject_into": RequestOptionType.body_json, + }, + "test_value", + {"test_value": "test_value"}, + ), + # Basic field_path test + ( + { + "field_path": ["data", "nested_{{ config['base_field'] }}", "field"], + "inject_into": RequestOptionType.body_json, + }, + "test_value", + {"data": {"nested_value": {"field": "test_value"}}}, + ), + # Deep nesting test + ( + { + "field_path": ["level1", "level2", "level3", "level4", "field"], + "inject_into": RequestOptionType.body_json, + }, + "deep_value", + {"level1": {"level2": {"level3": {"level4": {"field": "deep_value"}}}}}, + ), + ], +) +def test_inject_into_dict_cases( + request_option_args: Dict[str, Any], value: Any, expected_result: Dict[str, Any] +): + """Test various injection cases""" config = {"base_field": "value"} + target: Dict[str, Any] = {} + + request_option = RequestOption(**request_option_args, parameters={}) + request_option.inject_into_dict(target, value, config) + assert target == expected_result + - # Test with field_name +@pytest.mark.parametrize( + "config, parameters, field_path, expected_structure", + [ + ( + {"nested": "user"}, + {"type": "profile"}, + ["data", "{{ config['nested'] }}", "{{ parameters['type'] }}"], + {"data": {"user": {"profile": "test_value"}}}, + ), + ( + {"user_type": "admin", "section": "profile"}, + {"id": "12345"}, + [ + "data", + "{{ config['user_type'] }}", + "{{ parameters['id'] }}", + "{{ config['section'] }}", + "details", + ], + {"data": {"admin": {"12345": {"profile": {"details": "test_value"}}}}}, + ), + ], +) +def test_interpolation_cases( + config: Dict[str, Any], + parameters: Dict[str, Any], + field_path: List[str], + expected_structure: Dict[str, Any], +): + """Test various interpolation scenarios""" request_option = RequestOption( - field_name="test_{{ config['base_field'] }}", - inject_into=RequestOptionType.body_json, - parameters={}, + field_path=field_path, inject_into=RequestOptionType.body_json, parameters=parameters ) target: Dict[str, Any] = {} request_option.inject_into_dict(target, "test_value", config) - assert target == {"test_value": "test_value"} + assert target == expected_structure - # Test with field_path + +@pytest.mark.parametrize( + "value, expected_type", + [ + (42, int), + (3.14, float), + (True, bool), + (["a", "b", "c"], list), + ({"key": "value"}, dict), + (None, type(None)), + ], +) +def test_value_type_handling(value: Any, expected_type: Type): + """Test handling of different value types""" + config = {} + target: Dict[str, Any] = {} request_option = RequestOption( - field_path=["data", "nested_{{ config['base_field'] }}", "field"], - inject_into=RequestOptionType.body_json, - parameters={}, + field_path=["data", "test"], inject_into=RequestOptionType.body_json, parameters={} ) - target = {} - request_option.inject_into_dict(target, "test_value", config) - assert target == {"data": {"nested_value": {"field": "test_value"}}} + request_option.inject_into_dict(target, value, config) + assert isinstance(target["data"]["test"], expected_type) + assert target["data"]["test"] == value @pytest.mark.parametrize( @@ -130,27 +216,9 @@ def test_is_field_path( assert request_option.is_field_path == expected_is_field_path -def test_interpolation_in_field_path(): - """Test that interpolation works in field paths""" - config = {"nested": "user"} - parameters = {"type": "profile"} - - request_option = RequestOption( - field_path=["data", "{{ config['nested'] }}", "{{ parameters['type'] }}"], - inject_into=RequestOptionType.body_json, - parameters=parameters, - ) - - target = {} - request_option.inject_into_dict(target, "test_value", config) - assert target == {"data": {"user": {"profile": "test_value"}}} - - -def test_inject_into_dict_multiple_injections(): +def test_multiple_injections(): """Test injecting multiple values into the same target dict""" config = {"base": "test"} - - # Create target with existing data target = {"existing": "value"} # First injection with field_name @@ -172,84 +240,3 @@ def test_inject_into_dict_multiple_injections(): "field1": "value1", "data": {"nested": {"field2": "value2"}}, } - - -def test_inject_into_dict_deep_nesting(): - """Test injecting values into deeply nested structures""" - config = {} - target = {} - - request_option = RequestOption( - field_path=["level1", "level2", "level3", "level4", "field"], - inject_into=RequestOptionType.body_json, - parameters={}, - ) - request_option.inject_into_dict(target, "deep_value", config) - - assert target == {"level1": {"level2": {"level3": {"level4": {"field": "deep_value"}}}}} - - -def test_inject_into_dict_various_value_types(): - """Test injecting different types of values""" - config = {} - test_cases = [ - (42, "integer"), - (3.14, "float"), - (True, "boolean"), - (["a", "b", "c"], "list"), - ({"key": "value"}, "dict"), - (None, "none"), - ] - - for value, name in test_cases: - target = {} - request_option = RequestOption( - field_path=["data", name], inject_into=RequestOptionType.body_json, parameters={} - ) - request_option.inject_into_dict(target, value, config) - assert target["data"][name] == value - - -def test_inject_into_dict_with_interpolation_combinations(): - """Test various combinations of static and interpolated path segments""" - config = {"user_type": "admin", "section": "profile"} - parameters = {"id": "12345"} - - request_option = RequestOption( - field_path=[ - "data", - "{{ config['user_type'] }}", - "{{ parameters['id'] }}", - "{{ config['section'] }}", - "details", - ], - inject_into=RequestOptionType.body_json, - parameters=parameters, - ) - - target = {} - request_option.inject_into_dict(target, "test_value", config) - - assert target == {"data": {"admin": {"12345": {"profile": {"details": "test_value"}}}}} - - -def test_inject_into_dict_error_handling(): - """Test error cases for inject_into_dict""" - config = {} - - # Test with invalid target type - with pytest.raises(TypeError): - request_option = RequestOption( - field_name="test", inject_into=RequestOptionType.body_json, parameters={} - ) - request_option.inject_into_dict(None, "value", config) # type: ignore - - # Test with invalid path segments - with pytest.raises(TypeError): - request_option = RequestOption( - field_path={"this": "should", "be": "a list"}, # type: ignore - inject_into=RequestOptionType.body_json, - parameters={}, - ) - target = {} - request_option.inject_into_dict(target, "value", config) From 952535769c14b2baad0d928915f5bdad6c5460f5 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 9 Jan 2025 12:04:20 -0800 Subject: [PATCH 11/25] refactor: centralize injection logic in RequestOption --- airbyte_cdk/sources/declarative/auth/token.py | 23 +-------------- .../incremental/datetime_based_cursor.py | 27 ++--------------- .../declarative/requesters/request_option.py | 29 ++++++++++++------- .../paginators/test_request_option.py | 12 ++++---- 4 files changed, 28 insertions(+), 63 deletions(-) diff --git a/airbyte_cdk/sources/declarative/auth/token.py b/airbyte_cdk/sources/declarative/auth/token.py index 3e34ef274..caecf9d2c 100644 --- a/airbyte_cdk/sources/declarative/auth/token.py +++ b/airbyte_cdk/sources/declarative/auth/token.py @@ -45,21 +45,6 @@ class ApiKeyAuthenticator(DeclarativeAuthenticator): config: Config parameters: InitVar[Mapping[str, Any]] - def __post_init__(self, parameters: Mapping[str, Any]) -> None: - # Only body_json requests can specify a field_path. All other request types must use a top-level field_name - if ( - self.request_option.field_name is None - and self.request_option.inject_into != RequestOptionType.body_json - ): - raise ValueError( - "ApiKeyAuthenticator requires a top-level field_name for non-body-json requests" - ) - - if self.request_option.field_name is not None: - self._field_name = InterpolatedString.create( - self.request_option.field_name, parameters=parameters - ) - @property def auth_header(self) -> str: options = self._get_request_options(RequestOptionType.header) @@ -72,13 +57,7 @@ def token(self) -> str: def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]: options: MutableMapping[str, Any] = {} if self.request_option.inject_into == option_type: - if option_type == RequestOptionType.body_json: - # For JSON body injection, we support both field_name and field_path injection. - # This allows nested structures to be injected like {"data": {"auth": {"token": "value"}}} - self.request_option.inject_into_dict(options, self.token, self.config) - else: - assert self._field_name is not None - options[self._field_name.eval(self.config)] = self.token + self.request_option.inject_into_request(options, self.token, self.config) return options def get_request_params(self) -> Mapping[str, Any]: diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 93a7fbb7d..fd048658f 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -381,34 +381,11 @@ def _get_request_options( if self.start_time_option and self.start_time_option.inject_into == option_type: start_time_value = stream_slice.get(self._partition_field_start.eval(self.config)) - # Support for injecting nested fields when injecting into body_json - if option_type == RequestOptionType.body_json: - self.start_time_option.inject_into_dict(options, start_time_value, self.config) - else: - assert self.start_time_option.field_name is not None - field_name = self.start_time_option.field_name - key = ( - field_name.eval(self.config) - if isinstance(field_name, InterpolatedString) - else field_name - ) - options[key] = start_time_value + self.start_time_option.inject_into_request(options, start_time_value, self.config) if self.end_time_option and self.end_time_option.inject_into == option_type: end_time_value = stream_slice.get(self._partition_field_end.eval(self.config)) - if option_type == RequestOptionType.body_json: - # For JSON bodies, support both field_name and field_path to enable nested structures - self.end_time_option.inject_into_dict(options, end_time_value, self.config) - else: - # For non-JSON requests, only support top-level field names - assert self.end_time_option.field_name is not None - field_name = self.end_time_option.field_name - key = ( - field_name.eval(self.config) - if isinstance(field_name, InterpolatedString) - else field_name - ) - options[key] = end_time_value + self.end_time_option.inject_into_request(options, end_time_value, self.config) return options diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index d8e37bfc6..509d5b6c2 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -27,9 +27,9 @@ class RequestOption: Describes an option to set on a request Attributes: - field_name: Describes the name of the parameter to inject. Mutually exclusive with field_path. - field_path: Describes the path to a nested field as a list of field names. Mutually exclusive with field_name. - inject_into: Describes where in the HTTP request to inject the parameter + field_name (str): Describes the name of the parameter to inject. Mutually exclusive with field_path. + field_path (list(str)): Describes the path to a nested field as a list of field names. Mutually exclusive with field_name. + inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter """ inject_into: RequestOptionType @@ -38,7 +38,7 @@ class RequestOption: field_path: Optional[List[Union[InterpolatedString, str]]] = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: - # Validate inputs + # Validate inputs. We should expect either field_name or field_path, but not both if self.field_name is None and self.field_path is None: raise ValueError("RequestOption requires either a field_name or field_path") @@ -75,27 +75,34 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: @property def is_field_path(self) -> bool: - """Returns whether this option uses a field path""" + """Returns whether this option is a field path (ie, a nested field)""" return self.field_path is not None - def inject_into_dict( + def inject_into_request( self, target: MutableMapping[str, Any], value: Any, config: Config, ) -> None: """ - Inject a value into a target dict using either field_name or field_path + Inject a request option value into a target request structure using either field_name or field_path. + For non-body-json injection, only top-level field names are supported. + For body-json injection, both field names and nested field paths are supported. Args: - target: The dict to inject the value into + target: The request structure to inject the value into value: The value to inject config: The config object to use for interpolation """ + assert not ( + self.inject_into != RequestOptionType.body_json and self.is_field_path + ), "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types." + if self.is_field_path: assert self.field_path is not None current = target + *path_parts, final_key = [ str( segment.eval(config=config) @@ -108,10 +115,12 @@ def inject_into_dict( for part in path_parts: current = current.setdefault(part, {}) current[final_key] = value + else: assert self.field_name is not None - target[ + key = ( self.field_name.eval(config=config) if isinstance(self.field_name, InterpolatedString) else self.field_name - ] = value + ) + target[str(key)] = value diff --git a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 0feb4b5d0..07d7cf86b 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -123,7 +123,7 @@ def test_request_option_validation( ), ], ) -def test_inject_into_dict_cases( +def test_inject_into_request_cases( request_option_args: Dict[str, Any], value: Any, expected_result: Dict[str, Any] ): """Test various injection cases""" @@ -131,7 +131,7 @@ def test_inject_into_dict_cases( target: Dict[str, Any] = {} request_option = RequestOption(**request_option_args, parameters={}) - request_option.inject_into_dict(target, value, config) + request_option.inject_into_request(target, value, config) assert target == expected_result @@ -169,7 +169,7 @@ def test_interpolation_cases( field_path=field_path, inject_into=RequestOptionType.body_json, parameters=parameters ) target: Dict[str, Any] = {} - request_option.inject_into_dict(target, "test_value", config) + request_option.inject_into_request(target, "test_value", config) assert target == expected_structure @@ -191,7 +191,7 @@ def test_value_type_handling(value: Any, expected_type: Type): request_option = RequestOption( field_path=["data", "test"], inject_into=RequestOptionType.body_json, parameters={} ) - request_option.inject_into_dict(target, value, config) + request_option.inject_into_request(target, value, config) assert isinstance(target["data"]["test"], expected_type) assert target["data"]["test"] == value @@ -225,7 +225,7 @@ def test_multiple_injections(): option1 = RequestOption( field_name="field1", inject_into=RequestOptionType.body_json, parameters={} ) - option1.inject_into_dict(target, "value1", config) + option1.inject_into_request(target, "value1", config) # Second injection with nested path option2 = RequestOption( @@ -233,7 +233,7 @@ def test_multiple_injections(): inject_into=RequestOptionType.body_json, parameters={}, ) - option2.inject_into_dict(target, "value2", config) + option2.inject_into_request(target, "value2", config) assert target == { "existing": "value", From 6638dc86ab2a8ac00c03f9399c5e7abcc7478b68 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 9 Jan 2025 14:03:28 -0800 Subject: [PATCH 12/25] chore: add components tests --- .../declarative/auth/test_token_auth.py | 50 +++++++++++++++++++ .../incremental/test_datetime_based_cursor.py | 34 +++++++++++-- 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/unit_tests/sources/declarative/auth/test_token_auth.py b/unit_tests/sources/declarative/auth/test_token_auth.py index 2a23d4e19..18658994f 100644 --- a/unit_tests/sources/declarative/auth/test_token_auth.py +++ b/unit_tests/sources/declarative/auth/test_token_auth.py @@ -248,3 +248,53 @@ def test_api_key_authenticator_inject( parameters=parameters, ) assert {expected_field_name: expected_field_value} == getattr(token_auth, validation_fn)() + +def test_api_key_authenticator_nested_token_injection(): + """Test that the ApiKeyAuthenticator can properly inject tokens into nested structures when using body_json""" + config = {"api_version": "v2"} + parameters = {"auth_type": "bearer"} + + test_cases = [ + # Basic nested structure + { + "field_path": ["data", "auth", "token"], + "token": "test-token", + "expected": {"data": {"auth": {"token": "test-token"}}} + }, + # Nested with config interpolation + { + "field_path": ["api", "{{ config.api_version }}", "auth", "token"], + "token": "test-token", + "expected": {"api": {"v2": {"auth": {"token": "test-token"}}}} + }, + # Nested with parameter interpolation + { + "field_path": ["auth", "{{ parameters.auth_type }}", "credentials"], + "token": "test-token", + "expected": {"auth": {"bearer": {"credentials": "test-token"}}} + }, + # Deep nesting with both interpolations + { + "field_path": ["api", "{{ config.api_version }}", "auth", "{{ parameters.auth_type }}", "token"], + "token": "test-token", + "expected": {"api": {"v2": {"auth": {"bearer": {"token": "test-token"}}}}} + } + ] + + for case in test_cases: + token_provider = InterpolatedStringTokenProvider( + config=config, + api_token=case["token"], + parameters=parameters + ) + token_auth = ApiKeyAuthenticator( + request_option=RequestOption( + inject_into=RequestOptionType.body_json, + field_path=case["field_path"], + parameters=parameters + ), + token_provider=token_provider, + config=config, + parameters=parameters, + ) + assert token_auth.get_request_body_json() == case["expected"] diff --git a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py index 37ed7ebfe..47eeaefb4 100644 --- a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +++ b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py @@ -782,13 +782,14 @@ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state @pytest.mark.parametrize( - "test_name, inject_into, field_name, expected_req_params, expected_headers, expected_body_json, expected_body_data", + "test_name, inject_into, field_name, field_path, expected_req_params, expected_headers, expected_body_json, expected_body_data", [ - ("test_start_time_inject_into_none", None, None, {}, {}, {}, {}), + ("test_start_time_inject_into_none", None, None, None, {}, {}, {}, {}), ( "test_start_time_passed_by_req_param", RequestOptionType.request_parameter, "start_time", + None, { "start_time": "2021-01-01T00:00:00.000000+0000", "endtime": "2021-01-04T00:00:00.000000+0000", @@ -801,6 +802,7 @@ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state "test_start_time_inject_into_header", RequestOptionType.header, "start_time", + None, {}, { "start_time": "2021-01-01T00:00:00.000000+0000", @@ -810,9 +812,10 @@ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state {}, ), ( - "test_start_time_inject_intoy_body_json", + "test_start_time_inject_into_body_json", RequestOptionType.body_json, "start_time", + None, {}, {}, { @@ -821,10 +824,30 @@ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state }, {}, ), + ( + "test_nested_field_injection_into_body_json", + RequestOptionType.body_json, + None, + ["data", "queries", "time_range", "start"], + {}, + {}, + { + "data": { + "queries": { + "time_range": { + "start": "2021-01-01T00:00:00.000000+0000", + "end": "2021-01-04T00:00:00.000000+0000" + } + } + } + }, + {}, + ), ( "test_start_time_inject_into_body_data", RequestOptionType.body_data, "start_time", + None, {}, {}, {}, @@ -839,18 +862,19 @@ def test_request_option( test_name, inject_into, field_name, + field_path, expected_req_params, expected_headers, expected_body_json, expected_body_data, ): start_request_option = ( - RequestOption(inject_into=inject_into, parameters={}, field_name=field_name) + RequestOption(inject_into=inject_into, parameters={}, field_name=field_name, field_path=field_path) if inject_into else None ) end_request_option = ( - RequestOption(inject_into=inject_into, parameters={}, field_name="endtime") + RequestOption(inject_into=inject_into, parameters={}, field_name="endtime" if field_name else None, field_path=["data", "queries", "time_range", "end"] if field_path else None) if inject_into else None ) From eba438bdb872add1116e0267291ead1dd2022485 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 9 Jan 2025 14:45:34 -0800 Subject: [PATCH 13/25] refactor: shuffle unit tests --- .../declarative/auth/test_token_auth.py | 77 +++--- .../paginators/test_request_option.py | 221 ++---------------- .../request_options/test_request_options.py | 207 ++++++++++++++++ 3 files changed, 258 insertions(+), 247 deletions(-) create mode 100644 unit_tests/sources/declarative/requesters/request_options/test_request_options.py diff --git a/unit_tests/sources/declarative/auth/test_token_auth.py b/unit_tests/sources/declarative/auth/test_token_auth.py index 18658994f..373d8b305 100644 --- a/unit_tests/sources/declarative/auth/test_token_auth.py +++ b/unit_tests/sources/declarative/auth/test_token_auth.py @@ -249,52 +249,41 @@ def test_api_key_authenticator_inject( ) assert {expected_field_name: expected_field_value} == getattr(token_auth, validation_fn)() -def test_api_key_authenticator_nested_token_injection(): + +@pytest.mark.parametrize( + "field_path, token, expected_result", + [ + ( + ["data", "auth", "token"], + "test-token", + {"data": {"auth": {"token": "test-token"}}}, + ), + ( + ["api", "{{ config.api_version }}", "auth", "token"], + "test-token", + {"api": {"v2": {"auth": {"token": "test-token"}}}}, + ), + ], + ids=["Basic nested structure", "Nested with config interpolation"] +) +def test_api_key_authenticator_nested_token_injection(field_path, token, expected_result): """Test that the ApiKeyAuthenticator can properly inject tokens into nested structures when using body_json""" config = {"api_version": "v2"} parameters = {"auth_type": "bearer"} - test_cases = [ - # Basic nested structure - { - "field_path": ["data", "auth", "token"], - "token": "test-token", - "expected": {"data": {"auth": {"token": "test-token"}}} - }, - # Nested with config interpolation - { - "field_path": ["api", "{{ config.api_version }}", "auth", "token"], - "token": "test-token", - "expected": {"api": {"v2": {"auth": {"token": "test-token"}}}} - }, - # Nested with parameter interpolation - { - "field_path": ["auth", "{{ parameters.auth_type }}", "credentials"], - "token": "test-token", - "expected": {"auth": {"bearer": {"credentials": "test-token"}}} - }, - # Deep nesting with both interpolations - { - "field_path": ["api", "{{ config.api_version }}", "auth", "{{ parameters.auth_type }}", "token"], - "token": "test-token", - "expected": {"api": {"v2": {"auth": {"bearer": {"token": "test-token"}}}}} - } - ] - - for case in test_cases: - token_provider = InterpolatedStringTokenProvider( - config=config, - api_token=case["token"], + token_provider = InterpolatedStringTokenProvider( + config=config, + api_token=token, + parameters=parameters + ) + token_auth = ApiKeyAuthenticator( + request_option=RequestOption( + inject_into=RequestOptionType.body_json, + field_path=field_path, parameters=parameters - ) - token_auth = ApiKeyAuthenticator( - request_option=RequestOption( - inject_into=RequestOptionType.body_json, - field_path=case["field_path"], - parameters=parameters - ), - token_provider=token_provider, - config=config, - parameters=parameters, - ) - assert token_auth.get_request_body_json() == case["expected"] + ), + token_provider=token_provider, + config=config, + parameters=parameters, + ) + assert token_auth.get_request_body_json() == expected_result diff --git a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index 07d7cf86b..edbb8789e 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -1,4 +1,6 @@ -from typing import Any, Dict, List, Optional, Type, Union +# +# Copyright (c) 2023 Airbyte, inc., all rights reserved. +# import pytest @@ -32,211 +34,24 @@ (RequestOptionType.body_data, "since_{{ config['cursor_field'] }}", "since_created_at"), (RequestOptionType.body_json, "since_{{ config['cursor_field'] }}", "since_created_at"), ], + ids=[ + "test_limit_param_with_field_name", + "test_limit_header_with_field_name", + "test_limit_data_with_field_name", + "test_limit_json_with_field_name", + "test_limit_param_with_parameters_interpolation", + "test_limit_header_with_parameters_interpolation", + "test_limit_data_with_parameters_interpolation", + "test_limit_json_with_parameters_interpolation", + "test_limit_param_with_config_interpolation", + "test_limit_header_with_config_interpolation", + "test_limit_data_with_config_interpolation", + "test_limit_json_with_config_interpolation", + ], ) -def test_request_option_field_name( - option_type: RequestOptionType, field_name: str, expected_field_name: str -): - """Test the original field_name functionality""" +def test_request_option(option_type: RequestOptionType, field_name: str, expected_field_name: str): request_option = RequestOption( inject_into=option_type, field_name=field_name, parameters={"cursor_field": "updated_at"} ) assert request_option.field_name.eval({"cursor_field": "created_at"}) == expected_field_name assert request_option.inject_into == option_type - - -@pytest.mark.parametrize( - "field_name, field_path, inject_into, error_type, error_message", - [ - ( - None, - None, - RequestOptionType.body_json, - ValueError, - "RequestOption requires either a field_name or field_path", - ), - ( - "field", - ["data", "field"], - RequestOptionType.body_json, - ValueError, - "Only one of field_name or field_path can be provided", - ), - ( - None, - ["data", "field"], - RequestOptionType.header, - ValueError, - "Nested field injection is only supported for body JSON injection.", - ), - ( - None, - {"this": "should", "be": "a list"}, - RequestOptionType.body_json, - TypeError, - "field_path expects a list", - ), - ], -) -def test_request_option_validation( - field_name: Optional[str], - field_path: Any, - inject_into: RequestOptionType, - error_type: Type[Exception], - error_message: str, -): - """Test various validation cases for RequestOption""" - with pytest.raises(error_type, match=error_message): - RequestOption( - field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} - ) - - -@pytest.mark.parametrize( - "request_option_args, value, expected_result", - [ - # Basic field_name test - ( - { - "field_name": "test_{{ config['base_field'] }}", - "inject_into": RequestOptionType.body_json, - }, - "test_value", - {"test_value": "test_value"}, - ), - # Basic field_path test - ( - { - "field_path": ["data", "nested_{{ config['base_field'] }}", "field"], - "inject_into": RequestOptionType.body_json, - }, - "test_value", - {"data": {"nested_value": {"field": "test_value"}}}, - ), - # Deep nesting test - ( - { - "field_path": ["level1", "level2", "level3", "level4", "field"], - "inject_into": RequestOptionType.body_json, - }, - "deep_value", - {"level1": {"level2": {"level3": {"level4": {"field": "deep_value"}}}}}, - ), - ], -) -def test_inject_into_request_cases( - request_option_args: Dict[str, Any], value: Any, expected_result: Dict[str, Any] -): - """Test various injection cases""" - config = {"base_field": "value"} - target: Dict[str, Any] = {} - - request_option = RequestOption(**request_option_args, parameters={}) - request_option.inject_into_request(target, value, config) - assert target == expected_result - - -@pytest.mark.parametrize( - "config, parameters, field_path, expected_structure", - [ - ( - {"nested": "user"}, - {"type": "profile"}, - ["data", "{{ config['nested'] }}", "{{ parameters['type'] }}"], - {"data": {"user": {"profile": "test_value"}}}, - ), - ( - {"user_type": "admin", "section": "profile"}, - {"id": "12345"}, - [ - "data", - "{{ config['user_type'] }}", - "{{ parameters['id'] }}", - "{{ config['section'] }}", - "details", - ], - {"data": {"admin": {"12345": {"profile": {"details": "test_value"}}}}}, - ), - ], -) -def test_interpolation_cases( - config: Dict[str, Any], - parameters: Dict[str, Any], - field_path: List[str], - expected_structure: Dict[str, Any], -): - """Test various interpolation scenarios""" - request_option = RequestOption( - field_path=field_path, inject_into=RequestOptionType.body_json, parameters=parameters - ) - target: Dict[str, Any] = {} - request_option.inject_into_request(target, "test_value", config) - assert target == expected_structure - - -@pytest.mark.parametrize( - "value, expected_type", - [ - (42, int), - (3.14, float), - (True, bool), - (["a", "b", "c"], list), - ({"key": "value"}, dict), - (None, type(None)), - ], -) -def test_value_type_handling(value: Any, expected_type: Type): - """Test handling of different value types""" - config = {} - target: Dict[str, Any] = {} - request_option = RequestOption( - field_path=["data", "test"], inject_into=RequestOptionType.body_json, parameters={} - ) - request_option.inject_into_request(target, value, config) - assert isinstance(target["data"]["test"], expected_type) - assert target["data"]["test"] == value - - -@pytest.mark.parametrize( - "field_name, field_path, inject_into, expected_is_field_path", - [ - ("field", None, RequestOptionType.body_json, False), - (None, ["data", "field"], RequestOptionType.body_json, True), - ], -) -def test_is_field_path( - field_name: Optional[str], - field_path: Optional[List[str]], - inject_into: RequestOptionType, - expected_is_field_path: bool, -): - """Test the is_field_path property""" - request_option = RequestOption( - field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} - ) - assert request_option.is_field_path == expected_is_field_path - - -def test_multiple_injections(): - """Test injecting multiple values into the same target dict""" - config = {"base": "test"} - target = {"existing": "value"} - - # First injection with field_name - option1 = RequestOption( - field_name="field1", inject_into=RequestOptionType.body_json, parameters={} - ) - option1.inject_into_request(target, "value1", config) - - # Second injection with nested path - option2 = RequestOption( - field_path=["data", "nested", "field2"], - inject_into=RequestOptionType.body_json, - parameters={}, - ) - option2.inject_into_request(target, "value2", config) - - assert target == { - "existing": "value", - "field1": "value1", - "data": {"nested": {"field2": "value2"}}, - } diff --git a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py new file mode 100644 index 000000000..312d2c63e --- /dev/null +++ b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py @@ -0,0 +1,207 @@ +from typing import Any, Dict, List, Optional, Type + +import pytest + +from airbyte_cdk.sources.declarative.requesters.request_option import ( + RequestOption, + RequestOptionType, +) + + +@pytest.mark.parametrize( + "field_name, field_path, inject_into, error_type, error_message", + [ + ( + None, + None, + RequestOptionType.body_json, + ValueError, + "RequestOption requires either a field_name or field_path", + ), + ( + "field", + ["data", "field"], + RequestOptionType.body_json, + ValueError, + "Only one of field_name or field_path can be provided", + ), + ( + None, + ["data", "field"], + RequestOptionType.header, + ValueError, + "Nested field injection is only supported for body JSON injection.", + ), + ( + None, + {"this": "should", "be": "a list"}, + RequestOptionType.body_json, + TypeError, + "field_path expects a list", + ), + ], +) +def test_request_option_validation( + field_name: Optional[str], + field_path: Any, + inject_into: RequestOptionType, + error_type: Type[Exception], + error_message: str, +): + """Test various validation cases for RequestOption""" + with pytest.raises(error_type, match=error_message): + RequestOption( + field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} + ) + + +@pytest.mark.parametrize( + "request_option_args, value, expected_result", + [ + # Basic field_name test + ( + { + "field_name": "test_{{ config['base_field'] }}", + "inject_into": RequestOptionType.body_json, + }, + "test_value", + {"test_value": "test_value"}, + ), + # Basic field_path test + ( + { + "field_path": ["data", "nested_{{ config['base_field'] }}", "field"], + "inject_into": RequestOptionType.body_json, + }, + "test_value", + {"data": {"nested_value": {"field": "test_value"}}}, + ), + # Deep nesting test + ( + { + "field_path": ["level1", "level2", "level3", "level4", "field"], + "inject_into": RequestOptionType.body_json, + }, + "deep_value", + {"level1": {"level2": {"level3": {"level4": {"field": "deep_value"}}}}}, + ), + ], +) +def test_inject_into_request_cases( + request_option_args: Dict[str, Any], value: Any, expected_result: Dict[str, Any] +): + """Test various injection cases""" + config = {"base_field": "value"} + target: Dict[str, Any] = {} + + request_option = RequestOption(**request_option_args, parameters={}) + request_option.inject_into_request(target, value, config) + assert target == expected_result + + +@pytest.mark.parametrize( + "config, parameters, field_path, expected_structure", + [ + ( + {"nested": "user"}, + {"type": "profile"}, + ["data", "{{ config['nested'] }}", "{{ parameters['type'] }}"], + {"data": {"user": {"profile": "test_value"}}}, + ), + ( + {"user_type": "admin", "section": "profile"}, + {"id": "12345"}, + [ + "data", + "{{ config['user_type'] }}", + "{{ parameters['id'] }}", + "{{ config['section'] }}", + "details", + ], + {"data": {"admin": {"12345": {"profile": {"details": "test_value"}}}}}, + ), + ], +) +def test_interpolation_cases( + config: Dict[str, Any], + parameters: Dict[str, Any], + field_path: List[str], + expected_structure: Dict[str, Any], +): + """Test various interpolation scenarios""" + request_option = RequestOption( + field_path=field_path, inject_into=RequestOptionType.body_json, parameters=parameters + ) + target: Dict[str, Any] = {} + request_option.inject_into_request(target, "test_value", config) + assert target == expected_structure + + +@pytest.mark.parametrize( + "value, expected_type", + [ + (42, int), + (3.14, float), + (True, bool), + (["a", "b", "c"], list), + ({"key": "value"}, dict), + (None, type(None)), + ], +) +def test_value_type_handling(value: Any, expected_type: Type): + """Test handling of different value types""" + config = {} + target: Dict[str, Any] = {} + request_option = RequestOption( + field_path=["data", "test"], inject_into=RequestOptionType.body_json, parameters={} + ) + request_option.inject_into_request(target, value, config) + assert isinstance(target["data"]["test"], expected_type) + assert target["data"]["test"] == value + + +@pytest.mark.parametrize( + "field_name, field_path, inject_into, expected_is_field_path", + [ + ("field", None, RequestOptionType.body_json, False), + (None, ["data", "field"], RequestOptionType.body_json, True), + ], +) +def test_is_field_path( + field_name: Optional[str], + field_path: Optional[List[str]], + inject_into: RequestOptionType, + expected_is_field_path: bool, +): + """Test the is_field_path property""" + request_option = RequestOption( + field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} + ) + assert request_option.is_field_path == expected_is_field_path + + +def test_multiple_injections(): + """Test injecting multiple values into the same target dict""" + config = {"base": "test"} + target = {"existing": "value"} + + # First injection with field_name + option1 = RequestOption( + field_name="field1", inject_into=RequestOptionType.body_json, parameters={} + ) + option1.inject_into_request(target, "value1", config) + + # Second injection with nested path + option2 = RequestOption( + field_path=["data", "nested", "field2"], + inject_into=RequestOptionType.body_json, + parameters={}, + ) + option2.inject_into_request(target, "value2", config) + + assert target == { + "existing": "value", + "field1": "value1", + "data": {"nested": {"field2": "value2"}}, + } + From 5dc29caf71f64074e712f7911171026b4ca497a9 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 9 Jan 2025 14:52:28 -0800 Subject: [PATCH 14/25] chore: format --- .../declarative/auth/test_token_auth.py | 38 +++++++++---------- .../incremental/test_datetime_based_cursor.py | 13 +++++-- .../paginators/test_request_option.py | 2 +- .../request_options/test_request_options.py | 1 - 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/unit_tests/sources/declarative/auth/test_token_auth.py b/unit_tests/sources/declarative/auth/test_token_auth.py index 373d8b305..4e90367c1 100644 --- a/unit_tests/sources/declarative/auth/test_token_auth.py +++ b/unit_tests/sources/declarative/auth/test_token_auth.py @@ -251,36 +251,32 @@ def test_api_key_authenticator_inject( @pytest.mark.parametrize( - "field_path, token, expected_result", - [ - ( - ["data", "auth", "token"], - "test-token", - {"data": {"auth": {"token": "test-token"}}}, - ), - ( - ["api", "{{ config.api_version }}", "auth", "token"], - "test-token", - {"api": {"v2": {"auth": {"token": "test-token"}}}}, - ), - ], - ids=["Basic nested structure", "Nested with config interpolation"] + "field_path, token, expected_result", + [ + ( + ["data", "auth", "token"], + "test-token", + {"data": {"auth": {"token": "test-token"}}}, + ), + ( + ["api", "{{ config.api_version }}", "auth", "token"], + "test-token", + {"api": {"v2": {"auth": {"token": "test-token"}}}}, + ), + ], + ids=["Basic nested structure", "Nested with config interpolation"], ) def test_api_key_authenticator_nested_token_injection(field_path, token, expected_result): """Test that the ApiKeyAuthenticator can properly inject tokens into nested structures when using body_json""" config = {"api_version": "v2"} parameters = {"auth_type": "bearer"} - + token_provider = InterpolatedStringTokenProvider( - config=config, - api_token=token, - parameters=parameters + config=config, api_token=token, parameters=parameters ) token_auth = ApiKeyAuthenticator( request_option=RequestOption( - inject_into=RequestOptionType.body_json, - field_path=field_path, - parameters=parameters + inject_into=RequestOptionType.body_json, field_path=field_path, parameters=parameters ), token_provider=token_provider, config=config, diff --git a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py index 47eeaefb4..3ddc5847f 100644 --- a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py +++ b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py @@ -836,7 +836,7 @@ def test_given_different_format_and_slice_is_highest_when_close_slice_then_state "queries": { "time_range": { "start": "2021-01-01T00:00:00.000000+0000", - "end": "2021-01-04T00:00:00.000000+0000" + "end": "2021-01-04T00:00:00.000000+0000", } } } @@ -869,12 +869,19 @@ def test_request_option( expected_body_data, ): start_request_option = ( - RequestOption(inject_into=inject_into, parameters={}, field_name=field_name, field_path=field_path) + RequestOption( + inject_into=inject_into, parameters={}, field_name=field_name, field_path=field_path + ) if inject_into else None ) end_request_option = ( - RequestOption(inject_into=inject_into, parameters={}, field_name="endtime" if field_name else None, field_path=["data", "queries", "time_range", "end"] if field_path else None) + RequestOption( + inject_into=inject_into, + parameters={}, + field_name="endtime" if field_name else None, + field_path=["data", "queries", "time_range", "end"] if field_path else None, + ) if inject_into else None ) diff --git a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py index edbb8789e..69866773c 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_request_option.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_request_option.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2023 Airbyte, inc., all rights reserved. +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. # import pytest diff --git a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py index 312d2c63e..6d95f60d9 100644 --- a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py +++ b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py @@ -204,4 +204,3 @@ def test_multiple_injections(): "field1": "value1", "data": {"nested": {"field2": "value2"}}, } - From 6fc897f63ce45346ddd7d752bf032d7d0fe388fc Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 9 Jan 2025 15:57:36 -0800 Subject: [PATCH 15/25] chore: cleanup and code comments --- .../declarative/requesters/request_option.py | 23 ++++++++-------- airbyte_cdk/utils/mapping_helpers.py | 26 ++++++++++++------- .../request_options/test_request_options.py | 10 +++---- 3 files changed, 33 insertions(+), 26 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index 509d5b6c2..fb38e897d 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -64,7 +64,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types." ) - # Handle interpolation + # Convert field_name and field_path into InterpolatedString objects if they are strings if self.field_name is not None: self.field_name = InterpolatedString.create(self.field_name, parameters=parameters) if self.field_path is not None: @@ -74,7 +74,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: ] @property - def is_field_path(self) -> bool: + def _is_field_path(self) -> bool: """Returns whether this option is a field path (ie, a nested field)""" return self.field_path is not None @@ -94,15 +94,16 @@ def inject_into_request( value: The value to inject config: The config object to use for interpolation """ + if self._is_field_path: + if self.inject_into != RequestOptionType.body_json: + raise ValueError( + "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types." + ) - assert not ( - self.inject_into != RequestOptionType.body_json and self.is_field_path - ), "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types." - - if self.is_field_path: - assert self.field_path is not None + assert self.field_path is not None # for type checker current = target - + # Convert path segments into strings, evaluating any interpolated segments + # Example: ["data", "{{ config[user_type] }}", "id"] -> ["data", "admin", "id"] *path_parts, final_key = [ str( segment.eval(config=config) @@ -112,12 +113,12 @@ def inject_into_request( for segment in self.field_path ] + # Build a nested dictionary structure and set the final value at the deepest level for part in path_parts: current = current.setdefault(part, {}) current[final_key] = value - else: - assert self.field_name is not None + # For non-nested fields, evaluate the field name if it's an interpolated string key = ( self.field_name.eval(config=config) if isinstance(self.field_name, InterpolatedString) diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index d9b31ce53..acccf1024 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -6,7 +6,7 @@ from typing import Any, Dict, List, Mapping, Optional, Union -def has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: Any) -> bool: +def _has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: Any) -> bool: """ Check if two paths conflict with each other. e.g. ["a", "b"] conflicts with ["a", "b"] if values differ @@ -24,11 +24,11 @@ def has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: return False -def flatten_mapping( +def _flatten_mapping( mapping: Mapping[str, Any], prefix: Optional[List[str]] = None ) -> List[tuple[List[str], Any]]: """ - Convert a nested mapping into a list of (path, value) pairs + Convert a nested mapping into a list of (path, value) pairs to make conflict detection easier. e.g. {"a": {"b": 1}} -> [(["a", "b"], 1)] """ prefix = prefix or [] @@ -37,7 +37,7 @@ def flatten_mapping( for key, value in mapping.items(): current_path = prefix + [key] if isinstance(value, Mapping): - result.extend(flatten_mapping(value, current_path)) + result.extend(_flatten_mapping(value, current_path)) else: result.append((current_path, value)) @@ -54,15 +54,18 @@ def combine_mappings( * If there are multiple string mappings * If there are multiple mappings containing keys and one of them is a string """ - # Handle string cases first (maintaining existing behavior) + + # Count how many string options we have, ignoring None values string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None) if string_options > 1: raise ValueError("Cannot combine multiple string options") + # Filter out None values and empty mappings non_empty_mappings = [ m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m) ] + # If there is only one string option, return it if string_options == 1: if len(non_empty_mappings) > 1: raise ValueError("Cannot combine multiple options if one is a string") @@ -73,26 +76,29 @@ def combine_mappings( for mapping in mappings: if mapping is None or not isinstance(mapping, Mapping): continue - all_paths.append(flatten_mapping(mapping)) + all_paths.append(_flatten_mapping(mapping)) - # Check for conflicts + # Check each path against all other paths for conflicts + # Conflicts occur when the same path has different values or when one path is a prefix of another + # e.g. {"a": 1} and {"a": {"b": 2}} conflict because "a" can't be both a value and a nested structure for i, paths1 in enumerate(all_paths): for path1, value1 in paths1: for paths2 in all_paths[i + 1 :]: for path2, value2 in paths2: - if has_nested_conflict(path1, value1, path2, value2): + if _has_nested_conflict(path1, value1, path2, value2): raise ValueError( f"Duplicate keys or nested path conflict found: {'.'.join(path1)} conflicts with {'.'.join(path2)}" ) - # If no conflicts, merge all mappings + # If no conflicts were found, merge all mappings result: Dict[str, Any] = {} for mapping in mappings: if mapping is None or not isinstance(mapping, Mapping): continue - for path, value in flatten_mapping(mapping): + for path, value in _flatten_mapping(mapping): current = result *prefix, last = path + # Create nested dictionaries for each prefix segment for key in prefix: current = current.setdefault(key, {}) current[last] = value diff --git a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py index 6d95f60d9..31222afa1 100644 --- a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py +++ b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py @@ -161,23 +161,23 @@ def test_value_type_handling(value: Any, expected_type: Type): @pytest.mark.parametrize( - "field_name, field_path, inject_into, expected_is_field_path", + "field_name, field_path, inject_into, expected__is_field_path", [ ("field", None, RequestOptionType.body_json, False), (None, ["data", "field"], RequestOptionType.body_json, True), ], ) -def test_is_field_path( +def test__is_field_path( field_name: Optional[str], field_path: Optional[List[str]], inject_into: RequestOptionType, - expected_is_field_path: bool, + expected__is_field_path: bool, ): - """Test the is_field_path property""" + """Test the _is_field_path property""" request_option = RequestOption( field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} ) - assert request_option.is_field_path == expected_is_field_path + assert request_option._is_field_path == expected__is_field_path def test_multiple_injections(): From 97cff3fa137a1b273ec5661f5228f36b8f3c05eb Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 9 Jan 2025 16:11:37 -0800 Subject: [PATCH 16/25] chore: remove redundant check in DatetimeBasedCursor --- .../incremental/datetime_based_cursor.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index fd048658f..8ef1c89a4 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -76,19 +76,6 @@ class DatetimeBasedCursor(DeclarativeCursor): cursor_datetime_formats: List[str] = field(default_factory=lambda: []) def __post_init__(self, parameters: Mapping[str, Any]) -> None: - for option, name in [ - (self.start_time_option, "start_time_option"), - (self.end_time_option, "end_time_option"), - ]: - if ( - option - and option.field_name is None - and option.inject_into != RequestOptionType.body_json - ): - raise ValueError( - f"DatetimeBasedCursor requires a top-level field_name for non-body-json requests in {name}." - ) - if (self.step and not self.cursor_granularity) or ( not self.step and self.cursor_granularity ): From 719112835d31deb3bc24438103c9c819591ebb99 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Fri, 17 Jan 2025 11:26:02 -0800 Subject: [PATCH 17/25] feat: update remaining components to use injection logic --- .../partition_routers/list_partition_router.py | 6 ++++-- .../partition_routers/substream_partition_router.py | 12 +++--------- .../requesters/paginators/default_paginator.py | 10 +++++----- .../datetime_based_request_options_provider.py | 13 +++++++------ 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py index 29b700b04..6049cefe2 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py @@ -3,7 +3,7 @@ # from dataclasses import InitVar, dataclass -from typing import Any, Iterable, List, Mapping, Optional, Union +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter @@ -100,7 +100,9 @@ def _get_request_option( ): slice_value = stream_slice.get(self._cursor_field.eval(self.config)) if slice_value: - return {self.request_option.field_name.eval(self.config): slice_value} # type: ignore # field_name is always casted to InterpolatedString + options: MutableMapping[str, Any] = {} + self.request_option.inject_into_request(options, slice_value, self.config) + return options else: return {} else: diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index 1c7bb6961..1d914b7e5 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -4,7 +4,7 @@ import copy import logging from dataclasses import InitVar, dataclass -from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Union +from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union import dpath @@ -118,7 +118,7 @@ def get_request_body_json( def _get_request_option( self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice] ) -> Mapping[str, Any]: - params = {} + params:MutableMapping[str, Any] = {} if stream_slice: for parent_config in self.parent_stream_configs: if ( @@ -128,13 +128,7 @@ def _get_request_option( key = parent_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string value = stream_slice.get(key) if value: - params.update( - { - parent_config.request_option.field_name.eval( # type: ignore [union-attr] - config=self.config - ): value - } - ) + parent_config.request_option.inject_into_request(params, value, self.config) return params def stream_slices(self) -> Iterable[StreamSlice]: diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index e9476447a..e2f4eb4dc 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -180,7 +180,7 @@ def reset(self, reset_value: Optional[Any] = None) -> None: self._token = self.pagination_strategy.initial_token def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]: - options = {} + options: MutableMapping[str, Any] = {} if ( self.page_token_option @@ -188,15 +188,15 @@ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping and isinstance(self.page_token_option, RequestOption) and self.page_token_option.inject_into == option_type ): - options[self.page_token_option.field_name.eval(config=self.config)] = self._token # type: ignore # field_name is always cast to an interpolated string + self.page_token_option.inject_into_request(options, self._token, self.config) if ( self.page_size_option and self.pagination_strategy.get_page_size() and self.page_size_option.inject_into == option_type ): - options[self.page_size_option.field_name.eval(config=self.config)] = ( # type: ignore [union-attr] - self.pagination_strategy.get_page_size() - ) # type: ignore # field_name is always cast to an interpolated string + page_size = self.pagination_strategy.get_page_size() + self.page_size_option.inject_into_request(options, page_size, self.config) + return options diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 05e06db71..d0de3a7d2 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -80,12 +80,13 @@ def _get_request_options( options: MutableMapping[str, Any] = {} if not stream_slice: return options + if self.start_time_option and self.start_time_option.inject_into == option_type: - options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string - self._partition_field_start.eval(self.config) - ) + start_time_value = stream_slice.get(self._partition_field_start.eval(self.config)) + self.start_time_option.inject_into_request(options, start_time_value, self.config) + if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] - self._partition_field_end.eval(self.config) - ) + end_time_value = stream_slice.get(self._partition_field_end.eval(self.config)) + self.end_time_option.inject_into_request(options, end_time_value, self.config) + return options From 4ef2423f1e7f66f0000d2e3f2efd8e0789d36dca Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 21 Jan 2025 11:39:48 -0800 Subject: [PATCH 18/25] refactor: simplify validations --- .../declarative/requesters/request_option.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index fb38e897d..91ed9bbf4 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -28,7 +28,8 @@ class RequestOption: Attributes: field_name (str): Describes the name of the parameter to inject. Mutually exclusive with field_path. - field_path (list(str)): Describes the path to a nested field as a list of field names. Mutually exclusive with field_name. + field_path (list(str)): Describes the path to a nested field as a list of field names. + Only valid for body_json injection type, and mutually exclusive with field_name. inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter """ @@ -38,6 +39,7 @@ class RequestOption: field_path: Optional[List[Union[InterpolatedString, str]]] = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: + # Validate inputs. We should expect either field_name or field_path, but not both if self.field_name is None and self.field_path is None: raise ValueError("RequestOption requires either a field_name or field_path") @@ -47,18 +49,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: "Only one of field_name or field_path can be provided to RequestOption" ) - if self.field_name is not None and not isinstance( - self.field_name, (str, InterpolatedString) - ): - raise TypeError(f"field_name expects a string, but got {type(self.field_name)}") - - if self.field_path is not None: - if not isinstance(self.field_path, list): - raise TypeError(f"field_path expects a list, but got {type(self.field_path)}") - for value in self.field_path: - if not isinstance(value, (str, InterpolatedString)): - raise TypeError(f"field_path values must be strings, got {type(value)}") - + # Nested field injection is only supported for body JSON injection if self.field_path is not None and self.inject_into != RequestOptionType.body_json: raise ValueError( "Nested field injection is only supported for body JSON injection. Please use a top-level field_name for other injection types." @@ -67,7 +58,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: # Convert field_name and field_path into InterpolatedString objects if they are strings if self.field_name is not None: self.field_name = InterpolatedString.create(self.field_name, parameters=parameters) - if self.field_path is not None: + elif self.field_path is not None: self.field_path = [ InterpolatedString.create(segment, parameters=parameters) for segment in self.field_path From d1fde99160b3f5d197dbb60c3d9ac02ed48be451 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 21 Jan 2025 12:16:14 -0800 Subject: [PATCH 19/25] refactor: simplify combine_mappings logic --- .../substream_partition_router.py | 4 +- .../declarative/requesters/request_option.py | 3 +- ...datetime_based_request_options_provider.py | 2 +- airbyte_cdk/utils/mapping_helpers.py | 107 +++++------- unit_tests/utils/test_mapping_helpers.py | 160 ++++++++++-------- 5 files changed, 135 insertions(+), 141 deletions(-) diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index 1d914b7e5..76cb29666 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -118,7 +118,7 @@ def get_request_body_json( def _get_request_option( self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice] ) -> Mapping[str, Any]: - params:MutableMapping[str, Any] = {} + params: MutableMapping[str, Any] = {} if stream_slice: for parent_config in self.parent_stream_configs: if ( @@ -128,7 +128,7 @@ def _get_request_option( key = parent_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string value = stream_slice.get(key) if value: - parent_config.request_option.inject_into_request(params, value, self.config) + parent_config.request_option.inject_into_request(params, value, self.config) return params def stream_slices(self) -> Iterable[StreamSlice]: diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index 91ed9bbf4..e0946b53b 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -28,7 +28,7 @@ class RequestOption: Attributes: field_name (str): Describes the name of the parameter to inject. Mutually exclusive with field_path. - field_path (list(str)): Describes the path to a nested field as a list of field names. + field_path (list(str)): Describes the path to a nested field as a list of field names. Only valid for body_json injection type, and mutually exclusive with field_name. inject_into (RequestOptionType): Describes where in the HTTP request to inject the parameter """ @@ -39,7 +39,6 @@ class RequestOption: field_path: Optional[List[Union[InterpolatedString, str]]] = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: - # Validate inputs. We should expect either field_name or field_path, but not both if self.field_name is None and self.field_path is None: raise ValueError("RequestOption requires either a field_name or field_path") diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index d0de3a7d2..437ea7b7b 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -80,7 +80,7 @@ def _get_request_options( options: MutableMapping[str, Any] = {} if not stream_slice: return options - + if self.start_time_option and self.start_time_option.inject_into == option_type: start_time_value = stream_slice.get(self._partition_field_start.eval(self.config)) self.start_time_option.inject_into_request(options, start_time_value, self.config) diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index acccf1024..ac0ec0cdd 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -3,45 +3,41 @@ # +import copy from typing import Any, Dict, List, Mapping, Optional, Union -def _has_nested_conflict(path1: List[str], value1: Any, path2: List[str], value2: Any) -> bool: +def _merge_mappings( + target: Dict[str, Any], + source: Mapping[str, Any], + path: Optional[List[str]] = None, +) -> None: """ - Check if two paths conflict with each other. - e.g. ["a", "b"] conflicts with ["a", "b"] if values differ - e.g. ["a"] conflicts with ["a", "b"] (can't have both a value and a nested structure) - """ - # If one path is a prefix of the other, they conflict - shorter, longer = sorted([path1, path2], key=len) - if longer[: len(shorter)] == shorter: - return True - - # If paths are the same but values differ, they conflict - if path1 == path2 and value1 != value2: - return True - - return False - + Recursively merge two dictionaries, raising an error if there are any conflicts. + A conflict occurs when the same path exists in both dictionaries with different values. -def _flatten_mapping( - mapping: Mapping[str, Any], prefix: Optional[List[str]] = None -) -> List[tuple[List[str], Any]]: + Args: + target: The dictionary to merge into + source: The dictionary to merge from + path: The current path in the nested structure (for error messages) """ - Convert a nested mapping into a list of (path, value) pairs to make conflict detection easier. - e.g. {"a": {"b": 1}} -> [(["a", "b"], 1)] - """ - prefix = prefix or [] - result = [] - - for key, value in mapping.items(): - current_path = prefix + [key] - if isinstance(value, Mapping): - result.extend(_flatten_mapping(value, current_path)) + path = path or [] + for key, source_value in source.items(): + current_path = path + [str(key)] + + if key in target: + target_value = target[key] + if isinstance(target_value, dict) and isinstance(source_value, dict): + # If both are dictionaries, recursively merge them + _merge_mappings(target_value, source_value, current_path) + elif target_value != source_value: + # If same key has different values, that's a conflict + raise ValueError( + f"Duplicate keys or nested path conflict found: {'.'.join(current_path)}" + ) else: - result.append((current_path, value)) - - return result + # No conflict, just copy the value (using deepcopy for nested structures) + target[key] = copy.deepcopy(source_value) def combine_mappings( @@ -49,11 +45,16 @@ def combine_mappings( ) -> Union[Mapping[str, Any], str]: """ Combine multiple mappings into a single mapping, supporting nested structures. - If any of the mappings are a string, return that string. Raise errors in the following cases: - * If there are conflicting paths across mappings (including nested conflicts) + Raise errors in the following cases: * If there are multiple string mappings - * If there are multiple mappings containing keys and one of them is a string + * If there is a string mapping AND any non-empty dictionary mappings + * If there are conflicting paths across mappings (including nested conflicts) + + If there is exactly one string mapping and no other non-empty mappings, return that string. + Otherwise, combine all dictionary mappings into a single mapping. """ + if not mappings: + return {} # Count how many string options we have, ignoring None values string_options = sum(isinstance(mapping, str) for mapping in mappings if mapping is not None) @@ -65,42 +66,16 @@ def combine_mappings( m for m in mappings if m is not None and not (isinstance(m, Mapping) and not m) ] - # If there is only one string option, return it + # If there is only one string option and no other non-empty mappings, return it if string_options == 1: if len(non_empty_mappings) > 1: raise ValueError("Cannot combine multiple options if one is a string") return next(m for m in non_empty_mappings if isinstance(m, str)) - # Convert all mappings to flat (path, value) pairs for conflict detection - all_paths: List[List[tuple[List[str], Any]]] = [] - for mapping in mappings: - if mapping is None or not isinstance(mapping, Mapping): - continue - all_paths.append(_flatten_mapping(mapping)) - - # Check each path against all other paths for conflicts - # Conflicts occur when the same path has different values or when one path is a prefix of another - # e.g. {"a": 1} and {"a": {"b": 2}} conflict because "a" can't be both a value and a nested structure - for i, paths1 in enumerate(all_paths): - for path1, value1 in paths1: - for paths2 in all_paths[i + 1 :]: - for path2, value2 in paths2: - if _has_nested_conflict(path1, value1, path2, value2): - raise ValueError( - f"Duplicate keys or nested path conflict found: {'.'.join(path1)} conflicts with {'.'.join(path2)}" - ) - - # If no conflicts were found, merge all mappings + # Start with an empty result and merge each mapping into it result: Dict[str, Any] = {} - for mapping in mappings: - if mapping is None or not isinstance(mapping, Mapping): - continue - for path, value in _flatten_mapping(mapping): - current = result - *prefix, last = path - # Create nested dictionaries for each prefix segment - for key in prefix: - current = current.setdefault(key, {}) - current[last] = value + for mapping in non_empty_mappings: + if mapping and isinstance(mapping, Mapping): + _merge_mappings(result, mapping) return result diff --git a/unit_tests/utils/test_mapping_helpers.py b/unit_tests/utils/test_mapping_helpers.py index f972a94fe..d8524bc53 100644 --- a/unit_tests/utils/test_mapping_helpers.py +++ b/unit_tests/utils/test_mapping_helpers.py @@ -7,73 +7,93 @@ from airbyte_cdk.utils.mapping_helpers import combine_mappings -def test_basic_merge(): - mappings = [{"a": 1}, {"b": 2}, {"c": 3}, {}] - result = combine_mappings(mappings) - assert result == {"a": 1, "b": 2, "c": 3} - - -def test_combine_with_string(): - mappings = [{"a": 1}, "option"] - with pytest.raises(ValueError, match="Cannot combine multiple options if one is a string"): - combine_mappings(mappings) - - -def test_overlapping_keys(): - mappings = [{"a": 1, "b": 2}, {"b": 3}] - with pytest.raises(ValueError, match="Duplicate keys"): - combine_mappings(mappings) - - -def test_multiple_strings(): - mappings = ["option1", "option2"] - with pytest.raises(ValueError, match="Cannot combine multiple string options"): - combine_mappings(mappings) - - -def test_handle_none_values(): - mappings = [{"a": 1}, None, {"b": 2}] - result = combine_mappings(mappings) - assert result == {"a": 1, "b": 2} - - -def test_empty_mappings(): - mappings = [] - result = combine_mappings(mappings) - assert result == {} - - -def test_single_mapping(): - mappings = [{"a": 1}] - result = combine_mappings(mappings) - assert result == {"a": 1} - - -def test_combine_with_string_and_empty_mappings(): - mappings = ["option", {}] - result = combine_mappings(mappings) - assert result == "option" - - -def test_nested_merge(): - mappings = [{"a": {"b": 1}}, {"c": {"d": 2}}] - result = combine_mappings(mappings) - assert result == {"a": {"b": 1}, "c": {"d": 2}} - - -def test_nested_conflict(): - mappings = [{"a": {"b": 1}}, {"a": {"b": 2}}] - with pytest.raises(ValueError, match="nested path conflict"): - combine_mappings(mappings) - - -def test_nested_path_conflict(): - mappings = [{"a": 1}, {"a": {"b": 2}}] - with pytest.raises(ValueError, match="nested path conflict"): - combine_mappings(mappings) - - -def test_deep_nested_merge(): - mappings = [{"a": {"b": {"c": 1}}}, {"d": {"e": {"f": 2}}}] - result = combine_mappings(mappings) - assert result == {"a": {"b": {"c": 1}}, "d": {"e": {"f": 2}}} +@pytest.mark.parametrize( + "test_name, mappings, expected_result, expected_error", + [ + ("basic_merge", [{"a": 1}, {"b": 2}, {"c": 3}, {}], {"a": 1, "b": 2, "c": 3}, None), + ("handle_none_values", [{"a": 1}, None, {"b": 2}], {"a": 1, "b": 2}, None), + ("empty_mappings", [], {}, None), + ("single_mapping", [{"a": 1}], {"a": 1}, None), + ("overlapping_keys", [{"a": 1, "b": 2}, {"b": 3}], None, "Duplicate keys"), + ], +) +def test_basic_mapping_operations(test_name, mappings, expected_result, expected_error): + if expected_error: + with pytest.raises(ValueError, match=expected_error): + combine_mappings(mappings) + else: + assert combine_mappings(mappings) == expected_result + + +@pytest.mark.parametrize( + "test_name, mappings, expected_result, expected_error", + [ + ( + "combine_with_string", + [{"a": 1}, "option"], + None, + "Cannot combine multiple options if one is a string", + ), + ( + "multiple_strings", + ["option1", "option2"], + None, + "Cannot combine multiple string options", + ), + ("string_with_empty_mapping", ["option", {}], "option", None), + ], +) +def test_string_handling(test_name, mappings, expected_result, expected_error): + if expected_error: + with pytest.raises(ValueError, match=expected_error): + combine_mappings(mappings) + else: + assert combine_mappings(mappings) == expected_result + + +@pytest.mark.parametrize( + "test_name, mappings, expected_result, expected_error", + [ + ( + "simple_nested_merge", + [{"a": {"b": 1}}, {"c": {"d": 2}}], + {"a": {"b": 1}, "c": {"d": 2}}, + None, + ), + ( + "deep_nested_merge", + [{"a": {"b": {"c": 1}}}, {"d": {"e": {"f": 2}}}], + {"a": {"b": {"c": 1}}, "d": {"e": {"f": 2}}}, + None, + ), + ( + "nested_merge_same_level", + [ + {"data": {"user": {"id": 1}, "status": "active"}}, + {"data": {"user": {"name": "test"}, "type": "admin"}}, + ], + {"data": {"user": {"id": 1, "name": "test"}, "status": "active", "type": "admin"}}, + None, + ), + ("nested_conflict", [{"a": {"b": 1}}, {"a": {"b": 2}}], None, "nested path conflict"), + ("type_conflict", [{"a": 1}, {"a": {"b": 2}}], None, "nested path conflict"), + ( + "merge_empty_nested", + [{"a": {"b": {}}}, {"a": {"b": {"c": 1}}}], + {"a": {"b": {"c": 1}}}, + None, + ), + ( + "different_value_types", + [{"data": {"field": "string"}}, {"data": {"field": {"nested": "value"}}}], + None, + "nested path conflict", + ), + ], +) +def test_nested_structures(test_name, mappings, expected_result, expected_error): + if expected_error: + with pytest.raises(ValueError, match=expected_error): + combine_mappings(mappings) + else: + assert combine_mappings(mappings) == expected_result From 6e8e13cb8124382acf59d956222685da4827c8ac Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 21 Jan 2025 15:15:09 -0800 Subject: [PATCH 20/25] chore: update unit tests --- .../requesters/paginators/default_paginator.py | 2 +- .../requesters/request_options/test_request_options.py | 7 ------- .../declarative/requesters/test_http_requester.py | 10 +++++----- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index d0bfff1ae..6fb412cd9 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -197,7 +197,7 @@ def _get_request_options( and self.page_token_option.inject_into == option_type ): self.page_token_option.inject_into_request(options, token, self.config) - + if ( self.page_size_option and self.pagination_strategy.get_page_size() diff --git a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py index 31222afa1..115ce688d 100644 --- a/unit_tests/sources/declarative/requesters/request_options/test_request_options.py +++ b/unit_tests/sources/declarative/requesters/request_options/test_request_options.py @@ -32,13 +32,6 @@ ValueError, "Nested field injection is only supported for body JSON injection.", ), - ( - None, - {"this": "should", "be": "a list"}, - RequestOptionType.body_json, - TypeError, - "field_path expects a list", - ), ], ) def test_request_option_validation( diff --git a/unit_tests/sources/declarative/requesters/test_http_requester.py b/unit_tests/sources/declarative/requesters/test_http_requester.py index 28ea0cb9b..77725b449 100644 --- a/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -243,8 +243,8 @@ def test_basic_send_request(): None, "field=value&field2=value&authfield=val", ), - ({"field": "value"}, None, {"field": "value"}, None, None, None, ValueError, None), - ({"field": "value"}, None, None, None, {"field": "value"}, None, ValueError, None), + ({"field": "value"}, None, {"field": "value"}, None, None, None, None, "field=value"), + ({"field": "value"}, None, None, None, {"field": "value"}, None, None, "field=value"), ( {"field": "value"}, None, @@ -252,8 +252,8 @@ def test_basic_send_request(): None, {"field": "value"}, None, - ValueError, None, + "field=value&field2=value", ), # merging json params from the three sources (None, {"field": "value"}, None, None, None, None, None, '{"field": "value"}'), @@ -277,8 +277,8 @@ def test_basic_send_request(): None, '{"field": "value", "field2": "value", "authfield": "val"}', ), - (None, {"field": "value"}, None, {"field": "value"}, None, None, ValueError, None), - (None, {"field": "value"}, None, None, None, {"field": "value"}, ValueError, None), + (None, {"field": "value"}, None, {"field": "value"}, None, None, None, "field=value"), + (None, {"field": "value"}, None, None, None, {"field": "value"}, None, "field=value"), # raise on mixed data and json params ( {"field": "value"}, From 0fabdd74a1495663617e2c94fdc5ee91d1534104 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 21 Jan 2025 15:37:21 -0800 Subject: [PATCH 21/25] chore: add deprecation comment to field_name --- .../declarative/declarative_component_schema.yaml | 2 +- .../models/declarative_component_schema.py | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 3ee24e843..cc3369682 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -2809,7 +2809,7 @@ definitions: enum: [RequestOption] field_name: title: Field Name - description: Configures which key should be used in the location that the descriptor is being injected into + description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder. type: string examples: - segment_id diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 75a3d4321..5e9ca1dc5 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -719,7 +719,7 @@ class HttpResponseFilter(BaseModel): class TypesMap(BaseModel): target_type: Union[str, List[str]] current_type: Union[str, List[str]] - condition: Optional[str] + condition: Optional[str] = None class SchemaTypeIdentifier(BaseModel): @@ -797,14 +797,11 @@ class DpathFlattenFields(BaseModel): field_path: List[str] = Field( ..., description="A path to field that needs to be flattened.", - examples=[ - ["data"], - ["data", "*", "field"], - ], + examples=[["data"], ["data", "*", "field"]], title="Field Path", ) delete_origin_value: Optional[bool] = Field( - False, + None, description="Whether to delete the origin value or keep it. Default is False.", title="Delete Origin Value", ) @@ -1175,7 +1172,7 @@ class RequestOption(BaseModel): type: Literal["RequestOption"] field_name: Optional[str] = Field( None, - description="Configures which key should be used in the location that the descriptor is being injected into", + description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.", examples=["segment_id"], title="Field Name", ) From fdfa92c4d87a5d30e1cfd6fe03c64805e125a60a Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 21 Jan 2025 16:39:50 -0800 Subject: [PATCH 22/25] refactor: update merge_mappings to handle body_json differently than other injection types --- .../declarative/requesters/http_requester.py | 6 ++- .../retrievers/simple_retriever.py | 5 +- airbyte_cdk/utils/mapping_helpers.py | 53 +++++++++++++------ .../requesters/test_http_requester.py | 6 +-- 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index 35d4b0f11..ad23f4d06 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -199,6 +199,9 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ + + is_body_json = requester_method.__name__ == "get_request_body_json" + return combine_mappings( [ requester_method( @@ -208,7 +211,8 @@ def _get_request_options( ), auth_options_method(), extra_options, - ] + ], + allow_same_value_merge=is_body_json, ) def _request_headers( diff --git a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index d167a84bc..e42f0485a 100644 --- a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -128,6 +128,9 @@ def _get_request_options( Returned merged mapping otherwise """ # FIXME we should eventually remove the usage of stream_state as part of the interpolation + + is_body_json = paginator_method.__name__ == "get_request_body_json" + mappings = [ paginator_method( stream_state=stream_state, @@ -143,7 +146,7 @@ def _get_request_options( next_page_token=next_page_token, ) ) - return combine_mappings(mappings) + return combine_mappings(mappings, allow_same_value_merge=is_body_json) def _request_headers( self, diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index ac0ec0cdd..c5682c288 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -11,15 +11,18 @@ def _merge_mappings( target: Dict[str, Any], source: Mapping[str, Any], path: Optional[List[str]] = None, + allow_same_value_merge: bool = False, ) -> None: """ Recursively merge two dictionaries, raising an error if there are any conflicts. - A conflict occurs when the same path exists in both dictionaries with different values. + For body_json requests (allow_same_value_merge=True), a conflict occurs only when the same path has different values. + For other request types (allow_same_value_merge=False), any duplicate key is a conflict, regardless of value. Args: target: The dictionary to merge into source: The dictionary to merge from path: The current path in the nested structure (for error messages) + allow_same_value_merge: Whether to allow merging the same value into the same key. Set to false by default, should only be true for body_json injections """ path = path or [] for key, source_value in source.items(): @@ -28,13 +31,15 @@ def _merge_mappings( if key in target: target_value = target[key] if isinstance(target_value, dict) and isinstance(source_value, dict): + # Only body_json supports nested_structures + if not allow_same_value_merge: + raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}") # If both are dictionaries, recursively merge them - _merge_mappings(target_value, source_value, current_path) - elif target_value != source_value: + _merge_mappings(target_value, source_value, current_path, allow_same_value_merge) + + elif not allow_same_value_merge or target_value != source_value: # If same key has different values, that's a conflict - raise ValueError( - f"Duplicate keys or nested path conflict found: {'.'.join(current_path)}" - ) + raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}") else: # No conflict, just copy the value (using deepcopy for nested structures) target[key] = copy.deepcopy(source_value) @@ -42,16 +47,34 @@ def _merge_mappings( def combine_mappings( mappings: List[Optional[Union[Mapping[str, Any], str]]], + allow_same_value_merge: bool = False, ) -> Union[Mapping[str, Any], str]: """ - Combine multiple mappings into a single mapping, supporting nested structures. - Raise errors in the following cases: - * If there are multiple string mappings - * If there is a string mapping AND any non-empty dictionary mappings - * If there are conflicting paths across mappings (including nested conflicts) - - If there is exactly one string mapping and no other non-empty mappings, return that string. - Otherwise, combine all dictionary mappings into a single mapping. + Combine multiple mappings into a single mapping. + + For body_json requests (allow_same_value_merge=True): + - Supports nested structures (e.g., {"data": {"user": {"id": 1}}}) + - Allows duplicate keys if their values match + - Raises error if same path has different values + + For other request types (allow_same_value_merge=False): + - Only supports flat structures + - Any duplicate key raises an error, regardless of value + + Args: + mappings: List of mappings to combine + allow_same_value_merge: Whether to allow duplicate keys with matching values. + Should only be True for body_json requests. + + Returns: + A single mapping combining all inputs, or a string if there is exactly one + string mapping and no other non-empty mappings. + + Raises: + ValueError: If there are: + - Multiple string mappings + - Both a string mapping and non-empty dictionary mappings + - Conflicting keys/paths based on allow_same_value_merge setting """ if not mappings: return {} @@ -76,6 +99,6 @@ def combine_mappings( result: Dict[str, Any] = {} for mapping in non_empty_mappings: if mapping and isinstance(mapping, Mapping): - _merge_mappings(result, mapping) + _merge_mappings(result, mapping, allow_same_value_merge=allow_same_value_merge) return result diff --git a/unit_tests/sources/declarative/requesters/test_http_requester.py b/unit_tests/sources/declarative/requesters/test_http_requester.py index 77725b449..c1f3cee4f 100644 --- a/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -243,8 +243,8 @@ def test_basic_send_request(): None, "field=value&field2=value&authfield=val", ), - ({"field": "value"}, None, {"field": "value"}, None, None, None, None, "field=value"), - ({"field": "value"}, None, None, None, {"field": "value"}, None, None, "field=value"), + ({"field": "value"}, None, {"field": "value"}, None, None, None, ValueError, None), + ({"field": "value"}, None, None, None, {"field": "value"}, None, ValueError, None), ( {"field": "value"}, None, @@ -252,8 +252,8 @@ def test_basic_send_request(): None, {"field": "value"}, None, + ValueError, None, - "field=value&field2=value", ), # merging json params from the three sources (None, {"field": "value"}, None, None, None, None, None, '{"field": "value"}'), From a2262a90e135ae73fad44a9822de82aec729e581 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Tue, 21 Jan 2025 17:27:39 -0800 Subject: [PATCH 23/25] chore: fix all the tests --- .../retrievers/test_simple_retriever.py | 39 +++++++++- unit_tests/utils/test_mapping_helpers.py | 72 +++++++++++-------- 2 files changed, 82 insertions(+), 29 deletions(-) diff --git a/unit_tests/sources/declarative/retrievers/test_simple_retriever.py b/unit_tests/sources/declarative/retrievers/test_simple_retriever.py index 5878c758f..bd31f7b65 100644 --- a/unit_tests/sources/declarative/retrievers/test_simple_retriever.py +++ b/unit_tests/sources/declarative/retrievers/test_simple_retriever.py @@ -58,6 +58,11 @@ def test_simple_retriever_full(mock_http_stream): request_params = {"param": "value"} requester.get_request_params.return_value = request_params + requester.get_request_params.__name__ = "get_request_params" + requester.get_request_headers.__name__ = "get_request_headers" + requester.get_request_body_data.__name__ = "get_request_body_data" + requester.get_request_body_json.__name__ = "get_request_body_json" + paginator = MagicMock() paginator.get_initial_token.return_value = None next_page_token = {"cursor": "cursor_value"} @@ -65,6 +70,11 @@ def test_simple_retriever_full(mock_http_stream): paginator.next_page_token.return_value = next_page_token paginator.get_request_headers.return_value = {} + paginator.get_request_params.__name__ = "get_request_params" + paginator.get_request_headers.__name__ = "get_request_headers" + paginator.get_request_body_data.__name__ = "get_request_body_data" + paginator.get_request_body_json.__name__ = "get_request_body_json" + record_selector = MagicMock() record_selector.select_records.return_value = records @@ -442,11 +452,19 @@ def test_get_request_options_from_pagination( paginator.get_request_body_data.return_value = paginator_mapping paginator.get_request_body_json.return_value = paginator_mapping + paginator.get_request_params.__name__ = "get_request_params" + paginator.get_request_body_data.__name__ = "get_request_body_data" + paginator.get_request_body_json.__name__ = "get_request_body_json" + request_options_provider = MagicMock() request_options_provider.get_request_params.return_value = request_options_provider_mapping request_options_provider.get_request_body_data.return_value = request_options_provider_mapping request_options_provider.get_request_body_json.return_value = request_options_provider_mapping + request_options_provider.get_request_params.__name__ = "get_request_params" + request_options_provider.get_request_body_data.__name__ = "get_request_body_data" + request_options_provider.get_request_body_json.__name__ = "get_request_body_json" + record_selector = MagicMock() retriever = SimpleRetriever( name="stream_name", @@ -489,10 +507,12 @@ def test_get_request_headers(test_name, paginator_mapping, expected_mapping): # This test is separate from the other request options because request headers must be strings paginator = MagicMock() paginator.get_request_headers.return_value = paginator_mapping + paginator.get_request_headers.__name__ = "get_request_headers" requester = MagicMock(use_cache=False) stream_slicer = MagicMock() stream_slicer.get_request_headers.return_value = {"key": "value"} + stream_slicer.get_request_headers.__name__ = "get_request_headers" record_selector = MagicMock() retriever = SimpleRetriever( @@ -565,10 +585,12 @@ def test_ignore_stream_slicer_parameters_on_paginated_requests( # This test is separate from the other request options because request headers must be strings paginator = MagicMock() paginator.get_request_headers.return_value = paginator_mapping + paginator.get_request_headers.__name__ = "get_request_headers" requester = MagicMock(use_cache=False) stream_slicer = MagicMock() stream_slicer.get_request_headers.return_value = {"key_from_slicer": "value"} + stream_slicer.get_request_headers.__name__ = "get_request_headers" record_selector = MagicMock() retriever = SimpleRetriever( @@ -612,6 +634,7 @@ def test_request_body_data( ): paginator = MagicMock() paginator.get_request_body_data.return_value = paginator_body_data + paginator.get_request_body_data.__name__ = "get_request_body_data" requester = MagicMock(use_cache=False) request_option_provider = MagicMock() @@ -825,11 +848,25 @@ def test_emit_log_request_response_messages(mocker): "airbyte_cdk.sources.declarative.retrievers.simple_retriever.format_http_message" ) requester = MagicMock() + + # Add __name__ to mock methods + requester.get_request_params.__name__ = "get_request_params" + requester.get_request_headers.__name__ = "get_request_headers" + requester.get_request_body_data.__name__ = "get_request_body_data" + requester.get_request_body_json.__name__ = "get_request_body_json" + + # The paginator mock also needs __name__ attributes + paginator = MagicMock() + paginator.get_request_params.__name__ = "get_request_params" + paginator.get_request_headers.__name__ = "get_request_headers" + paginator.get_request_body_data.__name__ = "get_request_body_data" + paginator.get_request_body_json.__name__ = "get_request_body_json" + retriever = SimpleRetrieverTestReadDecorator( name="stream_name", primary_key=primary_key, requester=requester, - paginator=MagicMock(), + paginator=paginator, record_selector=record_selector, stream_slicer=SinglePartitionRouter(parameters={}), parameters={}, diff --git a/unit_tests/utils/test_mapping_helpers.py b/unit_tests/utils/test_mapping_helpers.py index d8524bc53..124bf4565 100644 --- a/unit_tests/utils/test_mapping_helpers.py +++ b/unit_tests/utils/test_mapping_helpers.py @@ -1,28 +1,19 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - import pytest from airbyte_cdk.utils.mapping_helpers import combine_mappings @pytest.mark.parametrize( - "test_name, mappings, expected_result, expected_error", + "test_name, mappings, expected_result", [ - ("basic_merge", [{"a": 1}, {"b": 2}, {"c": 3}, {}], {"a": 1, "b": 2, "c": 3}, None), - ("handle_none_values", [{"a": 1}, None, {"b": 2}], {"a": 1, "b": 2}, None), - ("empty_mappings", [], {}, None), - ("single_mapping", [{"a": 1}], {"a": 1}, None), - ("overlapping_keys", [{"a": 1, "b": 2}, {"b": 3}], None, "Duplicate keys"), + ("empty_mappings", [], {}), + ("single_mapping", [{"a": 1}], {"a": 1}), + ("handle_none_values", [{"a": 1}, None, {"b": 2}], {"a": 1, "b": 2}), ], ) -def test_basic_mapping_operations(test_name, mappings, expected_result, expected_error): - if expected_error: - with pytest.raises(ValueError, match=expected_error): - combine_mappings(mappings) - else: - assert combine_mappings(mappings) == expected_result +def test_basic_functionality(test_name, mappings, expected_result): + """Test basic mapping operations that work the same regardless of request type""" + assert combine_mappings(mappings) == expected_result @pytest.mark.parametrize( @@ -44,6 +35,7 @@ def test_basic_mapping_operations(test_name, mappings, expected_result, expected ], ) def test_string_handling(test_name, mappings, expected_result, expected_error): + """Test string handling behavior which is independent of request type""" if expected_error: with pytest.raises(ValueError, match=expected_error): combine_mappings(mappings) @@ -51,6 +43,25 @@ def test_string_handling(test_name, mappings, expected_result, expected_error): assert combine_mappings(mappings) == expected_result +@pytest.mark.parametrize( + "test_name, mappings, expected_error", + [ + ("duplicate_keys_same_value", [{"a": 1}, {"a": 1}], "Duplicate keys found"), + ("duplicate_keys_different_value", [{"a": 1}, {"a": 2}], "Duplicate keys found"), + ( + "nested_structure_not_allowed", + [{"a": {"b": 1}}, {"a": {"c": 2}}], + "Duplicate keys found", + ), + ("any_nesting_not_allowed", [{"a": {"b": 1}}, {"a": {"d": 2}}], "Duplicate keys found"), + ], +) +def test_non_body_json_requests(test_name, mappings, expected_error): + """Test strict validation for non-body-json requests (headers, params, body_data)""" + with pytest.raises(ValueError, match=expected_error): + combine_mappings(mappings, allow_same_value_merge=False) + + @pytest.mark.parametrize( "test_name, mappings, expected_result, expected_error", [ @@ -72,28 +83,33 @@ def test_string_handling(test_name, mappings, expected_result, expected_error): {"data": {"user": {"id": 1}, "status": "active"}}, {"data": {"user": {"name": "test"}, "type": "admin"}}, ], - {"data": {"user": {"id": 1, "name": "test"}, "status": "active", "type": "admin"}}, + { + "data": { + "user": {"id": 1, "name": "test"}, + "status": "active", + "type": "admin", + }, + }, None, ), - ("nested_conflict", [{"a": {"b": 1}}, {"a": {"b": 2}}], None, "nested path conflict"), - ("type_conflict", [{"a": 1}, {"a": {"b": 2}}], None, "nested path conflict"), ( - "merge_empty_nested", - [{"a": {"b": {}}}, {"a": {"b": {"c": 1}}}], - {"a": {"b": {"c": 1}}}, + "nested_conflict", + [{"a": {"b": 1}}, {"a": {"b": 2}}], None, + "Duplicate keys found", ), ( - "different_value_types", - [{"data": {"field": "string"}}, {"data": {"field": {"nested": "value"}}}], + "type_conflict", + [{"a": 1}, {"a": {"b": 2}}], None, - "nested path conflict", + "Duplicate keys found", ), ], ) -def test_nested_structures(test_name, mappings, expected_result, expected_error): +def test_body_json_requests(test_name, mappings, expected_result, expected_error): + """Test nested structure support for body_json requests""" if expected_error: with pytest.raises(ValueError, match=expected_error): - combine_mappings(mappings) + combine_mappings(mappings, allow_same_value_merge=True) else: - assert combine_mappings(mappings) == expected_result + assert combine_mappings(mappings, allow_same_value_merge=True) == expected_result From b8287d60a65911cf80ca21152e19ce7b58c50f45 Mon Sep 17 00:00:00 2001 From: ChristoGrab Date: Thu, 23 Jan 2025 20:47:59 -0800 Subject: [PATCH 24/25] fix: update component factory --- .../declarative/parsers/model_to_component_factory.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 12a7ea2cf..d17253169 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -2072,7 +2072,11 @@ def create_request_option( model: RequestOptionModel, config: Config, **kwargs: Any ) -> RequestOption: inject_into = RequestOptionType(model.inject_into.value) - return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={}) + field_path: Optional[List[Union[InterpolatedString, str]]] = model.field_path # type: ignore + field_name = model.field_name if model.field_name else None + return RequestOption( + field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} + ) def create_record_selector( self, From 24264a0302769dfe9413648b41eea3fd009fbd0e Mon Sep 17 00:00:00 2001 From: Christo Grabowski <108154848+ChristoGrab@users.noreply.github.com> Date: Fri, 31 Jan 2025 13:52:43 -0500 Subject: [PATCH 25/25] chore: simplify RequestOption construction in component factory (#303) --- .../parsers/model_to_component_factory.py | 64 +++++++++---------- .../test_model_to_component_factory.py | 30 +++++---- .../test_manifest_declarative_source.py | 4 +- 3 files changed, 50 insertions(+), 48 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index d17253169..9cefdd0dc 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -709,8 +709,8 @@ def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[ } return names_to_types[value_type] - @staticmethod def create_api_key_authenticator( + self, model: ApiKeyAuthenticatorModel, config: Config, token_provider: Optional[TokenProvider] = None, @@ -732,10 +732,8 @@ def create_api_key_authenticator( ) request_option = ( - RequestOption( - inject_into=RequestOptionType(model.inject_into.inject_into.value), - field_name=model.inject_into.field_name, - parameters=model.parameters or {}, + self._create_component_from_model( + model.inject_into, config, parameters=model.parameters or {} ) if model.inject_into else RequestOption( @@ -744,6 +742,7 @@ def create_api_key_authenticator( parameters=model.parameters or {}, ) ) + return ApiKeyAuthenticator( token_provider=( token_provider @@ -825,7 +824,7 @@ def create_session_token_authenticator( token_provider=token_provider, ) else: - return ModelToComponentFactory.create_api_key_authenticator( + return self.create_api_key_authenticator( ApiKeyAuthenticatorModel( type="ApiKeyAuthenticator", api_token="", @@ -1272,19 +1271,15 @@ def create_datetime_based_cursor( ) end_time_option = ( - RequestOption( - inject_into=RequestOptionType(model.end_time_option.inject_into.value), - field_name=model.end_time_option.field_name, - parameters=model.parameters or {}, + self._create_component_from_model( + model.end_time_option, config, parameters=model.parameters or {} ) if model.end_time_option else None ) start_time_option = ( - RequestOption( - inject_into=RequestOptionType(model.start_time_option.inject_into.value), - field_name=model.start_time_option.field_name, - parameters=model.parameters or {}, + self._create_component_from_model( + model.start_time_option, config, parameters=model.parameters or {} ) if model.start_time_option else None @@ -1358,19 +1353,15 @@ def create_declarative_stream( cursor_model = model.incremental_sync end_time_option = ( - RequestOption( - inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value), - field_name=cursor_model.end_time_option.field_name, - parameters=cursor_model.parameters or {}, + self._create_component_from_model( + cursor_model.end_time_option, config, parameters=cursor_model.parameters or {} ) if cursor_model.end_time_option else None ) start_time_option = ( - RequestOption( - inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value), - field_name=cursor_model.start_time_option.field_name, - parameters=cursor_model.parameters or {}, + self._create_component_from_model( + cursor_model.start_time_option, config, parameters=cursor_model.parameters or {} ) if cursor_model.start_time_option else None @@ -1879,16 +1870,11 @@ def create_jwt_authenticator( additional_jwt_payload=model.additional_jwt_payload, ) - @staticmethod def create_list_partition_router( - model: ListPartitionRouterModel, config: Config, **kwargs: Any + self, model: ListPartitionRouterModel, config: Config, **kwargs: Any ) -> ListPartitionRouter: request_option = ( - RequestOption( - inject_into=RequestOptionType(model.request_option.inject_into.value), - field_name=model.request_option.field_name, - parameters=model.parameters or {}, - ) + self._create_component_from_model(model.request_option, config) if model.request_option else None ) @@ -2072,10 +2058,24 @@ def create_request_option( model: RequestOptionModel, config: Config, **kwargs: Any ) -> RequestOption: inject_into = RequestOptionType(model.inject_into.value) - field_path: Optional[List[Union[InterpolatedString, str]]] = model.field_path # type: ignore - field_name = model.field_name if model.field_name else None + field_path: Optional[List[Union[InterpolatedString, str]]] = ( + [ + InterpolatedString.create(segment, parameters=kwargs.get("parameters", {})) + for segment in model.field_path + ] + if model.field_path + else None + ) + field_name = ( + InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {})) + if model.field_name + else None + ) return RequestOption( - field_name=field_name, field_path=field_path, inject_into=inject_into, parameters={} + field_name=field_name, + field_path=field_path, + inject_into=inject_into, + parameters=kwargs.get("parameters", {}), ) def create_record_selector( diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index c50e9e6e9..cd6edbae0 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -593,8 +593,8 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): cursor_field: repository request_option: type: RequestOption - inject_into: header - field_name: repository + inject_into: body_json + field_path: ["repository", "id"] """ parsed_manifest = YamlDeclarativeSource._parse(content) resolved_manifest = resolver.preprocess_manifest(parsed_manifest) @@ -610,8 +610,10 @@ def test_list_based_stream_slicer_with_values_defined_in_config(): assert isinstance(partition_router, ListPartitionRouter) assert partition_router.values == ["airbyte", "airbyte-cloud"] - assert partition_router.request_option.inject_into == RequestOptionType.header - assert partition_router.request_option.field_name.eval(config=input_config) == "repository" + assert partition_router.request_option.inject_into == RequestOptionType.body_json + for field in partition_router.request_option.field_path: + assert isinstance(field, InterpolatedString) + assert len(partition_router.request_option.field_path) == 2 def test_create_substream_partition_router(): @@ -714,7 +716,7 @@ def test_datetime_based_cursor(): end_time_option: type: RequestOption inject_into: body_json - field_name: "before_{{ parameters['cursor_field'] }}" + field_path: ["before_{{ parameters['cursor_field'] }}"] partition_field_start: star partition_field_end: en """ @@ -743,7 +745,9 @@ def test_datetime_based_cursor(): == "since_updated_at" ) assert stream_slicer.end_time_option.inject_into == RequestOptionType.body_json - assert stream_slicer.end_time_option.field_name.eval({}) == "before_created_at" + assert [field.eval({}) for field in stream_slicer.end_time_option.field_path] == [ + "before_created_at" + ] assert stream_slicer._partition_field_start.eval({}) == "star" assert stream_slicer._partition_field_end.eval({}) == "en" @@ -904,8 +908,8 @@ def test_resumable_full_refresh_stream(): type: DefaultPaginator page_size_option: type: RequestOption - inject_into: request_parameter - field_name: page_size + inject_into: body_json + field_path: ["variables", "page_size"] page_token_option: type: RequestPath pagination_strategy: @@ -1003,11 +1007,10 @@ def test_resumable_full_refresh_stream(): assert isinstance(stream.retriever.paginator, DefaultPaginator) assert isinstance(stream.retriever.paginator.decoder, PaginationDecoderDecorator) - assert stream.retriever.paginator.page_size_option.field_name.eval(input_config) == "page_size" - assert ( - stream.retriever.paginator.page_size_option.inject_into - == RequestOptionType.request_parameter - ) + for string in stream.retriever.paginator.page_size_option.field_path: + assert isinstance(string, InterpolatedString) + assert len(stream.retriever.paginator.page_size_option.field_path) == 2 + assert stream.retriever.paginator.page_size_option.inject_into == RequestOptionType.body_json assert isinstance(stream.retriever.paginator.page_token_option, RequestPath) assert stream.retriever.paginator.url_base.string == "https://api.sendgrid.com/v3/" assert stream.retriever.paginator.url_base.default == "https://api.sendgrid.com/v3/" @@ -2509,7 +2512,6 @@ def test_merge_incremental_and_partition_router(incremental, partition_router, e assert isinstance(stream, DeclarativeStream) assert isinstance(stream.retriever, SimpleRetriever) - print(stream.retriever.stream_slicer) assert isinstance(stream.retriever.stream_slicer, expected_type) if incremental and partition_router: diff --git a/unit_tests/sources/declarative/test_manifest_declarative_source.py b/unit_tests/sources/declarative/test_manifest_declarative_source.py index b3c9ab4bb..667e32d60 100644 --- a/unit_tests/sources/declarative/test_manifest_declarative_source.py +++ b/unit_tests/sources/declarative/test_manifest_declarative_source.py @@ -1024,8 +1024,8 @@ def test_manifest_without_at_least_one_stream(self): "page_size": 10, "page_size_option": { "type": "RequestOption", - "inject_into": "request_parameter", - "field_name": "page_size", + "inject_into": "request_body", + "field_path": ["variables", "page_size"], }, "page_token_option": {"type": "RequestPath"}, "pagination_strategy": {