From 99739556c6e607b5254b1ab14ecc0448db550623 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 13 Dec 2024 15:42:38 -0800 Subject: [PATCH 01/18] skeleton: components module from dynamic text input --- .../parsers/model_to_component_factory.py | 55 ++++++++++++++++--- .../test/utils/manifest_only_fixtures.py | 20 +++++++ 2 files changed, 68 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 215d6fff9..65b1e0ea2 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -8,6 +8,7 @@ import importlib import inspect import re +import types from functools import partial from typing import ( Any, @@ -986,8 +987,25 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ + INJECTED_COMPONENTS_PY = "__injected_components_py" - custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name) + components_module: Optional[types.ModuleType] = None + if INJECTED_COMPONENTS_PY in config: + # declares a dynamic module `components` from provided text + python_text = config[INJECTED_COMPONENTS_PY] + module_name = "components" + + # Create a new module object + components_module = types.ModuleType(module_name) + # Execute the module text in the module's namespace + exec(python_text, components_module.__dict__) + # Skip insert the module into sys.modules because we pass by reference below + # sys.modules[module_name] = components_module + + custom_component_class = self._get_class_from_fully_qualified_class_name( + full_qualified_class_name=model.class_name, + components_module=components_module, + ) component_fields = 
get_type_hints(custom_component_class) model_args = model.dict() model_args["config"] = config @@ -1039,15 +1057,38 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) - @staticmethod - def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any: + def _get_class_from_fully_qualified_class_name( + full_qualified_class_name: str, + components_module: Optional[types.ModuleType] = None, + ) -> Any: + """ + Get a class from its fully qualified name, optionally using a pre-parsed module. + + Args: + full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). + components_module (Optional[ModuleType]): An optional pre-parsed module. + + Returns: + Any: The class object. + + Raises: + ValueError: If the class cannot be loaded. + """ split = full_qualified_class_name.split(".") - module = ".".join(split[:-1]) + module_name = ".".join(split[:-1]) class_name = split[-1] + try: - return getattr(importlib.import_module(module), class_name) - except AttributeError: - raise ValueError(f"Could not load class {full_qualified_class_name}.") + # Use the provided module if available and if module name matches + if components_module and components_module.__name__ == module_name: + return getattr(components_module, class_name) + + # Fallback to importing the module dynamically + module = importlib.import_module(module_name) + return getattr(module, class_name) + + except (AttributeError, ModuleNotFoundError) as e: + raise ValueError(f"Could not load class {full_qualified_class_name}.") from e @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 47620e7c1..01b2b393d 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -2,6 +2,7 @@ 
import importlib.util +import types from pathlib import Path from types import ModuleType from typing import Optional @@ -51,6 +52,25 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]: return components_module +def components_module_from_string(components_py_text: str) -> Optional[ModuleType]: + """Load and return the components module from a provided string containing the python code. + + This assumes the components module is located at /components.py. + + TODO: Make new unit test to leverage this fixture + """ + module_name = "components" + + # Create a new module object + components_module = types.ModuleType(name=module_name) + + # Execute the module text in the module's namespace + exec(components_py_text, components_module.__dict__) + + # Now you can import and use the module + return components_module + + @pytest.fixture(scope="session") def manifest_path(connector_dir: Path) -> Path: """Return the path to the connector's manifest file.""" From 8309f7910c223568a8b516ae1336f9a20a80bd9e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 16 Dec 2024 08:52:17 -0800 Subject: [PATCH 02/18] refactor / clean up --- .../parsers/model_to_component_factory.py | 57 +++++++++++-------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 65b1e0ea2..8d4ded84f 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -987,21 +987,7 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ - INJECTED_COMPONENTS_PY = "__injected_components_py" - - components_module: Optional[types.ModuleType] = None - if INJECTED_COMPONENTS_PY in 
config: - # declares a dynamic module `components` from provided text - python_text = config[INJECTED_COMPONENTS_PY] - module_name = "components" - - # Create a new module object - components_module = types.ModuleType(module_name) - # Execute the module text in the module's namespace - exec(python_text, components_module.__dict__) - # Skip insert the module into sys.modules because we pass by reference below - # sys.modules[module_name] = components_module - + components_module = self._get_components_module_object(config=config) custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, components_module=components_module, @@ -1057,9 +1043,31 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) + def _get_components_module_object( + config: Config, + ) -> None: + """Get a components module object based on the provided config. + + If custom python components is provided, this will be loaded. Otherwise, we will + attempt to load from the `components` module already imported. 
+ """ + INJECTED_COMPONENTS_PY = "__injected_components_py" + COMPONENTS_MODULE_NAME = "components" + + components_module: types.ModuleType + if INJECTED_COMPONENTS_PY in config: + # Create a new module object and execute the provided Python code text within it + components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) + python_text = config[INJECTED_COMPONENTS_PY] + exec(python_text, components_module.__dict__) + # Skip insert the module into sys.modules because we pass by reference below + # sys.modules[module_name] = components_module + else: + components_module = importlib.import_module(name=COMPONENTS_MODULE_NAME) + def _get_class_from_fully_qualified_class_name( full_qualified_class_name: str, - components_module: Optional[types.ModuleType] = None, + components_module: types.ModuleType, ) -> Any: """ Get a class from its fully qualified name, optionally using a pre-parsed module. @@ -1075,18 +1083,17 @@ def _get_class_from_fully_qualified_class_name( ValueError: If the class cannot be loaded. """ split = full_qualified_class_name.split(".") - module_name = ".".join(split[:-1]) + module_name_full = ".".join(split[:-1]) + module_name = split[:-2] class_name = split[-1] - try: - # Use the provided module if available and if module name matches - if components_module and components_module.__name__ == module_name: - return getattr(components_module, class_name) - - # Fallback to importing the module dynamically - module = importlib.import_module(module_name) - return getattr(module, class_name) + if module_name != "components": + raise ValueError( + f"Custom components must be defined in a module named `components`. Found {module_name} instead." 
+ ) + try: + return getattr(components_module, class_name) except (AttributeError, ModuleNotFoundError) as e: raise ValueError(f"Could not load class {full_qualified_class_name}.") from e From 399dd7ba5a36d93891ff1592158b0ef46a87716c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 18 Dec 2024 16:07:47 -0800 Subject: [PATCH 03/18] add test resource for py_components unit test --- .../resources/valid_py_components_code.py | 15 + .../resources/valid_py_components_config.json | 3 + .../valid_py_components_manifest.yaml | 1368 +++++++++++++++++ 3 files changed, 1386 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_code.py create mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_config.json create mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py b/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py new file mode 100644 index 000000000..06c95e78a --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py @@ -0,0 +1,15 @@ +"""Custom Python components.py file for testing. + +This file is mostly a no-op (for now) but should trigger a failure if code file is not +correctly parsed. +""" + +from airbyte_cdk.sources.declarative.models import DeclarativeStream + + +class CustomDeclarativeStream(DeclarativeStream): + """Custom declarative stream class. + + We don't change anything from the base class, but this should still be enough to confirm + that the components.py file is correctly parsed. 
+ """ diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json b/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json new file mode 100644 index 000000000..214fc684f --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json @@ -0,0 +1,3 @@ +{ + "pokemon_name": "blastoise" +} diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml new file mode 100644 index 000000000..bf15e3138 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml @@ -0,0 +1,1368 @@ +version: 3.9.6 + +type: DeclarativeSource + +description: This is just a test, with custom Python components enabled. Copied from Pokemon example. + +check: + type: CheckStream + stream_names: + - pokemon + +definitions: + streams: + pokemon: + type: components.CustomDeclarativeStream + name: pokemon + retriever: + type: SimpleRetriever + requester: + $ref: "#/definitions/base_requester" + path: /{{config['pokemon_name']}} + http_method: GET + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: [] + primary_key: + - id + schema_loader: + type: InlineSchemaLoader + schema: + $ref: "#/schemas/pokemon" + base_requester: + type: HttpRequester + url_base: https://pokeapi.co/api/v2/pokemon + +streams: + - $ref: "#/definitions/streams/pokemon" + +spec: + type: Spec + connection_specification: + type: object + $schema: http://json-schema.org/draft-07/schema# + required: + - pokemon_name + properties: + pokemon_name: + type: string + description: Pokemon requested from the API. 
+ enum: + - bulbasaur + - ivysaur + - venusaur + - charmander + - charmeleon + - charizard + - squirtle + - wartortle + - blastoise + - caterpie + - metapod + - butterfree + - weedle + - kakuna + - beedrill + - pidgey + - pidgeotto + - pidgeot + - rattata + - raticate + - spearow + - fearow + - ekans + - arbok + - pikachu + - raichu + - sandshrew + - sandslash + - nidoranf + - nidorina + - nidoqueen + - nidoranm + - nidorino + - nidoking + - clefairy + - clefable + - vulpix + - ninetales + - jigglypuff + - wigglytuff + - zubat + - golbat + - oddish + - gloom + - vileplume + - paras + - parasect + - venonat + - venomoth + - diglett + - dugtrio + - meowth + - persian + - psyduck + - golduck + - mankey + - primeape + - growlithe + - arcanine + - poliwag + - poliwhirl + - poliwrath + - abra + - kadabra + - alakazam + - machop + - machoke + - machamp + - bellsprout + - weepinbell + - victreebel + - tentacool + - tentacruel + - geodude + - graveler + - golem + - ponyta + - rapidash + - slowpoke + - slowbro + - magnemite + - magneton + - farfetchd + - doduo + - dodrio + - seel + - dewgong + - grimer + - muk + - shellder + - cloyster + - gastly + - haunter + - gengar + - onix + - drowzee + - hypno + - krabby + - kingler + - voltorb + - electrode + - exeggcute + - exeggutor + - cubone + - marowak + - hitmonlee + - hitmonchan + - lickitung + - koffing + - weezing + - rhyhorn + - rhydon + - chansey + - tangela + - kangaskhan + - horsea + - seadra + - goldeen + - seaking + - staryu + - starmie + - mrmime + - scyther + - jynx + - electabuzz + - magmar + - pinsir + - tauros + - magikarp + - gyarados + - lapras + - ditto + - eevee + - vaporeon + - jolteon + - flareon + - porygon + - omanyte + - omastar + - kabuto + - kabutops + - aerodactyl + - snorlax + - articuno + - zapdos + - moltres + - dratini + - dragonair + - dragonite + - mewtwo + - mew + - chikorita + - bayleef + - meganium + - cyndaquil + - quilava + - typhlosion + - totodile + - croconaw + - feraligatr + - sentret + - 
furret + - hoothoot + - noctowl + - ledyba + - ledian + - spinarak + - ariados + - crobat + - chinchou + - lanturn + - pichu + - cleffa + - igglybuff + - togepi + - togetic + - natu + - xatu + - mareep + - flaaffy + - ampharos + - bellossom + - marill + - azumarill + - sudowoodo + - politoed + - hoppip + - skiploom + - jumpluff + - aipom + - sunkern + - sunflora + - yanma + - wooper + - quagsire + - espeon + - umbreon + - murkrow + - slowking + - misdreavus + - unown + - wobbuffet + - girafarig + - pineco + - forretress + - dunsparce + - gligar + - steelix + - snubbull + - granbull + - qwilfish + - scizor + - shuckle + - heracross + - sneasel + - teddiursa + - ursaring + - slugma + - magcargo + - swinub + - piloswine + - corsola + - remoraid + - octillery + - delibird + - mantine + - skarmory + - houndour + - houndoom + - kingdra + - phanpy + - donphan + - porygon2 + - stantler + - smeargle + - tyrogue + - hitmontop + - smoochum + - elekid + - magby + - miltank + - blissey + - raikou + - entei + - suicune + - larvitar + - pupitar + - tyranitar + - lugia + - ho-oh + - celebi + - treecko + - grovyle + - sceptile + - torchic + - combusken + - blaziken + - mudkip + - marshtomp + - swampert + - poochyena + - mightyena + - zigzagoon + - linoone + - wurmple + - silcoon + - beautifly + - cascoon + - dustox + - lotad + - lombre + - ludicolo + - seedot + - nuzleaf + - shiftry + - taillow + - swellow + - wingull + - pelipper + - ralts + - kirlia + - gardevoir + - surskit + - masquerain + - shroomish + - breloom + - slakoth + - vigoroth + - slaking + - nincada + - ninjask + - shedinja + - whismur + - loudred + - exploud + - makuhita + - hariyama + - azurill + - nosepass + - skitty + - delcatty + - sableye + - mawile + - aron + - lairon + - aggron + - meditite + - medicham + - electrike + - manectric + - plusle + - minun + - volbeat + - illumise + - roselia + - gulpin + - swalot + - carvanha + - sharpedo + - wailmer + - wailord + - numel + - camerupt + - torkoal + - spoink + - 
grumpig + - spinda + - trapinch + - vibrava + - flygon + - cacnea + - cacturne + - swablu + - altaria + - zangoose + - seviper + - lunatone + - solrock + - barboach + - whiscash + - corphish + - crawdaunt + - baltoy + - claydol + - lileep + - cradily + - anorith + - armaldo + - feebas + - milotic + - castform + - kecleon + - shuppet + - banette + - duskull + - dusclops + - tropius + - chimecho + - absol + - wynaut + - snorunt + - glalie + - spheal + - sealeo + - walrein + - clamperl + - huntail + - gorebyss + - relicanth + - luvdisc + - bagon + - shelgon + - salamence + - beldum + - metang + - metagross + - regirock + - regice + - registeel + - latias + - latios + - kyogre + - groudon + - rayquaza + - jirachi + - deoxys + - turtwig + - grotle + - torterra + - chimchar + - monferno + - infernape + - piplup + - prinplup + - empoleon + - starly + - staravia + - staraptor + - bidoof + - bibarel + - kricketot + - kricketune + - shinx + - luxio + - luxray + - budew + - roserade + - cranidos + - rampardos + - shieldon + - bastiodon + - burmy + - wormadam + - mothim + - combee + - vespiquen + - pachirisu + - buizel + - floatzel + - cherubi + - cherrim + - shellos + - gastrodon + - ambipom + - drifloon + - drifblim + - buneary + - lopunny + - mismagius + - honchkrow + - glameow + - purugly + - chingling + - stunky + - skuntank + - bronzor + - bronzong + - bonsly + - mimejr + - happiny + - chatot + - spiritomb + - gible + - gabite + - garchomp + - munchlax + - riolu + - lucario + - hippopotas + - hippowdon + - skorupi + - drapion + - croagunk + - toxicroak + - carnivine + - finneon + - lumineon + - mantyke + - snover + - abomasnow + - weavile + - magnezone + - lickilicky + - rhyperior + - tangrowth + - electivire + - magmortar + - togekiss + - yanmega + - leafeon + - glaceon + - gliscor + - mamoswine + - porygon-z + - gallade + - probopass + - dusknoir + - froslass + - rotom + - uxie + - mesprit + - azelf + - dialga + - palkia + - heatran + - regigigas + - giratina + - 
cresselia + - phione + - manaphy + - darkrai + - shaymin + - arceus + - victini + - snivy + - servine + - serperior + - tepig + - pignite + - emboar + - oshawott + - dewott + - samurott + - patrat + - watchog + - lillipup + - herdier + - stoutland + - purrloin + - liepard + - pansage + - simisage + - pansear + - simisear + - panpour + - simipour + - munna + - musharna + - pidove + - tranquill + - unfezant + - blitzle + - zebstrika + - roggenrola + - boldore + - gigalith + - woobat + - swoobat + - drilbur + - excadrill + - audino + - timburr + - gurdurr + - conkeldurr + - tympole + - palpitoad + - seismitoad + - throh + - sawk + - sewaddle + - swadloon + - leavanny + - venipede + - whirlipede + - scolipede + - cottonee + - whimsicott + - petilil + - lilligant + - basculin + - sandile + - krokorok + - krookodile + - darumaka + - darmanitan + - maractus + - dwebble + - crustle + - scraggy + - scrafty + - sigilyph + - yamask + - cofagrigus + - tirtouga + - carracosta + - archen + - archeops + - trubbish + - garbodor + - zorua + - zoroark + - minccino + - cinccino + - gothita + - gothorita + - gothitelle + - solosis + - duosion + - reuniclus + - ducklett + - swanna + - vanillite + - vanillish + - vanilluxe + - deerling + - sawsbuck + - emolga + - karrablast + - escavalier + - foongus + - amoonguss + - frillish + - jellicent + - alomomola + - joltik + - galvantula + - ferroseed + - ferrothorn + - klink + - klang + - klinklang + - tynamo + - eelektrik + - eelektross + - elgyem + - beheeyem + - litwick + - lampent + - chandelure + - axew + - fraxure + - haxorus + - cubchoo + - beartic + - cryogonal + - shelmet + - accelgor + - stunfisk + - mienfoo + - mienshao + - druddigon + - golett + - golurk + - pawniard + - bisharp + - bouffalant + - rufflet + - braviary + - vullaby + - mandibuzz + - heatmor + - durant + - deino + - zweilous + - hydreigon + - larvesta + - volcarona + - cobalion + - terrakion + - virizion + - tornadus + - thundurus + - reshiram + - zekrom + - landorus 
+ - kyurem + - keldeo + - meloetta + - genesect + - chespin + - quilladin + - chesnaught + - fennekin + - braixen + - delphox + - froakie + - frogadier + - greninja + - bunnelby + - diggersby + - fletchling + - fletchinder + - talonflame + - scatterbug + - spewpa + - vivillon + - litleo + - pyroar + - flabebe + - floette + - florges + - skiddo + - gogoat + - pancham + - pangoro + - furfrou + - espurr + - meowstic + - honedge + - doublade + - aegislash + - spritzee + - aromatisse + - swirlix + - slurpuff + - inkay + - malamar + - binacle + - barbaracle + - skrelp + - dragalge + - clauncher + - clawitzer + - helioptile + - heliolisk + - tyrunt + - tyrantrum + - amaura + - aurorus + - sylveon + - hawlucha + - dedenne + - carbink + - goomy + - sliggoo + - goodra + - klefki + - phantump + - trevenant + - pumpkaboo + - gourgeist + - bergmite + - avalugg + - noibat + - noivern + - xerneas + - yveltal + - zygarde + - diancie + - hoopa + - volcanion + - rowlet + - dartrix + - decidueye + - litten + - torracat + - incineroar + - popplio + - brionne + - primarina + - pikipek + - trumbeak + - toucannon + - yungoos + - gumshoos + - grubbin + - charjabug + - vikavolt + - crabrawler + - crabominable + - oricorio + - cutiefly + - ribombee + - rockruff + - lycanroc + - wishiwashi + - mareanie + - toxapex + - mudbray + - mudsdale + - dewpider + - araquanid + - fomantis + - lurantis + - morelull + - shiinotic + - salandit + - salazzle + - stufful + - bewear + - bounsweet + - steenee + - tsareena + - comfey + - oranguru + - passimian + - wimpod + - golisopod + - sandygast + - palossand + - pyukumuku + - typenull + - silvally + - minior + - komala + - turtonator + - togedemaru + - mimikyu + - bruxish + - drampa + - dhelmise + - jangmo-o + - hakamo-o + - kommo-o + - tapukoko + - tapulele + - tapubulu + - tapufini + - cosmog + - cosmoem + - solgaleo + - lunala + - nihilego + - buzzwole + - pheromosa + - xurkitree + - celesteela + - kartana + - guzzlord + - necrozma + - magearna + - 
marshadow + - poipole + - naganadel + - stakataka + - blacephalon + - zeraora + - meltan + - melmetal + - grookey + - thwackey + - rillaboom + - scorbunny + - raboot + - cinderace + - sobble + - drizzile + - inteleon + - skwovet + - greedent + - rookidee + - corvisquire + - corviknight + - blipbug + - dottler + - orbeetle + - nickit + - thievul + - gossifleur + - eldegoss + - wooloo + - dubwool + - chewtle + - drednaw + - yamper + - boltund + - rolycoly + - carkol + - coalossal + - applin + - flapple + - appletun + - silicobra + - sandaconda + - cramorant + - arrokuda + - barraskewda + - toxel + - toxtricity + - sizzlipede + - centiskorch + - clobbopus + - grapploct + - sinistea + - polteageist + - hatenna + - hattrem + - hatterene + - impidimp + - morgrem + - grimmsnarl + - obstagoon + - perrserker + - cursola + - sirfetchd + - mrrime + - runerigus + - milcery + - alcremie + - falinks + - pincurchin + - snom + - frosmoth + - stonjourner + - eiscue + - indeedee + - morpeko + - cufant + - copperajah + - dracozolt + - arctozolt + - dracovish + - arctovish + - duraludon + - dreepy + - drakloak + - dragapult + - zacian + - zamazenta + - eternatus + - kubfu + - urshifu + - zarude + - regieleki + - regidrago + - glastrier + - spectrier + - calyrex + order: 0 + title: Pokemon Name + pattern: ^[a-z0-9_\-]+$ + examples: + - ditto + - luxray + - snorlax + additionalProperties: true + +metadata: + testedStreams: + pokemon: + hasRecords: true + streamHash: f619395f8c7a553f51cec2a7274a4ce517ab46c8 + hasResponse: true + primaryKeysAreUnique: true + primaryKeysArePresent: true + responsesAreSuccessful: true + autoImportSchema: + pokemon: false + +schemas: + pokemon: + type: object + $schema: http://json-schema.org/draft-07/schema# + properties: + id: + type: + - "null" + - integer + name: + type: + - "null" + - string + forms: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - 
string + additionalProperties: true + moves: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + move: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + version_group_details: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + version_group: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + level_learned_at: + type: + - "null" + - integer + move_learn_method: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + additionalProperties: true + additionalProperties: true + order: + type: + - "null" + - integer + stats: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + stat: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + effort: + type: + - "null" + - integer + base_stat: + type: + - "null" + - integer + additionalProperties: true + types: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + type: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + slot: + type: + - "null" + - integer + additionalProperties: true + height: + type: + - "null" + - integer + weight: + type: + - "null" + - integer + species: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + sprites: + type: + - "null" + - object + properties: + back_shiny: + type: + - "null" + - string + back_female: + type: + - "null" + - string + front_shiny: + type: + - "null" + - string + back_default: + type: + - "null" + - string + 
front_female: + type: + - "null" + - string + front_default: + type: + - "null" + - string + back_shiny_female: + type: + - "null" + - string + front_shiny_female: + type: + - "null" + - string + additionalProperties: true + abilities: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + slot: + type: + - "null" + - integer + ability: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + is_hidden: + type: + - "null" + - boolean + additionalProperties: true + held_items: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + item: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + version_details: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + version: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + rarity: + type: + - "null" + - integer + additionalProperties: true + additionalProperties: true + is_default: + type: + - "null" + - boolean + past_types: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + types: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + type: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + slot: + type: + - "null" + - integer + additionalProperties: true + generation: + type: + - "null" + - object + properties: + url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + additionalProperties: true + game_indices: + type: + - "null" + - array + items: + type: + - "null" + - object + properties: + version: + type: + - "null" + - object + properties: 
+ url: + type: + - "null" + - string + name: + type: + - "null" + - string + additionalProperties: true + game_index: + type: + - "null" + - integer + additionalProperties: true + base_experience: + type: + - "null" + - integer + location_area_encounters: + type: + - "null" + - string + additionalProperties: true From 9115757b137524cc33df870e756ed26ed2f2fa61 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 18 Dec 2024 16:29:17 -0800 Subject: [PATCH 04/18] add fixture for custom py components scenario --- .../source_declarative_manifest/conftest.py | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index 3d61e65e8..a2598822d 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -2,13 +2,26 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +import hashlib import os +from pathlib import Path +from typing import Any, Literal import pytest import yaml -def get_fixture_path(file_name): +def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + hashers = { + "md5": hashlib.md5, + "sha256": hashlib.sha256, + } + hash_object = hashers[hash_type]() + hash_object.update(input_text.encode()) + return hash_object.hexdigest() + + +def get_fixture_path(file_name) -> str: return os.path.join(os.path.dirname(__file__), file_name) @@ -52,3 +65,21 @@ def valid_local_config_file(): @pytest.fixture def invalid_local_config_file(): return get_fixture_path("resources/invalid_local_pokeapi_config.json") + + +@pytest.fixture +def py_components_config_dict() -> dict[str, Any]: + manifest_dict = yaml.safe_load( + get_fixture_path("resources/valid_py_components.yaml"), + ) + custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") + custom_py_code = Path(custom_py_code_path).read_text() + combined_config_dict = { + 
"__injected_declarative_manifest": manifest_dict, + "__injected_components_py": custom_py_code, + "__injected_components_py_checksum": { + "md5": hash_text(custom_py_code, "md5"), + "sha256": hash_text(custom_py_code, "sha256"), + }, + } + return combined_config_dict From 5dc664c95b3bea0eab71646527f39e74352224df Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 18 Dec 2024 16:47:52 -0800 Subject: [PATCH 05/18] add test --- ..._source_declarative_w_custom_components.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py new file mode 100644 index 000000000..c3ea2059d --- /dev/null +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -0,0 +1,24 @@ +# +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+# + +import json +from tempfile import NamedTemporaryFile +from typing import Any + +from airbyte_cdk.cli.source_declarative_manifest._run import ( + create_declarative_source, +) +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource + + +def test_given_injected_declarative_manifest_and_py_components_then_return_declarative_manifest( + py_components_config_dict: dict[str, Any], +): + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: + json.dump(py_components_config_dict, temp_config_file) + temp_config_file.flush() + source = create_declarative_source( + ["check", "--config", temp_config_file.name], + ) + assert isinstance(source, ManifestDeclarativeSource) From 5be084f053bcf313d43cda750a7292c049f2b4a0 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:52:25 -0800 Subject: [PATCH 06/18] chore: add missing guard statement --- airbyte_cdk/cli/source_declarative_manifest/_run.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 232ac302f..5def00602 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -171,6 +171,12 @@ def create_declarative_source( "Invalid config: `__injected_declarative_manifest` should be provided at the root " f"of the config but config only has keys: {list(config.keys() if config else [])}" ) + if not isinstance(config["__injected_declarative_manifest"], dict): + raise ValueError( + "Invalid config: `__injected_declarative_manifest` should be a dictionary, " + f"but got type: {type(config['__injected_declarative_manifest'])}" + ) + return ConcurrentDeclarativeSource( config=config, catalog=catalog, From 7379eeadd1902cfc39a26f6cb8d06b9c313a3836 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:53:35 -0800 Subject: [PATCH 07/18] chore: remove stale 
comment --- airbyte_cdk/test/utils/manifest_only_fixtures.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 01b2b393d..bd53e2081 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -56,8 +56,6 @@ def components_module_from_string(components_py_text: str) -> Optional[ModuleTyp """Load and return the components module from a provided string containing the python code. This assumes the components module is located at /components.py. - - TODO: Make new unit test to leverage this fixture """ module_name = "components" From 51cbcbd98c5ab8b25ea909416ff7abc2021d8359 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:55:03 -0800 Subject: [PATCH 08/18] checkpoint: passing tests with pokeapi --- .../parsers/model_to_component_factory.py | 3 +- .../source_declarative_manifest/conftest.py | 18 ----- .../valid_py_components_manifest.yaml | 2 +- ..._source_declarative_w_custom_components.py | 77 +++++++++++++++++-- 4 files changed, 74 insertions(+), 26 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 8d4ded84f..1668bc50c 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -987,10 +987,9 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> :param config: The custom defined connector config :return: The declarative component built from the Pydantic model to be used at runtime """ - components_module = self._get_components_module_object(config=config) custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, - components_module=components_module, + 
components_module=self._get_components_module_object(config=config), ) component_fields = get_type_hints(custom_component_class) model_args = model.dict() diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index a2598822d..8aafe924a 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -65,21 +65,3 @@ def valid_local_config_file(): @pytest.fixture def invalid_local_config_file(): return get_fixture_path("resources/invalid_local_pokeapi_config.json") - - -@pytest.fixture -def py_components_config_dict() -> dict[str, Any]: - manifest_dict = yaml.safe_load( - get_fixture_path("resources/valid_py_components.yaml"), - ) - custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") - custom_py_code = Path(custom_py_code_path).read_text() - combined_config_dict = { - "__injected_declarative_manifest": manifest_dict, - "__injected_components_py": custom_py_code, - "__injected_components_py_checksum": { - "md5": hash_text(custom_py_code, "md5"), - "sha256": hash_text(custom_py_code, "sha256"), - }, - } - return combined_config_dict diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml index bf15e3138..2ffcd2be5 100644 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml +++ b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml @@ -12,7 +12,7 @@ check: definitions: streams: pokemon: - type: components.CustomDeclarativeStream + type: DeclarativeStream name: pokemon retriever: type: SimpleRetriever diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index c3ea2059d..89eaa5807 100644 --- 
a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -3,22 +3,89 @@ # import json +import os +import types +from collections.abc import Mapping +from pathlib import Path from tempfile import NamedTemporaryFile from typing import Any +import yaml + from airbyte_cdk.cli.source_declarative_manifest._run import ( create_declarative_source, ) from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource +from airbyte_cdk.test.utils.manifest_only_fixtures import components_module_from_string +from unit_tests.source_declarative_manifest.conftest import hash_text + +SAMPLE_COMPONENTS_PY_TEXT = """ +def sample_function() -> str: + return "Hello, World!" + +class SimpleClass: + def sample_method(self) -> str: + return sample_function() +""" + + +def get_fixture_path(file_name) -> str: + return os.path.join(os.path.dirname(__file__), file_name) + + +def test_components_module_from_string() -> None: + # Call the function to get the module + components_module: types.ModuleType = components_module_from_string(SAMPLE_COMPONENTS_PY_TEXT) + + # Check that the module is created and is of the correct type + assert isinstance(components_module, types.ModuleType) + + # Check that the function is correctly defined in the module + assert hasattr(components_module, "sample_function") + + # Check that simple functions are callable + assert components_module.sample_function() == "Hello, World!" + + # Check class definitions work as expected + assert isinstance(components_module.SimpleClass, type) + obj = components_module.SimpleClass() + assert isinstance(obj, components_module.SimpleClass) + assert obj.sample_method() == "Hello, World!" 
+ + +def get_py_components_config_dict() -> dict[str, Any]: + manifest_dict = yaml.safe_load( + Path(get_fixture_path("resources/valid_py_components_manifest.yaml")).read_text(), + ) + assert manifest_dict, "Failed to load the manifest file." + assert isinstance( + manifest_dict, Mapping + ), f"Manifest file is type {type(manifest_dict).__name__}, not a mapping: {manifest_dict}" + + custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") + custom_py_code = Path(custom_py_code_path).read_text() + combined_config_dict = { + "__injected_declarative_manifest": manifest_dict, + "__injected_components_py": custom_py_code, + "__injected_components_py_checksum": { + "md5": hash_text(custom_py_code, "md5"), + "sha256": hash_text(custom_py_code, "sha256"), + }, + } + return combined_config_dict -def test_given_injected_declarative_manifest_and_py_components_then_return_declarative_manifest( - py_components_config_dict: dict[str, Any], -): +def test_given_injected_declarative_manifest_and_py_components() -> None: + py_components_config_dict = get_py_components_config_dict() + assert isinstance(py_components_config_dict, dict) + assert "__injected_declarative_manifest" in py_components_config_dict + assert "__injected_components_py" in py_components_config_dict with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: - json.dump(py_components_config_dict, temp_config_file) + json_str = json.dumps(py_components_config_dict) + Path(temp_config_file.name).write_text(json_str) temp_config_file.flush() source = create_declarative_source( ["check", "--config", temp_config_file.name], ) - assert isinstance(source, ManifestDeclarativeSource) + assert isinstance(source, ManifestDeclarativeSource) + source.check(logger=None, config=source._source_config) From aaef28508be73a98c78449dbb66672313c5fca40 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 13:56:02 -0800 Subject: [PATCH 09/18] chore: add `poe lock` task definition --- 
pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index fbc7ad7af..8e3bfb0fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,6 +126,7 @@ select = ["I"] [tool.poe.tasks] # Installation install = { shell = "poetry install --all-extras" } +lock = { shell = "poetry lock --no-update" } # Build tasks assemble = {cmd = "bin/generate-component-manifest-dagger.sh", help = "Generate component manifest files."} From e7c3eae77ffad22da87468422204b6664bb55b55 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 14:30:26 -0800 Subject: [PATCH 10/18] add 'source_the_guardian_api' test resources --- .../source_the_guardian_api/components.py | 36 ++ .../source_the_guardian_api/manifest.yaml | 376 ++++++++++++++++++ 2 files changed, 412 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py new file mode 100644 index 000000000..db5b07971 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py @@ -0,0 +1,36 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from dataclasses import dataclass +from typing import Any, Mapping, Optional + +import requests + +from airbyte_cdk.sources.declarative.requesters.paginators.strategies.page_increment import ( + PageIncrement, +) + + +@dataclass +class CustomPageIncrement(PageIncrement): + """ + Starts page from 1 instead of the default value that is 0. Stops Pagination when currentPage is equal to totalPages. 
+ """ + + def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + res = response.json().get("response") + currPage = res.get("currentPage") + totalPages = res.get("pages") + if currPage < totalPages: + self._page += 1 + return self._page + else: + return None + + def __post_init__(self, parameters: Mapping[str, Any]): + super().__post_init__(parameters) + self._page = 1 + + def reset(self): + self._page = 1 diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml new file mode 100644 index 000000000..7b440631f --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/manifest.yaml @@ -0,0 +1,376 @@ +version: "4.3.2" +definitions: + selector: + extractor: + field_path: + - response + - results + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: DefaultPaginator + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption 
+ inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + base_stream: + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: DefaultPaginator + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + content_stream: + incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + 
cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: "request_parameter" + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + retriever: + record_selector: + extractor: + field_path: + - response + - results + paginator: + type: "DefaultPaginator" + pagination_strategy: + type: CustomPaginationStrategy + class_name: "source_declarative_manifest.components.CustomPageIncrement" + page_size: 10 + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/draft-04/schema# + type: object + properties: + id: + type: string + type: + type: string + sectionId: + type: string + sectionName: + type: string + webPublicationDate: + type: string + webTitle: + type: string + webUrl: + type: string + apiUrl: + type: string + isHosted: + type: boolean + pillarId: + type: string + pillarName: + type: string + required: + - id + - type + - sectionId + - sectionName + - webPublicationDate + - webTitle + - webUrl + - apiUrl + - isHosted + - pillarId + - pillarName +streams: + - incremental_sync: + type: DatetimeBasedCursor + start_datetime: + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%d" + type: MinMaxDatetime + end_datetime: + datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}" + datetime_format: "%Y-%m-%d" + type: MinMaxDatetime + step: "P7D" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + cursor_granularity: "PT1S" + cursor_field: "webPublicationDate" + start_time_option: + field_name: "from-date" + inject_into: 
"request_parameter" + type: RequestOption + end_time_option: + field_name: "to-date" + inject_into: "request_parameter" + type: RequestOption + retriever: + record_selector: + extractor: + field_path: + - response + - results + type: DpathExtractor + type: RecordSelector + paginator: + type: "DefaultPaginator" + pagination_strategy: + class_name: source_declarative_manifest.components.CustomPageIncrement + page_size: 10 + type: CustomPaginationStrategy + page_token_option: + type: RequestOption + inject_into: "request_parameter" + field_name: "page" + page_size_option: + inject_into: "body_data" + field_name: "page_size" + type: RequestOption + requester: + url_base: "https://content.guardianapis.com" + http_method: "GET" + request_parameters: + api-key: "{{ config['api_key'] }}" + q: "{{ config['query'] }}" + tag: "{{ config['tag'] }}" + section: "{{ config['section'] }}" + order-by: "oldest" + type: HttpRequester + path: "/search" + type: SimpleRetriever + schema_loader: + type: InlineSchemaLoader + schema: + $schema: http://json-schema.org/draft-04/schema# + type: object + properties: + id: + type: string + type: + type: string + sectionId: + type: string + sectionName: + type: string + webPublicationDate: + type: string + webTitle: + type: string + webUrl: + type: string + apiUrl: + type: string + isHosted: + type: boolean + pillarId: + type: string + pillarName: + type: string + required: + - id + - type + - sectionId + - sectionName + - webPublicationDate + - webTitle + - webUrl + - apiUrl + - isHosted + - pillarId + - pillarName + type: DeclarativeStream + name: "content" + primary_key: "id" +check: + stream_names: + - "content" + type: CheckStream +type: DeclarativeSource +spec: + type: Spec + documentation_url: https://docs.airbyte.com/integrations/sources/the-guardian-api + connection_specification: + $schema: http://json-schema.org/draft-07/schema# + title: The Guardian Api Spec + type: object + required: + - api_key + - start_date + 
additionalProperties: true + properties: + api_key: + title: API Key + type: string + description: + Your API Key. See here. + The key is case sensitive. + airbyte_secret: true + start_date: + title: Start Date + type: string + description: + Use this to set the minimum date (YYYY-MM-DD) of the results. + Results older than the start_date will not be shown. + pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$ + examples: + - YYYY-MM-DD + query: + title: Query + type: string + description: + (Optional) The query (q) parameter filters the results to only + those that include that search term. The q parameter supports AND, OR and + NOT operators. + examples: + - environment AND NOT water + - environment AND political + - amusement park + - political + tag: + title: Tag + type: string + description: + (Optional) A tag is a piece of data that is used by The Guardian + to categorise content. Use this parameter to filter results by showing only + the ones matching the entered tag. See here + for a list of all tags, and here + for the tags endpoint documentation. + examples: + - environment/recycling + - environment/plasticbags + - environment/energyefficiency + section: + title: Section + type: string + description: + (Optional) Use this to filter the results by a particular section. + See here + for a list of all sections, and here + for the sections endpoint documentation. + examples: + - media + - technology + - housing-network + end_date: + title: End Date + type: string + description: + (Optional) Use this to set the maximum date (YYYY-MM-DD) of the + results. Results newer than the end_date will not be shown. Default is set + to the current date (today) for incremental syncs. 
+ pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$ + examples: + - YYYY-MM-DD From 2300f7a7d28e5a5718ee7d5b5711fd0c89db9a97 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 14:32:05 -0800 Subject: [PATCH 11/18] checkpoint: working `check` --- .../parsers/model_to_component_factory.py | 28 +++++++++++-------- ..._source_declarative_w_custom_components.py | 14 ++++++---- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 1668bc50c..8c70c2b88 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -5,9 +5,9 @@ from __future__ import annotations import datetime -import importlib import inspect import re +import sys import types from functools import partial from typing import ( @@ -1043,8 +1043,9 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> return custom_component_class(**kwargs) def _get_components_module_object( + self, config: Config, - ) -> None: + ) -> types.ModuleType: """Get a components module object based on the provided config. If custom python components is provided, this will be loaded. 
Otherwise, we will @@ -1054,17 +1055,20 @@ def _get_components_module_object( COMPONENTS_MODULE_NAME = "components" components_module: types.ModuleType - if INJECTED_COMPONENTS_PY in config: - # Create a new module object and execute the provided Python code text within it - components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) - python_text = config[INJECTED_COMPONENTS_PY] - exec(python_text, components_module.__dict__) - # Skip insert the module into sys.modules because we pass by reference below - # sys.modules[module_name] = components_module - else: - components_module = importlib.import_module(name=COMPONENTS_MODULE_NAME) + if not INJECTED_COMPONENTS_PY in config: + raise ValueError( + "Custom components must be defined in a module named `components`. Please provide a custom components module." + ) + + # Create a new module object and execute the provided Python code text within it + components_module = types.ModuleType(name=COMPONENTS_MODULE_NAME) + python_text = config[INJECTED_COMPONENTS_PY] + exec(python_text, components_module.__dict__) + sys.modules[COMPONENTS_MODULE_NAME] = components_module + return components_module def _get_class_from_fully_qualified_class_name( + self, full_qualified_class_name: str, components_module: types.ModuleType, ) -> Any: @@ -1086,7 +1090,7 @@ def _get_class_from_fully_qualified_class_name( module_name = split[:-2] class_name = split[-1] - if module_name != "components": + if "components" not in split: raise ValueError( f"Custom components must be defined in a module named `components`. Found {module_name} instead." 
) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 89eaa5807..7afb24bff 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -3,6 +3,7 @@ # import json +import logging import os import types from collections.abc import Mapping @@ -11,6 +12,7 @@ from typing import Any import yaml +from airbyte_protocol_dataclasses.models.airbyte_protocol import AirbyteCatalog from airbyte_cdk.cli.source_declarative_manifest._run import ( create_declarative_source, @@ -54,16 +56,16 @@ def test_components_module_from_string() -> None: def get_py_components_config_dict() -> dict[str, Any]: - manifest_dict = yaml.safe_load( - Path(get_fixture_path("resources/valid_py_components_manifest.yaml")).read_text(), - ) + connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) + manifest_yml_path: Path = connector_dir / "manifest.yaml" + custom_py_code_path: Path = connector_dir / "components.py" + manifest_dict = yaml.safe_load(manifest_yml_path.read_text()) assert manifest_dict, "Failed to load the manifest file." 
assert isinstance( manifest_dict, Mapping ), f"Manifest file is type {type(manifest_dict).__name__}, not a mapping: {manifest_dict}" - custom_py_code_path = get_fixture_path("resources/valid_py_components_code.py") - custom_py_code = Path(custom_py_code_path).read_text() + custom_py_code = custom_py_code_path.read_text() combined_config_dict = { "__injected_declarative_manifest": manifest_dict, "__injected_components_py": custom_py_code, @@ -88,4 +90,4 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: ["check", "--config", temp_config_file.name], ) assert isinstance(source, ManifestDeclarativeSource) - source.check(logger=None, config=source._source_config) + source.check(logger=logging.getLogger(), config=py_components_config_dict) From 4efcd4032abe0ae33890334599b64fb2d56e8a1d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 14:51:11 -0800 Subject: [PATCH 12/18] checkpoint: working discover --- .../resources/source_the_guardian_api/.gitignore | 1 + .../resources/source_the_guardian_api/README.md | 9 +++++++++ .../source_the_guardian_api/valid_config.yaml | 3 +++ .../test_source_declarative_w_custom_components.py | 14 ++++++++++++++ 4 files changed, 27 insertions(+) create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md create mode 100644 unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore new file mode 100644 index 000000000..c4ab49a30 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/.gitignore @@ -0,0 +1 @@ +secrets* diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md 
b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md new file mode 100644 index 000000000..403a4baba --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/README.md @@ -0,0 +1,9 @@ +# The Guardian API Tests + +For these tests to work, you'll need to create a `secrets.yaml` file in this directory that looks like this: + +```yml +api_key: ****** +``` + +The `.gitignore` file in this directory should ensure your file is not committed to git, but it's a good practice to double-check. 👀 diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml new file mode 100644 index 000000000..e31112780 --- /dev/null +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/valid_config.yaml @@ -0,0 +1,3 @@ +{ + "start_date": "2024-01-01", +} diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 7afb24bff..7605b4531 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -59,6 +59,9 @@ def get_py_components_config_dict() -> dict[str, Any]: connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) manifest_yml_path: Path = connector_dir / "manifest.yaml" custom_py_code_path: Path = connector_dir / "components.py" + config_yaml_path: Path = connector_dir / "valid_config.yaml" + secrets_yaml_path: Path = connector_dir / "secrets.yaml" + manifest_dict = yaml.safe_load(manifest_yml_path.read_text()) assert manifest_dict, "Failed to load the manifest file." 
assert isinstance( @@ -74,6 +77,8 @@ def get_py_components_config_dict() -> dict[str, Any]: "sha256": hash_text(custom_py_code, "sha256"), }, } + combined_config_dict.update(yaml.safe_load(config_yaml_path.read_text())) + combined_config_dict.update(yaml.safe_load(secrets_yaml_path.read_text())) return combined_config_dict @@ -82,6 +87,7 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: assert isinstance(py_components_config_dict, dict) assert "__injected_declarative_manifest" in py_components_config_dict assert "__injected_components_py" in py_components_config_dict + with NamedTemporaryFile(delete=False, suffix=".json") as temp_config_file: json_str = json.dumps(py_components_config_dict) Path(temp_config_file.name).write_text(json_str) @@ -91,3 +97,11 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: ) assert isinstance(source, ManifestDeclarativeSource) source.check(logger=logging.getLogger(), config=py_components_config_dict) + catalog: AirbyteCatalog = source.discover( + logger=logging.getLogger(), config=py_components_config_dict + ) + assert isinstance(catalog, AirbyteCatalog) + + # source.read( + # logger=logging.getLogger(), config=py_components_config_dict, catalog=None, state=None + # ) From cb6a4ab10e875219fdc373f46d5a6d0254a24f2c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:06:02 -0800 Subject: [PATCH 13/18] checkpoint: working sync --- ..._source_declarative_w_custom_components.py | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 7605b4531..55da8fbb6 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -2,6 +2,7 @@ # 
Copyright (c) 2024 Airbyte, Inc., all rights reserved. # +import datetime import json import logging import os @@ -17,8 +18,10 @@ from airbyte_cdk.cli.source_declarative_manifest._run import ( create_declarative_source, ) +from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.test.utils.manifest_only_fixtures import components_module_from_string +from unit_tests.connector_builder.test_connector_builder_handler import configured_catalog from unit_tests.source_declarative_manifest.conftest import hash_text SAMPLE_COMPONENTS_PY_TEXT = """ @@ -84,6 +87,10 @@ def get_py_components_config_dict() -> dict[str, Any]: def test_given_injected_declarative_manifest_and_py_components() -> None: py_components_config_dict = get_py_components_config_dict() + # Truncate the start_date to speed up tests + py_components_config_dict["start_date"] = ( + datetime.datetime.now() - datetime.timedelta(days=2) + ).strftime("%Y-%m-%d") assert isinstance(py_components_config_dict, dict) assert "__injected_declarative_manifest" in py_components_config_dict assert "__injected_components_py" in py_components_config_dict @@ -101,7 +108,22 @@ def test_given_injected_declarative_manifest_and_py_components() -> None: logger=logging.getLogger(), config=py_components_config_dict ) assert isinstance(catalog, AirbyteCatalog) + configured_catalog = ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=stream, + sync_mode="full_refresh", + destination_sync_mode="overwrite", + ) + for stream in catalog.streams + ] + ) - # source.read( - # logger=logging.getLogger(), config=py_components_config_dict, catalog=None, state=None - # ) + msg_iterator = source.read( + logger=logging.getLogger(), + config=py_components_config_dict, + catalog=configured_catalog, + state=None, + ) + for msg in msg_iterator: + assert msg From 
051c57bad129eb0d9a858f013af331c792b08c52 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:10:18 -0800 Subject: [PATCH 14/18] improve module name parsing --- .../sources/declarative/parsers/model_to_component_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 8c70c2b88..86ff2ca89 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1087,10 +1087,10 @@ def _get_class_from_fully_qualified_class_name( """ split = full_qualified_class_name.split(".") module_name_full = ".".join(split[:-1]) - module_name = split[:-2] + module_name = split[-2] class_name = split[-1] - if "components" not in split: + if module_name != "components": raise ValueError( f"Custom components must be defined in a module named `components`. Found {module_name} instead." 
) From e511a2b49a168ffed285b21f9fa89eadc658d253 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:12:23 -0800 Subject: [PATCH 15/18] remove unused files --- .../source_declarative_manifest/conftest.py | 3 +- .../resources/valid_py_components_code.py | 15 - .../resources/valid_py_components_config.json | 3 - .../valid_py_components_manifest.yaml | 1368 ----------------- 4 files changed, 1 insertion(+), 1388 deletions(-) delete mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_code.py delete mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_config.json delete mode 100644 unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index 8aafe924a..d4c67a33e 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -4,8 +4,7 @@ import hashlib import os -from pathlib import Path -from typing import Any, Literal +from typing import Literal import pytest import yaml diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py b/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py deleted file mode 100644 index 06c95e78a..000000000 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_code.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Custom Python components.py file for testing. - -This file is mostly a no-op (for now) but should trigger a failure if code file is not -correctly parsed. -""" - -from airbyte_cdk.sources.declarative.models import DeclarativeStream - - -class CustomDeclarativeStream(DeclarativeStream): - """Custom declarative stream class. - - We don't change anything from the base class, but this should still be enough to confirm - that the components.py file is correctly parsed. 
- """ diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json b/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json deleted file mode 100644 index 214fc684f..000000000 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_config.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "pokemon_name": "blastoise" -} diff --git a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml b/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml deleted file mode 100644 index 2ffcd2be5..000000000 --- a/unit_tests/source_declarative_manifest/resources/valid_py_components_manifest.yaml +++ /dev/null @@ -1,1368 +0,0 @@ -version: 3.9.6 - -type: DeclarativeSource - -description: This is just a test, with custom Python components enabled. Copied from Pokemon example. - -check: - type: CheckStream - stream_names: - - pokemon - -definitions: - streams: - pokemon: - type: DeclarativeStream - name: pokemon - retriever: - type: SimpleRetriever - requester: - $ref: "#/definitions/base_requester" - path: /{{config['pokemon_name']}} - http_method: GET - record_selector: - type: RecordSelector - extractor: - type: DpathExtractor - field_path: [] - primary_key: - - id - schema_loader: - type: InlineSchemaLoader - schema: - $ref: "#/schemas/pokemon" - base_requester: - type: HttpRequester - url_base: https://pokeapi.co/api/v2/pokemon - -streams: - - $ref: "#/definitions/streams/pokemon" - -spec: - type: Spec - connection_specification: - type: object - $schema: http://json-schema.org/draft-07/schema# - required: - - pokemon_name - properties: - pokemon_name: - type: string - description: Pokemon requested from the API. 
- enum: - - bulbasaur - - ivysaur - - venusaur - - charmander - - charmeleon - - charizard - - squirtle - - wartortle - - blastoise - - caterpie - - metapod - - butterfree - - weedle - - kakuna - - beedrill - - pidgey - - pidgeotto - - pidgeot - - rattata - - raticate - - spearow - - fearow - - ekans - - arbok - - pikachu - - raichu - - sandshrew - - sandslash - - nidoranf - - nidorina - - nidoqueen - - nidoranm - - nidorino - - nidoking - - clefairy - - clefable - - vulpix - - ninetales - - jigglypuff - - wigglytuff - - zubat - - golbat - - oddish - - gloom - - vileplume - - paras - - parasect - - venonat - - venomoth - - diglett - - dugtrio - - meowth - - persian - - psyduck - - golduck - - mankey - - primeape - - growlithe - - arcanine - - poliwag - - poliwhirl - - poliwrath - - abra - - kadabra - - alakazam - - machop - - machoke - - machamp - - bellsprout - - weepinbell - - victreebel - - tentacool - - tentacruel - - geodude - - graveler - - golem - - ponyta - - rapidash - - slowpoke - - slowbro - - magnemite - - magneton - - farfetchd - - doduo - - dodrio - - seel - - dewgong - - grimer - - muk - - shellder - - cloyster - - gastly - - haunter - - gengar - - onix - - drowzee - - hypno - - krabby - - kingler - - voltorb - - electrode - - exeggcute - - exeggutor - - cubone - - marowak - - hitmonlee - - hitmonchan - - lickitung - - koffing - - weezing - - rhyhorn - - rhydon - - chansey - - tangela - - kangaskhan - - horsea - - seadra - - goldeen - - seaking - - staryu - - starmie - - mrmime - - scyther - - jynx - - electabuzz - - magmar - - pinsir - - tauros - - magikarp - - gyarados - - lapras - - ditto - - eevee - - vaporeon - - jolteon - - flareon - - porygon - - omanyte - - omastar - - kabuto - - kabutops - - aerodactyl - - snorlax - - articuno - - zapdos - - moltres - - dratini - - dragonair - - dragonite - - mewtwo - - mew - - chikorita - - bayleef - - meganium - - cyndaquil - - quilava - - typhlosion - - totodile - - croconaw - - feraligatr - - sentret - - 
furret - - hoothoot - - noctowl - - ledyba - - ledian - - spinarak - - ariados - - crobat - - chinchou - - lanturn - - pichu - - cleffa - - igglybuff - - togepi - - togetic - - natu - - xatu - - mareep - - flaaffy - - ampharos - - bellossom - - marill - - azumarill - - sudowoodo - - politoed - - hoppip - - skiploom - - jumpluff - - aipom - - sunkern - - sunflora - - yanma - - wooper - - quagsire - - espeon - - umbreon - - murkrow - - slowking - - misdreavus - - unown - - wobbuffet - - girafarig - - pineco - - forretress - - dunsparce - - gligar - - steelix - - snubbull - - granbull - - qwilfish - - scizor - - shuckle - - heracross - - sneasel - - teddiursa - - ursaring - - slugma - - magcargo - - swinub - - piloswine - - corsola - - remoraid - - octillery - - delibird - - mantine - - skarmory - - houndour - - houndoom - - kingdra - - phanpy - - donphan - - porygon2 - - stantler - - smeargle - - tyrogue - - hitmontop - - smoochum - - elekid - - magby - - miltank - - blissey - - raikou - - entei - - suicune - - larvitar - - pupitar - - tyranitar - - lugia - - ho-oh - - celebi - - treecko - - grovyle - - sceptile - - torchic - - combusken - - blaziken - - mudkip - - marshtomp - - swampert - - poochyena - - mightyena - - zigzagoon - - linoone - - wurmple - - silcoon - - beautifly - - cascoon - - dustox - - lotad - - lombre - - ludicolo - - seedot - - nuzleaf - - shiftry - - taillow - - swellow - - wingull - - pelipper - - ralts - - kirlia - - gardevoir - - surskit - - masquerain - - shroomish - - breloom - - slakoth - - vigoroth - - slaking - - nincada - - ninjask - - shedinja - - whismur - - loudred - - exploud - - makuhita - - hariyama - - azurill - - nosepass - - skitty - - delcatty - - sableye - - mawile - - aron - - lairon - - aggron - - meditite - - medicham - - electrike - - manectric - - plusle - - minun - - volbeat - - illumise - - roselia - - gulpin - - swalot - - carvanha - - sharpedo - - wailmer - - wailord - - numel - - camerupt - - torkoal - - spoink - - 
grumpig - - spinda - - trapinch - - vibrava - - flygon - - cacnea - - cacturne - - swablu - - altaria - - zangoose - - seviper - - lunatone - - solrock - - barboach - - whiscash - - corphish - - crawdaunt - - baltoy - - claydol - - lileep - - cradily - - anorith - - armaldo - - feebas - - milotic - - castform - - kecleon - - shuppet - - banette - - duskull - - dusclops - - tropius - - chimecho - - absol - - wynaut - - snorunt - - glalie - - spheal - - sealeo - - walrein - - clamperl - - huntail - - gorebyss - - relicanth - - luvdisc - - bagon - - shelgon - - salamence - - beldum - - metang - - metagross - - regirock - - regice - - registeel - - latias - - latios - - kyogre - - groudon - - rayquaza - - jirachi - - deoxys - - turtwig - - grotle - - torterra - - chimchar - - monferno - - infernape - - piplup - - prinplup - - empoleon - - starly - - staravia - - staraptor - - bidoof - - bibarel - - kricketot - - kricketune - - shinx - - luxio - - luxray - - budew - - roserade - - cranidos - - rampardos - - shieldon - - bastiodon - - burmy - - wormadam - - mothim - - combee - - vespiquen - - pachirisu - - buizel - - floatzel - - cherubi - - cherrim - - shellos - - gastrodon - - ambipom - - drifloon - - drifblim - - buneary - - lopunny - - mismagius - - honchkrow - - glameow - - purugly - - chingling - - stunky - - skuntank - - bronzor - - bronzong - - bonsly - - mimejr - - happiny - - chatot - - spiritomb - - gible - - gabite - - garchomp - - munchlax - - riolu - - lucario - - hippopotas - - hippowdon - - skorupi - - drapion - - croagunk - - toxicroak - - carnivine - - finneon - - lumineon - - mantyke - - snover - - abomasnow - - weavile - - magnezone - - lickilicky - - rhyperior - - tangrowth - - electivire - - magmortar - - togekiss - - yanmega - - leafeon - - glaceon - - gliscor - - mamoswine - - porygon-z - - gallade - - probopass - - dusknoir - - froslass - - rotom - - uxie - - mesprit - - azelf - - dialga - - palkia - - heatran - - regigigas - - giratina - - 
cresselia - - phione - - manaphy - - darkrai - - shaymin - - arceus - - victini - - snivy - - servine - - serperior - - tepig - - pignite - - emboar - - oshawott - - dewott - - samurott - - patrat - - watchog - - lillipup - - herdier - - stoutland - - purrloin - - liepard - - pansage - - simisage - - pansear - - simisear - - panpour - - simipour - - munna - - musharna - - pidove - - tranquill - - unfezant - - blitzle - - zebstrika - - roggenrola - - boldore - - gigalith - - woobat - - swoobat - - drilbur - - excadrill - - audino - - timburr - - gurdurr - - conkeldurr - - tympole - - palpitoad - - seismitoad - - throh - - sawk - - sewaddle - - swadloon - - leavanny - - venipede - - whirlipede - - scolipede - - cottonee - - whimsicott - - petilil - - lilligant - - basculin - - sandile - - krokorok - - krookodile - - darumaka - - darmanitan - - maractus - - dwebble - - crustle - - scraggy - - scrafty - - sigilyph - - yamask - - cofagrigus - - tirtouga - - carracosta - - archen - - archeops - - trubbish - - garbodor - - zorua - - zoroark - - minccino - - cinccino - - gothita - - gothorita - - gothitelle - - solosis - - duosion - - reuniclus - - ducklett - - swanna - - vanillite - - vanillish - - vanilluxe - - deerling - - sawsbuck - - emolga - - karrablast - - escavalier - - foongus - - amoonguss - - frillish - - jellicent - - alomomola - - joltik - - galvantula - - ferroseed - - ferrothorn - - klink - - klang - - klinklang - - tynamo - - eelektrik - - eelektross - - elgyem - - beheeyem - - litwick - - lampent - - chandelure - - axew - - fraxure - - haxorus - - cubchoo - - beartic - - cryogonal - - shelmet - - accelgor - - stunfisk - - mienfoo - - mienshao - - druddigon - - golett - - golurk - - pawniard - - bisharp - - bouffalant - - rufflet - - braviary - - vullaby - - mandibuzz - - heatmor - - durant - - deino - - zweilous - - hydreigon - - larvesta - - volcarona - - cobalion - - terrakion - - virizion - - tornadus - - thundurus - - reshiram - - zekrom - - landorus 
- - kyurem - - keldeo - - meloetta - - genesect - - chespin - - quilladin - - chesnaught - - fennekin - - braixen - - delphox - - froakie - - frogadier - - greninja - - bunnelby - - diggersby - - fletchling - - fletchinder - - talonflame - - scatterbug - - spewpa - - vivillon - - litleo - - pyroar - - flabebe - - floette - - florges - - skiddo - - gogoat - - pancham - - pangoro - - furfrou - - espurr - - meowstic - - honedge - - doublade - - aegislash - - spritzee - - aromatisse - - swirlix - - slurpuff - - inkay - - malamar - - binacle - - barbaracle - - skrelp - - dragalge - - clauncher - - clawitzer - - helioptile - - heliolisk - - tyrunt - - tyrantrum - - amaura - - aurorus - - sylveon - - hawlucha - - dedenne - - carbink - - goomy - - sliggoo - - goodra - - klefki - - phantump - - trevenant - - pumpkaboo - - gourgeist - - bergmite - - avalugg - - noibat - - noivern - - xerneas - - yveltal - - zygarde - - diancie - - hoopa - - volcanion - - rowlet - - dartrix - - decidueye - - litten - - torracat - - incineroar - - popplio - - brionne - - primarina - - pikipek - - trumbeak - - toucannon - - yungoos - - gumshoos - - grubbin - - charjabug - - vikavolt - - crabrawler - - crabominable - - oricorio - - cutiefly - - ribombee - - rockruff - - lycanroc - - wishiwashi - - mareanie - - toxapex - - mudbray - - mudsdale - - dewpider - - araquanid - - fomantis - - lurantis - - morelull - - shiinotic - - salandit - - salazzle - - stufful - - bewear - - bounsweet - - steenee - - tsareena - - comfey - - oranguru - - passimian - - wimpod - - golisopod - - sandygast - - palossand - - pyukumuku - - typenull - - silvally - - minior - - komala - - turtonator - - togedemaru - - mimikyu - - bruxish - - drampa - - dhelmise - - jangmo-o - - hakamo-o - - kommo-o - - tapukoko - - tapulele - - tapubulu - - tapufini - - cosmog - - cosmoem - - solgaleo - - lunala - - nihilego - - buzzwole - - pheromosa - - xurkitree - - celesteela - - kartana - - guzzlord - - necrozma - - magearna - - 
marshadow - - poipole - - naganadel - - stakataka - - blacephalon - - zeraora - - meltan - - melmetal - - grookey - - thwackey - - rillaboom - - scorbunny - - raboot - - cinderace - - sobble - - drizzile - - inteleon - - skwovet - - greedent - - rookidee - - corvisquire - - corviknight - - blipbug - - dottler - - orbeetle - - nickit - - thievul - - gossifleur - - eldegoss - - wooloo - - dubwool - - chewtle - - drednaw - - yamper - - boltund - - rolycoly - - carkol - - coalossal - - applin - - flapple - - appletun - - silicobra - - sandaconda - - cramorant - - arrokuda - - barraskewda - - toxel - - toxtricity - - sizzlipede - - centiskorch - - clobbopus - - grapploct - - sinistea - - polteageist - - hatenna - - hattrem - - hatterene - - impidimp - - morgrem - - grimmsnarl - - obstagoon - - perrserker - - cursola - - sirfetchd - - mrrime - - runerigus - - milcery - - alcremie - - falinks - - pincurchin - - snom - - frosmoth - - stonjourner - - eiscue - - indeedee - - morpeko - - cufant - - copperajah - - dracozolt - - arctozolt - - dracovish - - arctovish - - duraludon - - dreepy - - drakloak - - dragapult - - zacian - - zamazenta - - eternatus - - kubfu - - urshifu - - zarude - - regieleki - - regidrago - - glastrier - - spectrier - - calyrex - order: 0 - title: Pokemon Name - pattern: ^[a-z0-9_\-]+$ - examples: - - ditto - - luxray - - snorlax - additionalProperties: true - -metadata: - testedStreams: - pokemon: - hasRecords: true - streamHash: f619395f8c7a553f51cec2a7274a4ce517ab46c8 - hasResponse: true - primaryKeysAreUnique: true - primaryKeysArePresent: true - responsesAreSuccessful: true - autoImportSchema: - pokemon: false - -schemas: - pokemon: - type: object - $schema: http://json-schema.org/draft-07/schema# - properties: - id: - type: - - "null" - - integer - name: - type: - - "null" - - string - forms: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - 
string - additionalProperties: true - moves: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - move: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - version_group_details: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - version_group: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - level_learned_at: - type: - - "null" - - integer - move_learn_method: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - additionalProperties: true - additionalProperties: true - order: - type: - - "null" - - integer - stats: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - stat: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - effort: - type: - - "null" - - integer - base_stat: - type: - - "null" - - integer - additionalProperties: true - types: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - type: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - slot: - type: - - "null" - - integer - additionalProperties: true - height: - type: - - "null" - - integer - weight: - type: - - "null" - - integer - species: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - sprites: - type: - - "null" - - object - properties: - back_shiny: - type: - - "null" - - string - back_female: - type: - - "null" - - string - front_shiny: - type: - - "null" - - string - back_default: - type: - - "null" - - string - 
front_female: - type: - - "null" - - string - front_default: - type: - - "null" - - string - back_shiny_female: - type: - - "null" - - string - front_shiny_female: - type: - - "null" - - string - additionalProperties: true - abilities: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - slot: - type: - - "null" - - integer - ability: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - is_hidden: - type: - - "null" - - boolean - additionalProperties: true - held_items: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - item: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - version_details: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - version: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - rarity: - type: - - "null" - - integer - additionalProperties: true - additionalProperties: true - is_default: - type: - - "null" - - boolean - past_types: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - types: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - type: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - slot: - type: - - "null" - - integer - additionalProperties: true - generation: - type: - - "null" - - object - properties: - url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - additionalProperties: true - game_indices: - type: - - "null" - - array - items: - type: - - "null" - - object - properties: - version: - type: - - "null" - - object - properties: 
- url: - type: - - "null" - - string - name: - type: - - "null" - - string - additionalProperties: true - game_index: - type: - - "null" - - integer - additionalProperties: true - base_experience: - type: - - "null" - - integer - location_area_encounters: - type: - - "null" - - string - additionalProperties: true From a19b5c16abba382329a931187700a0e13bbb220e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:22:04 -0800 Subject: [PATCH 16/18] tidy up --- .../parsers/model_to_component_factory.py | 12 +++++++++--- airbyte_cdk/test/utils/manifest_only_fixtures.py | 5 ++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 86ff2ca89..adb126b5c 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1042,8 +1042,8 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> } return custom_component_class(**kwargs) + @staticmethod def _get_components_module_object( - self, config: Config, ) -> types.ModuleType: """Get a components module object based on the provided config. @@ -1067,8 +1067,8 @@ def _get_components_module_object( sys.modules[COMPONENTS_MODULE_NAME] = components_module return components_module + @staticmethod def _get_class_from_fully_qualified_class_name( - self, full_qualified_class_name: str, components_module: types.ModuleType, ) -> Any: @@ -1092,7 +1092,13 @@ def _get_class_from_fully_qualified_class_name( if module_name != "components": raise ValueError( - f"Custom components must be defined in a module named `components`. Found {module_name} instead." + "Custom components must be defined in a module named " + f"`components`. Found `{module_name}` instead." 
+ ) + if module_name_full != "source_declarative_manifest.components": + raise ValueError( + "Custom components must be defined in a module named " + f"`source_declarative_manifest.components`. Found `{module_name_full}` instead." ) try: diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index bd53e2081..43e90a2c4 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -5,7 +5,6 @@ import types from pathlib import Path from types import ModuleType -from typing import Optional import pytest @@ -31,7 +30,7 @@ def connector_dir(request: pytest.FixtureRequest) -> Path: @pytest.fixture(scope="session") -def components_module(connector_dir: Path) -> Optional[ModuleType]: +def components_module(connector_dir: Path) -> ModuleType | None: """Load and return the components module from the connector directory. This assumes the components module is located at /components.py. @@ -52,7 +51,7 @@ def components_module(connector_dir: Path) -> Optional[ModuleType]: return components_module -def components_module_from_string(components_py_text: str) -> Optional[ModuleType]: +def components_module_from_string(components_py_text: str) -> ModuleType | None: """Load and return the components module from a provided string containing the python code. This assumes the components module is located at /components.py. 
From c837745bc410f522431674339253b31282620e69 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 13 Jan 2025 15:32:03 -0800 Subject: [PATCH 17/18] skip if no creds --- .../test_source_declarative_w_custom_components.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 55da8fbb6..42880baca 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -12,6 +12,7 @@ from tempfile import NamedTemporaryFile from typing import Any +import pytest import yaml from airbyte_protocol_dataclasses.models.airbyte_protocol import AirbyteCatalog @@ -85,6 +86,10 @@ def get_py_components_config_dict() -> dict[str, Any]: return combined_config_dict +@pytest.mark.skipif( + condition=not Path(get_fixture_path("resources/source_the_guardian_api/secrets.yaml")).exists(), + reason="Skipped due to missing 'secrets.yaml'.", +) def test_given_injected_declarative_manifest_and_py_components() -> None: py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests From 9bfdd91a6edeac44abe683f496c709afdce86f14 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 23:51:48 +0000 Subject: [PATCH 18/18] =?UTF-8?q?=F0=9F=93=9D=20Add=20docstrings=20to=20`a?= =?UTF-8?q?j/feat/accept-components-text-input`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docstrings generation was requested by @aaronsteers. 
* https://github.com/airbytehq/airbyte-python-cdk/pull/174#issuecomment-2588462740 The following files were modified: * `airbyte_cdk/cli/source_declarative_manifest/_run.py` * `airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py` * `airbyte_cdk/test/utils/manifest_only_fixtures.py` * `unit_tests/source_declarative_manifest/conftest.py` * `unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py` * `unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py` --- .../cli/source_declarative_manifest/_run.py | 24 ++++- .../parsers/model_to_component_factory.py | 91 +++++++++++++++---- .../test/utils/manifest_only_fixtures.py | 64 +++++++++++-- .../source_declarative_manifest/conftest.py | 30 ++++++ .../source_the_guardian_api/components.py | 32 +++++++ ..._source_declarative_w_custom_components.py | 70 ++++++++++++++ 6 files changed, 281 insertions(+), 30 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 5def00602..3a00111f9 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -155,11 +155,25 @@ def handle_remote_manifest_command(args: list[str]) -> None: def create_declarative_source( args: list[str], ) -> ConcurrentDeclarativeSource: # type: ignore [type-arg] - """Creates the source with the injected config. - - This essentially does what other low-code sources do at build time, but at runtime, - with a user-provided manifest in the config. This better reflects what happens in the - connector builder. + """ + Create a declarative source with an injected manifest configuration. + + This function dynamically creates a ConcurrentDeclarativeSource at runtime using a user-provided manifest, similar to how low-code sources are built. It validates the configuration and prepares the source for execution. 
+ + Parameters: + args (list[str]): Command-line arguments containing configuration, catalog, and state information. + + Returns: + ConcurrentDeclarativeSource: A configured declarative source ready for sync operations. + + Raises: + ValueError: If the configuration is invalid or missing required manifest information. + Exception: For any unexpected errors during source creation, with detailed error tracing. + + Notes: + - Requires a configuration with an '__injected_declarative_manifest' key + - The manifest must be a dictionary + - Provides structured error reporting for configuration issues """ try: config: Mapping[str, Any] | None diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index adb126b5c..400b99dc8 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -981,11 +981,27 @@ def create_cursor_pagination( def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: """ - Generically creates a custom component based on the model type and a class_name reference to the custom Python class being - instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor - :param model: The Pydantic model of the custom component being created - :param config: The custom defined connector config - :return: The declarative component built from the Pydantic model to be used at runtime + Create a custom component from a Pydantic model with dynamic class instantiation. + + This method dynamically creates a custom component by loading a class from a specified module and instantiating it with appropriate arguments. It handles complex scenarios such as nested components, type inference, and argument passing. 
+ + Parameters: + model (Any): A Pydantic model representing the custom component configuration. + config (Config): The connector configuration used for module and component resolution. + **kwargs (Any): Additional keyword arguments to override or supplement model arguments. + + Returns: + Any: An instantiated custom component with resolved nested components and configurations. + + Raises: + ValueError: If the component class cannot be loaded or instantiated. + TypeError: If arguments do not match the component's constructor signature. + + Notes: + - Supports nested component creation + - Performs type inference for component fields + - Handles both dictionary and list-based component configurations + - Prioritizes kwargs over model arguments in case of field collisions """ custom_component_class = self._get_class_from_fully_qualified_class_name( full_qualified_class_name=model.class_name, @@ -1046,10 +1062,25 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> def _get_components_module_object( config: Config, ) -> types.ModuleType: - """Get a components module object based on the provided config. - - If custom python components is provided, this will be loaded. Otherwise, we will - attempt to load from the `components` module already imported. + """ + Get a components module object based on the provided configuration. + + This method dynamically creates a module for custom Python components defined in the configuration. It ensures that custom components are defined in a module named 'components' and allows runtime module creation and execution. + + Parameters: + config (Config): A configuration object containing the custom components definition. + + Returns: + types.ModuleType: A dynamically created module containing the custom components. + + Raises: + ValueError: If no custom components are provided or if the components are not defined in a module named 'components'. 
+ + Notes: + - Uses the special key '__injected_components_py' to retrieve custom component code + - Creates a new module dynamically using types.ModuleType + - Executes the provided Python code within the new module's namespace + - Registers the module in sys.modules for future imports """ INJECTED_COMPONENTS_PY = "__injected_components_py" COMPONENTS_MODULE_NAME = "components" @@ -1073,17 +1104,24 @@ def _get_class_from_fully_qualified_class_name( components_module: types.ModuleType, ) -> Any: """ - Get a class from its fully qualified name, optionally using a pre-parsed module. - - Args: - full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName"). - components_module (Optional[ModuleType]): An optional pre-parsed module. - + Retrieve a class from its fully qualified name within a predefined components module. + + Parameters: + full_qualified_class_name (str): The complete dot-separated path to the class (e.g., "source_declarative_manifest.components.ClassName"). + components_module (types.ModuleType): The pre-parsed module containing custom components. + Returns: - Any: The class object. - + Any: The requested class object. + Raises: - ValueError: If the class cannot be loaded. + ValueError: If the class cannot be loaded or does not meet module naming conventions. 
+ - Raised when the module is not named "components" + - Raised when the full module path is not "source_declarative_manifest.components" + - Raised when the specific class cannot be found in the module + + Notes: + - Enforces strict naming conventions for custom component modules + - Provides detailed error messages for debugging component loading issues """ split = full_qualified_class_name.split(".") module_name_full = ".".join(split[:-1]) @@ -1108,6 +1146,23 @@ def _get_class_from_fully_qualified_class_name( @staticmethod def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]: + """ + Derive the component type name from type hints by unwrapping nested generic types. + + This method extracts the underlying type from potentially nested generic type hints, + such as List[T], Optional[List[T]], etc., and returns the type name if it's a non-builtin type. + + Parameters: + field_type (Any): The type hint to analyze for component type extraction. + + Returns: + Optional[str]: The name of the underlying type if it's a non-builtin type, otherwise None. + + Examples: + - List[str] returns None + - List[CustomType] returns "CustomType" + - Optional[List[CustomType]] returns "CustomType" + """ interface = field_type while True: origin = get_origin(interface) diff --git a/airbyte_cdk/test/utils/manifest_only_fixtures.py b/airbyte_cdk/test/utils/manifest_only_fixtures.py index 43e90a2c4..643ff2327 100644 --- a/airbyte_cdk/test/utils/manifest_only_fixtures.py +++ b/airbyte_cdk/test/utils/manifest_only_fixtures.py @@ -31,9 +31,28 @@ def connector_dir(request: pytest.FixtureRequest) -> Path: @pytest.fixture(scope="session") def components_module(connector_dir: Path) -> ModuleType | None: - """Load and return the components module from the connector directory. - - This assumes the components module is located at /components.py. + """ + Load and return the components module from the connector directory. 
+ + This fixture attempts to load the 'components.py' module from the specified connector directory. It handles various potential failure scenarios during module loading. + + Parameters: + connector_dir (Path): The root directory of the connector containing the components module. + + Returns: + ModuleType | None: The loaded components module if successful, or None if: + - The components.py file does not exist + - The module specification cannot be created + - The module loader is unavailable + + Raises: + No explicit exceptions are raised; returns None on failure. + + Example: + def test_uses_components(components_module): # pytest injects the fixture by name; fixtures cannot be called directly + if components_module: + # Use the loaded module + some_component = components_module.SomeComponent() """ components_path = connector_dir / "components.py" if not components_path.exists(): @@ -52,9 +71,25 @@ def components_module(connector_dir: Path) -> ModuleType | None: def components_module_from_string(components_py_text: str) -> ModuleType | None: - """Load and return the components module from a provided string containing the python code. - - This assumes the components module is located at /components.py. + """ + Load a Python module from a string containing module code. + + Parameters: + components_py_text (str): A string containing valid Python code representing a module. + + Returns: + ModuleType | None: A dynamically created module object containing the executed code, or None if execution fails. + + Raises: + Exception: Potential runtime errors during code execution. + + Example: + components_code = ''' + def sample_component(): + return "Hello, World!" + ''' + module = components_module_from_string(components_code) + result = module.sample_component() # Returns "Hello, World!"
""" module_name = "components" @@ -70,7 +105,22 @@ def components_module_from_string(components_py_text: str) -> ModuleType | None: @pytest.fixture(scope="session") def manifest_path(connector_dir: Path) -> Path: - """Return the path to the connector's manifest file.""" + """ + Return the path to the connector's manifest file. + + Parameters: + connector_dir (Path): The root directory of the connector. + + Returns: + Path: The absolute path to the manifest.yaml file. + + Raises: + FileNotFoundError: If the manifest.yaml file does not exist in the specified connector directory. + + Example: + manifest_file = manifest_path(Path('/path/to/connector')) + # Returns Path('/path/to/connector/manifest.yaml') + """ path = connector_dir / "manifest.yaml" if not path.exists(): raise FileNotFoundError(f"Manifest file not found at {path}") diff --git a/unit_tests/source_declarative_manifest/conftest.py b/unit_tests/source_declarative_manifest/conftest.py index d4c67a33e..f2a37d763 100644 --- a/unit_tests/source_declarative_manifest/conftest.py +++ b/unit_tests/source_declarative_manifest/conftest.py @@ -11,6 +11,23 @@ def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> str: + """ + Compute the hash of the input text using the specified hashing algorithm. + + Parameters: + input_text (str): The text to be hashed. + hash_type (Literal["md5", "sha256"], optional): The hashing algorithm to use. + Defaults to "md5". Supports "md5" and "sha256" algorithms. + + Returns: + str: The hexadecimal digest of the hashed input text. 
+ + Examples: + >>> hash_text("hello world") + '5eb63bbbe01eeed093cb22bb8f5acdc3' + >>> hash_text("hello world", hash_type="sha256") + 'b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9' + """ hashers = { "md5": hashlib.md5, "sha256": hashlib.sha256, @@ -21,6 +38,19 @@ def hash_text(input_text: str, hash_type: Literal["md5", "sha256"] = "md5") -> s def get_fixture_path(file_name) -> str: + """ + Construct the full path to a fixture file relative to the current script's directory. + + Parameters: + file_name (str): The name of the fixture file to locate. + + Returns: + str: The absolute path to the specified fixture file. + + Example: + >>> get_fixture_path('config.json') + '/path/to/current/directory/config.json' + """ return os.path.join(os.path.dirname(__file__), file_name) diff --git a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py index db5b07971..5c8d76757 100644 --- a/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py +++ b/unit_tests/source_declarative_manifest/resources/source_the_guardian_api/components.py @@ -19,6 +19,23 @@ class CustomPageIncrement(PageIncrement): """ def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: + """ + Retrieve the next page token for pagination based on the current page and total pages. + + Extracts the current page and total pages from the API response. If more pages are available, + increments the page counter and returns the next page number. Otherwise, returns None to + indicate the end of pagination. + + Parameters: + response (requests.Response): The HTTP response from the API containing pagination details. + *args: Variable length argument list (unused in this implementation). + + Returns: + Optional[Any]: The next page number if more pages are available, or None if pagination is complete. 
+ + Raises: + AttributeError: If the response JSON has no "response" object (keys are read with .get(), so missing keys never raise KeyError). + """ res = response.json().get("response") currPage = res.get("currentPage") totalPages = res.get("pages") @@ -29,8 +46,23 @@ def next_page_token(self, response: requests.Response, *args) -> Optional[Any]: return None def __post_init__(self, parameters: Mapping[str, Any]): + """ + Initialize the page increment with a starting page number of 1. + + This method is called after the class initialization and sets the initial page + to 1 by invoking the parent class's __post_init__ method and then explicitly + setting the _page attribute. + + Parameters: + parameters (Mapping[str, Any]): Configuration parameters passed during initialization. + """ super().__post_init__(parameters) self._page = 1 def reset(self): + """ + Reset the page counter to the initial state. + + This method resets the internal page counter to 1, allowing pagination to start over from the beginning. It is useful when you want to restart the pagination process for a new request or after completing a previous pagination cycle. + """ self._page = 1 diff --git a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py index 42880baca..607184409 100644 --- a/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py +++ b/unit_tests/source_declarative_manifest/test_source_declarative_w_custom_components.py @@ -36,11 +36,35 @@ def sample_method(self) -> str: def get_fixture_path(file_name) -> str: + """ + Construct the absolute path to a fixture file relative to the current script's directory.
+ + Parameters: + file_name (str): The name of the fixture file to locate + + Returns: + str: The full absolute path to the specified fixture file + """ return os.path.join(os.path.dirname(__file__), file_name) def test_components_module_from_string() -> None: # Call the function to get the module + """ + Test the functionality of dynamically creating a Python module from a string containing code. + + This test verifies that the `components_module_from_string` function can successfully: + - Create a module from a string of Python code + - Define functions within the module + - Define classes within the module + - Allow instantiation and method calls on dynamically created classes + + Assertions: + - Checks that the returned object is a module + - Verifies the existence of a sample function + - Confirms the sample function returns the expected string + - Validates class definition and method invocation + """ components_module: types.ModuleType = components_module_from_string(SAMPLE_COMPONENTS_PY_TEXT) # Check that the module is created and is of the correct type @@ -60,6 +84,29 @@ def test_components_module_from_string() -> None: def get_py_components_config_dict() -> dict[str, Any]: + """ + Construct a configuration dictionary for a declarative source with custom Python components. + + This function loads and combines configuration data from multiple YAML files and a Python components file + for a specific Airbyte connector. 
It prepares a comprehensive configuration dictionary that includes: + - The declarative manifest + - Custom Python components + - Checksums for the Python components + - Configuration and secrets from YAML files + + Parameters: + None + + Returns: + dict[str, Any]: A configuration dictionary containing: + - '__injected_declarative_manifest': The loaded manifest configuration + - '__injected_components_py': The raw Python components code + - '__injected_components_py_checksum': MD5 and SHA256 checksums of the components + - Additional configuration and secret key-value pairs from YAML files + + Raises: + AssertionError: If the manifest file cannot be loaded or is not a mapping + """ connector_dir = Path(get_fixture_path("resources/source_the_guardian_api")) manifest_yml_path: Path = connector_dir / "manifest.yaml" custom_py_code_path: Path = connector_dir / "components.py" @@ -91,6 +138,29 @@ def get_py_components_config_dict() -> dict[str, Any]: reason="Skipped due to missing 'secrets.yaml'.", ) def test_given_injected_declarative_manifest_and_py_components() -> None: + """ + Test the integration of a declarative source with custom Python components. + + This test function validates the end-to-end functionality of a declarative source by: + 1. Retrieving a configuration dictionary with injected components + 2. Modifying the start date to limit test duration + 3. Creating a temporary configuration file + 4. Creating a declarative source + 5. Performing source check and discovery operations + 6. 
Reading messages from the source and validating them + + The test ensures that: + - The configuration dictionary is correctly structured + - A declarative source can be created from the configuration + - The source can perform check and discover operations + - The source can read messages without errors + + Args: + None + + Raises: + AssertionError: If any of the validation checks fail during the test process + """ py_components_config_dict = get_py_components_config_dict() # Truncate the start_date to speed up tests py_components_config_dict["start_date"] = (