From 47ec7a4af24d07312c1516705d72831229e42040 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sun, 7 Apr 2024 00:25:48 -0700 Subject: [PATCH 01/35] skeleton of native declarative source support --- airbyte/sources/declarative.py | 64 ++++++++++++++ poetry.lock | 153 ++++++++++++++++++--------------- pyproject.toml | 4 +- 3 files changed, 150 insertions(+), 71 deletions(-) create mode 100644 airbyte/sources/declarative.py diff --git a/airbyte/sources/declarative.py b/airbyte/sources/declarative.py new file mode 100644 index 000000000..9993a6f51 --- /dev/null +++ b/airbyte/sources/declarative.py @@ -0,0 +1,64 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +"""Support for declarative yaml source testing.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from source_declarative_manifest import run + +from airbyte._executor import Executor +from airbyte.sources.base import Source + + +if TYPE_CHECKING: + from collections.abc import Iterator + from pathlib import Path + + +class DeclarativeExecutor(Executor): + """An executor for declarative sources.""" + + def __init__( + self, + manifest: str | dict | Path, + ) -> None: + """Initialize a declarative executor.""" + self.manifest = manifest + + def execute(self, args: list[str]) -> Iterator[str]: + """Execute the declarative source.""" + return run(args) + + def ensure_installation(self, *, auto_fix: bool = True) -> None: + """No-op. The declarative source is included with PyAirbyte.""" + _ = auto_fix + pass + + def install(self) -> None: + """No-op. The declarative source is included with PyAirbyte.""" + pass + + def uninstall(self) -> None: + """No-op. The declarative source is included with PyAirbyte.""" + pass + + +class DeclarativeSource(Source): + """A declarative source using Airbyte's Yaml low-code/no-code framework.""" + + executor_class = DeclarativeExecutor + + def __init__( + self, + manifest: str | dict | Path, + ) -> None: + """Initialize a declarative source.""" + self.manifest = manifest + super().__init__( + name="Declarative", # TODO: Get name from manifest + config={ # TODO: Put 'real' config here + "manifest": manifest, + }, + executor=DeclarativeExecutor(manifest), + ) diff --git a/poetry.lock b/poetry.lock index 4cabdaa87..315269432 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2,39 +2,38 @@ [[package]] name = "airbyte-cdk" -version = "0.73.0" +version = "0.79.1" description = "A framework for writing Airbyte Connectors." optional = false -python-versions = ">=3.9" +python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte-cdk-0.73.0.tar.gz", hash = "sha256:a03e0265a8a4afb1378d285993624659d9f481404aaf69cf7c0a5ddad3568ea2"}, - {file = "airbyte_cdk-0.73.0-py3-none-any.whl", hash = "sha256:339e42a7602461073a69bf0c4e11be26a7eea3157def43ffecdf9d0d73f32c6f"}, + {file = "airbyte_cdk-0.79.1-py3-none-any.whl", hash = "sha256:36c4b1fe98448b7d116f16c612982af8e22cbff28ea37da918c851d7feb1093c"}, + {file = "airbyte_cdk-0.79.1.tar.gz", hash = "sha256:a49d10b3c87770ab1e7b7ebf9a1e945d49274c18548756f93a841ebd4c195146"}, ] [package.dependencies] airbyte-protocol-models = "0.5.1" backoff = "*" cachetools = "*" -Deprecated = ">=1.2,<2.0" +Deprecated = ">=1.2,<1.3" dpath = ">=2.0.1,<2.1.0" genson = "1.2.2" isodate = ">=0.6.1,<0.7.0" Jinja2 = ">=3.1.2,<3.2.0" -jsonref = ">=0.2,<1.0" +jsonref = ">=0.2,<0.3" jsonschema = ">=3.2.0,<3.3.0" pendulum = "<3.0.0" pydantic = ">=1.10.8,<2.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" python-dateutil = "*" -PyYAML = ">=6.0.1" +PyYAML = ">=6.0.1,<7.0.0" requests = "*" -requests-cache = "*" +requests_cache = "*" wcmatch = "8.4" [package.extras] -dev = ["avro (>=1.11.2,<1.12.0)", "cohere (==4.21)", "fastavro (>=1.8.0,<1.9.0)", "freezegun", "langchain (==0.0.271)", "markdown", "mypy", "openai[embeddings] (==0.27.9)", "pandas (==2.0.3)", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "pytest", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests-mock", "tiktoken (==0.4.0)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] -file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] -sphinx-docs = ["Sphinx (>=4.2,<5.0)", "sphinx-rtd-theme (>=1.0,<2.0)"] +file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +sphinx-docs = ["Sphinx (>=4.2,<4.3)", "sphinx-rtd-theme (>=1.0,<1.1)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] [[package]] @@ -51,6 +50,20 @@ files = [ [package.dependencies] pydantic = ">=1.9.2,<2.0.0" +[[package]] +name = "airbyte-source-declarative-manifest" +version = "0.79.1" +description = "Base source implementation for low-code sources." +optional = false +python-versions = "<3.12,>=3.9" +files = [ + {file = "airbyte_source_declarative_manifest-0.79.1-py3-none-any.whl", hash = "sha256:54fb03c157ea60175513a9f50543c7bc90dcd6e2a789344991fa393137e38820"}, + {file = "airbyte_source_declarative_manifest-0.79.1.tar.gz", hash = "sha256:e350276d678d298950e1dcd6519e0c6832454d77eb5669775da94cb62c66865f"}, +] + +[package.dependencies] +airbyte-cdk = "0.79.1" + [[package]] name = "airbyte-source-faker" version = "6.0.1" @@ -640,13 +653,13 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "google-cloud-bigquery" -version = "3.19.0" +version = "3.20.1" description = "Google BigQuery API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-bigquery-3.19.0.tar.gz", hash = "sha256:8e311dae49768e1501fcdc5e916bff4b7e169471e5707919f4a6f78a02b3b5a6"}, - {file = "google_cloud_bigquery-3.19.0-py2.py3-none-any.whl", hash = "sha256:c6b8850247a4b132066e49f6e45f850c22824482838688d744a4398eea1120ed"}, + {file = "google-cloud-bigquery-3.20.1.tar.gz", hash = "sha256:318aa3abab5f1900ee24f63ba8bd02b9cdafaa942d738b4dc14a4ef2cc2d925f"}, + {file = "google_cloud_bigquery-3.20.1-py2.py3-none-any.whl", hash = "sha256:d3e62fe61138c658b8853c402e2d8fb9346c84e602e21e3a26584be10fc5b0a4"}, ] [package.dependencies] @@ -1045,13 +1058,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonref" -version = "0.3.0" -description = "jsonref is a library for automatic dereferencing of JSON Reference objects for Python." +version = "0.2" +description = "An implementation of JSON Reference for Python" optional = false -python-versions = ">=3.3,<4.0" +python-versions = "*" files = [ - {file = "jsonref-0.3.0-py3-none-any.whl", hash = "sha256:9480ad1b500f7e795daeb0ef29f9c55ae3a9ab38fb8d6659b6f4868acb5a5bc8"}, - {file = "jsonref-0.3.0.tar.gz", hash = "sha256:68b330c6815dc0d490dbb3d65ccda265ddde9f7856fd2f3322f971d456ea7549"}, + {file = "jsonref-0.2-py3-none-any.whl", hash = "sha256:b1e82fa0b62e2c2796a13e5401fe51790b248f6d9bf9d7212a3e31a3501b291f"}, + {file = "jsonref-0.2.tar.gz", hash = "sha256:f3c45b121cf6257eafabdc3a8008763aed1cd7da06dbabc59a9e4d2a5e4e6697"}, ] [[package]] @@ -1685,58 +1698,58 @@ pyasn1 = ">=0.4.6,<0.7.0" [[package]] name = "pycparser" -version = "2.21" +version = "2.22" description = "C parser in Python" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.8" files = [ - {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, - {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] [[package]] name = "pydantic" -version = "1.10.14" +version = "1.10.15" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7f4fcec873f90537c382840f330b90f4715eebc2bc9925f04cb92de593eae054"}, - {file = "pydantic-1.10.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e3a76f571970fcd3c43ad982daf936ae39b3e90b8a2e96c04113a369869dc87"}, - {file = "pydantic-1.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82d886bd3c3fbeaa963692ef6b643159ccb4b4cefaf7ff1617720cbead04fd1d"}, - {file = "pydantic-1.10.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:798a3d05ee3b71967844a1164fd5bdb8c22c6d674f26274e78b9f29d81770c4e"}, - {file = "pydantic-1.10.14-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:23d47a4b57a38e8652bcab15a658fdb13c785b9ce217cc3a729504ab4e1d6bc9"}, - {file = "pydantic-1.10.14-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9f674b5c3bebc2eba401de64f29948ae1e646ba2735f884d1594c5f675d6f2a"}, - {file = "pydantic-1.10.14-cp310-cp310-win_amd64.whl", hash = "sha256:24a7679fab2e0eeedb5a8924fc4a694b3bcaac7d305aeeac72dd7d4e05ecbebf"}, - {file = "pydantic-1.10.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9d578ac4bf7fdf10ce14caba6f734c178379bd35c486c6deb6f49006e1ba78a7"}, - {file = "pydantic-1.10.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa7790e94c60f809c95602a26d906eba01a0abee9cc24150e4ce2189352deb1b"}, - {file = "pydantic-1.10.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad4e10efa5474ed1a611b6d7f0d130f4aafadceb73c11d9e72823e8f508e663"}, - {file = "pydantic-1.10.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1245f4f61f467cb3dfeced2b119afef3db386aec3d24a22a1de08c65038b255f"}, - {file = "pydantic-1.10.14-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:21efacc678a11114c765eb52ec0db62edffa89e9a562a94cbf8fa10b5db5c046"}, - {file = "pydantic-1.10.14-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:412ab4a3f6dbd2bf18aefa9f79c7cca23744846b31f1d6555c2ee2b05a2e14ca"}, - {file = "pydantic-1.10.14-cp311-cp311-win_amd64.whl", hash = "sha256:e897c9f35281f7889873a3e6d6b69aa1447ceb024e8495a5f0d02ecd17742a7f"}, - {file = "pydantic-1.10.14-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d604be0f0b44d473e54fdcb12302495fe0467c56509a2f80483476f3ba92b33c"}, - {file = "pydantic-1.10.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42c7d17706911199798d4c464b352e640cab4351efe69c2267823d619a937e5"}, - {file = "pydantic-1.10.14-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:596f12a1085e38dbda5cbb874d0973303e34227b400b6414782bf205cc14940c"}, - {file = "pydantic-1.10.14-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bfb113860e9288d0886e3b9e49d9cf4a9d48b441f52ded7d96db7819028514cc"}, - {file = "pydantic-1.10.14-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bc3ed06ab13660b565eed80887fcfbc0070f0aa0691fbb351657041d3e874efe"}, - {file = "pydantic-1.10.14-cp37-cp37m-win_amd64.whl", hash = "sha256:ad8c2bc677ae5f6dbd3cf92f2c7dc613507eafe8f71719727cbc0a7dec9a8c01"}, - {file = "pydantic-1.10.14-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c37c28449752bb1f47975d22ef2882d70513c546f8f37201e0fec3a97b816eee"}, - {file = "pydantic-1.10.14-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49a46a0994dd551ec051986806122767cf144b9702e31d47f6d493c336462597"}, - {file = "pydantic-1.10.14-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53e3819bd20a42470d6dd0fe7fc1c121c92247bca104ce608e609b59bc7a77ee"}, - {file = "pydantic-1.10.14-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbb503bbbbab0c588ed3cd21975a1d0d4163b87e360fec17a792f7d8c4ff29f"}, - {file = "pydantic-1.10.14-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:336709883c15c050b9c55a63d6c7ff09be883dbc17805d2b063395dd9d9d0022"}, - {file = "pydantic-1.10.14-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4ae57b4d8e3312d486e2498d42aed3ece7b51848336964e43abbf9671584e67f"}, - {file = "pydantic-1.10.14-cp38-cp38-win_amd64.whl", hash = "sha256:dba49d52500c35cfec0b28aa8b3ea5c37c9df183ffc7210b10ff2a415c125c4a"}, - {file = "pydantic-1.10.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c66609e138c31cba607d8e2a7b6a5dc38979a06c900815495b2d90ce6ded35b4"}, - {file = "pydantic-1.10.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d986e115e0b39604b9eee3507987368ff8148222da213cd38c359f6f57b3b347"}, - {file = "pydantic-1.10.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:646b2b12df4295b4c3148850c85bff29ef6d0d9621a8d091e98094871a62e5c7"}, - {file = "pydantic-1.10.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282613a5969c47c83a8710cc8bfd1e70c9223feb76566f74683af889faadc0ea"}, - {file = "pydantic-1.10.14-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:466669501d08ad8eb3c4fecd991c5e793c4e0bbd62299d05111d4f827cded64f"}, - {file = "pydantic-1.10.14-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:13e86a19dca96373dcf3190fcb8797d40a6f12f154a244a8d1e8e03b8f280593"}, - {file = "pydantic-1.10.14-cp39-cp39-win_amd64.whl", hash = "sha256:08b6ec0917c30861e3fe71a93be1648a2aa4f62f866142ba21670b24444d7fd8"}, - {file = "pydantic-1.10.14-py3-none-any.whl", hash = "sha256:8ee853cd12ac2ddbf0ecbac1c289f95882b2d4482258048079d13be700aa114c"}, - {file = "pydantic-1.10.14.tar.gz", hash = "sha256:46f17b832fe27de7850896f3afee50ea682220dd218f7e9c88d436788419dca6"}, + {file = "pydantic-1.10.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:22ed12ee588b1df028a2aa5d66f07bf8f8b4c8579c2e96d5a9c1f96b77f3bb55"}, + {file = "pydantic-1.10.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:75279d3cac98186b6ebc2597b06bcbc7244744f6b0b44a23e4ef01e5683cc0d2"}, + {file = "pydantic-1.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50f1666a9940d3d68683c9d96e39640f709d7a72ff8702987dab1761036206bb"}, + {file = "pydantic-1.10.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82790d4753ee5d00739d6cb5cf56bceb186d9d6ce134aca3ba7befb1eedbc2c8"}, + {file = "pydantic-1.10.15-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:d207d5b87f6cbefbdb1198154292faee8017d7495a54ae58db06762004500d00"}, + {file = "pydantic-1.10.15-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e49db944fad339b2ccb80128ffd3f8af076f9f287197a480bf1e4ca053a866f0"}, + {file = "pydantic-1.10.15-cp310-cp310-win_amd64.whl", hash = "sha256:d3b5c4cbd0c9cb61bbbb19ce335e1f8ab87a811f6d589ed52b0254cf585d709c"}, + {file = "pydantic-1.10.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c3d5731a120752248844676bf92f25a12f6e45425e63ce22e0849297a093b5b0"}, + {file = "pydantic-1.10.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c365ad9c394f9eeffcb30a82f4246c0006417f03a7c0f8315d6211f25f7cb654"}, + {file = "pydantic-1.10.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3287e1614393119c67bd4404f46e33ae3be3ed4cd10360b48d0a4459f420c6a3"}, + {file = "pydantic-1.10.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be51dd2c8596b25fe43c0a4a59c2bee4f18d88efb8031188f9e7ddc6b469cf44"}, + {file = "pydantic-1.10.15-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6a51a1dd4aa7b3f1317f65493a182d3cff708385327c1c82c81e4a9d6d65b2e4"}, + {file = "pydantic-1.10.15-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4e316e54b5775d1eb59187f9290aeb38acf620e10f7fd2f776d97bb788199e53"}, + {file = "pydantic-1.10.15-cp311-cp311-win_amd64.whl", hash = "sha256:0d142fa1b8f2f0ae11ddd5e3e317dcac060b951d605fda26ca9b234b92214986"}, + {file = "pydantic-1.10.15-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7ea210336b891f5ea334f8fc9f8f862b87acd5d4a0cbc9e3e208e7aa1775dabf"}, + {file = "pydantic-1.10.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3453685ccd7140715e05f2193d64030101eaad26076fad4e246c1cc97e1bb30d"}, + {file = "pydantic-1.10.15-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bea1f03b8d4e8e86702c918ccfd5d947ac268f0f0cc6ed71782e4b09353b26f"}, + {file = "pydantic-1.10.15-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:005655cabc29081de8243126e036f2065bd7ea5b9dff95fde6d2c642d39755de"}, + {file = "pydantic-1.10.15-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:af9850d98fc21e5bc24ea9e35dd80a29faf6462c608728a110c0a30b595e58b7"}, + {file = "pydantic-1.10.15-cp37-cp37m-win_amd64.whl", hash = "sha256:d31ee5b14a82c9afe2bd26aaa405293d4237d0591527d9129ce36e58f19f95c1"}, + {file = "pydantic-1.10.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5e09c19df304b8123938dc3c53d3d3be6ec74b9d7d0d80f4f4b5432ae16c2022"}, + {file = "pydantic-1.10.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7ac9237cd62947db00a0d16acf2f3e00d1ae9d3bd602b9c415f93e7a9fc10528"}, + {file = "pydantic-1.10.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:584f2d4c98ffec420e02305cf675857bae03c9d617fcfdc34946b1160213a948"}, + {file = "pydantic-1.10.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbc6989fad0c030bd70a0b6f626f98a862224bc2b1e36bfc531ea2facc0a340c"}, + {file = "pydantic-1.10.15-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d573082c6ef99336f2cb5b667b781d2f776d4af311574fb53d908517ba523c22"}, + {file = "pydantic-1.10.15-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6bd7030c9abc80134087d8b6e7aa957e43d35714daa116aced57269a445b8f7b"}, + {file = "pydantic-1.10.15-cp38-cp38-win_amd64.whl", hash = "sha256:3350f527bb04138f8aff932dc828f154847fbdc7a1a44c240fbfff1b57f49a12"}, + {file = "pydantic-1.10.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:51d405b42f1b86703555797270e4970a9f9bd7953f3990142e69d1037f9d9e51"}, + {file = "pydantic-1.10.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a980a77c52723b0dc56640ced396b73a024d4b74f02bcb2d21dbbac1debbe9d0"}, + {file = "pydantic-1.10.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f1a1fb467d3f49e1708a3f632b11c69fccb4e748a325d5a491ddc7b5d22383"}, + {file = "pydantic-1.10.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:676ed48f2c5bbad835f1a8ed8a6d44c1cd5a21121116d2ac40bd1cd3619746ed"}, + {file = "pydantic-1.10.15-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:92229f73400b80c13afcd050687f4d7e88de9234d74b27e6728aa689abcf58cc"}, + {file = "pydantic-1.10.15-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2746189100c646682eff0bce95efa7d2e203420d8e1c613dc0c6b4c1d9c1fde4"}, + {file = "pydantic-1.10.15-cp39-cp39-win_amd64.whl", hash = "sha256:394f08750bd8eaad714718812e7fab615f873b3cdd0b9d84e76e51ef3b50b6b7"}, + {file = "pydantic-1.10.15-py3-none-any.whl", hash = "sha256:28e552a060ba2740d0d2aabe35162652c1459a0b9069fe0db7f4ee0e18e74d58"}, + {file = "pydantic-1.10.15.tar.gz", hash = "sha256:ca832e124eda231a60a041da4f013e3ff24949d94a01154b137fc2f2a43c3ffb"}, ] [package.dependencies] @@ -1963,13 +1976,13 @@ cli = ["click (>=5.0)"] [[package]] name = "python-ulid" -version = "2.3.0" +version = "2.4.0.post0" description = "Universally unique lexicographically sortable identifier" optional = false python-versions = ">=3.9" files = [ - {file = "python_ulid-2.3.0-py3-none-any.whl", hash = "sha256:1b6ac5b1fae214502feb50c7535ffa3f7f496f3f2abe73296be6bd0a6976bca5"}, - {file = "python_ulid-2.3.0.tar.gz", hash = "sha256:28108e5edf56ee981dd75ea12ae7279a8a23bf01514144dda7a64c38143204a5"}, + {file = "python_ulid-2.4.0.post0-py3-none-any.whl", hash = "sha256:e2c739e27e6d760136e5f411f311cdd3ec9c4c89696932fe803fa09a4dcd6ebe"}, + {file = "python_ulid-2.4.0.post0.tar.gz", hash = "sha256:45779c68b9060beb6fca72338a0620114489e1bbe274935149f14d1f776d4c43"}, ] [package.extras] @@ -2559,13 +2572,13 @@ files = [ [[package]] name = "types-jsonschema" -version = "4.21.0.20240311" +version = "4.21.0.20240331" description = "Typing stubs for jsonschema" optional = false python-versions = ">=3.8" files = [ - {file = "types-jsonschema-4.21.0.20240311.tar.gz", hash = "sha256:f7165ce70abd91df490c73b089873afd2899c5e56430ee495b64f851ad01f287"}, - {file = "types_jsonschema-4.21.0.20240311-py3-none-any.whl", hash = "sha256:e872f5661513824edf9698f73a66c9c114713d93eab58699bd0532e7e6db5750"}, + {file = "types-jsonschema-4.21.0.20240331.tar.gz", hash = "sha256:3a5ed0a72ab7bc304ca4accbb709272c620f396abf2fb19570b80d949e357eb6"}, + {file = "types_jsonschema-4.21.0.20240331-py3-none-any.whl", hash = "sha256:78dec1d88c5aec77e46e6bddce2a082157ce3059ec7aab19169b13b2ee553a51"}, ] [package.dependencies] @@ -2620,13 +2633,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.10.0" +version = "4.11.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, - {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] [[package]] @@ -2776,4 +2789,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "0b8786d136f0f18fa6b387555902acab38b79f65cb2576734bd8c826815162b1" +content-hash = "d2968f00e25c51f19d941e8c4ab812890c5f4b5d8ad21c6d97ac17d846b74436" diff --git a/pyproject.toml b/pyproject.toml index 427250d3d..d909d297a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ enable = true [tool.poetry.dependencies] python = ">=3.9,<4.0" -airbyte-cdk = "^0.73.0" +airbyte-cdk = "^0.79.1" duckdb = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck duckdb-engine = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck google-auth = ">=2.27.0,<3.0" @@ -42,6 +42,8 @@ ulid = "^1.1" # TODO: Remove this arbitrary python constraint once `sqlalchemy-bigquery` has done so. sqlalchemy-bigquery = { version = "1.9.0", python = "<3.13" } +airbyte-source-declarative-manifest = { version = "0.79.1", python = ">=3.9,<3.12" } + [tool.poetry.group.dev.dependencies] docker = "^7.0.0" faker = "^21.0.0" From 8288ce8142e751641b9c18b3db490edfd86d3f0c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sun, 7 Apr 2024 00:34:37 -0700 Subject: [PATCH 02/35] improve implementation --- airbyte/sources/declarative.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/airbyte/sources/declarative.py b/airbyte/sources/declarative.py index 9993a6f51..4b80cc316 100644 --- a/airbyte/sources/declarative.py +++ b/airbyte/sources/declarative.py @@ -47,14 +47,29 @@ def uninstall(self) -> None: class DeclarativeSource(Source): """A declarative source using Airbyte's Yaml low-code/no-code framework.""" - executor_class = DeclarativeExecutor - def __init__( self, manifest: str | dict | Path, ) -> None: - """Initialize a declarative source.""" + """Initialize a declarative source. + + Sample usages: + ```python + manifest_path = "path/to/manifest.yaml" + + source_a = DeclarativeSource(manifest=Path(manifest_path)) + source_b = DeclarativeSource(manifest=Path(manifest_path).read_text()) + source_c = DeclarativeSource(manifest=yaml.load(Path(manifest_path).read_text())) + ``` + + Args: + manifest: The manifest for the declarative source. This can be a path to a yaml file, a + yaml string, or a dict. + """ + # TODO: Conform manifest to a dict or str (TBD) self.manifest = manifest + + # Initialize the source using the base class implementation super().__init__( name="Declarative", # TODO: Get name from manifest config={ # TODO: Put 'real' config here From be4ecd43ecfd7533ae3e8de0ae41be70a23b29e1 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 10:55:36 -0700 Subject: [PATCH 03/35] update with native execution, wrapping the proper CDK classes --- airbyte/sources/declarative.py | 35 ++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/airbyte/sources/declarative.py b/airbyte/sources/declarative.py index 4b80cc316..e107600ee 100644 --- a/airbyte/sources/declarative.py +++ b/airbyte/sources/declarative.py @@ -3,9 +3,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import json +from pathlib import Path +from typing import TYPE_CHECKING, cast -from source_declarative_manifest import run +from airbyte_cdk.entrypoint import AirbyteEntrypoint +from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte._executor import Executor from airbyte.sources.base import Source @@ -13,7 +16,6 @@ if TYPE_CHECKING: from collections.abc import Iterator - from pathlib import Path class DeclarativeExecutor(Executor): @@ -23,12 +25,33 @@ def __init__( self, manifest: str | dict | Path, ) -> None: - """Initialize a declarative executor.""" - self.manifest = manifest + """Initialize a declarative executor. + + - If `manifest` is a path, it will be read as a json file. + - If `manifest` is a string, it will be parsed as an HTTP path. + - If `manifest` is a dict, it will be used as is. + """ + self._manifest_dict: dict + if isinstance(manifest, Path): + self._manifest_dict = cast(dict, json.loads(manifest.read_text())) + + elif isinstance(manifest, str): + # TODO: Implement HTTP path parsing + raise NotImplementedError("HTTP path parsing is not yet implemented.") + + elif isinstance(manifest, dict): + self._manifest_dict = manifest + + if not isinstance(self._manifest_dict, dict): + raise ValueError("Manifest must be a dict.") + + self.declarative_source = ManifestDeclarativeSource(source_config=self._manifest_dict) def execute(self, args: list[str]) -> Iterator[str]: """Execute the declarative source.""" - return run(args) + source_entrypoint = AirbyteEntrypoint(self.declarative_source) + parsed_args = source_entrypoint.parse_args(args) + yield from source_entrypoint.run(parsed_args) def ensure_installation(self, *, auto_fix: bool = True) -> None: """No-op. The declarative source is included with PyAirbyte.""" From 95d05610da8b06b994a0ddc984b9e3d2f7390156 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 11:15:07 -0700 Subject: [PATCH 04/35] update pyproject.toml --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cbd1d7d57..d78de9f4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,8 +45,6 @@ sqlalchemy-bigquery = { version = "1.9.0", python = "<3.13" } airbyte-api = "^0.49.2" google-cloud-bigquery-storage = "^2.25.0" -airbyte-source-declarative-manifest = { version = "0.79.1", python = ">=3.9,<3.12" } - [tool.poetry.group.dev.dependencies] docker = "^7.0.0" faker = "^21.0.0" From 8857c415f082c3170a7ac2f79f263b1f73f65180 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 13:59:25 -0700 Subject: [PATCH 05/35] add missing property 'reported_version' --- airbyte/sources/declarative.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte/sources/declarative.py b/airbyte/sources/declarative.py index e107600ee..231649b63 100644 --- a/airbyte/sources/declarative.py +++ b/airbyte/sources/declarative.py @@ -46,6 +46,7 @@ def __init__( raise ValueError("Manifest must be a dict.") self.declarative_source = ManifestDeclarativeSource(source_config=self._manifest_dict) + self.reported_version: str | None = None # TODO: Consider adding version detection def execute(self, args: list[str]) -> Iterator[str]: """Execute the declarative source.""" From 2b263dc4950e8573fbdba024f8e3a99293a7a903 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 14:00:58 -0700 Subject: [PATCH 06/35] =?UTF-8?q?partially=20working=20sync!=20?= =?UTF-8?q?=F0=9F=8E=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- airbyte/sources/util.py | 32 +++- examples/run_declarative_manifest_source.py | 174 ++++++++++++++++++++ poetry.lock | 50 ++---- pyproject.toml | 4 +- 4 files changed, 217 insertions(+), 43 deletions(-) create mode 100644 examples/run_declarative_manifest_source.py diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index 9ac340e5d..4c27c1efc 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -14,6 +14,7 @@ from airbyte._executor import DockerExecutor, PathExecutor, VenvExecutor from airbyte._util.telemetry import EventState, log_install_state from airbyte.sources.base import Source +from airbyte.sources.declarative import DeclarativeExecutor from airbyte.sources.registry import ConnectorMetadata, get_connector_metadata @@ -53,7 +54,8 @@ def _get_source( # noqa: PLR0912 # Too many branches version: str | None = None, pip_url: str | None = None, local_executable: Path | str | None = None, - docker_image: str | bool = False, + docker_image: bool | str = False, + source_manifest: bool | dict | Path | str = False, install_if_missing: bool = True, ) -> Source: """Get a connector by name and version. @@ -80,16 +82,27 @@ def _get_source( # noqa: PLR0912 # Too many branches install_if_missing: Whether to install the connector if it is not available locally. This parameter is ignored when local_executable is set. """ - if sum([bool(local_executable), bool(docker_image), bool(pip_url)]) > 1: + if ( + sum( + [ + bool(local_executable), + bool(docker_image), + bool(pip_url), + bool(source_manifest), + ] + ) + > 1 + ): raise exc.PyAirbyteInputError( message=( "You can only specify one of the settings: 'local_executable', 'docker_image', " - "or 'pip_url'." + "'pip_url', or 'source_manifest'." ), context={ "local_executable": local_executable, "docker_image": docker_image, "pip_url": pip_url, + "source_manifest": source_manifest, }, ) @@ -169,6 +182,19 @@ def _get_source( # noqa: PLR0912 # Too many branches ), ) + if source_manifest: + if source_manifest is True: + # TODO: Locate the manifest file + raise NotImplementedError("Manifest file location is not yet implemented.") + + return Source( + name=name, + config=config, + streams=streams, + executor=DeclarativeExecutor( + manifest=source_manifest, + ), + ) # else: we are installing a connector in a virtual environment: metadata: ConnectorMetadata | None = None diff --git a/examples/run_declarative_manifest_source.py b/examples/run_declarative_manifest_source.py new file mode 100644 index 000000000..ffc2bfa3a --- /dev/null +++ b/examples/run_declarative_manifest_source.py @@ -0,0 +1,174 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +"""A test of PyAirbyte calling a declarative manifest. + +Usage (from PyAirbyte root directory): +> poetry run python examples/run_declarative_manifest_source.py + +""" + +from __future__ import annotations + +from typing import cast + +import yaml + +import airbyte as ab +from airbyte.experimental import get_source + + +# Copy-pasted from the Builder "Yaml" view: +SOURCE_MANIFEST_TEXT = """ +version: 0.85.0 + +type: DeclarativeSource + +check: + type: CheckStream + stream_names: + - characters + +definitions: + streams: + characters: + type: DeclarativeStream + name: characters + retriever: + type: SimpleRetriever + paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + field_name: page + inject_into: request_parameter + pagination_strategy: + type: PageIncrement + start_from_page: 1 + requester: + $ref: '#/definitions/base_requester' + path: character/ + http_method: GET + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - results + primary_key: + - id + schema_loader: + type: InlineSchemaLoader + schema: + $ref: '#/schemas/characters' + base_requester: + type: HttpRequester + url_base: https://rickandmortyapi.com/api + +streams: + - $ref: '#/definitions/streams/characters' + +spec: + type: Spec + connection_specification: + type: object + $schema: http://json-schema.org/draft-07/schema# + required: [] + properties: {} + additionalProperties: true + +metadata: + autoImportSchema: + characters: true + +schemas: + characters: + type: object + $schema: http://json-schema.org/schema# + required: + - id + properties: + type: + type: + - string + - 'null' + id: + type: number + url: + type: + - string + - 'null' + name: + type: + - string + - 'null' + image: + type: + - string + - 'null' + gender: + type: + - string + - 'null' + origin: + type: + - object + - 'null' + properties: + url: + type: + - string + - 'null' + name: + type: + - string + - 'null' + status: + type: + - string + - 'null' + created: + type: + - string + - 'null' + episode: + type: + - array + - 'null' + items: + type: + - string + - 'null' + species: + type: + - string + - 'null' + location: + type: + - object + - 'null' + properties: + url: + type: + - string + - 'null' + name: + type: + - string + - 'null' + additionalProperties: true +""" + +source_manifest_dict = cast(dict, yaml.safe_load(SOURCE_MANIFEST_TEXT)) + +print("Installing declarative source...") +source = get_source( + "source-rick-and-morty", + config={}, + source_manifest=source_manifest_dict, +) +source.check() +source.select_all_streams() + +result = source.read() + +for name, records in result.streams.items(): + print(f"Stream {name}: {len(records)} records") diff --git a/poetry.lock b/poetry.lock index 274f71043..ee6feb391 100644 --- a/poetry.lock +++ b/poetry.lock @@ -32,21 +32,22 @@ dev = ["pylint (==3.1.0)"] [[package]] name = "airbyte-cdk" -version = "0.81.8" +version = "1.1.3" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-0.81.8-py3-none-any.whl", hash = "sha256:1f826715e99b190b0581f0ce5192bd7e5eae69133e77a242a339a4227df02642"}, - {file = "airbyte_cdk-0.81.8.tar.gz", hash = "sha256:8854a899c9a4fabd2143b86befece8fd62130ffd74049b8c6fb8ac67c7c1da54"}, + {file = "airbyte_cdk-1.1.3-py3-none-any.whl", hash = "sha256:d72c8a26ed41dac11b2b945b98dd81fb868f31bed150c5a2495c2dd68c61df86"}, + {file = "airbyte_cdk-1.1.3.tar.gz", hash = "sha256:8d2a331a4a61f7d7ec1ff5ba76ca5d4fd70c2e24146e4b12673568c08484dece"}, ] [package.dependencies] -airbyte-protocol-models = "*" +airbyte-protocol-models = ">=0.9.0,<1.0" backoff = "*" cachetools = "*" +cryptography = ">=42.0.5,<43.0.0" Deprecated = ">=1.2,<1.3" -dpath = ">=2.0.1,<2.1.0" +dpath = ">=2.1.6,<3.0.0" genson = "1.2.2" isodate = ">=0.6.1,<0.7.0" Jinja2 = ">=3.1.2,<3.2.0" @@ -55,8 +56,10 @@ jsonschema = ">=3.2.0,<3.3.0" langchain_core = "0.1.42" pendulum = "<3.0.0" pydantic = ">=1.10.8,<2.0.0" +pyjwt = ">=2.8.0,<3.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" python-dateutil = "*" +pytz = "2024.1" PyYAML = ">=6.0.1,<7.0.0" requests = "*" requests_cache = "*" @@ -81,24 +84,6 @@ files = [ [package.dependencies] pydantic = ">=1.9.2,<2.0.0" -[[package]] -name = "airbyte-source-faker" -version = "6.0.1" -description = "Source implementation for fake but realistic looking data." -optional = false -python-versions = "*" -files = [ - {file = "airbyte-source-faker-6.0.1.tar.gz", hash = "sha256:8173a48551fbfe0eb6e9c331fec650fa490f283736aef0d58e2f14e55f8cf90a"}, - {file = "airbyte_source_faker-6.0.1-py3-none-any.whl", hash = "sha256:622cd123589218cffe69755727addfe85873d7563002cf8d5f949586604e0d9f"}, -] - -[package.dependencies] -airbyte-cdk = ">=0.2,<1.0" -mimesis = "6.1.1" - -[package.extras] -tests = ["pytest (>=6.2,<7.0)", "pytest-mock (>=3.6.1,<3.7.0)", "requests-mock (>=1.9.3,<1.10.0)"] - [[package]] name = "airbyte-source-pokeapi" version = "0.2.0" @@ -563,13 +548,13 @@ websockets = ["websocket-client (>=1.3.0)"] [[package]] name = "dpath" -version = "2.0.8" +version = "2.1.6" description = "Filesystem-like pathing and searching for dictionaries" optional = false python-versions = ">=3.7" files = [ - {file = "dpath-2.0.8-py3-none-any.whl", hash = "sha256:f92f595214dd93a00558d75d4b858beee519f4cffca87f02616ad6cd013f3436"}, - {file = "dpath-2.0.8.tar.gz", hash = "sha256:a3440157ebe80d0a3ad794f1b61c571bef125214800ffdb9afc9424e8250fe9b"}, + {file = "dpath-2.1.6-py3-none-any.whl", hash = "sha256:31407395b177ab63ef72e2f6ae268c15e938f2990a8ecf6510f5686c02b6db73"}, + {file = "dpath-2.1.6.tar.gz", hash = "sha256:f1e07c72e8605c6a9e80b64bc8f42714de08a789c7de417e49c3f87a19692e47"}, ] [[package]] @@ -1408,17 +1393,6 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] -[[package]] -name = "mimesis" -version = "6.1.1" -description = "Mimesis: Fake Data Generator." -optional = false -python-versions = ">=3.8,<4.0" -files = [ - {file = "mimesis-6.1.1-py3-none-any.whl", hash = "sha256:eabe41d7afa23b01dffb51ebd9e10837df6417fef02fa9841989ca886e479790"}, - {file = "mimesis-6.1.1.tar.gz", hash = "sha256:044ac378c61db0e06832ff722548fd6e604881d36bc938002e0bd5b85eeb6a98"}, -] - [[package]] name = "mypy" version = "1.10.0" @@ -3082,4 +3056,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "6e3d7eeb9e986a14dd4c9936f6710e76973c2bd6e83e861ba7ab6209a4910a3b" +content-hash = "7f576cf7c1c4b9e2d58c73628f21a94cfdfa1c752e7459611329f32b1843c30e" diff --git a/pyproject.toml b/pyproject.toml index d78de9f4a..26953525d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ enable = true [tool.poetry.dependencies] python = ">=3.9,<4.0" -airbyte-cdk = "^0.81.6" +airbyte-cdk = "^1.1.3" duckdb = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck duckdb-engine = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck google-auth = ">=2.27.0,<3.0" @@ -58,7 +58,7 @@ ruff = "^0.4.1" types-jsonschema = "^4.20.0.0" types-requests = "2.31.0.4" freezegun = "^1.4.0" -airbyte-source-faker = "^6.0.0" +# airbyte-source-faker = { version = "^6.1.1", python = "<3.12" } tomli = "^2.0" responses = "^0.25.0" airbyte-source-pokeapi = "^0.2.0" From 9f828d14a1d8d7ef19ba3e4d6666e0ccf224649b Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 14:02:33 -0700 Subject: [PATCH 07/35] downgrade cdk to declared version in manifest --- poetry.lock | 19 +++++++++---------- pyproject.toml | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/poetry.lock b/poetry.lock index ee6feb391..faadede44 100644 --- a/poetry.lock +++ b/poetry.lock @@ -32,22 +32,22 @@ dev = ["pylint (==3.1.0)"] [[package]] name = "airbyte-cdk" -version = "1.1.3" +version = "0.85.0" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-1.1.3-py3-none-any.whl", hash = "sha256:d72c8a26ed41dac11b2b945b98dd81fb868f31bed150c5a2495c2dd68c61df86"}, - {file = "airbyte_cdk-1.1.3.tar.gz", hash = "sha256:8d2a331a4a61f7d7ec1ff5ba76ca5d4fd70c2e24146e4b12673568c08484dece"}, + {file = "airbyte_cdk-0.85.0-py3-none-any.whl", hash = "sha256:6bba454fa30cf3d9090f41557034cf8a9aba38af54576d50f1ae0db763f0b163"}, + {file = "airbyte_cdk-0.85.0.tar.gz", hash = "sha256:aa6b6b7438ea636d86b46c1bb6602971e42349ce81caed5d65e5561b5463f44f"}, ] [package.dependencies] -airbyte-protocol-models = ">=0.9.0,<1.0" +airbyte-protocol-models = "*" backoff = "*" cachetools = "*" cryptography = ">=42.0.5,<43.0.0" Deprecated = ">=1.2,<1.3" -dpath = ">=2.1.6,<3.0.0" +dpath = ">=2.0.1,<2.1.0" genson = "1.2.2" isodate = ">=0.6.1,<0.7.0" Jinja2 = ">=3.1.2,<3.2.0" @@ -59,7 +59,6 @@ pydantic = ">=1.10.8,<2.0.0" pyjwt = ">=2.8.0,<3.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" python-dateutil = "*" -pytz = "2024.1" PyYAML = ">=6.0.1,<7.0.0" requests = "*" requests_cache = "*" @@ -548,13 +547,13 @@ websockets = ["websocket-client (>=1.3.0)"] [[package]] name = "dpath" -version = "2.1.6" +version = "2.0.8" description = "Filesystem-like pathing and searching for dictionaries" optional = false python-versions = ">=3.7" files = [ - {file = "dpath-2.1.6-py3-none-any.whl", hash = "sha256:31407395b177ab63ef72e2f6ae268c15e938f2990a8ecf6510f5686c02b6db73"}, - {file = "dpath-2.1.6.tar.gz", hash = "sha256:f1e07c72e8605c6a9e80b64bc8f42714de08a789c7de417e49c3f87a19692e47"}, + {file = "dpath-2.0.8-py3-none-any.whl", hash = "sha256:f92f595214dd93a00558d75d4b858beee519f4cffca87f02616ad6cd013f3436"}, + {file = "dpath-2.0.8.tar.gz", hash = "sha256:a3440157ebe80d0a3ad794f1b61c571bef125214800ffdb9afc9424e8250fe9b"}, ] [[package]] @@ -3056,4 +3055,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "7f576cf7c1c4b9e2d58c73628f21a94cfdfa1c752e7459611329f32b1843c30e" +content-hash = "3bb2e370643ccc7d80bf811e6f937eecea4426b203392103c6d035708065a102" diff --git a/pyproject.toml b/pyproject.toml index 26953525d..f7b5ea016 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ enable = true [tool.poetry.dependencies] python = ">=3.9,<4.0" -airbyte-cdk = "^1.1.3" +airbyte-cdk = "0.85.0" duckdb = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck duckdb-engine = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck google-auth = ">=2.27.0,<3.0" From 28cdb772877a155ea3173356d6587ceae657c247 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 14:03:58 -0700 Subject: [PATCH 08/35] lint fix --- airbyte/sources/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index 4c27c1efc..140d4b78a 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -46,7 +46,7 @@ def get_connector( # This non-public function includes the `docker_image` parameter, which is not exposed in the # public API. See the `experimental` module for more info. -def _get_source( # noqa: PLR0912 # Too many branches +def _get_source( # noqa: PLR0912, PLR0913 # Too many branches & arguments name: str, config: dict[str, Any] | None = None, *, From 24431d6126fed252c29dd4b9dcd6e2c61a385a12 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 14:13:58 -0700 Subject: [PATCH 09/35] add error handler in declarative yaml --- examples/run_declarative_manifest_source.py | 94 +++++++++++---------- 1 file changed, 51 insertions(+), 43 deletions(-) diff --git a/examples/run_declarative_manifest_source.py b/examples/run_declarative_manifest_source.py index ffc2bfa3a..d4cfb8a55 100644 --- a/examples/run_declarative_manifest_source.py +++ b/examples/run_declarative_manifest_source.py @@ -32,29 +32,37 @@ characters: type: DeclarativeStream name: characters + primary_key: + - id retriever: type: SimpleRetriever - paginator: - type: DefaultPaginator - page_token_option: - type: RequestOption - field_name: page - inject_into: request_parameter - pagination_strategy: - type: PageIncrement - start_from_page: 1 requester: $ref: '#/definitions/base_requester' path: character/ http_method: GET + error_handler: + type: CompositeErrorHandler + error_handlers: + - type: DefaultErrorHandler + response_filters: + - type: HttpResponseFilter + action: SUCCESS + error_message_contains: There is nothing here record_selector: type: RecordSelector extractor: type: DpathExtractor field_path: - results - primary_key: - - id + paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + inject_into: request_parameter + field_name: page + pagination_strategy: + type: PageIncrement + start_from_page: 40 schema_loader: type: InlineSchemaLoader schema: @@ -83,77 +91,77 @@ characters: type: object $schema: http://json-schema.org/schema# - required: - - id properties: type: type: - string - 'null' - id: - type: number - url: + created: type: - string - 'null' - name: + episode: type: - - string + - array - 'null' - image: + items: + type: + - string + - 'null' + gender: type: - string - 'null' - gender: + id: + type: number + image: type: - string - 'null' - origin: + location: type: - object - 'null' properties: - url: + name: type: - string - 'null' - name: + url: type: - string - 'null' - status: - type: - - string - - 'null' - created: - type: - - string - - 'null' - episode: - type: - - array - - 'null' - items: - type: - - string - - 'null' - species: + name: type: - string - 'null' - location: + origin: type: - object - 'null' properties: - url: + name: type: - string - 'null' - name: + url: type: - string - 'null' + species: + type: + - string + - 'null' + status: + type: + - string + - 'null' + url: + type: + - string + - 'null' + required: + - id additionalProperties: true """ From 3a8e79f676259642a87e492948c29de1b9ee0d16 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Thu, 30 May 2024 15:26:02 -0700 Subject: [PATCH 10/35] fix start page --- examples/run_declarative_manifest_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run_declarative_manifest_source.py b/examples/run_declarative_manifest_source.py index d4cfb8a55..bedc028df 100644 --- a/examples/run_declarative_manifest_source.py +++ b/examples/run_declarative_manifest_source.py @@ -62,7 +62,7 @@ field_name: page pagination_strategy: type: PageIncrement - start_from_page: 40 + start_from_page: 1 schema_loader: type: InlineSchemaLoader schema: From 1ae64acf486e1c3e8cc8865941614b59f09ee5ff Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 14:53:10 -0700 Subject: [PATCH 11/35] feat: enable detecting yaml declarative sources in `get_available_connectors()` --- airbyte/sources/registry.py | 73 ++++++++++++++++++++++++-- examples/run_downloaded_yaml_source.py | 36 +++++++++++++ 2 files changed, 106 insertions(+), 3 deletions(-) create mode 100644 examples/run_downloaded_yaml_source.py diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index a14861637..ed8fd47b4 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -3,9 +3,12 @@ import json import os +import warnings from copy import copy from dataclasses import dataclass +from enum import Enum from pathlib import Path +from typing import Union, cast import requests @@ -20,6 +23,18 @@ _REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json" +class InstallType(str, Enum): + YAML = "yaml" + PYTHON = "python" + DOCKER = "docker" + JAVA = "java" + + +class Language(str, Enum): + PYTHON = InstallType.PYTHON.value + JAVA = InstallType.JAVA.value + + @dataclass class ConnectorMetadata: """Metadata for a connector.""" @@ -33,6 +48,12 @@ class ConnectorMetadata: pypi_package_name: str | None """The name of the PyPI package for the connector, if it exists.""" + language: Language | None + """The language of the connector.""" + + install_types: set[InstallType] + """The supported install types for the connector.""" + def _get_registry_url() -> str: if _REGISTRY_ENV_VAR in os.environ: @@ -43,14 +64,27 @@ def _get_registry_url() -> str: def _registry_entry_to_connector_metadata(entry: dict) -> ConnectorMetadata: name = entry["dockerRepository"].replace("airbyte/", "") + language = cast(Union[str, None], entry.get("language", None)) remote_registries: dict = entry.get("remoteRegistries", {}) pypi_registry: dict = remote_registries.get("pypi", {}) pypi_package_name: str = pypi_registry.get("packageName", None) pypi_enabled: bool = pypi_registry.get("enabled", False) + install_types: set[InstallType] = { + InstallType(x) + for x in [ + "docker" if entry.get("dockerImageTag") else None, + "python" if pypi_enabled else None, + "java" if language == "java" else None, + "yaml" if "cdk:low-code" in entry.get("tags", []) else None, + ] + if x + } return ConnectorMetadata( name=name, latest_available_version=entry["dockerImageTag"], pypi_package_name=pypi_package_name if pypi_enabled else None, + language=Language(language) if language else None, + install_types=install_types, ) @@ -114,11 +148,44 @@ def get_connector_metadata(name: str) -> ConnectorMetadata: return cache[name] -def get_available_connectors() -> list[str]: +def get_available_connectors(install_type: InstallType | str = InstallType.PYTHON) -> list[str]: """Return a list of all available connectors. Connectors will be returned in alphabetical order, with the standard prefix "source-". """ - return sorted( - conn.name for conn in _get_registry_cache().values() if conn.pypi_package_name is not None + if not isinstance(install_type, InstallType): + install_type = InstallType(install_type) + + if install_type == InstallType.PYTHON: + return sorted( + conn.name + for conn in _get_registry_cache().values() + if conn.pypi_package_name is not None + ) + + if install_type == InstallType.JAVA: + warnings.warn( + message="Java connectors are not yet supported.", + stacklevel=2, + ) + return sorted( + conn.name for conn in _get_registry_cache().values() if conn.language == Language.JAVA + ) + + if install_type == InstallType.DOCKER: + return sorted(conn.name for conn in _get_registry_cache().values()) + + if install_type == InstallType.YAML: + return sorted( + conn.name + for conn in _get_registry_cache().values() + if InstallType.YAML in conn.install_types + ) + + # pragma: no cover # Should never be reached. + raise exc.PyAirbyteInputError( + message="Invalid install type.", + context={ + "install_type": install_type, + }, ) diff --git a/examples/run_downloaded_yaml_source.py b/examples/run_downloaded_yaml_source.py new file mode 100644 index 000000000..f5019cd9a --- /dev/null +++ b/examples/run_downloaded_yaml_source.py @@ -0,0 +1,36 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +"""A test of PyAirbyte calling a declarative manifest. + +Usage (from PyAirbyte root directory): +> poetry run python examples/run_downloadable_yaml_source.py + +""" + +from __future__ import annotations + +from typing import cast + +import yaml + +import airbyte as ab +from airbyte.experimental import get_source + + +nl = "\n" +print( + f"Downloadable yaml sources: \n- {(nl + '- ').join(ab.get_available_connectors(install_type='yaml'))}" +) + +print("Running declarative source...") +source = get_source( + "source-xkcd", + config={}, + source_manifest=True, +) +source.check() +source.select_all_streams() + +result = source.read() + +for name, records in result.streams.items(): + print(f"Stream {name}: {len(records)} records") From 564f57260c3e6adc17e7e3c92010a86c875f1c0b Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 16:30:39 -0700 Subject: [PATCH 12/35] feat: auto-download manifest.yaml from github `master` branch --- airbyte/sources/util.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index 140d4b78a..621e89682 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -3,13 +3,18 @@ from __future__ import annotations +import http import shutil import sys import tempfile import warnings +from json import JSONDecodeError from pathlib import Path from typing import Any +import requests +import yaml + from airbyte import exceptions as exc from airbyte._executor import DockerExecutor, PathExecutor, VenvExecutor from airbyte._util.telemetry import EventState, log_install_state @@ -184,8 +189,24 @@ def _get_source( # noqa: PLR0912, PLR0913 # Too many branches & arguments if source_manifest: if source_manifest is True: - # TODO: Locate the manifest file - raise NotImplementedError("Manifest file location is not yet implemented.") + http_url = ( + "https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-integrations" + f"/connectors/{name}/{name.replace('-', '_')}/manifest.yaml" + ) + print("Installing connector from YAML manifest URL:", http_url) + # Download the file + response = requests.get(http_url) + response.raise_for_status() # Raise an exception if the download failed + + try: + source_manifest: dict = yaml.safe_load(response.content) + except JSONDecodeError as ex: + raise exc.AirbyteConnectorInstallationError( + connector_name=name, + context={ + "manifest_url": http_url, + }, + ) from ex return Source( name=name, From 46be6f2c0ff67d75e3f717f9e26b57b2a4f3c9af Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 16:30:49 -0700 Subject: [PATCH 13/35] update example script --- examples/run_downloaded_yaml_source.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/examples/run_downloaded_yaml_source.py b/examples/run_downloaded_yaml_source.py index f5019cd9a..07630d58e 100644 --- a/examples/run_downloaded_yaml_source.py +++ b/examples/run_downloaded_yaml_source.py @@ -8,23 +8,21 @@ from __future__ import annotations -from typing import cast - -import yaml - import airbyte as ab from airbyte.experimental import get_source -nl = "\n" print( - f"Downloadable yaml sources: \n- {(nl + '- ').join(ab.get_available_connectors(install_type='yaml'))}" + "Downloadable yaml sources: \n- " + + "\n- ".join(ab.get_available_connectors(install_type="yaml")) ) print("Running declarative source...") source = get_source( - "source-xkcd", - config={}, + "source-pokeapi", + config={ + "pokemon_name": "ditto", + }, source_manifest=True, ) source.check() From b438206c12167c12f057b09e304bd4edd7d0de9d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 16:55:42 -0700 Subject: [PATCH 14/35] add hard-coded low-code exclusions --- airbyte/sources/registry.py | 74 +++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index ed8fd47b4..3fb3cdb6d 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -22,6 +22,79 @@ _REGISTRY_ENV_VAR = "AIRBYTE_LOCAL_REGISTRY" _REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json" +_LOWCODE_CONNECTORS_NEEDING_PYTHON = [ + "source-alpha-vantage", + "source-amplitude", + "source-apify-dataset", + "source-avni", + "source-braintree", + "source-braze", + "source-chargebee", + "source-close-com", + "source-commercetools", + "source-facebook-pages", + "source-fastbill", + "source-freshdesk", + "source-gitlab", + "source-gnews", + "source-greenhouse", + "source-instatus", + "source-intercom", + "source-iterable", + "source-jira", + "source-klaviyo", + "source-mailchimp", + "source-mixpanel", + "source-monday", + "source-my-hours", + "source-notion", + "source-okta", + "source-outreach", + "source-partnerstack", + "source-paypal-transaction", + "source-pinterest", + "source-pipedrive", + "source-pocket", + "source-posthog", + "source-prestashop", + "source-public-apis", + "source-qualaroo", + "source-quickbooks", + "source-railz", + "source-recharge", + "source-retently", + "source-rss", + "source-slack", + "source-surveymonkey", + "source-the-guardian-api", + "source-trello", + "source-typeform", + "source-xero", + "source-younium", + "source-zendesk-chat", + "source-zendesk-sunshine", + "source-zendesk-support", + "source-zendesk-talk", + "source-zenloop", + "source-zoom", +] +_LOWCODE_CONNECTORS_FAILING_VALIDATION = [ + "source-amazon-ads", + "source-senseforce", + "source-shortio", + "source-smaily", + "source-vantage", + "source-woocommerce", +] +_LOWCODE_CONNECTORS_404 = [ + "source-unleash", +] +_LOWCODE_CONNECTORS_EXCLUDED: list[str] = [ + *_LOWCODE_CONNECTORS_FAILING_VALIDATION, + *_LOWCODE_CONNECTORS_404, + *_LOWCODE_CONNECTORS_NEEDING_PYTHON, +] + class InstallType(str, Enum): YAML = "yaml" @@ -180,6 +253,7 @@ def get_available_connectors(install_type: InstallType | str = InstallType.PYTHO conn.name for conn in _get_registry_cache().values() if InstallType.YAML in conn.install_types + and conn.name not in _LOWCODE_CONNECTORS_EXCLUDED ) # pragma: no cover # Should never be reached. From 925f4def0d6471e23f8da28a81f12434935b9793 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 16:55:59 -0700 Subject: [PATCH 15/35] raise errors when low-code definition isn't viable --- airbyte/sources/util.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index 621e89682..d4ef5edab 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -3,14 +3,13 @@ from __future__ import annotations -import http import shutil import sys import tempfile import warnings from json import JSONDecodeError from pathlib import Path -from typing import Any +from typing import Any, cast import requests import yaml @@ -193,13 +192,25 @@ def _get_source( # noqa: PLR0912, PLR0913 # Too many branches & arguments "https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-integrations" f"/connectors/{name}/{name.replace('-', '_')}/manifest.yaml" ) - print("Installing connector from YAML manifest URL:", http_url) + print("Installing connector from YAML manifest:", http_url) # Download the file response = requests.get(http_url) response.raise_for_status() # Raise an exception if the download failed + if "class_name:" in response.text: + raise exc.AirbyteConnectorInstallationError( + message=( + "The provided manifest requires additional code files" + "and is not compatible with the declarative no-code executor." + ), + connector_name=name, + context={ + "manifest_url": http_url, + }, + ) + try: - source_manifest: dict = yaml.safe_load(response.content) + source_manifest = cast(dict, yaml.safe_load(response.text)) except JSONDecodeError as ex: raise exc.AirbyteConnectorInstallationError( connector_name=name, From bb49e79fb0c0d697be999139f37d590ea3d1268a Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 16:56:14 -0700 Subject: [PATCH 16/35] print debug info on non-installable connectors --- examples/run_downloaded_yaml_source.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/examples/run_downloaded_yaml_source.py b/examples/run_downloaded_yaml_source.py index 07630d58e..aef6ee1e7 100644 --- a/examples/run_downloaded_yaml_source.py +++ b/examples/run_downloaded_yaml_source.py @@ -9,13 +9,29 @@ from __future__ import annotations import airbyte as ab +from airbyte import exceptions as exc from airbyte.experimental import get_source -print( - "Downloadable yaml sources: \n- " - + "\n- ".join(ab.get_available_connectors(install_type="yaml")) -) +yaml_connectors: list[str] = ab.get_available_connectors(install_type="yaml") + +print("Downloadable yaml sources: \n- " + "\n- ".join(yaml_connectors)) + +failed_installs: dict[str, list[str]] = {} + +for yaml_connector in yaml_connectors: + try: + _ = get_source(yaml_connector, source_manifest=True) + except Exception as ex: + exception_type = type(ex).__name__ + if exception_type in failed_installs: + failed_installs[exception_type].append(yaml_connector) + else: + failed_installs[exception_type] = [yaml_connector] + +# Print any connector failures, grouped by the error message +for error, connectors_failed in failed_installs.items(): + print(f"\nInstallation Errors: {error}\n- " + "\n- ".join(connectors_failed) + "\n") print("Running declarative source...") source = get_source( From 8ecf5cc5e3c417591dd1e88841bd2a73fabf8a1a Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 17:04:36 -0700 Subject: [PATCH 17/35] add simple test --- tests/unit_tests/test_lowcode_connectors.py | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/unit_tests/test_lowcode_connectors.py diff --git a/tests/unit_tests/test_lowcode_connectors.py b/tests/unit_tests/test_lowcode_connectors.py new file mode 100644 index 000000000..8abe0a9a4 --- /dev/null +++ b/tests/unit_tests/test_lowcode_connectors.py @@ -0,0 +1,29 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations + +from pathlib import Path + +import pytest +from airbyte.experimental import get_source + +UNIT_TEST_DB_PATH: Path = Path(".cache") / "unit_tests" / "test_db.duckdb" + + +@pytest.mark.parametrize( + "connector_name, config", + [ + ("source-pokeapi", {"pokemon_name": "ditto"}), + ], +) +def test_nocode_execution(connector_name: str, config: dict) -> None: + source = get_source( + name=connector_name, + config=config, + source_manifest=True, + ) + source.check() + source.select_all_streams() + source.read() + for name, records in source.read().streams.items(): + assert name + assert len(records) > 0 From 5e18e3d98c6b0999e90ee2ee57db6db178e7c2f8 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 17:08:59 -0700 Subject: [PATCH 18/35] include printed count of connectors in example script --- ...ed_yaml_source.py => run_downloadable_yaml_source.py} | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) rename examples/{run_downloaded_yaml_source.py => run_downloadable_yaml_source.py} (83%) diff --git a/examples/run_downloaded_yaml_source.py b/examples/run_downloadable_yaml_source.py similarity index 83% rename from examples/run_downloaded_yaml_source.py rename to examples/run_downloadable_yaml_source.py index aef6ee1e7..3285761fc 100644 --- a/examples/run_downloaded_yaml_source.py +++ b/examples/run_downloadable_yaml_source.py @@ -9,13 +9,12 @@ from __future__ import annotations import airbyte as ab -from airbyte import exceptions as exc from airbyte.experimental import get_source yaml_connectors: list[str] = ab.get_available_connectors(install_type="yaml") -print("Downloadable yaml sources: \n- " + "\n- ".join(yaml_connectors)) +print(f"Downloadable yaml sources ({len(yaml_connectors)}): \n- " + "\n- ".join(yaml_connectors)) failed_installs: dict[str, list[str]] = {} @@ -31,7 +30,11 @@ # Print any connector failures, grouped by the error message for error, connectors_failed in failed_installs.items(): - print(f"\nInstallation Errors: {error}\n- " + "\n- ".join(connectors_failed) + "\n") + print( + f"\nInstallation Errors ({len(failed_installs)}): {error}\n- " + + "\n- ".join(connectors_failed) + + "\n" + ) print("Running declarative source...") source = get_source( From 8f0d8382694a126040384a92b545956b0d3943be Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Fri, 31 May 2024 17:34:16 -0700 Subject: [PATCH 19/35] lint fixes --- airbyte/sources/declarative.py | 3 ++- airbyte/sources/util.py | 2 +- examples/run_declarative_manifest_source.py | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte/sources/declarative.py b/airbyte/sources/declarative.py index 231649b63..d96af8a1a 100644 --- a/airbyte/sources/declarative.py +++ b/airbyte/sources/declarative.py @@ -11,6 +11,7 @@ from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte._executor import Executor +from airbyte.exceptions import PyAirbyteInternalError from airbyte.sources.base import Source @@ -43,7 +44,7 @@ def __init__( self._manifest_dict = manifest if not isinstance(self._manifest_dict, dict): - raise ValueError("Manifest must be a dict.") + raise PyAirbyteInternalError(message="Manifest must be a dict.") self.declarative_source = ManifestDeclarativeSource(source_config=self._manifest_dict) self.reported_version: str | None = None # TODO: Consider adding version detection diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index d4ef5edab..dd28fc031 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -50,7 +50,7 @@ def get_connector( # This non-public function includes the `docker_image` parameter, which is not exposed in the # public API. See the `experimental` module for more info. -def _get_source( # noqa: PLR0912, PLR0913 # Too many branches & arguments +def _get_source( # noqa: PLR0912, PLR0913, PLR0915 # Too complex name: str, config: dict[str, Any] | None = None, *, diff --git a/examples/run_declarative_manifest_source.py b/examples/run_declarative_manifest_source.py index bedc028df..80bc511e8 100644 --- a/examples/run_declarative_manifest_source.py +++ b/examples/run_declarative_manifest_source.py @@ -12,7 +12,6 @@ import yaml -import airbyte as ab from airbyte.experimental import get_source From 438fbced417bad9a76808782c6b0e5d1d03bd76b Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sat, 1 Jun 2024 12:17:15 -0700 Subject: [PATCH 20/35] chore: bump source-faker and airbyte-cdk to latest --- poetry.lock | 93 ++++++++++++++++++++++++++++++++------------------ pyproject.toml | 4 +-- 2 files changed, 62 insertions(+), 35 deletions(-) diff --git a/poetry.lock b/poetry.lock index faadede44..fc463b868 100644 --- a/poetry.lock +++ b/poetry.lock @@ -32,22 +32,22 @@ dev = ["pylint (==3.1.0)"] [[package]] name = "airbyte-cdk" -version = "0.85.0" +version = "1.1.3" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-0.85.0-py3-none-any.whl", hash = "sha256:6bba454fa30cf3d9090f41557034cf8a9aba38af54576d50f1ae0db763f0b163"}, - {file = "airbyte_cdk-0.85.0.tar.gz", hash = "sha256:aa6b6b7438ea636d86b46c1bb6602971e42349ce81caed5d65e5561b5463f44f"}, + {file = "airbyte_cdk-1.1.3-py3-none-any.whl", hash = "sha256:d72c8a26ed41dac11b2b945b98dd81fb868f31bed150c5a2495c2dd68c61df86"}, + {file = "airbyte_cdk-1.1.3.tar.gz", hash = "sha256:8d2a331a4a61f7d7ec1ff5ba76ca5d4fd70c2e24146e4b12673568c08484dece"}, ] [package.dependencies] -airbyte-protocol-models = "*" +airbyte-protocol-models = ">=0.9.0,<1.0" backoff = "*" cachetools = "*" cryptography = ">=42.0.5,<43.0.0" Deprecated = ">=1.2,<1.3" -dpath = ">=2.0.1,<2.1.0" +dpath = ">=2.1.6,<3.0.0" genson = "1.2.2" isodate = ">=0.6.1,<0.7.0" Jinja2 = ">=3.1.2,<3.2.0" @@ -59,6 +59,7 @@ pydantic = ">=1.10.8,<2.0.0" pyjwt = ">=2.8.0,<3.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" python-dateutil = "*" +pytz = "2024.1" PyYAML = ">=6.0.1,<7.0.0" requests = "*" requests_cache = "*" @@ -83,6 +84,21 @@ files = [ [package.dependencies] pydantic = ">=1.9.2,<2.0.0" +[[package]] +name = "airbyte-source-faker" +version = "6.1.2" +description = "Source implementation for fake but realistic looking data." +optional = false +python-versions = "<3.12,>=3.9" +files = [ + {file = "airbyte_source_faker-6.1.2-py3-none-any.whl", hash = "sha256:7447c5ce9448b40e679cb5cbea93df36ef22e05f8f934605f8d68f02c11c8d40"}, + {file = "airbyte_source_faker-6.1.2.tar.gz", hash = "sha256:a6fd5c7f37890d95cbb5567595069ca0f6b3cc41e46169933f1f35d3250d2b47"}, +] + +[package.dependencies] +airbyte-cdk = ">=0.73.0,<2.0" +mimesis = "6.1.1" + [[package]] name = "airbyte-source-pokeapi" version = "0.2.0" @@ -547,13 +563,13 @@ websockets = ["websocket-client (>=1.3.0)"] [[package]] name = "dpath" -version = "2.0.8" +version = "2.1.6" description = "Filesystem-like pathing and searching for dictionaries" optional = false python-versions = ">=3.7" files = [ - {file = "dpath-2.0.8-py3-none-any.whl", hash = "sha256:f92f595214dd93a00558d75d4b858beee519f4cffca87f02616ad6cd013f3436"}, - {file = "dpath-2.0.8.tar.gz", hash = "sha256:a3440157ebe80d0a3ad794f1b61c571bef125214800ffdb9afc9424e8250fe9b"}, + {file = "dpath-2.1.6-py3-none-any.whl", hash = "sha256:31407395b177ab63ef72e2f6ae268c15e938f2990a8ecf6510f5686c02b6db73"}, + {file = "dpath-2.1.6.tar.gz", hash = "sha256:f1e07c72e8605c6a9e80b64bc8f42714de08a789c7de417e49c3f87a19692e47"}, ] [[package]] @@ -1255,13 +1271,13 @@ extended-testing = ["jinja2 (>=3,<4)"] [[package]] name = "langsmith" -version = "0.1.65" +version = "0.1.67" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.65-py3-none-any.whl", hash = "sha256:ab4487029240e69cca30da1065f1e9138e5a7ca2bbe8c697f0bd7d5839f71cf7"}, - {file = "langsmith-0.1.65.tar.gz", hash = "sha256:d3c2eb2391478bd79989f02652cf66e29a7959d677614b6993a47cef43f7f43b"}, + {file = "langsmith-0.1.67-py3-none-any.whl", hash = "sha256:7eb2e1c1b375925ff47700ed8071e10c15e942e9d1d634b4a449a9060364071a"}, + {file = "langsmith-0.1.67.tar.gz", hash = "sha256:149558669a2ac4f21471cd964e61072687bba23b7c1ccb51f190a8f59b595b39"}, ] [package.dependencies] @@ -1392,6 +1408,17 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "mimesis" +version = "6.1.1" +description = "Mimesis: Fake Data Generator." +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "mimesis-6.1.1-py3-none-any.whl", hash = "sha256:eabe41d7afa23b01dffb51ebd9e10837df6417fef02fa9841989ca886e479790"}, + {file = "mimesis-6.1.1.tar.gz", hash = "sha256:044ac378c61db0e06832ff722548fd6e604881d36bc938002e0bd5b85eeb6a98"}, +] + [[package]] name = "mypy" version = "1.10.0" @@ -2541,28 +2568,28 @@ pyasn1 = ">=0.1.3" [[package]] name = "ruff" -version = "0.4.6" +version = "0.4.7" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.4.6-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ef995583a038cd4a7edf1422c9e19118e2511b8ba0b015861b4abd26ec5367c5"}, - {file = "ruff-0.4.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:602ebd7ad909eab6e7da65d3c091547781bb06f5f826974a53dbe563d357e53c"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f9ced5cbb7510fd7525448eeb204e0a22cabb6e99a3cb160272262817d49786"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04a80acfc862e0e1630c8b738e70dcca03f350bad9e106968a8108379e12b31f"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be47700ecb004dfa3fd4dcdddf7322d4e632de3c06cd05329d69c45c0280e618"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:1ff930d6e05f444090a0139e4e13e1e2e1f02bd51bb4547734823c760c621e79"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f13410aabd3b5776f9c5699f42b37a3a348d65498c4310589bc6e5c548dc8a2f"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0cf5cc02d3ae52dfb0c8a946eb7a1d6ffe4d91846ffc8ce388baa8f627e3bd50"}, - {file = "ruff-0.4.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea3424793c29906407e3cf417f28fc33f689dacbbadfb52b7e9a809dd535dcef"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1fa8561489fadf483ffbb091ea94b9c39a00ed63efacd426aae2f197a45e67fc"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:4d5b914818d8047270308fe3e85d9d7f4a31ec86c6475c9f418fbd1624d198e0"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_i686.whl", hash = "sha256:4f02284335c766678778475e7698b7ab83abaf2f9ff0554a07b6f28df3b5c259"}, - {file = "ruff-0.4.6-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3a6a0a4f4b5f54fff7c860010ab3dd81425445e37d35701a965c0248819dde7a"}, - {file = "ruff-0.4.6-py3-none-win32.whl", hash = "sha256:9018bf59b3aa8ad4fba2b1dc0299a6e4e60a4c3bc62bbeaea222679865453062"}, - {file = "ruff-0.4.6-py3-none-win_amd64.whl", hash = "sha256:a769ae07ac74ff1a019d6bd529426427c3e30d75bdf1e08bb3d46ac8f417326a"}, - {file = "ruff-0.4.6-py3-none-win_arm64.whl", hash = "sha256:735a16407a1a8f58e4c5b913ad6102722e80b562dd17acb88887685ff6f20cf6"}, - {file = "ruff-0.4.6.tar.gz", hash = "sha256:a797a87da50603f71e6d0765282098245aca6e3b94b7c17473115167d8dfb0b7"}, + {file = "ruff-0.4.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e089371c67892a73b6bb1525608e89a2aca1b77b5440acf7a71dda5dac958f9e"}, + {file = "ruff-0.4.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:10f973d521d910e5f9c72ab27e409e839089f955be8a4c8826601a6323a89753"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c3d110970001dfa494bcd95478e62286c751126dfb15c3c46e7915fc49694f"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa9773c6c00f4958f73b317bc0fd125295110c3776089f6ef318f4b775f0abe4"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07fc80bbb61e42b3b23b10fda6a2a0f5a067f810180a3760c5ef1b456c21b9db"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:fa4dafe3fe66d90e2e2b63fa1591dd6e3f090ca2128daa0be33db894e6c18648"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7c0083febdec17571455903b184a10026603a1de078428ba155e7ce9358c5f6"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad1b20e66a44057c326168437d680a2166c177c939346b19c0d6b08a62a37589"}, + {file = "ruff-0.4.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbf5d818553add7511c38b05532d94a407f499d1a76ebb0cad0374e32bc67202"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:50e9651578b629baec3d1513b2534de0ac7ed7753e1382272b8d609997e27e83"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8874a9df7766cb956b218a0a239e0a5d23d9e843e4da1e113ae1d27ee420877a"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b9de9a6e49f7d529decd09381c0860c3f82fa0b0ea00ea78409b785d2308a567"}, + {file = "ruff-0.4.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:13a1768b0691619822ae6d446132dbdfd568b700ecd3652b20d4e8bc1e498f78"}, + {file = "ruff-0.4.7-py3-none-win32.whl", hash = "sha256:769e5a51df61e07e887b81e6f039e7ed3573316ab7dd9f635c5afaa310e4030e"}, + {file = "ruff-0.4.7-py3-none-win_amd64.whl", hash = "sha256:9e3ab684ad403a9ed1226894c32c3ab9c2e0718440f6f50c7c5829932bc9e054"}, + {file = "ruff-0.4.7-py3-none-win_arm64.whl", hash = "sha256:10f2204b9a613988e3484194c2c9e96a22079206b22b787605c255f130db5ed7"}, + {file = "ruff-0.4.7.tar.gz", hash = "sha256:2331d2b051dc77a289a653fcc6a42cce357087c5975738157cd966590b18b5e1"}, ] [[package]] @@ -2884,13 +2911,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.12.0" +version = "4.12.1" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, - {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, + {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, + {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, ] [[package]] @@ -3055,4 +3082,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "3bb2e370643ccc7d80bf811e6f937eecea4426b203392103c6d035708065a102" +content-hash = "e95824292e51cbc6f4756733ed23c2414ec5d68eece43e0d326ea2d9b1700f10" diff --git a/pyproject.toml b/pyproject.toml index f7b5ea016..a867f3e21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ enable = true [tool.poetry.dependencies] python = ">=3.9,<4.0" -airbyte-cdk = "0.85.0" +airbyte-cdk = "^1.1.3" duckdb = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck duckdb-engine = "0.9.2" # TODO: Change to "^0.10.0" once supported by MotherDuck google-auth = ">=2.27.0,<3.0" @@ -58,7 +58,7 @@ ruff = "^0.4.1" types-jsonschema = "^4.20.0.0" types-requests = "2.31.0.4" freezegun = "^1.4.0" -# airbyte-source-faker = { version = "^6.1.1", python = "<3.12" } +airbyte-source-faker = { version = "^6.1.2", python = "<3.12" } tomli = "^2.0" responses = "^0.25.0" airbyte-source-pokeapi = "^0.2.0" From b1c5c7196ae225f975f40c50b2e07a1fb22f3df4 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sat, 1 Jun 2024 12:47:55 -0700 Subject: [PATCH 21/35] add integration tests for all hard-coded failures --- .../test_lowcode_connectors.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 tests/integration_tests/test_lowcode_connectors.py diff --git a/tests/integration_tests/test_lowcode_connectors.py b/tests/integration_tests/test_lowcode_connectors.py new file mode 100644 index 000000000..c20208236 --- /dev/null +++ b/tests/integration_tests/test_lowcode_connectors.py @@ -0,0 +1,78 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +from __future__ import annotations + +from pathlib import Path + +import airbyte +import jsonschema +import pytest +from airbyte import exceptions as exc +from airbyte.experimental import get_source +from airbyte.sources.registry import ( + _LOWCODE_CONNECTORS_404, + _LOWCODE_CONNECTORS_FAILING_VALIDATION, + _LOWCODE_CONNECTORS_NEEDING_PYTHON, +) + +UNIT_TEST_DB_PATH: Path = Path(".cache") / "unit_tests" / "test_db.duckdb" + + +@pytest.mark.parametrize( + "connector_name", + airbyte.get_available_connectors(install_type="yaml"), +) +def test_nocode_connectors_setup(connector_name: str) -> None: + """Test that all connectors can be initialized. + + If a specific connector fails to initialize, it should be added to the + hardcoded failure list (tested below). + """ + try: + _ = get_source( + name=connector_name, + source_manifest=True, + ) + except Exception as ex: + raise AssertionError( + f"Expected '{connector_name}' init success but got '{type(ex).__name__}': {ex}" + ) + + +@pytest.mark.parametrize( + "failure_group, exception_type", + [ + (_LOWCODE_CONNECTORS_FAILING_VALIDATION, jsonschema.exceptions.ValidationError), + (_LOWCODE_CONNECTORS_NEEDING_PYTHON, exc.AirbyteConnectorInstallationError), + (_LOWCODE_CONNECTORS_404, Exception), + ], +) +def test_expected_hardcoded_failures( + failure_group, + exception_type: str, +) -> None: + """Test that hardcoded failure groups are failing as expected. + + If a connector starts passing, this is probably good news, and it should be removed from the + hardcoded failure list. + """ + for connector_name in failure_group: + try: + _ = get_source( + name=connector_name, + source_manifest=True, + ) + except Exception as ex: + if isinstance(ex, exception_type): + pass + else: + raise AssertionError( + f"Expected connector {connector_name} to fail with" + f" '{exception_type}' but got '{type(ex).__name__}'. " + ) + else: + raise AssertionError( + f"Expected connector {connector_name} to fail with" + f" '{exception_type}' but got no exception. " + "This probably means you need to remove this connector from the" + " hardcoded failure list." + ) From 8e28bfc85ef134a0ec4dcfc461acb81379287894 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sat, 1 Jun 2024 12:48:08 -0700 Subject: [PATCH 22/35] remove stale failures list --- airbyte/sources/registry.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index 3fb3cdb6d..c1be19530 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -80,11 +80,6 @@ ] _LOWCODE_CONNECTORS_FAILING_VALIDATION = [ "source-amazon-ads", - "source-senseforce", - "source-shortio", - "source-smaily", - "source-vantage", - "source-woocommerce", ] _LOWCODE_CONNECTORS_404 = [ "source-unleash", From 82baf690f24bfd65e1b93ac9dc0d126eb9333cf5 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sat, 1 Jun 2024 12:52:47 -0700 Subject: [PATCH 23/35] tests: only autouse source registry in specific files --- tests/conftest.py | 2 +- tests/integration_tests/test_source_test_fixture.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0e176b24f..c87596398 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -225,7 +225,7 @@ def new_postgres_cache(): postgres.remove() -@pytest.fixture(autouse=True) +@pytest.fixture(autouse=False) def source_test_registry(monkeypatch): """ Set environment variables for the test source. diff --git a/tests/integration_tests/test_source_test_fixture.py b/tests/integration_tests/test_source_test_fixture.py index bbb568490..f0e7628c6 100644 --- a/tests/integration_tests/test_source_test_fixture.py +++ b/tests/integration_tests/test_source_test_fixture.py @@ -27,6 +27,11 @@ from sqlalchemy import column, text +@pytest.fixture(scope="function", autouse=True) +def autouse_source_test_registry(source_test_registry): + return + + def pop_internal_columns_from_dataset( dataset: datasets.DatasetBase | list[dict[str, Any]], /, From e6b265531daf994d962b10e535d3497a546d01e5 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sat, 1 Jun 2024 12:57:01 -0700 Subject: [PATCH 24/35] lint fixes --- airbyte/__init__.py | 1 + airbyte/_processors/sql/__init__.py | 1 + airbyte/caches/__init__.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte/__init__.py b/airbyte/__init__.py index 3a739ae8c..b4f6acea6 100644 --- a/airbyte/__init__.py +++ b/airbyte/__init__.py @@ -16,6 +16,7 @@ documents, exceptions, # noqa: ICN001 # No 'exc' alias for top-level module experimental, + records, results, secrets, sources, diff --git a/airbyte/_processors/sql/__init__.py b/airbyte/_processors/sql/__init__.py index 980745fe1..cf1d02f27 100644 --- a/airbyte/_processors/sql/__init__.py +++ b/airbyte/_processors/sql/__init__.py @@ -3,6 +3,7 @@ from __future__ import annotations +from airbyte._processors.sql import snowflakecortex from airbyte._processors.sql.snowflakecortex import ( SnowflakeCortexSqlProcessor, SnowflakeCortexTypeConverter, diff --git a/airbyte/caches/__init__.py b/airbyte/caches/__init__.py index 14e714bdd..ed6ab6833 100644 --- a/airbyte/caches/__init__.py +++ b/airbyte/caches/__init__.py @@ -3,7 +3,7 @@ from __future__ import annotations -from airbyte.caches import bigquery, duckdb, motherduck, postgres, snowflake, util +from airbyte.caches import base, bigquery, duckdb, motherduck, postgres, snowflake, util from airbyte.caches.base import CacheBase from airbyte.caches.bigquery import BigQueryCache from airbyte.caches.duckdb import DuckDBCache From 40cae573e341a1bea890ad5f9959b5836d15d26e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sat, 1 Jun 2024 12:57:06 -0700 Subject: [PATCH 25/35] fix autouse --- tests/integration_tests/test_source_test_fixture.py | 5 ----- tests/unit_tests/test_anonymous_usage_stats.py | 11 +++++++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/integration_tests/test_source_test_fixture.py b/tests/integration_tests/test_source_test_fixture.py index f0e7628c6..0f279e00d 100644 --- a/tests/integration_tests/test_source_test_fixture.py +++ b/tests/integration_tests/test_source_test_fixture.py @@ -98,11 +98,6 @@ def autouse_source_test_installation(source_test_installation): return -@pytest.fixture(scope="function", autouse=True) -def autouse_source_test_registry(source_test_registry): - return - - @pytest.fixture def source_test(source_test_env) -> ab.Source: return ab.get_source("source-test", config={"apiKey": "test"}) diff --git a/tests/unit_tests/test_anonymous_usage_stats.py b/tests/unit_tests/test_anonymous_usage_stats.py index e29b10d8b..83256c36a 100644 --- a/tests/unit_tests/test_anonymous_usage_stats.py +++ b/tests/unit_tests/test_anonymous_usage_stats.py @@ -2,18 +2,21 @@ from __future__ import annotations import json -from pathlib import Path import re +from pathlib import Path from unittest.mock import MagicMock -import responses - import airbyte as ab import pytest - +import responses from airbyte._util import telemetry +@pytest.fixture(scope="function", autouse=True) +def autouse_source_test_registry(source_test_registry): + return + + @responses.activate def test_telemetry_track(monkeypatch): """Check that track is called and the correct data is sent.""" From f0183e02a210a8cc98cc22b45acfc4b93f051fad Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sun, 2 Jun 2024 16:26:38 -0700 Subject: [PATCH 26/35] fix some tests --- tests/integration_tests/test_source_test_fixture.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration_tests/test_source_test_fixture.py b/tests/integration_tests/test_source_test_fixture.py index 0f279e00d..fbabc839b 100644 --- a/tests/integration_tests/test_source_test_fixture.py +++ b/tests/integration_tests/test_source_test_fixture.py @@ -218,6 +218,8 @@ def test_version_enforcement( name="source-test", latest_available_version=latest_available_version, pypi_package_name="airbyte-source-test", + language=registry.Language.PYTHON, + install_types={registry.InstallType.PYTHON, registry.InstallType.DOCKER}, ) # We need to initialize the cache before we can patch it. From 7623619ac768043d0d50d670f0cf35dee1a17dee Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Sun, 2 Jun 2024 16:32:45 -0700 Subject: [PATCH 27/35] add missing fixture inclusions --- tests/unit_tests/test_anonymous_usage_stats.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/test_anonymous_usage_stats.py b/tests/unit_tests/test_anonymous_usage_stats.py index 83256c36a..cb8558438 100644 --- a/tests/unit_tests/test_anonymous_usage_stats.py +++ b/tests/unit_tests/test_anonymous_usage_stats.py @@ -18,7 +18,7 @@ def autouse_source_test_registry(source_test_registry): @responses.activate -def test_telemetry_track(monkeypatch): +def test_telemetry_track(monkeypatch, source_test_registry): """Check that track is called and the correct data is sent.""" monkeypatch.delenv("DO_NOT_TRACK", raising=False) @@ -72,7 +72,11 @@ def test_telemetry_track(monkeypatch): @pytest.mark.parametrize("do_not_track", ["1", "true", "t"]) @responses.activate -def test_do_not_track(monkeypatch, do_not_track): +def test_do_not_track( + monkeypatch, + do_not_track, + source_test_registry, +): """Check that track is called and the correct data is sent.""" monkeypatch.setenv("DO_NOT_TRACK", do_not_track) From b5d0924c0510afe10ffe75f6b5bfb8f4e4fcac5f Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 12:11:16 -0700 Subject: [PATCH 28/35] improve fixture isolation for 'source_test_registry' --- tests/conftest.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c87596398..845166edd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ import warnings from pathlib import Path +import airbyte import docker import psycopg2 as psycopg import pytest @@ -228,18 +229,28 @@ def new_postgres_cache(): @pytest.fixture(autouse=False) def source_test_registry(monkeypatch): """ - Set environment variables for the test source. - - These are applied to this test file only. + Mock the registry to return our custom registry containing the 'source-test' connector. This means the normal registry is not usable. Expect AirbyteConnectorNotRegisteredError for other connectors. """ - env_vars = { - "AIRBYTE_LOCAL_REGISTRY": LOCAL_TEST_REGISTRY_URL, - } - for key, value in env_vars.items(): - monkeypatch.setenv(key, value) + + # Define the mock function + def mock_get_registry_cache(): + return LOCAL_TEST_REGISTRY_URL + + # Replace _get_registry_url() with the mock function + monkeypatch.setattr( + airbyte.sources.registry, "_get_registry_url", mock_get_registry_cache + ) + + # reset the registry cache + airbyte.sources.registry.__cache = None + + yield + + # reset the registry cache (clean up) + airbyte.sources.registry.__cache = None @pytest.fixture(autouse=True) From 4376af1d9140e537c18017a3cb517bf7ac1f61b3 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 12:17:18 -0700 Subject: [PATCH 29/35] chore: improve error message --- airbyte/sources/util.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/airbyte/sources/util.py b/airbyte/sources/util.py index dd28fc031..f87f83031 100644 --- a/airbyte/sources/util.py +++ b/airbyte/sources/util.py @@ -200,8 +200,10 @@ def _get_source( # noqa: PLR0912, PLR0913, PLR0915 # Too complex if "class_name:" in response.text: raise exc.AirbyteConnectorInstallationError( message=( - "The provided manifest requires additional code files" - "and is not compatible with the declarative no-code executor." + "The provided manifest requires additional code files (`class_name` key " + "detected). This feature is not compatible with the declarative YAML " + "executor. To use this executor, please try again with the Python " + "executor." ), connector_name=name, context={ From b178d333bfb6156d31cab2a561ce4aaba01f5b62 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 12:18:55 -0700 Subject: [PATCH 30/35] fix: add new bamboo connector to exception list --- airbyte/sources/registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index c1be19530..fd068df97 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -27,6 +27,7 @@ "source-amplitude", "source-apify-dataset", "source-avni", + "source-bamboo-hr", "source-braintree", "source-braze", "source-chargebee", From 6753854ba356d63caede9ce8c241220d9065dbbf Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 12:38:10 -0700 Subject: [PATCH 31/35] use enum values directly --- airbyte/sources/registry.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index fd068df97..5e5410dc3 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -139,12 +139,12 @@ def _registry_entry_to_connector_metadata(entry: dict) -> ConnectorMetadata: pypi_package_name: str = pypi_registry.get("packageName", None) pypi_enabled: bool = pypi_registry.get("enabled", False) install_types: set[InstallType] = { - InstallType(x) + x for x in [ - "docker" if entry.get("dockerImageTag") else None, - "python" if pypi_enabled else None, - "java" if language == "java" else None, - "yaml" if "cdk:low-code" in entry.get("tags", []) else None, + InstallType.DOCKER if entry.get("dockerImageTag") else None, + InstallType.PYTHON if pypi_enabled else None, + InstallType.JAVA if language == "java" else None, + InstallType.YAML if "cdk:low-code" in entry.get("tags", []) else None, ] if x } From 83ead8ed2a907ee24192e8fd3f7dca90ebf6cf15 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 12:55:12 -0700 Subject: [PATCH 32/35] make registry parsing safer and more resilient --- airbyte/sources/registry.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index 5e5410dc3..89f40d09f 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -4,6 +4,7 @@ import json import os import warnings +from contextlib import suppress from copy import copy from dataclasses import dataclass from enum import Enum @@ -22,6 +23,8 @@ _REGISTRY_ENV_VAR = "AIRBYTE_LOCAL_REGISTRY" _REGISTRY_URL = "https://connectors.airbyte.com/files/registries/v0/oss_registry.json" +_LOWCODE_LABEL = "cdk:low-code" + _LOWCODE_CONNECTORS_NEEDING_PYTHON = [ "source-alpha-vantage", "source-amplitude", @@ -133,7 +136,15 @@ def _get_registry_url() -> str: def _registry_entry_to_connector_metadata(entry: dict) -> ConnectorMetadata: name = entry["dockerRepository"].replace("airbyte/", "") - language = cast(Union[str, None], entry.get("language", None)) + language: Language | None = None + if "language" in entry and entry["language"] is not None: + try: + language = Language(entry["language"]) + except Exception: + warnings.warn( + message=f"Invalid language for connector {name}: {entry['language']}", + stacklevel=2, + ) remote_registries: dict = entry.get("remoteRegistries", {}) pypi_registry: dict = remote_registries.get("pypi", {}) pypi_package_name: str = pypi_registry.get("packageName", None) @@ -143,16 +154,17 @@ def _registry_entry_to_connector_metadata(entry: dict) -> ConnectorMetadata: for x in [ InstallType.DOCKER if entry.get("dockerImageTag") else None, InstallType.PYTHON if pypi_enabled else None, - InstallType.JAVA if language == "java" else None, - InstallType.YAML if "cdk:low-code" in entry.get("tags", []) else None, + InstallType.JAVA if language == Language.JAVA else None, + InstallType.YAML if _LOWCODE_LABEL in entry.get("tags", []) else None, ] if x } + return ConnectorMetadata( name=name, - latest_available_version=entry["dockerImageTag"], + latest_available_version=entry.get("dockerImageTag", None), pypi_package_name=pypi_package_name if pypi_enabled else None, - language=Language(language) if language else None, + language=language, install_types=install_types, ) From 836e00f26d9f85ad692282bc928daf5662515d96 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 13:39:45 -0700 Subject: [PATCH 33/35] bump to latest airbyte-cdk --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7e4cb2ff8..d47070b41 100644 --- a/poetry.lock +++ b/poetry.lock @@ -32,13 +32,13 @@ dev = ["pylint (==3.1.0)"] [[package]] name = "airbyte-cdk" -version = "1.2.0" +version = "1.2.1" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-1.2.0-py3-none-any.whl", hash = "sha256:c206858aa7ecc6ea253f8b55ba71c1a98f8501f0c533850ec2656a9613cf77d0"}, - {file = "airbyte_cdk-1.2.0.tar.gz", hash = "sha256:111ea617cbcd786340b9cc0ce003b33ee268414f437f90274ea2b46e96aa2ea0"}, + {file = "airbyte_cdk-1.2.1-py3-none-any.whl", hash = "sha256:ca60ae569cdb8360daac2f428efb52591a34fb959ab48498a6996788ade8af24"}, + {file = "airbyte_cdk-1.2.1.tar.gz", hash = "sha256:da958afe1a08701a5db47786f865aa889003f77d4863e60384a363845ee78042"}, ] [package.dependencies] @@ -3093,4 +3093,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "9760c471339d38ed0b5a1a9652d3fa85a3e3337f04cf7b823bbc45a850ecdbc3" +content-hash = "09d2f91cad4deb7bb2c1b4cc87375b69d7792a4fa324de8d04d67256d47f7f47" diff --git a/pyproject.toml b/pyproject.toml index 89bbd1605..9f3cb17ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ enable = true [tool.poetry.dependencies] python = ">=3.9,<4.0" -airbyte-cdk = "^1.1.3" +airbyte-cdk = "^1.2.1" duckdb = "^1.0.0" duckdb-engine = "^0.13.0" google-auth = ">=2.27.0,<3.0" From b34c45daf10425f7655f325d99393bfc73b01469 Mon Sep 17 00:00:00 2001 From: octavia-squidington-iii Date: Mon, 3 Jun 2024 20:41:49 +0000 Subject: [PATCH 34/35] Auto-fix lint and format issues --- airbyte/sources/registry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte/sources/registry.py b/airbyte/sources/registry.py index 89f40d09f..8df8cd60e 100644 --- a/airbyte/sources/registry.py +++ b/airbyte/sources/registry.py @@ -4,12 +4,10 @@ import json import os import warnings -from contextlib import suppress from copy import copy from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Union, cast import requests From 8b095222dc9655333f368e2779b44be8797143bb Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 3 Jun 2024 15:54:16 -0700 Subject: [PATCH 35/35] tests: xfail pokemon no-code on windows --- tests/unit_tests/test_lowcode_connectors.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit_tests/test_lowcode_connectors.py b/tests/unit_tests/test_lowcode_connectors.py index 8abe0a9a4..0147b7216 100644 --- a/tests/unit_tests/test_lowcode_connectors.py +++ b/tests/unit_tests/test_lowcode_connectors.py @@ -4,6 +4,7 @@ from pathlib import Path import pytest +from airbyte._util.meta import is_windows from airbyte.experimental import get_source UNIT_TEST_DB_PATH: Path = Path(".cache") / "unit_tests" / "test_db.duckdb" @@ -15,6 +16,7 @@ ("source-pokeapi", {"pokemon_name": "ditto"}), ], ) +@pytest.mark.xfail(condition=is_windows(), reason="Test expected to fail on Windows.") def test_nocode_execution(connector_name: str, config: dict) -> None: source = get_source( name=connector_name,