Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .github/workflows/python_dependency_analysis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: Dependency Analysis

on:
push:
branches:
- main
paths:
- "airbyte_cdk/**"
- "poetry.lock"
- "pyproject.toml"
pull_request:
paths:
- "airbyte_cdk/**"
- "poetry.lock"
- "pyproject.toml"

jobs:
dependency-analysis:
name: Dependency Analysis with Deptry
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Set up Poetry
uses: Gr1N/setup-poetry@v9
with:
poetry-version: "2.0.1"
- name: Install dependencies
run: poetry install --all-extras

# Job-specific step(s):
- name: Run Deptry
run: |
poetry run deptry .
97 changes: 93 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

77 changes: 67 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,31 +40,33 @@ isodate = "~0.6.1"
Jinja2 = "~3.1.2"
jsonref = "~0.2"
jsonschema = "~4.17.3" # 4.18 has some significant breaking changes: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0
packaging = "*" # Transitive dependency used directly in code
pandas = "2.2.2"
psutil = "6.1.0"
psutil = "6.1.0" # TODO: Remove if unused
pydantic = "^2.7"
pyrate-limiter = "~3.1.0"
python-dateutil = "^2.9.0"
python-ulid = "^3.0.0"
PyYAML = "^6.0.1"
rapidfuzz = "^3.10.1"
rapidfuzz = "^3.10.1" # TODO: Remove if unused
requests = "*"
requests_cache = "*"
typing-extensions = "*" # Transitive dependency used directly in code
wcmatch = "10.0"
# Extras dependencies
avro = { version = ">=1.11.2,<1.13.0", optional = true }
avro = { version = ">=1.11.2,<1.13.0", optional = true } # TODO: Move into dev dependencies if only used in tests
cohere = { version = "4.21", optional = true }
fastavro = { version = "~1.8.0", optional = true }
langchain = { version = "0.1.16", optional = true }
langchain_core = { version = "0.1.42", optional = true }
markdown = { version = "*", optional = true }
openai = { version = "0.27.9", extras = ["embeddings"], optional = true }
markdown = { version = "*", optional = true } # TODO: Remove if unused
openai = { version = "0.27.9", extras = ["embeddings"], optional = true } # Used indirectly by langchain library
pdf2image = { version = "1.16.3", optional = true }
"pdfminer.six" = { version = "20221105", optional = true }
"pdfminer.six" = { version = "20221105", optional = true } # Used indirectly by unstructured library
pyarrow = { version = "^19.0.0", optional = true }
pytesseract = { version = "0.3.10", optional = true }
python-calamine = { version = "0.2.3", optional = true }
python-snappy = { version = "0.7.3", optional = true }
pytesseract = { version = "0.3.10", optional = true } # Used indirectly by unstructured library
python-calamine = { version = "0.2.3", optional = true } # TODO: Remove if unused
python-snappy = { version = "0.7.3", optional = true } # TODO: Remove if unused
tiktoken = { version = "0.8.0", optional = true }
nltk = { version = "3.9.1", optional = true }
# This will ensure that even when you run poetry install or pip install, the compatible version of numpy will always be chosen.
Expand All @@ -73,7 +75,7 @@ numpy = "<2"
unstructured = { version = "0.10.27", extras = ["docx", "pptx"], optional = true }
"unstructured.pytesseract" = { version = ">=0.3.12", optional = true }
pyjwt = "^2.8.0"
cryptography = ">=44.0.0,<45.0.0"
cryptography = ">=44.0.0,<45.0.0" # Constrained as transitive dependency due to a bug in newer versions
pytz = "2024.2"
orjson = "^3.10.7"
serpyco-rs = "^1.10.2"
Expand Down Expand Up @@ -102,6 +104,7 @@ types-requests = "^2.32.0.20241016"
types-python-dateutil = "^2.9.0.20241003"
types-pyyaml = "^6.0.12.20240917"
types-cachetools = "^5.5.0.20240820"
deptry = "^0.23.0"

[tool.poetry.extras]
file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"]
Expand Down Expand Up @@ -196,3 +199,57 @@ optional_poetry_groups = ["dev"]
poetry_extras = ["file-based", "vector-db-based"]
poe_tasks = ["check-ci"]
mount_docker_socket = true

[tool.deptry]
exclude = [
"bin",
"docs",
"unit_tests",
]

[tool.deptry.per_rule_ignores]
# Maps each deptry rule code to the package names that should be ignored for that rule.

# DEP001: Project should not contain missing dependencies.
# https://deptry.com/rules-violations/#missing-dependencies-dep001
DEP001 = [
# These are imported but not declared:
"source_declarative_manifest" # Imported only in dynamic import tests, not in main code
]

# DEP002: Project should not contain unused dependencies.
# https://deptry.com/rules-violations/#unused-dependencies-dep002
DEP002 = [
"cryptography", # Constrained as transitive dependency due to a bug in newer versions

# TODO: Remove these dependencies if not needed:
"avro", # Only imported in `unit_tests` code
"psutil",
"rapidfuzz",
Comment thread
aaronsteers marked this conversation as resolved.
"cohere",
"markdown",
"openai",
"pdf2image",
"pdfminer.six",
"pytesseract",
"python-calamine",
"python-snappy",
"tiktoken",
"unstructured.pytesseract",
]

# DEP003: Project should not use transitive dependencies.
# https://deptry.com/rules-violations/#transitive-dependencies-dep003
DEP003 = [
# Transitive dependencies that are imported directly
"pydantic_core" # Pydantic internals, no need to define as separate dependency
]

# DEP004: Project should not use development dependencies in non-development code.
# https://deptry.com/rules-violations/#misplaced-development-dependencies-dep004
DEP004 = [
# The `airbyte_cdk.test.utils` module is main code.
# TODO: These should probably be declared within a `tests` extra:
"pytest",
"requests_mock",
]