Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- **Protocol DTO serialization:** `core.protocol_dto` base dataclasses (`TrackerResultDataclass`, `IncrementalStateDataclass`, `ActivityRecordDataclass`) provide canonical `asdict()`, `to_json()`, and `from_dict()` methods and a truncated `__repr__` for all tracker `protocol_impl` frozen dataclasses; `core.collectors.GenericActivityRecord` is added as the default `ActivityRecord` implementation.
- **Stability policy** ([`STABILITY.md`](STABILITY.md)): documents stable vs evolving vs unstable interfaces for production and contributors; README links to it.
- **`core.adapters`:** stable adapter protocols and implementations for Pinecone (`PineconeAdapter`), Slack Web API (`SlackWebApiAdapter`), and GitHub REST/GraphQL (`GitHubApiAdapter`). The `pinecone` SDK is imported only from `core/adapters/pinecone.py`; `cppa_pinecone_sync.ingestion` uses `PineconeClientProtocol` with injectable fakes for tests.

Expand All @@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- **core.collectors:** `BaseCollectorCommand` adds `result_repr` and `result_json` to the structured log `extra` when the run result is a `TrackerResultDataclass` instance.
- **core.collectors:** `AbstractCollector.last_result` is set only after `post_collect()` completes successfully (including default incremental checkpoint persistence), matching the documented “most recent successful run” semantics.
- **discord_activity_tracker:** `backfill_discord_activity_tracker` reports per-file import failures on `DiscordCollectionTrackerResult` (`success=False`, `errors`, `failed_files` count) instead of always returning `success=True`.
- **core.protocols / ActivityRecord:** `occurred_at` is timezone-aware UTC `datetime | None`; `source_system` is `SourceSystem` (`StrEnum`); `activity_type` is branded `ActivityType`; `actor_external_id` is `ActorExternalId` (`NewType`). Legacy string payloads use `core.activity_types.migrate_legacy_activity_fields` and `activity_record_to_legacy_dict` on GitHub/Discord `protocol_impl` dataclasses.
Expand Down
18 changes: 5 additions & 13 deletions boost_library_docs_tracker/protocol_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,14 @@

from __future__ import annotations

from dataclasses import dataclass, field
from types import MappingProxyType
from typing import Mapping
from dataclasses import dataclass

from core.protocol_dto import TrackerResultDataclass

@dataclass(frozen=True)
class LibraryDocsTrackerResult:
"""Structured :class:`~core.protocols.TrackerResult` for docs scrape runs."""

success: bool
counts: Mapping[str, int]
errors: tuple[str, ...] = field(default_factory=tuple)
duration_seconds: float | None = None

def __post_init__(self) -> None:
object.__setattr__(self, "counts", MappingProxyType(dict(self.counts)))
@dataclass(frozen=True, repr=False)
class LibraryDocsTrackerResult(TrackerResultDataclass):
"""Structured :class:`~core.protocols.TrackerResult` for docs scrape runs."""

@classmethod
def from_run(
Expand Down
18 changes: 5 additions & 13 deletions boost_library_tracker/protocol_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,14 @@

from __future__ import annotations

from dataclasses import dataclass, field
from types import MappingProxyType
from typing import Mapping
from dataclasses import dataclass

from core.protocol_dto import TrackerResultDataclass

@dataclass(frozen=True)
class CollectBoostLibrariesResult:
"""Structured :class:`~core.protocols.TrackerResult` for library metadata collection."""

success: bool
counts: Mapping[str, int]
errors: tuple[str, ...] = field(default_factory=tuple)
duration_seconds: float | None = None

def __post_init__(self) -> None:
object.__setattr__(self, "counts", MappingProxyType(dict(self.counts)))
@dataclass(frozen=True, repr=False)
class CollectBoostLibrariesResult(TrackerResultDataclass):
"""Structured :class:`~core.protocols.TrackerResult` for library metadata collection."""

@classmethod
def from_totals(
Expand Down
13 changes: 5 additions & 8 deletions boost_library_usage_dashboard/protocol_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@

from __future__ import annotations

from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import Any, Mapping

from core.protocol_dto import TrackerResultDataclass

@dataclass(frozen=True)
class UsageDashboardTrackerResult:
"""Structured :class:`~core.protocols.TrackerResult` for dashboard runs."""

success: bool
counts: Mapping[str, int]
errors: tuple[str, ...] = field(default_factory=tuple)
duration_seconds: float | None = None
@dataclass(frozen=True, repr=False)
class UsageDashboardTrackerResult(TrackerResultDataclass):
"""Structured :class:`~core.protocols.TrackerResult` for dashboard runs."""

@classmethod
def from_stats(cls, stats: Mapping[str, Any] | None) -> UsageDashboardTrackerResult:
Expand Down
22 changes: 7 additions & 15 deletions boost_mailing_list_tracker/protocol_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Mapping
from dataclasses import dataclass

from core.protocol_dto import IncrementalStateDataclass, TrackerResultDataclass

@dataclass(frozen=True)
class MailingListTrackerResult:
"""Structured :class:`~core.protocols.TrackerResult` for mailing list runs."""

success: bool
counts: Mapping[str, int]
errors: tuple[str, ...] = field(default_factory=tuple)
duration_seconds: float | None = None
@dataclass(frozen=True, repr=False)
class MailingListTrackerResult(TrackerResultDataclass):
"""Structured :class:`~core.protocols.TrackerResult` for mailing list runs."""

@classmethod
def from_run(
Expand All @@ -35,14 +31,10 @@ def from_run(
)


@dataclass(frozen=True)
class MailingListIncrementalState:
@dataclass(frozen=True, repr=False)
class MailingListIncrementalState(IncrementalStateDataclass):
"""Checkpoint between mailing list runs."""

checkpoint_token: str | None
human_readable_marker: str | None
extras: Mapping[str, Any] = field(default_factory=dict)

@classmethod
def from_start_date(cls, start_date: str | None) -> MailingListIncrementalState:
return cls(
Expand Down
22 changes: 7 additions & 15 deletions clang_github_tracker/protocol_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,15 @@

from __future__ import annotations

from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Mapping

from core.protocol_dto import IncrementalStateDataclass, TrackerResultDataclass

@dataclass(frozen=True)
class ClangGithubTrackerResult:
"""Structured :class:`~core.protocols.TrackerResult` for Clang sync outcomes."""

success: bool
counts: Mapping[str, int]
errors: tuple[str, ...] = field(default_factory=tuple)
duration_seconds: float | None = None
@dataclass(frozen=True, repr=False)
class ClangGithubTrackerResult(TrackerResultDataclass):
"""Structured :class:`~core.protocols.TrackerResult` for Clang sync outcomes."""

@classmethod
def from_sync(
Expand All @@ -40,14 +36,10 @@ def dry_run(cls) -> ClangGithubTrackerResult:
return cls(success=True, counts={})


@dataclass(frozen=True)
class ClangGithubIncrementalState:
@dataclass(frozen=True, repr=False)
class ClangGithubIncrementalState(IncrementalStateDataclass):
"""Checkpoint between Clang GitHub runs."""

checkpoint_token: str | None
human_readable_marker: str | None
extras: Mapping[str, Any] = field(default_factory=dict)

@classmethod
def from_watermarks(
cls,
Expand Down
12 changes: 12 additions & 0 deletions core/activity_record.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Shared :class:`~core.protocols.ActivityRecord` implementation for collectors."""

from __future__ import annotations

from dataclasses import dataclass

from core.protocol_dto import ActivityRecordDataclass


@dataclass(frozen=True, repr=False)
class GenericActivityRecord(ActivityRecordDataclass):
    """Default frozen DTO satisfying :class:`~core.protocols.ActivityRecord`.

    The class body declares nothing of its own; it only subclasses
    :class:`~core.protocol_dto.ActivityRecordDataclass`.

    ``repr=False`` stops ``@dataclass`` from generating a ``__repr__`` on this
    subclass, so whatever ``__repr__`` the base class defines stays in effect.
    """
5 changes: 3 additions & 2 deletions core/activity_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ def _parse_occurred_at(raw: str | datetime | None) -> datetime | None:
return parse_activity_occurred_at(text)


def _format_occurred_at_z(dt: datetime) -> str:
def format_occurred_at_z(dt: datetime) -> str:
    """Return *dt* as a UTC ISO-8601 string that ends in ``Z``.

    *dt* is passed through ``ensure_activity_occurred_at``, converted to UTC,
    and rendered with ``isoformat()``; the ``+00:00`` offset text is then
    replaced by ``Z``.
    """
    utc_moment = ensure_activity_occurred_at(dt).astimezone(timezone.utc)
    iso_text = utc_moment.isoformat()
    return iso_text.replace("+00:00", "Z")

Expand Down Expand Up @@ -133,7 +134,7 @@ def activity_record_to_legacy_dict(
summary: str,
) -> LegacyActivityRecordDict:
"""Serialize typed activity fields to string-keyed export/bridge JSON."""
occurred_str = _format_occurred_at_z(occurred_at) if occurred_at is not None else ""
occurred_str = format_occurred_at_z(occurred_at) if occurred_at is not None else ""
return LegacyActivityRecordDict(
source_system=source_system.value,
external_id=external_id,
Expand Down
2 changes: 2 additions & 0 deletions core/collectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
)
from core.collectors.command_base import BaseCollectorCommand
from core.errors import CollectorFailureCategory, classify_failure
from core.activity_record import GenericActivityRecord
from core.incremental_state import GenericIncrementalState
from core.tracker_result import GenericTrackerResult

Expand All @@ -14,6 +15,7 @@
"BaseCollectorCommand",
"CollectorFailureCategory",
"CollectorRunnable",
"GenericActivityRecord",
"GenericIncrementalState",
"GenericTrackerResult",
"classify_failure",
Expand Down
24 changes: 15 additions & 9 deletions core/collectors/command_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from django.core.management.base import BaseCommand, CommandError

from core.collectors.base_collector import CollectorRunnable
from core.protocol_dto import TrackerResultDataclass
from core.protocols import TrackerResult

logger = logging.getLogger(__name__)
Expand All @@ -34,23 +35,28 @@ def _log_collector_result(collector: CollectorRunnable, result: TrackerResult) -
if not isinstance(collector_id, str) or not collector_id:
collector_id = collector.__class__.__name__
records = _records_collected(result)
extra: dict[str, Any] = {
"collector": collector_id,
"success": result.success,
"records_collected": records,
"error_count": len(result.errors),
"duration_seconds": result.duration_seconds,
"counts": dict(result.counts),
}
if isinstance(result, TrackerResultDataclass):
extra["result_repr"] = repr(result)
extra["result_json"] = result.to_json()
logger.info(
"Collector finished: collector=%s success=%s records_collected=%s "
"error_count=%s duration_seconds=%s counts=%s",
"error_count=%s duration_seconds=%s counts=%s result=%s",
collector_id,
result.success,
records,
len(result.errors),
result.duration_seconds,
dict(result.counts),
extra={
"collector": collector_id,
"success": result.success,
"records_collected": records,
"error_count": len(result.errors),
"duration_seconds": result.duration_seconds,
"counts": dict(result.counts),
},
extra.get("result_repr", result),
extra=extra,
)


Expand Down
17 changes: 5 additions & 12 deletions core/incremental_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,11 @@

from __future__ import annotations

from dataclasses import dataclass, field
from types import MappingProxyType
from typing import Any, Mapping
from dataclasses import dataclass

from core.protocol_dto import IncrementalStateDataclass

@dataclass(frozen=True)
class GenericIncrementalState:
"""Default frozen DTO satisfying :class:`~core.protocols.IncrementalState`."""

checkpoint_token: str | None
human_readable_marker: str | None
extras: Mapping[str, Any] = field(default_factory=dict)

def __post_init__(self) -> None:
object.__setattr__(self, "extras", MappingProxyType(dict(self.extras)))
@dataclass(frozen=True, repr=False)
class GenericIncrementalState(IncrementalStateDataclass):
    """Default frozen DTO satisfying :class:`~core.protocols.IncrementalState`.

    The class body declares nothing of its own; it only subclasses
    :class:`~core.protocol_dto.IncrementalStateDataclass`.
    NOTE(review): the checkpoint fields are presumably supplied by that base
    class -- confirm against ``core.protocol_dto``.

    ``repr=False`` stops ``@dataclass`` from generating a ``__repr__`` on this
    subclass, so whatever ``__repr__`` the base class defines stays in effect.
    """
Loading
Loading