diff --git a/CHANGELOG.md b/CHANGELOG.md index a25f9ac..67601a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **Protocol DTO serialization:** `core.protocol_dto` base dataclasses (`TrackerResultDataclass`, `IncrementalStateDataclass`, `ActivityRecordDataclass`) provide canonical `asdict()`, `to_json()`, `from_dict()`, and truncated `__repr__` on all tracker `protocol_impl` frozen dataclasses; `core.collectors.GenericActivityRecord` added for the default `ActivityRecord` implementation. - **Stability policy** ([`STABILITY.md`](STABILITY.md)): documents stable vs evolving vs unstable interfaces for production and contributors; README links to it. - **`core.adapters`:** stable adapter protocols and implementations for Pinecone (`PineconeAdapter`), Slack Web API (`SlackWebApiAdapter`), and GitHub REST/GraphQL (`GitHubApiAdapter`). The `pinecone` SDK is imported only from `core/adapters/pinecone.py`; `cppa_pinecone_sync.ingestion` uses `PineconeClientProtocol` with injectable fakes for tests. @@ -20,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **core.collectors:** `BaseCollectorCommand` logs `result_repr` and `result_json` in structured `extra` when the run result is a `TrackerResultDataclass` subclass. - **core.collectors:** `AbstractCollector.last_result` is set only after `post_collect()` completes successfully (including default incremental checkpoint persistence), matching the documented “most recent successful run” semantics. - **discord_activity_tracker:** `backfill_discord_activity_tracker` reports per-file import failures on `DiscordCollectionTrackerResult` (`success=False`, `errors`, `failed_files` count) instead of always returning `success=True`. - **core.protocols / ActivityRecord:** `occurred_at` is timezone-aware UTC `datetime | None`; `source_system` is `SourceSystem` (`StrEnum`); `activity_type` is branded `ActivityType`; `actor_external_id` is `ActorExternalId` (`NewType`). Legacy string payloads use `core.activity_types.migrate_legacy_activity_fields` and `activity_record_to_legacy_dict` on GitHub/Discord `protocol_impl` dataclasses. diff --git a/boost_library_docs_tracker/protocol_impl.py b/boost_library_docs_tracker/protocol_impl.py index 64e9904..16839f5 100644 --- a/boost_library_docs_tracker/protocol_impl.py +++ b/boost_library_docs_tracker/protocol_impl.py @@ -2,22 +2,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from types import MappingProxyType -from typing import Mapping +from dataclasses import dataclass +from core.protocol_dto import TrackerResultDataclass -@dataclass(frozen=True) -class LibraryDocsTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for docs scrape runs.""" - - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - def __post_init__(self) -> None: - object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) +@dataclass(frozen=True, repr=False) +class LibraryDocsTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for docs scrape runs.""" @classmethod def from_run( diff --git a/boost_library_tracker/protocol_impl.py b/boost_library_tracker/protocol_impl.py index 73c72c7..07f5e10 100644 --- a/boost_library_tracker/protocol_impl.py +++ b/boost_library_tracker/protocol_impl.py @@ -2,22 +2,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from types import MappingProxyType -from typing import Mapping +from dataclasses import dataclass +from core.protocol_dto import TrackerResultDataclass -@dataclass(frozen=True) -class CollectBoostLibrariesResult: - """Structured :class:`~core.protocols.TrackerResult` for library metadata collection.""" - - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - def __post_init__(self) -> None: - object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) +@dataclass(frozen=True, repr=False) +class CollectBoostLibrariesResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for library metadata collection.""" @classmethod def from_totals( diff --git a/boost_library_usage_dashboard/protocol_impl.py b/boost_library_usage_dashboard/protocol_impl.py index 26ba6c9..acbd976 100644 --- a/boost_library_usage_dashboard/protocol_impl.py +++ b/boost_library_usage_dashboard/protocol_impl.py @@ -2,18 +2,15 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, Mapping +from core.protocol_dto import TrackerResultDataclass -@dataclass(frozen=True) -class UsageDashboardTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for dashboard runs.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None +@dataclass(frozen=True, repr=False) +class UsageDashboardTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for dashboard runs.""" @classmethod def from_stats(cls, stats: Mapping[str, Any] | None) -> UsageDashboardTrackerResult: diff --git a/boost_mailing_list_tracker/protocol_impl.py b/boost_mailing_list_tracker/protocol_impl.py index c43c05b..de23cbc 100644 --- a/boost_mailing_list_tracker/protocol_impl.py +++ b/boost_mailing_list_tracker/protocol_impl.py @@ -2,18 +2,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from typing import Any, Mapping +from dataclasses import dataclass +from core.protocol_dto import IncrementalStateDataclass, TrackerResultDataclass -@dataclass(frozen=True) -class MailingListTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for mailing list runs.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None +@dataclass(frozen=True, repr=False) +class MailingListTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for mailing list runs.""" @classmethod def from_run( @@ -35,14 +31,10 @@ def from_run( ) -@dataclass(frozen=True) -class MailingListIncrementalState: +@dataclass(frozen=True, repr=False) +class MailingListIncrementalState(IncrementalStateDataclass): """Checkpoint between mailing list runs.""" - checkpoint_token: str | None - human_readable_marker: str | None - extras: Mapping[str, Any] = field(default_factory=dict) - @classmethod def from_start_date(cls, start_date: str | None) -> MailingListIncrementalState: return cls( diff --git a/clang_github_tracker/protocol_impl.py b/clang_github_tracker/protocol_impl.py index 380d8c8..92ff436 100644 --- a/clang_github_tracker/protocol_impl.py +++ b/clang_github_tracker/protocol_impl.py @@ -2,19 +2,15 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime -from typing import Any, Mapping +from core.protocol_dto import IncrementalStateDataclass, TrackerResultDataclass -@dataclass(frozen=True) -class ClangGithubTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for Clang sync outcomes.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None +@dataclass(frozen=True, repr=False) +class ClangGithubTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for Clang sync outcomes.""" @classmethod def from_sync( @@ -40,14 +36,10 @@ def dry_run(cls) -> ClangGithubTrackerResult: return cls(success=True, counts={}) -@dataclass(frozen=True) -class ClangGithubIncrementalState: +@dataclass(frozen=True, repr=False) +class ClangGithubIncrementalState(IncrementalStateDataclass): """Checkpoint between Clang GitHub runs.""" - checkpoint_token: str | None - human_readable_marker: str | None - extras: Mapping[str, Any] = field(default_factory=dict) - @classmethod def from_watermarks( cls, diff --git a/core/activity_record.py b/core/activity_record.py new file mode 100644 index 0000000..b64efc1 --- /dev/null +++ b/core/activity_record.py @@ -0,0 +1,12 @@ +"""Shared :class:`~core.protocols.ActivityRecord` implementation for collectors.""" + +from __future__ import annotations + +from dataclasses import dataclass + +from core.protocol_dto import ActivityRecordDataclass + + +@dataclass(frozen=True, repr=False) +class GenericActivityRecord(ActivityRecordDataclass): + """Default frozen DTO satisfying :class:`~core.protocols.ActivityRecord`.""" diff --git a/core/activity_types.py b/core/activity_types.py index 1f565e1..881090f 100644 --- a/core/activity_types.py +++ b/core/activity_types.py @@ -101,7 +101,8 @@ def _parse_occurred_at(raw: str | datetime | None) -> datetime | None: return parse_activity_occurred_at(text) -def _format_occurred_at_z(dt: datetime) -> str: +def format_occurred_at_z(dt: datetime) -> str: + """Serialize *dt* as timezone-aware UTC ISO-8601 with ``Z`` suffix.""" aware = ensure_activity_occurred_at(dt) return aware.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") @@ -133,7 +134,7 @@ def activity_record_to_legacy_dict( summary: str, ) -> LegacyActivityRecordDict: """Serialize typed activity fields to string-keyed export/bridge JSON.""" - occurred_str = _format_occurred_at_z(occurred_at) if occurred_at is not None else "" + occurred_str = format_occurred_at_z(occurred_at) if occurred_at is not None else "" return LegacyActivityRecordDict( source_system=source_system.value, external_id=external_id, diff --git a/core/collectors/__init__.py b/core/collectors/__init__.py index aa8079d..c8c2bec 100644 --- a/core/collectors/__init__.py +++ b/core/collectors/__init__.py @@ -6,6 +6,7 @@ ) from core.collectors.command_base import BaseCollectorCommand from core.errors import CollectorFailureCategory, classify_failure +from core.activity_record import GenericActivityRecord from core.incremental_state import GenericIncrementalState from core.tracker_result import GenericTrackerResult @@ -14,6 +15,7 @@ "BaseCollectorCommand", "CollectorFailureCategory", "CollectorRunnable", + "GenericActivityRecord", "GenericIncrementalState", "GenericTrackerResult", "classify_failure", diff --git a/core/collectors/command_base.py b/core/collectors/command_base.py index 0577be4..d76a8e3 100644 --- a/core/collectors/command_base.py +++ b/core/collectors/command_base.py @@ -17,6 +17,7 @@ from django.core.management.base import BaseCommand, CommandError from core.collectors.base_collector import CollectorRunnable +from core.protocol_dto import TrackerResultDataclass from core.protocols import TrackerResult logger = logging.getLogger(__name__) @@ -34,23 +35,28 @@ def _log_collector_result(collector: CollectorRunnable, result: TrackerResult) - if not isinstance(collector_id, str) or not collector_id: collector_id = collector.__class__.__name__ records = _records_collected(result) + extra: dict[str, Any] = { + "collector": collector_id, + "success": result.success, + "records_collected": records, + "error_count": len(result.errors), + "duration_seconds": result.duration_seconds, + "counts": dict(result.counts), + } + if isinstance(result, TrackerResultDataclass): + extra["result_repr"] = repr(result) + extra["result_json"] = result.to_json() logger.info( "Collector finished: collector=%s success=%s records_collected=%s " - "error_count=%s duration_seconds=%s counts=%s", + "error_count=%s duration_seconds=%s counts=%s result=%s", collector_id, result.success, records, len(result.errors), result.duration_seconds, dict(result.counts), - extra={ - "collector": collector_id, - "success": result.success, - "records_collected": records, - "error_count": len(result.errors), - "duration_seconds": result.duration_seconds, - "counts": dict(result.counts), - }, + extra.get("result_repr", result), + extra=extra, ) diff --git a/core/incremental_state.py b/core/incremental_state.py index 28bbbfc..3b904da 100644 --- a/core/incremental_state.py +++ b/core/incremental_state.py @@ -2,18 +2,11 @@ from __future__ import annotations -from dataclasses import dataclass, field -from types import MappingProxyType -from typing import Any, Mapping +from dataclasses import dataclass +from core.protocol_dto import IncrementalStateDataclass -@dataclass(frozen=True) -class GenericIncrementalState: - """Default frozen DTO satisfying :class:`~core.protocols.IncrementalState`.""" - - checkpoint_token: str | None - human_readable_marker: str | None - extras: Mapping[str, Any] = field(default_factory=dict) - def __post_init__(self) -> None: - object.__setattr__(self, "extras", MappingProxyType(dict(self.extras))) +@dataclass(frozen=True, repr=False) +class GenericIncrementalState(IncrementalStateDataclass): + """Default frozen DTO satisfying :class:`~core.protocols.IncrementalState`.""" diff --git a/core/protocol_dto.py b/core/protocol_dto.py new file mode 100644 index 0000000..142c04c --- /dev/null +++ b/core/protocol_dto.py @@ -0,0 +1,237 @@ +""" +Shared frozen dataclass bases with canonical serialization for protocol DTOs. + +Concrete tracker implementations subclass :class:`TrackerResultDataclass`, +:class:`IncrementalStateDataclass`, and :class:`ActivityRecordDataclass` to inherit +``asdict()``, ``to_json()``, ``from_dict()``, and log-friendly ``__repr__``. +""" + +from __future__ import annotations + +import json +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from types import MappingProxyType +from typing import Any, Self + +from core.activity_types import ( + ActivityType, + ActorExternalId, + SourceSystem, + actor_external_id, + ensure_activity_occurred_at, + format_occurred_at_z, + parse_activity_occurred_at, +) + +_REPR_SUMMARY_MAX = 60 +_REPR_MAP_MAX_KEYS = 5 +_REPR_ERROR_MAX_ITEMS = 2 +_REPR_ERROR_ITEM_MAX = 80 + + +def _json_safe(value: Any) -> Any: + """Recursively convert a value to JSON-serializable primitives.""" + if value is None or isinstance(value, (bool, int, float, str)): + return value + if isinstance(value, datetime): + return format_occurred_at_z(value) + if isinstance(value, Enum): + return value.value + if isinstance(value, ActivityType): + return str(value) + if isinstance(value, Mapping): + return _sorted_dict({str(k): _json_safe(v) for k, v in value.items()}) + if isinstance(value, (list, tuple)): + return [_json_safe(item) for item in value] + return str(value) + + +def _sorted_dict(d: dict[str, Any]) -> dict[str, Any]: + """Return *d* with sorted keys and JSON-safe values.""" + return {k: _json_safe(v) for k, v in sorted(d.items())} + + +def _repr_mapping(mapping: Mapping[str, Any], *, name: str) -> str: + items = dict(mapping) + if len(items) <= _REPR_MAP_MAX_KEYS: + return f"{name}={items!r}" + preview = {k: items[k] for k in sorted(items)[:2]} + return f"{name}={preview!r} <{len(items)} keys>" + + +def _repr_errors(errors: Sequence[str]) -> str: + if not errors: + return "errors=()" + if len(errors) <= _REPR_ERROR_MAX_ITEMS and all( + len(e) <= _REPR_ERROR_ITEM_MAX for e in errors + ): + return f"errors={tuple(errors)!r}" + return f"errors=<{len(errors)} items>" + + +def _truncate_summary(summary: str) -> str: + if len(summary) <= _REPR_SUMMARY_MAX: + return summary + return summary[: _REPR_SUMMARY_MAX - 3] + "..." + + +@dataclass(frozen=True) +class TrackerResultDataclass: + """Frozen base for :class:`~core.protocols.TrackerResult` implementations.""" + + success: bool + counts: Mapping[str, int] + errors: tuple[str, ...] = field(default_factory=tuple) + duration_seconds: float | None = None + + def __post_init__(self) -> None: + object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) + + def asdict(self) -> dict[str, Any]: + return _sorted_dict( + { + "success": self.success, + "counts": dict(self.counts), + "errors": list(self.errors), + "duration_seconds": self.duration_seconds, + } + ) + + def to_json(self) -> str: + return json.dumps(self.asdict(), sort_keys=True, separators=(",", ":")) + + @classmethod + def from_dict(cls, data: Mapping[str, Any]) -> Self: + raw_errors = data.get("errors") or [] + return cls( + success=bool(data["success"]), + counts={str(k): int(v) for k, v in dict(data["counts"]).items()}, + errors=tuple(str(e) for e in raw_errors), + duration_seconds=data.get("duration_seconds"), + ) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(" + f"success={self.success!r}, " + f"{_repr_mapping(self.counts, name='counts')}, " + f"{_repr_errors(self.errors)}, " + f"duration_seconds={self.duration_seconds!r})" + ) + + +@dataclass(frozen=True) +class IncrementalStateDataclass: + """Frozen base for :class:`~core.protocols.IncrementalState` implementations.""" + + checkpoint_token: str | None + human_readable_marker: str | None + extras: Mapping[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + object.__setattr__(self, "extras", MappingProxyType(dict(self.extras))) + + def asdict(self) -> dict[str, Any]: + return _sorted_dict( + { + "checkpoint_token": self.checkpoint_token, + "human_readable_marker": self.human_readable_marker, + "extras": _json_safe(dict(self.extras)), + } + ) + + def to_json(self) -> str: + return json.dumps(self.asdict(), sort_keys=True, separators=(",", ":")) + + @classmethod + def from_dict(cls, data: Mapping[str, Any]) -> Self: + raw_extras = data.get("extras") or {} + return cls( + checkpoint_token=data.get("checkpoint_token"), + human_readable_marker=data.get("human_readable_marker"), + extras=dict(raw_extras), + ) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(" + f"checkpoint_token={self.checkpoint_token!r}, " + f"human_readable_marker={self.human_readable_marker!r}, " + f"{_repr_mapping(self.extras, name='extras')})" + ) + + +@dataclass(frozen=True) +class ActivityRecordDataclass: + """Frozen base for :class:`~core.protocols.ActivityRecord` implementations.""" + + source_system: SourceSystem + external_id: str + occurred_at: datetime | None + activity_type: ActivityType + actor_external_id: ActorExternalId + source_url: str | None + summary: str + + def __post_init__(self) -> None: + if self.occurred_at is not None: + object.__setattr__( + self, "occurred_at", ensure_activity_occurred_at(self.occurred_at) + ) + + def asdict(self) -> dict[str, Any]: + occurred = ( + format_occurred_at_z(self.occurred_at) + if self.occurred_at is not None + else None + ) + return _sorted_dict( + { + "source_system": self.source_system.value, + "external_id": self.external_id, + "occurred_at": occurred, + "activity_type": str(self.activity_type), + "actor_external_id": str(self.actor_external_id), + "source_url": self.source_url, + "summary": self.summary, + } + ) + + def to_json(self) -> str: + return json.dumps(self.asdict(), sort_keys=True, separators=(",", ":")) + + @classmethod + def from_dict(cls, data: Mapping[str, Any]) -> Self: + raw_occurred = data.get("occurred_at") + occurred: datetime | None + if raw_occurred is None: + occurred = None + elif isinstance(raw_occurred, datetime): + occurred = ensure_activity_occurred_at(raw_occurred) + else: + text = str(raw_occurred).strip() + occurred = parse_activity_occurred_at(text) if text else None + return cls( + source_system=SourceSystem(str(data["source_system"])), + external_id=str(data["external_id"]), + occurred_at=occurred, + activity_type=ActivityType.parse(str(data["activity_type"])), + actor_external_id=actor_external_id(data.get("actor_external_id")), + source_url=data.get("source_url"), + summary=str(data["summary"]), + ) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(" + f"source_system={self.source_system!r}, " + f"external_id={self.external_id!r}, " + f"occurred_at={self.occurred_at!r}, " + f"activity_type={self.activity_type!r}, " + f"actor_external_id={self.actor_external_id!r}, " + f"source_url={self.source_url!r}, " + f"summary={_truncate_summary(self.summary)!r})" + ) diff --git a/core/tests/test_protocol_serialization.py b/core/tests/test_protocol_serialization.py new file mode 100644 index 0000000..078f2fa --- /dev/null +++ b/core/tests/test_protocol_serialization.py @@ -0,0 +1,159 @@ +"""Round-trip and repr tests for protocol DTO serialization.""" + +from __future__ import annotations + +import json +from datetime import datetime, timezone + +import pytest + +from core.activity_record import GenericActivityRecord +from core.activity_types import ActivityType, SourceSystem, actor_external_id +from core.incremental_state import GenericIncrementalState +from core.tracker_result import GenericTrackerResult +from boost_library_docs_tracker.protocol_impl import LibraryDocsTrackerResult +from boost_library_tracker.protocol_impl import CollectBoostLibrariesResult +from boost_library_usage_dashboard.protocol_impl import UsageDashboardTrackerResult +from boost_mailing_list_tracker.protocol_impl import ( + MailingListIncrementalState, + MailingListTrackerResult, +) +from clang_github_tracker.protocol_impl import ( + ClangGithubIncrementalState, + ClangGithubTrackerResult, +) +from cppa_pinecone_sync.protocol_impl import PineconeSyncTrackerResult +from cppa_slack_tracker.protocol_impl import SlackIncrementalState, SlackTrackerResult +from cppa_youtube_script_tracker.protocol_impl import YoutubeScriptTrackerResult +from discord_activity_tracker.protocol_impl import ( + DiscordActivityRecord, + DiscordCollectionTrackerResult, + DiscordIncrementalState, +) +from github_activity_tracker.protocol_impl import ( + GitHubActivityRecord, + GitHubIncrementalState, + GitHubSyncTrackerResult, +) +from wg21_paper_tracker.protocol_impl import Wg21PaperTrackerResult + +_TRACKER_RESULTS = [ + GenericTrackerResult.ok(), + GenericTrackerResult.failed("oops", items=0), + GenericTrackerResult( + success=False, + counts={"items": 1}, + errors=("e1", "e2"), + duration_seconds=1.5, + ), + GitHubSyncTrackerResult(success=True, counts={"issues": 1}), + DiscordCollectionTrackerResult(success=True, counts={"messages": 2}), + PineconeSyncTrackerResult.from_sync_dict( + {"upserted": 1, "total": 1, "failed_count": 0} + ), + UsageDashboardTrackerResult.from_stats({"repos_analyzed": 3}), + Wg21PaperTrackerResult.dry_run(), + ClangGithubTrackerResult.dry_run(), + MailingListTrackerResult.from_run(fetched=1, created=1, skipped=0), + SlackTrackerResult.dry_run(), + YoutubeScriptTrackerResult.from_run(videos=1), + LibraryDocsTrackerResult.from_run(versions=1, pages=5), + CollectBoostLibrariesResult.empty(), +] + +_INCREMENTAL_STATES = [ + GenericIncrementalState(checkpoint_token="t", human_readable_marker="m"), + GitHubIncrementalState.from_repo_watermark(repo_id=1, marker="2024"), + DiscordIncrementalState.from_after_date(after=None), + DiscordIncrementalState.from_after_date( + after=datetime(2024, 6, 1, 12, 0, 0, tzinfo=timezone.utc), + last_message_id=100, + channel_id=55, + ), + MailingListIncrementalState.from_start_date("2024-01-01"), + SlackIncrementalState.from_team(team_id="T1", start_date="2024-01-01"), + ClangGithubIncrementalState.from_watermarks(start_commit="abc", start_item="2024"), +] + +_ACTIVITY_RECORDS = [ + GitHubActivityRecord.from_issue(repo_id=7, issue_number=123, summary="title"), + DiscordActivityRecord.from_converted_export_dict( + { + "id": 5, + "created_at": "2024-01-01T00:00:00.0000000Z", + "message_type": "Reply", + "content": "hello", + "author": {"id": 7}, + }, + server_id=1, + channel_id=2, + ), + GenericActivityRecord( + source_system=SourceSystem.GITHUB, + external_id="1:issue:1", + occurred_at=None, + activity_type=ActivityType.github_issue(), + actor_external_id=actor_external_id(""), + source_url=None, + summary="x" * 200, + ), +] + + +@pytest.mark.parametrize("obj", _TRACKER_RESULTS, ids=lambda o: type(o).__name__) +def test_tracker_result_round_trip(obj) -> None: + restored = type(obj).from_dict(obj.asdict()) + assert restored == obj + assert json.loads(obj.to_json()) == obj.asdict() + json.dumps(obj.asdict()) + + +@pytest.mark.parametrize("obj", _INCREMENTAL_STATES, ids=lambda o: type(o).__name__) +def test_incremental_state_round_trip(obj) -> None: + restored = type(obj).from_dict(obj.asdict()) + assert restored == obj + assert json.loads(obj.to_json()) == obj.asdict() + json.dumps(obj.asdict()) + + +@pytest.mark.parametrize("obj", _ACTIVITY_RECORDS, ids=lambda o: type(o).__name__) +def test_activity_record_round_trip(obj) -> None: + restored = type(obj).from_dict(obj.asdict()) + assert restored == obj + assert json.loads(obj.to_json()) == obj.asdict() + json.dumps(obj.asdict()) + + +@pytest.mark.parametrize( + "obj", _TRACKER_RESULTS + _INCREMENTAL_STATES + _ACTIVITY_RECORDS +) +def test_to_json_is_deterministic(obj) -> None: + again = type(obj).from_dict(json.loads(obj.to_json())) + assert again.to_json() == obj.to_json() + + +def test_tracker_result_repr_truncates_many_errors() -> None: + obj = GenericTrackerResult( + success=False, + counts={"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}, + errors=("one", "two", "three"), + ) + text = repr(obj) + assert "errors=<3 items>" in text + assert "counts=" in text + assert "<6 keys>" in text + + +def test_activity_record_repr_truncates_summary() -> None: + obj = GenericActivityRecord( + source_system=SourceSystem.DISCORD, + external_id="1:2:3", + occurred_at=datetime(2024, 1, 1, tzinfo=timezone.utc), + activity_type=ActivityType.discord_message("Default"), + actor_external_id=actor_external_id("1"), + source_url=None, + summary="word " * 50, + ) + text = repr(obj) + assert "..." in text + assert len(text) < 400 diff --git a/core/tracker_result.py b/core/tracker_result.py index 45de67f..1fdb486 100644 --- a/core/tracker_result.py +++ b/core/tracker_result.py @@ -2,26 +2,19 @@ from __future__ import annotations -from dataclasses import dataclass, field, fields, is_dataclass, replace -from types import MappingProxyType -from typing import TYPE_CHECKING, Mapping +from dataclasses import dataclass, fields, is_dataclass, replace +from typing import TYPE_CHECKING + +from core.protocol_dto import TrackerResultDataclass if TYPE_CHECKING: from core.protocols import TrackerResult -@dataclass(frozen=True) -class GenericTrackerResult: +@dataclass(frozen=True, repr=False) +class GenericTrackerResult(TrackerResultDataclass): """Default frozen DTO satisfying :class:`~core.protocols.TrackerResult`.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - - def __post_init__(self) -> None: - object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) - @classmethod def ok(cls, **counts: int) -> GenericTrackerResult: """Build a successful result with the given count fields.""" diff --git a/cppa_pinecone_sync/protocol_impl.py b/cppa_pinecone_sync/protocol_impl.py index 2fe4fee..a53b74b 100644 --- a/cppa_pinecone_sync/protocol_impl.py +++ b/cppa_pinecone_sync/protocol_impl.py @@ -2,18 +2,15 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, Mapping +from core.protocol_dto import TrackerResultDataclass -@dataclass(frozen=True) -class PineconeSyncTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for ``sync_to_pinecone`` outcomes.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None +@dataclass(frozen=True, repr=False) +class PineconeSyncTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for ``sync_to_pinecone`` outcomes.""" @classmethod def from_sync_dict(cls, d: Mapping[str, Any]) -> PineconeSyncTrackerResult: diff --git a/cppa_slack_tracker/protocol_impl.py b/cppa_slack_tracker/protocol_impl.py index 1b975e2..afff2ad 100644 --- a/cppa_slack_tracker/protocol_impl.py +++ b/cppa_slack_tracker/protocol_impl.py @@ -2,22 +2,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from types import MappingProxyType -from typing import Any, Mapping +from dataclasses import dataclass +from core.protocol_dto import IncrementalStateDataclass, TrackerResultDataclass -@dataclass(frozen=True) -class SlackTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for Slack sync runs.""" - - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - def __post_init__(self) -> None: - object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) +@dataclass(frozen=True, repr=False) +class SlackTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for Slack sync runs.""" @classmethod def from_counts(cls, **counts: int) -> SlackTrackerResult: @@ -28,17 +20,10 @@ def dry_run(cls) -> SlackTrackerResult: return cls(success=True, counts={}) -@dataclass(frozen=True) -class SlackIncrementalState: +@dataclass(frozen=True, repr=False) +class SlackIncrementalState(IncrementalStateDataclass): """Checkpoint between Slack message sync runs.""" - checkpoint_token: str | None - human_readable_marker: str | None - extras: Mapping[str, Any] = field(default_factory=dict) - - def __post_init__(self) -> None: - object.__setattr__(self, "extras", MappingProxyType(dict(self.extras))) - @classmethod def from_team( cls, *, team_id: str, start_date: str | None diff --git a/cppa_youtube_script_tracker/protocol_impl.py b/cppa_youtube_script_tracker/protocol_impl.py index 89ccc08..6093954 100644 --- a/cppa_youtube_script_tracker/protocol_impl.py +++ b/cppa_youtube_script_tracker/protocol_impl.py @@ -2,22 +2,14 @@ from __future__ import annotations -from dataclasses import dataclass, field -from types import MappingProxyType -from typing import Mapping +from dataclasses import dataclass +from core.protocol_dto import TrackerResultDataclass -@dataclass(frozen=True) -class YoutubeScriptTrackerResult: - """Structured :class:`~core.protocols.TrackerResult` for YouTube runs.""" - - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - def __post_init__(self) -> None: - object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) +@dataclass(frozen=True, repr=False) +class YoutubeScriptTrackerResult(TrackerResultDataclass): + """Structured :class:`~core.protocols.TrackerResult` for YouTube runs.""" @classmethod def from_run( diff --git a/discord_activity_tracker/protocol_impl.py b/discord_activity_tracker/protocol_impl.py index ec6b3ca..9e765dd 100644 --- a/discord_activity_tracker/protocol_impl.py +++ b/discord_activity_tracker/protocol_impl.py @@ -2,39 +2,33 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime from typing import Any, Mapping from core.activity_types import ( ActivityType, - ActorExternalId, LegacyActivityRecordDict, SourceSystem, activity_record_to_legacy_dict, - ensure_activity_occurred_at, migrate_legacy_activity_fields, ) +from core.protocol_dto import ( + ActivityRecordDataclass, + IncrementalStateDataclass, + TrackerResultDataclass, +) -@dataclass(frozen=True) -class DiscordCollectionTrackerResult: +@dataclass(frozen=True, repr=False) +class DiscordCollectionTrackerResult(TrackerResultDataclass): """Counts for a Discord collection slice (messages, channels, etc.).""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - -@dataclass(frozen=True) -class DiscordIncrementalState: +@dataclass(frozen=True, repr=False) +class DiscordIncrementalState(IncrementalStateDataclass): """Checkpoint between Discord runs (after-cursor + optional snowflake).""" - checkpoint_token: str | None - human_readable_marker: str | None - extras: Mapping[str, Any] = field(default_factory=dict) - @classmethod def from_after_date( cls, @@ -61,25 +55,12 @@ def from_after_date( ) -@dataclass(frozen=True) -class DiscordActivityRecord: +@dataclass(frozen=True, repr=False) +class DiscordActivityRecord(ActivityRecordDataclass): """Normalized Discord message as a portable activity row.""" - source_system: SourceSystem - external_id: str - occurred_at: datetime | None - activity_type: ActivityType - actor_external_id: ActorExternalId - source_url: str | None - summary: str - - def __post_init__(self) -> None: - if self.occurred_at is not None: - object.__setattr__( - self, "occurred_at", ensure_activity_occurred_at(self.occurred_at) - ) - def to_legacy_dict(self) -> LegacyActivityRecordDict: + """Tier-C workspace bridge format; prefer :meth:`asdict` for canonical protocol JSON.""" return activity_record_to_legacy_dict( source_system=self.source_system, external_id=self.external_id, diff --git a/docs/Core_public_API.md b/docs/Core_public_API.md index 8f4c2a9..dc493ab 100644 --- a/docs/Core_public_API.md +++ b/docs/Core_public_API.md @@ -13,6 +13,7 @@ The `core` Django app holds shared infrastructure. Treat the following as the ** | `core.collectors.BaseCollectorCommand` | Thin `BaseCommand` adapter: runs `get_collector(**opts).run()`, logs structured `TrackerResult` fields, then `sync_pinecone()`. | | `core.collectors.GenericTrackerResult` | Default frozen `TrackerResult` DTO (`ok()`, `failed()`); used by stubs and simple collectors. | | `core.collectors.GenericIncrementalState` | Default frozen `IncrementalState` DTO for checkpoint hooks. | +| `core.collectors.GenericActivityRecord` | Default frozen `ActivityRecord` DTO for portable activity rows. | ### Application collectors @@ -56,7 +57,9 @@ Structural contracts for **data** that crosses tracker layers (sync outcomes, ac | `core.protocols.IncrementalState` | `@runtime_checkable` protocol: `checkpoint_token`, `human_readable_marker`, `extras`. | | `core.protocols.require_tracker_result` / `require_activity_record` / `require_incremental_state` | Runtime guards raising `TypeError` when an object does not satisfy the protocol. | -Implementations are frozen dataclasses in each tracker app's `protocol_impl.py` (for example `github_activity_tracker.protocol_impl`, `discord_activity_tracker.protocol_impl`, `boost_library_tracker.protocol_impl`). Simple collectors may return `GenericTrackerResult` directly. Prefer dataclasses over plain `dict` for reliable `isinstance` checks with `@runtime_checkable`. +Implementations are frozen dataclasses in each tracker app's `protocol_impl.py` (for example `github_activity_tracker.protocol_impl`, `discord_activity_tracker.protocol_impl`, `boost_library_tracker.protocol_impl`). They subclass shared bases in **`core.protocol_dto`** (`TrackerResultDataclass`, `IncrementalStateDataclass`, `ActivityRecordDataclass`) which provide canonical `asdict()`, `to_json()`, `from_dict()`, and log-friendly `__repr__`. Simple collectors may return `GenericTrackerResult` directly. Prefer dataclasses over plain `dict` for reliable `isinstance` checks with `@runtime_checkable`. + +`BaseCollectorCommand` structured logs include `result_repr` and `result_json` in `extra` when the collector returns a `TrackerResultDataclass` subclass. `AbstractCollector.collect()` must return a `TrackerResult`. Override `load_incremental_state()` / `persist_incremental_state()` when a collector needs checkpoint read/write between runs (default hooks are no-ops). diff --git a/github_activity_tracker/protocol_impl.py b/github_activity_tracker/protocol_impl.py index 87f17a8..fda485a 100644 --- a/github_activity_tracker/protocol_impl.py +++ b/github_activity_tracker/protocol_impl.py @@ -2,31 +2,29 @@ from __future__ import annotations -from dataclasses import dataclass, field +from dataclasses import dataclass from datetime import datetime from typing import Any, Mapping from core.activity_types import ( ActivityType, - ActorExternalId, LegacyActivityRecordDict, SourceSystem, activity_record_to_legacy_dict, - ensure_activity_occurred_at, migrate_legacy_activity_fields, ) +from core.protocol_dto import ( + ActivityRecordDataclass, + IncrementalStateDataclass, + TrackerResultDataclass, +) from github_activity_tracker.sync import sync_github -@dataclass(frozen=True) -class GitHubSyncTrackerResult: +@dataclass(frozen=True, repr=False) +class GitHubSyncTrackerResult(TrackerResultDataclass): """Structured :class:`~core.protocols.TrackerResult` for ``sync_github`` outcomes.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - @classmethod def from_sync_dict(cls, d: dict[str, list[int]]) -> GitHubSyncTrackerResult: issues = d.get("issues") or [] @@ -63,25 +61,12 @@ def sync_github_tracker_result( return GitHubSyncTrackerResult.from_sync_dict(raw) -@dataclass(frozen=True) -class GitHubActivityRecord: +@dataclass(frozen=True, repr=False) +class GitHubActivityRecord(ActivityRecordDataclass): """Single issue/PR touch for cross-layer logging or bridges.""" - source_system: SourceSystem - external_id: str - occurred_at: datetime | None - activity_type: ActivityType - actor_external_id: ActorExternalId - source_url: str | None - summary: str - - def __post_init__(self) -> None: - if self.occurred_at is not None: - object.__setattr__( - self, "occurred_at", ensure_activity_occurred_at(self.occurred_at) - ) - def to_legacy_dict(self) -> LegacyActivityRecordDict: + """Tier-C workspace bridge format; prefer :meth:`asdict` for canonical protocol JSON.""" return activity_record_to_legacy_dict( source_system=self.source_system, external_id=self.external_id, @@ -146,14 +131,10 @@ def from_legacy_dict( ) -@dataclass(frozen=True) -class GitHubIncrementalState: +@dataclass(frozen=True, repr=False) +class GitHubIncrementalState(IncrementalStateDataclass): """Opaque + human-readable sync watermark (app-specific *extras*).""" - checkpoint_token: str | None - human_readable_marker: str | None - extras: Mapping[str, Any] = field(default_factory=dict) - @classmethod def from_repo_watermark( cls, *, repo_id: int, marker: str diff --git a/wg21_paper_tracker/protocol_impl.py b/wg21_paper_tracker/protocol_impl.py index d959d40..36cddcd 100644 --- a/wg21_paper_tracker/protocol_impl.py +++ b/wg21_paper_tracker/protocol_impl.py @@ -2,25 +2,16 @@ from __future__ import annotations -from dataclasses import dataclass, field -from types import MappingProxyType -from typing import Mapping +from dataclasses import dataclass +from core.protocol_dto import TrackerResultDataclass from wg21_paper_tracker.pipeline import TrackerPipelineResult -@dataclass(frozen=True) -class Wg21PaperTrackerResult: +@dataclass(frozen=True, repr=False) +class Wg21PaperTrackerResult(TrackerResultDataclass): """Structured :class:`~core.protocols.TrackerResult` for pipeline outcomes.""" - success: bool - counts: Mapping[str, int] - errors: tuple[str, ...] = field(default_factory=tuple) - duration_seconds: float | None = None - - def __post_init__(self) -> None: - object.__setattr__(self, "counts", MappingProxyType(dict(self.counts))) - @classmethod def from_pipeline(cls, result: TrackerPipelineResult) -> Wg21PaperTrackerResult: n = result.new_paper_count