From 7bc0ac4e8a08e9d6943f700afc6c90f030f87f26 Mon Sep 17 00:00:00 2001
From: jakeross
Date: Mon, 29 Jun 2026 02:17:34 -0600
Subject: [PATCH] Collapse source class tables into one SOURCES registry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SOURCE_DICT and the two *_SOURCE_PAIRS tables each repeated the same
connector classes, so adding a source meant editing three tables in
lockstep (plus the orchestration list, fixed in the prior change). They
are now derived from a single `SOURCES` registry of
`SourceDef(key, site, waterlevel?, analyte?)`:

    SOURCE_DICT = {s.key: s.site ...}
    WATERLEVEL_SOURCE_PAIRS = {s.key: (s.site, s.waterlevel) for s with waterlevel}
    ANALYTE_SOURCE_PAIRS = {s.key: (s.site, s.analyte) for s with analyte}

Adding a source is now one SourceDef entry (plus listing it under the
parameters it serves in PARAMETER_SOURCE_MAP, which stays as authored
data — it encodes which analytes each agency actually reports).

tests/test_source_registry.py ties the registry to PARAMETER_SOURCE_MAP:
the waterlevels agency list must equal the set of sources with a
waterlevel class, and every analyte agency must have an analyte class —
so a source wired in one place but not the other fails a test instead of
silently dropping out.

Iteration order of water_level_sources()/analyte_sources() is now
source-key order (was a hand-curated order); full suite (306) confirms
nothing depends on the old order. dg check defs clean.

Co-Authored-By: Claude Opus 4.8 --- backend/config.py | 86 +++++++++++++++++++++-------------- tests/test_source_registry.py | 57 +++++++++++++++++++++++ 2 files changed, 109 insertions(+), 34 deletions(-) create mode 100644 tests/test_source_registry.py diff --git a/backend/config.py b/backend/config.py index 7380c24..d9f209f 100644 --- a/backend/config.py +++ b/backend/config.py @@ -15,6 +15,7 @@ # =============================================================================== import os import sys +from dataclasses import dataclass from datetime import datetime, timedelta import shapely.wkt import yaml @@ -96,45 +97,62 @@ TDS: {"agencies": ["bor", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "wqp"]}, } -SOURCE_DICT = { - "bernco": BernCoSiteSource, - "bor": BORSiteSource, - "cabq": CABQSiteSource, - "ebid": EBIDSiteSource, - "nmbgmr_amp": NMBGMRSiteSource, - "nmed_dwb": DWBSiteSource, - "nmose_isc_seven_rivers": ISCSevenRiversSiteSource, - "nmose_pod": NMOSEPODSiteSource, - "nmose_roswell": NMOSERoswellSiteSource, - "nwis": NWISSiteSource, - "pvacd": PVACDSiteSource, - "wqp": WQPSiteSource, -} +@dataclass(frozen=True) +class SourceDef: + """One data source's class wiring, declared in a single place. + + ``site`` is the site-source class (every source has one). ``waterlevel`` and + ``analyte`` are the parameter-source classes for each group, or ``None`` when + the source doesn't serve that group (e.g. ``bor`` is analyte-only; ``nmose_pod`` + is site-only). The ``SOURCE_DICT`` / ``*_SOURCE_PAIRS`` lookup tables below are + derived from this, so adding a source is one ``SourceDef`` entry here (plus + listing it under the parameters it serves in ``PARAMETER_SOURCE_MAP``).""" + + key: str + site: type + waterlevel: type | None = None + analyte: type | None = None + + +# The single registry of sources. Order is the source-key order; it drives the +# iteration order of water_level_sources()/analyte_sources(). 
A consistency test +# (tests/test_source_registry.py) asserts this stays in sync with +# PARAMETER_SOURCE_MAP so a source can't be wired in one place but not the other. +SOURCES = ( + SourceDef("bernco", BernCoSiteSource, waterlevel=BernCoWaterLevelSource), + SourceDef("bor", BORSiteSource, analyte=BORAnalyteSource), + SourceDef("cabq", CABQSiteSource, waterlevel=CABQWaterLevelSource), + SourceDef("ebid", EBIDSiteSource, waterlevel=EBIDWaterLevelSource), + SourceDef( + "nmbgmr_amp", + NMBGMRSiteSource, + waterlevel=NMBGMRWaterLevelSource, + analyte=NMBGMRAnalyteSource, + ), + SourceDef("nmed_dwb", DWBSiteSource, analyte=DWBAnalyteSource), + SourceDef( + "nmose_isc_seven_rivers", + ISCSevenRiversSiteSource, + waterlevel=ISCSevenRiversWaterLevelSource, + analyte=ISCSevenRiversAnalyteSource, + ), + SourceDef("nmose_pod", NMOSEPODSiteSource), + SourceDef("nmose_roswell", NMOSERoswellSiteSource, waterlevel=NMOSERoswellWaterLevelSource), + SourceDef("nwis", NWISSiteSource, waterlevel=NWISWaterLevelSource), + SourceDef("pvacd", PVACDSiteSource, waterlevel=PVACDWaterLevelSource), + SourceDef("wqp", WQPSiteSource, waterlevel=WQPWaterLevelSource, analyte=WQPAnalyteSource), +) -SOURCE_KEYS = sorted(list(SOURCE_DICT.keys())) +# Lookup tables derived from the registry — keep these read-only/derived; edit +# SOURCES (and PARAMETER_SOURCE_MAP) instead. +SOURCE_DICT = {s.key: s.site for s in SOURCES} +SOURCE_KEYS = sorted(SOURCE_DICT) -# Per-source (site_source, parameter_source) class pairs, keyed by source key. -# Insertion order mirrors the historical order of analyte_sources()/ -# water_level_sources(). source_pair() and the *_sources() methods build from -# these so per-source unification can resolve a single source by key. 
ANALYTE_SOURCE_PAIRS = { - "bor": (BORSiteSource, BORAnalyteSource), - "wqp": (WQPSiteSource, WQPAnalyteSource), - "nmose_isc_seven_rivers": (ISCSevenRiversSiteSource, ISCSevenRiversAnalyteSource), - "nmbgmr_amp": (NMBGMRSiteSource, NMBGMRAnalyteSource), - "nmed_dwb": (DWBSiteSource, DWBAnalyteSource), + s.key: (s.site, s.analyte) for s in SOURCES if s.analyte is not None } - WATERLEVEL_SOURCE_PAIRS = { - "nmbgmr_amp": (NMBGMRSiteSource, NMBGMRWaterLevelSource), - "nmose_isc_seven_rivers": (ISCSevenRiversSiteSource, ISCSevenRiversWaterLevelSource), - "nwis": (NWISSiteSource, NWISWaterLevelSource), - "nmose_roswell": (NMOSERoswellSiteSource, NMOSERoswellWaterLevelSource), - "pvacd": (PVACDSiteSource, PVACDWaterLevelSource), - "bernco": (BernCoSiteSource, BernCoWaterLevelSource), - "ebid": (EBIDSiteSource, EBIDWaterLevelSource), - "cabq": (CABQSiteSource, CABQWaterLevelSource), - "wqp": (WQPSiteSource, WQPWaterLevelSource), + s.key: (s.site, s.waterlevel) for s in SOURCES if s.waterlevel is not None } diff --git a/tests/test_source_registry.py b/tests/test_source_registry.py new file mode 100644 index 0000000..c7fd536 --- /dev/null +++ b/tests/test_source_registry.py @@ -0,0 +1,57 @@ +"""Consistency tests for the source registry (backend.config.SOURCES). + +These tie the derived lookup tables and the empirical PARAMETER_SOURCE_MAP back +to the single SOURCES registry, so a source wired in one place but not another +fails fast instead of silently dropping out of a parameter's source list. 
+""" +from backend.config import ( + SOURCES, + SOURCE_DICT, + SOURCE_KEYS, + ANALYTE_SOURCE_PAIRS, + WATERLEVEL_SOURCE_PAIRS, + PARAMETER_SOURCE_MAP, +) +from backend.constants import WATERLEVELS + + +def test_keys_unique(): + keys = [s.key for s in SOURCES] + assert len(keys) == len(set(keys)) + + +def test_derived_tables_match_registry(): + assert SOURCE_DICT == {s.key: s.site for s in SOURCES} + assert SOURCE_KEYS == sorted(s.key for s in SOURCES) + assert WATERLEVEL_SOURCE_PAIRS == { + s.key: (s.site, s.waterlevel) for s in SOURCES if s.waterlevel + } + assert ANALYTE_SOURCE_PAIRS == { + s.key: (s.site, s.analyte) for s in SOURCES if s.analyte + } + + +def test_waterlevel_agencies_match_registry(): + # Every source the parameter map lists for waterlevels must have a + # waterlevel source class — and vice versa. + registry_wl = {s.key for s in SOURCES if s.waterlevel} + map_wl = set(PARAMETER_SOURCE_MAP[WATERLEVELS]["agencies"]) + assert map_wl == registry_wl + + +def test_analyte_agencies_have_analyte_source(): + # Every agency listed for any analyte must actually have an analyte source + # class in the registry (the map is a subset per analyte; the registry is + # the universe of analyte-capable sources). + analyte_keys = {s.key for s in SOURCES if s.analyte} + for parameter, entry in PARAMETER_SOURCE_MAP.items(): + if parameter == WATERLEVELS: + continue + missing = set(entry["agencies"]) - analyte_keys + assert not missing, f"{parameter}: agencies without an analyte source: {missing}" + + +def test_every_map_agency_is_a_known_source(): + for entry in PARAMETER_SOURCE_MAP.values(): + for agency in entry["agencies"]: + assert agency in SOURCE_DICT