From e4d45fb817411423a3bb895ab44a6ca5397d932d Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 30 Jan 2026 11:39:32 +1100 Subject: [PATCH 1/5] feat: add nma_data_quality to Observation model and update related functionality --- ...63109252fb1_add_legacy_equipment_fields.py | 18 ++++---- ...23456789ab_add_observation_data_quality.py | 46 +++++++++++++++++++ core/lexicon.json | 10 ++-- db/deployment.py | 4 +- db/observation.py | 8 +++- schemas/observation.py | 4 +- tests/test_sensor_transfer.py | 24 ++++++++++ tests/test_transfer_legacy_dates.py | 35 ++++++++++++++ transfers/sensor_transfer.py | 22 ++++++++- transfers/waterlevels_transfer.py | 12 +++++ 10 files changed, 163 insertions(+), 20 deletions(-) create mode 100644 alembic/versions/e123456789ab_add_observation_data_quality.py create mode 100644 tests/test_sensor_transfer.py diff --git a/alembic/versions/263109252fb1_add_legacy_equipment_fields.py b/alembic/versions/263109252fb1_add_legacy_equipment_fields.py index 35d8166b0..1092b68ad 100644 --- a/alembic/versions/263109252fb1_add_legacy_equipment_fields.py +++ b/alembic/versions/263109252fb1_add_legacy_equipment_fields.py @@ -17,24 +17,24 @@ branch_labels: Union[str, Sequence[str], None] = None depends_on: Union[str, Sequence[str], None] = None FIELDS = ( - "WI_Duration", - "WI_EndFrequency", - "WI_Magnitude", - "WI_MicGain", - "WI_MinSoundDepth", - "WI_StartFrequency", + ("WI_Duration", sa.Integer()), + ("WI_EndFrequency", sa.Integer()), + ("WI_Magnitude", sa.Integer()), + ("WI_MicGain", sa.Boolean()), + ("WI_MinSoundDepth", sa.Integer()), + ("WI_StartFrequency", sa.Integer()), ) def upgrade() -> None: """Upgrade schema.""" - for field in FIELDS: + for field, column_type in FIELDS: op.add_column( "deployment", sa.Column( f"nma_{field}", - sa.Integer(), + column_type, nullable=True, ), ) @@ -42,5 +42,5 @@ def upgrade() -> None: def downgrade() -> None: """Downgrade schema.""" - for field in FIELDS: + for field, _ in FIELDS: op.drop_column("deployment", f"nma_{field}") diff --git a/alembic/versions/e123456789ab_add_observation_data_quality.py b/alembic/versions/e123456789ab_add_observation_data_quality.py new file mode 100644 index 000000000..717a0c82e --- /dev/null +++ b/alembic/versions/e123456789ab_add_observation_data_quality.py @@ -0,0 +1,46 @@ +"""add nma_data_quality to observation + +Revision ID: e123456789ab +Revises: b12e3919077e +Create Date: 2026-02-05 12:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "e123456789ab" +down_revision: Union[str, Sequence[str], None] = "b12e3919077e" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.add_column( + "observation", + sa.Column( + "nma_data_quality", + sa.String(length=100), + sa.ForeignKey("lexicon_term.term", onupdate="CASCADE"), + nullable=True, + ), + ) + op.add_column( + "observation_version", + sa.Column( + "nma_data_quality", + sa.String(length=100), + sa.ForeignKey("lexicon_term.term", onupdate="CASCADE"), + nullable=True, + ), + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.drop_column("observation_version", "nma_data_quality") + op.drop_column("observation", "nma_data_quality") diff --git a/core/lexicon.json b/core/lexicon.json index 273956469..01539f2d2 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -656,21 +656,21 @@ "data_quality" ], "term": "Water level accurate to within two hundreths of a foot", - "definition": "Good" + "definition": "Water level accurate to within two hundreths of a foot" }, { "categories": [ "data_quality" ], "term": "Water level accurate to within one foot", - "definition": "Fair" + "definition": "Water level accurate to within one foot" }, { "categories": [ "data_quality" ], "term": "Water level accuracy not to nearest foot or water level not repeatable", - "definition": "Poor" + "definition": "Water level accuracy not to nearest foot or water level not repeatable" }, { "categories": [ @@ -712,7 +712,7 @@ "data_quality" ], "term": "None", - "definition": "NA" + "definition": "None" }, { "categories": [ @@ -8149,4 +8149,4 @@ "definition": "Data were not field checked but are considered reliable" } ] -} +} \ No newline at end of file diff --git a/db/deployment.py b/db/deployment.py index 20c4e8651..6f07830a7 100644 --- a/db/deployment.py +++ b/db/deployment.py @@ -6,7 +6,7 @@ from typing import TYPE_CHECKING -from sqlalchemy import Integer, ForeignKey, Date, Numeric, Text +from sqlalchemy import Integer, ForeignKey, Date, Numeric, Text, Boolean from sqlalchemy.orm import relationship, Mapped, mapped_column from db.base import Base, AutoBaseMixin, ReleaseMixin, lexicon_term @@ -49,7 +49,7 @@ class Deployment(Base, AutoBaseMixin, ReleaseMixin): nma_WI_Duration: Mapped[int] = mapped_column(Integer, nullable=True) nma_WI_EndFrequency: Mapped[int] = mapped_column(Integer, nullable=True) nma_WI_Magnitude: Mapped[int] = mapped_column(Integer, nullable=True) - nma_WI_MicGain: Mapped[int] = mapped_column(Integer, nullable=True) + nma_WI_MicGain: Mapped[bool] = mapped_column(Boolean, nullable=True) nma_WI_MinSoundDepth: Mapped[int] = mapped_column(Integer, nullable=True) nma_WI_StartFrequency: Mapped[int] = mapped_column(Integer, nullable=True) diff --git a/db/observation.py b/db/observation.py index 27fe70458..d716f9084 100644 --- a/db/observation.py +++ b/db/observation.py @@ -14,6 +14,8 @@ # limitations under the License. # =============================================================================== from datetime import datetime +from typing import TYPE_CHECKING + from sqlalchemy import ( ForeignKey, DateTime, @@ -23,8 +25,6 @@ from db.base import Base, AutoBaseMixin, ReleaseMixin, lexicon_term -from typing import TYPE_CHECKING - if TYPE_CHECKING: from db.sample import Sample from db.sensor import Sensor @@ -64,6 +64,10 @@ class Observation(Base, AutoBaseMixin, ReleaseMixin): ) unit: Mapped[str] = lexicon_term(nullable=False) notes: Mapped[str] = mapped_column(nullable=True) + nma_data_quality: Mapped[str] = lexicon_term( + nullable=True, + comment="Legacy WaterLevels DataQuality mapped to lexicon term", + ) # groundwater measuring_point_height: Mapped[float] = mapped_column( diff --git a/schemas/observation.py b/schemas/observation.py index 2012f002f..a42a9fc29 100644 --- a/schemas/observation.py +++ b/schemas/observation.py @@ -25,6 +25,7 @@ ) from typing_extensions import Self +from core.enums import Unit from schemas import ( BaseCreateModel, BaseUpdateModel, @@ -32,7 +33,7 @@ UTCAwareDatetime, ) from schemas.parameter import ParameterResponse -from core.enums import Unit + # class GeothermalMixin: # depth: float @@ -111,6 +112,7 @@ class BaseObservationResponse(BaseResponseModel): parameter: ParameterResponse value: float | None unit: Unit + nma_data_quality: str | None = None class GroundwaterLevelObservationResponse(BaseObservationResponse): diff --git a/tests/test_sensor_transfer.py b/tests/test_sensor_transfer.py new file mode 100644 index 000000000..02ef92db5 --- /dev/null +++ b/tests/test_sensor_transfer.py @@ -0,0 +1,24 @@ +import numpy as np +import pandas as pd + +from transfers.sensor_transfer import _coerce_wi_mic_gain + + +def test_coerce_wi_mic_gain_numeric(): + assert _coerce_wi_mic_gain(1) is True + assert _coerce_wi_mic_gain(0) is False + assert _coerce_wi_mic_gain(1.0) is True + + +def test_coerce_wi_mic_gain_strings(): + assert _coerce_wi_mic_gain("1") is True + assert _coerce_wi_mic_gain("0") is False + assert _coerce_wi_mic_gain(" true ") is True + assert _coerce_wi_mic_gain("False") is False + + +def test_coerce_wi_mic_gain_handles_none_like(): + assert _coerce_wi_mic_gain(None) is None + assert _coerce_wi_mic_gain(" ") is None + assert _coerce_wi_mic_gain(pd.NA) is None + assert _coerce_wi_mic_gain(np.nan) is None diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index a709fa21b..ab0d05952 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -27,7 +27,10 @@ import pandas as pd import pytest +from db import Sample from transfers.util import make_location +from transfers.waterlevels_transfer import WaterLevelTransferer + # ============================================================================ # FIXTURES @@ -173,6 +176,38 @@ def test_make_location_maps_data_reliability_code(mock_lexicon_mapper): assert location.nma_data_reliability == mock_lexicon_mapper.map_value.return_value +def test_make_observation_maps_data_quality(): + transfer = WaterLevelTransferer.__new__(WaterLevelTransferer) + transfer.groundwater_parameter_id = 1 + + row = pd.Series( + { + "MPHeight": 1.0, + "DepthToWater": 10.0, + "DepthToWaterBGS": 9.0, + "GlobalID": "TEST-GLOBAL", + "DataQuality": "U2", + } + ) + + sample = Sample( + field_activity_id=1, + sample_date=datetime.datetime.now(datetime.timezone.utc), + sample_name="test-sample", + sample_matrix="water", + sample_method="grab sample", + qc_type="Normal", + ) + + with patch("transfers.waterlevels_transfer.lexicon_mapper") as mapper: + mapper.map_value.return_value = "Mapped Quality" + observation = transfer._make_observation( + row, sample, datetime.datetime.now(datetime.timezone.utc), "Reason" + ) + mapper.map_value.assert_any_call("LU_DataQuality:U2") + assert observation.nma_data_quality == "Mapped Quality" + + def test_make_location_with_very_old_site_date(mock_lexicon_mapper): """Test that very old SiteDates (1950s) are preserved correctly""" row = pd.Series( diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py index 91d5f8475..97d3c3583 100644 --- a/transfers/sensor_transfer.py +++ b/transfers/sensor_transfer.py @@ -48,6 +48,26 @@ } +def _coerce_wi_mic_gain(value): + if value is None or (isinstance(value, str) and not value.strip()): + return None + if isinstance(value, str): + value = value.strip() + if pd.isna(value): + return None + if isinstance(value, bool): + return value + try: + return bool(int(float(value))) + except (ValueError, TypeError): + lowered = str(value).strip().lower() + if lowered in {"true", "t", "yes", "y"}: + return True + if lowered in {"false", "f", "no", "n"}: + return False + return None + + class SensorTransferer(ThingBasedTransferer): source_table = "Equipment" @@ -221,7 +241,7 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base): nma_WI_Duration=row.WI_Duration, nma_WI_EndFrequency=row.WI_EndFrequency, nma_WI_Magnitude=row.WI_Magnitude, - nma_WI_MicGain=row.WI_MicGain, + nma_WI_MicGain=_coerce_wi_mic_gain(row.WI_MicGain), nma_WI_MinSoundDepth=row.WI_MinSoundDepth, nma_WI_StartFrequency=row.WI_StartFrequency, ) diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index c09d7d3dd..fcab34009 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -197,6 +197,17 @@ def _make_observation( else: value = row.DepthToWater + data_quality = None + dq_raw = getattr(row, "DataQuality", None) + if dq_raw and pd.notna(dq_raw): + dq_code = str(dq_raw).strip() + try: + data_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") + except KeyError: + logger.warning( + f"{SPACE_6}Unknown DataQuality code '{dq_code}' for WaterLevels record {row.GlobalID}" + ) + # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) observation = Observation( nma_pk_waterlevels=row.GlobalID, @@ -209,6 +220,7 @@ def _make_observation( unit="ft", measuring_point_height=measuring_point_height, groundwater_level_reason=glv, + nma_data_quality=data_quality, ) return observation From 3bb4e1037200af20505f498deb7f26a1e366fb38 Mon Sep 17 00:00:00 2001 From: jirhiker Date: Fri, 30 Jan 2026 00:39:56 +0000 Subject: [PATCH 2/5] Formatting changes --- schemas/observation.py | 1 - tests/test_transfer_legacy_dates.py | 1 - 2 files changed, 2 deletions(-) diff --git a/schemas/observation.py b/schemas/observation.py index a42a9fc29..6f645b13f 100644 --- a/schemas/observation.py +++ b/schemas/observation.py @@ -34,7 +34,6 @@ ) from schemas.parameter import ParameterResponse - # class GeothermalMixin: # depth: float # temperature: float diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index ab0d05952..ad7df0365 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -31,7 +31,6 @@ from transfers.util import make_location from transfers.waterlevels_transfer import WaterLevelTransferer - # ============================================================================ # FIXTURES # ============================================================================ From 471a9ad2f30c01afd1ddec4784cce2f3d4e88155 Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 30 Jan 2026 11:54:57 +1100 Subject: [PATCH 3/5] feat: add timezone handling for DateMeasured in WaterLevelTransferer --- tests/test_transfer_legacy_dates.py | 20 ++++++++++++++++++++ transfers/waterlevels_transfer.py | 13 +++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index ad7df0365..caf194b8c 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -31,6 +31,7 @@ from transfers.util import make_location from transfers.waterlevels_transfer import WaterLevelTransferer + # ============================================================================ # FIXTURES # ============================================================================ @@ -207,6 +208,25 @@ def test_make_observation_maps_data_quality(): assert observation.nma_data_quality == "Mapped Quality" +def test_get_dt_utc_respects_time_datum(): + transfer = WaterLevelTransferer.__new__(WaterLevelTransferer) + base = { + "PointID": "TEST", + "OBJECTID": 1, + "DateMeasured": "2025-01-01", + "TimeMeasured": "10:00:00.000000", + } + + row_mst = pd.Series({**base, "TimeDatum": "MST"}) + dt_mst = transfer._get_dt_utc(row_mst) + assert dt_mst.tzinfo == datetime.timezone.utc + assert dt_mst.hour == 17 + + row_mdt = pd.Series({**base, "TimeDatum": "MDT"}) + dt_mdt = transfer._get_dt_utc(row_mdt) + assert dt_mdt.hour == 16 + + def test_make_location_with_very_old_site_date(mock_lexicon_mapper): """Test that very old SiteDates (1950s) are preserved correctly""" row = pd.Series( diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index fcab34009..8ebf1e795 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -15,7 +15,7 @@ # =============================================================================== import json import uuid -from datetime import datetime +from datetime import datetime, timezone, timedelta import pandas as pd from sqlalchemy.orm import Session @@ -335,7 +335,6 @@ def _get_dt_utc(self, row) -> datetime | None: try: dt = datetime.strptime(dt_measured, fmt) - return convert_mt_to_utc(dt) except ValueError as e: self._capture_error(row.PointID, str(e), "DateMeasured") logger.critical( @@ -344,5 +343,15 @@ def _get_dt_utc(self, row) -> datetime | None: ) return None + time_datum = getattr(row, "TimeDatum", None) + if time_datum and pd.notna(time_datum): + datum = str(time_datum).strip().upper() + if datum in {"MST", "MDT"}: + offset_hours = -7 if datum == "MST" else -6 + tz = timezone(timedelta(hours=offset_hours)) + return dt.replace(tzinfo=tz).astimezone(timezone.utc) + + return convert_mt_to_utc(dt) + # ============= EOF ============================================= From 4739aa4279f9b63fd722d19abdbaadf50cff69bb Mon Sep 17 00:00:00 2001 From: jirhiker Date: Fri, 30 Jan 2026 00:55:30 +0000 Subject: [PATCH 4/5] Formatting changes --- tests/test_transfer_legacy_dates.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index caf194b8c..0701189ee 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -31,7 +31,6 @@ from transfers.util import make_location from transfers.waterlevels_transfer import WaterLevelTransferer - # ============================================================================ # FIXTURES # ============================================================================ From d632b625970dfe1073f929e4da69839b0ad8aaae Mon Sep 17 00:00:00 2001 From: jakeross Date: Fri, 30 Jan 2026 12:10:10 +1100 Subject: [PATCH 5/5] feat: enhance datetime handling in WaterLevelTransferer and related tests --- tests/test_transfer_legacy_dates.py | 2 ++ transfers/waterlevels_transfer.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_transfer_legacy_dates.py b/tests/test_transfer_legacy_dates.py index 0701189ee..bbfce3a56 100644 --- a/tests/test_transfer_legacy_dates.py +++ b/tests/test_transfer_legacy_dates.py @@ -209,6 +209,8 @@ def test_make_observation_maps_data_quality(): def test_get_dt_utc_respects_time_datum(): transfer = WaterLevelTransferer.__new__(WaterLevelTransferer) + transfer.errors = [] + transfer.source_table = "WaterLevels" base = { "PointID": "TEST", "OBJECTID": 1, diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 8ebf1e795..6697b3442 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -329,7 +329,8 @@ def _get_dt_utc(self, row) -> datetime | None: t = row.TimeMeasured # Truncate microseconds to 6 digits if present if "." in t: - t = t[:-6] + dot_index = t.find(".") + t = t[: dot_index + 7] dt_measured = f"{row.DateMeasured} {t}"