Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 217 additions & 0 deletions tests/test_transfer_preserve_empty_strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# ===============================================================================
# Copyright 2026 ross
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================
from __future__ import annotations

from types import SimpleNamespace
import uuid

from transfers.chemistry_sampleinfo import ChemistrySampleInfoTransferer
from transfers.hydraulicsdata import HydraulicsDataTransferer
from transfers.major_chemistry import MajorChemistryTransferer
from transfers.minor_trace_chemistry_transfer import MinorTraceChemistryTransferer
from transfers.radionuclides import RadionuclidesTransferer
from transfers.surface_water_data import SurfaceWaterDataTransferer
from transfers.weather_data import WeatherDataTransferer


def _make_transferer(cls, **attrs):
transferer = cls.__new__(cls)
transferer.errors = []
transferer.flags = {}
transferer.source_table = getattr(cls, "source_table", "Test")
for key, value in attrs.items():
setattr(transferer, key, value)
return transferer


def test_major_chemistry_preserves_empty_strings():
    """Empty-string text columns survive ``MajorChemistryTransferer._row_dict``."""
    blank_fields = ("SamplePointID", "Analyte", "Symbol", "Units", "Notes")
    transferer = _make_transferer(MajorChemistryTransferer)
    source_row = {
        "SamplePtID": uuid.uuid4(),
        "GlobalID": uuid.uuid4(),
        **dict.fromkeys(blank_fields, ""),
    }

    converted = transferer._row_dict(source_row)

    # Each blank input must come back as "" exactly.
    for field in blank_fields:
        assert converted[field] == "", field


def test_minor_trace_chemistry_preserves_empty_strings():
    """Empty-string attributes survive ``MinorTraceChemistryTransferer._row_to_dict``."""
    # Source attribute name -> key expected in the converted dict.
    blank_columns = {
        "Analyte": "analyte",
        "Units": "units",
        "Symbol": "symbol",
        "AnalysisMethod": "analysis_method",
        "Notes": "notes",
        "AnalysesAgency": "analyses_agency",
        "VolumeUnit": "volume_unit",
    }
    point_id = uuid.uuid4()
    record_id = uuid.uuid4()
    # The sample point id is registered on the transferer via _sample_pt_ids.
    transferer = _make_transferer(
        MinorTraceChemistryTransferer, _sample_pt_ids={point_id}
    )
    source_row = SimpleNamespace(
        SamplePtID=point_id,
        GlobalID=record_id,
        **dict.fromkeys(blank_columns, ""),
    )

    converted = transferer._row_to_dict(source_row)

    for result_key in blank_columns.values():
        assert converted[result_key] == "", result_key


def test_radionuclides_preserves_empty_strings():
    """Empty-string text columns survive ``RadionuclidesTransferer._row_dict``."""
    blank_fields = (
        "SamplePointID",
        "Analyte",
        "Symbol",
        "Units",
        "AnalysesAgency",
        "WCLab_ID",
    )
    # The transferer is given an empty _thing_id_by_sample_pt_id mapping.
    transferer = _make_transferer(
        RadionuclidesTransferer, _thing_id_by_sample_pt_id={}
    )
    source_row = {
        "SamplePtID": uuid.uuid4(),
        "GlobalID": uuid.uuid4(),
        **dict.fromkeys(blank_fields, ""),
    }

    converted = transferer._row_dict(source_row)

    for field in blank_fields:
        assert converted[field] == "", field


def test_weather_data_preserves_empty_strings():
    """An empty-string ``PointID`` survives ``WeatherDataTransferer._row_dict``."""
    source_row = dict(LocationId=None, PointID="", WeatherID=None, OBJECTID=1)

    converted = _make_transferer(WeatherDataTransferer)._row_dict(source_row)

    assert converted["PointID"] == ""


def test_surface_water_data_preserves_empty_strings():
    """Empty-string columns survive ``SurfaceWaterDataTransferer._row_dict``."""
    # Columns whose blank value is asserted on; PointID is also blank in
    # the input row but is not checked.
    blank_fields = (
        "Discharge",
        "DischargeMethod",
        "DischargeUnits",
        "DischargeSource",
        "SiteNotes",
        "FieldMethodNotes",
        "FormationZone",
        "AqClass",
        "SourceNotes",
        "DataSource",
    )
    transferer = _make_transferer(SurfaceWaterDataTransferer)
    source_row = {
        "SurfaceID": uuid.uuid4(),
        "PointID": "",
        "OBJECTID": 1,
        **dict.fromkeys(blank_fields, ""),
    }

    converted = transferer._row_dict(source_row)

    for field in blank_fields:
        assert converted[field] == "", field


def test_hydraulics_preserves_empty_strings():
    """Empty-string text columns survive ``HydraulicsDataTransferer._row_dict``."""
    blank_fields = (
        "HydraulicUnit",
        "HydraulicUnitType",
        "Hydraulic Remarks",
        "Data Source",
    )
    # The row's PointID ("TEST") has an entry in the supplied _thing_id_cache.
    transferer = _make_transferer(
        HydraulicsDataTransferer, _thing_id_cache={"TEST": 1}
    )
    source_row = {
        "GlobalID": uuid.uuid4(),
        "WellID": uuid.uuid4(),
        "PointID": "TEST",
        **dict.fromkeys(blank_fields, ""),
        "TestTop": 1,
        "TestBottom": 2,
    }

    converted = transferer._row_dict(source_row)

    for field in blank_fields:
        assert converted[field] == "", field


def test_chemistry_sampleinfo_preserves_empty_strings():
    """Empty-string text columns survive ``ChemistrySampleInfoTransferer._row_dict``."""
    blank_fields = (
        "WCLab_ID",
        "CollectionMethod",
        "CollectedBy",
        "AnalysesAgency",
        "SampleType",
        "SampleMaterialNotH2O",
        "WaterType",
        "StudySample",
        "DataSource",
        "SampleNotes",
    )
    # The row's SamplePointID ("TEST") has an entry in the supplied
    # _thing_id_cache.
    transferer = _make_transferer(
        ChemistrySampleInfoTransferer, _thing_id_cache={"TEST": 1}
    )
    source_row = {
        "SamplePtID": uuid.uuid4(),
        "SamplePointID": "TEST",
        **dict.fromkeys(blank_fields, ""),
        "LocationId": uuid.uuid4(),
        "OBJECTID": 1,
    }

    converted = transferer._row_dict(source_row)

    for field in blank_fields:
        assert converted[field] == "", field
9 changes: 8 additions & 1 deletion transfers/chemistry_sampleinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def _build_thing_id_cache(self):
logger.info(f"Built Thing ID cache with {len(self._thing_id_cache)} entries")

def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
input_df = read_csv(self.source_table, parse_dates=["CollectionDate"])
input_df = read_csv(
self.source_table, parse_dates=["CollectionDate"], keep_default_na=False
)
# Filter to only include rows where Thing exists (prevent orphan records)
cleaned_df = self._filter_to_valid_things(input_df)
cleaned_df = self._filter_to_valid_sample_pt_ids(cleaned_df)
Expand Down Expand Up @@ -242,6 +244,9 @@ def _transfer_hook(self, session: Session) -> None:
session.expunge_all()

def _row_dict(self, row: dict[str, Any]) -> dict[str, Any]:
def is_blank(value: Any) -> bool:
return isinstance(value, str) and value.strip() == ""

def val(key: str) -> Optional[Any]:
v = row.get(key)
if pd.isna(v):
Expand Down Expand Up @@ -287,6 +292,8 @@ def bool_val(key: str) -> Optional[bool]:

# Convert pandas Timestamp to datetime; native datetime stays unchanged.
collection_date = val("CollectionDate")
if is_blank(collection_date):
collection_date = None
if hasattr(collection_date, "to_pydatetime"):
collection_date = collection_date.to_pydatetime()

Expand Down
38 changes: 25 additions & 13 deletions transfers/hydraulicsdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _build_thing_id_cache(self) -> None:
logger.info(f"Built Thing ID cache with {len(self._thing_id_cache)} entries")

def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
df = read_csv(self.source_table)
df = read_csv(self.source_table, keep_default_na=False)
cleaned_df = self._filter_to_valid_things(df)
return df, cleaned_df

Expand Down Expand Up @@ -129,6 +129,9 @@ def _transfer_hook(self, session: Session) -> None:
session.expunge_all()

def _row_dict(self, row: dict[str, Any]) -> dict[str, Any]:
def is_blank(value: Any) -> bool:
return isinstance(value, str) and value.strip() == ""

def val(key: str) -> Optional[Any]:
v = row.get(key)
if pd.isna(v):
Expand All @@ -148,13 +151,22 @@ def as_uuid(key: str) -> Optional[uuid.UUID]:

def as_int(key: str) -> Optional[int]:
v = val(key)
if v is None:
if v is None or is_blank(v):
return None
try:
return int(v)
except (TypeError, ValueError):
return None

def as_float(key: str) -> Optional[float]:
v = val(key)
if v is None or is_blank(v):
return None
try:
return float(v)
except (TypeError, ValueError):
return None

return {
"GlobalID": as_uuid("GlobalID"),
"WellID": as_uuid("WellID"),
Expand All @@ -165,17 +177,17 @@ def as_int(key: str) -> Optional[int]:
"TestBottom": as_int("TestBottom"),
"HydraulicUnitType": val("HydraulicUnitType"),
"Hydraulic Remarks": val("Hydraulic Remarks"),
"T (ft2/d)": val("T (ft2/d)"),
"S (dimensionless)": val("S (dimensionless)"),
"Ss (ft-1)": val("Ss (ft-1)"),
"Sy (decimalfractn)": val("Sy (decimalfractn)"),
"KH (ft/d)": val("KH (ft/d)"),
"KV (ft/d)": val("KV (ft/d)"),
"HL (day-1)": val("HL (day-1)"),
"HD (ft2/d)": val("HD (ft2/d)"),
"Cs (gal/d/ft)": val("Cs (gal/d/ft)"),
"P (decimal fraction)": val("P (decimal fraction)"),
"k (darcy)": val("k (darcy)"),
"T (ft2/d)": as_float("T (ft2/d)"),
"S (dimensionless)": as_float("S (dimensionless)"),
"Ss (ft-1)": as_float("Ss (ft-1)"),
"Sy (decimalfractn)": as_float("Sy (decimalfractn)"),
"KH (ft/d)": as_float("KH (ft/d)"),
"KV (ft/d)": as_float("KV (ft/d)"),
"HL (day-1)": as_float("HL (day-1)"),
"HD (ft2/d)": as_float("HD (ft2/d)"),
"Cs (gal/d/ft)": as_float("Cs (gal/d/ft)"),
"P (decimal fraction)": as_float("P (decimal fraction)"),
"k (darcy)": as_float("k (darcy)"),
"Data Source": val("Data Source"),
"OBJECTID": as_int("OBJECTID"),
}
Expand Down
13 changes: 10 additions & 3 deletions transfers/major_chemistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ def _build_sample_pt_id_cache(self) -> None:
)

def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
input_df = read_csv(self.source_table, parse_dates=["AnalysisDate"])
input_df = read_csv(
self.source_table, parse_dates=["AnalysisDate"], keep_default_na=False
)
Comment thread
jirhiker marked this conversation as resolved.
cleaned_df = self._filter_to_valid_sample_infos(input_df)
return input_df, cleaned_df

Expand Down Expand Up @@ -131,6 +133,9 @@ def _transfer_hook(self, session: Session) -> None:
session.expunge_all()

def _row_dict(self, row: dict[str, Any]) -> Optional[dict[str, Any]]:
def is_blank(value: Any) -> bool:
return isinstance(value, str) and value.strip() == ""

def val(key: str) -> Optional[Any]:
v = row.get(key)
if pd.isna(v):
Expand All @@ -139,7 +144,7 @@ def val(key: str) -> Optional[Any]:

def float_val(key: str) -> Optional[float]:
v = val(key)
if v is None:
if v is None or is_blank(v):
return None
try:
return float(v)
Expand All @@ -148,14 +153,16 @@ def float_val(key: str) -> Optional[float]:

def int_val(key: str) -> Optional[int]:
v = val(key)
if v is None:
if v is None or is_blank(v):
return None
try:
return int(v)
except (TypeError, ValueError):
return None

analysis_date = val("AnalysisDate")
if is_blank(analysis_date):
analysis_date = None
if hasattr(analysis_date, "to_pydatetime"):
analysis_date = analysis_date.to_pydatetime()
if isinstance(analysis_date, datetime):
Expand Down
6 changes: 5 additions & 1 deletion transfers/minor_trace_chemistry_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def _build_sample_pt_id_cache(self):
)

def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
input_df = read_csv(self.source_table)
input_df = read_csv(self.source_table, keep_default_na=False)
# Filter to only include rows where ChemistrySampleInfo exists
cleaned_df = self._filter_to_valid_sample_infos(input_df)
return input_df, cleaned_df
Expand Down Expand Up @@ -223,13 +223,17 @@ def _safe_float(self, row, attr: str) -> Optional[float]:
val = getattr(row, attr, None)
if val is None or pd.isna(val):
return None
if isinstance(val, str) and val.strip() == "":
return None
return float(val)

def _parse_date(self, row, attr: str) -> Optional[date]:
"""Parse a date value from the row."""
val = getattr(row, attr, None)
if val is None or pd.isna(val):
return None
if isinstance(val, str) and val.strip() == "":
return None

# Handle pandas Timestamp
if hasattr(val, "date"):
Expand Down
Loading
Loading