Skip to content

Commit b0a5ca9

Browse files
Change behavior to use the shorthand columns if they exist, even if other default column names are defined
1 parent 38e2af7 commit b0a5ca9

File tree

4 files changed: +35 -21 lines changed

metameq/src/metadata_extender.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -728,9 +728,10 @@ def _find_internal_col_source_name(
728728
if specified_name:
729729
if specified_name != internal_key:
730730
if internal_key in raw_metadata_df.columns:
731-
raise ValueError(f"Metadata contains both '{internal_key}' and "
732-
f"'{specified_name}' columns, which are both specified as "
733-
f"names for the {internal_key} field.")
731+
logger.warning(
732+
f"Metadata contains both '{internal_key}' and "
733+
f"'{specified_name}' columns; using '{internal_key}'.")
734+
return None
734735
if specified_name not in raw_metadata_df.columns:
735736
raise ValueError(
736737
f"Specified {internal_key} column '{specified_name}' not found in metadata.")

metameq/tests/test_metadata_extender/test__extend_metadata_from_full_flat_config.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -258,19 +258,24 @@ def test_col_name_not_found_raises(self):
258258
input_df, self.BASIC_FLAT_CONFIG, None,
259259
"nonexistent_col", None)
260260

261-
def test_col_name_conflict_raises(self):
262-
"""Test that both internal and alternate columns raises ValueError."""
261+
def test_col_name_conflict_warns_and_uses_internal_key(self):
262+
"""Test that both internal and alternate columns warns and uses internal key."""
263263
input_df = pandas.DataFrame({
264264
SAMPLE_NAME_KEY: ["sample1"],
265265
HOSTTYPE_SHORTHAND_KEY: ["human"],
266266
"host_type": ["human"],
267267
SAMPLETYPE_SHORTHAND_KEY: ["stool"]
268268
})
269269

270-
with self.assertRaisesRegex(ValueError, "contains both"):
271-
_extend_metadata_from_full_flat_config(
272-
input_df, self.BASIC_FLAT_CONFIG, None,
273-
"host_type", None)
270+
with self.assertLogs("metameq.src.metadata_extender", level="WARNING") as cm:
271+
result_df, validation_msgs_df, col_name_mapping = \
272+
_extend_metadata_from_full_flat_config(
273+
input_df, self.BASIC_FLAT_CONFIG, None,
274+
"host_type", None)
275+
276+
self.assertTrue(any("contains both" in msg for msg in cm.output))
277+
self.assertEqual(HOSTTYPE_SHORTHAND_KEY,
278+
col_name_mapping[HOSTTYPE_SHORTHAND_KEY])
274279

275280
def test_unknown_host_type(self):
276281
"""Test that unknown host type adds QC note."""

metameq/tests/test_metadata_extender/test_group_column_identification.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -551,20 +551,23 @@ def test__find_internal_col_source_name_param_key_equals_internal_key(self):
551551

552552
self.assertIsNone(result)
553553

554-
def test__find_internal_col_source_name_param_key_both_cols_raises(self):
555-
"""Test raises ValueError when both internal_key and param_key columns exist."""
554+
def test__find_internal_col_source_name_param_key_both_cols_warns(self):
555+
"""Test warns and returns None when both internal_key and param_key columns exist."""
556556
input_df = pandas.DataFrame({
557557
"host_type": ["human"],
558558
HOSTTYPE_SHORTHAND_KEY: ["human"]
559559
})
560560
config_dict = {}
561561

562-
with self.assertRaisesRegex(ValueError, "contains both"):
563-
_find_internal_col_source_name(
562+
with self.assertLogs("metameq.src.metadata_extender", level="WARNING") as cm:
563+
result = _find_internal_col_source_name(
564564
input_df, config_dict,
565565
"host_type", HOSTTYPE_SHORTHAND_KEY,
566566
HOSTTYPE_COL_OPTIONS_KEY)
567567

568+
self.assertIsNone(result)
569+
self.assertTrue(any("contains both" in msg for msg in cm.output))
570+
568571
def test__find_internal_col_source_name_param_key_not_in_df_raises(self):
569572
"""Test raises ValueError when param_key column not found in df."""
570573
input_df = pandas.DataFrame({
@@ -623,8 +626,8 @@ def test__find_internal_col_source_name_default_equals_internal_key(self):
623626

624627
self.assertIsNone(result)
625628

626-
def test__find_internal_col_source_name_default_both_cols_raises(self):
627-
"""Test raises ValueError when default column and internal_key both exist in df."""
629+
def test__find_internal_col_source_name_default_both_cols_warns(self):
630+
"""Test warns and returns None when default column and internal_key both exist in df."""
628631
input_df = pandas.DataFrame({
629632
"host_type": ["human"],
630633
HOSTTYPE_SHORTHAND_KEY: ["human"]
@@ -633,7 +636,10 @@ def test__find_internal_col_source_name_default_both_cols_raises(self):
633636
HOSTTYPE_COL_OPTIONS_KEY: ["host_type"]
634637
}
635638

636-
with self.assertRaisesRegex(ValueError, "contains both"):
637-
_find_internal_col_source_name(
639+
with self.assertLogs("metameq.src.metadata_extender", level="WARNING") as cm:
640+
result = _find_internal_col_source_name(
638641
input_df, config_dict,
639642
None, HOSTTYPE_SHORTHAND_KEY, HOSTTYPE_COL_OPTIONS_KEY)
643+
644+
self.assertIsNone(result)
645+
self.assertTrue(any("contains both" in msg for msg in cm.output))

metameq/tests/test_metadata_extender/test_group_populate_and_extend.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1006,8 +1006,8 @@ def test_extend_metadata_df_col_name_not_found_raises(self):
10061006
input_df, study_config, None, None, self.TEST_STDS_FP,
10071007
hosttype_col_name="nonexistent_col")
10081008

1009-
def test_extend_metadata_df_col_name_conflicts_raises(self):
1010-
"""Test that both internal and alternate columns existing raises ValueError."""
1009+
def test_extend_metadata_df_col_name_conflicts_warns_and_uses_internal_key(self):
1010+
"""Test that both internal and alternate columns warns and uses internal key."""
10111011
input_df = pandas.DataFrame({
10121012
SAMPLE_NAME_KEY: ["sample1"],
10131013
HOSTTYPE_SHORTHAND_KEY: ["human"],
@@ -1016,7 +1016,9 @@ def test_extend_metadata_df_col_name_conflicts_raises(self):
10161016
})
10171017
study_config = {}
10181018

1019-
with self.assertRaisesRegex(ValueError, "contains both"):
1020-
extend_metadata_df(
1019+
with self.assertLogs("metameq.src.metadata_extender", level="WARNING") as cm:
1020+
result_df, validation_msgs_df = extend_metadata_df(
10211021
input_df, study_config, None, None, self.TEST_STDS_FP,
10221022
hosttype_col_name="host_type")
1023+
1024+
self.assertTrue(any("contains both" in msg for msg in cm.output))

0 commit comments

Comments
 (0)