From a5c8d89b9c6e162459b693428f060e698d460069 Mon Sep 17 00:00:00 2001 From: Mahhheshh <100200105+Mahhheshh@users.noreply.github.com> Date: Sun, 22 Feb 2026 16:03:48 +0530 Subject: [PATCH 1/7] add extra error messages and fix response schema --- src/core/errors.py | 41 ++++++++++++++++++++++++++ src/routers/openml/qualities.py | 36 ++++++++++++++-------- tests/routers/openml/qualities_test.py | 3 +- 3 files changed, 66 insertions(+), 14 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index 3f53364a..315aab6d 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -374,6 +374,47 @@ class ServiceNotFoundError(ProblemDetailError): _default_status_code = HTTPStatus.NOT_FOUND +# ============================================================================= +# Quality Errors +# ============================================================================= + + +class QualityUnknownDatasetError(ProblemDetailError): + """Raised when requesting qualities for an unknown or inaccessible dataset.""" + + uri = "https://openml.org/problems/quality-unknown-dataset" + title = "Unknown Dataset" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 361 + + +class QualityNoQualitiesError(ProblemDetailError): + """Raised when a dataset has no stored quality values.""" + + uri = "https://openml.org/problems/quality-no-qualities" + title = "No Qualities Found" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 362 + + +class QualityDatasetNotProcessedError(ProblemDetailError): + """Raised when quality values are requested before dataset processing finished.""" + + uri = "https://openml.org/problems/quality-dataset-not-processed" + title = "Dataset Not Processed" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 363 + + +class QualityDatasetProcessingError(ProblemDetailError): + """Raised when quality values are unavailable due to processing errors.""" + + uri = 
"https://openml.org/problems/quality-dataset-processing-error" + title = "Dataset Processed With Error" + _default_status_code = HTTPStatus.PRECONDITION_FAILED + _default_code = 364 + + # ============================================================================= # Internal Errors # ============================================================================= diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 0f40f848..bcac1eee 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -7,7 +7,12 @@ import database.datasets import database.qualities from core.access import _user_has_access -from core.errors import DatasetNotFoundError +from core.errors import ( + DatasetNotFoundError, + QualityDatasetNotProcessedError, + QualityDatasetProcessingError, + QualityNoQualitiesError, +) from database.users import User from routers.dependencies import expdb_connection, fetch_user from schemas.datasets.openml import Quality @@ -37,17 +42,24 @@ async def get_qualities( if not dataset or not await _user_has_access(dataset, user): # Backwards compatibility: PHP API returns 412 with code 113 msg = f"Dataset with id {dataset_id} not found." - no_data_file = 113 raise DatasetNotFoundError( msg, - code=no_data_file, + code=113, status_code=HTTPStatus.PRECONDITION_FAILED, - ) - return await database.qualities.get_for_dataset(dataset_id, expdb) - # The PHP API provided (sometime) helpful error messages - # if not qualities: - # check if dataset exists: error 360 - # check if user has access: error 361 - # check if there is a data processed entry and forward the error: 364 - # if nothing in process table: 363 - # otherwise: error 362 + ) from None + + processing = await database.datasets.get_latest_processing_update(dataset_id, expdb) + if processing is None: + msg = f"Dataset not processed yet for dataset {dataset_id}." 
+ raise QualityDatasetNotProcessedError(msg) + + if processing.error: + msg = f"Dataset processed with error for dataset {dataset_id}." + raise QualityDatasetProcessingError(msg) + + qualities = await database.qualities.get_for_dataset(dataset_id, expdb) + if not qualities: + msg = f"No qualities found for dataset {dataset_id}." + raise QualityNoQualitiesError(msg) + + return qualities diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index a1360cfc..673ea404 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -317,9 +317,8 @@ async def test_get_quality_identical_error( py_api: httpx.AsyncClient, php_api: httpx.AsyncClient, ) -> None: - if data_id in [55, 56, 59]: - pytest.skip("Detailed error for code 364 (failed processing) not yet supported.") if data_id in [116]: # noqa: FURB171 + # skipping 116 is not valid case for 362 pytest.skip("Detailed error for code 362 (no qualities) not yet supported.") python_response, php_response = await asyncio.gather( py_api.get(f"/datasets/qualities/{data_id}"), From 6c222279e2fca56274e2aab609f58f341abd4627 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 23 Mar 2026 14:10:43 +0100 Subject: [PATCH 2/7] Remove unused error class --- src/core/errors.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index 315aab6d..48af54c3 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -379,15 +379,6 @@ class ServiceNotFoundError(ProblemDetailError): # ============================================================================= -class QualityUnknownDatasetError(ProblemDetailError): - """Raised when requesting qualities for an unknown or inaccessible dataset.""" - - uri = "https://openml.org/problems/quality-unknown-dataset" - title = "Unknown Dataset" - _default_status_code = HTTPStatus.PRECONDITION_FAILED - _default_code = 361 - - class QualityNoQualitiesError(ProblemDetailError): """Raised 
when a dataset has no stored quality values.""" From 59f39ae5e499b86aa098892f094456d2bbb67487 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 23 Mar 2026 14:11:41 +0100 Subject: [PATCH 3/7] Start unifying the test functions for now to ease migration tests --- src/routers/openml/qualities.py | 6 +-- tests/routers/openml/qualities_test.py | 65 +++++++++++++++----------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index bcac1eee..54056d3c 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -44,8 +44,7 @@ async def get_qualities( msg = f"Dataset with id {dataset_id} not found." raise DatasetNotFoundError( msg, - code=113, - status_code=HTTPStatus.PRECONDITION_FAILED, + code=361, ) from None processing = await database.datasets.get_latest_processing_update(dataset_id, expdb) @@ -54,8 +53,7 @@ async def get_qualities( raise QualityDatasetNotProcessedError(msg) if processing.error: - msg = f"Dataset processed with error for dataset {dataset_id}." 
- raise QualityDatasetProcessingError(msg) + raise QualityDatasetProcessingError(processing.error.strip()) qualities = await database.qualities.get_for_dataset(dataset_id, expdb) if not qualities: diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index 673ea404..b56d5310 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -1,5 +1,6 @@ import asyncio from http import HTTPStatus +import re import deepdiff import httpx @@ -287,7 +288,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None: @pytest.mark.parametrize( "data_id", - list(set(range(1, 132)) - {55, 56, 59, 116, 130}), + list(set(range(1, 133))), ) async def test_get_quality_identical( data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient @@ -296,8 +297,21 @@ async def test_get_quality_identical( py_api.get(f"/datasets/qualities/{data_id}"), php_api.get(f"/data/qualities/{data_id}"), ) - assert python_response.status_code == php_response.status_code + if php_response.status_code == HTTPStatus.OK: + _assert_get_quality_success_equal(python_response, php_response) + return + + php_error_code = int(php_response.json()["error"]["code"]) + if php_error_code == 361: + _assert_get_quality_error_dataset_not_found(python_response, php_response) + elif php_error_code == 364: + _assert_get_quality_error_dataset_process_error(python_response, php_response) + else: + raise AssertionError(f"Dataset {data_id} response not under test:", php_response.json()) + +def _assert_get_quality_success_equal(python_response, php_response): + assert python_response.status_code == php_response.status_code expected = [ { "name": quality["name"], @@ -308,27 +322,26 @@ async def test_get_quality_identical( assert python_response.json() == expected -@pytest.mark.parametrize( - "data_id", - [55, 56, 59, 116, 130, 132], -) -async def test_get_quality_identical_error( - data_id: int, - py_api: httpx.AsyncClient, - php_api: 
httpx.AsyncClient, -) -> None: - if data_id in [116]: # noqa: FURB171 - # skipping 116 is not valid case for 362 - pytest.skip("Detailed error for code 362 (no qualities) not yet supported.") - python_response, php_response = await asyncio.gather( - py_api.get(f"/datasets/qualities/{data_id}"), - php_api.get(f"/data/qualities/{data_id}"), - ) - assert python_response.status_code == php_response.status_code - # RFC 9457: Python API now returns problem+json format - assert python_response.headers["content-type"] == "application/problem+json" - error = python_response.json() - assert error["type"] == DatasetNotFoundError.uri - # Verify the error message matches the PHP API semantically - assert php_response.json()["error"]["message"] == "Unknown dataset" - assert error["detail"] == f"Dataset with id {data_id} not found." +def _assert_get_quality_error_dataset_not_found(python_response, php_response): + assert php_response.status_code == HTTPStatus.CONFLICT + assert python_response.status_code == HTTPStatus.NOT_FOUND + + php_error = php_response.json()["error"] + py_error = python_response.json() + + assert int(php_error["code"]) == py_error["code"] + assert php_error["message"] == "Unknown dataset" + assert re.match(r"Dataset with id \d+ not found.", py_error["detail"]) + + +def _assert_get_quality_error_dataset_process_error(python_response, php_response): + assert php_response.status_code == python_response.status_code + + php_error = php_response.json()["error"] + py_error = python_response.json() + + assert php_error["code"] == py_error["code"] + assert php_error["message"].title() == py_error["title"] + # The PHP can add some additional unnecessary escapes. 
+ assert php_error["additional_information"][:30] == py_error["detail"][:30] + assert php_error["additional_information"][-30:] == py_error["detail"][-30:] From eeca93fd51aba5fa947e75e7c32cfac6a2d57ceb Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 23 Mar 2026 14:19:21 +0100 Subject: [PATCH 4/7] Add type hints --- src/routers/openml/qualities.py | 1 - tests/routers/openml/qualities_test.py | 27 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 54056d3c..075a29c2 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -1,4 +1,3 @@ -from http import HTTPStatus from typing import Annotated, Literal from fastapi import APIRouter, Depends diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index b56d5310..7d6641ef 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -1,6 +1,6 @@ import asyncio -from http import HTTPStatus import re +from http import HTTPStatus import deepdiff import httpx @@ -8,8 +8,6 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection -from core.errors import DatasetNotFoundError - async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConnection) -> None: await expdb_test.execute( @@ -302,15 +300,18 @@ async def test_get_quality_identical( return php_error_code = int(php_response.json()["error"]["code"]) - if php_error_code == 361: + if php_error_code == 361: # noqa: PLR2004 _assert_get_quality_error_dataset_not_found(python_response, php_response) - elif php_error_code == 364: + elif php_error_code == 364: # noqa: PLR2004 _assert_get_quality_error_dataset_process_error(python_response, php_response) else: - raise AssertionError(f"Dataset {data_id} response not under test:", php_response.json()) + msg = f"Dataset {data_id} response not under test:", php_response.json() 
+ raise AssertionError(msg) -def _assert_get_quality_success_equal(python_response, php_response): +def _assert_get_quality_success_equal( + python_response: httpx.Response, php_response: httpx.Response +) -> None: assert python_response.status_code == php_response.status_code expected = [ { @@ -322,19 +323,23 @@ def _assert_get_quality_success_equal(python_response, php_response): assert python_response.json() == expected -def _assert_get_quality_error_dataset_not_found(python_response, php_response): - assert php_response.status_code == HTTPStatus.CONFLICT +def _assert_get_quality_error_dataset_not_found( + python_response: httpx.Response, php_response: httpx.Response +) -> None: + assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED assert python_response.status_code == HTTPStatus.NOT_FOUND php_error = php_response.json()["error"] py_error = python_response.json() - assert int(php_error["code"]) == py_error["code"] + assert php_error["code"] == py_error["code"] assert php_error["message"] == "Unknown dataset" assert re.match(r"Dataset with id \d+ not found.", py_error["detail"]) -def _assert_get_quality_error_dataset_process_error(python_response, php_response): +def _assert_get_quality_error_dataset_process_error( + python_response: httpx.Response, php_response: httpx.Response +) -> None: assert php_response.status_code == python_response.status_code php_error = php_response.json()["error"] From 5c31d231487b6e03cc1dcf7a6f653b2c9880ac69 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 23 Mar 2026 14:25:29 +0100 Subject: [PATCH 5/7] Use the pre-existing Dataset error classes The used error class should in general be consistent for the cause of the error. 
--- src/core/errors.py | 20 +------------------- src/routers/openml/qualities.py | 12 ++++++------ tests/routers/openml/qualities_test.py | 3 ++- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/src/core/errors.py b/src/core/errors.py index 48af54c3..8469b9a3 100644 --- a/src/core/errors.py +++ b/src/core/errors.py @@ -379,7 +379,7 @@ class ServiceNotFoundError(ProblemDetailError): # ============================================================================= -class QualityNoQualitiesError(ProblemDetailError): +class NoQualitiesError(ProblemDetailError): """Raised when a dataset has no stored quality values.""" uri = "https://openml.org/problems/quality-no-qualities" @@ -388,24 +388,6 @@ class QualityNoQualitiesError(ProblemDetailError): _default_code = 362 -class QualityDatasetNotProcessedError(ProblemDetailError): - """Raised when quality values are requested before dataset processing finished.""" - - uri = "https://openml.org/problems/quality-dataset-not-processed" - title = "Dataset Not Processed" - _default_status_code = HTTPStatus.PRECONDITION_FAILED - _default_code = 363 - - -class QualityDatasetProcessingError(ProblemDetailError): - """Raised when quality values are unavailable due to processing errors.""" - - uri = "https://openml.org/problems/quality-dataset-processing-error" - title = "Dataset Processed With Error" - _default_status_code = HTTPStatus.PRECONDITION_FAILED - _default_code = 364 - - # ============================================================================= # Internal Errors # ============================================================================= diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 075a29c2..64fd7a0f 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -8,9 +8,9 @@ from core.access import _user_has_access from core.errors import ( DatasetNotFoundError, - QualityDatasetNotProcessedError, - QualityDatasetProcessingError, - 
QualityNoQualitiesError, + DatasetNotProcessedError, + DatasetProcessingError, + NoQualitiesError, ) from database.users import User from routers.dependencies import expdb_connection, fetch_user @@ -49,14 +49,14 @@ async def get_qualities( processing = await database.datasets.get_latest_processing_update(dataset_id, expdb) if processing is None: msg = f"Dataset not processed yet for dataset {dataset_id}." - raise QualityDatasetNotProcessedError(msg) + raise DatasetNotProcessedError(msg, code=363) if processing.error: - raise QualityDatasetProcessingError(processing.error.strip()) + raise DatasetProcessingError(processing.error.strip(), code=364) qualities = await database.qualities.get_for_dataset(dataset_id, expdb) if not qualities: msg = f"No qualities found for dataset {dataset_id}." - raise QualityNoQualitiesError(msg) + raise NoQualitiesError(msg) return qualities diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index 7d6641ef..594bb464 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -346,7 +346,8 @@ def _assert_get_quality_error_dataset_process_error( py_error = python_response.json() assert php_error["code"] == py_error["code"] - assert php_error["message"].title() == py_error["title"] + assert php_error["message"] == "Dataset processed with error" + assert py_error["title"] == "Dataset Processing Error" # The PHP can add some additional unnecessary escapes. 
assert php_error["additional_information"][:30] == py_error["detail"][:30] assert php_error["additional_information"][-30:] == py_error["detail"][-30:] From a9dede1b6ca1e010164657d13a053e509ab4f129 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 23 Mar 2026 14:35:55 +0100 Subject: [PATCH 6/7] remove old misleading comment --- src/routers/openml/qualities.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py index 64fd7a0f..eff7081a 100644 --- a/src/routers/openml/qualities.py +++ b/src/routers/openml/qualities.py @@ -39,7 +39,6 @@ async def get_qualities( ) -> list[Quality]: dataset = await database.datasets.get(dataset_id, expdb) if not dataset or not await _user_has_access(dataset, user): - # Backwards compatibility: PHP API returns 412 with code 113 msg = f"Dataset with id {dataset_id} not found." raise DatasetNotFoundError( msg, @@ -52,7 +51,8 @@ async def get_qualities( raise DatasetNotProcessedError(msg, code=363) if processing.error: - raise DatasetProcessingError(processing.error.strip(), code=364) + msg = processing.error.strip() or "Error occurred during processing." 
+ raise DatasetProcessingError(msg, code=364) qualities = await database.qualities.get_for_dataset(dataset_id, expdb) if not qualities: From 570f51f6940cb82a6c4207cb2799c3f5b720b0ed Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 23 Mar 2026 14:37:10 +0100 Subject: [PATCH 7/7] add a dataset that is guaranteed to be missing --- tests/routers/openml/qualities_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py index 594bb464..a7825202 100644 --- a/tests/routers/openml/qualities_test.py +++ b/tests/routers/openml/qualities_test.py @@ -286,7 +286,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None: @pytest.mark.parametrize( "data_id", - list(set(range(1, 133))), + [*list(set(range(1, 133))), 9999999], ) async def test_get_quality_identical( data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient