Report detailed errors from the dataset qualities endpoint.

src/core/errors.py
    Adds NoQualitiesError (HTTP 412 Precondition Failed, default code 362).

src/routers/openml/qualities.py
    get_qualities now raises, in this order:
      * DatasetNotFoundError (code 361) when the dataset is missing or the
        user has no access; the explicit 412 status override is dropped.
      * DatasetNotProcessedError (code 363) when no processing update exists.
      * DatasetProcessingError (code 364) when the latest processing update
        carries an error; the stripped error text becomes the message.
      * NoQualitiesError when no qualities are stored for the dataset.

tests/routers/openml/qualities_test.py
    test_get_quality_identical covers dataset ids 1-132 plus 9999999 and
    dispatches on the PHP error code (361, 364) to per-case assertion
    helpers; any other PHP error code fails the test.

NOTE(review): DatasetNotProcessedError, DatasetProcessingError and
database.datasets.get_latest_processing_update are not defined in this
patch; presumably they already exist in the tree -- confirm.
NOTE(review): dataset 116 used to be skipped for PHP code 362 (no
qualities). It is now inside the parametrized range, but the dispatcher has
no 362 branch -- confirm it does not hit the "not under test" failure.
NOTE(review): the interior spacing of the removed comment lines in
get_qualities could not be recovered; confirm against the target file
before applying. The index hashes of the test file predate the edits made
to its added lines.

diff --git a/src/core/errors.py b/src/core/errors.py
index 3f53364a..8469b9a3 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -374,6 +374,20 @@ class ServiceNotFoundError(ProblemDetailError):
     _default_status_code = HTTPStatus.NOT_FOUND
 
 
+# =============================================================================
+# Quality Errors
+# =============================================================================
+
+
+class NoQualitiesError(ProblemDetailError):
+    """Raised when a dataset has no stored quality values."""
+
+    uri = "https://openml.org/problems/quality-no-qualities"
+    title = "No Qualities Found"
+    _default_status_code = HTTPStatus.PRECONDITION_FAILED
+    _default_code = 362
+
+
 # =============================================================================
 # Internal Errors
 # =============================================================================
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 0f40f848..eff7081a 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -1,4 +1,3 @@
-from http import HTTPStatus
 from typing import Annotated, Literal
 
 from fastapi import APIRouter, Depends
@@ -7,7 +6,12 @@
 import database.datasets
 import database.qualities
 from core.access import _user_has_access
-from core.errors import DatasetNotFoundError
+from core.errors import (
+    DatasetNotFoundError,
+    DatasetNotProcessedError,
+    DatasetProcessingError,
+    NoQualitiesError,
+)
 from database.users import User
 from routers.dependencies import expdb_connection, fetch_user
 from schemas.datasets.openml import Quality
@@ -35,19 +39,24 @@ async def get_qualities(
 ) -> list[Quality]:
     dataset = await database.datasets.get(dataset_id, expdb)
     if not dataset or not await _user_has_access(dataset, user):
-        # Backwards compatibility: PHP API returns 412 with code 113
         msg = f"Dataset with id {dataset_id} not found."
-        no_data_file = 113
         raise DatasetNotFoundError(
             msg,
-            code=no_data_file,
-            status_code=HTTPStatus.PRECONDITION_FAILED,
-        )
-    return await database.qualities.get_for_dataset(dataset_id, expdb)
-    # The PHP API provided (sometime) helpful error messages
-    # if not qualities:
-    # check if dataset exists: error 360
-    # check if user has access: error 361
-    # check if there is a data processed entry and forward the error: 364
-    # if nothing in process table: 363
-    # otherwise: error 362
+            code=361,
+        ) from None
+
+    processing = await database.datasets.get_latest_processing_update(dataset_id, expdb)
+    if processing is None:
+        msg = f"Dataset not processed yet for dataset {dataset_id}."
+        raise DatasetNotProcessedError(msg, code=363)
+
+    if processing.error:
+        msg = processing.error.strip() or "Error occurred during processing."
+        raise DatasetProcessingError(msg, code=364)
+
+    qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
+    if not qualities:
+        msg = f"No qualities found for dataset {dataset_id}."
+        raise NoQualitiesError(msg)
+
+    return qualities
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index a1360cfc..a7825202 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -1,4 +1,5 @@
 import asyncio
+import re
 from http import HTTPStatus
 
 import deepdiff
@@ -7,8 +8,6 @@
 from sqlalchemy import text
 from sqlalchemy.ext.asyncio import AsyncConnection
 
-from core.errors import DatasetNotFoundError
-
 
 async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConnection) -> None:
     await expdb_test.execute(
@@ -287,7 +286,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:
 
 @pytest.mark.parametrize(
     "data_id",
-    list(set(range(1, 132)) - {55, 56, 59, 116, 130}),
+    [*range(1, 133), 9999999],
 )
 async def test_get_quality_identical(
     data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
@@ -296,8 +295,25 @@ async def test_get_quality_identical(
         py_api.get(f"/datasets/qualities/{data_id}"),
         php_api.get(f"/data/qualities/{data_id}"),
     )
-    assert python_response.status_code == php_response.status_code
+    if php_response.status_code == HTTPStatus.OK:
+        _assert_get_quality_success_equal(python_response, php_response)
+        return
+
+    php_error_code = int(php_response.json()["error"]["code"])
+    if php_error_code == 361:  # noqa: PLR2004
+        _assert_get_quality_error_dataset_not_found(python_response, php_response)
+    elif php_error_code == 364:  # noqa: PLR2004
+        _assert_get_quality_error_dataset_process_error(python_response, php_response)
+    else:
+        msg = f"Dataset {data_id} response not under test: {php_response.json()}"
+        raise AssertionError(msg)
+
 
+
+def _assert_get_quality_success_equal(
+    python_response: httpx.Response, php_response: httpx.Response
+) -> None:
+    """Assert the Python response matches the PHP status code and qualities."""
+    assert python_response.status_code == php_response.status_code
     expected = [
         {
             "name": quality["name"],
@@ -308,28 +324,33 @@ async def test_get_quality_identical(
     assert python_response.json() == expected
 
 
-@pytest.mark.parametrize(
-    "data_id",
-    [55, 56, 59, 116, 130, 132],
-)
-async def test_get_quality_identical_error(
-    data_id: int,
-    py_api: httpx.AsyncClient,
-    php_api: httpx.AsyncClient,
+def _assert_get_quality_error_dataset_not_found(
+    python_response: httpx.Response, php_response: httpx.Response
 ) -> None:
-    if data_id in [55, 56, 59]:
-        pytest.skip("Detailed error for code 364 (failed processing) not yet supported.")
-    if data_id in [116]:  # noqa: FURB171
-        pytest.skip("Detailed error for code 362 (no qualities) not yet supported.")
-    python_response, php_response = await asyncio.gather(
-        py_api.get(f"/datasets/qualities/{data_id}"),
-        php_api.get(f"/data/qualities/{data_id}"),
-    )
-    assert python_response.status_code == php_response.status_code
-    # RFC 9457: Python API now returns problem+json format
-    assert python_response.headers["content-type"] == "application/problem+json"
-    error = python_response.json()
-    assert error["type"] == DatasetNotFoundError.uri
-    # Verify the error message matches the PHP API semantically
-    assert php_response.json()["error"]["message"] == "Unknown dataset"
-    assert error["detail"] == f"Dataset with id {data_id} not found."
+    """Assert an unknown dataset: PHP answers 412, Python 404, same error code."""
+    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
+    assert python_response.status_code == HTTPStatus.NOT_FOUND
+
+    php_error = php_response.json()["error"]
+    py_error = python_response.json()
+
+    assert php_error["code"] == py_error["code"]
+    assert php_error["message"] == "Unknown dataset"
+    assert re.match(r"Dataset with id \d+ not found\.", py_error["detail"])
+
+
+def _assert_get_quality_error_dataset_process_error(
+    python_response: httpx.Response, php_response: httpx.Response
+) -> None:
+    """Assert both APIs report the same dataset processing error."""
+    assert php_response.status_code == python_response.status_code
+
+    php_error = php_response.json()["error"]
+    py_error = python_response.json()
+
+    assert php_error["code"] == py_error["code"]
+    assert php_error["message"] == "Dataset processed with error"
+    assert py_error["title"] == "Dataset Processing Error"
+    # The PHP API can add unnecessary escapes, so only the ends are compared.
+    assert php_error["additional_information"][:30] == py_error["detail"][:30]
+    assert php_error["additional_information"][-30:] == py_error["detail"][-30:]