From a5c8d89b9c6e162459b693428f060e698d460069 Mon Sep 17 00:00:00 2001
From: Mahhheshh <100200105+Mahhheshh@users.noreply.github.com>
Date: Sun, 22 Feb 2026 16:03:48 +0530
Subject: [PATCH 1/7] add extra error messages and fix response schema
---
src/core/errors.py | 41 ++++++++++++++++++++++++++
src/routers/openml/qualities.py | 36 ++++++++++++++--------
tests/routers/openml/qualities_test.py | 3 +-
3 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index 3f53364a..315aab6d 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -374,6 +374,47 @@ class ServiceNotFoundError(ProblemDetailError):
_default_status_code = HTTPStatus.NOT_FOUND
+# =============================================================================
+# Quality Errors
+# =============================================================================
+
+
+class QualityUnknownDatasetError(ProblemDetailError):
+ """Raised when requesting qualities for an unknown or inaccessible dataset."""
+
+ uri = "https://openml.org/problems/quality-unknown-dataset"
+ title = "Unknown Dataset"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 361
+
+
+class QualityNoQualitiesError(ProblemDetailError):
+ """Raised when a dataset has no stored quality values."""
+
+ uri = "https://openml.org/problems/quality-no-qualities"
+ title = "No Qualities Found"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 362
+
+
+class QualityDatasetNotProcessedError(ProblemDetailError):
+ """Raised when quality values are requested before dataset processing finished."""
+
+ uri = "https://openml.org/problems/quality-dataset-not-processed"
+ title = "Dataset Not Processed"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 363
+
+
+class QualityDatasetProcessingError(ProblemDetailError):
+ """Raised when quality values are unavailable due to processing errors."""
+
+ uri = "https://openml.org/problems/quality-dataset-processing-error"
+ title = "Dataset Processed With Error"
+ _default_status_code = HTTPStatus.PRECONDITION_FAILED
+ _default_code = 364
+
+
# =============================================================================
# Internal Errors
# =============================================================================
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 0f40f848..bcac1eee 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -7,7 +7,12 @@
import database.datasets
import database.qualities
from core.access import _user_has_access
-from core.errors import DatasetNotFoundError
+from core.errors import (
+ DatasetNotFoundError,
+ QualityDatasetNotProcessedError,
+ QualityDatasetProcessingError,
+ QualityNoQualitiesError,
+)
from database.users import User
from routers.dependencies import expdb_connection, fetch_user
from schemas.datasets.openml import Quality
@@ -37,17 +42,24 @@ async def get_qualities(
if not dataset or not await _user_has_access(dataset, user):
# Backwards compatibility: PHP API returns 412 with code 113
msg = f"Dataset with id {dataset_id} not found."
- no_data_file = 113
raise DatasetNotFoundError(
msg,
- code=no_data_file,
+ code=113,
status_code=HTTPStatus.PRECONDITION_FAILED,
- )
- return await database.qualities.get_for_dataset(dataset_id, expdb)
- # The PHP API provided (sometime) helpful error messages
- # if not qualities:
- # check if dataset exists: error 360
- # check if user has access: error 361
- # check if there is a data processed entry and forward the error: 364
- # if nothing in process table: 363
- # otherwise: error 362
+ ) from None
+
+ processing = await database.datasets.get_latest_processing_update(dataset_id, expdb)
+ if processing is None:
+ msg = f"Dataset not processed yet for dataset {dataset_id}."
+ raise QualityDatasetNotProcessedError(msg)
+
+ if processing.error:
+ msg = f"Dataset processed with error for dataset {dataset_id}."
+ raise QualityDatasetProcessingError(msg)
+
+ qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
+ if not qualities:
+ msg = f"No qualities found for dataset {dataset_id}."
+ raise QualityNoQualitiesError(msg)
+
+ return qualities
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index a1360cfc..673ea404 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -317,9 +317,8 @@ async def test_get_quality_identical_error(
py_api: httpx.AsyncClient,
php_api: httpx.AsyncClient,
) -> None:
- if data_id in [55, 56, 59]:
- pytest.skip("Detailed error for code 364 (failed processing) not yet supported.")
if data_id in [116]: # noqa: FURB171
+ # skipping 116 is not valid case for 362
pytest.skip("Detailed error for code 362 (no qualities) not yet supported.")
python_response, php_response = await asyncio.gather(
py_api.get(f"/datasets/qualities/{data_id}"),
From 6c222279e2fca56274e2aab609f58f341abd4627 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 23 Mar 2026 14:10:43 +0100
Subject: [PATCH 2/7] Remove unused error class
---
src/core/errors.py | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index 315aab6d..48af54c3 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -379,15 +379,6 @@ class ServiceNotFoundError(ProblemDetailError):
# =============================================================================
-class QualityUnknownDatasetError(ProblemDetailError):
- """Raised when requesting qualities for an unknown or inaccessible dataset."""
-
- uri = "https://openml.org/problems/quality-unknown-dataset"
- title = "Unknown Dataset"
- _default_status_code = HTTPStatus.PRECONDITION_FAILED
- _default_code = 361
-
-
class QualityNoQualitiesError(ProblemDetailError):
"""Raised when a dataset has no stored quality values."""
From 59f39ae5e499b86aa098892f094456d2bbb67487 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 23 Mar 2026 14:11:41 +0100
Subject: [PATCH 3/7] Start unifying the test function for now to ease migration
tests
---
src/routers/openml/qualities.py | 6 +--
tests/routers/openml/qualities_test.py | 65 +++++++++++++++-----------
2 files changed, 41 insertions(+), 30 deletions(-)
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index bcac1eee..54056d3c 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -44,8 +44,7 @@ async def get_qualities(
msg = f"Dataset with id {dataset_id} not found."
raise DatasetNotFoundError(
msg,
- code=113,
- status_code=HTTPStatus.PRECONDITION_FAILED,
+ code=361,
) from None
processing = await database.datasets.get_latest_processing_update(dataset_id, expdb)
@@ -54,8 +53,7 @@ async def get_qualities(
raise QualityDatasetNotProcessedError(msg)
if processing.error:
- msg = f"Dataset processed with error for dataset {dataset_id}."
- raise QualityDatasetProcessingError(msg)
+ raise QualityDatasetProcessingError(processing.error.strip())
qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
if not qualities:
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index 673ea404..b56d5310 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -1,5 +1,6 @@
import asyncio
from http import HTTPStatus
+import re
import deepdiff
import httpx
@@ -287,7 +288,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:
@pytest.mark.parametrize(
"data_id",
- list(set(range(1, 132)) - {55, 56, 59, 116, 130}),
+ list(set(range(1, 133))),
)
async def test_get_quality_identical(
data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
@@ -296,8 +297,21 @@ async def test_get_quality_identical(
py_api.get(f"/datasets/qualities/{data_id}"),
php_api.get(f"/data/qualities/{data_id}"),
)
- assert python_response.status_code == php_response.status_code
+ if php_response.status_code == HTTPStatus.OK:
+ _assert_get_quality_success_equal(python_response, php_response)
+ return
+
+ php_error_code = int(php_response.json()["error"]["code"])
+ if php_error_code == 361:
+ _assert_get_quality_error_dataset_not_found(python_response, php_response)
+ elif php_error_code == 364:
+ _assert_get_quality_error_dataset_process_error(python_response, php_response)
+ else:
+ raise AssertionError(f"Dataset {data_id} response not under test:", php_response.json())
+
+def _assert_get_quality_success_equal(python_response, php_response):
+ assert python_response.status_code == php_response.status_code
expected = [
{
"name": quality["name"],
@@ -308,27 +322,26 @@ async def test_get_quality_identical(
assert python_response.json() == expected
-@pytest.mark.parametrize(
- "data_id",
- [55, 56, 59, 116, 130, 132],
-)
-async def test_get_quality_identical_error(
- data_id: int,
- py_api: httpx.AsyncClient,
- php_api: httpx.AsyncClient,
-) -> None:
- if data_id in [116]: # noqa: FURB171
- # skipping 116 is not valid case for 362
- pytest.skip("Detailed error for code 362 (no qualities) not yet supported.")
- python_response, php_response = await asyncio.gather(
- py_api.get(f"/datasets/qualities/{data_id}"),
- php_api.get(f"/data/qualities/{data_id}"),
- )
- assert python_response.status_code == php_response.status_code
- # RFC 9457: Python API now returns problem+json format
- assert python_response.headers["content-type"] == "application/problem+json"
- error = python_response.json()
- assert error["type"] == DatasetNotFoundError.uri
- # Verify the error message matches the PHP API semantically
- assert php_response.json()["error"]["message"] == "Unknown dataset"
- assert error["detail"] == f"Dataset with id {data_id} not found."
+def _assert_get_quality_error_dataset_not_found(python_response, php_response):
+ assert php_response.status_code == HTTPStatus.CONFLICT
+ assert python_response.status_code == HTTPStatus.NOT_FOUND
+
+ php_error = php_response.json()["error"]
+ py_error = python_response.json()
+
+ assert int(php_error["code"]) == py_error["code"]
+ assert php_error["message"] == "Unknown dataset"
+ assert re.match(r"Dataset with id \d+ not found.", py_error["detail"])
+
+
+def _assert_get_quality_error_dataset_process_error(python_response, php_response):
+ assert php_response.status_code == python_response.status_code
+
+ php_error = php_response.json()["error"]
+ py_error = python_response.json()
+
+ assert php_error["code"] == py_error["code"]
+ assert php_error["message"].title() == py_error["title"]
+ # The PHP can add some additional unnecessary escapes.
+ assert php_error["additional_information"][:30] == py_error["detail"][:30]
+ assert php_error["additional_information"][-30:] == py_error["detail"][-30:]
From eeca93fd51aba5fa947e75e7c32cfac6a2d57ceb Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 23 Mar 2026 14:19:21 +0100
Subject: [PATCH 4/7] Add type hints
---
src/routers/openml/qualities.py | 1 -
tests/routers/openml/qualities_test.py | 27 +++++++++++++++-----------
2 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 54056d3c..075a29c2 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -1,4 +1,3 @@
-from http import HTTPStatus
from typing import Annotated, Literal
from fastapi import APIRouter, Depends
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index b56d5310..7d6641ef 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -1,6 +1,6 @@
import asyncio
-from http import HTTPStatus
import re
+from http import HTTPStatus
import deepdiff
import httpx
@@ -8,8 +8,6 @@
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncConnection
-from core.errors import DatasetNotFoundError
-
async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConnection) -> None:
await expdb_test.execute(
@@ -302,15 +300,18 @@ async def test_get_quality_identical(
return
php_error_code = int(php_response.json()["error"]["code"])
- if php_error_code == 361:
+ if php_error_code == 361: # noqa: PLR2004
_assert_get_quality_error_dataset_not_found(python_response, php_response)
- elif php_error_code == 364:
+ elif php_error_code == 364: # noqa: PLR2004
_assert_get_quality_error_dataset_process_error(python_response, php_response)
else:
- raise AssertionError(f"Dataset {data_id} response not under test:", php_response.json())
+ msg = f"Dataset {data_id} response not under test: {php_response.json()}"
+ raise AssertionError(msg)
-def _assert_get_quality_success_equal(python_response, php_response):
+def _assert_get_quality_success_equal(
+ python_response: httpx.Response, php_response: httpx.Response
+) -> None:
assert python_response.status_code == php_response.status_code
expected = [
{
@@ -322,19 +323,23 @@ def _assert_get_quality_success_equal(python_response, php_response):
assert python_response.json() == expected
-def _assert_get_quality_error_dataset_not_found(python_response, php_response):
- assert php_response.status_code == HTTPStatus.CONFLICT
+def _assert_get_quality_error_dataset_not_found(
+ python_response: httpx.Response, php_response: httpx.Response
+) -> None:
+ assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
assert python_response.status_code == HTTPStatus.NOT_FOUND
php_error = php_response.json()["error"]
py_error = python_response.json()
- assert int(php_error["code"]) == py_error["code"]
+ assert php_error["code"] == py_error["code"]
assert php_error["message"] == "Unknown dataset"
assert re.match(r"Dataset with id \d+ not found.", py_error["detail"])
-def _assert_get_quality_error_dataset_process_error(python_response, php_response):
+def _assert_get_quality_error_dataset_process_error(
+ python_response: httpx.Response, php_response: httpx.Response
+) -> None:
assert php_response.status_code == python_response.status_code
php_error = php_response.json()["error"]
From 5c31d231487b6e03cc1dcf7a6f653b2c9880ac69 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 23 Mar 2026 14:25:29 +0100
Subject: [PATCH 5/7] Use the pre-existing Dataset error classes
In general, the error class used should be consistent with the cause
of the error.
---
src/core/errors.py | 20 +-------------------
src/routers/openml/qualities.py | 12 ++++++------
tests/routers/openml/qualities_test.py | 3 ++-
3 files changed, 9 insertions(+), 26 deletions(-)
diff --git a/src/core/errors.py b/src/core/errors.py
index 48af54c3..8469b9a3 100644
--- a/src/core/errors.py
+++ b/src/core/errors.py
@@ -379,7 +379,7 @@ class ServiceNotFoundError(ProblemDetailError):
# =============================================================================
-class QualityNoQualitiesError(ProblemDetailError):
+class NoQualitiesError(ProblemDetailError):
"""Raised when a dataset has no stored quality values."""
uri = "https://openml.org/problems/quality-no-qualities"
@@ -388,24 +388,6 @@ class QualityNoQualitiesError(ProblemDetailError):
_default_code = 362
-class QualityDatasetNotProcessedError(ProblemDetailError):
- """Raised when quality values are requested before dataset processing finished."""
-
- uri = "https://openml.org/problems/quality-dataset-not-processed"
- title = "Dataset Not Processed"
- _default_status_code = HTTPStatus.PRECONDITION_FAILED
- _default_code = 363
-
-
-class QualityDatasetProcessingError(ProblemDetailError):
- """Raised when quality values are unavailable due to processing errors."""
-
- uri = "https://openml.org/problems/quality-dataset-processing-error"
- title = "Dataset Processed With Error"
- _default_status_code = HTTPStatus.PRECONDITION_FAILED
- _default_code = 364
-
-
# =============================================================================
# Internal Errors
# =============================================================================
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 075a29c2..64fd7a0f 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -8,9 +8,9 @@
from core.access import _user_has_access
from core.errors import (
DatasetNotFoundError,
- QualityDatasetNotProcessedError,
- QualityDatasetProcessingError,
- QualityNoQualitiesError,
+ DatasetNotProcessedError,
+ DatasetProcessingError,
+ NoQualitiesError,
)
from database.users import User
from routers.dependencies import expdb_connection, fetch_user
@@ -49,14 +49,14 @@ async def get_qualities(
processing = await database.datasets.get_latest_processing_update(dataset_id, expdb)
if processing is None:
msg = f"Dataset not processed yet for dataset {dataset_id}."
- raise QualityDatasetNotProcessedError(msg)
+ raise DatasetNotProcessedError(msg, code=363)
if processing.error:
- raise QualityDatasetProcessingError(processing.error.strip())
+ raise DatasetProcessingError(processing.error.strip(), code=364)
qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
if not qualities:
msg = f"No qualities found for dataset {dataset_id}."
- raise QualityNoQualitiesError(msg)
+ raise NoQualitiesError(msg)
return qualities
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index 7d6641ef..594bb464 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -346,7 +346,8 @@ def _assert_get_quality_error_dataset_process_error(
py_error = python_response.json()
assert php_error["code"] == py_error["code"]
- assert php_error["message"].title() == py_error["title"]
+ assert php_error["message"] == "Dataset processed with error"
+ assert py_error["title"] == "Dataset Processing Error"
# The PHP can add some additional unnecessary escapes.
assert php_error["additional_information"][:30] == py_error["detail"][:30]
assert php_error["additional_information"][-30:] == py_error["detail"][-30:]
From a9dede1b6ca1e010164657d13a053e509ab4f129 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 23 Mar 2026 14:35:55 +0100
Subject: [PATCH 6/7] remove old misleading comment
---
src/routers/openml/qualities.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/routers/openml/qualities.py b/src/routers/openml/qualities.py
index 64fd7a0f..eff7081a 100644
--- a/src/routers/openml/qualities.py
+++ b/src/routers/openml/qualities.py
@@ -39,7 +39,6 @@ async def get_qualities(
) -> list[Quality]:
dataset = await database.datasets.get(dataset_id, expdb)
if not dataset or not await _user_has_access(dataset, user):
- # Backwards compatibility: PHP API returns 412 with code 113
msg = f"Dataset with id {dataset_id} not found."
raise DatasetNotFoundError(
msg,
@@ -52,7 +51,8 @@ async def get_qualities(
raise DatasetNotProcessedError(msg, code=363)
if processing.error:
- raise DatasetProcessingError(processing.error.strip(), code=364)
+ msg = processing.error.strip() or "Error occurred during processing."
+ raise DatasetProcessingError(msg, code=364)
qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
if not qualities:
From 570f51f6940cb82a6c4207cb2799c3f5b720b0ed Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Mon, 23 Mar 2026 14:37:10 +0100
Subject: [PATCH 7/7] Add a dataset that is sure to be missing
---
tests/routers/openml/qualities_test.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/routers/openml/qualities_test.py b/tests/routers/openml/qualities_test.py
index 594bb464..a7825202 100644
--- a/tests/routers/openml/qualities_test.py
+++ b/tests/routers/openml/qualities_test.py
@@ -286,7 +286,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:
@pytest.mark.parametrize(
"data_id",
- list(set(range(1, 133))),
+ [*range(1, 133), 9999999],
)
async def test_get_quality_identical(
data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient