From 922ab949a40153bc6e896852adea7d0d1e93ad5e Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Thu, 29 Aug 2024 16:36:09 +1000 Subject: [PATCH 1/5] Debug statistics record problem. --- src/mavedb/routers/statistics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/mavedb/routers/statistics.py b/src/mavedb/routers/statistics.py index 53a3e8fc0..61be6a5ca 100644 --- a/src/mavedb/routers/statistics.py +++ b/src/mavedb/routers/statistics.py @@ -260,6 +260,7 @@ def _record_from_field_and_model( model_created_by_field = getattr(queried_model, "created_by_id") model_published_data_field = getattr(queried_model, "published_date") if field is RecordFields.createdBy: + print("111") query = ( select(User.username, func.count(User.id)) .join(queried_model, model_created_by_field == User.id) @@ -269,6 +270,7 @@ def _record_from_field_and_model( return db.execute(query).all() else: + print("2222") # All assc table identifiers which are linked to a published model. queried_assc_table = association_tables[model][field] published_score_sets_statement = ( @@ -332,6 +334,11 @@ def record_object_statistics( Model names and fields should be members of the Enum classes defined above. Providing an invalid model name or model field will yield a 422 Unprocessable Entity error with details about valid enum values. """ + # Validation to ensure 'keywords' is only used with 'experiment'. + if model == RecordNames.scoreSet and field == RecordFields.keywords: + raise HTTPException(status_code=422, + detail="The 'keywords' field can only be used with the 'experiment' model.") + count_data = _record_from_field_and_model(db, model, field) return {field_val: count for field_val, count in count_data if field_val is not None} From b05b4da8d2a2275825f1340fc97f834973ade9b4 Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Fri, 30 Aug 2024 09:44:56 +1000 Subject: [PATCH 2/5] Remove print. --- src/mavedb/routers/statistics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mavedb/routers/statistics.py b/src/mavedb/routers/statistics.py index 61be6a5ca..29445fe2d 100644 --- a/src/mavedb/routers/statistics.py +++ b/src/mavedb/routers/statistics.py @@ -260,7 +260,6 @@ def _record_from_field_and_model( model_created_by_field = getattr(queried_model, "created_by_id") model_published_data_field = getattr(queried_model, "published_date") if field is RecordFields.createdBy: - print("111") query = ( select(User.username, func.count(User.id)) .join(queried_model, model_created_by_field == User.id) @@ -270,7 +269,6 @@ def _record_from_field_and_model( return db.execute(query).all() else: - print("2222") # All assc table identifiers which are linked to a published model. queried_assc_table = association_tables[model][field] published_score_sets_statement = ( From 80b23a43214d2fdae721ef7f396048e1e499dcbd Mon Sep 17 00:00:00 2001 From: EstelleDa Date: Tue, 10 Sep 2024 17:10:52 +1000 Subject: [PATCH 3/5] Debug has_permission function. --- src/mavedb/lib/permissions.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/mavedb/lib/permissions.py b/src/mavedb/lib/permissions.py index 972b91be1..1b2f966c8 100644 --- a/src/mavedb/lib/permissions.py +++ b/src/mavedb/lib/permissions.py @@ -95,8 +95,10 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif private: # Do not acknowledge the existence of a private entity. return PermissionResponse(False, 404, f"experiment set with URN '{item.urn}' not found") + elif user_data is None or user_data.user is None: + return PermissionResponse(False, 401, f"insufficient permissions for URN '{item.urn}'") else: - return PermissionResponse(False) + return PermissionResponse(False, 403, f"insufficient permissions for URN '{item.urn}'") elif action == Action.UPDATE: if user_may_edit: return PermissionResponse(True) @@ -106,8 +108,10 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif private: # Do not acknowledge the existence of a private entity. return PermissionResponse(False, 404, f"experiment set with URN '{item.urn}' not found") + elif user_data is None or user_data.user is None: + return PermissionResponse(False, 401, f"insufficient permissions for URN '{item.urn}'") else: - return PermissionResponse(False) + return PermissionResponse(False, 403, f"insufficient permissions for URN '{item.urn}'") elif action == Action.DELETE: # Owner may only delete an experiment set if it has not already been published. if user_may_edit: @@ -143,8 +147,10 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif private: # Do not acknowledge the existence of a private entity. return PermissionResponse(False, 404, f"experiment with URN '{item.urn}' not found") + elif user_data is None or user_data.user is None: + return PermissionResponse(False, 401, f"insufficient permissions for URN '{item.urn}'") else: - return PermissionResponse(False) + return PermissionResponse(False, 403, f"insufficient permissions for URN '{item.urn}'") elif action == Action.UPDATE: if user_may_edit: return PermissionResponse(True) @@ -154,8 +160,10 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif private: # Do not acknowledge the existence of a private entity. return PermissionResponse(False, 404, f"experiment with URN '{item.urn}' not found") + elif user_data is None or user_data.user is None: + return PermissionResponse(False, 401, f"insufficient permissions for URN '{item.urn}'") else: - return PermissionResponse(False) + return PermissionResponse(False, 403, f"insufficient permissions for URN '{item.urn}'") elif action == Action.DELETE: # Owner may only delete an experiment if it has not already been published. if user_may_edit: @@ -191,8 +199,10 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif private: # Do not acknowledge the existence of a private entity. return PermissionResponse(False, 404, f"score set with URN '{item.urn}' not found") + elif user_data is None or user_data.user is None: + return PermissionResponse(False, 401, f"insufficient permissions for URN '{item.urn}'") else: - return PermissionResponse(False) + return PermissionResponse(False, 403, f"insufficient permissions for URN '{item.urn}'") elif action == Action.UPDATE: if user_may_edit: return PermissionResponse(True) @@ -202,8 +212,10 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif private: # Do not acknowledge the existence of a private entity. return PermissionResponse(False, 404, f"score set with URN '{item.urn}' not found") + elif user_data is None or user_data.user is None: + return PermissionResponse(False, 401, f"insufficient permissions for URN '{item.urn}'") else: - return PermissionResponse(False) + return PermissionResponse(False, 403, f"insufficient permissions for URN '{item.urn}'") elif action == Action.DELETE: # Owner may only delete a score set if it has not already been published. if user_may_edit: @@ -247,7 +259,7 @@ def has_permission(user_data: Optional[UserData], item: Base, action: Action) -> elif roles_permitted(active_roles, [UserRole.admin]): return PermissionResponse(True) else: - return PermissionResponse(False) + return PermissionResponse(False, 403, "Insufficient permissions for user update.") elif action == Action.UPDATE: if user_is_self: return PermissionResponse(True) From 42e5520596acc42186204734e4eb364665ae65f2 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 11 Sep 2024 16:55:18 -0700 Subject: [PATCH 4/5] fixed: Count DF was being validated even without count data for score set updates --- src/mavedb/routers/score_sets.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 24c602ca6..44fd2a307 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -854,9 +854,13 @@ async def update_score_set( scores_data = pd.DataFrame( variants_to_csv_rows(item.variants, columns=score_columns, dtype="score_data") ).replace("NA", pd.NA) - count_data = pd.DataFrame( - variants_to_csv_rows(item.variants, columns=count_columns, dtype="count_data") - ).replace("NA", pd.NA) + + if item.dataset_columns["count_columns"]: + count_data = pd.DataFrame( + variants_to_csv_rows(item.variants, columns=count_columns, dtype="count_data") + ).replace("NA", pd.NA) + else: + count_data = None # Although this is also updated within the variant creation job, update it here # as well so that we can display the proper UI components (queue invocation delay From fa984bb212b1af43bd38b9492a63e49f124c1bca Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 11 Sep 2024 16:56:15 -0700 Subject: [PATCH 5/5] Add handling for NA values during CSV validation --- src/mavedb/lib/mave/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mavedb/lib/mave/utils.py b/src/mavedb/lib/mave/utils.py index f59a3fca0..dd6b75916 100644 --- a/src/mavedb/lib/mave/utils.py +++ b/src/mavedb/lib/mave/utils.py @@ -1,5 +1,7 @@ import re +import pandas as pd + NA_VALUE = "NA" NULL_VALUES = ("", "na", "nan", "nil", "none", "null", "n/a", "undefined", NA_VALUE) @@ -22,6 +24,9 @@ def is_csv_null(value): """Return True if a string from a CSV file represents a NULL value.""" + # Avoid any boolean miscasts from comparisons by handling NA types up front. + if pd.isna(value): + return True # Number 0 is treated as False so that all 0 will be converted to NA value. if value == 0: return value