From fef1f822d8be544deca75f7aff12fe9b19ac540e Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Tue, 28 Apr 2026 20:52:23 +0200 Subject: [PATCH] fix cascade delete bug and default --- dojo/finding/helper.py | 36 +++++++++++++++++-- dojo/settings/settings.dist.py | 5 +-- .../test_prepare_duplicates_for_delete.py | 36 ++++++++++++++----- 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/dojo/finding/helper.py b/dojo/finding/helper.py index 7ec3ba477ea..ceb1708c83e 100644 --- a/dojo/finding/helper.py +++ b/dojo/finding/helper.py @@ -667,6 +667,33 @@ def prepare_duplicates_for_delete(obj): # so original_finding.all() now only contains outside-scope duplicates. reconfigure_duplicate_cluster(original, original.original_finding.all()) + # When DUPLICATE_CLUSTER_CASCADE_DELETE=True, reconfigure_duplicate_cluster is a no-op. + # Match legacy finding_delete(): delete outside-scope cluster members that point at + # in-scope originals (duplicate_cluster.order_by("-id").delete()). Transitive duplicate + # chains do not need a separate expansion pass — fix_loop_duplicates above normalizes them. + if settings.DUPLICATE_CLUSTER_CASCADE_DELETE: + outside_cascade_qs = Finding.objects.filter( + duplicate_finding_id__in=scope_ids_subquery, + ).exclude(id__in=scope_ids_subquery) + outside_count = outside_cascade_qs.count() + if outside_count: + logger.debug( + "cascade-delete %d outside-scope duplicate findings (DUPLICATE_CLUSTER_CASCADE_DELETE)", + outside_count, + ) + bulk_delete_findings(outside_cascade_qs, order_desc=True) + else: + outside_orphan_count = Finding.objects.filter( + duplicate_finding_id__in=scope_ids_subquery, + ).exclude( + id__in=scope_ids_subquery, + ).update(duplicate_finding=None, duplicate=False) + if outside_orphan_count: + logger.debug( + "nulled %d outside-scope duplicate_finding references to prevent FK violation", + outside_orphan_count, + ) + @receiver(pre_delete, sender=Test) def test_pre_delete(sender, instance, **kwargs): @@ -763,7 +790,7 @@ def bulk_clear_finding_m2m(finding_qs): Notes.objects.filter(id__in=note_ids).delete() -def bulk_delete_findings(finding_qs, chunk_size=1000): +def bulk_delete_findings(finding_qs, chunk_size=1000, *, order_desc=False): """ Delete findings and all related objects efficiently. Including any related object in Dojo-Pro @@ -771,6 +798,10 @@ def bulk_delete_findings(finding_qs, chunk_size=1000): discovered by _meta.related_objects), then uses cascade_delete for all FK relations via raw SQL. Chunked with per-chunk transaction.atomic() for crash safety. + + When order_desc is True, findings are processed highest id first (matches + finding_delete: duplicate_cluster.order_by("-id").delete()) so self-FK + duplicate chains delete children before parents. """ from dojo.signals import pre_bulk_delete_findings # noqa: PLC0415 circular import from dojo.utils_cascade_delete import ( # noqa: PLC0415 circular import @@ -780,9 +811,10 @@ def bulk_delete_findings(finding_qs, chunk_size=1000): pre_bulk_delete_findings.send(sender=Finding, finding_qs=finding_qs) bulk_clear_finding_m2m(finding_qs) + ordered_qs = finding_qs.order_by("-id") if order_desc else finding_qs.order_by("id") for chunk_num, chunk_ids in enumerate( batched( - finding_qs.values_list("id", flat=True).order_by("id").iterator(chunk_size=chunk_size), + ordered_qs.values_list("id", flat=True).iterator(chunk_size=chunk_size), chunk_size, strict=False, ), diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 2c828aa9582..8b670d8f96b 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -318,10 +318,7 @@ DD_FEATURE_FINDING_GROUPS=(bool, True), DD_JIRA_TEMPLATE_ROOT=(str, "dojo/templates/issue-trackers"), DD_TEMPLATE_DIR_PREFIX=(str, "dojo/templates/"), - # Initial behaviour in Defect Dojo was to delete all duplicates when an original was deleted - # New behaviour is to leave the duplicates in place, but set the oldest of duplicates as new original - # Set to True to revert to the old behaviour where all duplicates are deleted - DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(bool, True), + DD_DUPLICATE_CLUSTER_CASCADE_DELETE=(bool, False), # Enable Rate Limiting for the login page DD_RATE_LIMITER_ENABLED=(bool, False), # Examples include 5/m 100/h and more https://django-ratelimit.readthedocs.io/en/stable/rates.html#simple-rates diff --git a/unittests/test_prepare_duplicates_for_delete.py b/unittests/test_prepare_duplicates_for_delete.py index 3630f287fdf..b2fd4c64fb1 100644 --- a/unittests/test_prepare_duplicates_for_delete.py +++ b/unittests/test_prepare_duplicates_for_delete.py @@ -236,12 +236,11 @@ def test_mixed_inside_and_outside_duplicates(self): self.assertIsNone(outside_dupe.duplicate_finding) @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) - def test_cascade_delete_skips_outside_reconfigure(self): + def test_cascade_delete_removes_outside_scope_duplicate_findings(self): """ - When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside duplicates are left untouched. - - The caller (async_delete_crawl_task) handles deletion of outside-scope - duplicates separately via bulk_delete_findings. + When DUPLICATE_CLUSTER_CASCADE_DELETE=True, outside-scope findings in the duplicate + cluster are deleted (same as legacy duplicate_cluster.order_by("-id").delete() in + finding_delete), not merely unlinked. """ original = self._create_finding(self.test1, "Original") outside_dupe = self._create_finding(self.test2, "Outside Dupe") @@ -250,10 +249,29 @@ def test_cascade_delete_skips_outside_reconfigure(self): with impersonate(self.testuser): prepare_duplicates_for_delete(self.test1) - outside_dupe.refresh_from_db() - # Outside dupe is still a duplicate — not reconfigured or deleted - self.assertTrue(outside_dupe.duplicate) - self.assertEqual(outside_dupe.duplicate_finding_id, original.id) + self.assertFalse( + Finding.objects.filter(id=outside_dupe.id).exists(), + msg="outside-scope duplicate should be cascade-deleted with DUPLICATE_CLUSTER_CASCADE_DELETE=True", + ) + + @override_settings(DUPLICATE_CLUSTER_CASCADE_DELETE=True) + def test_engagement_delete_with_outside_scope_duplicate_no_fk_violation(self): + # Regression: engagement.delete() raises IntegrityError when DUPLICATE_CLUSTER_CASCADE_DELETE=True + # and an outside-scope finding still holds a duplicate_finding FK to an in-scope finding. + original = self._create_finding(self.test1, "Original in Eng1") + outside_dupe = self._create_finding(self.test3, "Outside Dupe in Eng2") + self._make_duplicate(outside_dupe, original) + + # This must not raise django.db.utils.IntegrityError (FK violation) + with impersonate(self.testuser): + self.engagement1.delete() + + # Engagement and its findings are gone + self.assertFalse(Engagement.objects.filter(id=self.engagement1.id).exists()) + self.assertFalse(Finding.objects.filter(id=original.id).exists()) + + # Outside-scope duplicate was cascade-deleted with the cluster (legacy behaviour) + self.assertFalse(Finding.objects.filter(id=outside_dupe.id).exists()) def test_multiple_originals(self): """Multiple originals in the same test each get their clusters handled."""