From c56afd427fd1379a6688550424c72ef1608c8180 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 20 Mar 2026 19:29:20 +0100 Subject: [PATCH 1/2] fix: deterministic order when deleting excess duplicate findings order_by("date") does not define order for rows with identical dates. Add id as secondary sort so async_dupe_delete removes oldest duplicates first (by date, then id), matching documented behavior. --- dojo/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dojo/tasks.py b/dojo/tasks.py index 1bbe104783b..f09f8c783f6 100644 --- a/dojo/tasks.py +++ b/dojo/tasks.py @@ -138,8 +138,11 @@ def _async_dupe_delete_impl(): originals_with_too_many_duplicates = Finding.objects.filter(id__in=originals_with_too_many_duplicates_ids).order_by("id") # prefetch to make it faster + # Oldest-first: delete from the front of the list until dupe_count <= 0, keeping the last max_dupes. + # order_by("date") alone leaves ties undefined when many duplicates share the same date (e.g. tool date); + # add id so we always drop lower-id (older) rows first and retain higher-id (newer) imports. originals_with_too_many_duplicates = originals_with_too_many_duplicates.prefetch_related(Prefetch("original_finding", - queryset=Finding.objects.filter(duplicate=True).order_by("date"))) + queryset=Finding.objects.filter(duplicate=True).order_by("date", "id"))) total_deleted_count = 0 affected_products = set() From f80fff42e7ff020d23af0c2a429c1c68e03c1bf6 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Fri, 20 Mar 2026 19:34:25 +0100 Subject: [PATCH 2/2] test: cover async_dupe_delete ordering when duplicate dates match Add test_delete_duplicate_order_same_date_tiebreak_by_id; lower-id duplicate is removed first when max_dupes is exceeded and date ties. --- unittests/test_duplication_loops.py | 37 +++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/unittests/test_duplication_loops.py b/unittests/test_duplication_loops.py index a2afbeb37f9..a3a70cf5c2b 100644 --- a/unittests/test_duplication_loops.py +++ b/unittests/test_duplication_loops.py @@ -407,6 +407,43 @@ def test_delete_duplicate_order(self): self.finding_a.refresh_from_db() self.assertEqual(self.finding_a.duplicate_finding_set().count(), 1) + def test_delete_duplicate_order_same_date_tiebreak_by_id(self): + """When duplicate findings share the same date, excess deletes use id as tie-break (oldest id first).""" + system_settings = System_Settings.objects.get() + system_settings.delete_duplicates = True + system_settings.max_dupes = 1 + system_settings.save() + + same_date = "2024-06-01" + self.finding_b.date = same_date + self.finding_c.date = same_date + self.finding_b.save() + self.finding_c.save() + + set_duplicate(self.finding_b, self.finding_a) + set_duplicate(self.finding_c, self.finding_a) + + self.finding_b.refresh_from_db() + self.finding_c.refresh_from_db() + self.assertLess( + self.finding_b.id, + self.finding_c.id, + "Fixture setup should give finding_b a lower id than finding_c for this tie-break.", + ) + + _async_dupe_delete_impl() + + self.assertFalse( + Finding.objects.filter(id=self.finding_b.id).exists(), + "The duplicate with lower id should be deleted when dates are identical.", + ) + self.assertTrue( + Finding.objects.filter(id=self.finding_c.id).exists(), + "The duplicate with higher id should remain when dates are identical.", + ) + self.finding_a.refresh_from_db() + self.assertEqual(self.finding_a.duplicate_finding_set().count(), 1) + def test_delete_all_engagements(self): # make sure there is no exception when deleting all engagements for engagement in Engagement.objects.all().order_by("id"):