Skip to content

Commit 8b7d79c

Browse files
author
Cyril de Catheu
authored
[persistence] improve time mock for quartz scheduler (#1787)
* [persistence] improve time mock for quartz scheduler * fix flaky anomaly resolution test
1 parent 85278ac commit 8b7d79c

File tree

10 files changed

+137
-135
lines changed

10 files changed

+137
-135
lines changed

thirdeye-integration-tests/src/test/java/ai/startree/thirdeye/AnomalyResolutionTest.java

Lines changed: 84 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -90,17 +90,6 @@ private static long epoch(final String dateTime) {
9090
return localDateTime.toInstant(UTC).toEpochMilli();
9191
}
9292

93-
private static long jumpToTime(final String dateTime) throws InterruptedException {
94-
final long jumpTime = epoch(dateTime);
95-
CLOCK.useMockTime(jumpTime);
96-
CLOCK.tick(5); // simulate move time forward
97-
98-
// give thread to detectionCronScheduler and to quartz scheduler -
99-
// (quartz idle time is weaved to 100 ms for test speed)
100-
Thread.sleep(1000);
101-
return jumpTime;
102-
}
103-
10493
@BeforeClass
10594
public void beforeClass() throws Exception {
10695
// ensure time is controlled via the TimeProvider CLOCK - ie weaving is working correctly
@@ -195,134 +184,111 @@ public void testOnboardingTaskRun() throws Exception {
195184
assertThat(alertLastTimestamp).isEqualTo(epoch("2020-02-16 00:00"));
196185
}
197186

187+
/**
188+
* the alert detects anomalies on [feb 3 , feb6), [feb 10, feb 13), [feb 17, feb 20), [feb 24, feb
189+
* 27), etc...
190+
* this test checks the behaviour for the anomaly on [feb 17, feb 20)
191+
* time is currently "2020-02-16 15:00". time is increased day by day from feb 16 to feb 24, and
192+
* the state is checked every day
193+
* the max merge gap is P3D, so once there is no anomaly detected on [feb 23, feb 24), the
194+
* completed anomaly notification should be sent
195+
*/
198196
@Test(dependsOnMethods = "testOnboardingTaskRun", timeOut = TEST_IMEOUT)
199-
public void testDailyFeb21() throws InterruptedException {
200-
// get current number of anomalies
201-
final int numAnomaliesBeforeDetectionRun = client.getAnomalies().size();
202-
203-
final List<AnomalyApi> parentAnomalies = client.getParentAnomalies();
204-
assertThat(parentAnomalies).hasSize(1);
205-
206-
// No notifications sent yet.
207-
assertThat(nsf.getCount()).isZero();
208-
long jumpTime = jumpToTime("2020-02-21 00:00");
209-
210-
waitForDetectionRun();
211-
// wait for new anomalies to be created
212-
while (client.getAnomalies().size() == numAnomaliesBeforeDetectionRun) {
213-
Thread.sleep(1000);
214-
}
215-
216-
// check that lastTimestamp after detection is the runTime of the cron
217-
assertThat(getAlertLastTimestamp()).isEqualTo(jumpTime);
218-
197+
public void testCompletedAnomalyIsSentCorrectly() throws InterruptedException {
198+
// sanity checks after the onboarding task
219199
assertThat(client.getParentAnomalies()).hasSize(2);
200+
// no notification happened yet - time has not increased since subscription group creation
201+
assertThat(nsf.notificationSentCount()).isZero();
220202

221-
// There is at least 1 successful subscription group task
203+
// run the detections every day and check the results
204+
// the cron is at 5 am, so moving to 6 means the cron at 5 will be triggered
205+
// no anomaly on [Feb 16, Feb 17)
206+
CLOCK.useMockTime(epoch("2020-02-17 06:00"));
207+
waitForDetectionRun();
208+
// notification cron is every day at 7 am, so moving to 8 will trigger a notification
209+
CLOCK.useMockTime(epoch("2020-02-17 08:00"));
222210
waitForNotificationTaskRun();
223-
assertThat(nsf.getCount()).isEqualTo(0);
224-
225-
// Move time forward by 5 minutes to make subscription group task run again
226-
jumpToTime("2020-02-21 00:05");
211+
assertThat(nsf.notificationSentCount()).isZero();
227212

213+
// anomaly on [Feb 17, Feb 18)
214+
CLOCK.useMockTime(epoch("2020-02-18 06:00"));
215+
assertThat(nsf.notificationSentCount()).isZero();
216+
waitForDetectionRun();
217+
CLOCK.useMockTime(epoch("2020-02-18 08:00"));
228218
waitForNotificationTaskRun();
229-
assertThat(nsf.getCount()).isEqualTo(1);
230-
231-
final NotificationPayloadApi notificationPayload = nsf.getNotificationPayload();
219+
assertThat(nsf.notificationSentCount()).isEqualTo(1); // a new anomaly is detected and notified
220+
final NotificationPayloadApi notificationPayload = nsf.lastNotificationPayload();
232221
assertThat(notificationPayload.getAnomalyReports()).hasSize(1);
233-
234222
final AnomalyApi anomalyApi = notificationPayload.getAnomalyReports().getFirst().getAnomaly();
235223
assertThat(anomalyApi.getStartTime()).isEqualTo(new Date(epoch("2020-02-17 00:00")));
236-
assertThat(anomalyApi.getEndTime()).isEqualTo(new Date(epoch("2020-02-21 00:00")));
237-
}
238-
239-
@Test(dependsOnMethods = "testDailyFeb21", timeOut = TEST_IMEOUT)
240-
public void testDailyFeb22() throws InterruptedException {
241-
jumpToTimeAndWait("2020-02-22 00:06");
242-
assertThat(client.getParentAnomalies()).hasSize(2);
243-
assertThat(nsf.getCount()).isEqualTo(1); // no new notifications
244-
}
245-
246-
@Test(dependsOnMethods = "testDailyFeb22", timeOut = TEST_IMEOUT)
247-
public void testDailyFeb23() throws InterruptedException {
248-
jumpToTimeAndWait("2020-02-23 00:06");
249-
assertThat(client.getParentAnomalies()).hasSize(2);
250-
assertThat(nsf.getCount()).isEqualTo(1); // no new notifications
251-
}
252-
253-
@Test(dependsOnMethods = "testDailyFeb23", timeOut = TEST_IMEOUT)
254-
public void testDailyFeb24() throws InterruptedException {
255-
jumpToTimeAndWait("2020-02-24 00:06");
256-
assertThat(client.getParentAnomalies()).hasSize(2);
257-
assertThat(nsf.getCount()).isEqualTo(1); // no new notifications
258-
}
259-
260-
@Test(dependsOnMethods = "testDailyFeb24", timeOut = TEST_IMEOUT)
261-
public void testDailyFeb25() throws InterruptedException {
262-
jumpToTimeAndWait("2020-02-25 00:06");
263-
264-
final List<AnomalyApi> parentAnomalies = client.getParentAnomalies();
265-
assertThat(parentAnomalies).hasSize(2);
224+
assertThat(anomalyApi.getEndTime()).isEqualTo(new Date(epoch("2020-02-18 00:00")));
266225

267-
final AnomalyApi ongoingAnomaly = parentAnomalies.stream()
268-
.filter(a -> a.getStartTime().equals(new Date(epoch("2020-02-17 00:00"))))
269-
.findFirst()
270-
.orElseThrow(() -> new AssertionError("Anomaly not found"));
271-
272-
// anomalies are merged here
273-
assertThat(ongoingAnomaly.getEndTime()).isEqualTo(new Date(epoch("2020-02-25 00:00")));
226+
// anomaly on [Feb 18, Feb 19)
227+
CLOCK.useMockTime(epoch("2020-02-19 06:00"));
228+
waitForDetectionRun();
229+
CLOCK.useMockTime(epoch("2020-02-19 08:00"));
230+
waitForNotificationTaskRun();
231+
assertThat(nsf.notificationSentCount()).isEqualTo(
232+
1); // this point is anomalous but merged in current anomaly
274233

275-
// No new notifications yet
276-
assertThat(nsf.getCount()).isEqualTo(1);
277-
}
234+
// anomaly on [Feb 19, Feb 20)
235+
CLOCK.useMockTime(epoch("2020-02-20 06:00"));
236+
waitForDetectionRun();
237+
CLOCK.useMockTime(epoch("2020-02-20 08:00"));
238+
waitForNotificationTaskRun();
239+
assertThat(nsf.notificationSentCount()).isEqualTo(
240+
1); // this point is anomalous but merged in current anomaly
278241

279-
@Test(dependsOnMethods = "testDailyFeb25", timeOut = TEST_IMEOUT)
280-
public void testDailyMar3() throws InterruptedException {
281-
jumpToTimeAndWait("2020-03-03 00:06");
242+
// sanity checks on the detection state
243+
// check that lastTimestamp after detection is the expected runTime of the cron, floored to alert granularity 2020-02-20 06:00 --> 2020-02-20 00:00
244+
assertThat(getAlertLastTimestamp()).isEqualTo(epoch("2020-02-20 00:00"));
282245
assertThat(client.getParentAnomalies()).hasSize(3);
283-
assertThat(nsf.getCount()).isEqualTo(1);
284-
}
246+
final int anomaliesCurrentCount = client.getAnomalies().size();
285247

286-
@Test(dependsOnMethods = "testDailyMar3", timeOut = TEST_IMEOUT)
287-
public void testDailyMar4() throws InterruptedException {
288-
jumpToTimeAndWait("2020-03-04 00:06");
289-
290-
final List<AnomalyApi> parentAnomalies = client.getParentAnomalies();
291-
assertThat(parentAnomalies).hasSize(3);
292-
293-
// No new notifications yet
294-
assertThat(nsf.getCount()).isEqualTo(2);
248+
// no anomaly on [Feb 20, Feb 21)
249+
CLOCK.useMockTime(epoch("2020-02-21 06:00"));
250+
waitForDetectionRun();
251+
// ensure the number of anomalies hasn't changed
252+
assertThat(client.getAnomalies()).hasSize(anomaliesCurrentCount);
253+
CLOCK.useMockTime(epoch("2020-02-21 08:00"));
254+
waitForNotificationTaskRun();
255+
// ensure the number of notification hasn't changed
256+
assertThat(nsf.notificationSentCount()).isEqualTo(1);
295257

296-
final NotificationPayloadApi notificationPayload = nsf.getNotificationPayload();
297-
assertThat(notificationPayload.getAnomalyReports()).hasSize(1);
258+
// no anomaly on [Feb 21, Feb 22) - same checks as above
259+
CLOCK.useMockTime(epoch("2020-02-22 06:00"));
260+
waitForDetectionRun();
261+
assertThat(client.getAnomalies()).hasSize(anomaliesCurrentCount);
262+
CLOCK.useMockTime(epoch("2020-02-22 08:00"));
263+
waitForNotificationTaskRun();
264+
assertThat(nsf.notificationSentCount()).isEqualTo(1);
298265

299-
final AnomalyApi anomalyApi = notificationPayload.getAnomalyReports().getFirst().getAnomaly();
300-
assertThat(anomalyApi.getStartTime()).isEqualTo(new Date(epoch("2020-03-02 00:00")));
301-
assertThat(anomalyApi.getEndTime()).isEqualTo(new Date(epoch("2020-03-03 00:00")));
302-
}
266+
// no anomaly on [Feb 22, Feb 23) - same checks as above
267+
CLOCK.useMockTime(epoch("2020-02-23 06:00"));
268+
waitForDetectionRun();
269+
assertThat(client.getAnomalies()).hasSize(anomaliesCurrentCount);
270+
CLOCK.useMockTime(epoch("2020-02-23 08:00"));
271+
waitForNotificationTaskRun();
272+
assertThat(nsf.notificationSentCount()).isEqualTo(1);
303273

304-
@Test(dependsOnMethods = "testDailyMar4", timeOut = TEST_IMEOUT)
305-
public void testDailyMar5() throws InterruptedException {
306-
jumpToTimeAndWait("2020-03-05 00:06");
274+
// no anomaly on [Feb 23, Feb 24)
275+
CLOCK.useMockTime(epoch("2020-02-24 06:00"));
276+
waitForDetectionRun();
277+
assertThat(client.getAnomalies()).hasSize(anomaliesCurrentCount);
278+
CLOCK.useMockTime(epoch("2020-02-24 08:00"));
279+
waitForNotificationTaskRun();
280+
// maxMergeGap is P3D, so a notification for completed anomaly can be sent now
281+
assertThat(nsf.notificationSentCount()).isEqualTo(2);
282+
// sanity check on number of anomalies
307283
assertThat(client.getParentAnomalies()).hasSize(3);
308-
assertThat(nsf.getCount()).isEqualTo(3);
309-
310-
final NotificationPayloadApi payload = nsf.getNotificationPayload();
284+
// ensure the completed anomaly notification was sent
285+
final NotificationPayloadApi payload = nsf.lastNotificationPayload();
311286
assertThat(payload.getAnomalyReports()).hasSize(0);
312287
assertThat(payload.getCompletedAnomalyReports()).hasSize(1);
313288
}
314289

315-
private void jumpToTimeAndWait(final String dateTime) throws InterruptedException {
316-
jumpToTime(dateTime); // allow both detection and notification to run
317-
waitForDetectionRun();
318-
waitForNotificationTaskRun();
319-
}
320-
321290
private void waitForDetectionRun() throws InterruptedException {
322291
nDetectionTaskRuns = waitFor(alertId, nDetectionTaskRuns);
323-
324-
// Even after the task is complete, anomalies are persisted async. Giving another sec
325-
Thread.sleep(1000);
326292
}
327293

328294
private void waitForNotificationTaskRun() throws InterruptedException {
@@ -333,11 +299,11 @@ private int waitFor(final Long refId, int currentCount) throws InterruptedExcept
333299
int nTasks;
334300
do {
335301
nTasks = client.getSuccessfulTasks(refId).size();
336-
if (!(nTasks <= currentCount)) {
302+
if (nTasks > currentCount) {
337303
break;
338304
}
339-
// should trigger another task after time jump
340-
Thread.sleep(1000);
305+
// should give time to run tasks
306+
Thread.sleep(500);
341307
} while (true);
342308

343309
return nTasks;

thirdeye-integration-tests/src/test/java/ai/startree/thirdeye/HappyPathTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ public void testGetAnomaliesCreate() throws InterruptedException {
419419
List<AnomalyApi> anomalies = List.of();
420420
while (anomalies.size() == 0) {
421421
// see taskDriver server config for optimization
422-
Thread.sleep(1000);
422+
Thread.sleep(500);
423423
final Response response = request("api/anomalies?isChild=false").get();
424424
assert200(response);
425425
anomalies = response.readEntity(ANOMALIES_LIST_TYPE);
@@ -448,7 +448,7 @@ public void testGetAnomaliesAfterUpdate() throws InterruptedException {
448448
long newAlertLastUpdateTime = alertLastUpdateTime;
449449
while (newAlertLastUpdateTime == alertLastUpdateTime) {
450450
// see taskDriver server config for optimization
451-
Thread.sleep(1000);
451+
Thread.sleep(500);
452452
newAlertLastUpdateTime = getAlertLastUpdatedTime();
453453
}
454454
final Response response = request("api/anomalies?isChild=false").get();
@@ -595,7 +595,7 @@ public void testReplayIsIdemPotent() throws InterruptedException {
595595
assertThat(replayResponse.getStatus()).isEqualTo(200);
596596

597597
while (getAlertLastUpdatedTime() == lastUpdatedTime) {
598-
Thread.sleep(1000);
598+
Thread.sleep(500);
599599
}
600600

601601
final Response afterReplayResponse = request(alertAnomaliesRoute).get();
@@ -867,7 +867,7 @@ private RcaInvestigationApi mustGetInvestigation(long id) {
867867
private void waitForAnyAnomalies(final long alertId) throws InterruptedException {
868868
List<AnomalyApi> gotAnomalies = mustGetAnomaliesForAlert(alertId);
869869
while (gotAnomalies.size() == 0) {
870-
Thread.sleep(1000);
870+
Thread.sleep(500);
871871
gotAnomalies = mustGetAnomaliesForAlert(alertId);
872872
}
873873
}

thirdeye-integration-tests/src/test/java/ai/startree/thirdeye/SchedulingTest.java

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import java.util.Comparator;
3939
import java.util.List;
4040
import java.util.Set;
41-
import java.util.stream.Collectors;
4241
import org.slf4j.Logger;
4342
import org.slf4j.LoggerFactory;
4443
import org.testng.annotations.AfterClass;
@@ -168,7 +167,7 @@ public void testOnboardingLastTimestamp() throws Exception {
168167
TaskApi onboardingTask = getTask(onboardingTaskId);
169168
while (TASK_PENDING_STATUSES.contains(onboardingTask.getStatus())) {
170169
// see taskDriver server config for optimization
171-
Thread.sleep(1000);
170+
Thread.sleep(500);
172171
onboardingTask = getTask(onboardingTaskId);
173172
}
174173

@@ -185,12 +184,12 @@ public void testAfterDetectionCronLastTimestamp() throws InterruptedException {
185184
// not exact time should not impact lastTimestamp
186185
CLOCK.tick(5);
187186
// give thread to detectionCronScheduler and to quartz scheduler - (quartz idle time is weaved to 100 ms for test speed)
188-
Thread.sleep(1000);
187+
Thread.sleep(500);
189188

190189
// wait for the new task to be created - proxy to know when the detection is triggered
191190
List<TaskApi> tasks = getTasks();
192191
while (tasks.size() == 1 || TASK_PENDING_STATUSES.contains(tasks.getLast().getStatus())) {
193-
Thread.sleep(1000);
192+
Thread.sleep(500);
194193
tasks = getTasks();
195194
}
196195
assertThat(tasks.getLast().getTaskSubType()).isEqualTo(TaskSubType.DETECTION_TRIGGERED_BY_CRON);
@@ -211,11 +210,11 @@ public void testSecondAnomalyIsMerged() throws InterruptedException {
211210
// not exact time should not impact lastTimestamp
212211
CLOCK.tick(5);
213212
// give thread to quartz scheduler - (quartz idle time is weaved to 1000 ms for test speed)
214-
Thread.sleep(1000);
213+
Thread.sleep(500);
215214

216215
// wait for a new anomaly to be created - proxy to know when the detection has run
217216
while (anomalies.size() == numAnomaliesBeforeDetectionRun) {
218-
Thread.sleep(1000);
217+
Thread.sleep(500);
219218
anomalies = getAnomalies();
220219
}
221220

@@ -226,7 +225,7 @@ public void testSecondAnomalyIsMerged() throws InterruptedException {
226225
// find anomalies starting on MARCH 21 - there should be 2
227226
final List<AnomalyApi> march21Anomalies = anomalies.stream()
228227
.filter(a -> a.getStartTime().getTime() == MARCH_21_2020_00H00)
229-
.collect(Collectors.toList());
228+
.toList();
230229
assertThat(march21Anomalies.size()).isEqualTo(2);
231230
// check that one anomaly finishes on MARCH 22: the child anomaly
232231
assertThat(march21Anomalies.stream()

thirdeye-integration-tests/src/test/java/ai/startree/thirdeye/TestNotificationServiceFactory.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ public NotificationService build(final Map<String, Object> params) {
4343
};
4444
}
4545

46-
public int getCount() {
46+
public int notificationSentCount() {
4747
return count;
4848
}
4949

50-
public NotificationPayloadApi getNotificationPayload() {
50+
public NotificationPayloadApi lastNotificationPayload() {
5151
return f.get();
5252
}
5353

thirdeye-integration-tests/src/test/resources/anomalyresolution/payloads/alert-1.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "threshold-pageviews-daily-merge-3-15",
33
"description": "Daily Threshold Alert with mergeMaxGap = 3 days and mergeMaxDuration = 15 days",
4-
"cron": "0 0 5 1/1 * ? *",
4+
"cron": "0 0 5 * * ? *",
55
"template": {
66
"nodes": [
77
{
@@ -74,9 +74,9 @@
7474
"monitoringGranularity": "P1D",
7575
"timeColumn": "AUTO",
7676
"timeColumnFormat": "",
77-
"max": "600000",
77+
"max": "680000",
7878
"min": "250000",
7979
"mergeMaxGap": "P3D",
80-
"mergeMaxDuration": "P15D"
80+
"mergeMaxDuration": "P6D"
8181
}
8282
}

thirdeye-integration-tests/src/test/resources/anomalyresolution/payloads/subscription-group.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "sg-anomalyresolution",
3-
"cron": "0 0/5 0 ? * * *",
3+
"cron": "0 0 7 * * ? *",
44
"active": true,
55
"notifyHistoricalAnomalies": false,
66
"specs": [

thirdeye-notification/src/main/java/ai/startree/thirdeye/notification/NotificationTaskFilter.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@ private AnomalyFilter buildAnomalyFilterCompletedAnomalies(final AlertAssociatio
193193
final AlertDTO alert = alertManager.findById(alertId);
194194
final AlertTemplateDTO renderedTemplate = alertTemplateRenderer.renderAlert(alert);
195195
final Period mergeMaxGap = AlertUtils.getMergeMaxGap(renderedTemplate);
196+
// note: a ThirdEye anomaly could also be completed if its mergeMaxDuration is reached,
197+
// but in this case a new ThirdEye anomaly would be created for the same data anomaly just after
198+
// we don't call out these anomalies as completed
196199
final long endTimeIsLt = alert.getLastTimestamp() - mergeMaxGap.toStandardDuration().getMillis();
197200

198201
return new AnomalyFilter()

0 commit comments

Comments
 (0)