Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion quickwit/quickwit-control-plane/src/indexing_scheduler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use quickwit_proto::types::NodeId;
use scheduling::{SourceToSchedule, SourceToScheduleType};
use serde::Serialize;
use tracing::{debug, info, warn};
use ulid::Ulid;

use crate::indexing_plan::PhysicalIndexingPlan;
use crate::indexing_scheduler::change_tracker::{NotifyChangeOnDrop, RebuildNotifier};
Expand Down Expand Up @@ -414,11 +415,16 @@ impl IndexingScheduler {
) {
debug!(new_physical_plan=?new_physical_plan, "apply physical indexing plan");
APPLY_PLAN_TOTAL.inc();
// ULIDs are time-ordered, so a later application always yields a greater id. Indexers use
// it as the publish token for the shards they acquire. This guarantees a stale plan
// can never steal a shard from a more recent one.
let indexing_plan_id = Ulid::new().to_string();

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Use a monotonic ULID generator for plan IDs

This relies on each later plan id comparing greater than the previous one, but Ulid::new() does not guarantee monotonic sort order within the same millisecond. If two plans are applied in the same millisecond (for example via direct rebalance-triggered rebuilds), the newer plan can get a lexicographically smaller id, causing AcquireShards to reject it as stale and leaving the older owner in place.

Useful? React with 👍 / 👎.

for (node_id, indexing_tasks) in new_physical_plan.indexing_tasks_per_indexer() {
// We don't want to block on a slow indexer so we apply this change asynchronously
// TODO not blocking is cool, but we need to make sure there is not accumulation
// possible here.
let notify_on_drop = notify_on_drop.clone();
let indexing_plan_id = indexing_plan_id.clone();
tokio::spawn({
let indexer = indexers
.iter()
Expand All @@ -430,7 +436,10 @@ impl IndexingScheduler {
if let Err(error) = indexer
.client
.clone()
.apply_indexing_plan(ApplyIndexingPlanRequest { indexing_tasks })
.apply_indexing_plan(ApplyIndexingPlanRequest {
indexing_tasks,
indexing_plan_id,
})
.await
{
warn!(
Expand Down
7 changes: 7 additions & 0 deletions quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ pub struct IndexingPipeline {
// requiring a respawn of the pipeline.
// We keep the list of shards here however, to reassign them after a respawn.
shard_ids: BTreeSet<ShardId>,
// Id of the last indexing plan assigned to this pipeline. Kept here, like `shard_ids`, so it
// can be re-sent to the source on respawn; the source adopts it as its publish token.
indexing_plan_id: String,
_indexing_pipelines_gauge_guard: GaugeGuard,
}

Expand Down Expand Up @@ -137,6 +140,7 @@ impl IndexingPipeline {
..Default::default()
},
shard_ids: Default::default(),
indexing_plan_id: String::new(),
_indexing_pipelines_gauge_guard: indexing_pipelines_gauge_guard,
}
}
Expand Down Expand Up @@ -402,6 +406,7 @@ impl IndexingPipeline {
.spawn(actor_source);
let assign_shards_message = AssignShards(Assignment {
shard_ids: self.shard_ids.clone(),
indexing_plan_id: self.indexing_plan_id.clone(),
});
source_mailbox.send_message(assign_shards_message).await?;

Expand Down Expand Up @@ -496,6 +501,8 @@ impl Handler<AssignShards> for IndexingPipeline {
) -> Result<(), ActorExitStatus> {
self.shard_ids
.clone_from(&assign_shards_message.0.shard_ids);
self.indexing_plan_id
.clone_from(&assign_shards_message.0.indexing_plan_id);
// If the pipeline is running, we forward the message to its source.
// If it is not, it will be respawned soon, and the shards will be assigned afterward.
if let Some(handles) = &self.handles_opt {
Expand Down
23 changes: 16 additions & 7 deletions quickwit/quickwit-indexing/src/actors/indexing_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -760,8 +760,8 @@ impl IndexingService {
/// or not.
///
/// If a pipeline actor has failed, this function just logs an error.
async fn assign_shards_to_pipelines(&mut self, tasks: &[IndexingTask]) {
for task in tasks {
async fn assign_shards_to_pipelines(&mut self, plan_request: &ApplyIndexingPlanRequest) {
for task in &plan_request.indexing_tasks {
if task.shard_ids.is_empty() {
continue;
}
Expand All @@ -771,6 +771,7 @@ impl IndexingService {
};
let assignment = Assignment {
shard_ids: task.shard_ids.iter().cloned().collect(),
indexing_plan_id: plan_request.indexing_plan_id.clone(),
};
let message = AssignShards(assignment);

Expand All @@ -785,10 +786,10 @@ impl IndexingService {
/// - Starting the pipelines that are not running.
async fn apply_indexing_plan(
&mut self,
tasks: &[IndexingTask],
plan_request: ApplyIndexingPlanRequest,

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Ignore stale indexing plan requests

When an older ApplyIndexingPlanRequest arrives after a newer one (the control plane sends applications asynchronously), this path still computes the diff and shuts down/reassigns pipelines before any plan-id comparison. A delayed stale plan can therefore reset or remove work from the newer plan, then fail to acquire its shards because the metastore rejects the old token, leaving the indexer idle until the control loop reapplies the latest plan.

Useful? React with 👍 / 👎.

ctx: &ActorContext<Self>,
) -> Result<(), IndexingError> {
let pipeline_diff = self.compute_pipeline_diff(tasks);
let pipeline_diff = self.compute_pipeline_diff(&plan_request.indexing_tasks);

if !pipeline_diff.pipelines_to_shutdown.is_empty() {
self.shutdown_pipelines(&pipeline_diff.pipelines_to_shutdown)
Expand All @@ -801,7 +802,7 @@ impl IndexingService {
.spawn_pipelines(&pipeline_diff.pipelines_to_spawn, ctx)
.await?;
}
self.assign_shards_to_pipelines(tasks).await;
self.assign_shards_to_pipelines(&plan_request).await;
self.update_chitchat_running_plan().await;

if !spawn_pipeline_failures.is_empty() {
Expand Down Expand Up @@ -1135,7 +1136,7 @@ impl Handler<ApplyIndexingPlanRequest> for IndexingService {
ctx: &ActorContext<Self>,
) -> Result<Self::Reply, ActorExitStatus> {
Ok(self
.apply_indexing_plan(&plan_request.indexing_tasks, ctx)
.apply_indexing_plan(plan_request, ctx)
.await
.map(|_| ApplyIndexingPlanResponse {}))
}
Expand Down Expand Up @@ -1465,7 +1466,10 @@ mod tests {
},
];
indexing_service
.ask_for_res(ApplyIndexingPlanRequest { indexing_tasks })
.ask_for_res(ApplyIndexingPlanRequest {
indexing_tasks,
indexing_plan_id: "01ARZ3NDEKTSV4RRFFQ69G5FAV".to_string(),
})
.await
.unwrap();
assert_eq!(
Expand Down Expand Up @@ -1531,6 +1535,7 @@ mod tests {
indexing_service
.ask_for_res(ApplyIndexingPlanRequest {
indexing_tasks: indexing_tasks.clone(),
indexing_plan_id: "01ARZ3NDEKTSV4RRFFQ69G5FAV".to_string(),
})
.await
.unwrap();
Expand Down Expand Up @@ -1587,6 +1592,7 @@ mod tests {
indexing_service
.ask_for_res(ApplyIndexingPlanRequest {
indexing_tasks: indexing_tasks.clone(),
indexing_plan_id: "01ARZ3NDEKTSV4RRFFQ69G5FAV".to_string(),
})
.await
.unwrap();
Expand Down Expand Up @@ -1646,6 +1652,7 @@ mod tests {
indexing_service
.ask_for_res(ApplyIndexingPlanRequest {
indexing_tasks: indexing_tasks.clone(),
indexing_plan_id: "01ARZ3NDEKTSV4RRFFQ69G5FAV".to_string(),
})
.await
.unwrap();
Expand All @@ -1665,6 +1672,7 @@ mod tests {
indexing_service
.ask_for_res(ApplyIndexingPlanRequest {
indexing_tasks: Vec::new(),
indexing_plan_id: "01ARZ3NDEKTSV4RRFFQ69G5FAV".to_string(),
})
.await
.unwrap();
Expand Down Expand Up @@ -2072,6 +2080,7 @@ mod tests {
params_fingerprint: 0,
},
],
indexing_plan_id: "01ARZ3NDEKTSV4RRFFQ69G5FAV".to_string(),
})
.await
.unwrap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ pub struct MetricsPipeline {
handles_opt: Option<MetricsPipelineHandles>,
kill_switch: KillSwitch,
shard_ids: BTreeSet<ShardId>,
// Id of the last indexing plan assigned to this pipeline. Kept here, like `shard_ids`, so it
// can be re-sent to the source on respawn; the source adopts it as its publish token.
indexing_plan_id: String,
_indexing_pipelines_gauge_guard: GaugeGuard,
}

Expand Down Expand Up @@ -163,6 +166,7 @@ impl MetricsPipeline {
..Default::default()
},
shard_ids: Default::default(),
indexing_plan_id: String::new(),
_indexing_pipelines_gauge_guard: indexing_pipelines_gauge_guard,
}
}
Expand Down Expand Up @@ -447,6 +451,7 @@ impl MetricsPipeline {
.spawn(actor_source);
let assign_shards_message = AssignShards(Assignment {
shard_ids: self.shard_ids.clone(),
indexing_plan_id: self.indexing_plan_id.clone(),
});
source_mailbox.send_message(assign_shards_message).await?;

Expand Down Expand Up @@ -539,6 +544,8 @@ impl Handler<AssignShards> for MetricsPipeline {
) -> Result<(), ActorExitStatus> {
self.shard_ids
.clone_from(&assign_shards_message.0.shard_ids);
self.indexing_plan_id
.clone_from(&assign_shards_message.0.indexing_plan_id);
if let Some(handles) = &self.handles_opt {
info!(
shard_ids=?assign_shards_message.0.shard_ids,
Expand Down
Loading
Loading