diff --git a/.gitignore b/.gitignore index fc72832b..cbf7f9d7 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ coverage/ # per-package lockfiles are stray apps/*/pnpm-lock.yaml !pnpm-lock.yaml + +.tool-versions diff --git a/apps/backend/.env.example b/apps/backend/.env.example index 6815a66f..807de908 100644 --- a/apps/backend/.env.example +++ b/apps/backend/.env.example @@ -23,7 +23,11 @@ WALLET_ADDRESS=0x0000000000000000000000000000000000000000 WALLET_PRIVATE_KEY=your_private_key_here CHECK_DATASET_CREATION_FEES=true USE_ONLY_APPROVED_PROVIDERS=true +# Upstream pdp-explorer subgraph — drives the data-retention / overdue-periods path. PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp +# Dealbot-owned subgraph on Goldsky (see apps/subgraph/README.md) — drives only +# the new anonymous-retrieval candidate-piece query for now. +SUBGRAPH_ENDPOINT=https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn # Minimum number of datasets per SP (default: 1). When > 1, a separate data_set_creation job provisions extra datasets. 
MIN_NUM_DATASETS_FOR_CHECKS=1 @@ -52,6 +56,9 @@ DEALBOT_MAINTENANCE_WINDOW_MINUTES=20 DEALS_PER_SP_PER_HOUR=2 DATASET_CREATIONS_PER_SP_PER_HOUR=1 RETRIEVALS_PER_SP_PER_HOUR=1 +RETRIEVALS_ANON_PER_SP_PER_HOUR= +ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT=5 +METRICS_PER_HOUR=2 PG_BOSS_LOCAL_CONCURRENCY=20 JOB_SCHEDULER_POLL_SECONDS=300 JOB_WORKER_POLL_SECONDS=60 @@ -60,6 +67,7 @@ JOB_SCHEDULE_PHASE_SECONDS=0 JOB_ENQUEUE_JITTER_SECONDS=0 DEAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for deal jobs (TODO: reduce default to 3m) RETRIEVAL_JOB_TIMEOUT_SECONDS=60 # 1m: Max runtime for retrieval jobs (TODO: reduce default to 30s) +ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for anon retrieval jobs (pieces up to ~500 MiB) IPFS_BLOCK_FETCH_CONCURRENCY=6 # Parallel block fetches when validating IPFS DAGs DEALBOT_PGBOSS_POOL_MAX=1 DEALBOT_PGBOSS_SCHEDULER_ENABLED=true @@ -73,9 +81,13 @@ PROXY_LIST=http://username:password@host:port,http://username:password@host:port PROXY_LOCATIONS=l1,l2 # Timeout Configuration (in milliseconds) -CONNECT_TIMEOUT_MS=10000 # 10s: Initial connection timeout -HTTP_REQUEST_TIMEOUT_MS=240000 # 4m: Total transfer timeout for HTTP/1.1 (10MiB @ 170KB/s + overhead) -HTTP2_REQUEST_TIMEOUT_MS=240000 # 4m: Total transfer timeout for HTTP/2 (10MiB @ 170KB/s + overhead) +CONNECT_TIMEOUT_MS=10000 # 10s: Connection + response-headers timeout (scoped to the header phase only) +# HTTP_REQUEST_TIMEOUT_MS and HTTP2_REQUEST_TIMEOUT_MS default to the longest job timeout above +# (max of DEAL_/RETRIEVAL_/ANON_RETRIEVAL_/DATA_SET_CREATION_/MAX_PIECE_CLEANUP_ * 1000 ms) so the +# HTTP-level ceiling never pre-empts a job-scoped AbortSignal. Only override when you have a non-job +# caller of HttpClientService that needs a specific deadline. 
+# HTTP_REQUEST_TIMEOUT_MS=360000 +# HTTP2_REQUEST_TIMEOUT_MS=360000 # SP Blocklists configuration # BLOCKED_SP_IDS=1234,5678 diff --git a/apps/backend/README.md b/apps/backend/README.md index 19ee970a..e4dafd6e 100644 --- a/apps/backend/README.md +++ b/apps/backend/README.md @@ -105,6 +105,7 @@ All configuration is done via environment variables in `.env`. | `ENABLE_IPNI_TESTING` | IPNI testing mode (`disabled`/`random`/`always`) | `always` | | `USE_ONLY_APPROVED_PROVIDERS` | Only use approved storage providers | `true` | | `PDP_SUBGRAPH_ENDPOINT` | PDP subgraph API endpoint for PDP proof-set/data-retention | `https://api.thegraph.com/subgraphs/filecoin/pdp` | +| `SUBGRAPH_ENDPOINT` | Subgraph GraphQL endpoint for anon-retrieval queries | `https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn` | ### Scheduling Configuration (pg-boss) diff --git a/apps/backend/src/app.module.ts b/apps/backend/src/app.module.ts index 569ec5e4..0580f339 100644 --- a/apps/backend/src/app.module.ts +++ b/apps/backend/src/app.module.ts @@ -13,6 +13,7 @@ import { JobsModule } from "./jobs/jobs.module.js"; import { MetricsPrometheusModule } from "./metrics-prometheus/metrics-prometheus.module.js"; import { ProvidersModule } from "./providers/providers.module.js"; import { RetrievalModule } from "./retrieval/retrieval.module.js"; +import { RetrievalAnonModule } from "./retrieval-anon/retrieval-anon.module.js"; @Module({ imports: [ @@ -28,6 +29,7 @@ import { RetrievalModule } from "./retrieval/retrieval.module.js"; JobsModule, DealModule, RetrievalModule, + RetrievalAnonModule, DataSourceModule, ProvidersModule, ...(process.env.ENABLE_DEV_MODE === "true" ? 
[DevToolsModule] : []), diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index 85d91052..b27ba0e2 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -62,6 +62,50 @@ export function buildMigrations(database: string): string[] { PARTITION BY toStartOfMonth(timestamp) TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, + `CREATE TABLE IF NOT EXISTS ${database}.anon_retrieval_checks +( + timestamp DateTime64(3, 'UTC'), -- when the check completed + probe_location LowCardinality(String), -- dealbot location + sp_address String, -- storage provider address (lowercased) + sp_id Nullable(UInt64), -- storage provider numeric id + sp_name Nullable(String), -- storage provider name + + retrieval_id UUID, -- per-event correlation id (log/Prometheus join) + + piece_cid String, -- piece CID (v2/CommP) sampled from the subgraph + data_set_id UInt64, -- on-chain data set id + piece_id UInt64, -- on-chain piece id within the data set + raw_size UInt64, -- raw (unpadded) piece size, bytes + with_ipfs_indexing Bool, -- whether the piece advertises IPNI metadata + ipfs_root_cid Nullable(String), -- root CID of the contained DAG; null when not IPFS-indexed + + service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) + retrieval_endpoint String, -- URL probed (e.g. {spBaseUrl}/piece/{pieceCid}) + + piece_fetch_status LowCardinality(String), -- 'success' | 'failed' — HTTP transport outcome of GET /piece/ (HTTP 2xx). CommP validity, CAR/IPNI/block-fetch outcomes live in their own columns. 
+ http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure + first_byte_ms Nullable(Float64), -- time to first response byte + last_byte_ms Nullable(Float64), -- time to last response byte + bytes_retrieved Nullable(UInt64), -- bytes received from /piece/{cid} + throughput_bps Nullable(UInt64), -- effective throughput, bytes per second + + commp_valid Nullable(Bool), -- null when retrieval failed before CommP could be hashed + car_parseable Nullable(Bool), -- null when CAR validation was skipped (no IPFS indexing or piece fetch failed); true if bytes parsed as a CAR + car_block_count Nullable(UInt32), -- total number of blocks observed inside the CAR; null when skipped or unparseable + block_fetch_endpoint Nullable(String), -- gateway base URL probed for block fetch (e.g. {spBaseUrl}/ipfs/); null when skipped + block_fetch_valid Nullable(Bool), -- null when skipped; true if all sampled blocks fetched + hash-verified + block_fetch_sampled_count Nullable(UInt32), -- number of blocks sampled and probed via /ipfs/?format=raw + block_fetch_failed_count Nullable(UInt32), -- number of sampled blocks that failed (non-2xx, hash mismatch, unsupported codec, or transport error) + + ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' | 'error' — all-or-nothing across the root CID and the sampled child CIDs (filecoin-pin verifies them as a single batch) + ipni_verify_ms Nullable(Float64), -- IPNI verification duration; null when skipped + + error_message Nullable(String) -- failure reason; null on success +) ENGINE MergeTree() + PRIMARY KEY (probe_location, sp_address, timestamp) + PARTITION BY toStartOfMonth(timestamp) + TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, + `CREATE TABLE IF NOT EXISTS ${database}.data_retention_challenges ( timestamp DateTime64(3, 'UTC'), -- when the poll ran and detected these periods diff --git a/apps/backend/src/config/app.config.ts b/apps/backend/src/config/app.config.ts index b3b32a37..49b55606 
100644 --- a/apps/backend/src/config/app.config.ts +++ b/apps/backend/src/config/app.config.ts @@ -56,7 +56,16 @@ export const configValidationSchema = Joi.object({ USE_ONLY_APPROVED_PROVIDERS: Joi.boolean().default(true), DEALBOT_DATASET_VERSION: Joi.string().optional(), MIN_NUM_DATASETS_FOR_CHECKS: Joi.number().integer().min(1).default(1), + // Two subgraph endpoints coexist intentionally to limit blast radius while we + // migrate off the upstream pdp-explorer subgraph: + // - PDP_SUBGRAPH_ENDPOINT drives the established overdue-periods / data + // retention path against the existing pdp-explorer subgraph. + // - SUBGRAPH_ENDPOINT drives only the new anonymous-retrieval candidate + // piece query against the dealbot-owned subgraph. + // Once the dealbot-owned subgraph has soaked in production we can drop + // PDP_SUBGRAPH_ENDPOINT and route everything through SUBGRAPH_ENDPOINT. PDP_SUBGRAPH_ENDPOINT: Joi.string().uri().optional().allow(""), + SUBGRAPH_ENDPOINT: Joi.string().uri().optional().allow(""), // Scheduling PROVIDERS_REFRESH_INTERVAL_SECONDS: Joi.number().default(4 * 3600), @@ -80,6 +89,7 @@ export const configValidationSchema = Joi.object({ DEALS_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).default(4), DATASET_CREATIONS_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).default(1), RETRIEVALS_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).default(2), + RETRIEVALS_ANON_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).optional(), // Polling interval for pg-boss scheduler (lower = more responsive, higher = less DB chatter). 
JOB_SCHEDULER_POLL_SECONDS: Joi.number().min(60).default(300), JOB_WORKER_POLL_SECONDS: Joi.number().min(5).default(60), @@ -91,8 +101,10 @@ export const configValidationSchema = Joi.object({ JOB_ENQUEUE_JITTER_SECONDS: Joi.number().min(0).default(0), DEAL_JOB_TIMEOUT_SECONDS: Joi.number().min(120).default(360), // 6 minutes max runtime for data storage jobs (TODO: reduce default to 3 minutes) RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(60), // 1 minute max runtime for retrieval jobs (TODO: reduce default to 30 seconds) + ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(360), // 6 minutes max runtime for anon retrieval jobs (pieces can be up to 500 MiB) DATA_SET_CREATION_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(300), // 5 minutes max runtime for dataset creation jobs IPFS_BLOCK_FETCH_CONCURRENCY: Joi.number().integer().min(1).max(32).default(6), + ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT: Joi.number().integer().min(1).max(50).default(5), // Piece Cleanup MAX_DATASET_STORAGE_SIZE_BYTES: Joi.number() @@ -131,8 +143,9 @@ export const configValidationSchema = Joi.object({ // Timeouts (in milliseconds) CONNECT_TIMEOUT_MS: Joi.number().min(1000).default(10000), // 10 seconds to establish connection/receive headers - HTTP_REQUEST_TIMEOUT_MS: Joi.number().min(1000).default(240000), // 4 minutes total for HTTP requests (10MiB @ 170KB/s + overhead) - HTTP2_REQUEST_TIMEOUT_MS: Joi.number().min(1000).default(240000), // 4 minutes total for HTTP/2 requests (10MiB @ 170KB/s + overhead) + // Defaults intentionally omitted so loadConfig can derive them from the longest job timeout. 
+ HTTP_REQUEST_TIMEOUT_MS: Joi.number().min(1000).optional(), + HTTP2_REQUEST_TIMEOUT_MS: Joi.number().min(1000).optional(), IPNI_VERIFICATION_TIMEOUT_MS: Joi.number().min(1000).default(60000), // 60 seconds max time to wait for IPNI verification IPNI_VERIFICATION_POLLING_MS: Joi.number().min(250).default(2000), // 2 seconds between IPNI verification polls @@ -174,6 +187,7 @@ export interface IBlockchainConfig { dealbotDataSetVersion?: string; minNumDataSetsForChecks: number; pdpSubgraphEndpoint?: string; + subgraphEndpoint?: string; // Endpoint of the dealbot-owned subgraph. Eventually replaces `pdpSubgraphEndpoint` } export interface ISchedulingConfig { @@ -264,6 +278,14 @@ export interface IJobsConfig { * Uses AbortController to actively cancel job execution. */ retrievalJobTimeoutSeconds: number; + /** + * Maximum runtime (seconds) for anonymous retrieval jobs before forced abort. + * + * Anonymous retrievals fetch arbitrary pieces (up to ~500 MiB), so this is + * typically larger than `retrievalJobTimeoutSeconds`. Uses AbortController + * to actively cancel job execution while still persisting partial metrics. + */ + anonRetrievalJobTimeoutSeconds: number; /** * Target number of piece cleanup runs per storage provider per hour. * @@ -278,6 +300,12 @@ export interface IJobsConfig { * Only used when `DEALBOT_JOBS_MODE=pgboss`. */ maxPieceCleanupRuntimeSeconds: number; + + /** + * Target number of anonymous retrieval tests per storage provider per hour. + * Defaults to retrievalsPerSpPerHour when not set. + */ + retrievalsAnonPerSpPerHour: number; } export interface IDatasetConfig { @@ -295,6 +323,10 @@ export interface ITimeoutConfig { export interface IRetrievalConfig { ipfsBlockFetchConcurrency: number; + /** + * Number of CAR blocks to sample for IPNI + block-fetch validation. 
+ */ + anonBlockSampleCount: number; } export interface IPieceCleanupConfig { @@ -336,6 +368,43 @@ export interface IConfig { } export function loadConfig(): IConfig { + const jobTimeoutSeconds = { + deal: Number.parseInt(process.env.DEAL_JOB_TIMEOUT_SECONDS || "360", 10), + retrieval: Number.parseInt(process.env.RETRIEVAL_JOB_TIMEOUT_SECONDS || "60", 10), + anonRetrieval: Number.parseInt(process.env.ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS || "360", 10), + dataSetCreation: Number.parseInt(process.env.DATA_SET_CREATION_JOB_TIMEOUT_SECONDS || "300", 10), + pieceCleanup: Number.parseInt(process.env.MAX_PIECE_CLEANUP_RUNTIME_SECONDS || "300", 10), + }; + + // HTTP-level request timeouts default to the longest job timeout so the + // per-request ceiling never caps below the per-job budget. Any job-scoped + // AbortSignal fires first and is authoritative; the HTTP timer only kicks + // in for callers that do not pass a parent signal. + const longestJobTimeoutMs = Math.max(...Object.values(jobTimeoutSeconds)) * 1000; + + const httpRequestTimeoutMs = Number.parseInt(process.env.HTTP_REQUEST_TIMEOUT_MS || String(longestJobTimeoutMs), 10); + const http2RequestTimeoutMs = Number.parseInt( + process.env.HTTP2_REQUEST_TIMEOUT_MS || String(longestJobTimeoutMs), + 10, + ); + + // Misconfiguration guard: if someone explicitly sets an HTTP timeout below + // the longest job timeout, the HTTP-level timer will abort in-flight work + // before the job signal has a chance to report it. Warn loudly so this is + // caught at boot rather than inferred from short-timeout incidents later. + for (const [name, value] of [ + ["HTTP_REQUEST_TIMEOUT_MS", httpRequestTimeoutMs], + ["HTTP2_REQUEST_TIMEOUT_MS", http2RequestTimeoutMs], + ] as const) { + if (value < longestJobTimeoutMs) { + // eslint-disable-next-line no-console + console.warn( + `[config] ${name}=${value}ms is lower than the longest job timeout (${longestJobTimeoutMs}ms). 
` + + `HTTP requests may abort before the job signal fires, producing short, unexplained timeouts.`, + ); + } + } + return { app: { env: process.env.NODE_ENV || "development", @@ -379,6 +448,7 @@ export function loadConfig(): IConfig { dealbotDataSetVersion: process.env.DEALBOT_DATASET_VERSION, minNumDataSetsForChecks: Number.parseInt(process.env.MIN_NUM_DATASETS_FOR_CHECKS || "1", 10), pdpSubgraphEndpoint: process.env.PDP_SUBGRAPH_ENDPOINT || "", + subgraphEndpoint: process.env.SUBGRAPH_ENDPOINT || "", }, scheduling: { providersRefreshIntervalSeconds: Number.parseInt(process.env.PROVIDERS_REFRESH_INTERVAL_SECONDS || "14400", 10), @@ -401,11 +471,15 @@ export function loadConfig(): IConfig { catchupMaxEnqueue: Number.parseInt(process.env.JOB_CATCHUP_MAX_ENQUEUE || "10", 10), schedulePhaseSeconds: Number.parseInt(process.env.JOB_SCHEDULE_PHASE_SECONDS || "0", 10), enqueueJitterSeconds: Number.parseInt(process.env.JOB_ENQUEUE_JITTER_SECONDS || "0", 10), - dealJobTimeoutSeconds: Number.parseInt(process.env.DEAL_JOB_TIMEOUT_SECONDS || "360", 10), - retrievalJobTimeoutSeconds: Number.parseInt(process.env.RETRIEVAL_JOB_TIMEOUT_SECONDS || "60", 10), - dataSetCreationJobTimeoutSeconds: Number.parseInt(process.env.DATA_SET_CREATION_JOB_TIMEOUT_SECONDS || "300", 10), + dealJobTimeoutSeconds: jobTimeoutSeconds.deal, + retrievalJobTimeoutSeconds: jobTimeoutSeconds.retrieval, + anonRetrievalJobTimeoutSeconds: jobTimeoutSeconds.anonRetrieval, + retrievalsAnonPerSpPerHour: Number.parseFloat( + process.env.RETRIEVALS_ANON_PER_SP_PER_HOUR || process.env.RETRIEVALS_PER_SP_PER_HOUR || "2", + ), + dataSetCreationJobTimeoutSeconds: jobTimeoutSeconds.dataSetCreation, pieceCleanupPerSpPerHour: Number.parseFloat(process.env.JOB_PIECE_CLEANUP_PER_SP_PER_HOUR || String(1 / 24)), - maxPieceCleanupRuntimeSeconds: Number.parseInt(process.env.MAX_PIECE_CLEANUP_RUNTIME_SECONDS || "300", 10), + maxPieceCleanupRuntimeSeconds: jobTimeoutSeconds.pieceCleanup, }, dataset: { localDatasetsPath: 
process.env.DEALBOT_LOCAL_DATASETS_PATH || DEFAULT_LOCAL_DATASETS_PATH, @@ -427,13 +501,14 @@ export function loadConfig(): IConfig { }, timeouts: { connectTimeoutMs: Number.parseInt(process.env.CONNECT_TIMEOUT_MS || "10000", 10), - httpRequestTimeoutMs: Number.parseInt(process.env.HTTP_REQUEST_TIMEOUT_MS || "240000", 10), - http2RequestTimeoutMs: Number.parseInt(process.env.HTTP2_REQUEST_TIMEOUT_MS || "240000", 10), + httpRequestTimeoutMs, + http2RequestTimeoutMs, ipniVerificationTimeoutMs: Number.parseInt(process.env.IPNI_VERIFICATION_TIMEOUT_MS || "60000", 10), ipniVerificationPollingMs: Number.parseInt(process.env.IPNI_VERIFICATION_POLLING_MS || "2000", 10), }, retrieval: { ipfsBlockFetchConcurrency: Number.parseInt(process.env.IPFS_BLOCK_FETCH_CONCURRENCY || "6", 10), + anonBlockSampleCount: Number.parseInt(process.env.ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT || "5", 10), }, clickhouse: { url: process.env.CLICKHOUSE_URL || undefined, diff --git a/apps/backend/src/data-retention/data-retention.service.spec.ts b/apps/backend/src/data-retention/data-retention.service.spec.ts index 87ced66a..3fde29e8 100644 --- a/apps/backend/src/data-retention/data-retention.service.spec.ts +++ b/apps/backend/src/data-retention/data-retention.service.spec.ts @@ -921,7 +921,7 @@ describe("DataRetentionService", () => { expect(incCalls).toEqual(expect.arrayContaining([[10], [25]])); }); - it("reloads baselines from DB on every poll", async () => { + it("only loads baselines from DB once across multiple polls", async () => { pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); await service.pollDataRetention(); diff --git a/apps/backend/src/database/entities/job-schedule-state.entity.ts b/apps/backend/src/database/entities/job-schedule-state.entity.ts index d1758ae9..ebd5254d 100644 --- a/apps/backend/src/database/entities/job-schedule-state.entity.ts +++ b/apps/backend/src/database/entities/job-schedule-state.entity.ts @@ -6,6 +6,7 @@ import { Column, 
CreateDateColumn, Entity, Index, PrimaryGeneratedColumn, Update export type JobType = | "deal" | "retrieval" + | "retrieval_anon" | "data_set_creation" | "metrics" // legacy: no longer scheduled; see RemoveMetricsJobScheduleRows migration. TODO(#457): remove. | "metrics_cleanup" // legacy: no longer scheduled; see RemoveMetricsJobScheduleRows migration. TODO(#457): remove. diff --git a/apps/backend/src/database/types.ts b/apps/backend/src/database/types.ts index 46fd5d28..c56b355a 100644 --- a/apps/backend/src/database/types.ts +++ b/apps/backend/src/database/types.ts @@ -28,6 +28,13 @@ export enum IpniStatus { FAILED = "failed", } +export enum IpniCheckStatus { + VALID = "valid", + INVALID = "invalid", + SKIPPED = "skipped", + ERROR = "error", +} + /** * Metadata schema for deal storage and retrieval */ diff --git a/apps/backend/src/http-client/http-client.service.spec.ts b/apps/backend/src/http-client/http-client.service.spec.ts index 96604139..511910ba 100644 --- a/apps/backend/src/http-client/http-client.service.spec.ts +++ b/apps/backend/src/http-client/http-client.service.spec.ts @@ -64,25 +64,94 @@ describe("HttpClientService", () => { expect(config.timeout).toBe(120000); }); - it("times out HTTP/2 requests using the connection timeout", async () => { + it("passes the configured headersTimeout to undici and translates its error", async () => { const service = await createService(); - if (typeof AbortSignal.timeout !== "function") { - (AbortSignal as any).timeout = () => new AbortController().signal; + let receivedHeadersTimeout: number | undefined; + undiciRequestMock.mockImplementationOnce((_url: string, options: { headersTimeout?: number }) => { + receivedHeadersTimeout = options.headersTimeout; + const err = new Error("Headers Timeout Error") as Error & { code?: string }; + err.name = "HeadersTimeoutError"; + err.code = "UND_ERR_HEADERS_TIMEOUT"; + return Promise.reject(err); + }); + + await expect(service.requestWithMetrics("http://example.com", { 
httpVersion: "2" })).rejects.toThrow( + "HTTP/2 connection/headers timed out after 25ms", + ); + + expect(receivedHeadersTimeout).toBe(25); + }); + + it("keeps the request signal alive after the connect timeout window elapses", async () => { + const service = await createService(); + + // Previously, connectTimeoutMs (25ms) was folded into the request signal, + // so any download lasting longer than 25ms was aborted mid-stream. The + // signal must now stay live until the transfer timeout or parent signal + // fires. + let sawAbortBeforeResolve = false; + undiciRequestMock.mockImplementationOnce(async (_url: string, options: { signal?: AbortSignal }) => { + await new Promise((r) => setTimeout(r, 75)); + sawAbortBeforeResolve = options.signal?.aborted === true; + async function* body() { + yield Buffer.from("ok"); + } + return { statusCode: 200, body: body() }; + }); + + const result = await service.requestWithMetrics("http://example.com", { httpVersion: "2" }); + + expect(sawAbortBeforeResolve).toBe(false); + expect(result.aborted).toBeUndefined(); + expect(result.metrics.statusCode).toBe(200); + }); + + it("returns partial bytes and metrics when HTTP/2 download is aborted after headers", async () => { + const service = await createService(); + + const parentAbort = new AbortController(); + + async function* abortingBody() { + yield Buffer.from("hello"); + yield Buffer.from(" world"); + // Simulate an abort mid-stream after two chunks. 
+ parentAbort.abort(new Error("Anon retrieval job timeout (60s) for sp1")); + throw new Error("aborted"); } - undiciRequestMock.mockImplementationOnce((_url: string, options: { signal?: AbortSignal }) => { - return new Promise((_resolve, reject) => { - options.signal?.addEventListener("abort", () => reject(new Error("aborted")), { once: true }); - }); + undiciRequestMock.mockImplementationOnce(async () => ({ + statusCode: 200, + body: abortingBody(), + })); + + const result = await service.requestWithMetrics("http://example.com/piece", { + httpVersion: "2", + signal: parentAbort.signal, }); - vi.useFakeTimers(); + expect(result.aborted).toBe(true); + expect(result.abortReason).toContain("timeout"); + expect(result.metrics.statusCode).toBe(200); + expect(result.metrics.responseSize).toBe(11); + expect(Buffer.isBuffer(result.data) ? result.data.toString() : "").toBe("hello world"); + }); + + it("rethrows non-abort download errors on HTTP/2", async () => { + const service = await createService(); - const promise = service.requestWithMetrics("http://example.com", { httpVersion: "2" }); - const assertion = expect(promise).rejects.toThrow("HTTP/2 connection/headers timed out after 25ms"); - await vi.advanceTimersByTimeAsync(25); + async function* brokenBody() { + yield Buffer.from("partial"); + throw new Error("network reset"); + } + + undiciRequestMock.mockImplementationOnce(async () => ({ + statusCode: 200, + body: brokenBody(), + })); - await assertion; + await expect(service.requestWithMetrics("http://example.com/piece", { httpVersion: "2" })).rejects.toThrow( + "network reset", + ); }); }); diff --git a/apps/backend/src/http-client/http-client.service.ts b/apps/backend/src/http-client/http-client.service.ts index 48e10e5c..81140162 100644 --- a/apps/backend/src/http-client/http-client.service.ts +++ b/apps/backend/src/http-client/http-client.service.ts @@ -81,12 +81,11 @@ export class HttpClientService { let ttfbTime = 0; let statusCode = 0; - /** - * Dual-timeout 
strategy for HTTP/2 requests: - * 1. AbortSignal.timeout() - Undici's native timeout (10 min default) - * 2. AbortSignal.timeout() for connection/headers (10 sec default) - */ - const { signal, connectTimeoutSignal } = this.buildHttp2Signals(options.signal); + // Dual-timeout strategy for HTTP/2 requests: + // - `headersTimeout` (undici): scopes the connect + response-headers phase. + // - Combined AbortSignal: transfer-timeout ceiling + parent (job) signal. + const transferTimeoutSignal = AbortSignal.timeout(this.http2TimeoutMs); + const signal = options.signal ? anySignal([transferTimeoutSignal, options.signal]) : transferTimeoutSignal; const requestOptions: any = { method, headers: { @@ -94,6 +93,7 @@ export class HttpClientService { ...headers, }, signal, + headersTimeout: this.connectTimeoutMs, }; if (data) { @@ -105,7 +105,8 @@ export class HttpClientService { try { response = await undiciRequest(url, requestOptions); } catch (error) { - if (connectTimeoutSignal.aborted) { + // discern connection error from transfer error + if (isHeadersTimeoutError(error)) { throw new Error(`HTTP/2 connection/headers timed out after ${this.connectTimeoutMs}ms`); } throw error; @@ -115,8 +116,15 @@ export class HttpClientService { statusCode = response.statusCode; const chunks: Buffer[] = []; - for await (const chunk of response.body) { - chunks.push(Buffer.from(chunk)); + let downloadError: unknown; + try { + for await (const chunk of response.body) { + chunks.push(Buffer.from(chunk)); + } + } catch (error) { + // Download-phase failures (e.g. abort signal) fall through so we can + // return the partial buffer + metrics collected so far. 
+ downloadError = error; } const dataBuffer = Buffer.concat(chunks); @@ -133,6 +141,29 @@ export class HttpClientService { httpVersion: "2", }; + if (downloadError !== undefined) { + const aborted = options.signal?.aborted === true || isAbortLikeError(downloadError); + if (!aborted) { + throw downloadError; + } + const abortReason = describeAbortReason(options.signal, downloadError); + this.logger.warn({ + event: "http2_download_aborted", + message: "HTTP/2 download aborted after headers; returning partial data", + url, + bytesReceived: dataBuffer.length, + totalTime: metrics.totalTime, + ttfb: metrics.ttfb, + abortReason, + }); + return { + data: dataBuffer as T, + metrics, + aborted: true, + abortReason, + }; + } + return { data: dataBuffer as T, metrics, @@ -255,24 +286,28 @@ export class HttpClientService { // Fallback for objects/arrays return Buffer.from(JSON.stringify(data)); } +} - private buildHttp2Signals(parentSignal?: AbortSignal): { - signal: AbortSignal; - connectTimeoutSignal: AbortSignal; - } { - const transferTimeoutSignal = AbortSignal.timeout(this.http2TimeoutMs); - const connectTimeoutSignal = AbortSignal.timeout(this.connectTimeoutMs); +function isAbortLikeError(error: unknown): boolean { + if (error instanceof Error) { + return error.name === "AbortError" || error.name === "TimeoutError" || /abort/i.test(error.message); + } + return false; +} - if (parentSignal) { - return { - signal: anySignal([transferTimeoutSignal, connectTimeoutSignal, parentSignal]), - connectTimeoutSignal, - }; - } +/** + * Determines if a given error represents a "Headers Timeout" error. 
+ */ +function isHeadersTimeoutError(error: unknown): boolean { + if (!(error instanceof Error)) return false; + const code = (error as Error & { code?: string }).code; + return error.name === "HeadersTimeoutError" || code === "UND_ERR_HEADERS_TIMEOUT"; +} - return { - signal: anySignal([transferTimeoutSignal, connectTimeoutSignal]), - connectTimeoutSignal, - }; - } +function describeAbortReason(signal: AbortSignal | undefined, fallback: unknown): string { + const reason = signal?.reason; + if (reason instanceof Error && reason.message) return reason.message; + if (typeof reason === "string" && reason.length > 0) return reason; + if (fallback instanceof Error && fallback.message) return fallback.message; + return "aborted"; } diff --git a/apps/backend/src/http-client/types.ts b/apps/backend/src/http-client/types.ts index 7e48ce7d..26892ee6 100644 --- a/apps/backend/src/http-client/types.ts +++ b/apps/backend/src/http-client/types.ts @@ -13,4 +13,6 @@ export interface RequestMetrics { export interface RequestWithMetrics { data: T; metrics: RequestMetrics; + aborted?: boolean; // Set when the request was aborted mid-download after response headers arrived. + abortReason?: string; // Error message when `aborted` is true; human-readable summary of the abort reason. 
} diff --git a/apps/backend/src/jobs/job-queues.ts b/apps/backend/src/jobs/job-queues.ts index 9488ce7b..db475d49 100644 --- a/apps/backend/src/jobs/job-queues.ts +++ b/apps/backend/src/jobs/job-queues.ts @@ -7,3 +7,4 @@ export const LEGACY_DEAL_QUEUE = "deal.run"; export const LEGACY_RETRIEVAL_QUEUE = "retrieval.run"; export const DATA_RETENTION_POLL_QUEUE = "data.retention.poll"; export const PROVIDERS_REFRESH_QUEUE = "providers.refresh"; +export const RETRIEVAL_ANON_QUEUE = "retrieval.anon.run"; diff --git a/apps/backend/src/jobs/jobs.module.ts b/apps/backend/src/jobs/jobs.module.ts index 15ad4d64..fb708e09 100644 --- a/apps/backend/src/jobs/jobs.module.ts +++ b/apps/backend/src/jobs/jobs.module.ts @@ -7,6 +7,7 @@ import { StorageProvider } from "../database/entities/storage-provider.entity.js import { DealModule } from "../deal/deal.module.js"; import { PieceCleanupModule } from "../piece-cleanup/piece-cleanup.module.js"; import { RetrievalModule } from "../retrieval/retrieval.module.js"; +import { RetrievalAnonModule } from "../retrieval-anon/retrieval-anon.module.js"; import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; import { JobsService } from "./jobs.service.js"; import { JobScheduleRepository } from "./repositories/job-schedule.repository.js"; @@ -20,6 +21,7 @@ import { JobScheduleRepository } from "./repositories/job-schedule.repository.js WalletSdkModule, DataRetentionModule, PieceCleanupModule, + RetrievalAnonModule, ], providers: [JobsService, JobScheduleRepository], }) diff --git a/apps/backend/src/jobs/jobs.service.spec.ts b/apps/backend/src/jobs/jobs.service.spec.ts index d556f3d6..8983c723 100644 --- a/apps/backend/src/jobs/jobs.service.spec.ts +++ b/apps/backend/src/jobs/jobs.service.spec.ts @@ -30,18 +30,18 @@ describe("JobsService schedule rows", () => { }; let dataRetentionServiceMock: { pollDataRetention: ReturnType }; let metricsMocks: { - jobsQueuedGauge: JobsServiceDeps[8]; - jobsRetryScheduledGauge: JobsServiceDeps[9]; 
- oldestQueuedAgeGauge: JobsServiceDeps[10]; - oldestInFlightAgeGauge: JobsServiceDeps[11]; - jobsInFlightGauge: JobsServiceDeps[12]; - jobsEnqueueAttemptsCounter: JobsServiceDeps[13]; - jobsStartedCounter: JobsServiceDeps[14]; - jobsCompletedCounter: JobsServiceDeps[15]; - jobsPausedGauge: JobsServiceDeps[16]; - jobDuration: JobsServiceDeps[17]; - storageProvidersActive: JobsServiceDeps[18]; - storageProvidersTested: JobsServiceDeps[19]; + jobsQueuedGauge: JobsServiceDeps[9]; + jobsRetryScheduledGauge: JobsServiceDeps[10]; + oldestQueuedAgeGauge: JobsServiceDeps[11]; + oldestInFlightAgeGauge: JobsServiceDeps[12]; + jobsInFlightGauge: JobsServiceDeps[13]; + jobsEnqueueAttemptsCounter: JobsServiceDeps[14]; + jobsStartedCounter: JobsServiceDeps[15]; + jobsCompletedCounter: JobsServiceDeps[16]; + jobsPausedGauge: JobsServiceDeps[17]; + jobDuration: JobsServiceDeps[18]; + storageProvidersActive: JobsServiceDeps[19]; + storageProvidersTested: JobsServiceDeps[20]; }; let baseConfigValues: Partial; let configService: JobsServiceDeps[0]; @@ -55,18 +55,19 @@ describe("JobsService schedule rows", () => { walletSdkService: JobsServiceDeps[5]; dataRetentionService: JobsServiceDeps[6]; pieceCleanupService: JobsServiceDeps[7]; - jobsQueuedGauge: JobsServiceDeps[8]; - jobsRetryScheduledGauge: JobsServiceDeps[9]; - oldestQueuedAgeGauge: JobsServiceDeps[10]; - oldestInFlightAgeGauge: JobsServiceDeps[11]; - jobsInFlightGauge: JobsServiceDeps[12]; - jobsEnqueueAttemptsCounter: JobsServiceDeps[13]; - jobsStartedCounter: JobsServiceDeps[14]; - jobsCompletedCounter: JobsServiceDeps[15]; - jobsPausedGauge: JobsServiceDeps[16]; - jobDuration: JobsServiceDeps[17]; - storageProvidersActive: JobsServiceDeps[18]; - storageProvidersTested: JobsServiceDeps[19]; + anonRetrievalService: JobsServiceDeps[8]; + jobsQueuedGauge: JobsServiceDeps[9]; + jobsRetryScheduledGauge: JobsServiceDeps[10]; + oldestQueuedAgeGauge: JobsServiceDeps[11]; + oldestInFlightAgeGauge: JobsServiceDeps[12]; + 
jobsInFlightGauge: JobsServiceDeps[13]; + jobsEnqueueAttemptsCounter: JobsServiceDeps[14]; + jobsStartedCounter: JobsServiceDeps[15]; + jobsCompletedCounter: JobsServiceDeps[16]; + jobsPausedGauge: JobsServiceDeps[17]; + jobDuration: JobsServiceDeps[18]; + storageProvidersActive: JobsServiceDeps[19]; + storageProvidersTested: JobsServiceDeps[20]; }>, ) => JobsService; @@ -96,18 +97,18 @@ describe("JobsService schedule rows", () => { }; metricsMocks = { - jobsQueuedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[8], - jobsRetryScheduledGauge: { set: vi.fn() } as unknown as JobsServiceDeps[9], - oldestQueuedAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[10], - oldestInFlightAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[11], - jobsInFlightGauge: { set: vi.fn() } as unknown as JobsServiceDeps[12], - jobsEnqueueAttemptsCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[13], - jobsStartedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[14], - jobsCompletedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[15], - jobsPausedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[16], - jobDuration: { observe: vi.fn() } as unknown as JobsServiceDeps[17], - storageProvidersActive: { set: vi.fn() } as unknown as JobsServiceDeps[18], - storageProvidersTested: { set: vi.fn() } as unknown as JobsServiceDeps[19], + jobsQueuedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[9], + jobsRetryScheduledGauge: { set: vi.fn() } as unknown as JobsServiceDeps[10], + oldestQueuedAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[11], + oldestInFlightAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[12], + jobsInFlightGauge: { set: vi.fn() } as unknown as JobsServiceDeps[13], + jobsEnqueueAttemptsCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[14], + jobsStartedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[15], + jobsCompletedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[16], + jobsPausedGauge: { set: 
vi.fn() } as unknown as JobsServiceDeps[17], + jobDuration: { observe: vi.fn() } as unknown as JobsServiceDeps[18], + storageProvidersActive: { set: vi.fn() } as unknown as JobsServiceDeps[19], + storageProvidersTested: { set: vi.fn() } as unknown as JobsServiceDeps[20], }; const emptySpBlocklists: ISpBlocklistConfig = { @@ -133,6 +134,7 @@ describe("JobsService schedule rows", () => { dataSetCreationJobTimeoutSeconds: 300, pieceCleanupPerSpPerHour: 1, maxPieceCleanupRuntimeSeconds: 300, + retrievalsAnonPerSpPerHour: 2, } as IConfig["jobs"], database: { host: "localhost", @@ -161,6 +163,7 @@ describe("JobsService schedule rows", () => { overrides.walletSdkService ?? ({} as JobsServiceDeps[5]), overrides.dataRetentionService ?? (dataRetentionServiceMock as unknown as JobsServiceDeps[6]), overrides.pieceCleanupService ?? ({} as JobsServiceDeps[7]), + overrides.anonRetrievalService ?? ({} as JobsServiceDeps[8]), overrides.jobsQueuedGauge ?? metricsMocks.jobsQueuedGauge, overrides.jobsRetryScheduledGauge ?? metricsMocks.jobsRetryScheduledGauge, overrides.oldestQueuedAgeGauge ?? 
metricsMocks.oldestQueuedAgeGauge, @@ -615,12 +618,13 @@ describe("JobsService schedule rows", () => { // Check upserts for providerB const upsertCalls = jobScheduleRepositoryMock.upsertSchedule.mock.calls; const upsertsForB = upsertCalls.filter((call) => call[1] === providerB.address); - expect(upsertsForB).toHaveLength(4); + expect(upsertsForB).toHaveLength(5); expect(upsertsForB.map((call) => call[0]).sort()).toEqual([ "data_set_creation", "deal", "piece_cleanup", "retrieval", + "retrieval_anon", ]); }); @@ -976,7 +980,7 @@ describe("JobsService schedule rows", () => { expect(dealService.createDealForProvider).toHaveBeenCalledTimes(1); }); - it("deal job maps DealJobTerminatedDataSetError to handler_result=error", async () => { + it("data storage job does not run data-storage check when data-set selection aborts", async () => { const completedCounter = metricsMocks.jobsCompletedCounter as unknown as { inc: ReturnType }; vi.useFakeTimers(); vi.setSystemTime(new Date("2024-01-01T12:00:00Z")); diff --git a/apps/backend/src/jobs/jobs.service.ts b/apps/backend/src/jobs/jobs.service.ts index f8fe1d80..e09cf42c 100644 --- a/apps/backend/src/jobs/jobs.service.ts +++ b/apps/backend/src/jobs/jobs.service.ts @@ -16,18 +16,32 @@ import { StorageProvider } from "../database/entities/storage-provider.entity.js import { DealService } from "../deal/deal.service.js"; import { PieceCleanupService } from "../piece-cleanup/piece-cleanup.service.js"; import { RetrievalService } from "../retrieval/retrieval.service.js"; +import { AnonRetrievalService } from "../retrieval-anon/anon-retrieval.service.js"; import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import { provisionNextMissingDataSet } from "./data-set-creation.handler.js"; -import { DATA_RETENTION_POLL_QUEUE, PROVIDERS_REFRESH_QUEUE, SP_WORK_QUEUE } from "./job-queues.js"; +import { + DATA_RETENTION_POLL_QUEUE, + PROVIDERS_REFRESH_QUEUE, + RETRIEVAL_ANON_QUEUE, + SP_WORK_QUEUE, +} from "./job-queues.js"; 
import { JobScheduleRepository } from "./repositories/job-schedule.repository.js"; -type SpJobType = "deal" | "retrieval" | "data_set_creation" | "piece_cleanup"; -const SP_JOB_TYPES: ReadonlySet = new Set(["deal", "retrieval", "data_set_creation", "piece_cleanup"]); +type SpJobType = "deal" | "retrieval" | "data_set_creation" | "retrieval_anon" | "piece_cleanup"; +const SP_JOB_TYPES: ReadonlySet = new Set([ + "deal", + "retrieval", + "retrieval_anon", + "data_set_creation", + "piece_cleanup", +]); + function isSpJobType(jobType: string): jobType is SpJobType { return SP_JOB_TYPES.has(jobType); } type SpJobData = { jobType: SpJobType; spAddress: string; intervalSeconds: number }; +type AnonRetrievalJobData = { spAddress: string; intervalSeconds: number }; type ProvidersRefreshJobData = { intervalSeconds: number }; type SpJob = Job; type DataRetentionJobData = { intervalSeconds: number }; @@ -61,6 +75,8 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { private readonly walletSdkService: WalletSdkService, private readonly dataRetentionService: DataRetentionService, private readonly pieceCleanupService: PieceCleanupService, + private readonly anonRetrievalService: AnonRetrievalService, + @InjectMetric("jobs_queued") private readonly jobsQueuedGauge: Gauge, @InjectMetric("jobs_retry_scheduled") @@ -258,6 +274,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { await boss.createQueue(SP_WORK_QUEUE, { policy: "singleton" }); await boss.createQueue(PROVIDERS_REFRESH_QUEUE); await boss.createQueue(DATA_RETENTION_POLL_QUEUE); + await boss.createQueue(RETRIEVAL_ANON_QUEUE); } private registerWorkers(): void { @@ -335,6 +352,23 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { error: toStructuredError(error), }), ); + void this.boss + .work( + RETRIEVAL_ANON_QUEUE, + { batchSize: 1, localConcurrency: spConcurrency, pollingIntervalSeconds: workerPollSeconds }, + async ([job]) => { + if (!job) 
return; + await this.handleAnonRetrievalJob(job); + }, + ) + .catch((error) => + this.logger.error({ + event: "worker_register_failed", + message: "Failed to register worker", + queue: RETRIEVAL_ANON_QUEUE, + error: toStructuredError(error), + }), + ); } private getMaintenanceWindowStatus(now: Date = new Date()) { @@ -587,6 +621,51 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { }); } + private async handleAnonRetrievalJob(job: Job): Promise { + const data = job.data; + const spAddress = data.spAddress; + + // Create AbortController for job timeout enforcement + const abortController = new AbortController(); + const timeoutSeconds = this.configService.get("jobs").anonRetrievalJobTimeoutSeconds; + const timeoutMs = Math.max(60000, timeoutSeconds * 1000); + const effectiveTimeoutSeconds = Math.round(timeoutMs / 1000); + const abortReason = new Error(`Anon retrieval job timeout (${effectiveTimeoutSeconds}s) for ${spAddress}`); + const timeoutId = setTimeout(() => { + abortController.abort(abortReason); + }, timeoutMs); + + await this.recordJobExecution("retrieval_anon", async () => { + const logContext = await this.resolveProviderJobContext(spAddress, job.id); + try { + await this.anonRetrievalService.performForProvider(spAddress, abortController.signal, logContext); + return "success"; + } catch (error) { + if (abortController.signal.aborted) { + const reason = abortController.signal.reason; + const reasonMessage = reason instanceof Error ? reason.message : String(reason ?? ""); + this.logger.error({ + ...logContext, + event: "anon_retrieval_job_aborted", + message: reasonMessage || "Anon retrieval job aborted after timeout", + timeoutSeconds: effectiveTimeoutSeconds, + error: toStructuredError(reason ?? 
error), + }); + return "aborted"; + } + this.logger.error({ + ...logContext, + event: "anon_retrieval_job_failed", + message: "Anon retrieval job failed", + error: toStructuredError(error), + }); + throw error; + } finally { + clearTimeout(timeoutId); + } + }); + } + private async handleDataRetentionJob(data: DataRetentionJobData): Promise { void data; await this.recordJobExecution("data_retention_poll", async () => { @@ -865,6 +944,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { private getIntervalSecondsForRates(): { dealIntervalSeconds: number; retrievalIntervalSeconds: number; + retrievalAnonIntervalSeconds: number; dataSetCreationIntervalSeconds: number; dataRetentionPollIntervalSeconds: number; providersRefreshIntervalSeconds: number; @@ -885,9 +965,13 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const dataRetentionPollIntervalSeconds = scheduling.dataRetentionPollIntervalSeconds; const providersRefreshIntervalSeconds = scheduling.providersRefreshIntervalSeconds; + const retrievalsAnonPerHour = jobsConfig.retrievalsAnonPerSpPerHour; + const retrievalAnonIntervalSeconds = Math.max(1, Math.round(3600 / retrievalsAnonPerHour)); + return { dealIntervalSeconds, retrievalIntervalSeconds, + retrievalAnonIntervalSeconds, dataSetCreationIntervalSeconds, dataRetentionPollIntervalSeconds, providersRefreshIntervalSeconds, @@ -907,6 +991,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const { dealIntervalSeconds, retrievalIntervalSeconds, + retrievalAnonIntervalSeconds, dataSetCreationIntervalSeconds, dataRetentionPollIntervalSeconds, providersRefreshIntervalSeconds, @@ -924,6 +1009,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const phaseMs = this.schedulePhaseSeconds() * 1000; const dealStartAt = new Date(now.getTime() + phaseMs); const retrievalStartAt = new Date(now.getTime() + phaseMs); + const retrievalAnonStartAt = new Date(now.getTime() + 
phaseMs); const dataSetCreationStartAt = new Date(now.getTime() + phaseMs); const dataRetentionPollStartAt = new Date(now.getTime() + phaseMs); const providersRefreshStartAt = new Date(now.getTime() + phaseMs); @@ -947,6 +1033,12 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { for (const address of unblockedAddresses) { await this.jobScheduleRepository.upsertSchedule("deal", address, dealIntervalSeconds, dealStartAt); await this.jobScheduleRepository.upsertSchedule("retrieval", address, retrievalIntervalSeconds, retrievalStartAt); + await this.jobScheduleRepository.upsertSchedule( + "retrieval_anon", + address, + retrievalAnonIntervalSeconds, + retrievalAnonStartAt, + ); if (minDataSets >= 1) { await this.jobScheduleRepository.upsertSchedule( "data_set_creation", @@ -1104,6 +1196,8 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { return SP_WORK_QUEUE; case "piece_cleanup": return SP_WORK_QUEUE; + case "retrieval_anon": + return RETRIEVAL_ANON_QUEUE; case "data_retention_poll": return DATA_RETENTION_POLL_QUEUE; case "providers_refresh": @@ -1123,6 +1217,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { if ( row.job_type === "deal" || row.job_type === "retrieval" || + row.job_type === "retrieval_anon" || row.job_type === "data_set_creation" || row.job_type === "piece_cleanup" ) { @@ -1195,6 +1290,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const jobTypes: JobType[] = [ "deal", "retrieval", + "retrieval_anon", "data_set_creation", "piece_cleanup", "data_retention_poll", diff --git a/apps/backend/src/metrics-prometheus/check-metric-labels.ts b/apps/backend/src/metrics-prometheus/check-metric-labels.ts index d8447160..9d776586 100644 --- a/apps/backend/src/metrics-prometheus/check-metric-labels.ts +++ b/apps/backend/src/metrics-prometheus/check-metric-labels.ts @@ -1,4 +1,4 @@ -export type CheckType = "dataStorage" | "retrieval" | "dataRetention" | 
"dataSetCreation"; +export type CheckType = "dataStorage" | "retrieval" | "anon_retrieval" | "dataRetention" | "dataSetCreation"; export type ProviderStatus = "approved" | "unapproved"; export type CheckMetricLabels = { diff --git a/apps/backend/src/metrics-prometheus/check-metrics.service.ts b/apps/backend/src/metrics-prometheus/check-metrics.service.ts index 55975cad..76a8ee31 100644 --- a/apps/backend/src/metrics-prometheus/check-metrics.service.ts +++ b/apps/backend/src/metrics-prometheus/check-metrics.service.ts @@ -248,3 +248,66 @@ export class DataSetCreationCheckMetrics { this.dataSetCreationStatusCounter.inc({ ...labels, value }); } } + +@Injectable() +export class AnonRetrievalCheckMetrics { + constructor( + @InjectMetric("anonPieceRetrievalFirstByteMs") + private readonly firstByteMs: Histogram, + @InjectMetric("anonPieceRetrievalLastByteMs") + private readonly lastByteMs: Histogram, + @InjectMetric("anonPieceRetrievalThroughputBps") + private readonly throughputBps: Histogram, + @InjectMetric("anonRetrievalCheckMs") + private readonly checkMs: Histogram, + @InjectMetric("anonPieceRetrievalStatus") + private readonly statusCounter: Counter, + @InjectMetric("anonPieceHttpResponseCode") + private readonly httpResponseCounter: Counter, + @InjectMetric("anonCarParseStatus") + private readonly carParseCounter: Counter, + @InjectMetric("anonIpniStatus") + private readonly ipniCounter: Counter, + @InjectMetric("anonBlockFetchStatus") + private readonly blockFetchCounter: Counter, + ) {} + + observeFirstByteMs(labels: CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.firstByteMs, labels, value); + } + + observeLastByteMs(labels: CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.lastByteMs, labels, value); + } + + observeThroughput(labels: CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.throughputBps, labels, value); + } + + observeCheckDuration(labels: 
CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.checkMs, labels, value); + } + + recordStatus(labels: CheckMetricLabels, value: string): void { + this.statusCounter.inc({ ...labels, value }); + } + + recordHttpResponseCode(labels: CheckMetricLabels, statusCode: number): void { + this.httpResponseCounter.inc({ + ...labels, + value: classifyHttpResponseCode(statusCode), + }); + } + + recordCarParseStatus(labels: CheckMetricLabels, parseable: boolean): void { + this.carParseCounter.inc({ ...labels, value: parseable ? "parseable" : "not_parseable" }); + } + + recordIpniStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped" | "error"): void { + this.ipniCounter.inc({ ...labels, value }); + } + + recordBlockFetchStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped" | "error"): void { + this.blockFetchCounter.inc({ ...labels, value }); + } +} diff --git a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts index 18bda30d..4ebeb01a 100644 --- a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts +++ b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts @@ -8,6 +8,7 @@ import { } from "@willsoto/nestjs-prometheus"; import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; import { + AnonRetrievalCheckMetrics, DataSetCreationCheckMetrics, DataStorageCheckMetrics, DiscoverabilityCheckMetrics, @@ -207,6 +208,56 @@ const metricProviders = [ help: "Estimated number of unrecorded overdue proving periods per provider. 
Resets to 0 when the subgraph catches up.", labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, }), + // Anonymous Retrieval Metrics + makeHistogramProvider({ + name: "anonPieceRetrievalFirstByteMs", + help: "Time to first byte for anonymous piece retrievals via /piece/{cid} (ms)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000], + }), + makeHistogramProvider({ + name: "anonPieceRetrievalLastByteMs", + help: "Total time to retrieve an anonymous piece via /piece/{cid} (ms)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], + }), + makeHistogramProvider({ + name: "anonPieceRetrievalThroughputBps", + help: "Throughput for anonymous piece retrievals (bytes/s)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: throughputBuckets, + }), + makeHistogramProvider({ + name: "anonRetrievalCheckMs", + help: "End-to-end anonymous retrieval check duration (ms)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], + }), + makeCounterProvider({ + name: "anonPieceRetrievalStatus", + help: "Anonymous piece retrieval overall outcome", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonPieceHttpResponseCode", + help: "HTTP response codes for anonymous piece retrieval requests", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonCarParseStatus", + help: "Anonymous retrieval CAR parse outcomes (parseable / not_parseable)", + labelNames: ["checkType", "providerId", "providerName", 
"providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonIpniStatus", + help: "Anonymous retrieval IPNI check outcomes (valid / invalid / skipped / error)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonBlockFetchStatus", + help: "Anonymous retrieval block fetch validation outcomes (valid / invalid / skipped / error)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), // Storage provider metrics: absolute counts, independent of query filters. makeGaugeProvider({ name: "storage_providers_active", @@ -333,6 +384,7 @@ const metricProviders = [ RetrievalCheckMetrics, DiscoverabilityCheckMetrics, DataSetCreationCheckMetrics, + AnonRetrievalCheckMetrics, WalletBalanceCollector, // HTTP metrics interceptor { @@ -347,6 +399,7 @@ const metricProviders = [ RetrievalCheckMetrics, DiscoverabilityCheckMetrics, DataSetCreationCheckMetrics, + AnonRetrievalCheckMetrics, WalletBalanceCollector, ], }) diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts new file mode 100644 index 00000000..30a04486 --- /dev/null +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts @@ -0,0 +1,153 @@ +import type { ConfigService } from "@nestjs/config"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { IConfig } from "../config/app.config.js"; +import type { SampleAnonPieceParams, SubgraphService } from "../subgraph/subgraph.service.js"; +import type { AnonCandidatePiece } from "../subgraph/types.js"; +import { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; + +const SP_ADDRESS = "0xAaAaAAaAaaaAaAAAAaaaaAAaaAaaaAAaaaaa1111"; +const DEALBOT_PAYER = "0xBbBBBbBBbbbBbBBBBBbbbbbBBbbBbbbBBbbbb2222"; + +const makePiece = (overrides: Partial = {}): AnonCandidatePiece => ({ + pieceCid: 
`baga6ea4seaqpiece${Math.random().toString(36).slice(2, 10)}`, + pieceId: "1", + dataSetId: "42", + rawSize: "1048576", + withIPFSIndexing: true, + ipfsRootCid: "bafyroot", + indexedAtBlock: 12345, + pdpPaymentEndEpoch: null, + ...overrides, +}); + +const makeConfigService = (): ConfigService => + ({ + get: vi.fn((key: string) => { + if (key === "blockchain") { + return { walletAddress: DEALBOT_PAYER }; + } + return undefined; + }), + }) as unknown as ConfigService; + +describe("AnonPieceSelectorService", () => { + let subgraphService: SubgraphService; + let sampleAnonPiece: ReturnType; + + beforeEach(() => { + sampleAnonPiece = vi.fn(); + subgraphService = { sampleAnonPiece } as unknown as SubgraphService; + }); + + it("returns null when every fallback attempt yields no piece", async () => { + sampleAnonPiece.mockResolvedValue(null); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result).toBeNull(); + expect(sampleAnonPiece).toHaveBeenCalled(); + }); + + it("returns the sampled piece with SP address lowercased", async () => { + sampleAnonPiece.mockResolvedValueOnce(makePiece({ pieceCid: "baga-the-one" })); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result).not.toBeNull(); + expect(result?.pieceCid).toBe("baga-the-one"); + expect(result?.serviceProvider).toBe(SP_ADDRESS.toLowerCase()); + }); + + it("passes the dealbot payer address to sampleAnonPiece for exclusion", async () => { + sampleAnonPiece.mockResolvedValueOnce(makePiece()); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + + await service.selectPieceForProvider(SP_ADDRESS); + + const call = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams; + expect(call.payer).toBe(DEALBOT_PAYER); + 
expect(call.serviceProvider).toBe(SP_ADDRESS); + }); + + it("redraws when the first sampled piece's payment has already terminated", async () => { + const staleCid = "baga-terminated"; + const freshCid = "baga-live"; + sampleAnonPiece + .mockResolvedValueOnce(makePiece({ pieceCid: staleCid, pdpPaymentEndEpoch: 100n, indexedAtBlock: 200 })) + .mockResolvedValueOnce(makePiece({ pieceCid: freshCid, pdpPaymentEndEpoch: null })); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe(freshCid); + }); + + it("treats payment-end exactly equal to current epoch as terminated (boundary)", async () => { + // pdpPaymentEndEpoch === indexedAtBlock should be rejected (<=, not <). + // This guards against an off-by-one regression where pieces in the final + // payment epoch silently slip through. + const boundaryCid = "baga-boundary"; + const liveCid = "baga-still-live"; + sampleAnonPiece + .mockResolvedValueOnce(makePiece({ pieceCid: boundaryCid, pdpPaymentEndEpoch: 200n, indexedAtBlock: 200 })) + .mockResolvedValueOnce(makePiece({ pieceCid: liveCid, pdpPaymentEndEpoch: 201n, indexedAtBlock: 200 })); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe(liveCid); + }); + + it("falls back to the opposite pool when the preferred one is empty", async () => { + // First pool call returns nothing twice (both attempts), second pool succeeds. 
+ const fresh = makePiece({ pieceCid: "baga-other-pool" }); + sampleAnonPiece.mockResolvedValueOnce(null).mockResolvedValueOnce(null).mockResolvedValueOnce(fresh); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe("baga-other-pool"); + + // The second (fallback) call should target the opposite pool. + const firstCall = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams; + const fallbackCall = sampleAnonPiece.mock.calls[2][0] as SampleAnonPieceParams; + expect(fallbackCall.pool).not.toBe(firstCall.pool); + }); + + it("widens size bucket to 'any' after both pools fail in the primary bucket", async () => { + // 4 empty attempts across (bucket × both pools × 2 draws each) then + // succeed on the first `any` bucket call. + sampleAnonPiece + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce(makePiece({ pieceCid: "baga-any-bucket" })); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe("baga-any-bucket"); + + // The 5th call (index 4) should be the widened-bucket attempt; its size + // range must dwarf every bucket (the "large" bucket tops out just under 500 MiB): 32 GiB is a conservative lower bound. 
+ const widened = sampleAnonPiece.mock.calls[4][0] as SampleAnonPieceParams; + expect(BigInt(widened.maxSize)).toBeGreaterThanOrEqual(32n * 1024n * 1024n * 1024n); + expect(widened.minSize).toBe("0"); + }); + + it("draws a fresh sampleKey for each subgraph call", async () => { + sampleAnonPiece.mockResolvedValueOnce(null).mockResolvedValueOnce(makePiece()); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + await service.selectPieceForProvider(SP_ADDRESS); + + const call1 = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams; + const call2 = sampleAnonPiece.mock.calls[1][0] as SampleAnonPieceParams; + expect(call1.sampleKey).toMatch(/^0x[0-9a-f]{64}$/); + expect(call2.sampleKey).toMatch(/^0x[0-9a-f]{64}$/); + expect(call1.sampleKey).not.toBe(call2.sampleKey); + }); +}); diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts new file mode 100644 index 00000000..d354a222 --- /dev/null +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts @@ -0,0 +1,182 @@ +import { randomBytes } from "node:crypto"; +import { Injectable, Logger } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import type { IConfig } from "../config/app.config.js"; +import type { AnonPiecePool, SampleAnonPieceParams } from "../subgraph/subgraph.service.js"; +import { SubgraphService } from "../subgraph/subgraph.service.js"; +import type { AnonCandidatePiece } from "../subgraph/types.js"; +import type { AnonPiece } from "./types.js"; + +/** + * Piece size buckets, in raw (unpadded) bytes. Weighted sampling across + * these buckets keeps tests meaningful for bandwidth measurement without + * locking out SPs whose corpus skews small or large. 
+ */ +type SizeBucket = "small" | "medium" | "large"; +type SizeRange = { min: bigint; max: bigint }; + +const MIB = 1024n * 1024n; + +// All downloads are buffered in-memory, so we need to keep piece sizes reasonable +// When changing these values, also update ./docs/checks/anon-retrievals.md#piece-selection +const SIZE_BUCKETS: Record = { + small: { min: 1n * MIB, max: 20n * MIB - 1n }, + medium: { min: 20n * MIB, max: 100n * MIB - 1n }, + large: { min: 100n * MIB, max: 500n * MIB - 1n }, +}; + +// Weights for choosing a bucket per selection. Must sum to 1. +// When changing these values, also update ./docs/checks/anon-retrievals.md#piece-selection +const BUCKET_WEIGHTS: Record = { + small: 0.2, + medium: 0.5, + large: 0.3, +}; + +/** + * Probability the primary draw targets the withIPFSIndexing pool. + * The rest of the time we sample across all FWSS pieces, so SPs can't + * optimise only their CAR corpus. + * + * When changing this value, also update ./docs/checks/anon-retrievals.md#piece-selection + */ +const IPFS_INDEXED_SAMPLE_RATE = 0.8; + +@Injectable() +export class AnonPieceSelectorService { + private readonly logger = new Logger(AnonPieceSelectorService.name); + + constructor( + private readonly subgraphService: SubgraphService, + private readonly configService: ConfigService, + ) {} + + /** + * Select an anonymous piece to test against the given SP. + * + * Strategy: + * 1. Pick a size bucket by weighted random. + * 2. Pick a pool (`indexed` 80% / `any` 20%). + * 3. Generate a uniform-random sampleKey and query the subgraph for the + * smallest `Root.sampleKey ≥ $sampleKey` matching the filters. + * 4. Drop the pick if `pdpPaymentEndEpoch` has passed or it was tested + * recently; redraw once. + * 5. If still empty, fall back through: (same bucket, opposite pool) → + * (any bucket, indexed) → (any bucket, any). 
+ */ + async selectPieceForProvider(spAddress: string): Promise { + const dealbotPayer = this.configService.get("blockchain", { infer: true }).walletAddress; + + const bucket = this.pickBucket(); + const pool: AnonPiecePool = Math.random() < IPFS_INDEXED_SAMPLE_RATE ? "indexed" : "any"; + + const attempts: Array<{ bucket: SizeBucket | "any"; pool: AnonPiecePool }> = [ + { bucket: bucket, pool: pool }, + { bucket: bucket, pool: pool === "indexed" ? "any" : "indexed" }, + { bucket: "any", pool: "indexed" }, + { bucket: "any", pool: "any" }, + ]; + + for (const attempt of attempts) { + const piece = await this.drawPiece({ + spAddress, + dealbotPayer, + bucket: attempt.bucket, + pool: attempt.pool, + }); + + if (piece) { + this.logger.log({ + event: "anon_piece_selected", + message: "Selected anonymous piece for retrieval test", + spAddress, + pieceCid: piece.pieceCid, + dataSetId: piece.dataSetId, + withIPFSIndexing: piece.withIPFSIndexing, + bucket: attempt.bucket, + pool: attempt.pool, + }); + + return { + pieceCid: piece.pieceCid, + dataSetId: piece.dataSetId, + pieceId: piece.pieceId, + serviceProvider: spAddress.toLowerCase(), + withIPFSIndexing: piece.withIPFSIndexing, + ipfsRootCid: piece.ipfsRootCid, + rawSize: piece.rawSize, + }; + } + } + + this.logger.warn({ + event: "anon_no_candidates", + message: "No anonymous piece found after all fallbacks", + spAddress, + }); + + return null; + } + + /** + * Try to draw a piece for one (bucket, pool) combination. Up to two draws + * with fresh sampleKeys, each filtered by dedup + epoch-termination. + */ + private async drawPiece(args: { + spAddress: string; + dealbotPayer: string; + bucket: SizeBucket | "any"; + pool: AnonPiecePool; + }): Promise { + const range = args.bucket === "any" ? 
fullRange() : SIZE_BUCKETS[args.bucket]; + + for (let attempt = 0; attempt < 2; attempt++) { + const params: SampleAnonPieceParams = { + serviceProvider: args.spAddress, + payer: args.dealbotPayer, + sampleKey: randomSampleKey(), + minSize: range.min.toString(), + maxSize: range.max.toString(), + pool: args.pool, + }; + + const piece = await this.subgraphService.sampleAnonPiece(params); + if (!piece) { + continue; + } + + // On Filecoin FEVM the EVM block number IS the chain epoch (one block per + // epoch), so the subgraph's indexedAtBlock is a safe proxy for "now" when + // checking if PDP payment for this piece has already terminated. + if (piece.pdpPaymentEndEpoch != null && piece.pdpPaymentEndEpoch <= BigInt(piece.indexedAtBlock)) { + continue; + } + + return piece; + } + + return null; + } + + private pickBucket(): SizeBucket { + const r = Math.random(); + let acc = 0; + for (const [name, weight] of Object.entries(BUCKET_WEIGHTS) as Array<[SizeBucket, number]>) { + acc += weight; + if (r < acc) { + return name; + } + } + return "medium"; + } +} + +/** Uniform-random 32-byte sort key as `0x`-prefixed hex. */ +function randomSampleKey(): string { + return `0x${randomBytes(32).toString("hex")}`; +} + +/** The full size range (used when bucket fallback is "any"). 
 */
function fullRange(): SizeRange {
  return { min: 0n, max: (1n << 63n) - 1n };
}

// ——— apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts (new file) ———
import type { Repository } from "typeorm";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { ClickhouseService } from "../clickhouse/clickhouse.service.js";
import type { StorageProvider } from "../database/entities/storage-provider.entity.js";
import { RetrievalStatus } from "../database/types.js";
import type { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js";
import type { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js";
import type { AnonPieceSelectorService } from "./anon-piece-selector.service.js";
import { AnonRetrievalService } from "./anon-retrieval.service.js";
import type { CarValidationService } from "./car-validation.service.js";
import type { PieceRetrievalService } from "./piece-retrieval.service.js";
import type { AnonPiece, CarValidationResult, PieceRetrievalResult } from "./types.js";

const SP_ADDRESS = "0xaaaa0000000000000000000000000000000000aa";

// Default (non-IPFS-indexed) candidate piece returned by the mocked selector.
const PIECE = {
  pieceCid: "baga6ea4seaqpiece",
  pieceId: "1",
  dataSetId: "42",
  rawSize: "1048576",
  withIPFSIndexing: false,
  ipfsRootCid: null,
  serviceProvider: SP_ADDRESS,
};

function makeProvider(): StorageProvider {
  return {
    address: SP_ADDRESS,
    providerId: 7,
    name: "sp-test",
    isApproved: true,
  } as unknown as StorageProvider;
}

// Builds an AnonRetrievalService wired entirely to vi.fn() doubles and hands
// back the spies the tests assert on. `piece: null` forces "no candidate";
// omitting `piece` uses the default PIECE above.
function makeService(opts: {
  pieceResult: PieceRetrievalResult;
  fetchPieceImpl?: (signal?: AbortSignal) => Promise<PieceRetrievalResult>;
  piece?: AnonPiece | null;
  carResult?: CarValidationResult;
  validateCarImpl?: () => Promise<CarValidationResult>;
}): {
  service: AnonRetrievalService;
  insertSpy: ReturnType<typeof vi.fn>;
  fetchSpy: ReturnType<typeof vi.fn>;
  validateCarSpy: ReturnType<typeof vi.fn>;
  metricsRecordStatusSpy: ReturnType<typeof vi.fn>;
  metricsRecordIpniSpy: ReturnType<typeof vi.fn>;
  metricsRecordBlockFetchSpy: ReturnType<typeof vi.fn>;
} {
  const insertSpy = vi.fn();
  const clickhouseService = {
    insert: insertSpy,
    enabled: true,
    probeLocation: "test-location",
  } as unknown as ClickhouseService;

  const spRepository = {
    findOne: vi.fn(async () => makeProvider()),
  } as unknown as Repository<StorageProvider>;

  const anonPieceSelector = {
    selectPieceForProvider: vi.fn(async () => (opts.piece === null ? null : (opts.piece ?? PIECE))),
  } as unknown as AnonPieceSelectorService;

  const fetchSpy = vi.fn(opts.fetchPieceImpl ?? (async () => opts.pieceResult));
  const pieceRetrievalService = {
    fetchPiece: fetchSpy,
  } as unknown as PieceRetrievalService;

  const validateCarSpy = vi.fn(opts.validateCarImpl ?? (async () => opts.carResult));
  const carValidationService = {
    validateCarPiece: validateCarSpy,
  } as unknown as CarValidationService;

  const walletSdkService = {
    getProviderInfo: vi.fn(() => ({ pdp: { serviceURL: "https://sp.test/" } })),
  } as unknown as WalletSdkService;

  const metricsRecordStatusSpy = vi.fn();
  const metricsRecordIpniSpy = vi.fn();
  const metricsRecordBlockFetchSpy = vi.fn();
  const metrics = {
    observeFirstByteMs: vi.fn(),
    observeLastByteMs: vi.fn(),
    observeThroughput: vi.fn(),
    observeCheckDuration: vi.fn(),
    recordStatus: metricsRecordStatusSpy,
    recordHttpResponseCode: vi.fn(),
    recordCarParseStatus: vi.fn(),
    recordIpniStatus: metricsRecordIpniSpy,
    recordBlockFetchStatus: metricsRecordBlockFetchSpy,
  } as unknown as AnonRetrievalCheckMetrics;

  const service = new AnonRetrievalService(
    anonPieceSelector,
    pieceRetrievalService,
    carValidationService,
    walletSdkService,
    metrics,
    clickhouseService,
    spRepository,
  );

  return {
    service,
    insertSpy,
    fetchSpy,
    validateCarSpy,
    metricsRecordStatusSpy,
    metricsRecordIpniSpy,
    metricsRecordBlockFetchSpy,
  };
}

describe("AnonRetrievalService", () => {
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("emits a ClickHouse row with partial metrics when fetchPiece returns aborted=true", async () => {
    const partial: PieceRetrievalResult = {
      success: false,
      pieceCid: PIECE.pieceCid,
      bytesReceived: 524288,
      pieceBytes: null,
      latencyMs: 42000,
      ttfbMs: 150,
      throughputBps: 12500,
      statusCode: 200,
      commPValid: false,
      errorMessage: "Anon retrieval job timeout (60s) for sp1",
      aborted: true,
    };

    const { service, insertSpy } = makeService({ pieceResult: partial });

    await service.performForProvider(SP_ADDRESS);

    expect(insertSpy).toHaveBeenCalledTimes(1);
    const [table, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
    expect(table).toBe("anon_retrieval_checks");
    expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED);
    expect(row.bytes_retrieved).toBe(524288);
    expect(row.first_byte_ms).toBe(150);
    expect(row.last_byte_ms).toBe(42000);
    expect(row.throughput_bps).toBe(12500);
    expect(row.http_response_code).toBe(200);
    expect(row.error_message).toContain("Anon retrieval job timeout");
    expect(row.piece_cid).toBe(PIECE.pieceCid);
    expect(row.sp_address).toBe(SP_ADDRESS);
    expect(row.sp_id).toBe(7);
    expect(row.probe_location).toBe("test-location");
    expect(typeof row.retrieval_id).toBe("string");

    // CAR/IPNI/block-fetch were never run on a non-IPFS-indexed piece — every
    // dimension column should explicitly say "skipped" (ipni_status) or null.
    expect(row.car_parseable).toBeNull();
    expect(row.car_block_count).toBeNull();
    expect(row.block_fetch_endpoint).toBeNull();
    expect(row.block_fetch_valid).toBeNull();
    expect(row.block_fetch_sampled_count).toBeNull();
    expect(row.block_fetch_failed_count).toBeNull();
    expect(row.ipni_status).toBe("skipped");
    expect(row.ipni_verify_ms).toBeNull();
  });

  it("still emits a row when the signal aborts before fetchPiece runs", async () => {
    const ac = new AbortController();
    ac.abort(new Error("Anon retrieval job timeout (60s) for sp1"));

    const never: PieceRetrievalResult = {
      success: false,
      pieceCid: PIECE.pieceCid,
      bytesReceived: 0,
      pieceBytes: null,
      latencyMs: 0,
      ttfbMs: 0,
      throughputBps: 0,
      statusCode: 0,
      commPValid: false,
    };

    const { service, insertSpy, fetchSpy } = makeService({ pieceResult: never });

    await service.performForProvider(SP_ADDRESS, ac.signal);

    expect(fetchSpy).not.toHaveBeenCalled();
    expect(insertSpy).toHaveBeenCalledTimes(1);
    const [, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
    expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED);
    expect(row.error_message).toContain("Anon retrieval job timeout");
    expect(row.bytes_retrieved).toBeNull();
    expect(row.first_byte_ms).toBeNull();
  });

  it("still emits a row when fetchPiece throws unexpectedly", async () => {
    const never: PieceRetrievalResult = {
      success: false,
      pieceCid: PIECE.pieceCid,
      bytesReceived: 0,
      pieceBytes: null,
      latencyMs: 0,
      ttfbMs: 0,
      throughputBps: 0,
      statusCode: 0,
      commPValid: false,
    };

    const { service, insertSpy } = makeService({
      pieceResult: never,
      fetchPieceImpl: async () => {
        throw new Error("network down");
      },
    });

    await expect(service.performForProvider(SP_ADDRESS)).rejects.toThrow("network down");

    expect(insertSpy).toHaveBeenCalledTimes(1);
    const [, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
    expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED);
  });

  describe("with IPFS indexing", () => {
    const INDEXED_PIECE: AnonPiece = {
      ...PIECE,
      withIPFSIndexing: true,
      ipfsRootCid: "bafyrootcid",
    };

    // Successful piece-fetch result carrying `bytes` as the CAR payload.
    function okPiece(bytes: Buffer): PieceRetrievalResult {
      return {
        success: true,
        pieceCid: INDEXED_PIECE.pieceCid,
        bytesReceived: bytes.length,
        pieceBytes: bytes,
        latencyMs: 200,
        ttfbMs: 20,
        throughputBps: 51200,
        statusCode: 200,
        commPValid: true,
      };
    }

    it("emits populated CAR/IPNI/block-fetch columns when validation fully succeeds", async () => {
      const carResult: CarValidationResult = {
        carParseable: true,
        blockCount: 42,
        sampledCidCount: 5,
        ipniValid: true,
        ipniVerifyMs: 137,
        blockFetchValid: true,
        blockFetchFailedCount: 0,
        blockFetchEndpoint: "https://sp.test/ipfs/",
      };

      const { service, insertSpy, validateCarSpy } = makeService({
        pieceResult: okPiece(Buffer.from("car-bytes")),
        piece: INDEXED_PIECE,
        carResult,
      });

      await service.performForProvider(SP_ADDRESS);

      expect(validateCarSpy).toHaveBeenCalledTimes(1);
      const [, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
      expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS);
      expect(row.commp_valid).toBe(true);
      expect(row.car_parseable).toBe(true);
      expect(row.car_block_count).toBe(42);
      expect(row.block_fetch_endpoint).toBe("https://sp.test/ipfs/");
      expect(row.block_fetch_valid).toBe(true);
      expect(row.block_fetch_sampled_count).toBe(5);
      expect(row.block_fetch_failed_count).toBe(0);
      expect(row.ipni_status).toBe("valid");
      expect(row.ipni_verify_ms).toBe(137);
    });

    it("distinguishes IPNI invalid from block-fetch failures", async () => {
      const carResult: CarValidationResult = {
        carParseable: true,
        blockCount: 100,
        sampledCidCount: 5,
        ipniValid: false,
        ipniVerifyMs: 250,
        blockFetchValid: false,
        blockFetchFailedCount: 2,
        blockFetchEndpoint: "https://sp.test/ipfs/",
      };

      const { service, insertSpy } = makeService({
        pieceResult: okPiece(Buffer.from("car-bytes")),
        piece: INDEXED_PIECE,
        carResult,
      });

      await service.performForProvider(SP_ADDRESS);

      const [, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
      // The piece-fetch path still succeeded — failures are surfaced as
      // independent dimensions, not folded into piece_fetch_status.
      expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS);
      expect(row.car_parseable).toBe(true);
      expect(row.ipni_status).toBe("invalid");
      expect(row.block_fetch_valid).toBe(false);
      expect(row.block_fetch_sampled_count).toBe(5);
      expect(row.block_fetch_failed_count).toBe(2);
    });

    it("emits ipni_status='error' (not 'skipped') when CAR validation throws on a successful piece", async () => {
      // Distinguishes a real infra outage (e.g. IpniVerificationService down)
      // from a piece that legitimately had no IPFS indexing. Without the
      // distinction, an outage looks like normal non-IPFS volume in dashboards.
      const { service, insertSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy } = makeService({
        pieceResult: okPiece(Buffer.from("car-bytes")),
        piece: INDEXED_PIECE,
        validateCarImpl: async () => {
          throw new Error("IpniVerificationService down");
        },
      });

      await service.performForProvider(SP_ADDRESS);

      expect(metricsRecordIpniSpy).toHaveBeenCalledWith(expect.anything(), "error");
      expect(metricsRecordBlockFetchSpy).toHaveBeenCalledWith(expect.anything(), "error");

      const [, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
      expect(row.ipni_status).toBe("error");
      // Piece-fetch path itself succeeded — only the validation pipeline failed.
      expect(row.commp_valid).toBe(true);
      expect(row.car_parseable).toBeNull();
    });

    it("emits car_parseable=false with skipped IPNI/block-fetch when bytes don't parse as CAR", async () => {
      const carResult: CarValidationResult = {
        carParseable: false,
        blockCount: 0,
        sampledCidCount: 0,
        ipniValid: null,
        ipniVerifyMs: null,
        blockFetchValid: null,
        blockFetchFailedCount: null,
        blockFetchEndpoint: null,
      };

      const { service, insertSpy } = makeService({
        pieceResult: okPiece(Buffer.from("not-a-car")),
        piece: INDEXED_PIECE,
        carResult,
      });

      await service.performForProvider(SP_ADDRESS);

      const [, row] = insertSpy.mock.calls[0] as [string, Record<string, unknown>];
      expect(row.car_parseable).toBe(false);
      // car_block_count and block_fetch_sampled_count are gated on carParseable
      // so an unparseable CAR doesn't emit a misleading 0.
      expect(row.car_block_count).toBeNull();
      expect(row.block_fetch_sampled_count).toBeNull();
      expect(row.block_fetch_endpoint).toBeNull();
      expect(row.block_fetch_valid).toBeNull();
      expect(row.block_fetch_failed_count).toBeNull();
      expect(row.ipni_status).toBe("skipped");
      expect(row.ipni_verify_ms).toBeNull();
    });
  });
});

// ——— apps/backend/src/retrieval-anon/anon-retrieval.service.ts (new file) ———
import { randomUUID } from "node:crypto";
import { Injectable, Logger } from "@nestjs/common";
import { InjectRepository } from "@nestjs/typeorm";
import type { Repository } from "typeorm";
import { ClickhouseService } from "../clickhouse/clickhouse.service.js";
import { type ProviderJobContext, toStructuredError } from "../common/logging.js";
import { StorageProvider } from "../database/entities/storage-provider.entity.js";
import { IpniCheckStatus, RetrievalStatus, ServiceType } from "../database/types.js";
import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js";
import { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js";
import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js";
import { AnonPieceSelectorService } from "./anon-piece-selector.service.js";
import { CarValidationService } from "./car-validation.service.js";
import { PieceRetrievalService } from "./piece-retrieval.service.js";
import type { CarValidationResult, PieceRetrievalResult } from "./types.js";

const ANON_RETRIEVAL_CHECKS_TABLE = "anon_retrieval_checks";

@Injectable()
export class AnonRetrievalService {
  private readonly logger = new Logger(AnonRetrievalService.name);

  constructor(
    private readonly anonPieceSelectorService: AnonPieceSelectorService,
    private readonly pieceRetrievalService: PieceRetrievalService,
    private readonly carValidationService: CarValidationService,
    private readonly walletSdkService: WalletSdkService,
    private readonly metrics: AnonRetrievalCheckMetrics,
    private readonly clickhouseService: ClickhouseService,
    @InjectRepository(StorageProvider)
    private readonly spRepository: Repository<StorageProvider>,
  ) {}

  /**
   * Run one anonymous-retrieval check against `spAddress`: select a piece,
   * fetch it, optionally run CAR/IPNI/block-fetch validation, then persist
   * exactly one ClickHouse row and emit Prometheus metrics.
   *
   * @param spAddress  service-provider address under test
   * @param signal     job-scoped abort signal; partial metrics are still persisted on abort
   * @param logContext structured-logging fields propagated onto every log line
   */
  async performForProvider(spAddress: string, signal?: AbortSignal, logContext?: ProviderJobContext): Promise<void> {
    // Build metric labels
    const provider = await this.spRepository.findOne({ where: { address: spAddress } });
    const labels = buildCheckMetricLabels({
      checkType: "anon_retrieval",
      providerId: provider?.providerId,
      providerName: provider?.name,
      providerIsApproved: provider?.isApproved,
    });

    // 1. Select an anonymous piece
    const piece = await this.anonPieceSelectorService.selectPieceForProvider(spAddress);
    if (!piece) {
      this.logger.warn({
        ...logContext,
        event: "anon_retrieval_no_piece",
        message: "No anonymous piece found for SP",
        spAddress,
      });
      this.metrics.recordStatus(labels, "failure.no_piece");
      return;
    }

    this.logger.log({
      ...logContext,
      event: "anon_retrieval_started",
      message: "Starting anonymous retrieval test",
      pieceCid: piece.pieceCid,
      dataSetId: piece.dataSetId,
      pieceId: piece.pieceId,
      withIPFSIndexing: piece.withIPFSIndexing,
      spAddress,
    });

    const checkStart = Date.now();
    const startedAt = new Date();

    let pieceResult: PieceRetrievalResult | null = null;
    let carResult: CarValidationResult | null = null;
    let validatedCarPiece: boolean = false;

    try {
      // 2. Fetch the piece. fetchPiece never throws on abort — it returns a
      // result with partial metrics so we can persist what we have.
      if (signal?.aborted) {
        pieceResult = buildAbortedPlaceholder(piece.pieceCid, signal.reason);
      } else {
        pieceResult = await this.pieceRetrievalService.fetchPiece(spAddress, piece.pieceCid, signal);
      }

      // Emit piece retrieval metrics
      this.metrics.observeFirstByteMs(labels, pieceResult.ttfbMs);
      this.metrics.observeLastByteMs(labels, pieceResult.latencyMs);
      this.metrics.observeThroughput(labels, pieceResult.throughputBps);
      this.metrics.recordHttpResponseCode(labels, pieceResult.statusCode);

      // 3. CAR validation (only if piece was successfully retrieved and has IPFS indexing)
      if (
        pieceResult.success &&
        piece.withIPFSIndexing &&
        piece.ipfsRootCid &&
        pieceResult.pieceBytes &&
        provider &&
        !signal?.aborted
      ) {
        try {
          validatedCarPiece = true;
          carResult = await this.carValidationService.validateCarPiece(
            pieceResult.pieceBytes,
            provider,
            piece.ipfsRootCid,
            signal,
          );
          this.metrics.recordCarParseStatus(labels, carResult.carParseable);
          this.metrics.recordIpniStatus(labels, ipniStatusFromResult(carResult));
          this.metrics.recordBlockFetchStatus(
            labels,
            carResult.blockFetchValid === null
              ? IpniCheckStatus.SKIPPED
              : carResult.blockFetchValid
                ? IpniCheckStatus.VALID
                : IpniCheckStatus.INVALID,
          );
        } catch (error) {
          // Validation was attempted on a successful piece retrieval but threw.
          this.metrics.recordCarParseStatus(labels, false);
          this.metrics.recordIpniStatus(labels, IpniCheckStatus.ERROR);
          this.metrics.recordBlockFetchStatus(labels, IpniCheckStatus.ERROR);
          this.logger.warn({
            ...logContext,
            event: "anon_retrieval_car_validation_failed",
            message: "CAR validation threw an error",
            pieceCid: piece.pieceCid,
            spAddress,
            error: toStructuredError(error),
          });
        }
      } else {
        // FIX: was `else if (!pieceResult.success)`, which recorded SKIPPED
        // only for failed fetches. A *successful* fetch of a non-IPFS-indexed
        // piece recorded no IPNI/block-fetch status at all, while the
        // ClickHouse row for the same check (built in `finally` below) reports
        // ipni_status "skipped" — so the Prometheus series under-counted skips
        // relative to ClickHouse. Record SKIPPED for every check that did not
        // run CAR validation, whatever the reason.
        this.metrics.recordIpniStatus(labels, IpniCheckStatus.SKIPPED);
        this.metrics.recordBlockFetchStatus(labels, IpniCheckStatus.SKIPPED);
      }

      // Overall check duration and status
      this.metrics.observeCheckDuration(labels, Date.now() - checkStart);
      const pieceServedCorrectly = pieceResult.success && pieceResult.commPValid;
      this.metrics.recordStatus(
        labels,
        pieceServedCorrectly
          ? "success"
          : pieceResult.aborted
            ? "failure.timedout"
            : pieceResult.success
              ? "failure.commp"
              : "failure.http",
      );
    } finally {
      // Always emit a ClickHouse row — even on abort or unexpected error — so
      // we never lose the evidence (ttfb, bytes, response code) we already
      // collected. ClickhouseService.insert is a no-op when disabled.
      const finalPieceResult = pieceResult ?? buildAbortedPlaceholder(piece.pieceCid, signal?.reason);
      const retrievalId = randomUUID();
      const providerInfo = this.walletSdkService.getProviderInfo(spAddress);
      const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress;
      const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED;
      const ipniStatus: IpniCheckStatus = !validatedCarPiece
        ? IpniCheckStatus.SKIPPED
        : carResult
          ? ipniStatusFromResult(carResult)
          : IpniCheckStatus.ERROR;

      try {
        this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, {
          timestamp: startedAt.getTime(),
          probe_location: this.clickhouseService.probeLocation,
          sp_address: spAddress,
          sp_id: provider?.providerId != null ? Number(provider.providerId) : null,
          sp_name: provider?.name ?? null,
          retrieval_id: retrievalId,
          piece_cid: piece.pieceCid,
          data_set_id: piece.dataSetId,
          piece_id: piece.pieceId,
          raw_size: piece.rawSize,
          with_ipfs_indexing: piece.withIPFSIndexing,
          ipfs_root_cid: piece.ipfsRootCid,
          service_type: ServiceType.DIRECT_SP,
          retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`,
          piece_fetch_status: pieceFetchStatus,
          http_response_code: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null,
          first_byte_ms: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null,
          last_byte_ms: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null,
          bytes_retrieved: finalPieceResult.bytesReceived > 0 ? finalPieceResult.bytesReceived : null,
          throughput_bps: finalPieceResult.throughputBps > 0 ? Math.round(finalPieceResult.throughputBps) : null,
          commp_valid: finalPieceResult.success ? finalPieceResult.commPValid : null,
          car_parseable: carResult ? carResult.carParseable : null,
          car_block_count: carResult?.carParseable ? carResult?.blockCount : null,
          block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null,
          block_fetch_valid: carResult ? carResult.blockFetchValid : null,
          block_fetch_sampled_count: carResult?.carParseable ? carResult?.sampledCidCount : null,
          block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null,
          ipni_status: ipniStatus,
          ipni_verify_ms: carResult?.ipniVerifyMs ?? null,
          error_message: finalPieceResult.errorMessage ?? null,
        });
      } catch (error) {
        // ClickhouseService.insert is buffered/non-throwing in normal operation, but
        // guard against unexpected runtime errors so we don't break the probe cycle.
        this.logger.warn({
          ...logContext,
          event: "anon_retrieval_clickhouse_insert_failed",
          message: "Failed to enqueue anonymous retrieval row to ClickHouse",
          pieceCid: piece.pieceCid,
          spAddress,
          error: toStructuredError(error),
        });
      }

      this.logger.log({
        ...logContext,
        event: "anon_retrieval_completed",
        message: "Anonymous retrieval test completed",
        retrievalId,
        pieceCid: piece.pieceCid,
        spAddress,
        success: finalPieceResult.success,
        aborted: finalPieceResult.aborted === true,
        latencyMs: finalPieceResult.latencyMs,
        ttfbMs: finalPieceResult.ttfbMs,
        bytesRetrieved: finalPieceResult.bytesReceived,
        carParseable: carResult?.carParseable,
        ipniValid: carResult?.ipniValid,
        blockFetchValid: carResult?.blockFetchValid,
      });
    }
  }
}

/** Map a CarValidationResult's tri-state ipniValid onto the metric enum. */
function ipniStatusFromResult(result: CarValidationResult): IpniCheckStatus {
  if (result.ipniValid === null) return IpniCheckStatus.SKIPPED;
  return result.ipniValid ? IpniCheckStatus.VALID : IpniCheckStatus.INVALID;
}

/** Synthesize a failed PieceRetrievalResult for a job aborted before/without a fetch. */
function buildAbortedPlaceholder(pieceCid: string, reason: unknown): PieceRetrievalResult {
  const message =
    reason instanceof Error && reason.message ? reason.message : typeof reason === "string" ?
reason : "aborted"; + return { + success: false, + pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + errorMessage: message, + aborted: true, + }; +} diff --git a/apps/backend/src/retrieval-anon/car-validation.service.ts b/apps/backend/src/retrieval-anon/car-validation.service.ts new file mode 100644 index 00000000..c3a6c717 --- /dev/null +++ b/apps/backend/src/retrieval-anon/car-validation.service.ts @@ -0,0 +1,243 @@ +import { CarReader } from "@ipld/car"; +import * as dagPB from "@ipld/dag-pb"; +import { Injectable, Logger } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { create as createBlock } from "multiformats/block"; +import { CID } from "multiformats/cid"; +import * as raw from "multiformats/codecs/raw"; +import { sha256 } from "multiformats/hashes/sha2"; +import { toStructuredError } from "../common/logging.js"; +import type { IConfig } from "../config/app.config.js"; +import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; +import { HttpClientService } from "../http-client/http-client.service.js"; +import { IpniVerificationService } from "../ipni/ipni-verification.service.js"; +import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; +import type { CarValidationResult } from "./types.js"; + +// UnixFS DAGs use only dag-pb (interior nodes) and raw (leaf data) codecs +const unixfsCodecs: Record unknown }> = { + [dagPB.code]: dagPB, + [raw.code]: raw, +}; + +@Injectable() +export class CarValidationService { + private readonly logger = new Logger(CarValidationService.name); + + constructor( + private readonly configService: ConfigService, + private readonly httpClientService: HttpClientService, + private readonly walletSdkService: WalletSdkService, + private readonly ipniVerificationService: IpniVerificationService, + ) {} + + /** + * Validate an anonymous piece retrieved as a CAR: + * 1. 
parse the CAR, + * 2. sample random blocks, + * 3. confirm the SP is advertised for the root + sampled CIDs via IPNI, + * 4. fetch each sampled block from the SP and hash-verify it. + * + * CAR parse failure is attributed to the client (bad upload), not the SP. + */ + async validateCarPiece( + pieceBytes: Buffer, + provider: StorageProvider, + ipfsRootCid: string, + signal?: AbortSignal, + ): Promise { + let blocks: { cid: CID; bytes: Uint8Array }[]; + try { + blocks = await this.parseCar(pieceBytes); + } catch (error) { + this.logger.debug({ + event: "car_parse_failed", + message: "Failed to parse piece bytes as CAR - client fault, not SP", + spAddress: provider.address, + ipfsRootCid, + error: toStructuredError(error), + }); + return { + carParseable: false, + blockCount: 0, + sampledCidCount: 0, + ipniValid: null, + ipniVerifyMs: null, + blockFetchValid: null, + blockFetchFailedCount: null, + blockFetchEndpoint: null, + }; + } + if (blocks.length === 0) { + return { + carParseable: true, + blockCount: 0, + sampledCidCount: 0, + ipniValid: null, + ipniVerifyMs: null, + blockFetchValid: null, + blockFetchFailedCount: null, + blockFetchEndpoint: null, + errorMessage: "CAR contained no blocks", + }; + } + + const sampleCount = this.configService.get("retrieval", { infer: true }).anonBlockSampleCount; + const shuffled = [...blocks].sort(() => Math.random() - 0.5); + const sampledBlocks = shuffled.slice(0, sampleCount); + + const ipni = await this.checkIpni(provider, ipfsRootCid, sampledBlocks, signal); + const blockFetchResult = await this.checkBlockFetch(sampledBlocks, provider.address, signal); + + return { + carParseable: true, + blockCount: blocks.length, + sampledCidCount: sampledBlocks.length, + ipniValid: ipni.valid, + ipniVerifyMs: ipni.durationMs, + blockFetchValid: blockFetchResult.valid, + blockFetchFailedCount: blockFetchResult.failedCount, + blockFetchEndpoint: blockFetchResult.endpoint, + errorMessage: blockFetchResult.errorMessage, + }; + } + + private 
async parseCar(pieceBytes: Buffer): Promise<{ cid: CID; bytes: Uint8Array }[]> { + const reader = await CarReader.fromBytes(new Uint8Array(pieceBytes)); + const blocks: { cid: CID; bytes: Uint8Array }[] = []; + for await (const block of reader.blocks()) { + blocks.push({ cid: block.cid, bytes: block.bytes }); + } + return blocks; + } + + /** + * Verify via IPNI that the SP is advertised for the root CID and each sampled child CID. + * Delegates to the shared IpniVerificationService which uses filecoin-pin's provider-scoped check. + */ + private async checkIpni( + provider: StorageProvider, + ipfsRootCid: string, + sampledBlocks: ReadonlyArray<{ cid: CID }>, + signal?: AbortSignal, + ): Promise<{ + valid: boolean; + durationMs: number | null; + }> { + const timeouts = this.configService.get("timeouts", { infer: true }); + let rootCid: CID; + try { + rootCid = CID.parse(ipfsRootCid); + } catch (error) { + this.logger.warn({ + event: "ipni_root_cid_invalid", + message: "Failed to parse ipfsRootCID", + ipfsRootCid, + providerAddress: provider.address, + error: toStructuredError(error), + }); + return { valid: false, durationMs: null }; + } + + const result = await this.ipniVerificationService.verify({ + rootCid, + blockCids: sampledBlocks.map((b) => b.cid), + storageProvider: provider, + timeoutMs: timeouts.ipniVerificationTimeoutMs, + pollIntervalMs: timeouts.ipniVerificationPollingMs, + signal, + }); + + return { + valid: result.rootCIDVerified, + durationMs: result.durationMs, + }; + } + + /** + * Fetch each sampled block from the SP endpoint and hash-verify the response + * against the declared CID. Mirrors IpfsBlockRetrievalStrategy's per-block + * verification for the sampled subset (no DAG traversal). 
+ */ + private async checkBlockFetch( + sampledBlocks: ReadonlyArray<{ cid: CID; bytes: Uint8Array }>, + spAddress: string, + signal?: AbortSignal, + ): Promise<{ valid: boolean | null; failedCount: number | null; endpoint: string | null; errorMessage?: string }> { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + if (!providerInfo) { + return { + valid: null, + failedCount: null, + endpoint: null, + errorMessage: `Provider info not found for ${spAddress}`, + }; + } + + const spBaseUrl = providerInfo.pdp.serviceURL.replace(/\/$/, ""); + const endpoint = `${spBaseUrl}/ipfs/`; + let failedCount = 0; + + for (const block of sampledBlocks) { + const cidStr = block.cid.toString(); + const blockUrl = `${spBaseUrl}/ipfs/${cidStr}?format=raw`; + + try { + const resp = await this.httpClientService.requestWithMetrics(blockUrl, { + headers: { Accept: "application/vnd.ipld.raw" }, + httpVersion: "2", + signal, + }); + + if (resp.metrics.statusCode < 200 || resp.metrics.statusCode >= 300) { + failedCount += 1; + this.logger.warn({ + event: "block_fetch_non_2xx", + message: "Block fetch returned non-2xx status", + cid: cidStr, + spAddress, + statusCode: resp.metrics.statusCode, + }); + continue; + } + + if (block.cid.multihash.code !== sha256.code) { + this.logger.warn({ + event: "block_unsupported_hash", + message: `Unsupported hash algorithm 0x${block.cid.multihash.code.toString(16)}`, + cid: cidStr, + spAddress, + }); + failedCount += 1; + continue; + } + + const codec = unixfsCodecs[block.cid.code]; + if (!codec) { + this.logger.warn({ + event: "block_unsupported_codec", + message: `Unsupported codec 0x${block.cid.code.toString(16)}`, + cid: cidStr, + spAddress, + }); + failedCount += 1; + continue; + } + + // Hash-verifies and decodes; throws on mismatch + await createBlock({ bytes: resp.data, cid: block.cid, hasher: sha256, codec }); + } catch (error) { + failedCount += 1; + this.logger.warn({ + event: "block_fetch_failed", + message: "Block fetch 
or hash verification failed", + cid: cidStr, + spAddress, + error: toStructuredError(error), + }); + } + } + + return { valid: failedCount === 0, failedCount, endpoint }; + } +} diff --git a/apps/backend/src/retrieval-anon/piece-retrieval.service.ts b/apps/backend/src/retrieval-anon/piece-retrieval.service.ts new file mode 100644 index 00000000..51150661 --- /dev/null +++ b/apps/backend/src/retrieval-anon/piece-retrieval.service.ts @@ -0,0 +1,195 @@ +import { asPieceCID, calculate as calculatePieceCid } from "@filoz/synapse-core/piece"; +import { Injectable, Logger } from "@nestjs/common"; +import { toStructuredError } from "../common/logging.js"; +import { HttpClientService } from "../http-client/http-client.service.js"; +import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; +import type { PieceRetrievalResult } from "./types.js"; + +@Injectable() +export class PieceRetrievalService { + private readonly logger = new Logger(PieceRetrievalService.name); + + constructor( + private readonly walletSdkService: WalletSdkService, + private readonly httpClientService: HttpClientService, + ) {} + + async fetchPiece(spAddress: string, pieceCid: string, signal?: AbortSignal): Promise { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + + if (!providerInfo) { + this.logger.warn({ + event: "provider_info_not_found", + message: "Cannot fetch piece: provider info not found", + spAddress, + pieceCid, + }); + + return { + success: false, + pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + errorMessage: `Provider info not found for ${spAddress}`, + }; + } + + const baseUrl = providerInfo.pdp.serviceURL.replace(/\/$/, ""); + const url = `${baseUrl}/piece/${pieceCid}`; + + try { + const result = await this.httpClientService.requestWithMetrics(url, { + httpVersion: "2", + signal, + }); + + const { metrics } = result; + const isSuccess = metrics.statusCode 
>= 200 && metrics.statusCode < 300; + const throughputBps = metrics.totalTime > 0 ? metrics.responseSize / (metrics.totalTime / 1000) : 0; + + if (result.aborted) { + this.logger.warn({ + event: "piece_fetch_aborted", + message: "Piece fetch aborted mid-download; returning partial metrics", + url, + pieceCid, + spAddress, + bytesReceived: metrics.responseSize, + ttfbMs: metrics.ttfb, + abortReason: result.abortReason, + }); + + return { + success: false, + pieceCid, + bytesReceived: metrics.responseSize, + pieceBytes: null, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + throughputBps, + statusCode: metrics.statusCode, + commPValid: false, + errorMessage: result.abortReason ?? "aborted", + aborted: true, + }; + } + + if (!isSuccess) { + this.logger.warn({ + event: "piece_fetch_non_2xx", + message: "Piece fetch returned non-2xx status", + url, + statusCode: metrics.statusCode, + pieceCid, + spAddress, + }); + + return { + success: false, + pieceCid, + bytesReceived: metrics.responseSize, + pieceBytes: null, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + throughputBps, + statusCode: metrics.statusCode, + commPValid: false, + errorMessage: `HTTP ${metrics.statusCode}`, + }; + } + + const pieceBytes = Buffer.isBuffer(result.data) ? 
result.data : Buffer.from(result.data); + const commPValid = await this.validateCommP(pieceBytes, pieceCid); + + this.logger.debug({ + event: "piece_fetch_success", + message: "Piece fetched successfully", + pieceCid, + spAddress, + bytesReceived: metrics.responseSize, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + }); + + return { + success: true, + pieceCid, + bytesReceived: metrics.responseSize, + pieceBytes, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + throughputBps, + statusCode: metrics.statusCode, + commPValid, + }; + } catch (error) { + const aborted = signal?.aborted === true; + this.logger.warn({ + event: "piece_fetch_failed", + message: "Piece fetch threw an error", + url, + pieceCid, + spAddress, + aborted, + error: toStructuredError(error), + }); + + return { + success: false, + pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + errorMessage: error instanceof Error ? error.message : String(error), + aborted, + }; + } + } + + /** + * Compute the piece CID (sha2-256-trunc254-padded) of the retrieved bytes and compare + * against the expected CID. Returns false on parse failure, computation failure, or mismatch. 
+ */ + private async validateCommP(bytes: Buffer, pieceCid: string): Promise { + const expected = asPieceCID(pieceCid); + if (!expected) { + this.logger.warn({ + event: "commp_invalid_piece_cid", + message: "Cannot parse expected piece CID for CommP validation", + pieceCid, + }); + return false; + } + + try { + const computed = calculatePieceCid(bytes); + const matches = computed.toString() === expected.toString(); + if (!matches) { + this.logger.warn({ + event: "commp_mismatch", + message: "Piece CID mismatch: SP-returned bytes hash to a different CID", + expected: expected.toString(), + computed: computed.toString(), + }); + } + return matches; + } catch (error) { + this.logger.warn({ + event: "commp_validation_error", + message: "CommP computation threw an error", + pieceCid, + error: toStructuredError(error), + }); + return false; + } + } +} diff --git a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts new file mode 100644 index 00000000..c05dcb5f --- /dev/null +++ b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts @@ -0,0 +1,26 @@ +import { Module } from "@nestjs/common"; +import { ConfigModule } from "@nestjs/config"; +import { TypeOrmModule } from "@nestjs/typeorm"; +import { StorageProvider } from "../database/entities/storage-provider.entity.js"; +import { HttpClientModule } from "../http-client/http-client.module.js"; +import { IpniModule } from "../ipni/ipni.module.js"; +import { SubgraphModule } from "../subgraph/subgraph.module.js"; +import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; +import { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; +import { AnonRetrievalService } from "./anon-retrieval.service.js"; +import { CarValidationService } from "./car-validation.service.js"; +import { PieceRetrievalService } from "./piece-retrieval.service.js"; + +@Module({ + imports: [ + ConfigModule, + TypeOrmModule.forFeature([StorageProvider]), + 
    SubgraphModule,
    WalletSdkModule,
    HttpClientModule,
    IpniModule,
  ],
  providers: [AnonPieceSelectorService, PieceRetrievalService, CarValidationService, AnonRetrievalService],
  exports: [AnonRetrievalService],
})
export class RetrievalAnonModule {}
diff --git a/apps/backend/src/retrieval-anon/types.ts b/apps/backend/src/retrieval-anon/types.ts
new file mode 100644
index 00000000..9013a5ea
--- /dev/null
+++ b/apps/backend/src/retrieval-anon/types.ts
/** The result of anonymous piece selection. */
export type AnonPiece = {
  /** Piece CID (CommP) of the sampled piece. */
  pieceCid: string;
  /** Owning data set (proof set) id. */
  dataSetId: string;
  /** Root/piece id within the data set. */
  pieceId: string;
  /** Address of the SP that stores the piece. */
  serviceProvider: string;
  /** Whether the owning data set advertises IPFS indexing. */
  withIPFSIndexing: boolean;
  /** IPFS root CID when indexed; null otherwise. */
  ipfsRootCid: string | null;
  /** Raw piece size in bytes, as a decimal string. */
  rawSize: string;
};

/** Result of piece retrieval. */
export type PieceRetrievalResult = {
  /** True only for a 2xx response; CommP validity is reported separately. */
  success: boolean;
  /** The piece CID that was requested. */
  pieceCid: string;
  /** Bytes received from the SP (may be partial on abort). */
  bytesReceived: number;
  /** Response body on success; null on every failure path. */
  pieceBytes: Buffer | null;
  /** Total transfer time in ms as reported by the HTTP client. */
  latencyMs: number;
  /** Time to first byte in ms. */
  ttfbMs: number;
  /** bytesReceived / elapsed seconds; 0 when elapsed time is 0. */
  throughputBps: number;
  /** HTTP status; 0 when no response was obtained. */
  statusCode: number;
  /** Whether the received bytes hash to the expected piece CID. */
  commPValid: boolean;
  /** Human-readable reason, present on failure. */
  errorMessage?: string;
  /** Set when the fetch was cut short by an AbortSignal. */
  aborted?: boolean;
};

/** Result of CAR validation. */
// NOTE(review): nullable fields appear to mean "check was skipped" — confirm
// against CarValidationService, which is defined outside this chunk.
export type CarValidationResult = {
  /** True when the payload parsed as a CAR file. */
  carParseable: boolean;
  /** Number of blocks parsed out of the CAR. */
  blockCount: number;
  /** Number of block CIDs sampled for follow-up checks. */
  sampledCidCount: number;
  ipniValid: boolean | null;
  ipniVerifyMs: number | null;
  blockFetchValid: boolean | null;
  blockFetchFailedCount: number | null;
  blockFetchEndpoint: string | null;
  errorMessage?: string;
};
diff --git a/apps/backend/src/subgraph/queries.ts b/apps/backend/src/subgraph/queries.ts
new file mode 100644
index 00000000..74802ddf
--- /dev/null
+++ b/apps/backend/src/subgraph/queries.ts
/** Static GraphQL documents used by SubgraphService. */
export const Queries = {
  GET_PROVIDERS_WITH_DATASETS: `
    query GetProvidersWithDataSet($addresses: [Bytes!], $blockNumber: BigInt!)
    {
      providers(where: {address_in: $addresses}) {
        address
        totalFaultedPeriods
        totalProvingPeriods
        proofSets (where: {nextDeadline_lt: $blockNumber, status: PROVING}) {
          nextDeadline
          maxProvingPeriod
        }
      }
    }
  `,
  GET_SUBGRAPH_META: `
    query GetSubgraphMeta {
      _meta {
        block {
          number
        }
      }
    }
  `,
} as const;

/**
 * Build a sampleAnonPiece query scoped to the requested pool. The single
 * piece of query shape that differs is whether the proofSet filter pins
 * `withIPFSIndexing: true`; assembling the fragment here keeps the rest
 * of the query and the returned selection set shared.
 *
 * @param pool - "indexed" restricts to IPFS-indexed proof sets; "any" does not.
 * @returns The GraphQL document as a string (variables are bound by the caller).
 */
export function buildSampleAnonPieceQuery(pool: "indexed" | "any"): string {
  // "any" interpolates an empty string, leaving the proofSet filter unpinned.
  const indexingFilter = pool === "indexed" ? "withIPFSIndexing: true" : "";
  return `
    query SampleAnonPiece(
      $serviceProvider: Bytes!
      $payer: Bytes!
      $sampleKey: Bytes!
      $minSize: BigInt!
      $maxSize: BigInt!
    ) {
      _meta {
        block {
          number
        }
      }
      roots(
        first: 1
        orderBy: sampleKey
        orderDirection: asc
        where: {
          sampleKey_gte: $sampleKey
          removed: false
          rawSize_gte: $minSize
          rawSize_lte: $maxSize
          proofSet_: {
            fwssServiceProvider: $serviceProvider
            fwssPayer_not: $payer
            isActive: true
            ${indexingFilter}
          }
        }
        subgraphError: allow
      ) {
        rootId
        cid
        rawSize
        ipfsRootCID
        proofSet {
          setId
          withIPFSIndexing
          fwssPayer
          pdpPaymentEndEpoch
        }
      }
    }
  `;
}
diff --git a/apps/backend/src/subgraph/subgraph.module.ts b/apps/backend/src/subgraph/subgraph.module.ts
new file mode 100644
index 00000000..7834c39b
--- /dev/null
+++ b/apps/backend/src/subgraph/subgraph.module.ts
import { Module } from "@nestjs/common";
import { SubgraphService } from "./subgraph.service.js";

/** Exposes SubgraphService for injection by other modules. */
@Module({
  providers: [SubgraphService],
  exports: [SubgraphService],
})
export class SubgraphModule {}
diff --git a/apps/backend/src/subgraph/subgraph.service.spec.ts b/apps/backend/src/subgraph/subgraph.service.spec.ts
new file mode 100644
index 00000000..64f28435 --- /dev/null +++ b/apps/backend/src/subgraph/subgraph.service.spec.ts @@ -0,0 +1,851 @@ +import type { ConfigService } from "@nestjs/config"; +import { CID } from "multiformats/cid"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { IConfig } from "../config/app.config.js"; +import { SubgraphService } from "./subgraph.service.js"; + +const VALID_ADDRESS = "0xd8da6bf26964af9d7eed9e03e53415d37aa96045" as const; +const SUBGRAPH_ENDPOINT = "https://api.thegraph.com/subgraphs/filecoin/pdp" as const; + +const makeSubgraphResponse = (providers: Record[] = []) => ({ + data: { providers }, +}); + +const makeValidProvider = (overrides: Record = {}) => ({ + address: VALID_ADDRESS, + totalFaultedPeriods: "10", + totalProvingPeriods: "100", + proofSets: [ + { + totalFaultedPeriods: "2", + currentDeadlineCount: "5", + nextDeadline: "1000", + maxProvingPeriod: "100", + }, + ], + ...overrides, +}); + +const makeSubgraphMetaResponse = (blockNumber = 12345) => ({ + data: { + _meta: { + block: { + number: blockNumber, + }, + }, + }, +}); + +const FWSS_SP_ADDRESS = "0xAaaaAAaaaaAAaaaAaAaAaaAaaaAaAaAaaAaaa111"; +const FWSS_PAYER = "0xBBbbBBbbBBbBBbBbbBBbbBBbbbbBbBBbbBBbb222"; +const EXAMPLE_PIECE_CID = "baga6ea4seaqpzwrimvoc4jp4l7mk6knsknf6owsc2ev4krrs2peenl5qelh6u4y"; +const pieceCidHex = `0x${Buffer.from(CID.parse(EXAMPLE_PIECE_CID).bytes).toString("hex")}`; + +const makeSampleRoot = (overrides: Record = {}) => ({ + rootId: "1", + cid: pieceCidHex, + rawSize: "1048576", + ipfsRootCID: "bafyroot", + proofSet: { + setId: "42", + withIPFSIndexing: true, + fwssPayer: FWSS_PAYER.toLowerCase(), + pdpPaymentEndEpoch: null, + }, + ...overrides, +}); + +const makeSampleResponse = (roots: Record[] = [], blockNumber = 12345) => ({ + data: { + _meta: { block: { number: blockNumber } }, + roots, + }, +}); + +const SAMPLE_KEY = "0x0000000000000000000000000000000000000000000000000000000000000001"; +const defaultSampleParams = { + 
serviceProvider: FWSS_SP_ADDRESS, + payer: FWSS_PAYER, + sampleKey: SAMPLE_KEY, + minSize: "0", + maxSize: "1000000000000", + pool: "indexed" as const, +}; + +describe("SubgraphService", () => { + let service: SubgraphService; + let fetchMock: ReturnType; + + beforeEach(() => { + const configService = { + get: vi.fn((key: keyof IConfig) => { + if (key === "blockchain") { + return { subgraphEndpoint: SUBGRAPH_ENDPOINT }; + } + return undefined; + }), + } as unknown as ConfigService; + + service = new SubgraphService(configService); + + fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.useRealTimers(); + }); + + describe("fetchProvidersWithDatasets", () => { + it("fetches and returns validated providers with bigint fields", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + const providers = await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + + expect(fetchMock).toHaveBeenCalledWith(SUBGRAPH_ENDPOINT, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: expect.stringContaining('"addresses"'), + }); + + expect(providers).toHaveLength(1); + expect(providers[0].address).toBe(VALID_ADDRESS); + expect(providers[0].totalFaultedPeriods).toBe(10n); + expect(providers[0].totalProvingPeriods).toBe(100n); + expect(providers[0].proofSets[0].maxProvingPeriod).toBe(100n); + }); + + it("returns empty array when no providers exist", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([]), + }); + + const providers = await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + expect(providers).toEqual([]); + }); + + it("returns empty array when addresses array is empty", async () => { + const providers = await service.fetchProvidersWithDatasets({ 
+ blockNumber: 5000, + addresses: [], + }); + + expect(providers).toEqual([]); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("throws on HTTP error response", async () => { + fetchMock.mockResolvedValue({ + ok: false, + status: 500, + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + + // This stops Node.js from throwing an Unhandled Rejection during fast-forward. + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + await expect(promise).rejects.toThrow("Failed to fetch provider data after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("throws on GraphQL errors in response", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: null, + errors: [{ message: "Query failed" }], + }), + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch provider data after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("throws on network failure", async () => { + fetchMock.mockRejectedValueOnce(new Error("Network error")); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch provider data after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); // Initial + 2 retries = 3 total + }); + + it("throws immediately on validation error without retrying", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { providers: [{ address: "invalid" }] }, + }), + }); + + await expect( + 
service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }), + ).rejects.toThrow("Data validation failed"); + + // Should only be called once - no retries for validation errors + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("throws immediately when response data is missing required fields", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { providers: [{ address: VALID_ADDRESS }] }, // Missing required fields + }), + }); + + await expect( + service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }), + ).rejects.toThrow("Data validation failed"); + + // Should only be called once - no retries for validation errors + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("sends blockNumber as string in the GraphQL variables", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + await service.fetchProvidersWithDatasets({ + blockNumber: 12345, + addresses: [VALID_ADDRESS], + }); + + const body = JSON.parse(fetchMock.mock.calls[0][1].body); + expect(body.variables.blockNumber).toBe("12345"); + }); + + it("retries network errors but not validation errors", async () => { + // First attempt: network error (should retry) + fetchMock.mockRejectedValueOnce(new Error("Network timeout")); + + // Second attempt: succeeds but validation fails (should not retry) + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { providers: [{ address: "invalid" }] }, + }), + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Data validation failed"); + + // Should be called twice: initial network error + 1 retry that fails validation + 
expect(fetchMock).toHaveBeenCalledTimes(2); + }); + + it("sends addresses array in the GraphQL variables", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + const addresses = [VALID_ADDRESS, "0xAb5801a7D398351b8bE11C439e05C5B3259aeC9B"]; + await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses, + }); + + const body = JSON.parse(fetchMock.mock.calls[0][1].body); + expect(body.variables.addresses).toEqual(addresses); + }); + + it("batches large address lists into chunks of MAX_PROVIDERS_PER_QUERY", async () => { + // Create 150 addresses (should be split into 2 batches: 100 + 50) + const addresses = Array.from({ length: 150 }, (_, i) => `0x${i.toString().padStart(40, "0")}`); + + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphResponse([]), + }); + + await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses, + }); + + // Should make 2 requests + expect(fetchMock).toHaveBeenCalledTimes(2); + }); + + it("retries failed requests with exponential backoff", async () => { + // Fail on first attempt, succeed on second attempt (1 retry) + fetchMock.mockRejectedValueOnce(new Error("Network timeout")).mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + + await vi.runAllTimersAsync(); + + // Now await the final promise to resolve + const providers = await promise; + + expect(fetchMock).toHaveBeenCalledTimes(2); // Initial attempt + 1 retry + expect(providers).toHaveLength(1); + }); + + it("processes batches with concurrency control", async () => { + // Create 120 addresses (should be 2 batches of 100 each, but processed with concurrency limit) + const addresses = Array.from({ length: 120 }, (_, i) => `0x${i.toString().padStart(40, "0")}`); + + let 
concurrentCalls = 0; + let maxConcurrentCalls = 0; + + fetchMock.mockImplementation(async () => { + concurrentCalls++; + maxConcurrentCalls = Math.max(maxConcurrentCalls, concurrentCalls); + await new Promise((resolve) => setTimeout(resolve, 10)); + concurrentCalls--; + return { + ok: true, + json: async () => makeSubgraphResponse([]), + }; + }); + + const fetchPromise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses, + }); + + await vi.runAllTimersAsync(); + + await fetchPromise; + + // Should respect MAX_CONCURRENT_REQUESTS (50) + expect(maxConcurrentCalls).toBeLessThanOrEqual(50); + expect(fetchMock).toHaveBeenCalledTimes(2); + }); + }); + + describe("fetchSubgraphMeta", () => { + it("fetches and returns subgraph metadata with block number", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + const meta = await service.fetchSubgraphMeta(); + + expect(fetchMock).toHaveBeenCalledWith(SUBGRAPH_ENDPOINT, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: expect.stringContaining("GetSubgraphMeta"), + }); + + expect(meta).toEqual({ + _meta: { + block: { + number: 12345, + }, + }, + }); + }); + + it("throws when subgraph endpoint is not configured", async () => { + const configService = { + get: vi.fn(() => ({ subgraphEndpoint: "" })), + } as unknown as ConfigService; + + const serviceWithoutEndpoint = new SubgraphService(configService); + + await expect(serviceWithoutEndpoint.fetchSubgraphMeta()).rejects.toThrow("No subgraph endpoint configured"); + }); + + it("throws on HTTP error response", async () => { + fetchMock.mockResolvedValueOnce({ + ok: false, + status: 500, + statusText: "Internal Server Error", + }); + + const promise = service.fetchSubgraphMeta(); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch 
subgraph metadata after 3 attempts"); + }); + + it("throws on GraphQL errors in response", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + errors: [{ message: "Query timeout" }], + }), + }); + + const promise = service.fetchSubgraphMeta(); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch subgraph metadata after 3 attempts"); + }); + + it("throws on validation failure without retry", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { + _meta: { + block: { + number: "not-a-number", // Invalid - should be number + }, + }, + }, + }), + }); + + await expect(service.fetchSubgraphMeta()).rejects.toThrow("Data validation failed"); + expect(fetchMock).toHaveBeenCalledTimes(1); // Should not retry validation errors + }); + + it("throws on missing required fields", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { + _meta: { + block: { + number: undefined, // missing required field + }, + }, + }, + }), + }); + + await expect(service.fetchSubgraphMeta()).rejects.toThrow("Data validation failed"); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("retries on network failures with exponential backoff", async () => { + fetchMock.mockRejectedValueOnce(new Error("Network timeout")).mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + const promise = service.fetchSubgraphMeta(); + + await vi.runAllTimersAsync(); + + // Now await the second promise to resolve + const meta = await promise; + + expect(fetchMock).toHaveBeenCalledTimes(2); // Initial + 1 retry + expect(meta._meta.block.number).toBe(12345); + }); + + it("throws after MAX_RETRIES attempts on persistent network errors", async () => { + fetchMock.mockRejectedValue(new Error("Network timeout")); + + const promise = 
service.fetchSubgraphMeta(); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch subgraph metadata after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + }); + + describe("enforceRateLimit (sliding window)", () => { + it("allows requests when under the rate limit", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + const startTime = Date.now(); + + // Make 5 requests - should all go through immediately + const promises = Array.from({ length: 5 }, () => service.fetchSubgraphMeta()); + + await Promise.all(promises); + + const endTime = Date.now(); + const elapsed = endTime - startTime; + + // Should complete quickly (no waiting) + expect(elapsed).toBeLessThan(100); + expect(fetchMock).toHaveBeenCalledTimes(5); + }); + + it("enforces rate limit when exceeding MAX_CONCURRENT_REQUESTS", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill up the rate limit window with 50 requests + const initialPromises = Array.from({ length: 50 }, () => service.fetchSubgraphMeta()); + await Promise.all(initialPromises); + + fetchMock.mockClear(); + + // Try to make one more request - should wait for oldest to expire + const promise = service.fetchSubgraphMeta(); + + // Advance past the 10 second window + buffer + await vi.advanceTimersByTimeAsync(10010); + await promise; + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("throws error when requestCount exceeds MAX_CONCURRENT_REQUESTS", async () => { + // Access private method via type assertion for testing + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + + await expect(enforceRateLimit(51)).rejects.toThrow("Cannot request 51 items; exceeds rate limit window of 50"); + }); + + it("correctly calculates wait 
time for multiple required slots", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill 48 slots + const initialPromises = Array.from({ length: 48 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(initialPromises); + + fetchMock.mockClear(); + + // Request 5 more slots (need 3 to free up: 5 - 2 available = 3) + // Should wait for the 3rd oldest timestamp to expire + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + const promise = enforceRateLimit(5); + + // The 3rd request should expire at ~10 seconds + await vi.advanceTimersByTimeAsync(10010); + await promise; + + // Verify slots were reserved + // After 10s, the first 48 expired, so we should only have the 5 new ones + const timestamps = (service as any).requestTimestamps; + expect(timestamps.length).toBe(5); // Only the 5 new slots remain + }); + + it("handles sliding window correctly as old requests expire", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Make 30 requests at t=0 + const batch1 = Array.from({ length: 30 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch1); + + // Advance 5 seconds + await vi.advanceTimersByTimeAsync(5000); + + // Make 20 more requests at t=5000 + const batch2 = Array.from({ length: 20 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch2); + + // Now at t=5000, we have 50 requests in the window + // Advance to t=10100 - first 30 should expire + await vi.advanceTimersByTimeAsync(5100); + + fetchMock.mockClear(); + + // Should be able to make 30 more requests immediately + const batch3 = Array.from({ length: 30 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch3); + + expect(fetchMock).toHaveBeenCalledTimes(30); + }); + + it("adds 
10ms buffer to prevent timing edge cases", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill the window + const initialPromises = Array.from({ length: 50 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(initialPromises); + + fetchMock.mockClear(); + + const promise = service.fetchSubgraphMeta(); + + // Advance past the window + buffer + await vi.advanceTimersByTimeAsync(10010); + await promise; + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("recursively waits when multiple batches need to expire", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill window with 50 requests + const batch1 = Array.from({ length: 50 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch1); + + // Advance 5 seconds + await vi.advanceTimersByTimeAsync(5000); + + fetchMock.mockClear(); + + // Try to request 30 slots (need to wait for 30 to expire) + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + const promise = enforceRateLimit(30); + + // First recursion: wait for 30th oldest to expire (~10s from start) + await vi.advanceTimersByTimeAsync(5010); + + // Should recursively check and complete + await promise; + + const timestamps = (service as any).requestTimestamps; + // After 10s from start, all 50 initial requests expired, only 30 new ones remain + expect(timestamps.length).toBe(30); // Only the 30 new slots + }); + + it("reserves slots immediately to prevent race conditions", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill 47 slots + const initial = Array.from({ length: 47 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(initial); + + // Now we have 3 available slots + const 
enforceRateLimit = (service as any).enforceRateLimit.bind(service); + + // Request 3 slots - should succeed immediately + await enforceRateLimit(3); + + const timestamps = (service as any).requestTimestamps; + expect(timestamps.length).toBe(50); // 47 + 3 = 50 (full) + + // Try to request 1 more - should need to wait + const promise = enforceRateLimit(1); + + // Advance time to free up a slot + await vi.advanceTimersByTimeAsync(10010); + await promise; + + // After waiting, the old slots expired and new one was added + const finalTimestamps = (service as any).requestTimestamps; + expect(finalTimestamps.length).toBe(1); // Only the new request remains + }); + + it("filters out expired timestamps from the sliding window", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Make 20 requests + const batch1 = Array.from({ length: 20 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch1); + + // Advance past the window + await vi.advanceTimersByTimeAsync(11000); + + fetchMock.mockClear(); + + // Make another request - should have full window available + await service.fetchSubgraphMeta(); + + const timestamps = (service as any).requestTimestamps; + // Should only have 1 timestamp (the new one), old ones filtered out + expect(timestamps.length).toBe(1); + }); + }); + + describe("sampleAnonPiece", () => { + it("throws when endpoint is not configured (distinct from empty result)", async () => { + // Returning null here would make a misconfigured deployment indistinguishable + // from a genuinely empty candidate pool — every anon job would silently + // no-op forever. Fail loudly instead. 
+ const noEndpointConfig = { + get: vi.fn(() => ({ subgraphEndpoint: "" })), + } as unknown as ConfigService; + const noEndpointService = new SubgraphService(noEndpointConfig); + + await expect(noEndpointService.sampleAnonPiece(defaultSampleParams)).rejects.toThrow( + "No subgraph endpoint configured", + ); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("returns null when the subgraph yields no matching root", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSampleResponse([]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + expect(piece).toBeNull(); + }); + + it("parses the sampled root into a decoded candidate piece", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSampleResponse([makeSampleRoot()]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + + expect(piece).toMatchObject({ + pieceCid: EXAMPLE_PIECE_CID, + pieceId: "1", + dataSetId: "42", + rawSize: "1048576", + withIPFSIndexing: true, + ipfsRootCid: "bafyroot", + pdpPaymentEndEpoch: null, + indexedAtBlock: 12345, + }); + }); + + it("returns pdpPaymentEndEpoch as bigint when the dataset is terminating", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => + makeSampleResponse([ + makeSampleRoot({ + proofSet: { + setId: "42", + withIPFSIndexing: true, + fwssPayer: FWSS_PAYER.toLowerCase(), + pdpPaymentEndEpoch: "5000", + }, + }), + ]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + expect(piece?.pdpPaymentEndEpoch).toBe(5000n); + }); + + it("lowercases SP and payer addresses before querying", async () => { + fetchMock.mockResolvedValueOnce({ ok: true, json: async () => makeSampleResponse([]) }); + + await service.sampleAnonPiece(defaultSampleParams); + + const [, opts] = fetchMock.mock.calls[0]; + const body = JSON.parse(opts.body as string); + 
expect(body.variables.serviceProvider).toBe(FWSS_SP_ADDRESS.toLowerCase()); + expect(body.variables.payer).toBe(FWSS_PAYER.toLowerCase()); + expect(body.query).toContain("withIPFSIndexing: true"); + }); + + it("uses the any-pool query when pool is 'any'", async () => { + fetchMock.mockResolvedValueOnce({ ok: true, json: async () => makeSampleResponse([]) }); + + await service.sampleAnonPiece({ ...defaultSampleParams, pool: "any" }); + + const [, opts] = fetchMock.mock.calls[0]; + const body = JSON.parse(opts.body as string); + expect(body.query).not.toContain("withIPFSIndexing: true"); + }); + + it("returns null when the sampled root has an undecodable CID", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSampleResponse([makeSampleRoot({ cid: "0xdeadbeef" })]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + expect(piece).toBeNull(); + }); + + it("throws after max retries on repeated HTTP errors", async () => { + fetchMock.mockResolvedValue({ ok: false, status: 500, statusText: "Internal Server Error" }); + + const promise = service.sampleAnonPiece(defaultSampleParams); + promise.catch(() => {}); + await vi.runAllTimersAsync(); + + await expect(promise).rejects.toThrow("Failed to fetch subgraph sample_anon_piece_indexed after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("does not retry on schema validation failure", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: { _meta: { block: { number: 1 } } } }), // missing roots + }); + + await expect(service.sampleAnonPiece(defaultSampleParams)).rejects.toThrow(/validation failed/i); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + }); +}); diff --git a/apps/backend/src/subgraph/subgraph.service.ts b/apps/backend/src/subgraph/subgraph.service.ts new file mode 100644 index 00000000..3d4e8370 --- /dev/null +++ b/apps/backend/src/subgraph/subgraph.service.ts @@ -0,0 +1,422 @@ +import { 
Injectable, Logger } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { toStructuredError } from "../common/logging.js"; +import type { IBlockchainConfig, IConfig } from "../config/app.config.js"; +import { buildSampleAnonPieceQuery, Queries } from "./queries.js"; +import type { + AnonCandidatePiece, + GraphQLResponse, + ProviderDataSetResponse, + ProvidersWithDataSetsOptions, + RawSampleAnonPieceResponse, + SubgraphMeta, +} from "./types.js"; +import { + decodePieceCid, + validateProviderDataSetResponse, + validateSampleAnonPieceResponse, + validateSubgraphMetaResponse, +} from "./types.js"; + +/** Pool of pieces to sample from. */ +export type AnonPiecePool = "indexed" | "any"; + +/** Inputs for a single anonymous piece sample query. */ +export type SampleAnonPieceParams = { + /** Service provider address (lowercase hex). */ + serviceProvider: string; + /** Dealbot's own payer address (excluded to keep the sample non-dealbot). */ + payer: string; + /** Uniform-random 32-byte sort key as `0x`-prefixed hex. */ + sampleKey: string; + /** Inclusive lower bound on raw piece size in bytes (decimal string). */ + minSize: string; + /** Inclusive upper bound on raw piece size in bytes (decimal string). */ + maxSize: string; + /** Which pool to sample from. */ + pool: AnonPiecePool; +}; + +/** + * Error thrown when data validation fails. + * These errors should not be retried as they indicate schema/data issues. + */ +class ValidationError extends Error { + constructor(message: string) { + super(message); + this.name = "ValidationError"; + if (Error.captureStackTrace) { + Error.captureStackTrace(this, ValidationError); + } + } +} + +/** + * Client for the dealbot-owned subgraph (driven by `SUBGRAPH_ENDPOINT`). + * + * Functionally a superset of `PDPSubgraphService`: it exposes the same + * `fetchSubgraphMeta` / `fetchProvidersWithDatasets` surface plus the new + * `sampleAnonPiece` query used by anonymous retrievals. 
+ * + * The two services intentionally coexist while we migrate off the upstream + * pdp-explorer subgraph: `PDPSubgraphService` continues to drive the + * established data-retention path against `PDP_SUBGRAPH_ENDPOINT`, and + * `SubgraphService` is scoped to the new anonymous-retrieval flow only. + * Once the dealbot-owned subgraph has soaked in production, this service + * should become the single drop-in replacement for `PDPSubgraphService` + * and `PDP_SUBGRAPH_ENDPOINT` can be retired. + */ +@Injectable() +export class SubgraphService { + private readonly logger: Logger = new Logger(SubgraphService.name); + private readonly blockchainConfig: IBlockchainConfig; + + private static readonly MAX_PROVIDERS_PER_QUERY = 100; + private static readonly MAX_CONCURRENT_REQUESTS = 50; + private static readonly RATE_LIMIT_WINDOW_MS = 10000; + private static readonly MAX_RETRIES = 3; + private static readonly INITIAL_RETRY_DELAY_MS = 1000; + + private requestTimestamps: number[] = []; + + constructor(private readonly configService: ConfigService) { + this.blockchainConfig = this.configService.get("blockchain"); + } + + /** + * Fetch subgraph metadata including the latest indexed block number. 
+ * + * @throws Error if endpoint is not configured or after MAX_RETRIES attempts + */ + async fetchSubgraphMeta(): Promise { + return this.executeQuery("metadata", Queries.GET_SUBGRAPH_META, {}, validateSubgraphMetaResponse); + } + + /** + * Fetch provider-level totals from subgraph with batching, pagination, and rate limiting + * + * @param options - Options containing block number and provider addresses + * @returns Array of providers with their data sets currently proving + */ + async fetchProvidersWithDatasets( + options: ProvidersWithDataSetsOptions, + ): Promise { + const { blockNumber, addresses } = options; + + if (addresses.length === 0) { + return []; + } + + if (addresses.length <= SubgraphService.MAX_PROVIDERS_PER_QUERY) { + return this.fetchWithRetry(blockNumber, addresses); + } + + return this.fetchMultipleBatchesWithRateLimit(blockNumber, addresses); + } + + /** + * Draw a single random anonymous piece for retrieval testing. + * + * Uses the Root.sampleKey (keccak256 of the entity id) to pick the + * smallest key ≥ `params.sampleKey` that matches the filters — a uniform + * random pick when `sampleKey` is generated uniformly. Server-side filters + * cover SP, payer-exclusion, active status, size range, and optionally + * `withIPFSIndexing`. Returns null when no piece matches (callers should + * retry with a fresh sampleKey or relax the pool/bucket). + * + * `pdpPaymentEndEpoch` is returned to the caller for a cheap client-side + * epoch comparison — GraphQL filters on nullable BigInts are awkward. + */ + async sampleAnonPiece(params: SampleAnonPieceParams): Promise { + if (!this.blockchainConfig.subgraphEndpoint) { + // Surface misconfiguration distinctly so it does not look like an empty + // candidate pool (which silently no-ops every anon retrieval job). 
+ this.logger.error({ + event: "subgraph_endpoint_not_configured", + message: "Cannot sample anonymous piece — no subgraph endpoint configured", + }); + throw new Error("No subgraph endpoint configured"); + } + + const query = buildSampleAnonPieceQuery(params.pool); + const variables = { + serviceProvider: params.serviceProvider.toLowerCase(), + payer: params.payer.toLowerCase(), + sampleKey: params.sampleKey, + minSize: params.minSize, + maxSize: params.maxSize, + }; + + const validated = await this.executeQuery( + `sample_anon_piece_${params.pool}`, + query, + variables, + validateSampleAnonPieceResponse, + ); + + const root = validated.roots[0]; + if (!root) { + return null; + } + + try { + return { + pieceCid: decodePieceCid(root.cid), + pieceId: root.rootId, + dataSetId: root.proofSet.setId, + rawSize: root.rawSize, + withIPFSIndexing: root.proofSet.withIPFSIndexing, + ipfsRootCid: root.ipfsRootCID ?? null, + indexedAtBlock: validated._meta.block.number, + pdpPaymentEndEpoch: root.proofSet.pdpPaymentEndEpoch != null ? BigInt(root.proofSet.pdpPaymentEndEpoch) : null, + }; + } catch (error) { + this.logger.warn({ + event: "anon_piece_cid_decode_failed", + message: "Failed to decode piece CID from subgraph data", + dataSetId: root.proofSet.setId, + pieceId: root.rootId, + error: toStructuredError(error), + }); + return null; + } + } + + /** + * Generic single-query helper with retry and rate limiting. Used by queries that + * don't fit the batched provider-fetch shape. 
+ */ + private async executeQuery( + operationName: string, + query: string, + variables: Record, + transform: (data: unknown) => T, + attempt: number = 1, + ): Promise { + if (!this.blockchainConfig.subgraphEndpoint) { + throw new Error("No subgraph endpoint configured"); + } + + try { + await this.enforceRateLimit(); + + const response = await fetch(this.blockchainConfig.subgraphEndpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ query, variables }), + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const result = (await response.json()) as GraphQLResponse; + + if (result.errors) { + const errorMessage = result.errors?.[0]?.message || "Unknown GraphQL error"; + throw new Error(`GraphQL error: ${errorMessage}`); + } + + try { + return transform(result.data); + } catch (validationError) { + const errorMessage = validationError instanceof Error ? validationError.message : "Unknown validation error"; + throw new ValidationError(`Data validation failed: ${errorMessage}`); + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : "Unknown error"; + + if (error instanceof ValidationError) { + this.logger.error({ + event: `subgraph_${operationName}_validation_failed`, + message: `Subgraph ${operationName} validation failed`, + error: toStructuredError(error), + }); + throw error; + } + + if (attempt < SubgraphService.MAX_RETRIES) { + const delay = SubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + this.logger.warn({ + event: `subgraph_${operationName}_request_retry`, + message: `Subgraph ${operationName} request failed. 
Retrying...`, + attempt, + maxRetries: SubgraphService.MAX_RETRIES, + retryDelayMs: delay, + error: toStructuredError(error), + }); + await new Promise((resolve) => setTimeout(resolve, delay)); + return this.executeQuery(operationName, query, variables, transform, attempt + 1); + } + + this.logger.error({ + event: `subgraph_${operationName}_request_failed`, + message: `Subgraph ${operationName} request failed after maximum retries`, + maxRetries: SubgraphService.MAX_RETRIES, + error: toStructuredError(error), + }); + throw new Error( + `Failed to fetch subgraph ${operationName} after ${SubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, + ); + } + } + + /** + * Fetch multiple batches with rate limiting and concurrency control + */ + private async fetchMultipleBatchesWithRateLimit( + blockNumber: number, + addresses: string[], + ): Promise { + const batches: string[][] = []; + for (let i = 0; i < addresses.length; i += SubgraphService.MAX_PROVIDERS_PER_QUERY) { + const addressesLimit = Math.min(addresses.length, i + SubgraphService.MAX_PROVIDERS_PER_QUERY); + batches.push(addresses.slice(i, addressesLimit)); + } + + const allProviders: ProviderDataSetResponse["providers"] = []; + + for (let i = 0; i < batches.length; i += SubgraphService.MAX_CONCURRENT_REQUESTS) { + const batchGroup = batches.slice(i, i + SubgraphService.MAX_CONCURRENT_REQUESTS); + + const results = await Promise.all(batchGroup.map((batch) => this.fetchWithRetry(blockNumber, batch))); + + allProviders.push(...results.flat()); + } + + return allProviders; + } + + /** + * Fetch with exponential backoff retry mechanism + * Assuming initial request to be first attempt + */ + private async fetchWithRetry( + blockNumber: number, + addresses: string[], + attempt: number = 1, + ): Promise { + if (!this.blockchainConfig.subgraphEndpoint) { + throw new Error("No subgraph endpoint configured"); + } + + const variables = { + blockNumber: blockNumber.toString(), + addresses, + }; + + try { + await 
this.enforceRateLimit(); + + const response = await fetch(this.blockchainConfig.subgraphEndpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + query: Queries.GET_PROVIDERS_WITH_DATASETS, + variables, + }), + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const result = (await response.json()) as GraphQLResponse; + + if (result.errors) { + const errorMessage = result.errors?.[0]?.message || "Unknown GraphQL error"; + throw new Error(`GraphQL error: ${errorMessage}`); + } + + let validated: ProviderDataSetResponse; + try { + validated = validateProviderDataSetResponse(result.data); + } catch (validationError) { + const errorMessage = validationError instanceof Error ? validationError.message : "Unknown validation error"; + throw new ValidationError(`Data validation failed: ${errorMessage}`); + } + + return validated.providers; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : "Unknown error"; + + // No need to retry on validation errors - they indicate schema/data issues, not transient failures + if (error instanceof ValidationError) { + this.logger.error({ + event: "subgraph_provider_data_validation_failed", + message: "Subgraph data validation failed", + error: toStructuredError(error), + }); + throw error; + } + + // Retry on network/HTTP errors + if (attempt < SubgraphService.MAX_RETRIES) { + const delay = SubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + this.logger.warn({ + event: "subgraph_provider_request_retry", + message: "Subgraph provider request failed. 
Retrying...", + attempt, + maxRetries: SubgraphService.MAX_RETRIES, + retryDelayMs: delay, + addressCount: addresses.length, + error: toStructuredError(error), + }); + await new Promise((resolve) => setTimeout(resolve, delay)); + return this.fetchWithRetry(blockNumber, addresses, attempt + 1); + } + + this.logger.error({ + event: "subgraph_provider_request_failed", + message: "Subgraph provider request failed after maximum retries", + maxRetries: SubgraphService.MAX_RETRIES, + blockNumber, + addressCount: addresses.length, + error: toStructuredError(error), + }); + throw new Error(`Failed to fetch provider data after ${SubgraphService.MAX_RETRIES} attempts: ${errorMessage}`); + } + } + + /** + * Enforce rate limiting: max 50 requests per 10 seconds + * This rate limit is applied by Goldsky on their public endpoints + * Read more here: https://docs.goldsky.com/subgraphs/graphql-endpoints#public-endpoints + */ + private async enforceRateLimit(requestCount: number = 1): Promise { + if (requestCount > SubgraphService.MAX_CONCURRENT_REQUESTS) { + throw new Error( + `Cannot request ${requestCount} items; exceeds rate limit window of ${SubgraphService.MAX_CONCURRENT_REQUESTS}`, + ); + } + + const now = Date.now(); + const windowStart = now - SubgraphService.RATE_LIMIT_WINDOW_MS; + + this.requestTimestamps = this.requestTimestamps.filter((timestamp) => timestamp > windowStart); + + const availableSlots = SubgraphService.MAX_CONCURRENT_REQUESTS - this.requestTimestamps.length; + + if (requestCount > availableSlots) { + const requiredSlots = requestCount - availableSlots; + + const index = Math.min(this.requestTimestamps.length, requiredSlots) - 1; + const oldestTimestamp = this.requestTimestamps[index] || now; + + // wait time with 10ms buffer + const waitTime = oldestTimestamp + SubgraphService.RATE_LIMIT_WINDOW_MS - now + 10; + + if (waitTime > 0) { + await new Promise((resolve) => setTimeout(resolve, waitTime)); + return this.enforceRateLimit(requestCount); + } + } + + 
// Reserve the slots NOW + for (let i = 0; i < requestCount; i++) { + this.requestTimestamps.push(Date.now()); + } + } +} diff --git a/apps/backend/src/subgraph/types.spec.ts b/apps/backend/src/subgraph/types.spec.ts new file mode 100644 index 00000000..02e6eee0 --- /dev/null +++ b/apps/backend/src/subgraph/types.spec.ts @@ -0,0 +1,245 @@ +import { describe, expect, it } from "vitest"; +import { validateProviderDataSetResponse, validateSubgraphMetaResponse } from "./types.js"; + +// Subgraph stores addresses in lowercase +const VALID_ADDRESS = "0xd8da6bf26964af9d7eed9e03e53415d37aa96045" as const; + +const makeValidProvider = (overrides: Record = {}) => ({ + address: VALID_ADDRESS, + totalFaultedPeriods: "10", + totalProvingPeriods: "100", + proofSets: [ + { + nextDeadline: "1000", + maxProvingPeriod: "100", + }, + ], + ...overrides, +}); + +const makeValidResponse = (providers = [makeValidProvider()]) => ({ + providers, +}); + +describe("validateProviderDataSetResponse", () => { + it("validates and transforms a well-formed response", () => { + const result = validateProviderDataSetResponse(makeValidResponse()); + + expect(result.providers).toHaveLength(1); + const provider = result.providers[0]; + expect(provider.address).toBe(VALID_ADDRESS); + expect(provider.totalFaultedPeriods).toBe(10n); + expect(provider.totalProvingPeriods).toBe(100n); + + const proofSet = provider.proofSets[0]; + expect(proofSet.nextDeadline).toBe(1000n); + expect(proofSet.maxProvingPeriod).toBe(100n); + }); + + it("converts string numbers to bigint", () => { + const result = validateProviderDataSetResponse( + makeValidResponse([ + makeValidProvider({ + totalFaultedPeriods: "999999999999999999", + totalProvingPeriods: "1000000000000000000", + }), + ]), + ); + + expect(typeof result.providers[0].totalFaultedPeriods).toBe("bigint"); + expect(result.providers[0].totalFaultedPeriods).toBe(999999999999999999n); + expect(result.providers[0].totalProvingPeriods).toBe(1000000000000000000n); + }); + 
+ it("accepts an empty providers array", () => { + const result = validateProviderDataSetResponse({ providers: [] }); + expect(result.providers).toEqual([]); + }); + + it("accepts a provider with empty proofSets", () => { + const result = validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ proofSets: [] })])); + expect(result.providers[0].proofSets).toEqual([]); + }); + + it("preserves unknown fields (schema uses .unknown(true))", () => { + const result = validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ extraField: "hello" })])); + expect((result.providers[0] as Record).extraField).toBe("hello"); + }); + + it("throws on missing providers field", () => { + expect(() => validateProviderDataSetResponse({})).toThrow("Invalid provider dataset response format"); + }); + + it("throws on null input", () => { + expect(() => validateProviderDataSetResponse(null)).toThrow("Invalid provider dataset response format"); + }); + + it("throws on missing required provider fields", () => { + expect(() => + validateProviderDataSetResponse({ + providers: [{ address: VALID_ADDRESS }], + }), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on invalid Ethereum address", () => { + expect(() => + validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ address: "not-an-address" })])), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on non-numeric string for bigint fields", () => { + expect(() => + validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ totalFaultedPeriods: "abc" })])), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on negative number string for bigint fields", () => { + expect(() => + validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ totalFaultedPeriods: "-1" })])), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on missing proofSet fields", () => { + expect(() => + 
validateProviderDataSetResponse( + makeValidResponse([ + makeValidProvider({ + proofSets: [{ totalFaultedPeriods: "1" }], + }), + ]), + ), + ).toThrow("Invalid provider dataset response format"); + }); + + it("validates multiple providers in a single response", () => { + const provider1 = makeValidProvider({ address: VALID_ADDRESS, totalFaultedPeriods: "5" }); + const provider2 = makeValidProvider({ + address: "0xAb5801a7D398351b8bE11C439e05C5B3259aeC9B", + totalFaultedPeriods: "15", + }); + + const result = validateProviderDataSetResponse(makeValidResponse([provider1, provider2])); + + expect(result.providers).toHaveLength(2); + expect(result.providers[0].totalFaultedPeriods).toBe(5n); + expect(result.providers[1].totalFaultedPeriods).toBe(15n); + }); + + it("handles zero values correctly", () => { + const result = validateProviderDataSetResponse( + makeValidResponse([ + makeValidProvider({ + totalFaultedPeriods: "0", + totalProvingPeriods: "0", + proofSets: [ + { + nextDeadline: "0", + maxProvingPeriod: "0", + }, + ], + }), + ]), + ); + + expect(result.providers[0].totalFaultedPeriods).toBe(0n); + expect(result.providers[0].totalProvingPeriods).toBe(0n); + expect(result.providers[0].proofSets[0].maxProvingPeriod).toBe(0n); + }); +}); + +describe("validateSubgraphMetaResponse", () => { + it("validates a well-formed subgraph meta response", () => { + const input = { + _meta: { + block: { + number: 12345, + }, + }, + }; + + const result = validateSubgraphMetaResponse(input); + + expect(result._meta.block.number).toBe(12345); + }); + + it("accepts large block numbers", () => { + const input = { + _meta: { + block: { + number: 999999999, + }, + }, + }; + + const result = validateSubgraphMetaResponse(input); + + expect(result._meta.block.number).toBe(999999999); + }); + + it("accepts numeric strings block number", () => { + const result = validateSubgraphMetaResponse({ + _meta: { + block: { + number: "12345", + }, + }, + }); + + 
expect(result._meta.block.number).toBe(12345); + }); + + it("throws on missing _meta field", () => { + expect(() => validateSubgraphMetaResponse({})).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on missing block field", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: {}, + }), + ).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on missing number field", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: { + block: {}, + }, + }), + ).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on null input", () => { + expect(() => validateSubgraphMetaResponse(null)).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on undefined input", () => { + expect(() => validateSubgraphMetaResponse(undefined)).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on negative block number", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: { + block: { + number: -1, + }, + }, + }), + ).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on floating point block number", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: { + block: { + number: 123.45, + }, + }, + }), + ).toThrow("Invalid subgraph meta response format"); + }); +}); diff --git a/apps/backend/src/subgraph/types.ts b/apps/backend/src/subgraph/types.ts new file mode 100644 index 00000000..3a89f360 --- /dev/null +++ b/apps/backend/src/subgraph/types.ts @@ -0,0 +1,252 @@ +import Joi from "joi"; +import { CID } from "multiformats/cid"; +import { Hex, isAddress } from "viem"; + +// ----------------------------------------- +// Types +// ----------------------------------------- + +/** The response from the subgraph GraphQL query */ +export type GraphQLResponse = { + /** The data from the query */ + data?: unknown; + /** The errors from the query */ + errors?: { message: string }[]; +}; + +/** + * Options for fetching providers with data sets + */ 
+export type ProvidersWithDataSetsOptions = { + addresses: string[]; + blockNumber: number; +}; + +/** + * Validated response from the PDP subgraph meta query. + */ +export type SubgraphMeta = { + _meta: { + block: { + number: number; + }; + }; +}; + +/** + * A single proof set within a provider, representing deadline-related proving data. + * All numeric fields are bigints converted from the subgraph string representation. + */ +export type DataSet = { + nextDeadline: bigint; + maxProvingPeriod: bigint; +}; + +/** + * Validated and transformed response from the PDP subgraph providers query. + * Numeric fields are converted from subgraph string representation to bigint. + */ +export type ProviderDataSetResponse = { + providers: { + address: Hex; + totalFaultedPeriods: bigint; + totalProvingPeriods: bigint; + proofSets: DataSet[]; + }[]; +}; + +/** A piece eligible for anonymous retrieval. */ +export type AnonCandidatePiece = { + /** Decoded piece CID string (e.g. "bafk..."). */ + pieceCid: string; + /** On-chain piece ID (rootId) as a decimal string. */ + pieceId: string; + /** On-chain dataset ID (setId) as a decimal string. */ + dataSetId: string; + /** Raw piece size in bytes, as a decimal string. */ + rawSize: string; + /** True iff the parent dataset declared withIPFSIndexing metadata. */ + withIPFSIndexing: boolean; + /** IPFS root CID declared by the client when uploading, or null. */ + ipfsRootCid: string | null; + /** Subgraph-indexed block number at query time. */ + indexedAtBlock: number; + /** pdpPaymentEndEpoch from the parent dataset, or null. */ + pdpPaymentEndEpoch: bigint | null; +}; + +/** + * Validated raw shape of the anonymous piece sampling subgraph response. + * At most one root is returned (`first: 1`). 
+ */ +export type RawSampleAnonPieceResponse = { + _meta: { block: { number: number } }; + roots: Array<{ + rootId: string; + cid: string; + rawSize: string; + ipfsRootCID: string | null; + proofSet: { + setId: string; + withIPFSIndexing: boolean; + fwssPayer: string | null; + pdpPaymentEndEpoch: string | null; + }; + }>; +}; + +// ----------------------------------------- +// Helpers +// ----------------------------------------- + +/** + * Decodes a hex-encoded CID (0x...) into its string representation. + */ +export function decodePieceCid(hexData: string): string { + const bytes = Buffer.from(hexData.slice(2), "hex"); + return CID.decode(new Uint8Array(bytes)).toString(); +} + +// ----------------------------------------- +// Joi Custom Schema Converters +// ----------------------------------------- + +/** Joi custom validator that converts a numeric string to bigint. */ +const toBigInt = (value: unknown, helpers: Joi.CustomHelpers) => { + try { + return BigInt(value as string); + } catch { + return helpers.error("any.invalid", { + message: "Invalid bigint value", + }); + } +}; + +/** Joi custom validator to validate an Ethereum address and normalize to lowercase. 
*/ +const toEthereumAddress = (value: unknown, helpers: Joi.CustomHelpers) => { + if (!isAddress(value as string)) { + return helpers.error("any.invalid", { message: "Invalid Ethereum address" }); + } + + // Normalize to lowercase for consistent key lookups + return (value as string).toLowerCase() as Hex; +}; + +// ----------------------------------------- +// Joi Schemas +// ----------------------------------------- + +const metaSchema = Joi.object({ + _meta: Joi.object({ + block: Joi.object({ + number: Joi.number().integer().positive().required(), + }) + .unknown(true) + .required(), + }) + .unknown(true) + .required(), +}) + .unknown(true) + .required(); + +const dataSetSchema = Joi.object({ + nextDeadline: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), + maxProvingPeriod: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), +}).unknown(true); + +const providerDataSetResponseSchema = Joi.object({ + providers: Joi.array() + .items( + Joi.object({ + address: Joi.string().required().custom(toEthereumAddress), + totalFaultedPeriods: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), + totalProvingPeriods: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), + proofSets: Joi.array().items(dataSetSchema).required(), + }).unknown(true), + ) + .required(), +}) + .unknown(true) + .required(); + +const sampleRootProofSetSchema = Joi.object({ + setId: Joi.string().pattern(/^\d+$/).required(), + withIPFSIndexing: Joi.boolean().required(), + fwssPayer: Joi.string() + .pattern(/^0x[0-9a-fA-F]{40}$/) + .allow(null) + .optional(), + pdpPaymentEndEpoch: Joi.string().pattern(/^\d+$/).allow(null).optional(), +}).unknown(true); + +const sampleRootSchema = Joi.object({ + rootId: Joi.string().pattern(/^\d+$/).required(), + cid: Joi.string() + .pattern(/^0x[0-9a-fA-F]+$/) + .required(), + rawSize: Joi.string().pattern(/^\d+$/).required(), + ipfsRootCID: Joi.string().allow(null).optional(), + proofSet: sampleRootProofSetSchema.required(), 
+}).unknown(true); + +const sampleAnonPieceResponseSchema = Joi.object({ + _meta: Joi.object({ + block: Joi.object({ + number: Joi.number().integer().positive().required(), + }) + .unknown(true) + .required(), + }) + .unknown(true) + .required(), + roots: Joi.array().items(sampleRootSchema).max(1).required(), +}) + .unknown(true) + .required(); + +// ----------------------------------------- +// Validator Functions +// ----------------------------------------- + +/** + * Validates a raw subgraph meta response into SubgraphMeta. + * + * @param value - The raw parsed JSON from the subgraph + * @throws Error if validation fails + */ +export function validateSubgraphMetaResponse(value: unknown): SubgraphMeta { + const { error, value: validated } = metaSchema.validate(value, { abortEarly: false }); + if (error) { + throw new Error(`Invalid subgraph meta response format: ${error.message}`); + } + return validated as SubgraphMeta; +} + +/** + * Validates and transforms a raw subgraph response into ProviderDataSetResponse. + * Converts string fields to bigint. + * + * @param value - The raw parsed JSON from the subgraph + * @throws Error if validation fails + */ +export function validateProviderDataSetResponse(value: unknown): ProviderDataSetResponse { + const { error, value: validated } = providerDataSetResponseSchema.validate(value, { abortEarly: false }); + if (error) { + throw new Error(`Invalid provider dataset response format: ${error.message}`); + } + return validated as ProviderDataSetResponse; +} + +/** + * Validates the raw sampleAnonPiece response from the subgraph. 
+ * + * @throws Error if validation fails + */ +export function validateSampleAnonPieceResponse(value: unknown): RawSampleAnonPieceResponse { + const { error, value: validated } = sampleAnonPieceResponseSchema.validate(value, { abortEarly: false }); + if (error) { + throw new Error(`Invalid sampleAnonPiece response format: ${error.message}`); + } + return validated as RawSampleAnonPieceResponse; +} diff --git a/docs/checks/README.md b/docs/checks/README.md index 74b1a872..136349ee 100644 --- a/docs/checks/README.md +++ b/docs/checks/README.md @@ -4,6 +4,7 @@ The files are: - [production-configuration-and-approval-methodology.md](./production-configuration-and-approval-methodology.md): Defines the production configuration and approval methodology. - [data-storage.md](./data-storage.md): Defines the "data storage check" and how it is calculated. - [retrievals.md](./retrievals.md): Defines the "retrieval check" and how it is calculated. +- [anon-retrievals.md](./anon-retrievals.md): Defines the "anonymous retrieval check" (sampled public pieces, not dealbot-uploaded) and how it is calculated. - [data-retention.md](./data-retention.md): Defines the "data retention check" and how it is calculated. - [events-and-metrics.md](./events-and-metrics.md): Defines the events and metrics that are used to assess SP performance. @@ -14,7 +15,7 @@ DealBot creates synthetic traffic for SPs in the onchain SP registry and monitor ## Terminology ### Check -A "check" refers to a task type that dealbot performs on a SP. We currently have [Data Storage](./data-storage.md) and [Retrieval](./retrievals.md) checks. +A "check" refers to a task type that dealbot performs on an SP. We currently have [Data Storage](./data-storage.md), [Retrieval](./retrievals.md), [Anonymous Retrieval](./anon-retrievals.md), and [Data Retention](./data-retention.md) checks. ### Deal This is synonym for "Data Storage Check". This is covered in the [data-storage.md](./data-storage.md). 
diff --git a/docs/checks/anon-retrievals.md b/docs/checks/anon-retrievals.md new file mode 100644 index 00000000..c3b69610 --- /dev/null +++ b/docs/checks/anon-retrievals.md @@ -0,0 +1,145 @@ +# Anonymous Retrieval Check + +This document is the **source of truth** for how dealbot's Anonymous Retrieval check works. + +Source code links throughout this document point to the current implementation. + +For event and metric definitions to be used by the dashboard, see [Dealbot Events & Metrics](./events-and-metrics.md). + +## Overview + +The Anonymous Retrieval check (sometimes referred to internally as [retrieval++](https://github.com/FilOzone/dealbot/pull/427)) tests publicly discoverable pieces on a storage provider (pieces that were *not* uploaded by dealbot). The intent is to measure SP retrievability against real-world tenant data, not just dealbot's own corpus. + +This is distinct from the [Retrieval check](./retrievals.md), which exercises pieces dealbot itself uploaded as part of a [Data Storage check](./data-storage.md). The Anonymous Retrieval check answers a different question: does the SP serve arbitrary pieces from its broader public corpus, with the same correctness and performance properties as dealbot's controlled pieces? + +### Definition of Successful Retrieval + +A successful anonymous retrieval requires: + +1. **Piece fetch** — `GET {spBaseUrl}/piece/{pieceCid}` returns HTTP 2xx and the response bytes hash to the declared CommP (piece CID). + +If the piece advertises IPFS indexing (`withIPFSIndexing = true` and a non-null `ipfsRootCid`), three additional dimensions are validated *independently*. Importantly, they do not gate the overall `piece_fetch_status`, and each is recorded as its own outcome column / metric: + +2. **CAR parseable:** the fetched bytes parse as a CAR file. +3. **IPNI:** the SP is advertised as a provider for the root CID and a sample of child CIDs via filecoinpin.contact. +4. 
**Block fetch:** a sample of CIDs from the parsed CAR is re-fetched via `{spBaseUrl}/ipfs/{cid}?format=raw` and each response is hash-verified against its declared CID. + +A piece without IPFS indexing is exercised only at step (1). + +Operational timeouts exist to prevent jobs from running indefinitely. If the job exceeds `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, it is aborted; a row is still emitted so that partial metrics (TTFB, bytes, response code) are not lost. + +## Piece Selection + +Unlike the [Retrieval check](./retrievals.md#piece-selection), dealbot does not retrieve from its own deals. Pieces are sampled from the [on-chain subgraph](../../apps/subgraph) of all FWSS-served pieces for the SP under test. + +Selection strategy (per scheduled job, per SP): + +1. **Pick a size bucket** by weighted random: + - `small` (1–20 MiB) — 20% + - `medium` (20–100 MiB) — 50% + - `large` (100–500 MiB) — 30% +2. **Pick a pool**: + - `indexed` (IPFS-indexed pieces) — 80% + - `any` (all FWSS pieces) — 20% +3. **Generate a uniform-random `sampleKey`** and query the subgraph for the smallest `Root.sampleKey ≥ $sampleKey` matching the SP, payer, size range, and pool filters. +4. **Drop the candidate** if `pdpPaymentEndEpoch` has passed. +5. **Fall back** through: (same bucket, opposite pool) → (any bucket, indexed) → (any bucket, any). + +The 80/20 split for `indexed` vs `any` exists so that SPs cannot optimize only their CAR corpus and still appear healthy on this check. + +> [!NOTE] +> The bucket sizes were chosen such that the whole file will still fit into memory. In the future we may implement a streaming verification and parsing.
+ +Source: [`anon-piece-selector.service.ts`](../../apps/backend/src/retrieval-anon/anon-piece-selector.service.ts) + +## What Happens Each Cycle + +```mermaid +flowchart TD + Select["Sample anonymous piece for SP from subgraph"] --> Fetch["GET /piece/{pieceCid}"] + Fetch --> CommP["Hash bytes → verify CommP"] + CommP --> HasIpfs{"piece.withIPFSIndexing
and ipfsRootCid?"] + HasIpfs -- "no" --> Record["Persist ClickHouse row + emit Prometheus metrics"] + HasIpfs -- "yes" --> ParseCar["Parse bytes as CAR"] + ParseCar --> SampleBlocks["Pick N random CIDs
(ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT)"] + SampleBlocks --> Ipni["IPNI: verify SP advertises root + sampled CIDs"] + SampleBlocks --> BlockFetch["GET /ipfs/{cid}?format=raw for each sampled CID"] + BlockFetch --> HashCheck["Hash-verify each response against its CID"] + Ipni --> Record + HashCheck --> Record +``` + +### Piece Fetch + +- **URL:** `{spBaseUrl}/piece/{pieceCid}` (HTTP/2) +- **Buffered in memory** — piece sizes are capped at 500 MiB by selection. +- **Validates CommP** — the CommP of the response bytes must match `pieceCid`. + +Source: [`piece-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/piece-retrieval.service.ts) + +### CAR Validation (only when piece advertises IPFS indexing) + +When the selected piece has `withIPFSIndexing = true` and a non-null `ipfsRootCid`, the fetched bytes are parsed as a CAR and a random sample of `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT` CIDs is exercised: + +- **IPNI check:** `IpniVerificationService.verify(rootCid, sampledCids, sp)` polls filecoinpin.contact until each CID resolves to the SP under test, the timeout fires, or `IPNI_VERIFICATION_TIMEOUT_MS` is reached. +- **Block fetch check:** for each sampled CID, fetch `{spBaseUrl}/ipfs/{cid}?format=raw` and hash-verify the response against the CID. Non-2xx, hash mismatch, unsupported codec, or transport errors all count as a single failed block. + +Source: [`car-validation.service.ts`](../../apps/backend/src/retrieval-anon/car-validation.service.ts) + +## What Gets Asserted + +| # | Assertion | How It's Checked | Retries | Relevant Metric | Implemented? 
| +|---|-----------|------------------|:---:|------------------|:---:| +| 1 | SP serves the piece | `GET /piece/{pieceCid}` returns HTTP 2xx | 0 | [`anonPieceRetrievalLastByteMs`](./events-and-metrics.md#anonPieceRetrievalLastByteMs) | Yes | +| 2 | Bytes match the declared CommP | Hash of response bytes equals `pieceCid` | 0 | [`anonPieceRetrievalStatus`](./events-and-metrics.md#anonPieceRetrievalStatus) | Yes | +| 3 | Bytes parse as a CAR (IPFS-indexed pieces only) | `@ipld/car` parses the response | 0 | [`anonCarParseStatus`](./events-and-metrics.md#anonCarParseStatus) | Yes | +| 4 | SP is advertised on IPNI for root + sampled CIDs | filecoinpin.contact returns provider records | polling until timeout | [`anonIpniStatus`](./events-and-metrics.md#anonIpniStatus) | Yes | +| 5 | Sampled blocks fetch + hash-verify | `/ipfs/{cid}?format=raw` for each sample | 0 | [`anonBlockFetchStatus`](./events-and-metrics.md#anonBlockFetchStatus) | Yes | + +## Result Recording + +Each anonymous retrieval attempt writes one row to the `anon_retrieval_checks` ClickHouse table. The row is emitted **even on abort or unexpected error** so that the partial evidence (TTFB, bytes, response code) is preserved. + +The DDL and column-level comments in [`clickhouse.schema.ts`](../../apps/backend/src/clickhouse/clickhouse.schema.ts) are authoritative. The summary below is for orientation. 
+ +| Column | Meaning | +|--------|---------| +| `timestamp` | When the check started (ms UTC) | +| `probe_location` | Dealbot probe location (`DEALBOT_PROBE_LOCATION`) | +| `sp_address`, `sp_id`, `sp_name` | SP identity | +| `retrieval_id` | Per-event UUID; correlates row to logs and Prometheus | +| `piece_cid`, `data_set_id`, `piece_id`, `raw_size` | Sampled piece identity | +| `with_ipfs_indexing`, `ipfs_root_cid` | Whether the piece advertises IPNI metadata | +| `service_type` | Always `direct_sp` today | +| `retrieval_endpoint` | URL probed for piece fetch | +| `piece_fetch_status` | `success` or `failed` — outcome of `/piece/{cid}` (HTTP 2xx **and** CommP match). CAR/IPNI/block-fetch outcomes live in their own columns and do **not** flip this status. | +| `http_response_code` | Raw HTTP status; null on transport failure | +| `first_byte_ms`, `last_byte_ms`, `bytes_retrieved`, `throughput_bps` | Piece-fetch performance | +| `commp_valid` | Null when retrieval failed before CommP could be hashed | +| `car_parseable`, `car_block_count` | Null when CAR validation was skipped (no IPFS indexing or piece fetch failed) | +| `block_fetch_endpoint`, `block_fetch_valid`, `block_fetch_sampled_count`, `block_fetch_failed_count` | Block-fetch outcomes; null when skipped | +| `ipni_status` | `valid` \| `invalid` \| `skipped` \| `error` | +| `ipni_verify_ms`, `ipni_verified_cids_count`, `ipni_unverified_cids_count` | IPNI check details | +| `error_message` | Failure reason; null on success | + +Source: [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) + +## Metrics Recorded + +Anonymous-retrieval Prometheus metric definitions live in [Dealbot Events & Metrics](./events-and-metrics.md). All anon-retrieval metrics carry `checkType=anon_retrieval`. 
+ +## Configuration + +Key environment variables that control anonymous retrieval testing: + +| Variable | Description | +|----------|-------------| +| `RETRIEVALS_ANON_PER_SP_PER_HOUR` | Anonymous retrieval rate per SP. Falls back to `RETRIEVALS_PER_SP_PER_HOUR` when unset. | +| `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS` | Max end-to-end anon retrieval job runtime before forced abort (default 360s). | +| `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT` | Number of CIDs sampled from the parsed CAR for IPNI + block-fetch verification (default 5, max 50). | +| `IPNI_VERIFICATION_TIMEOUT_MS` | Max time to wait for IPNI provider verification (shared with the Retrieval check). | +| `IPNI_VERIFICATION_POLLING_MS` | Poll interval between IPNI verification attempts (shared). | +| `CONNECT_TIMEOUT_MS` | Connection/header timeout for HTTP requests. | +| `HTTP2_REQUEST_TIMEOUT_MS` | Total timeout for HTTP/2 retrieval requests. | + +See also: [`docs/environment-variables.md`](../environment-variables.md) for the full configuration reference. diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index 6c461f7f..37761e89 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -6,6 +6,16 @@ This document is the intended **source of truth** for the events emitted by deal -## Data Storage Event Model +## Anonymous Retrieval Event Model +The [Anonymous Retrieval check](./anon-retrievals.md) is a single-shot flow per piece: select → fetch piece → (optional) parse CAR + IPNI + block fetch → write one ClickHouse row. + +It is not modeled as a sequence of named lifecycle events. Instead it emits: + +- **Outcome metrics** when each step completes — see the [time](#time-related-metrics) and [status](#status-count-related-metrics) metric tables for `anonPieceRetrievalFirstByteMs`, `anonRetrievalCheckMs`, `anonPieceRetrievalStatus`, `anonCarParseStatus`, `anonIpniStatus`, `anonBlockFetchStatus`, and friends.
+- **One row per attempt** in the `anon_retrieval_checks` [ClickHouse table](#clickhouse-tables), emitted even on abort or unexpected error. +- **Structured log lines** (`anon_retrieval_started`, `anon_retrieval_completed`, `anon_retrieval_no_piece`, `anon_retrieval_car_validation_failed`, `anon_retrieval_clickhouse_insert_failed`) carrying a `retrievalId` so each row can be joined back to log evidence. + +## Data Storage Event Model + Below are the sequence of events for a [Data Storage check](./data-storage.md). The Data Storage flow is used because it encapsulates a [Retrieval check](./retrievals.md) as well. ### Data Storage Event Timeline @@ -87,6 +97,10 @@ sequenceDiagram | `dataStorageCheckMs` | Data Storage | [`uploadToSpStart`](#uploadToSpStart) | [`ipfsRetrievalIntegrityChecked`](#ipfsRetrievalIntegrityChecked) | Duration of a Data Storage check | | | `retrievalCheckMs` | Retrieval | Retrieval check start | [`ipfsRetrievalIntegrityChecked`](#ipfsRetrievalIntegrityChecked) | Duration of a Retrieval check | | | `dataSetCreationMs` | Data-Set Creation | Data-set creation uploadToSpStart | Data-set creation pieceConfirmed | Duration of one data-set creation with confirmed piece (all using `createDataSetWithPiece`) | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | +| `anonPieceRetrievalFirstByteMs` | Anonymous Retrieval | Piece fetch start | First byte received from `/piece/{pieceCid}` | Time to first byte for anonymous piece retrievals | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceRetrievalLastByteMs` | Anonymous Retrieval | Piece fetch start | Last byte received from `/piece/{pieceCid}` | Total time to retrieve an anonymous piece | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceRetrievalThroughputBps` | Anonymous Retrieval | n/a | n/a | `(bytesRetrieved / anonPieceRetrievalLastByteMs) * 1000` | 
[`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonRetrievalCheckMs` | Anonymous Retrieval | Anon retrieval check start | After CAR/IPNI/block-fetch validation completes (or on abort) | End-to-end anonymous retrieval check duration | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | ### Status Count Related Metrics @@ -106,6 +120,11 @@ sequenceDiagram | `dataSetCreationStatus` | Data-Set Creation | Not tied to an [event above](#event-list) but rather to data-set creation start (`pending`) and completion (`success`/`failure.*`) | `pending`, `success`, `failure.timedout`, `failure.other` | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | | `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | | `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. | Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | +| `anonPieceRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` (HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.timedout`, `failure.no_piece`. 
| [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceHttpResponseCode` | Anonymous Retrieval | After piece fetch completes | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` (same classifier as [`ipfsRetrievalHttpResponseCode`](#ipfsRetrievalHttpResponseCode)) | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonCarParseStatus` | Anonymous Retrieval | After CAR validation runs (skipped when piece fetch failed or piece is not IPFS-indexed) | `parseable`, `not_parseable` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonIpniStatus` | Anonymous Retrieval | After CAR validation runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonBlockFetchStatus` | Anonymous Retrieval | After block-fetch sampling runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | ## ClickHouse Tables @@ -115,6 +134,7 @@ When `CLICKHOUSE_URL` is configured, dealbot writes one row per check result to - **`data_storage_checks`** — one row written each time a deal is saved (on every status transition). Populated by [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts). - **`retrieval_checks`** — one row per retrieval attempt. Populated by [`retrieval.service.ts`](../../apps/backend/src/retrieval/retrieval.service.ts). +- **`anon_retrieval_checks`** — one row per [Anonymous Retrieval check](./anon-retrievals.md) attempt; emitted even on abort or unexpected error. Populated by [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts). 
See [Anonymous Retrieval § Result Recording](./anon-retrievals.md#result-recording) for column-level meanings. - **`data_retention_challenges`** — one row per provider per poll cycle. Populated by [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts). All tables share the primary key `(probe_location, sp_address, timestamp)`: diff --git a/docs/checks/production-configuration-and-approval-methodology.md b/docs/checks/production-configuration-and-approval-methodology.md index 5566904d..2e89a45d 100644 --- a/docs/checks/production-configuration-and-approval-methodology.md +++ b/docs/checks/production-configuration-and-approval-methodology.md @@ -41,7 +41,7 @@ Relevant parameters include: | Parameter | Value | Notes | |-----------|-------|-------| | [`PDP_SUBGRAPH_ENDPOINT`](../environment-variables.md#pdp_subgraph_endpoint) | TODO: fill this in | Uses the subgraph from [pdp-explorer](https://github.com/FilOzone/pdp-explorer). | -| [`MIN_NUM_DATASETS_FOR_CHECKS`](../environment-variables.md#dataset-configuration) | 15 | Ensure there are enough datasets with pieces being added so that statistical significance for [Data Retention Fault Rate](#data-retention-fault-rate) can be achieved quicker. Note that on mainnet each dataset incurs 5 challenges[^1] per daily proof[^2]. With this many datasets, an SP can be approved for data retention after a faultless ~7 days even if the SP doesn't have other datasets. | +| [`MIN_NUM_DATASETS_FOR_CHECKS`](../environment-variables.md#dataset-configuration) | 15 | Ensure there are enough datasets with pieces being added so that statistical significance for [Data Retention Fault Rate](#data-retention-fault-rate) can be achieved quicker. Note that on mainnet each dataset incurs 5 challenges[^1] per daily proof[^2]. With this many datasets, an SP can be approved for data retention after a faultless ~7 days even if the SP doesn't have other datasets. 
| See [How are data retention statistics/thresholds calculated?](#how-are-data-retention-statisticsthresholds-calculated) for more details. diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 359d86da..72fadca0 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -8,10 +8,10 @@ This document provides a comprehensive guide to all environment variables used b | ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | | [Application](#application-configuration) | `NODE_ENV`, `DEALBOT_PORT`, `DEALBOT_HOST`, `DEALBOT_RUN_MODE`, `DEALBOT_METRICS_PORT`, `DEALBOT_METRICS_HOST`, `DEALBOT_ALLOWED_ORIGINS`, `ENABLE_DEV_MODE` | | [Database](#database-configuration) | `DATABASE_HOST`, `DATABASE_PORT`, `DATABASE_POOL_MAX`, `DATABASE_USER`, `DATABASE_PASSWORD`, `DATABASE_NAME` | -| [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `PDP_SUBGRAPH_ENDPOINT` | +| [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `PDP_SUBGRAPH_ENDPOINT`, `SUBGRAPH_ENDPOINT` | | [Dataset Versioning](#dataset-versioning) | `DEALBOT_DATASET_VERSION` | | [Scheduling](#scheduling-configuration) | `PROVIDERS_REFRESH_INTERVAL_SECONDS`, `DATA_RETENTION_POLL_INTERVAL_SECONDS`, `DEALBOT_MAINTENANCE_WINDOWS_UTC`, `DEALBOT_MAINTENANCE_WINDOW_MINUTES` | -| [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, `JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, 
`JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, `RETRIEVAL_JOB_TIMEOUT_SECONDS`, `IPFS_BLOCK_FETCH_CONCURRENCY` | +| [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, `RETRIEVALS_ANON_PER_SP_PER_HOUR`, `JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, `JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, `RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT`, `IPFS_BLOCK_FETCH_CONCURRENCY` | | [Dataset](#dataset-configuration) | `DEALBOT_LOCAL_DATASETS_PATH`, `RANDOM_PIECE_SIZES` | | [ClickHouse](#clickhouse-configuration) | `CLICKHOUSE_URL`, `CLICKHOUSE_BATCH_SIZE`, `CLICKHOUSE_FLUSH_INTERVAL_MS`, `DEALBOT_PROBE_LOCATION` | | [Timeouts](#timeout-configuration) | `CONNECT_TIMEOUT_MS`, `HTTP_REQUEST_TIMEOUT_MS`, `HTTP2_REQUEST_TIMEOUT_MS`, `IPNI_VERIFICATION_TIMEOUT_MS`, `IPNI_VERIFICATION_POLLING_MS` | @@ -433,9 +433,11 @@ Session keys are scoped (only storage operations, not deposits or withdrawals) a **Role**: The Graph API endpoint for querying PDP (Proof of Data Possession) subgraph data. This endpoint is used to retrieve data retention info for provider data. +This variable is kept distinct from [`SUBGRAPH_ENDPOINT`](#subgraph_endpoint) so the [dealbot-owned subgraph](../../src/subgraph) can be rolled out incrementally. Only the newer [anonymous-retrieval check](./checks/anon-retrievals.md) points at the new endpoint while the established [data-retention check](./checks/data-retention.md) stays on the upstream subgraph. + **When to update**: -- When switching between different Graph API endpoints +- When switching between different Graph API endpoints for the pdp-explorer subgraph. 
**Example**: @@ -445,6 +447,29 @@ PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp --- +### `SUBGRAPH_ENDPOINT` + +- **Type**: `string` (URL) +- **Required**: No +- **Default**: Empty string (feature disabled) + +**Role**: The Graph API endpoint for the dealbot-owned subgraph. Currently drives only the [anonymous-retrieval](./checks/anon-retrievals.md) candidate-piece query. Once the dealbot-owned subgraph has soaked in production it is intended to replace [`PDP_SUBGRAPH_ENDPOINT`](#pdp_subgraph_endpoint). + +The dealbot-owned subgraph lives at [`apps/subgraph/`](../apps/subgraph) (package `@dealbot/subgraph`) and is deployed to [Goldsky](https://goldsky.com). + +**When to update**: + +- When swapping between the dealbot-owned subgraph slots on Goldsky (mainnet vs calibnet). +- When deploying a new subgraph version. + +**Example**: + +```bash +SUBGRAPH_ENDPOINT=https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn +``` + +--- + ## Dataset Versioning ### `DEALBOT_DATASET_VERSION` @@ -619,6 +644,19 @@ rate-based (per hour) and persisted in Postgres so restarts do not reset timing. --- +### `RETRIEVALS_ANON_PER_SP_PER_HOUR` + +- **Type**: `number` +- **Required**: No +- **Default**: Falls back to `RETRIEVALS_PER_SP_PER_HOUR`, which itself defaults to `2` +- **Limits**: `0.001` – `20` + +**Role**: Target [anonymous retrieval](./checks/anon-retrievals.md) check rate per storage provider. Anonymous retrievals fetch arbitrary FWSS pieces sampled from the on-chain subgraph (not pieces dealbot uploaded), so this rate controls coverage of the SP's broader public corpus independently of the dealbot-owned [retrieval check](./checks/retrievals.md) rate. + +**Notes**: Fractional values are supported. For example, `0.5` means one anon retrieval every 2 hours per storage provider. 
+ +--- + ### `DATASET_CREATIONS_PER_SP_PER_HOUR` - **Type**: `number` @@ -784,6 +822,50 @@ Use this to stagger multiple dealbot deployments that are not sharing a database **Note**: This is independent of HTTP-level timeouts. The job timeout enforces end-to-end execution time of a Retrieval Check job. +--- + +### `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS` + +- **Type**: `number` +- **Required**: No +- **Default**: `360` (6 minutes) +- **Minimum**: `60` +- **Enforced**: Yes (config validation) + +**Role**: Maximum runtime for anonymous retrieval jobs before forced abort. Anonymous retrievals fetch arbitrary pieces (up to ~500 MiB) that were not produced by the dealbot, so this is typically larger than `RETRIEVAL_JOB_TIMEOUT_SECONDS`. When the timeout trips, partial metrics (`ttfb_ms`, `bytes_retrieved`, `response_code`) are still persisted so the abort is not silently lost. + +**When to update**: + +- Increase if large pieces are consistently being cut off mid-download +- Decrease to detect and fail stuck retrievals faster + +**Note**: This is independent of HTTP-level timeouts (`CONNECT_TIMEOUT_MS`, `HTTP2_REQUEST_TIMEOUT_MS`). The job timeout covers the end-to-end execution of an Anon Retrieval Check (piece selection, download, CommP validation, CAR/IPNI validation). + +--- + +### `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT` + +- **Type**: `number` (integer) +- **Required**: No +- **Default**: `5` +- **Minimum**: `1` +- **Maximum**: `50` +- **Enforced**: Yes (config validation) + +**Role**: Number of CIDs randomly sampled from the parsed CAR for IPNI verification and block-fetch validation during an [anonymous retrieval check](./checks/anon-retrievals.md). Only applies to pieces with IPFS indexing enabled — pieces without an `ipfsRootCid` skip CAR validation entirely. + +For each sampled CID, dealbot: + +1. Confirms via filecoinpin.contact that the SP is advertised as a provider for the CID. +2. 
Re-fetches the block via `{spBaseUrl}/ipfs/{cid}?format=raw` and hash-verifies the response. + +**When to update**: + +- Increase for stronger statistical confidence that the SP serves the entire DAG correctly (more IPNI queries + per-block fetches per check) +- Decrease to reduce per-check load on the SP and on filecoinpin.contact + +**Note**: A higher sample count multiplies both IPNI traffic and block-fetch traffic per check. The IPNI step is all-or-nothing across the root CID and the sampled child CIDs — see [Anonymous Retrieval § CAR Validation](./checks/anon-retrievals.md#car-validation-only-when-piece-advertises-ipfs-indexing). + --- ### `IPFS_BLOCK_FETCH_CONCURRENCY` diff --git a/kustomize/overlays/local/backend-configmap-local.yaml b/kustomize/overlays/local/backend-configmap-local.yaml index 9226d24e..52918aa2 100644 --- a/kustomize/overlays/local/backend-configmap-local.yaml +++ b/kustomize/overlays/local/backend-configmap-local.yaml @@ -27,6 +27,7 @@ data: JOB_WORKER_POLL_SECONDS: "60" RANDOM_PIECE_SIZES: "10485760" PDP_SUBGRAPH_ENDPOINT: "https://api.goldsky.com/api/public/project_cmdfaaxeuz6us01u359yjdctw/subgraphs/pdp-explorer/calibration311a/gn" + SUBGRAPH_ENDPOINT: "https://api.goldsky.com/api/public/project_cmdfaaxeuz6us01u359yjdctw/subgraphs/pdp-explorer/calibration311a/gn" JOB_SCHEDULER_POLL_SECONDS: "60" CLICKHOUSE_URL: "http://default:@dealbot-clickhouse:8123/dealbot" DEALBOT_PROBE_LOCATION: "local"