From c9bdfa4393b5cb28199c310ee81cea26a40c890a Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Tue, 21 Apr 2026 14:54:42 +0200 Subject: [PATCH 01/28] feat: anon piece selection and retrieval --- .gitignore | 2 + apps/backend/.env.example | 17 +- apps/backend/README.md | 2 +- apps/backend/src/app.module.ts | 2 + apps/backend/src/config/app.config.ts | 86 +++++- .../data-retention/data-retention.module.ts | 4 +- .../data-retention.service.spec.ts | 182 ++++++------- .../data-retention/data-retention.service.ts | 16 +- apps/backend/src/database/database.module.ts | 9 +- .../entities/anon-retrieval.entity.ts | 100 +++++++ .../entities/job-schedule-state.entity.ts | 1 + .../1762000000000-CreateAnonRetrievals.ts | 64 +++++ .../http-client/http-client.service.spec.ts | 93 ++++++- .../src/http-client/http-client.service.ts | 87 +++++-- apps/backend/src/http-client/types.ts | 2 + apps/backend/src/jobs/job-queues.ts | 1 + apps/backend/src/jobs/jobs.module.ts | 2 + apps/backend/src/jobs/jobs.service.spec.ts | 128 ++++----- apps/backend/src/jobs/jobs.service.ts | 101 +++++++- .../metrics-prometheus/check-metric-labels.ts | 2 +- .../check-metrics.service.ts | 63 +++++ .../metrics-prometheus.module.ts | 53 ++++ .../src/pdp-subgraph/pdp-subgraph.module.ts | 8 - apps/backend/src/pdp-subgraph/queries.ts | 24 -- .../anon-piece-selector.service.spec.ts | 168 ++++++++++++ .../anon-piece-selector.service.ts | 208 +++++++++++++++ .../anon-retrieval.service.spec.ts | 189 ++++++++++++++ .../retrieval-anon/anon-retrieval.service.ts | 244 ++++++++++++++++++ .../retrieval-anon/car-validation.service.ts | 223 ++++++++++++++++ .../retrieval-anon/piece-retrieval.service.ts | 195 ++++++++++++++ .../retrieval-anon/retrieval-anon.module.ts | 27 ++ apps/backend/src/retrieval-anon/types.ts | 35 +++ apps/backend/src/subgraph/queries.ts | 78 ++++++ apps/backend/src/subgraph/subgraph.module.ts | 8 + .../subgraph.service.spec.ts} | 167 +++++++++++- .../subgraph.service.ts} | 232 ++++++++++++++--- 
.../{pdp-subgraph => subgraph}/types.spec.ts | 0 .../src/{pdp-subgraph => subgraph}/types.ts | 101 ++++++++ .../src/wallet-sdk/wallet-sdk.service.spec.ts | 2 +- docs/checks/data-retention.md | 10 +- ...-configuration-and-approval-methodology.md | 2 +- docs/environment-variables.md | 34 ++- .../local/backend-configmap-local.yaml | 2 +- pnpm-lock.yaml | 36 +-- 44 files changed, 2683 insertions(+), 327 deletions(-) create mode 100644 apps/backend/src/database/entities/anon-retrieval.entity.ts create mode 100644 apps/backend/src/database/migrations/1762000000000-CreateAnonRetrievals.ts delete mode 100644 apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts delete mode 100644 apps/backend/src/pdp-subgraph/queries.ts create mode 100644 apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts create mode 100644 apps/backend/src/retrieval-anon/anon-piece-selector.service.ts create mode 100644 apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts create mode 100644 apps/backend/src/retrieval-anon/anon-retrieval.service.ts create mode 100644 apps/backend/src/retrieval-anon/car-validation.service.ts create mode 100644 apps/backend/src/retrieval-anon/piece-retrieval.service.ts create mode 100644 apps/backend/src/retrieval-anon/retrieval-anon.module.ts create mode 100644 apps/backend/src/retrieval-anon/types.ts create mode 100644 apps/backend/src/subgraph/queries.ts create mode 100644 apps/backend/src/subgraph/subgraph.module.ts rename apps/backend/src/{pdp-subgraph/pdp-subgraph.service.spec.ts => subgraph/subgraph.service.spec.ts} (79%) rename apps/backend/src/{pdp-subgraph/pdp-subgraph.service.ts => subgraph/subgraph.service.ts} (52%) rename apps/backend/src/{pdp-subgraph => subgraph}/types.spec.ts (100%) rename apps/backend/src/{pdp-subgraph => subgraph}/types.ts (58%) diff --git a/.gitignore b/.gitignore index fc72832b..cbf7f9d7 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ coverage/ # per-package lockfiles are stray apps/*/pnpm-lock.yaml 
!pnpm-lock.yaml + +.tool-versions diff --git a/apps/backend/.env.example b/apps/backend/.env.example index 6815a66f..26469c52 100644 --- a/apps/backend/.env.example +++ b/apps/backend/.env.example @@ -23,7 +23,8 @@ WALLET_ADDRESS=0x0000000000000000000000000000000000000000 WALLET_PRIVATE_KEY=your_private_key_here CHECK_DATASET_CREATION_FEES=true USE_ONLY_APPROVED_PROVIDERS=true -PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp +# Point at the dealbot-owned subgraph on Goldsky (see apps/subgraph/README.md). +SUBGRAPH_ENDPOINT=https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn # Minimum number of datasets per SP (default: 1). When > 1, a separate data_set_creation job provisions extra datasets. MIN_NUM_DATASETS_FOR_CHECKS=1 @@ -52,6 +53,9 @@ DEALBOT_MAINTENANCE_WINDOW_MINUTES=20 DEALS_PER_SP_PER_HOUR=2 DATASET_CREATIONS_PER_SP_PER_HOUR=1 RETRIEVALS_PER_SP_PER_HOUR=1 +RETRIEVALS_ANON_PER_SP_PER_HOUR= +ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT=5 +METRICS_PER_HOUR=2 PG_BOSS_LOCAL_CONCURRENCY=20 JOB_SCHEDULER_POLL_SECONDS=300 JOB_WORKER_POLL_SECONDS=60 @@ -60,6 +64,7 @@ JOB_SCHEDULE_PHASE_SECONDS=0 JOB_ENQUEUE_JITTER_SECONDS=0 DEAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for deal jobs (TODO: reduce default to 3m) RETRIEVAL_JOB_TIMEOUT_SECONDS=60 # 1m: Max runtime for retrieval jobs (TODO: reduce default to 30s) +ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for anon retrieval jobs (pieces up to ~70 MiB) IPFS_BLOCK_FETCH_CONCURRENCY=6 # Parallel block fetches when validating IPFS DAGs DEALBOT_PGBOSS_POOL_MAX=1 DEALBOT_PGBOSS_SCHEDULER_ENABLED=true @@ -73,9 +78,13 @@ PROXY_LIST=http://username:password@host:port,http://username:password@host:port PROXY_LOCATIONS=l1,l2 # Timeout Configuration (in milliseconds) -CONNECT_TIMEOUT_MS=10000 # 10s: Initial connection timeout -HTTP_REQUEST_TIMEOUT_MS=240000 # 4m: Total transfer timeout for HTTP/1.1 (10MiB @ 170KB/s + overhead) -HTTP2_REQUEST_TIMEOUT_MS=240000 # 4m: Total transfer timeout 
for HTTP/2 (10MiB @ 170KB/s + overhead) +CONNECT_TIMEOUT_MS=10000 # 10s: Connection + response-headers timeout (scoped to the header phase only) +# HTTP_REQUEST_TIMEOUT_MS and HTTP2_REQUEST_TIMEOUT_MS default to the longest job timeout above +# (max of DEAL_/RETRIEVAL_/ANON_RETRIEVAL_/DATA_SET_CREATION_/MAX_PIECE_CLEANUP_ * 1000 ms) so the +# HTTP-level ceiling never pre-empts a job-scoped AbortSignal. Only override when you have a non-job +# caller of HttpClientService that needs a specific deadline. +# HTTP_REQUEST_TIMEOUT_MS=360000 +# HTTP2_REQUEST_TIMEOUT_MS=360000 # SP Blocklists configuration # BLOCKED_SP_IDS=1234,5678 diff --git a/apps/backend/README.md b/apps/backend/README.md index 19ee970a..4805080f 100644 --- a/apps/backend/README.md +++ b/apps/backend/README.md @@ -104,7 +104,7 @@ All configuration is done via environment variables in `.env`. | `CHECK_DATASET_CREATION_FEES` | Check fees before dataset creation | `true` | | `ENABLE_IPNI_TESTING` | IPNI testing mode (`disabled`/`random`/`always`) | `always` | | `USE_ONLY_APPROVED_PROVIDERS` | Only use approved storage providers | `true` | -| `PDP_SUBGRAPH_ENDPOINT` | PDP subgraph API endpoint for PDP proof-set/data-retention | `https://api.thegraph.com/subgraphs/filecoin/pdp` | +| `SUBGRAPH_ENDPOINT` | Subgraph GraphQL endpoint for PDP proof-set/data-retention and anon-retrieval queries | `https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn` | ### Scheduling Configuration (pg-boss) diff --git a/apps/backend/src/app.module.ts b/apps/backend/src/app.module.ts index 569ec5e4..0580f339 100644 --- a/apps/backend/src/app.module.ts +++ b/apps/backend/src/app.module.ts @@ -13,6 +13,7 @@ import { JobsModule } from "./jobs/jobs.module.js"; import { MetricsPrometheusModule } from "./metrics-prometheus/metrics-prometheus.module.js"; import { ProvidersModule } from "./providers/providers.module.js"; import { RetrievalModule } from "./retrieval/retrieval.module.js"; +import { RetrievalAnonModule } from 
"./retrieval-anon/retrieval-anon.module.js"; @Module({ imports: [ @@ -28,6 +29,7 @@ import { RetrievalModule } from "./retrieval/retrieval.module.js"; JobsModule, DealModule, RetrievalModule, + RetrievalAnonModule, DataSourceModule, ProvidersModule, ...(process.env.ENABLE_DEV_MODE === "true" ? [DevToolsModule] : []), diff --git a/apps/backend/src/config/app.config.ts b/apps/backend/src/config/app.config.ts index b3b32a37..4e49e4d8 100644 --- a/apps/backend/src/config/app.config.ts +++ b/apps/backend/src/config/app.config.ts @@ -56,7 +56,7 @@ export const configValidationSchema = Joi.object({ USE_ONLY_APPROVED_PROVIDERS: Joi.boolean().default(true), DEALBOT_DATASET_VERSION: Joi.string().optional(), MIN_NUM_DATASETS_FOR_CHECKS: Joi.number().integer().min(1).default(1), - PDP_SUBGRAPH_ENDPOINT: Joi.string().uri().optional().allow(""), + SUBGRAPH_ENDPOINT: Joi.string().uri().optional().allow(""), // Scheduling PROVIDERS_REFRESH_INTERVAL_SECONDS: Joi.number().default(4 * 3600), @@ -80,6 +80,7 @@ export const configValidationSchema = Joi.object({ DEALS_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).default(4), DATASET_CREATIONS_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).default(1), RETRIEVALS_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).default(2), + RETRIEVALS_ANON_PER_SP_PER_HOUR: Joi.number().min(0.001).max(20).optional(), // Polling interval for pg-boss scheduler (lower = more responsive, higher = less DB chatter). 
JOB_SCHEDULER_POLL_SECONDS: Joi.number().min(60).default(300), JOB_WORKER_POLL_SECONDS: Joi.number().min(5).default(60), @@ -91,8 +92,10 @@ export const configValidationSchema = Joi.object({ JOB_ENQUEUE_JITTER_SECONDS: Joi.number().min(0).default(0), DEAL_JOB_TIMEOUT_SECONDS: Joi.number().min(120).default(360), // 6 minutes max runtime for data storage jobs (TODO: reduce default to 3 minutes) RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(60), // 1 minute max runtime for retrieval jobs (TODO: reduce default to 30 seconds) + ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(360), // 6 minutes max runtime for anon retrieval jobs (pieces can be up to ~70 MiB) DATA_SET_CREATION_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(300), // 5 minutes max runtime for dataset creation jobs IPFS_BLOCK_FETCH_CONCURRENCY: Joi.number().integer().min(1).max(32).default(6), + ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT: Joi.number().integer().min(1).max(50).default(5), // Piece Cleanup MAX_DATASET_STORAGE_SIZE_BYTES: Joi.number() @@ -131,8 +134,9 @@ export const configValidationSchema = Joi.object({ // Timeouts (in milliseconds) CONNECT_TIMEOUT_MS: Joi.number().min(1000).default(10000), // 10 seconds to establish connection/receive headers - HTTP_REQUEST_TIMEOUT_MS: Joi.number().min(1000).default(240000), // 4 minutes total for HTTP requests (10MiB @ 170KB/s + overhead) - HTTP2_REQUEST_TIMEOUT_MS: Joi.number().min(1000).default(240000), // 4 minutes total for HTTP/2 requests (10MiB @ 170KB/s + overhead) + // Defaults intentionally omitted so loadConfig can derive them from the longest job timeout. 
+ HTTP_REQUEST_TIMEOUT_MS: Joi.number().min(1000).optional(), + HTTP2_REQUEST_TIMEOUT_MS: Joi.number().min(1000).optional(), IPNI_VERIFICATION_TIMEOUT_MS: Joi.number().min(1000).default(60000), // 60 seconds max time to wait for IPNI verification IPNI_VERIFICATION_POLLING_MS: Joi.number().min(250).default(2000), // 2 seconds between IPNI verification polls @@ -173,7 +177,7 @@ export interface IBlockchainConfig { useOnlyApprovedProviders: boolean; dealbotDataSetVersion?: string; minNumDataSetsForChecks: number; - pdpSubgraphEndpoint?: string; + subgraphEndpoint?: string; } export interface ISchedulingConfig { @@ -264,6 +268,14 @@ export interface IJobsConfig { * Uses AbortController to actively cancel job execution. */ retrievalJobTimeoutSeconds: number; + /** + * Maximum runtime (seconds) for anonymous retrieval jobs before forced abort. + * + * Anonymous retrievals fetch arbitrary pieces (up to ~70 MiB), so this is + * typically larger than `retrievalJobTimeoutSeconds`. Uses AbortController + * to actively cancel job execution while still persisting partial metrics. + */ + anonRetrievalJobTimeoutSeconds: number; /** * Target number of piece cleanup runs per storage provider per hour. * @@ -278,6 +290,12 @@ export interface IJobsConfig { * Only used when `DEALBOT_JOBS_MODE=pgboss`. */ maxPieceCleanupRuntimeSeconds: number; + + /** + * Target number of anonymous retrieval tests per storage provider per hour. + * Defaults to retrievalsPerSpPerHour when not set. + */ + retrievalsAnonPerSpPerHour: number; } export interface IDatasetConfig { @@ -295,6 +313,10 @@ export interface ITimeoutConfig { export interface IRetrievalConfig { ipfsBlockFetchConcurrency: number; + /** + * Number of CAR blocks to sample for IPNI + block-fetch validation. 
+ */ + anonBlockSampleCount: number; } export interface IPieceCleanupConfig { @@ -336,6 +358,43 @@ export interface IConfig { } export function loadConfig(): IConfig { + const jobTimeoutSeconds = { + deal: Number.parseInt(process.env.DEAL_JOB_TIMEOUT_SECONDS || "360", 10), + retrieval: Number.parseInt(process.env.RETRIEVAL_JOB_TIMEOUT_SECONDS || "60", 10), + anonRetrieval: Number.parseInt(process.env.ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS || "360", 10), + dataSetCreation: Number.parseInt(process.env.DATA_SET_CREATION_JOB_TIMEOUT_SECONDS || "300", 10), + pieceCleanup: Number.parseInt(process.env.MAX_PIECE_CLEANUP_RUNTIME_SECONDS || "300", 10), + }; + + // HTTP-level request timeouts default to the longest job timeout so the + // per-request ceiling never caps below the per-job budget. Any job-scoped + // AbortSignal fires first and is authoritative; the HTTP timer only kicks + // in for callers that do not pass a parent signal. + const longestJobTimeoutMs = Math.max(...Object.values(jobTimeoutSeconds)) * 1000; + + const httpRequestTimeoutMs = Number.parseInt(process.env.HTTP_REQUEST_TIMEOUT_MS || String(longestJobTimeoutMs), 10); + const http2RequestTimeoutMs = Number.parseInt( + process.env.HTTP2_REQUEST_TIMEOUT_MS || String(longestJobTimeoutMs), + 10, + ); + + // Misconfiguration guard: if someone explicitly sets an HTTP timeout below + // the longest job timeout, the HTTP-level timer will abort in-flight work + // before the job signal has a chance to report it. Warn loudly so this is + // caught at boot rather than inferred from short-timeout incidents later. + for (const [name, value] of [ + ["HTTP_REQUEST_TIMEOUT_MS", httpRequestTimeoutMs], + ["HTTP2_REQUEST_TIMEOUT_MS", http2RequestTimeoutMs], + ] as const) { + if (value < longestJobTimeoutMs) { + // eslint-disable-next-line no-console + console.warn( + `[config] ${name}=${value}ms is lower than the longest job timeout (${longestJobTimeoutMs}ms). 
` + + `HTTP requests may abort before the job signal fires, producing short, unexplained timeouts.`, + ); + } + } + return { app: { env: process.env.NODE_ENV || "development", @@ -378,7 +437,7 @@ export function loadConfig(): IConfig { useOnlyApprovedProviders: process.env.USE_ONLY_APPROVED_PROVIDERS !== "false", dealbotDataSetVersion: process.env.DEALBOT_DATASET_VERSION, minNumDataSetsForChecks: Number.parseInt(process.env.MIN_NUM_DATASETS_FOR_CHECKS || "1", 10), - pdpSubgraphEndpoint: process.env.PDP_SUBGRAPH_ENDPOINT || "", + subgraphEndpoint: process.env.SUBGRAPH_ENDPOINT || "", }, scheduling: { providersRefreshIntervalSeconds: Number.parseInt(process.env.PROVIDERS_REFRESH_INTERVAL_SECONDS || "14400", 10), @@ -401,11 +460,15 @@ export function loadConfig(): IConfig { catchupMaxEnqueue: Number.parseInt(process.env.JOB_CATCHUP_MAX_ENQUEUE || "10", 10), schedulePhaseSeconds: Number.parseInt(process.env.JOB_SCHEDULE_PHASE_SECONDS || "0", 10), enqueueJitterSeconds: Number.parseInt(process.env.JOB_ENQUEUE_JITTER_SECONDS || "0", 10), - dealJobTimeoutSeconds: Number.parseInt(process.env.DEAL_JOB_TIMEOUT_SECONDS || "360", 10), - retrievalJobTimeoutSeconds: Number.parseInt(process.env.RETRIEVAL_JOB_TIMEOUT_SECONDS || "60", 10), - dataSetCreationJobTimeoutSeconds: Number.parseInt(process.env.DATA_SET_CREATION_JOB_TIMEOUT_SECONDS || "300", 10), + dealJobTimeoutSeconds: jobTimeoutSeconds.deal, + retrievalJobTimeoutSeconds: jobTimeoutSeconds.retrieval, + anonRetrievalJobTimeoutSeconds: jobTimeoutSeconds.anonRetrieval, + retrievalsAnonPerSpPerHour: Number.parseFloat( + process.env.RETRIEVALS_ANON_PER_SP_PER_HOUR || process.env.RETRIEVALS_PER_SP_PER_HOUR || "2", + ), + dataSetCreationJobTimeoutSeconds: jobTimeoutSeconds.dataSetCreation, pieceCleanupPerSpPerHour: Number.parseFloat(process.env.JOB_PIECE_CLEANUP_PER_SP_PER_HOUR || String(1 / 24)), - maxPieceCleanupRuntimeSeconds: Number.parseInt(process.env.MAX_PIECE_CLEANUP_RUNTIME_SECONDS || "300", 10), + 
maxPieceCleanupRuntimeSeconds: jobTimeoutSeconds.pieceCleanup, }, dataset: { localDatasetsPath: process.env.DEALBOT_LOCAL_DATASETS_PATH || DEFAULT_LOCAL_DATASETS_PATH, @@ -427,13 +490,14 @@ export function loadConfig(): IConfig { }, timeouts: { connectTimeoutMs: Number.parseInt(process.env.CONNECT_TIMEOUT_MS || "10000", 10), - httpRequestTimeoutMs: Number.parseInt(process.env.HTTP_REQUEST_TIMEOUT_MS || "240000", 10), - http2RequestTimeoutMs: Number.parseInt(process.env.HTTP2_REQUEST_TIMEOUT_MS || "240000", 10), + httpRequestTimeoutMs, + http2RequestTimeoutMs, ipniVerificationTimeoutMs: Number.parseInt(process.env.IPNI_VERIFICATION_TIMEOUT_MS || "60000", 10), ipniVerificationPollingMs: Number.parseInt(process.env.IPNI_VERIFICATION_POLLING_MS || "2000", 10), }, retrieval: { ipfsBlockFetchConcurrency: Number.parseInt(process.env.IPFS_BLOCK_FETCH_CONCURRENCY || "6", 10), + anonBlockSampleCount: Number.parseInt(process.env.ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT || "5", 10), }, clickhouse: { url: process.env.CLICKHOUSE_URL || undefined, diff --git a/apps/backend/src/data-retention/data-retention.module.ts b/apps/backend/src/data-retention/data-retention.module.ts index f459570a..f0aec1ec 100644 --- a/apps/backend/src/data-retention/data-retention.module.ts +++ b/apps/backend/src/data-retention/data-retention.module.ts @@ -2,12 +2,12 @@ import { Module } from "@nestjs/common"; import { TypeOrmModule } from "@nestjs/typeorm"; import { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { PdpSubgraphModule } from "../pdp-subgraph/pdp-subgraph.module.js"; +import { SubgraphModule } from "../subgraph/subgraph.module.js"; import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; import { DataRetentionService } from "./data-retention.service.js"; @Module({ - imports: [WalletSdkModule, PdpSubgraphModule, 
TypeOrmModule.forFeature([DataRetentionBaseline, StorageProvider])], + imports: [WalletSdkModule, SubgraphModule, TypeOrmModule.forFeature([DataRetentionBaseline, StorageProvider])], providers: [DataRetentionService], exports: [DataRetentionService], }) diff --git a/apps/backend/src/data-retention/data-retention.service.spec.ts b/apps/backend/src/data-retention/data-retention.service.spec.ts index 87ced66a..d2d539cf 100644 --- a/apps/backend/src/data-retention/data-retention.service.spec.ts +++ b/apps/backend/src/data-retention/data-retention.service.spec.ts @@ -7,8 +7,8 @@ import type { IConfig } from "../config/app.config.js"; import type { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; -import type { PDPSubgraphService } from "../pdp-subgraph/pdp-subgraph.service.js"; -import type { ProviderDataSetResponse } from "../pdp-subgraph/types.js"; +import type { SubgraphService } from "../subgraph/subgraph.service.js"; +import type { ProviderDataSetResponse } from "../subgraph/types.js"; import type { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import { DataRetentionService } from "./data-retention.service.js"; @@ -42,7 +42,7 @@ describe("DataRetentionService", () => { let walletSdkServiceMock: { getTestingProviders: ReturnType; }; - let pdpSubgraphServiceMock: { + let subgraphServiceMock: { fetchSubgraphMeta: ReturnType; fetchProvidersWithDatasets: ReturnType; }; @@ -69,7 +69,7 @@ describe("DataRetentionService", () => { configServiceMock = { get: vi.fn((key: keyof IConfig) => { if (key === "blockchain") { - return { pdpSubgraphEndpoint: "https://example.com/subgraph" }; + return { subgraphEndpoint: "https://example.com/subgraph" }; } if (key === "spBlocklists") { return { ids: new Set(), addresses: new Set() }; @@ -95,7 +95,7 @@ 
describe("DataRetentionService", () => { ]), }; - pdpSubgraphServiceMock = { + subgraphServiceMock = { fetchSubgraphMeta: vi.fn().mockResolvedValue({ _meta: { block: { @@ -146,7 +146,7 @@ describe("DataRetentionService", () => { service = new DataRetentionService( configServiceMock, walletSdkServiceMock as unknown as WalletSdkService, - pdpSubgraphServiceMock as unknown as PDPSubgraphService, + subgraphServiceMock as unknown as SubgraphService, mockBaselineRepository as unknown as Repository, mockSPRepository as unknown as Repository, counterMock as unknown as Counter, @@ -155,15 +155,15 @@ describe("DataRetentionService", () => { ); }); - it("returns early when pdpSubgraphEndpoint is empty", async () => { + it("returns early when subgraphEndpoint is empty", async () => { (configServiceMock.get as ReturnType).mockReturnValue({ - pdpSubgraphEndpoint: "", + subgraphEndpoint: "", }); await service.pollDataRetention(); - expect(pdpSubgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(subgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); + expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("returns early when no testing providers configured", async () => { @@ -171,31 +171,31 @@ describe("DataRetentionService", () => { await service.pollDataRetention(); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("returns early when all providers are blocked for data-retention", async () => { (configServiceMock.get as ReturnType).mockImplementation((key: string) => { - if (key === "blockchain") return { pdpSubgraphEndpoint: "https://example.com/subgraph" }; + if (key === "blockchain") return { subgraphEndpoint: "https://example.com/subgraph" }; if (key === "spBlocklists") return { ids: new Set(), addresses: new 
Set([PROVIDER_A, PROVIDER_B]) }; }); await service.pollDataRetention(); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("excludes blocked providers from data-retention polling while retaining unblocked ones", async () => { (configServiceMock.get as ReturnType).mockImplementation((key: string) => { - if (key === "blockchain") return { pdpSubgraphEndpoint: "https://example.com/subgraph" }; + if (key === "blockchain") return { subgraphEndpoint: "https://example.com/subgraph" }; if (key === "spBlocklists") return { ids: new Set(), addresses: new Set([PROVIDER_A]) }; }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); const allAddressesPolled: string[] = ( - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mock.calls as [{ addresses: string[] }][] + subgraphServiceMock.fetchProvidersWithDatasets.mock.calls as [{ addresses: string[] }][] ).flatMap(([{ addresses }]) => addresses); expect(allAddressesPolled).toContain(PROVIDER_B.toLowerCase()); expect(allAddressesPolled).not.toContain(PROVIDER_A.toLowerCase()); @@ -206,16 +206,16 @@ describe("DataRetentionService", () => { await service.pollDataRetention(); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("sets baseline on first poll without emitting counters (fresh deploy / new provider)", async () => { - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); - 
expect(pdpSubgraphServiceMock.fetchSubgraphMeta).toHaveBeenCalled(); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledWith({ + expect(subgraphServiceMock.fetchSubgraphMeta).toHaveBeenCalled(); + expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledWith({ blockNumber: 1200, addresses: [PROVIDER_A, PROVIDER_B], }); @@ -239,20 +239,20 @@ describe("DataRetentionService", () => { it("computes deltas correctly on consecutive polls", async () => { // First poll: blockNumber=1200 - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); const firstCallCount = counterMock.labels.mock.calls.length; // Second poll: blockNumber=1300, provider totals changed - pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ + subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300, }, }, }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n, @@ -266,7 +266,7 @@ describe("DataRetentionService", () => { }); it("does not increment counters when deltas are zero", async () => { - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); // First poll await service.pollDataRetention(); @@ -288,7 +288,7 @@ describe("DataRetentionService", () => { const providerA = makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 5n }); const providerB = makeProvider({ address: PROVIDER_B, totalFaultedPeriods: 20n }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([providerA, providerB]); + 
subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([providerA, providerB]); await service.pollDataRetention(); @@ -310,7 +310,7 @@ describe("DataRetentionService", () => { ]); const provider = makeProvider(); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); await service.pollDataRetention(); @@ -333,7 +333,7 @@ describe("DataRetentionService", () => { }); it("handles empty providers array without errors", async () => { - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([]); await service.pollDataRetention(); @@ -347,7 +347,7 @@ describe("DataRetentionService", () => { ]); const provider = makeProvider(); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); await service.pollDataRetention(); @@ -370,7 +370,7 @@ describe("DataRetentionService", () => { }); it("catches and logs errors without rethrowing", async () => { - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("subgraph down")); + subgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("subgraph down")); // Should not throw await expect(service.pollDataRetention()).resolves.toBeUndefined(); @@ -378,14 +378,14 @@ describe("DataRetentionService", () => { it("resets baseline on negative deltas without incrementing counters", async () => { // First poll: high values - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 100n, totalProvingPeriods: 200n }), ]); await service.pollDataRetention(); counterMock.labels.mockClear(); // Second poll: lower values (e.g., chain reorg or 
subgraph correction) - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 50n, totalProvingPeriods: 100n }), ]); await service.pollDataRetention(); @@ -394,7 +394,7 @@ describe("DataRetentionService", () => { expect(counterMock.labels).not.toHaveBeenCalled(); // Third poll: values increase from new baseline - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 52n, totalProvingPeriods: 105n }), ]); await service.pollDataRetention(); @@ -412,7 +412,7 @@ describe("DataRetentionService", () => { { providerAddress: PROVIDER_A, faultedPeriods: "0", successPeriods: "0", lastBlockNumber: "1000" }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: largeValue, totalProvingPeriods: largeValue * 2n }), ]); @@ -436,7 +436,7 @@ describe("DataRetentionService", () => { { providerAddress: PROVIDER_A, faultedPeriods: "0", successPeriods: "0", lastBlockNumber: "1000" }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: maxSafeInt, totalProvingPeriods: maxSafeInt * 2n }), ]); @@ -456,7 +456,7 @@ describe("DataRetentionService", () => { totalFaultedPeriods: 5n, totalProvingPeriods: 50n, }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); await service.pollDataRetention(); @@ -475,18 +475,18 @@ describe("DataRetentionService", () => { })); walletSdkServiceMock.getTestingProviders.mockReturnValueOnce(manyProviders); - 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([]); await service.pollDataRetention(); // Should be called twice: once for first 50, once for remaining 25 - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenNthCalledWith(1, { + expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); + expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenNthCalledWith(1, { addresses: expect.arrayContaining([expect.any(String)]), blockNumber: 1200, }); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets.mock.calls[0][0].addresses).toHaveLength(50); - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets.mock.calls[1][0].addresses).toHaveLength(25); + expect(subgraphServiceMock.fetchProvidersWithDatasets.mock.calls[0][0].addresses).toHaveLength(50); + expect(subgraphServiceMock.fetchProvidersWithDatasets.mock.calls[1][0].addresses).toHaveLength(25); }); it("continues processing next batch if one batch fails", async () => { @@ -499,20 +499,20 @@ describe("DataRetentionService", () => { walletSdkServiceMock.getTestingProviders.mockReturnValueOnce(manyProviders); // First batch fails, second succeeds - pdpSubgraphServiceMock.fetchProvidersWithDatasets + subgraphServiceMock.fetchProvidersWithDatasets .mockRejectedValueOnce(new Error("Subgraph timeout")) .mockResolvedValueOnce([]); await service.pollDataRetention(); // Both batches should be attempted - expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); + expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); }); it("logs error and skips counter update when provider not found in cache but returned from subgraph", async () => { // Provider C not in cache const PROVIDER_C = "0x1234567890123456789012345678901234567890"; - 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_C })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_C })]); await service.pollDataRetention(); @@ -523,7 +523,7 @@ describe("DataRetentionService", () => { describe("cleanupStaleProviders", () => { it("does not cleanup when no stale providers exist", async () => { // First poll establishes baseline for both providers - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A }), makeProvider({ address: PROVIDER_B }), ]); @@ -536,7 +536,7 @@ describe("DataRetentionService", () => { it("successfully cleans up stale provider with valid database entry", async () => { // First poll: establish baseline for PROVIDER_A - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: PROVIDER_A removed from active list, only PROVIDER_B active @@ -558,7 +558,7 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -589,7 +589,7 @@ describe("DataRetentionService", () => { it("skips cleanup entirely when database fetch fails", async () => { // First poll: establish baseline - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await 
service.pollDataRetention(); // Second poll: provider removed, but DB fails @@ -604,7 +604,7 @@ describe("DataRetentionService", () => { mockSPRepository.find.mockRejectedValueOnce(new Error("Database connection failed")); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -624,7 +624,7 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -637,7 +637,7 @@ describe("DataRetentionService", () => { it("retains baseline when provider not found in database", async () => { // First poll: establish baseline - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed from active list @@ -653,7 +653,7 @@ describe("DataRetentionService", () => { // Database returns empty array (provider not found) mockSPRepository.find.mockResolvedValueOnce([]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -670,7 +670,7 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 12n, 
totalProvingPeriods: 105n }), ]); @@ -683,7 +683,7 @@ describe("DataRetentionService", () => { it("retains baseline when provider has null providerId", async () => { // First poll: establish baseline - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed @@ -706,7 +706,7 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -716,7 +716,7 @@ describe("DataRetentionService", () => { it("retains baseline when counter removal throws error", async () => { // First poll: establish baseline - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed @@ -743,7 +743,7 @@ describe("DataRetentionService", () => { throw new Error("Counter removal failed"); }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -760,7 +760,7 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 12n, totalProvingPeriods: 110n }), ]); @@ -781,7 +781,7 @@ 
describe("DataRetentionService", () => { { id: 3, serviceProvider: PROVIDER_C, name: "Provider C", isApproved: true }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A }), makeProvider({ address: PROVIDER_B }), makeProvider({ address: PROVIDER_C }), @@ -799,7 +799,7 @@ describe("DataRetentionService", () => { { address: PROVIDER_C, name: "Provider C", providerId: 3, isApproved: true }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); @@ -815,7 +815,7 @@ describe("DataRetentionService", () => { it("skips cleanup when processing errors occurred", async () => { // First poll: establish baseline - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed, but processing has errors @@ -824,7 +824,7 @@ describe("DataRetentionService", () => { ]); // Simulate processing error - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("Processing failed")); + subgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("Processing failed")); await service.pollDataRetention(); @@ -841,7 +841,7 @@ describe("DataRetentionService", () => { { id: 1, serviceProvider: PROVIDER_MIXED_CASE, name: "Provider A", isApproved: true }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_MIXED_CASE.toLowerCase() as `0x${string}` 
}), ]); @@ -861,7 +861,7 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -885,7 +885,7 @@ describe("DataRetentionService", () => { // Subgraph returns same values: totalFaultedPeriods=10, totalProvingPeriods=100 // confirmedTotalSuccess = 100 - 10 = 90 // With DB baseline: faultedDelta = 10 - 10 = 0, successDelta = 90 - 90 = 0 - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); @@ -907,7 +907,7 @@ describe("DataRetentionService", () => { // Subgraph returns: totalFaultedPeriods=10, totalProvingPeriods=100 // confirmedTotalSuccess = 100 - 10 = 90 // faultedDelta = 10 - 8 = 2, successDelta = 90 - 85 = 5 - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); @@ -922,7 +922,7 @@ describe("DataRetentionService", () => { }); it("reloads baselines from DB on every poll", async () => { - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); await service.pollDataRetention(); await service.pollDataRetention(); @@ -932,13 +932,13 @@ describe("DataRetentionService", () => { }); it("does not double-count when poll ownership alternates across worker pods", async () => { - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await 
service.pollDataRetention(); const secondPod = new DataRetentionService( configServiceMock, walletSdkServiceMock as unknown as WalletSdkService, - pdpSubgraphServiceMock as unknown as PDPSubgraphService, + subgraphServiceMock as unknown as SubgraphService, mockBaselineRepository as unknown as Repository, mockSPRepository as unknown as Repository, counterMock as unknown as Counter, @@ -946,8 +946,8 @@ describe("DataRetentionService", () => { { insert: vi.fn(), probeLocation: "test" } as unknown as ClickhouseService, ); - pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 11n, totalProvingPeriods: 102n }), ]); await secondPod.pollDataRetention(); @@ -955,8 +955,8 @@ describe("DataRetentionService", () => { counterMock.labels.mockClear(); counterMock.inc.mockClear(); - pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 104n }), ]); await service.pollDataRetention(); @@ -972,8 +972,8 @@ describe("DataRetentionService", () => { ]; mockBaselineRepository.upsert.mockRejectedValueOnce(new Error("DB write failed")); - pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { 
number: 1300 } } }); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -981,8 +981,8 @@ describe("DataRetentionService", () => { expect(counterMock.labels).not.toHaveBeenCalled(); - pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -1003,12 +1003,12 @@ describe("DataRetentionService", () => { }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); // First poll: DB load fails, poll bails out to avoid emitting bloated values await service.pollDataRetention(); expect(mockBaselineRepository.find).toHaveBeenCalledTimes(1); - expect(pdpSubgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); + expect(subgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); expect(counterMock.labels).not.toHaveBeenCalled(); // Second poll: DB load succeeds, baselines restored, normal delta computation @@ -1021,16 +1021,16 @@ describe("DataRetentionService", () => { it("emits real deltas on second poll after fresh deploy baseline-only first poll", async () => { // First poll: fresh deploy, no baselines in DB // Baseline set to: faultedPeriods=10, successPeriods=90 - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); counterMock.labels.mockClear(); counterMock.inc.mockClear(); // Second poll: values have increased - 
pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ + subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } }, }); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -1044,7 +1044,7 @@ describe("DataRetentionService", () => { it("deletes baseline from DB when stale provider is cleaned up", async () => { // First poll: establish baseline for PROVIDER_A - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: PROVIDER_A removed from active list @@ -1056,7 +1056,7 @@ describe("DataRetentionService", () => { { address: PROVIDER_A, name: "Provider A", providerId: 1, isApproved: true }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -1069,7 +1069,7 @@ describe("DataRetentionService", () => { it("emits overdue gauge on first poll (baseline-only)", async () => { // Provider is overdue: currentBlock=1200, // estimatedOverduePeriods = (1200 - 901) / 100 = 2.99 -> 2 - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); @@ -1086,7 +1086,7 @@ describe("DataRetentionService", () => { it("emits overdue gauge = 0 when provider is not overdue", async () => { // nextDeadline=2000 > currentBlock=1200 - 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ proofSets: [] })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ proofSets: [] })]); await service.pollDataRetention(); @@ -1095,7 +1095,7 @@ describe("DataRetentionService", () => { it("emits overdue gauge even on negative delta (baseline reset)", async () => { // First poll: high values - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 100n, totalProvingPeriods: 200n }), ]); await service.pollDataRetention(); @@ -1103,7 +1103,7 @@ describe("DataRetentionService", () => { gaugeMock.set.mockClear(); // Second poll: lower values (negative delta) but still overdue - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 50n, totalProvingPeriods: 100n }), ]); await service.pollDataRetention(); @@ -1115,7 +1115,7 @@ describe("DataRetentionService", () => { it("naturally resets gauge to 0 when subgraph catches up", async () => { // First poll: provider is overdue (currentBlock=1200, nextDeadline=1000) - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); expect(gaugeMock.set).toHaveBeenCalledWith(2); @@ -1124,7 +1124,7 @@ describe("DataRetentionService", () => { gaugeMock.set.mockClear(); // Second poll: subgraph caught up, nextDeadline advanced past currentBlock - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 102n, @@ -1140,7 +1140,7 @@ 
describe("DataRetentionService", () => { it("removes overdue gauge when stale provider is cleaned up", async () => { // First poll: establish baseline for PROVIDER_A - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: PROVIDER_A removed from active list @@ -1152,7 +1152,7 @@ describe("DataRetentionService", () => { { address: PROVIDER_A, name: "Provider A", providerId: 1, isApproved: true }, ]); - pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); diff --git a/apps/backend/src/data-retention/data-retention.service.ts b/apps/backend/src/data-retention/data-retention.service.ts index c6ece7b5..1422bbfd 100644 --- a/apps/backend/src/data-retention/data-retention.service.ts +++ b/apps/backend/src/data-retention/data-retention.service.ts @@ -11,8 +11,8 @@ import { IConfig } from "../config/app.config.js"; import { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { buildCheckMetricLabels, CheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; -import { PDPSubgraphService } from "../pdp-subgraph/pdp-subgraph.service.js"; -import { type ProviderDataSetResponse } from "../pdp-subgraph/types.js"; +import { SubgraphService } from "../subgraph/subgraph.service.js"; +import { type ProviderDataSetResponse } from "../subgraph/types.js"; import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import { type PDPProviderEx } from "../wallet-sdk/wallet-sdk.types.js"; @@ -41,7 +41,7 @@ export 
class DataRetentionService { constructor( private readonly configService: ConfigService, private readonly walletSdkService: WalletSdkService, - private readonly pdpSubgraphService: PDPSubgraphService, + private readonly subgraphService: SubgraphService, @InjectRepository(DataRetentionBaseline) private readonly baselineRepository: Repository, @InjectRepository(StorageProvider) @@ -59,10 +59,10 @@ export class DataRetentionService { * challenge delta since the last poll. */ async pollDataRetention(): Promise { - const pdpSubgraphEndpoint = this.configService.get("blockchain").pdpSubgraphEndpoint; - if (!pdpSubgraphEndpoint) { + const subgraphEndpoint = this.configService.get("blockchain").subgraphEndpoint; + if (!subgraphEndpoint) { this.logger.warn({ - event: "pdp_subgraph_endpoint_not_configured", + event: "subgraph_endpoint_not_configured", message: "No PDP subgraph endpoint configured", }); return; @@ -75,7 +75,7 @@ export class DataRetentionService { } try { - const subgraphMeta = await this.pdpSubgraphService.fetchSubgraphMeta(); + const subgraphMeta = await this.subgraphService.fetchSubgraphMeta(); const allProviderInfos = this.walletSdkService.getTestingProviders(); const spBlocklists = this.configService.get("spBlocklists"); const providerInfos = allProviderInfos?.filter((p) => !isSpBlocked(spBlocklists, p.serviceProvider, p.id)); @@ -104,7 +104,7 @@ export class DataRetentionService { ); try { - const providersFromSubgraph = await this.pdpSubgraphService.fetchProvidersWithDatasets({ + const providersFromSubgraph = await this.subgraphService.fetchProvidersWithDatasets({ blockNumber, addresses: batchAddresses, }); diff --git a/apps/backend/src/database/database.module.ts b/apps/backend/src/database/database.module.ts index 9249c3a9..f3f9ed09 100644 --- a/apps/backend/src/database/database.module.ts +++ b/apps/backend/src/database/database.module.ts @@ -7,6 +7,7 @@ import { fileURLToPath } from "url"; import { toStructuredError } from "../common/logging.js"; 
import { createPinoExitLogger } from "../common/pino.config.js"; import type { IAppConfig, IConfig, IDatabaseConfig } from "../config/app.config.js"; +import { AnonRetrieval } from "./entities/anon-retrieval.entity.js"; import { DataRetentionBaseline } from "./entities/data-retention-baseline.entity.js"; import { Deal } from "./entities/deal.entity.js"; import { JobScheduleState } from "./entities/job-schedule-state.entity.js"; @@ -49,7 +50,7 @@ function toSafeDataSourceContext(options: DataSourceOptions): Record { + await queryRunner.query(` + CREATE TABLE anon_retrievals ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + sp_address VARCHAR NOT NULL, + piece_cid VARCHAR NOT NULL, + data_set_id BIGINT NOT NULL, + piece_id BIGINT NOT NULL, + raw_size BIGINT NOT NULL, + with_ipfs_indexing BOOLEAN NOT NULL, + ipfs_root_cid VARCHAR NULL, + service_type VARCHAR NOT NULL DEFAULT 'direct_sp', + retrieval_endpoint VARCHAR NOT NULL, + status VARCHAR NOT NULL DEFAULT 'pending', + started_at TIMESTAMPTZ NOT NULL, + completed_at TIMESTAMPTZ NULL, + latency_ms INT NULL, + ttfb_ms INT NULL, + throughput_bps INT NULL, + bytes_retrieved BIGINT NULL, + response_code INT NULL, + error_message VARCHAR NULL, + commp_valid BOOLEAN NULL, + car_valid BOOLEAN NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT now() + ) + `); + + // Per-SP dashboards. + await queryRunner.query(` + CREATE INDEX "IDX_anon_retrievals_sp_address" + ON anon_retrievals (sp_address) + `); + + // Used by the recent-dedup query in AnonPieceSelectorService — keeps the + // most-recently-tested CIDs out of the next selection. + await queryRunner.query(` + CREATE INDEX "IDX_anon_retrievals_piece_cid" + ON anon_retrievals (piece_cid) + `); + + // Supports "last N anonymous retrievals" ordering used by the selector. 
+ await queryRunner.query(` + CREATE INDEX "IDX_anon_retrievals_created_at" + ON anon_retrievals (created_at DESC) + `); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS anon_retrievals`); + } +} diff --git a/apps/backend/src/http-client/http-client.service.spec.ts b/apps/backend/src/http-client/http-client.service.spec.ts index 96604139..511910ba 100644 --- a/apps/backend/src/http-client/http-client.service.spec.ts +++ b/apps/backend/src/http-client/http-client.service.spec.ts @@ -64,25 +64,94 @@ describe("HttpClientService", () => { expect(config.timeout).toBe(120000); }); - it("times out HTTP/2 requests using the connection timeout", async () => { + it("passes the configured headersTimeout to undici and translates its error", async () => { const service = await createService(); - if (typeof AbortSignal.timeout !== "function") { - (AbortSignal as any).timeout = () => new AbortController().signal; + let receivedHeadersTimeout: number | undefined; + undiciRequestMock.mockImplementationOnce((_url: string, options: { headersTimeout?: number }) => { + receivedHeadersTimeout = options.headersTimeout; + const err = new Error("Headers Timeout Error") as Error & { code?: string }; + err.name = "HeadersTimeoutError"; + err.code = "UND_ERR_HEADERS_TIMEOUT"; + return Promise.reject(err); + }); + + await expect(service.requestWithMetrics("http://example.com", { httpVersion: "2" })).rejects.toThrow( + "HTTP/2 connection/headers timed out after 25ms", + ); + + expect(receivedHeadersTimeout).toBe(25); + }); + + it("keeps the request signal alive after the connect timeout window elapses", async () => { + const service = await createService(); + + // Previously, connectTimeoutMs (25ms) was folded into the request signal, + // so any download lasting longer than 25ms was aborted mid-stream. The + // signal must now stay live until the transfer timeout or parent signal + // fires. 
+ let sawAbortBeforeResolve = false; + undiciRequestMock.mockImplementationOnce(async (_url: string, options: { signal?: AbortSignal }) => { + await new Promise((r) => setTimeout(r, 75)); + sawAbortBeforeResolve = options.signal?.aborted === true; + async function* body() { + yield Buffer.from("ok"); + } + return { statusCode: 200, body: body() }; + }); + + const result = await service.requestWithMetrics("http://example.com", { httpVersion: "2" }); + + expect(sawAbortBeforeResolve).toBe(false); + expect(result.aborted).toBeUndefined(); + expect(result.metrics.statusCode).toBe(200); + }); + + it("returns partial bytes and metrics when HTTP/2 download is aborted after headers", async () => { + const service = await createService(); + + const parentAbort = new AbortController(); + + async function* abortingBody() { + yield Buffer.from("hello"); + yield Buffer.from(" world"); + // Simulate an abort mid-stream after two chunks. + parentAbort.abort(new Error("Anon retrieval job timeout (60s) for sp1")); + throw new Error("aborted"); } - undiciRequestMock.mockImplementationOnce((_url: string, options: { signal?: AbortSignal }) => { - return new Promise((_resolve, reject) => { - options.signal?.addEventListener("abort", () => reject(new Error("aborted")), { once: true }); - }); + undiciRequestMock.mockImplementationOnce(async () => ({ + statusCode: 200, + body: abortingBody(), + })); + + const result = await service.requestWithMetrics("http://example.com/piece", { + httpVersion: "2", + signal: parentAbort.signal, }); - vi.useFakeTimers(); + expect(result.aborted).toBe(true); + expect(result.abortReason).toContain("timeout"); + expect(result.metrics.statusCode).toBe(200); + expect(result.metrics.responseSize).toBe(11); + expect(Buffer.isBuffer(result.data) ? 
result.data.toString() : "").toBe("hello world"); + }); + + it("rethrows non-abort download errors on HTTP/2", async () => { + const service = await createService(); - const promise = service.requestWithMetrics("http://example.com", { httpVersion: "2" }); - const assertion = expect(promise).rejects.toThrow("HTTP/2 connection/headers timed out after 25ms"); - await vi.advanceTimersByTimeAsync(25); + async function* brokenBody() { + yield Buffer.from("partial"); + throw new Error("network reset"); + } + + undiciRequestMock.mockImplementationOnce(async () => ({ + statusCode: 200, + body: brokenBody(), + })); - await assertion; + await expect(service.requestWithMetrics("http://example.com/piece", { httpVersion: "2" })).rejects.toThrow( + "network reset", + ); }); }); diff --git a/apps/backend/src/http-client/http-client.service.ts b/apps/backend/src/http-client/http-client.service.ts index 48e10e5c..81140162 100644 --- a/apps/backend/src/http-client/http-client.service.ts +++ b/apps/backend/src/http-client/http-client.service.ts @@ -81,12 +81,11 @@ export class HttpClientService { let ttfbTime = 0; let statusCode = 0; - /** - * Dual-timeout strategy for HTTP/2 requests: - * 1. AbortSignal.timeout() - Undici's native timeout (10 min default) - * 2. AbortSignal.timeout() for connection/headers (10 sec default) - */ - const { signal, connectTimeoutSignal } = this.buildHttp2Signals(options.signal); + // Dual-timeout strategy for HTTP/2 requests: + // - `headersTimeout` (undici): scopes the connect + response-headers phase. + // - Combined AbortSignal: transfer-timeout ceiling + parent (job) signal. + const transferTimeoutSignal = AbortSignal.timeout(this.http2TimeoutMs); + const signal = options.signal ? 
anySignal([transferTimeoutSignal, options.signal]) : transferTimeoutSignal; const requestOptions: any = { method, headers: { @@ -94,6 +93,7 @@ export class HttpClientService { ...headers, }, signal, + headersTimeout: this.connectTimeoutMs, }; if (data) { @@ -105,7 +105,8 @@ export class HttpClientService { try { response = await undiciRequest(url, requestOptions); } catch (error) { - if (connectTimeoutSignal.aborted) { + // discern connection error from transfer error + if (isHeadersTimeoutError(error)) { throw new Error(`HTTP/2 connection/headers timed out after ${this.connectTimeoutMs}ms`); } throw error; @@ -115,8 +116,15 @@ export class HttpClientService { statusCode = response.statusCode; const chunks: Buffer[] = []; - for await (const chunk of response.body) { - chunks.push(Buffer.from(chunk)); + let downloadError: unknown; + try { + for await (const chunk of response.body) { + chunks.push(Buffer.from(chunk)); + } + } catch (error) { + // Download-phase failures (e.g. abort signal) fall through so we can + // return the partial buffer + metrics collected so far. 
+ downloadError = error; } const dataBuffer = Buffer.concat(chunks); @@ -133,6 +141,29 @@ export class HttpClientService { httpVersion: "2", }; + if (downloadError !== undefined) { + const aborted = options.signal?.aborted === true || isAbortLikeError(downloadError); + if (!aborted) { + throw downloadError; + } + const abortReason = describeAbortReason(options.signal, downloadError); + this.logger.warn({ + event: "http2_download_aborted", + message: "HTTP/2 download aborted after headers; returning partial data", + url, + bytesReceived: dataBuffer.length, + totalTime: metrics.totalTime, + ttfb: metrics.ttfb, + abortReason, + }); + return { + data: dataBuffer as T, + metrics, + aborted: true, + abortReason, + }; + } + return { data: dataBuffer as T, metrics, @@ -255,24 +286,28 @@ export class HttpClientService { // Fallback for objects/arrays return Buffer.from(JSON.stringify(data)); } +} - private buildHttp2Signals(parentSignal?: AbortSignal): { - signal: AbortSignal; - connectTimeoutSignal: AbortSignal; - } { - const transferTimeoutSignal = AbortSignal.timeout(this.http2TimeoutMs); - const connectTimeoutSignal = AbortSignal.timeout(this.connectTimeoutMs); +function isAbortLikeError(error: unknown): boolean { + if (error instanceof Error) { + return error.name === "AbortError" || error.name === "TimeoutError" || /abort/i.test(error.message); + } + return false; +} - if (parentSignal) { - return { - signal: anySignal([transferTimeoutSignal, connectTimeoutSignal, parentSignal]), - connectTimeoutSignal, - }; - } +/** + * Determines if a given error represents a "Headers Timeout" error. 
+ */ +function isHeadersTimeoutError(error: unknown): boolean { + if (!(error instanceof Error)) return false; + const code = (error as Error & { code?: string }).code; + return error.name === "HeadersTimeoutError" || code === "UND_ERR_HEADERS_TIMEOUT"; +} - return { - signal: anySignal([transferTimeoutSignal, connectTimeoutSignal]), - connectTimeoutSignal, - }; - } +function describeAbortReason(signal: AbortSignal | undefined, fallback: unknown): string { + const reason = signal?.reason; + if (reason instanceof Error && reason.message) return reason.message; + if (typeof reason === "string" && reason.length > 0) return reason; + if (fallback instanceof Error && fallback.message) return fallback.message; + return "aborted"; } diff --git a/apps/backend/src/http-client/types.ts b/apps/backend/src/http-client/types.ts index 7e48ce7d..26892ee6 100644 --- a/apps/backend/src/http-client/types.ts +++ b/apps/backend/src/http-client/types.ts @@ -13,4 +13,6 @@ export interface RequestMetrics { export interface RequestWithMetrics { data: T; metrics: RequestMetrics; + aborted?: boolean; // Set when the request was aborted mid-download after response headers arrived. + abortReason?: string; // Error message when `aborted` is true; human-readable summary of the abort reason. 
} diff --git a/apps/backend/src/jobs/job-queues.ts b/apps/backend/src/jobs/job-queues.ts index 9488ce7b..db475d49 100644 --- a/apps/backend/src/jobs/job-queues.ts +++ b/apps/backend/src/jobs/job-queues.ts @@ -7,3 +7,4 @@ export const LEGACY_DEAL_QUEUE = "deal.run"; export const LEGACY_RETRIEVAL_QUEUE = "retrieval.run"; export const DATA_RETENTION_POLL_QUEUE = "data.retention.poll"; export const PROVIDERS_REFRESH_QUEUE = "providers.refresh"; +export const RETRIEVAL_ANON_QUEUE = "retrieval.anon.run"; diff --git a/apps/backend/src/jobs/jobs.module.ts b/apps/backend/src/jobs/jobs.module.ts index 15ad4d64..69f1edb1 100644 --- a/apps/backend/src/jobs/jobs.module.ts +++ b/apps/backend/src/jobs/jobs.module.ts @@ -7,6 +7,7 @@ import { StorageProvider } from "../database/entities/storage-provider.entity.js import { DealModule } from "../deal/deal.module.js"; import { PieceCleanupModule } from "../piece-cleanup/piece-cleanup.module.js"; import { RetrievalModule } from "../retrieval/retrieval.module.js"; +import { RetrievalAnonModule } from "../retrieval-anon/retrieval-anon.module.js"; import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; import { JobsService } from "./jobs.service.js"; import { JobScheduleRepository } from "./repositories/job-schedule.repository.js"; @@ -17,6 +18,7 @@ import { JobScheduleRepository } from "./repositories/job-schedule.repository.js TypeOrmModule.forFeature([StorageProvider, JobScheduleState]), DealModule, RetrievalModule, + RetrievalAnonModule, WalletSdkModule, DataRetentionModule, PieceCleanupModule, diff --git a/apps/backend/src/jobs/jobs.service.spec.ts b/apps/backend/src/jobs/jobs.service.spec.ts index d556f3d6..c20d0890 100644 --- a/apps/backend/src/jobs/jobs.service.spec.ts +++ b/apps/backend/src/jobs/jobs.service.spec.ts @@ -30,18 +30,18 @@ describe("JobsService schedule rows", () => { }; let dataRetentionServiceMock: { pollDataRetention: ReturnType }; let metricsMocks: { - jobsQueuedGauge: JobsServiceDeps[8]; - 
jobsRetryScheduledGauge: JobsServiceDeps[9]; - oldestQueuedAgeGauge: JobsServiceDeps[10]; - oldestInFlightAgeGauge: JobsServiceDeps[11]; - jobsInFlightGauge: JobsServiceDeps[12]; - jobsEnqueueAttemptsCounter: JobsServiceDeps[13]; - jobsStartedCounter: JobsServiceDeps[14]; - jobsCompletedCounter: JobsServiceDeps[15]; - jobsPausedGauge: JobsServiceDeps[16]; - jobDuration: JobsServiceDeps[17]; - storageProvidersActive: JobsServiceDeps[18]; - storageProvidersTested: JobsServiceDeps[19]; + jobsQueuedGauge: JobsServiceDeps[9]; + jobsRetryScheduledGauge: JobsServiceDeps[10]; + oldestQueuedAgeGauge: JobsServiceDeps[11]; + oldestInFlightAgeGauge: JobsServiceDeps[12]; + jobsInFlightGauge: JobsServiceDeps[13]; + jobsEnqueueAttemptsCounter: JobsServiceDeps[14]; + jobsStartedCounter: JobsServiceDeps[15]; + jobsCompletedCounter: JobsServiceDeps[16]; + jobsPausedGauge: JobsServiceDeps[17]; + jobDuration: JobsServiceDeps[18]; + storageProvidersActive: JobsServiceDeps[19]; + storageProvidersTested: JobsServiceDeps[20]; }; let baseConfigValues: Partial; let configService: JobsServiceDeps[0]; @@ -52,21 +52,22 @@ describe("JobsService schedule rows", () => { jobScheduleRepository: JobsServiceDeps[2]; dealService: JobsServiceDeps[3]; retrievalService: JobsServiceDeps[4]; - walletSdkService: JobsServiceDeps[5]; - dataRetentionService: JobsServiceDeps[6]; - pieceCleanupService: JobsServiceDeps[7]; - jobsQueuedGauge: JobsServiceDeps[8]; - jobsRetryScheduledGauge: JobsServiceDeps[9]; - oldestQueuedAgeGauge: JobsServiceDeps[10]; - oldestInFlightAgeGauge: JobsServiceDeps[11]; - jobsInFlightGauge: JobsServiceDeps[12]; - jobsEnqueueAttemptsCounter: JobsServiceDeps[13]; - jobsStartedCounter: JobsServiceDeps[14]; - jobsCompletedCounter: JobsServiceDeps[15]; - jobsPausedGauge: JobsServiceDeps[16]; - jobDuration: JobsServiceDeps[17]; - storageProvidersActive: JobsServiceDeps[18]; - storageProvidersTested: JobsServiceDeps[19]; + anonRetrievalService: JobsServiceDeps[5]; + walletSdkService: 
JobsServiceDeps[6]; + dataRetentionService: JobsServiceDeps[7]; + pieceCleanupService: JobsServiceDeps[8]; + jobsQueuedGauge: JobsServiceDeps[9]; + jobsRetryScheduledGauge: JobsServiceDeps[10]; + oldestQueuedAgeGauge: JobsServiceDeps[11]; + oldestInFlightAgeGauge: JobsServiceDeps[12]; + jobsInFlightGauge: JobsServiceDeps[13]; + jobsEnqueueAttemptsCounter: JobsServiceDeps[14]; + jobsStartedCounter: JobsServiceDeps[15]; + jobsCompletedCounter: JobsServiceDeps[16]; + jobsPausedGauge: JobsServiceDeps[17]; + jobDuration: JobsServiceDeps[18]; + storageProvidersActive: JobsServiceDeps[19]; + storageProvidersTested: JobsServiceDeps[20]; }>, ) => JobsService; @@ -96,18 +97,18 @@ describe("JobsService schedule rows", () => { }; metricsMocks = { - jobsQueuedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[8], - jobsRetryScheduledGauge: { set: vi.fn() } as unknown as JobsServiceDeps[9], - oldestQueuedAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[10], - oldestInFlightAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[11], - jobsInFlightGauge: { set: vi.fn() } as unknown as JobsServiceDeps[12], - jobsEnqueueAttemptsCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[13], - jobsStartedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[14], - jobsCompletedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[15], - jobsPausedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[16], - jobDuration: { observe: vi.fn() } as unknown as JobsServiceDeps[17], - storageProvidersActive: { set: vi.fn() } as unknown as JobsServiceDeps[18], - storageProvidersTested: { set: vi.fn() } as unknown as JobsServiceDeps[19], + jobsQueuedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[9], + jobsRetryScheduledGauge: { set: vi.fn() } as unknown as JobsServiceDeps[10], + oldestQueuedAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[11], + oldestInFlightAgeGauge: { set: vi.fn() } as unknown as JobsServiceDeps[12], + jobsInFlightGauge: { set: vi.fn() } as 
unknown as JobsServiceDeps[13], + jobsEnqueueAttemptsCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[14], + jobsStartedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[15], + jobsCompletedCounter: { inc: vi.fn() } as unknown as JobsServiceDeps[16], + jobsPausedGauge: { set: vi.fn() } as unknown as JobsServiceDeps[17], + jobDuration: { observe: vi.fn() } as unknown as JobsServiceDeps[18], + storageProvidersActive: { set: vi.fn() } as unknown as JobsServiceDeps[19], + storageProvidersTested: { set: vi.fn() } as unknown as JobsServiceDeps[20], }; const emptySpBlocklists: ISpBlocklistConfig = { @@ -133,6 +134,7 @@ describe("JobsService schedule rows", () => { dataSetCreationJobTimeoutSeconds: 300, pieceCleanupPerSpPerHour: 1, maxPieceCleanupRuntimeSeconds: 300, + retrievalsAnonPerSpPerHour: 2, } as IConfig["jobs"], database: { host: "localhost", @@ -158,9 +160,10 @@ describe("JobsService schedule rows", () => { overrides.jobScheduleRepository ?? (jobScheduleRepositoryMock as unknown as JobsServiceDeps[2]), overrides.dealService ?? ({} as JobsServiceDeps[3]), overrides.retrievalService ?? ({} as JobsServiceDeps[4]), - overrides.walletSdkService ?? ({} as JobsServiceDeps[5]), - overrides.dataRetentionService ?? (dataRetentionServiceMock as unknown as JobsServiceDeps[6]), - overrides.pieceCleanupService ?? ({} as JobsServiceDeps[7]), + overrides.anonRetrievalService ?? ({} as JobsServiceDeps[5]), + overrides.walletSdkService ?? ({} as JobsServiceDeps[6]), + overrides.dataRetentionService ?? (dataRetentionServiceMock as unknown as JobsServiceDeps[7]), + overrides.pieceCleanupService ?? ({} as JobsServiceDeps[8]), overrides.jobsQueuedGauge ?? metricsMocks.jobsQueuedGauge, overrides.jobsRetryScheduledGauge ?? metricsMocks.jobsRetryScheduledGauge, overrides.oldestQueuedAgeGauge ?? 
metricsMocks.oldestQueuedAgeGauge, @@ -284,7 +287,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); // Trigger the timeout immediately by using fake timers @@ -343,7 +346,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, retrievalService: retrievalService as unknown as ConstructorParameters[4], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); vi.useFakeTimers(); @@ -382,7 +385,7 @@ describe("JobsService schedule rows", () => { service = buildService({ retrievalService: retrievalService as unknown as ConstructorParameters[4], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await callPrivate(service, "handleRetrievalJob", { @@ -422,7 +425,7 @@ describe("JobsService schedule rows", () => { service = buildService({ retrievalService: retrievalService as unknown as ConstructorParameters[4], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await expect( @@ -615,12 +618,13 @@ describe("JobsService schedule rows", () => { // Check upserts for providerB const upsertCalls = jobScheduleRepositoryMock.upsertSchedule.mock.calls; const upsertsForB = upsertCalls.filter((call) => call[1] === providerB.address); - expect(upsertsForB).toHaveLength(4); + expect(upsertsForB).toHaveLength(5); expect(upsertsForB.map((call) => call[0]).sort()).toEqual([ "data_set_creation", "deal", "piece_cleanup", "retrieval", + "retrieval_anon", ]); }); @@ -924,7 +928,7 @@ 
describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await callPrivate(service, "handleDealJob", { @@ -963,8 +967,8 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], - pieceCleanupService: pieceCleanupService as unknown as JobsServiceDeps[7], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + pieceCleanupService: pieceCleanupService as unknown as JobsServiceDeps[8], }); await callPrivate(service, "handleDealJob", { @@ -976,7 +980,7 @@ describe("JobsService schedule rows", () => { expect(dealService.createDealForProvider).toHaveBeenCalledTimes(1); }); - it("deal job maps DealJobTerminatedDataSetError to handler_result=error", async () => { + it("data storage job does not run data-storage check when data-set selection aborts", async () => { const completedCounter = metricsMocks.jobsCompletedCounter as unknown as { inc: ReturnType }; vi.useFakeTimers(); vi.setSystemTime(new Date("2024-01-01T12:00:00Z")); @@ -996,7 +1000,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await callPrivate(service, "handleDealJob", { @@ -1025,7 +1029,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as 
ConstructorParameters[6], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1067,7 +1071,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1108,7 +1112,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1153,7 +1157,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + walletSdkService: walletSdkService as unknown as ConstructorParameters[6], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1326,7 +1330,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], }); await callPrivate(service, "handleDealJob", { @@ -1350,7 +1354,7 @@ describe("JobsService schedule rows", () => { service = buildService({ retrievalService: retrievalService as unknown as JobsServiceDeps[4], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], }); await callPrivate(service, "handleRetrievalJob", { @@ -1379,7 +1383,7 @@ describe("JobsService 
schedule rows", () => { service = buildService({ dealService: dealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1421,7 +1425,7 @@ describe("JobsService schedule rows", () => { intervalSeconds: 60, service: buildService({ dealService: dealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], }), expectCheckNotRun: () => expect(dealService.createDealForProvider).not.toHaveBeenCalled(), }, @@ -1431,7 +1435,7 @@ describe("JobsService schedule rows", () => { intervalSeconds: 60, service: buildService({ retrievalService: retrievalService as unknown as JobsServiceDeps[4], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], }), expectCheckNotRun: () => expect(retrievalService.performRandomRetrievalForProvider).not.toHaveBeenCalled(), }, @@ -1441,7 +1445,7 @@ describe("JobsService schedule rows", () => { intervalSeconds: 3600, service: buildService({ dealService: dataSetDealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], }), expectCheckNotRun: () => expect(dataSetDealService.createDataSetWithPiece).not.toHaveBeenCalled(), }, diff --git a/apps/backend/src/jobs/jobs.service.ts b/apps/backend/src/jobs/jobs.service.ts index f8fe1d80..b070de5a 100644 --- a/apps/backend/src/jobs/jobs.service.ts +++ b/apps/backend/src/jobs/jobs.service.ts @@ -16,18 +16,32 @@ import { StorageProvider } from "../database/entities/storage-provider.entity.js import { DealService } from "../deal/deal.service.js"; import { PieceCleanupService } from 
"../piece-cleanup/piece-cleanup.service.js"; import { RetrievalService } from "../retrieval/retrieval.service.js"; +import { AnonRetrievalService } from "../retrieval-anon/anon-retrieval.service.js"; import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import { provisionNextMissingDataSet } from "./data-set-creation.handler.js"; -import { DATA_RETENTION_POLL_QUEUE, PROVIDERS_REFRESH_QUEUE, SP_WORK_QUEUE } from "./job-queues.js"; +import { + DATA_RETENTION_POLL_QUEUE, + PROVIDERS_REFRESH_QUEUE, + RETRIEVAL_ANON_QUEUE, + SP_WORK_QUEUE, +} from "./job-queues.js"; import { JobScheduleRepository } from "./repositories/job-schedule.repository.js"; -type SpJobType = "deal" | "retrieval" | "data_set_creation" | "piece_cleanup"; -const SP_JOB_TYPES: ReadonlySet = new Set(["deal", "retrieval", "data_set_creation", "piece_cleanup"]); +type SpJobType = "deal" | "retrieval" | "data_set_creation" | "retrieval_anon" | "piece_cleanup"; +const SP_JOB_TYPES: ReadonlySet = new Set([ + "deal", + "retrieval", + "retrieval_anon", + "data_set_creation", + "piece_cleanup", +]); + function isSpJobType(jobType: string): jobType is SpJobType { return SP_JOB_TYPES.has(jobType); } type SpJobData = { jobType: SpJobType; spAddress: string; intervalSeconds: number }; +type AnonRetrievalJobData = { spAddress: string; intervalSeconds: number }; type ProvidersRefreshJobData = { intervalSeconds: number }; type SpJob = Job; type DataRetentionJobData = { intervalSeconds: number }; @@ -58,6 +72,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { private readonly jobScheduleRepository: JobScheduleRepository, private readonly dealService: DealService, private readonly retrievalService: RetrievalService, + private readonly anonRetrievalService: AnonRetrievalService, private readonly walletSdkService: WalletSdkService, private readonly dataRetentionService: DataRetentionService, private readonly pieceCleanupService: PieceCleanupService, @@ -258,6 +273,7 @@ 
export class JobsService implements OnModuleInit, OnApplicationShutdown { await boss.createQueue(SP_WORK_QUEUE, { policy: "singleton" }); await boss.createQueue(PROVIDERS_REFRESH_QUEUE); await boss.createQueue(DATA_RETENTION_POLL_QUEUE); + await boss.createQueue(RETRIEVAL_ANON_QUEUE); } private registerWorkers(): void { @@ -335,6 +351,23 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { error: toStructuredError(error), }), ); + void this.boss + .work( + RETRIEVAL_ANON_QUEUE, + { batchSize: 1, localConcurrency: spConcurrency, pollingIntervalSeconds: workerPollSeconds }, + async ([job]) => { + if (!job) return; + await this.handleAnonRetrievalJob(job); + }, + ) + .catch((error) => + this.logger.error({ + event: "worker_register_failed", + message: "Failed to register worker", + queue: RETRIEVAL_ANON_QUEUE, + error: toStructuredError(error), + }), + ); } private getMaintenanceWindowStatus(now: Date = new Date()) { @@ -587,6 +620,51 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { }); } + private async handleAnonRetrievalJob(job: Job): Promise { + const data = job.data; + const spAddress = data.spAddress; + + // Create AbortController for job timeout enforcement + const abortController = new AbortController(); + const timeoutSeconds = this.configService.get("jobs").anonRetrievalJobTimeoutSeconds; + const timeoutMs = Math.max(60000, timeoutSeconds * 1000); + const effectiveTimeoutSeconds = Math.round(timeoutMs / 1000); + const abortReason = new Error(`Anon retrieval job timeout (${effectiveTimeoutSeconds}s) for ${spAddress}`); + const timeoutId = setTimeout(() => { + abortController.abort(abortReason); + }, timeoutMs); + + await this.recordJobExecution("retrieval_anon", async () => { + const logContext = await this.resolveProviderJobContext(spAddress, job.id); + try { + await this.anonRetrievalService.performForProvider(spAddress, abortController.signal, logContext); + return "success"; + } catch (error) { + if 
(abortController.signal.aborted) { + const reason = abortController.signal.reason; + const reasonMessage = reason instanceof Error ? reason.message : String(reason ?? ""); + this.logger.error({ + ...logContext, + event: "anon_retrieval_job_aborted", + message: reasonMessage || "Anon retrieval job aborted after timeout", + timeoutSeconds: effectiveTimeoutSeconds, + error: toStructuredError(reason ?? error), + }); + return "aborted"; + } + this.logger.error({ + ...logContext, + event: "anon_retrieval_job_failed", + message: "Anon retrieval job failed", + error: toStructuredError(error), + }); + throw error; + } finally { + clearTimeout(timeoutId); + } + }); + } + private async handleDataRetentionJob(data: DataRetentionJobData): Promise { void data; await this.recordJobExecution("data_retention_poll", async () => { @@ -865,6 +943,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { private getIntervalSecondsForRates(): { dealIntervalSeconds: number; retrievalIntervalSeconds: number; + retrievalAnonIntervalSeconds: number; dataSetCreationIntervalSeconds: number; dataRetentionPollIntervalSeconds: number; providersRefreshIntervalSeconds: number; @@ -885,9 +964,13 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const dataRetentionPollIntervalSeconds = scheduling.dataRetentionPollIntervalSeconds; const providersRefreshIntervalSeconds = scheduling.providersRefreshIntervalSeconds; + const retrievalsAnonPerHour = jobsConfig.retrievalsAnonPerSpPerHour; + const retrievalAnonIntervalSeconds = Math.max(1, Math.round(3600 / retrievalsAnonPerHour)); + return { dealIntervalSeconds, retrievalIntervalSeconds, + retrievalAnonIntervalSeconds, dataSetCreationIntervalSeconds, dataRetentionPollIntervalSeconds, providersRefreshIntervalSeconds, @@ -907,6 +990,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const { dealIntervalSeconds, retrievalIntervalSeconds, + retrievalAnonIntervalSeconds, 
dataSetCreationIntervalSeconds, dataRetentionPollIntervalSeconds, providersRefreshIntervalSeconds, @@ -924,6 +1008,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const phaseMs = this.schedulePhaseSeconds() * 1000; const dealStartAt = new Date(now.getTime() + phaseMs); const retrievalStartAt = new Date(now.getTime() + phaseMs); + const retrievalAnonStartAt = new Date(now.getTime() + phaseMs); const dataSetCreationStartAt = new Date(now.getTime() + phaseMs); const dataRetentionPollStartAt = new Date(now.getTime() + phaseMs); const providersRefreshStartAt = new Date(now.getTime() + phaseMs); @@ -947,6 +1032,12 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { for (const address of unblockedAddresses) { await this.jobScheduleRepository.upsertSchedule("deal", address, dealIntervalSeconds, dealStartAt); await this.jobScheduleRepository.upsertSchedule("retrieval", address, retrievalIntervalSeconds, retrievalStartAt); + await this.jobScheduleRepository.upsertSchedule( + "retrieval_anon", + address, + retrievalAnonIntervalSeconds, + retrievalAnonStartAt, + ); if (minDataSets >= 1) { await this.jobScheduleRepository.upsertSchedule( "data_set_creation", @@ -1104,6 +1195,8 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { return SP_WORK_QUEUE; case "piece_cleanup": return SP_WORK_QUEUE; + case "retrieval_anon": + return RETRIEVAL_ANON_QUEUE; case "data_retention_poll": return DATA_RETENTION_POLL_QUEUE; case "providers_refresh": @@ -1123,6 +1216,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { if ( row.job_type === "deal" || row.job_type === "retrieval" || + row.job_type === "retrieval_anon" || row.job_type === "data_set_creation" || row.job_type === "piece_cleanup" ) { @@ -1195,6 +1289,7 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { const jobTypes: JobType[] = [ "deal", "retrieval", + "retrieval_anon", "data_set_creation", 
"piece_cleanup", "data_retention_poll", diff --git a/apps/backend/src/metrics-prometheus/check-metric-labels.ts b/apps/backend/src/metrics-prometheus/check-metric-labels.ts index d8447160..9d776586 100644 --- a/apps/backend/src/metrics-prometheus/check-metric-labels.ts +++ b/apps/backend/src/metrics-prometheus/check-metric-labels.ts @@ -1,4 +1,4 @@ -export type CheckType = "dataStorage" | "retrieval" | "dataRetention" | "dataSetCreation"; +export type CheckType = "dataStorage" | "retrieval" | "anon_retrieval" | "dataRetention" | "dataSetCreation"; export type ProviderStatus = "approved" | "unapproved"; export type CheckMetricLabels = { diff --git a/apps/backend/src/metrics-prometheus/check-metrics.service.ts b/apps/backend/src/metrics-prometheus/check-metrics.service.ts index 55975cad..85f1cdcf 100644 --- a/apps/backend/src/metrics-prometheus/check-metrics.service.ts +++ b/apps/backend/src/metrics-prometheus/check-metrics.service.ts @@ -248,3 +248,66 @@ export class DataSetCreationCheckMetrics { this.dataSetCreationStatusCounter.inc({ ...labels, value }); } } + +@Injectable() +export class AnonRetrievalCheckMetrics { + constructor( + @InjectMetric("anonPieceRetrievalFirstByteMs") + private readonly firstByteMs: Histogram, + @InjectMetric("anonPieceRetrievalLastByteMs") + private readonly lastByteMs: Histogram, + @InjectMetric("anonPieceRetrievalThroughputBps") + private readonly throughputBps: Histogram, + @InjectMetric("anonRetrievalCheckMs") + private readonly checkMs: Histogram, + @InjectMetric("anonRetrievalStatus") + private readonly statusCounter: Counter, + @InjectMetric("anonPieceHttpResponseCode") + private readonly httpResponseCounter: Counter, + @InjectMetric("anonCarParseStatus") + private readonly carParseCounter: Counter, + @InjectMetric("anonIpniStatus") + private readonly ipniCounter: Counter, + @InjectMetric("anonBlockFetchStatus") + private readonly blockFetchCounter: Counter, + ) {} + + observeFirstByteMs(labels: CheckMetricLabels, value: number 
| null | undefined): void { + observePositive(this.firstByteMs, labels, value); + } + + observeLastByteMs(labels: CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.lastByteMs, labels, value); + } + + observeThroughput(labels: CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.throughputBps, labels, value); + } + + observeCheckDuration(labels: CheckMetricLabels, value: number | null | undefined): void { + observePositive(this.checkMs, labels, value); + } + + recordStatus(labels: CheckMetricLabels, value: string): void { + this.statusCounter.inc({ ...labels, value }); + } + + recordHttpResponseCode(labels: CheckMetricLabels, statusCode: number): void { + this.httpResponseCounter.inc({ + ...labels, + value: classifyHttpResponseCode(statusCode), + }); + } + + recordCarParseStatus(labels: CheckMetricLabels, parseable: boolean): void { + this.carParseCounter.inc({ ...labels, value: parseable ? "parseable" : "not_parseable" }); + } + + recordIpniStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped"): void { + this.ipniCounter.inc({ ...labels, value }); + } + + recordBlockFetchStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped"): void { + this.blockFetchCounter.inc({ ...labels, value }); + } +} diff --git a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts index 18bda30d..45f728b6 100644 --- a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts +++ b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts @@ -8,6 +8,7 @@ import { } from "@willsoto/nestjs-prometheus"; import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; import { + AnonRetrievalCheckMetrics, DataSetCreationCheckMetrics, DataStorageCheckMetrics, DiscoverabilityCheckMetrics, @@ -207,6 +208,56 @@ const metricProviders = [ help: "Estimated number of unrecorded overdue proving periods per 
provider. Resets to 0 when the subgraph catches up.", labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, }), + // Anonymous Retrieval Metrics + makeHistogramProvider({ + name: "anonPieceRetrievalFirstByteMs", + help: "Time to first byte for anonymous piece retrievals via /piece/{cid} (ms)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000], + }), + makeHistogramProvider({ + name: "anonPieceRetrievalLastByteMs", + help: "Total time to retrieve an anonymous piece via /piece/{cid} (ms)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: [1, 5, 10, 50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000], + }), + makeHistogramProvider({ + name: "anonPieceRetrievalThroughputBps", + help: "Throughput for anonymous piece retrievals (bytes/s)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: throughputBuckets, + }), + makeHistogramProvider({ + name: "anonRetrievalCheckMs", + help: "End-to-end anonymous retrieval check duration (ms)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus"] as const, + buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], + }), + makeCounterProvider({ + name: "anonRetrievalStatus", + help: "Anonymous retrieval overall outcome", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonPieceHttpResponseCode", + help: "HTTP response codes for anonymous piece retrieval requests", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonCarParseStatus", + help: "Anonymous retrieval CAR parse outcomes (parseable / not_parseable)", + labelNames: ["checkType", "providerId", "providerName", 
"providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonIpniStatus", + help: "Anonymous retrieval IPNI check outcomes (valid / invalid / skipped)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), + makeCounterProvider({ + name: "anonBlockFetchStatus", + help: "Anonymous retrieval block fetch validation outcomes (valid / invalid / skipped)", + labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, + }), // Storage provider metrics: absolute counts, independent of query filters. makeGaugeProvider({ name: "storage_providers_active", @@ -333,6 +384,7 @@ const metricProviders = [ RetrievalCheckMetrics, DiscoverabilityCheckMetrics, DataSetCreationCheckMetrics, + AnonRetrievalCheckMetrics, WalletBalanceCollector, // HTTP metrics interceptor { @@ -347,6 +399,7 @@ const metricProviders = [ RetrievalCheckMetrics, DiscoverabilityCheckMetrics, DataSetCreationCheckMetrics, + AnonRetrievalCheckMetrics, WalletBalanceCollector, ], }) diff --git a/apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts b/apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts deleted file mode 100644 index 6e084fc1..00000000 --- a/apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { Module } from "@nestjs/common"; -import { PDPSubgraphService } from "./pdp-subgraph.service.js"; - -@Module({ - providers: [PDPSubgraphService], - exports: [PDPSubgraphService], -}) -export class PdpSubgraphModule {} diff --git a/apps/backend/src/pdp-subgraph/queries.ts b/apps/backend/src/pdp-subgraph/queries.ts deleted file mode 100644 index a21a3991..00000000 --- a/apps/backend/src/pdp-subgraph/queries.ts +++ /dev/null @@ -1,24 +0,0 @@ -export const Queries = { - GET_PROVIDERS_WITH_DATASETS: ` - query GetProvidersWithDataSet($addresses: [Bytes!], $blockNumber: BigInt!) 
{ - providers(where: {address_in: $addresses}) { - address - totalFaultedPeriods - totalProvingPeriods - proofSets (where: {nextDeadline_lt: $blockNumber, status: PROVING}) { - nextDeadline - maxProvingPeriod - } - } - } - `, - GET_SUBGRAPH_META: ` - query GetSubgraphMeta { - _meta { - block { - number - } - } - } - `, -} as const; diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts new file mode 100644 index 00000000..b822fe5f --- /dev/null +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts @@ -0,0 +1,168 @@ +import type { ConfigService } from "@nestjs/config"; +import type { Repository } from "typeorm"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { IConfig } from "../config/app.config.js"; +import type { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; +import type { SampleAnonPieceParams, SubgraphService } from "../subgraph/subgraph.service.js"; +import type { AnonCandidatePiece } from "../subgraph/types.js"; +import { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; + +const SP_ADDRESS = "0xAaAaAAaAaaaAaAAAAaaaaAAaaAaaaAAaaaaa1111"; +const DEALBOT_PAYER = "0xBbBBBbBBbbbBbBBBBBbbbbbBBbbBbbbBBbbbb2222"; + +const makePiece = (overrides: Partial = {}): AnonCandidatePiece => ({ + pieceCid: `baga6ea4seaqpiece${Math.random().toString(36).slice(2, 10)}`, + pieceId: "1", + dataSetId: "42", + rawSize: "1048576", + withIPFSIndexing: true, + ipfsRootCid: "bafyroot", + indexedAtBlock: 12345, + pdpPaymentEndEpoch: null, + ...overrides, +}); + +const makeRetrievalRepository = (recentPieceCids: string[]): Repository => { + const queryBuilder = { + select: vi.fn().mockReturnThis(), + orderBy: vi.fn().mockReturnThis(), + limit: vi.fn().mockReturnThis(), + getRawMany: vi.fn().mockResolvedValue(recentPieceCids.map((c) => ({ pieceCid: c }))), + }; + return { + createQueryBuilder: 
vi.fn().mockReturnValue(queryBuilder), + } as unknown as Repository; +}; + +const makeConfigService = (): ConfigService => + ({ + get: vi.fn((key: string) => { + if (key === "blockchain") { + return { walletAddress: DEALBOT_PAYER }; + } + return undefined; + }), + }) as unknown as ConfigService; + +describe("AnonPieceSelectorService", () => { + let subgraphService: SubgraphService; + let sampleAnonPiece: ReturnType; + + beforeEach(() => { + sampleAnonPiece = vi.fn(); + subgraphService = { sampleAnonPiece } as unknown as SubgraphService; + }); + + it("returns null when every fallback attempt yields no piece", async () => { + sampleAnonPiece.mockResolvedValue(null); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result).toBeNull(); + expect(sampleAnonPiece).toHaveBeenCalled(); + }); + + it("returns the sampled piece with SP address lowercased", async () => { + sampleAnonPiece.mockResolvedValueOnce(makePiece({ pieceCid: "baga-the-one" })); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result).not.toBeNull(); + expect(result?.pieceCid).toBe("baga-the-one"); + expect(result?.serviceProvider).toBe(SP_ADDRESS.toLowerCase()); + }); + + it("passes the dealbot payer address to sampleAnonPiece for exclusion", async () => { + sampleAnonPiece.mockResolvedValueOnce(makePiece()); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + + await service.selectPieceForProvider(SP_ADDRESS); + + const call = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams; + expect(call.payer).toBe(DEALBOT_PAYER); + expect(call.serviceProvider).toBe(SP_ADDRESS); + }); + + it("redraws when the first sampled piece's payment has already 
terminated", async () => { + const staleCid = "baga-terminated"; + const freshCid = "baga-live"; + sampleAnonPiece + .mockResolvedValueOnce(makePiece({ pieceCid: staleCid, pdpPaymentEndEpoch: 100n, indexedAtBlock: 200 })) + .mockResolvedValueOnce(makePiece({ pieceCid: freshCid, pdpPaymentEndEpoch: null })); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe(freshCid); + }); + + it("redraws when the first sampled piece was recently tested", async () => { + const staleCid = "baga-stale"; + const freshCid = "baga-fresh"; + sampleAnonPiece + .mockResolvedValueOnce(makePiece({ pieceCid: staleCid })) + .mockResolvedValueOnce(makePiece({ pieceCid: freshCid })); + + const service = new AnonPieceSelectorService( + subgraphService, + makeConfigService(), + makeRetrievalRepository([staleCid]), + ); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe(freshCid); + }); + + it("falls back to the opposite pool when the preferred one is empty", async () => { + // First pool call returns nothing twice (both attempts), second pool succeeds. + const fresh = makePiece({ pieceCid: "baga-other-pool" }); + sampleAnonPiece.mockResolvedValueOnce(null).mockResolvedValueOnce(null).mockResolvedValueOnce(fresh); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe("baga-other-pool"); + + // The second (fallback) call should target the opposite pool. 
+    const firstCall = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams;
+    const fallbackCall = sampleAnonPiece.mock.calls[2][0] as SampleAnonPieceParams;
+    expect(fallbackCall.pool).not.toBe(firstCall.pool);
+  });
+
+  it("widens size bucket to 'any' after both pools fail in the primary bucket", async () => {
+    // 4 empty attempts across (bucket × both pools × 2 draws each) then
+    // succeed on the first `any` bucket call.
+    sampleAnonPiece
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(null)
+      .mockResolvedValueOnce(makePiece({ pieceCid: "baga-any-bucket" }));
+
+    const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([]));
+    const result = await service.selectPieceForProvider(SP_ADDRESS);
+
+    expect(result?.pieceCid).toBe("baga-any-bucket");
+
+    // The 5th call (index 4) should be the widened-bucket attempt; its size
+    // range must extend well past the "large" bucket's 500 MiB ceiling.
+ const widened = sampleAnonPiece.mock.calls[4][0] as SampleAnonPieceParams; + expect(BigInt(widened.maxSize)).toBeGreaterThanOrEqual(32n * 1024n * 1024n * 1024n); + expect(widened.minSize).toBe("0"); + }); + + it("draws a fresh sampleKey for each subgraph call", async () => { + sampleAnonPiece.mockResolvedValueOnce(null).mockResolvedValueOnce(makePiece()); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + await service.selectPieceForProvider(SP_ADDRESS); + + const call1 = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams; + const call2 = sampleAnonPiece.mock.calls[1][0] as SampleAnonPieceParams; + expect(call1.sampleKey).toMatch(/^0x[0-9a-f]{64}$/); + expect(call2.sampleKey).toMatch(/^0x[0-9a-f]{64}$/); + expect(call1.sampleKey).not.toBe(call2.sampleKey); + }); +}); diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts new file mode 100644 index 00000000..acc19832 --- /dev/null +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts @@ -0,0 +1,208 @@ +import { randomBytes } from "node:crypto"; +import { Injectable, Logger } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { InjectRepository } from "@nestjs/typeorm"; +import type { Repository } from "typeorm"; +import type { IConfig } from "../config/app.config.js"; +import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; +import type { AnonPiecePool, SampleAnonPieceParams } from "../subgraph/subgraph.service.js"; +import { SubgraphService } from "../subgraph/subgraph.service.js"; +import type { AnonCandidatePiece } from "../subgraph/types.js"; +import type { AnonPiece } from "./types.js"; + +/** + * Number of most-recently-tested anonymous pieces to exclude from selection + * to avoid immediately retesting the same piece. 
Piece CIDs are globally + * unique and each one lives on a single SP's dataset, so scoping by CID + * is equivalent to scoping by (SP, CID) for this workload. + */ +const RECENT_DEDUP_WINDOW = 500; + +/** + * Piece size buckets, in raw (unpadded) bytes. Weighted sampling across + * these buckets keeps tests meaningful for bandwidth measurement without + * locking out SPs whose corpus skews small or large. + */ +type SizeBucket = "small" | "medium" | "large"; +type SizeRange = { min: bigint; max: bigint }; + +const MIB = 1024n * 1024n; + +// All downloads are buffered in-memory, so we need to keep piece sizes reasonable +const SIZE_BUCKETS: Record = { + small: { min: 1n * MIB, max: 20n * MIB - 1n }, + medium: { min: 20n * MIB, max: 100n * MIB - 1n }, + large: { min: 100n * MIB, max: 500n * MIB - 1n }, +}; + +/** Weights for choosing a bucket per selection. Must sum to 1. */ +const BUCKET_WEIGHTS: Record = { + small: 0.2, + medium: 0.5, + large: 0.3, +}; + +/** + * Probability the primary draw targets the withIPFSIndexing pool. + * The rest of the time we sample across all FWSS pieces so SPs can't + * optimise only their CAR corpus. + */ +const IPFS_INDEXED_SAMPLE_RATE = 0.8; + +@Injectable() +export class AnonPieceSelectorService { + private readonly logger = new Logger(AnonPieceSelectorService.name); + + constructor( + private readonly subgraphService: SubgraphService, + private readonly configService: ConfigService, + @InjectRepository(AnonRetrieval) + private readonly anonRetrievalRepository: Repository, + ) {} + + /** + * Select an anonymous piece to test against the given SP. + * + * Strategy: + * 1. Pick a size bucket by weighted random. + * 2. Pick a pool (`indexed` 80% / `any` 20%). + * 3. Generate a uniform-random sampleKey and query the subgraph for the + * smallest `Root.sampleKey ≥ $sampleKey` matching the filters. + * 4. Drop the pick if `pdpPaymentEndEpoch` has passed or it was tested + * recently; redraw once. + * 5. 
If still empty, fall back through: (same bucket, opposite pool) → + * (any bucket, indexed) → (any bucket, any). + */ + async selectPieceForProvider(spAddress: string): Promise { + const dealbotPayer = this.configService.get("blockchain", { infer: true }).walletAddress; + const recentlyTested = await this.loadRecentlyTestedPieceCids(); + + const bucket = this.pickBucket(); + const pool: AnonPiecePool = Math.random() < IPFS_INDEXED_SAMPLE_RATE ? "indexed" : "any"; + + const attempts: Array<{ bucket: SizeBucket | "any"; pool: AnonPiecePool }> = [ + { bucket, pool }, + { bucket, pool: pool === "indexed" ? "any" : "indexed" }, + { bucket: "any", pool: "indexed" }, + { bucket: "any", pool: "any" }, + ]; + + for (const attempt of attempts) { + const piece = await this.drawPiece({ + spAddress, + dealbotPayer, + bucket: attempt.bucket, + pool: attempt.pool, + recentlyTested, + }); + + if (piece) { + this.logger.log({ + event: "anon_piece_selected", + message: "Selected anonymous piece for retrieval test", + spAddress, + pieceCid: piece.pieceCid, + dataSetId: piece.dataSetId, + withIPFSIndexing: piece.withIPFSIndexing, + bucket: attempt.bucket, + pool: attempt.pool, + }); + return { + pieceCid: piece.pieceCid, + dataSetId: piece.dataSetId, + pieceId: piece.pieceId, + serviceProvider: spAddress.toLowerCase(), + withIPFSIndexing: piece.withIPFSIndexing, + ipfsRootCid: piece.ipfsRootCid, + rawSize: piece.rawSize, + }; + } + } + + this.logger.warn({ + event: "anon_no_candidates", + message: "No anonymous piece found after all fallbacks", + spAddress, + }); + return null; + } + + /** + * Try to draw a piece for one (bucket, pool) combination. Up to two draws + * with fresh sampleKeys, each filtered by dedup + epoch-termination. + */ + private async drawPiece(args: { + spAddress: string; + dealbotPayer: string; + bucket: SizeBucket | "any"; + pool: AnonPiecePool; + recentlyTested: Set; + }): Promise { + const range = args.bucket === "any" ? 
fullRange() : SIZE_BUCKETS[args.bucket]; + + for (let attempt = 0; attempt < 2; attempt++) { + const params: SampleAnonPieceParams = { + serviceProvider: args.spAddress, + payer: args.dealbotPayer, + sampleKey: randomSampleKey(), + minSize: range.min.toString(), + maxSize: range.max.toString(), + pool: args.pool, + }; + + const piece = await this.subgraphService.sampleAnonPiece(params); + if (!piece) { + continue; + } + + if (piece.pdpPaymentEndEpoch != null && piece.pdpPaymentEndEpoch <= BigInt(piece.indexedAtBlock)) { + continue; + } + + if (args.recentlyTested.has(piece.pieceCid)) { + continue; + } + + return piece; + } + + return null; + } + + private pickBucket(): SizeBucket { + const r = Math.random(); + let acc = 0; + for (const [name, weight] of Object.entries(BUCKET_WEIGHTS) as Array<[SizeBucket, number]>) { + acc += weight; + if (r < acc) { + return name; + } + } + return "medium"; + } + + /** + * Return the set of piece CIDs tested in the last RECENT_DEDUP_WINDOW + * anonymous retrievals across all SPs. + */ + private async loadRecentlyTestedPieceCids(): Promise> { + const rows = await this.anonRetrievalRepository + .createQueryBuilder("r") + .select("r.piece_cid", "pieceCid") + .orderBy("r.created_at", "DESC") + .limit(RECENT_DEDUP_WINDOW) + .getRawMany<{ pieceCid: string }>(); + + return new Set(rows.map((row) => row.pieceCid)); + } +} + +/** Uniform-random 32-byte sort key as `0x`-prefixed hex. */ +function randomSampleKey(): string { + return `0x${randomBytes(32).toString("hex")}`; +} + +/** The full size range (used when bucket fallback is "any"). 
*/ +function fullRange(): SizeRange { + return { min: 0n, max: (1n << 63n) - 1n }; +} diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts new file mode 100644 index 00000000..61e97105 --- /dev/null +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -0,0 +1,189 @@ +import type { Repository } from "typeorm"; +import { beforeEach, describe, expect, it, vi } from "vitest"; +import type { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; +import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; +import { RetrievalStatus } from "../database/types.js"; +import type { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; +import type { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; +import type { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; +import { AnonRetrievalService } from "./anon-retrieval.service.js"; +import type { CarValidationService } from "./car-validation.service.js"; +import type { PieceRetrievalService } from "./piece-retrieval.service.js"; +import type { PieceRetrievalResult } from "./types.js"; + +const SP_ADDRESS = "0xaaaa0000000000000000000000000000000000aa"; + +const PIECE = { + pieceCid: "baga6ea4seaqpiece", + pieceId: "1", + dataSetId: "42", + rawSize: "1048576", + withIPFSIndexing: false, + ipfsRootCid: null, + serviceProvider: SP_ADDRESS, +}; + +function makeProvider(): StorageProvider { + return { + address: SP_ADDRESS, + providerId: 7, + name: "sp-test", + isApproved: true, + } as unknown as StorageProvider; +} + +function makeService(opts: { + pieceResult: PieceRetrievalResult; + fetchPieceImpl?: (signal?: AbortSignal) => Promise; +}): { + service: AnonRetrievalService; + saveSpy: ReturnType; + fetchSpy: ReturnType; +} { + const saveSpy = vi.fn(async (entity: AnonRetrieval) => entity); + const createdEntities: Partial[] = 
[]; + const anonRetrievalRepository = { + create: vi.fn((data: Partial) => { + createdEntities.push(data); + return data; + }), + save: saveSpy, + } as unknown as Repository; + + const spRepository = { + findOne: vi.fn(async () => makeProvider()), + } as unknown as Repository; + + const anonPieceSelector = { + selectPieceForProvider: vi.fn(async () => PIECE), + } as unknown as AnonPieceSelectorService; + + const fetchSpy = vi.fn(opts.fetchPieceImpl ?? (async () => opts.pieceResult)); + const pieceRetrievalService = { + fetchPiece: fetchSpy, + } as unknown as PieceRetrievalService; + + const carValidationService = { + validateCarPiece: vi.fn(), + } as unknown as CarValidationService; + + const walletSdkService = { + getProviderInfo: vi.fn(() => ({ pdp: { serviceURL: "https://sp.test/" } })), + } as unknown as WalletSdkService; + + const metrics = { + observeFirstByteMs: vi.fn(), + observeLastByteMs: vi.fn(), + observeThroughput: vi.fn(), + observeCheckDuration: vi.fn(), + recordStatus: vi.fn(), + recordHttpResponseCode: vi.fn(), + recordCarParseStatus: vi.fn(), + recordIpniStatus: vi.fn(), + recordBlockFetchStatus: vi.fn(), + } as unknown as AnonRetrievalCheckMetrics; + + const service = new AnonRetrievalService( + anonPieceSelector, + pieceRetrievalService, + carValidationService, + walletSdkService, + metrics, + anonRetrievalRepository, + spRepository, + ); + + return { service, saveSpy, fetchSpy }; +} + +describe("AnonRetrievalService", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("persists partial metrics when fetchPiece returns aborted=true", async () => { + const partial: PieceRetrievalResult = { + success: false, + pieceCid: PIECE.pieceCid, + bytesReceived: 524288, + pieceBytes: null, + latencyMs: 42000, + ttfbMs: 150, + throughputBps: 12500, + statusCode: 200, + commPValid: false, + errorMessage: "Anon retrieval job timeout (60s) for sp1", + aborted: true, + }; + + const { service, saveSpy } = makeService({ pieceResult: partial }); + + 
await service.performForProvider(SP_ADDRESS); + + expect(saveSpy).toHaveBeenCalledTimes(1); + const saved = saveSpy.mock.calls[0][0] as Partial; + expect(saved.status).toBe(RetrievalStatus.FAILED); + expect(saved.bytesRetrieved).toBe(524288); + expect(saved.ttfbMs).toBe(150); + expect(saved.latencyMs).toBe(42000); + expect(saved.throughputBps).toBe(12500); + expect(saved.responseCode).toBe(200); + expect(saved.errorMessage).toContain("Anon retrieval job timeout"); + }); + + it("still saves a row when the signal aborts before fetchPiece runs", async () => { + const ac = new AbortController(); + ac.abort(new Error("Anon retrieval job timeout (60s) for sp1")); + + const never: PieceRetrievalResult = { + success: false, + pieceCid: PIECE.pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + }; + + const { service, saveSpy, fetchSpy } = makeService({ pieceResult: never }); + + await service.performForProvider(SP_ADDRESS, ac.signal); + + expect(fetchSpy).not.toHaveBeenCalled(); + expect(saveSpy).toHaveBeenCalledTimes(1); + const saved = saveSpy.mock.calls[0][0] as Partial; + expect(saved.status).toBe(RetrievalStatus.FAILED); + expect(saved.errorMessage).toContain("Anon retrieval job timeout"); + expect(saved.bytesRetrieved).toBeNull(); + expect(saved.ttfbMs).toBeNull(); + }); + + it("still saves a row when fetchPiece throws unexpectedly", async () => { + const never: PieceRetrievalResult = { + success: false, + pieceCid: PIECE.pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + }; + + const { service, saveSpy } = makeService({ + pieceResult: never, + fetchPieceImpl: async () => { + throw new Error("network down"); + }, + }); + + await expect(service.performForProvider(SP_ADDRESS)).rejects.toThrow("network down"); + + expect(saveSpy).toHaveBeenCalledTimes(1); + const saved = saveSpy.mock.calls[0][0] as 
Partial; + expect(saved.status).toBe(RetrievalStatus.FAILED); + }); +}); diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts new file mode 100644 index 00000000..d40fe315 --- /dev/null +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -0,0 +1,244 @@ +import { Injectable, Logger } from "@nestjs/common"; +import { InjectRepository } from "@nestjs/typeorm"; +import type { Repository } from "typeorm"; +import { type ProviderJobContext, toStructuredError } from "../common/logging.js"; +import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; +import { StorageProvider } from "../database/entities/storage-provider.entity.js"; +import { RetrievalStatus, ServiceType } from "../database/types.js"; +import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; +import { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; +import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; +import { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; +import { CarValidationService } from "./car-validation.service.js"; +import { PieceRetrievalService } from "./piece-retrieval.service.js"; +import type { CarValidationResult, PieceRetrievalResult } from "./types.js"; + +@Injectable() +export class AnonRetrievalService { + private readonly logger = new Logger(AnonRetrievalService.name); + + constructor( + private readonly anonPieceSelectorService: AnonPieceSelectorService, + private readonly pieceRetrievalService: PieceRetrievalService, + private readonly carValidationService: CarValidationService, + private readonly walletSdkService: WalletSdkService, + private readonly metrics: AnonRetrievalCheckMetrics, + @InjectRepository(AnonRetrieval) + private readonly anonRetrievalRepository: Repository, + @InjectRepository(StorageProvider) + private readonly spRepository: Repository, + ) {} + + 
async performForProvider( + spAddress: string, + signal?: AbortSignal, + logContext?: ProviderJobContext, + ): Promise { + // Build metric labels + const provider = await this.spRepository.findOne({ where: { address: spAddress } }); + const labels = buildCheckMetricLabels({ + checkType: "anon_retrieval", + providerId: provider?.providerId, + providerName: provider?.name, + providerIsApproved: provider?.isApproved, + }); + + // 1. Select an anonymous piece + const piece = await this.anonPieceSelectorService.selectPieceForProvider(spAddress); + if (!piece) { + this.logger.warn({ + ...logContext, + event: "anon_retrieval_no_piece", + message: "No anonymous piece found for SP", + spAddress, + }); + this.metrics.recordStatus(labels, "failure.no_piece"); + return null; + } + + this.logger.log({ + ...logContext, + event: "anon_retrieval_started", + message: "Starting anonymous retrieval test", + pieceCid: piece.pieceCid, + dataSetId: piece.dataSetId, + pieceId: piece.pieceId, + withIPFSIndexing: piece.withIPFSIndexing, + spAddress, + }); + + const checkStart = Date.now(); + const startedAt = new Date(); + + let pieceResult: PieceRetrievalResult | null = null; + let carResult: CarValidationResult | null = null; + let saved: AnonRetrieval | null = null; + + try { + // 2. Fetch the piece. fetchPiece never throws on abort — it returns a + // result with partial metrics so we can persist what we have. + if (signal?.aborted) { + pieceResult = buildAbortedPlaceholder(piece.pieceCid, signal.reason); + } else { + pieceResult = await this.pieceRetrievalService.fetchPiece(spAddress, piece.pieceCid, signal); + } + + // Emit piece retrieval metrics + this.metrics.observeFirstByteMs(labels, pieceResult.ttfbMs); + this.metrics.observeLastByteMs(labels, pieceResult.latencyMs); + this.metrics.observeThroughput(labels, pieceResult.throughputBps); + this.metrics.recordHttpResponseCode(labels, pieceResult.statusCode); + + // 3. 
CAR validation (only if piece was successfully retrieved and has IPFS indexing) + if ( + pieceResult.success && + piece.withIPFSIndexing && + piece.ipfsRootCid && + pieceResult.pieceBytes && + provider && + !signal?.aborted + ) { + try { + carResult = await this.carValidationService.validateCarPiece( + pieceResult.pieceBytes, + provider, + piece.ipfsRootCid, + signal, + ); + } catch (error) { + this.logger.warn({ + ...logContext, + event: "anon_retrieval_car_validation_failed", + message: "CAR validation threw an error", + pieceCid: piece.pieceCid, + spAddress, + error: toStructuredError(error), + }); + } + } + + // Emit CAR validation metrics + if (carResult) { + this.metrics.recordCarParseStatus(labels, carResult.carParseable); + this.metrics.recordIpniStatus( + labels, + carResult.ipniValid === null ? "skipped" : carResult.ipniValid ? "valid" : "invalid", + ); + this.metrics.recordBlockFetchStatus( + labels, + carResult.blockFetchValid === null ? "skipped" : carResult.blockFetchValid ? "valid" : "invalid", + ); + } else if (!pieceResult.success) { + // Piece retrieval failed — IPNI and block fetch were skipped + this.metrics.recordIpniStatus(labels, "skipped"); + this.metrics.recordBlockFetchStatus(labels, "skipped"); + } + + // Overall check duration and status + this.metrics.observeCheckDuration(labels, Date.now() - checkStart); + this.metrics.recordStatus( + labels, + pieceResult.success ? "success" : pieceResult.aborted ? "failure.aborted" : "failure.http", + ); + } finally { + // Always save a record — even on abort or unexpected error — so we never + // lose the evidence (ttfb, bytes, response code) we already collected. 
+ pieceResult ??= buildAbortedPlaceholder(piece.pieceCid, signal?.reason); + saved = await this.saveRetrievalRecord(spAddress, piece, pieceResult, carResult, startedAt, logContext); + } + + return saved; + } + + private async saveRetrievalRecord( + spAddress: string, + piece: { + pieceCid: string; + dataSetId: string; + pieceId: string; + rawSize: string; + withIPFSIndexing: boolean; + ipfsRootCid: string | null; + }, + pieceResult: PieceRetrievalResult, + carResult: CarValidationResult | null, + startedAt: Date, + logContext?: ProviderJobContext, + ): Promise { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; + + const retrieval = this.anonRetrievalRepository.create({ + spAddress, + pieceCid: piece.pieceCid, + dataSetId: BigInt(piece.dataSetId), + pieceId: BigInt(piece.pieceId), + rawSize: BigInt(piece.rawSize), + withIpfsIndexing: piece.withIPFSIndexing, + ipfsRootCid: piece.ipfsRootCid, + serviceType: ServiceType.DIRECT_SP, + retrievalEndpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, + status: pieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED, + startedAt, + completedAt: new Date(), + latencyMs: pieceResult.latencyMs > 0 ? Math.round(pieceResult.latencyMs) : null, + ttfbMs: pieceResult.ttfbMs > 0 ? Math.round(pieceResult.ttfbMs) : null, + throughputBps: pieceResult.throughputBps > 0 ? Math.round(pieceResult.throughputBps) : null, + bytesRetrieved: pieceResult.bytesReceived > 0 ? pieceResult.bytesReceived : null, + responseCode: pieceResult.statusCode > 0 ? pieceResult.statusCode : null, + errorMessage: pieceResult.errorMessage ?? null, + commpValid: pieceResult.success ? pieceResult.commPValid : null, + carValid: carResult ? 
carResult.ipniValid !== false && carResult.blockFetchValid !== false : null, + }); + + try { + await this.anonRetrievalRepository.save(retrieval); + } catch (error) { + this.logger.warn({ + ...logContext, + event: "anon_retrieval_save_failed", + message: "Failed to save anonymous retrieval record", + pieceCid: piece.pieceCid, + spAddress, + error: toStructuredError(error), + }); + return null; + } + + this.logger.log({ + ...logContext, + event: "anon_retrieval_completed", + message: "Anonymous retrieval test completed", + pieceCid: piece.pieceCid, + spAddress, + success: pieceResult.success, + aborted: pieceResult.aborted === true, + latencyMs: pieceResult.latencyMs, + ttfbMs: pieceResult.ttfbMs, + bytesRetrieved: pieceResult.bytesReceived, + carParseable: carResult?.carParseable, + ipniValid: carResult?.ipniValid, + blockFetchValid: carResult?.blockFetchValid, + }); + + return retrieval; + } +} + +function buildAbortedPlaceholder(pieceCid: string, reason: unknown): PieceRetrievalResult { + const message = + reason instanceof Error && reason.message ? reason.message : typeof reason === "string" ? 
reason : "aborted"; + return { + success: false, + pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + errorMessage: message, + aborted: true, + }; +} diff --git a/apps/backend/src/retrieval-anon/car-validation.service.ts b/apps/backend/src/retrieval-anon/car-validation.service.ts new file mode 100644 index 00000000..8019b8df --- /dev/null +++ b/apps/backend/src/retrieval-anon/car-validation.service.ts @@ -0,0 +1,223 @@ +import { CarReader } from "@ipld/car"; +import * as dagPB from "@ipld/dag-pb"; +import { Injectable, Logger } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { create as createBlock } from "multiformats/block"; +import { CID } from "multiformats/cid"; +import * as raw from "multiformats/codecs/raw"; +import { sha256 } from "multiformats/hashes/sha2"; +import { toStructuredError } from "../common/logging.js"; +import type { IConfig } from "../config/app.config.js"; +import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; +import { HttpClientService } from "../http-client/http-client.service.js"; +import { IpniVerificationService } from "../ipni/ipni-verification.service.js"; +import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; +import type { CarValidationResult } from "./types.js"; + +// UnixFS DAGs use only dag-pb (interior nodes) and raw (leaf data) codecs +const unixfsCodecs: Record unknown }> = { + [dagPB.code]: dagPB, + [raw.code]: raw, +}; + +@Injectable() +export class CarValidationService { + private readonly logger = new Logger(CarValidationService.name); + + constructor( + private readonly configService: ConfigService, + private readonly httpClientService: HttpClientService, + private readonly walletSdkService: WalletSdkService, + private readonly ipniVerificationService: IpniVerificationService, + ) {} + + /** + * Validate an anonymous piece retrieved as a CAR: + * 1. 
parse the CAR, + * 2. sample random blocks, + * 3. confirm the SP is advertised for the root + sampled CIDs via IPNI, + * 4. fetch each sampled block from the SP and hash-verify it. + * + * CAR parse failure is attributed to the client (bad upload), not the SP. + */ + async validateCarPiece( + pieceBytes: Buffer, + provider: StorageProvider, + ipfsRootCid: string, + signal?: AbortSignal, + ): Promise { + const blocks = await this.parseCar(pieceBytes, provider.address, ipfsRootCid); + if (blocks === null) { + return { carParseable: false, blockCount: 0, sampledCidCount: 0, ipniValid: null, blockFetchValid: null }; + } + if (blocks.length === 0) { + return { + carParseable: true, + blockCount: 0, + sampledCidCount: 0, + ipniValid: null, + blockFetchValid: null, + errorMessage: "CAR contained no blocks", + }; + } + + const sampleCount = this.configService.get("retrieval", { infer: true }).anonBlockSampleCount; + const shuffled = [...blocks].sort(() => Math.random() - 0.5); + const sampledBlocks = shuffled.slice(0, sampleCount); + + const ipniValid = await this.checkIpni(provider, ipfsRootCid, sampledBlocks, signal); + const blockFetchResult = await this.checkBlockFetch(sampledBlocks, provider.address, signal); + + return { + carParseable: true, + blockCount: blocks.length, + sampledCidCount: sampledBlocks.length, + ipniValid, + blockFetchValid: blockFetchResult.valid, + errorMessage: blockFetchResult.errorMessage, + }; + } + + private async parseCar( + pieceBytes: Buffer, + spAddress: string, + ipfsRootCid: string, + ): Promise<{ cid: CID; bytes: Uint8Array }[] | null> { + try { + const reader = await CarReader.fromBytes(new Uint8Array(pieceBytes)); + const blocks: { cid: CID; bytes: Uint8Array }[] = []; + for await (const block of reader.blocks()) { + blocks.push({ cid: block.cid, bytes: block.bytes }); + } + return blocks; + } catch (error) { + this.logger.debug({ + event: "car_parse_failed", + message: "Failed to parse piece bytes as CAR - client fault, not SP", + 
spAddress, + ipfsRootCid, + error: toStructuredError(error), + }); + return null; + } + } + + /** + * Verify via IPNI that the SP is advertised for the root CID and each sampled child CID. + * Delegates to the shared IpniVerificationService which uses filecoin-pin's provider-scoped check. + */ + private async checkIpni( + provider: StorageProvider, + ipfsRootCid: string, + sampledBlocks: ReadonlyArray<{ cid: CID }>, + signal?: AbortSignal, + ): Promise { + const timeouts = this.configService.get("timeouts", { infer: true }); + let rootCid: CID; + try { + rootCid = CID.parse(ipfsRootCid); + } catch (error) { + this.logger.warn({ + event: "ipni_root_cid_invalid", + message: "Failed to parse ipfsRootCID", + ipfsRootCid, + providerAddress: provider.address, + error: toStructuredError(error), + }); + return false; + } + + const result = await this.ipniVerificationService.verify({ + rootCid, + blockCids: sampledBlocks.map((b) => b.cid), + storageProvider: provider, + timeoutMs: timeouts.ipniVerificationTimeoutMs, + pollIntervalMs: timeouts.ipniVerificationPollingMs, + signal, + }); + + return result.rootCIDVerified; + } + + /** + * Fetch each sampled block from the SP endpoint and hash-verify the response + * against the declared CID. Mirrors IpfsBlockRetrievalStrategy's per-block + * verification for the sampled subset (no DAG traversal). 
+ */ + private async checkBlockFetch( + sampledBlocks: ReadonlyArray<{ cid: CID; bytes: Uint8Array }>, + spAddress: string, + signal?: AbortSignal, + ): Promise<{ valid: boolean | null; errorMessage?: string }> { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + if (!providerInfo) { + return { valid: null, errorMessage: `Provider info not found for ${spAddress}` }; + } + + const spBaseUrl = providerInfo.pdp.serviceURL.replace(/\/$/, ""); + let allValid = true; + + for (const block of sampledBlocks) { + signal?.throwIfAborted(); + const cidStr = block.cid.toString(); + const blockUrl = `${spBaseUrl}/ipfs/${cidStr}?format=raw`; + + try { + const resp = await this.httpClientService.requestWithMetrics(blockUrl, { + headers: { Accept: "application/vnd.ipld.raw" }, + httpVersion: "2", + signal, + }); + + if (resp.metrics.statusCode < 200 || resp.metrics.statusCode >= 300) { + allValid = false; + this.logger.warn({ + event: "block_fetch_non_2xx", + message: "Block fetch returned non-2xx status", + cid: cidStr, + spAddress, + statusCode: resp.metrics.statusCode, + }); + continue; + } + + if (block.cid.multihash.code !== sha256.code) { + this.logger.warn({ + event: "block_unsupported_hash", + message: `Unsupported hash algorithm 0x${block.cid.multihash.code.toString(16)}`, + cid: cidStr, + spAddress, + }); + allValid = false; + continue; + } + + const codec = unixfsCodecs[block.cid.code]; + if (!codec) { + this.logger.warn({ + event: "block_unsupported_codec", + message: `Unsupported codec 0x${block.cid.code.toString(16)}`, + cid: cidStr, + spAddress, + }); + allValid = false; + continue; + } + + // Hash-verifies and decodes; throws on mismatch + await createBlock({ bytes: resp.data, cid: block.cid, hasher: sha256, codec }); + } catch (error) { + allValid = false; + this.logger.warn({ + event: "block_fetch_failed", + message: "Block fetch or hash verification failed", + cid: cidStr, + spAddress, + error: toStructuredError(error), + }); + } + } + + 
return { valid: allValid }; + } +} diff --git a/apps/backend/src/retrieval-anon/piece-retrieval.service.ts b/apps/backend/src/retrieval-anon/piece-retrieval.service.ts new file mode 100644 index 00000000..51150661 --- /dev/null +++ b/apps/backend/src/retrieval-anon/piece-retrieval.service.ts @@ -0,0 +1,195 @@ +import { asPieceCID, calculate as calculatePieceCid } from "@filoz/synapse-core/piece"; +import { Injectable, Logger } from "@nestjs/common"; +import { toStructuredError } from "../common/logging.js"; +import { HttpClientService } from "../http-client/http-client.service.js"; +import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; +import type { PieceRetrievalResult } from "./types.js"; + +@Injectable() +export class PieceRetrievalService { + private readonly logger = new Logger(PieceRetrievalService.name); + + constructor( + private readonly walletSdkService: WalletSdkService, + private readonly httpClientService: HttpClientService, + ) {} + + async fetchPiece(spAddress: string, pieceCid: string, signal?: AbortSignal): Promise<PieceRetrievalResult> { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + + if (!providerInfo) { + this.logger.warn({ + event: "provider_info_not_found", + message: "Cannot fetch piece: provider info not found", + spAddress, + pieceCid, + }); + + return { + success: false, + pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + errorMessage: `Provider info not found for ${spAddress}`, + }; + } + + const baseUrl = providerInfo.pdp.serviceURL.replace(/\/$/, ""); + const url = `${baseUrl}/piece/${pieceCid}`; + + try { + const result = await this.httpClientService.requestWithMetrics(url, { + httpVersion: "2", + signal, + }); + + const { metrics } = result; + const isSuccess = metrics.statusCode >= 200 && metrics.statusCode < 300; + const throughputBps = metrics.totalTime > 0 ? 
metrics.responseSize / (metrics.totalTime / 1000) : 0; + + if (result.aborted) { + this.logger.warn({ + event: "piece_fetch_aborted", + message: "Piece fetch aborted mid-download; returning partial metrics", + url, + pieceCid, + spAddress, + bytesReceived: metrics.responseSize, + ttfbMs: metrics.ttfb, + abortReason: result.abortReason, + }); + + return { + success: false, + pieceCid, + bytesReceived: metrics.responseSize, + pieceBytes: null, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + throughputBps, + statusCode: metrics.statusCode, + commPValid: false, + errorMessage: result.abortReason ?? "aborted", + aborted: true, + }; + } + + if (!isSuccess) { + this.logger.warn({ + event: "piece_fetch_non_2xx", + message: "Piece fetch returned non-2xx status", + url, + statusCode: metrics.statusCode, + pieceCid, + spAddress, + }); + + return { + success: false, + pieceCid, + bytesReceived: metrics.responseSize, + pieceBytes: null, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + throughputBps, + statusCode: metrics.statusCode, + commPValid: false, + errorMessage: `HTTP ${metrics.statusCode}`, + }; + } + + const pieceBytes = Buffer.isBuffer(result.data) ? 
result.data : Buffer.from(result.data); + const commPValid = await this.validateCommP(pieceBytes, pieceCid); + + this.logger.debug({ + event: "piece_fetch_success", + message: "Piece fetched successfully", + pieceCid, + spAddress, + bytesReceived: metrics.responseSize, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + }); + + return { + success: true, + pieceCid, + bytesReceived: metrics.responseSize, + pieceBytes, + latencyMs: metrics.totalTime, + ttfbMs: metrics.ttfb, + throughputBps, + statusCode: metrics.statusCode, + commPValid, + }; + } catch (error) { + const aborted = signal?.aborted === true; + this.logger.warn({ + event: "piece_fetch_failed", + message: "Piece fetch threw an error", + url, + pieceCid, + spAddress, + aborted, + error: toStructuredError(error), + }); + + return { + success: false, + pieceCid, + bytesReceived: 0, + pieceBytes: null, + latencyMs: 0, + ttfbMs: 0, + throughputBps: 0, + statusCode: 0, + commPValid: false, + errorMessage: error instanceof Error ? error.message : String(error), + aborted, + }; + } + } + + /** + * Compute the piece CID (sha2-256-trunc254-padded) of the retrieved bytes and compare + * against the expected CID. Returns false on parse failure, computation failure, or mismatch. 
+ */ + private async validateCommP(bytes: Buffer, pieceCid: string): Promise<boolean> { + const expected = asPieceCID(pieceCid); + if (!expected) { + this.logger.warn({ + event: "commp_invalid_piece_cid", + message: "Cannot parse expected piece CID for CommP validation", + pieceCid, + }); + return false; + } + + try { + const computed = calculatePieceCid(bytes); + const matches = computed.toString() === expected.toString(); + if (!matches) { + this.logger.warn({ + event: "commp_mismatch", + message: "Piece CID mismatch: SP-returned bytes hash to a different CID", + expected: expected.toString(), + computed: computed.toString(), + }); + } + return matches; + } catch (error) { + this.logger.warn({ + event: "commp_validation_error", + message: "CommP computation threw an error", + pieceCid, + error: toStructuredError(error), + }); + return false; + } + } +} diff --git a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts new file mode 100644 index 00000000..4e9e38df --- /dev/null +++ b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts @@ -0,0 +1,27 @@ +import { Module } from "@nestjs/common"; +import { ConfigModule } from "@nestjs/config"; +import { TypeOrmModule } from "@nestjs/typeorm"; +import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; +import { StorageProvider } from "../database/entities/storage-provider.entity.js"; +import { HttpClientModule } from "../http-client/http-client.module.js"; +import { IpniModule } from "../ipni/ipni.module.js"; +import { SubgraphModule } from "../subgraph/subgraph.module.js"; +import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; +import { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; +import { AnonRetrievalService } from "./anon-retrieval.service.js"; +import { CarValidationService } from "./car-validation.service.js"; +import { PieceRetrievalService } from "./piece-retrieval.service.js"; + +@Module({ + 
imports: [ + ConfigModule, + TypeOrmModule.forFeature([AnonRetrieval, StorageProvider]), + SubgraphModule, + WalletSdkModule, + HttpClientModule, + IpniModule, + ], + providers: [AnonPieceSelectorService, PieceRetrievalService, CarValidationService, AnonRetrievalService], + exports: [AnonRetrievalService], +}) +export class RetrievalAnonModule {} diff --git a/apps/backend/src/retrieval-anon/types.ts b/apps/backend/src/retrieval-anon/types.ts new file mode 100644 index 00000000..2c3384d5 --- /dev/null +++ b/apps/backend/src/retrieval-anon/types.ts @@ -0,0 +1,35 @@ +/** The result of anonymous piece selection. */ +export type AnonPiece = { + pieceCid: string; + dataSetId: string; + pieceId: string; + serviceProvider: string; + withIPFSIndexing: boolean; + ipfsRootCid: string | null; + rawSize: string; +}; + +/** Result of piece retrieval. */ +export type PieceRetrievalResult = { + success: boolean; + pieceCid: string; + bytesReceived: number; + pieceBytes: Buffer | null; + latencyMs: number; + ttfbMs: number; + throughputBps: number; + statusCode: number; + commPValid: boolean; + errorMessage?: string; + aborted?: boolean; +}; + +/** Result of CAR validation. */ +export type CarValidationResult = { + carParseable: boolean; + blockCount: number; + sampledCidCount: number; + ipniValid: boolean | null; + blockFetchValid: boolean | null; + errorMessage?: string; +}; diff --git a/apps/backend/src/subgraph/queries.ts b/apps/backend/src/subgraph/queries.ts new file mode 100644 index 00000000..74802ddf --- /dev/null +++ b/apps/backend/src/subgraph/queries.ts @@ -0,0 +1,78 @@ +export const Queries = { + GET_PROVIDERS_WITH_DATASETS: ` + query GetProvidersWithDataSet($addresses: [Bytes!], $blockNumber: BigInt!) 
{ + providers(where: {address_in: $addresses}) { + address + totalFaultedPeriods + totalProvingPeriods + proofSets (where: {nextDeadline_lt: $blockNumber, status: PROVING}) { + nextDeadline + maxProvingPeriod + } + } + } + `, + GET_SUBGRAPH_META: ` + query GetSubgraphMeta { + _meta { + block { + number + } + } + } + `, +} as const; + +/** + * Build a sampleAnonPiece query scoped to the requested pool. The single + * piece of query shape that differs is whether the proofSet filter pins + * `withIPFSIndexing: true`; assembling the fragment here keeps the rest + * of the query and the returned selection set shared. + */ +export function buildSampleAnonPieceQuery(pool: "indexed" | "any"): string { + const indexingFilter = pool === "indexed" ? "withIPFSIndexing: true" : ""; + return ` + query SampleAnonPiece( + $serviceProvider: Bytes! + $payer: Bytes! + $sampleKey: Bytes! + $minSize: BigInt! + $maxSize: BigInt! + ) { + _meta { + block { + number + } + } + roots( + first: 1 + orderBy: sampleKey + orderDirection: asc + where: { + sampleKey_gte: $sampleKey + removed: false + rawSize_gte: $minSize + rawSize_lte: $maxSize + proofSet_: { + fwssServiceProvider: $serviceProvider + fwssPayer_not: $payer + isActive: true + ${indexingFilter} + } + } + subgraphError: allow + ) { + rootId + cid + rawSize + ipfsRootCID + proofSet { + setId + withIPFSIndexing + fwssPayer + pdpPaymentEndEpoch + } + } + } + `; +} diff --git a/apps/backend/src/subgraph/subgraph.module.ts b/apps/backend/src/subgraph/subgraph.module.ts new file mode 100644 index 00000000..7834c39b --- /dev/null +++ b/apps/backend/src/subgraph/subgraph.module.ts @@ -0,0 +1,8 @@ +import { Module } from "@nestjs/common"; +import { SubgraphService } from "./subgraph.service.js"; + +@Module({ + providers: [SubgraphService], + exports: [SubgraphService], +}) +export class SubgraphModule {} diff --git a/apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts b/apps/backend/src/subgraph/subgraph.service.spec.ts similarity 
index 79% rename from apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts rename to apps/backend/src/subgraph/subgraph.service.spec.ts index cd3a1ea8..4dc2cd5e 100644 --- a/apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts +++ b/apps/backend/src/subgraph/subgraph.service.spec.ts @@ -1,7 +1,8 @@ import type { ConfigService } from "@nestjs/config"; +import { CID } from "multiformats/cid"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { IConfig } from "../config/app.config.js"; -import { PDPSubgraphService } from "./pdp-subgraph.service.js"; +import { SubgraphService } from "./subgraph.service.js"; const VALID_ADDRESS = "0xd8da6bf26964af9d7eed9e03e53415d37aa96045" as const; const SUBGRAPH_ENDPOINT = "https://api.thegraph.com/subgraphs/filecoin/pdp" as const; @@ -35,21 +36,57 @@ const makeSubgraphMetaResponse = (blockNumber = 12345) => ({ }, }); -describe("PDPSubgraphService", () => { - let service: PDPSubgraphService; +const FWSS_SP_ADDRESS = "0xAaaaAAaaaaAAaaaAaAaAaaAaaaAaAaAaaAaaa111"; +const FWSS_PAYER = "0xBBbbBBbbBBbBBbBbbBBbbBBbbbbBbBBbbBBbb222"; +const EXAMPLE_PIECE_CID = "baga6ea4seaqpzwrimvoc4jp4l7mk6knsknf6owsc2ev4krrs2peenl5qelh6u4y"; +const pieceCidHex = `0x${Buffer.from(CID.parse(EXAMPLE_PIECE_CID).bytes).toString("hex")}`; + +const makeSampleRoot = (overrides: Record = {}) => ({ + rootId: "1", + cid: pieceCidHex, + rawSize: "1048576", + ipfsRootCID: "bafyroot", + proofSet: { + setId: "42", + withIPFSIndexing: true, + fwssPayer: FWSS_PAYER.toLowerCase(), + pdpPaymentEndEpoch: null, + }, + ...overrides, +}); + +const makeSampleResponse = (roots: Record[] = [], blockNumber = 12345) => ({ + data: { + _meta: { block: { number: blockNumber } }, + roots, + }, +}); + +const SAMPLE_KEY = "0x0000000000000000000000000000000000000000000000000000000000000001"; +const defaultSampleParams = { + serviceProvider: FWSS_SP_ADDRESS, + payer: FWSS_PAYER, + sampleKey: SAMPLE_KEY, + minSize: "0", + maxSize: 
"1000000000000", + pool: "indexed" as const, +}; + +describe("SubgraphService", () => { + let service: SubgraphService; let fetchMock: ReturnType; beforeEach(() => { const configService = { get: vi.fn((key: keyof IConfig) => { if (key === "blockchain") { - return { pdpSubgraphEndpoint: SUBGRAPH_ENDPOINT }; + return { subgraphEndpoint: SUBGRAPH_ENDPOINT }; } return undefined; }), } as unknown as ConfigService; - service = new PDPSubgraphService(configService); + service = new SubgraphService(configService); fetchMock = vi.fn(); vi.stubGlobal("fetch", fetchMock); @@ -362,10 +399,10 @@ describe("PDPSubgraphService", () => { it("throws when PDP subgraph endpoint is not configured", async () => { const configService = { - get: vi.fn(() => ({ pdpSubgraphEndpoint: "" })), + get: vi.fn(() => ({ subgraphEndpoint: "" })), } as unknown as ConfigService; - const serviceWithoutEndpoint = new PDPSubgraphService(configService); + const serviceWithoutEndpoint = new SubgraphService(configService); await expect(serviceWithoutEndpoint.fetchSubgraphMeta()).rejects.toThrow("No PDP subgraph endpoint configured"); }); @@ -691,4 +728,120 @@ describe("PDPSubgraphService", () => { expect(timestamps.length).toBe(1); }); }); + + describe("sampleAnonPiece", () => { + it("returns null when endpoint is not configured", async () => { + const noEndpointConfig = { + get: vi.fn(() => ({ subgraphEndpoint: "" })), + } as unknown as ConfigService; + const noEndpointService = new SubgraphService(noEndpointConfig); + + const piece = await noEndpointService.sampleAnonPiece(defaultSampleParams); + expect(piece).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("returns null when the subgraph yields no matching root", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSampleResponse([]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + expect(piece).toBeNull(); + }); + + it("parses the sampled root into a decoded candidate 
piece", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSampleResponse([makeSampleRoot()]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + + expect(piece).toMatchObject({ + pieceCid: EXAMPLE_PIECE_CID, + pieceId: "1", + dataSetId: "42", + rawSize: "1048576", + withIPFSIndexing: true, + ipfsRootCid: "bafyroot", + pdpPaymentEndEpoch: null, + indexedAtBlock: 12345, + }); + }); + + it("returns pdpPaymentEndEpoch as bigint when the dataset is terminating", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => + makeSampleResponse([ + makeSampleRoot({ + proofSet: { + setId: "42", + withIPFSIndexing: true, + fwssPayer: FWSS_PAYER.toLowerCase(), + pdpPaymentEndEpoch: "5000", + }, + }), + ]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + expect(piece?.pdpPaymentEndEpoch).toBe(5000n); + }); + + it("lowercases SP and payer addresses before querying", async () => { + fetchMock.mockResolvedValueOnce({ ok: true, json: async () => makeSampleResponse([]) }); + + await service.sampleAnonPiece(defaultSampleParams); + + const [, opts] = fetchMock.mock.calls[0]; + const body = JSON.parse(opts.body as string); + expect(body.variables.serviceProvider).toBe(FWSS_SP_ADDRESS.toLowerCase()); + expect(body.variables.payer).toBe(FWSS_PAYER.toLowerCase()); + expect(body.query).toContain("withIPFSIndexing: true"); + }); + + it("uses the any-pool query when pool is 'any'", async () => { + fetchMock.mockResolvedValueOnce({ ok: true, json: async () => makeSampleResponse([]) }); + + await service.sampleAnonPiece({ ...defaultSampleParams, pool: "any" }); + + const [, opts] = fetchMock.mock.calls[0]; + const body = JSON.parse(opts.body as string); + expect(body.query).not.toContain("withIPFSIndexing: true"); + }); + + it("returns null when the sampled root has an undecodable CID", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => 
makeSampleResponse([makeSampleRoot({ cid: "0xdeadbeef" })]), + }); + + const piece = await service.sampleAnonPiece(defaultSampleParams); + expect(piece).toBeNull(); + }); + + it("throws after max retries on repeated HTTP errors", async () => { + fetchMock.mockResolvedValue({ ok: false, status: 500, statusText: "Internal Server Error" }); + + const promise = service.sampleAnonPiece(defaultSampleParams); + promise.catch(() => {}); + await vi.runAllTimersAsync(); + + await expect(promise).rejects.toThrow("Failed to fetch subgraph sample_anon_piece_indexed after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("does not retry on schema validation failure", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ data: { _meta: { block: { number: 1 } } } }), // missing roots + }); + + await expect(service.sampleAnonPiece(defaultSampleParams)).rejects.toThrow(/validation failed/i); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + }); }); diff --git a/apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts b/apps/backend/src/subgraph/subgraph.service.ts similarity index 52% rename from apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts rename to apps/backend/src/subgraph/subgraph.service.ts index aedd8bce..55359179 100644 --- a/apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts +++ b/apps/backend/src/subgraph/subgraph.service.ts @@ -2,9 +2,40 @@ import { Injectable, Logger } from "@nestjs/common"; import { ConfigService } from "@nestjs/config"; import { toStructuredError } from "../common/logging.js"; import type { IBlockchainConfig, IConfig } from "../config/app.config.js"; -import { Queries } from "./queries.js"; -import type { GraphQLResponse, ProviderDataSetResponse, ProvidersWithDataSetsOptions, SubgraphMeta } from "./types.js"; -import { validateProviderDataSetResponse, validateSubgraphMetaResponse } from "./types.js"; +import { buildSampleAnonPieceQuery, Queries } from "./queries.js"; +import type { + 
AnonCandidatePiece, + GraphQLResponse, + ProviderDataSetResponse, + ProvidersWithDataSetsOptions, + RawSampleAnonPieceResponse, + SubgraphMeta, +} from "./types.js"; +import { + decodePieceCid, + validateProviderDataSetResponse, + validateSampleAnonPieceResponse, + validateSubgraphMetaResponse, +} from "./types.js"; + +/** Pool of pieces to sample from. */ +export type AnonPiecePool = "indexed" | "any"; + +/** Inputs for a single anonymous piece sample query. */ +export type SampleAnonPieceParams = { + /** Service provider address (lowercase hex). */ + serviceProvider: string; + /** Dealbot's own payer address (excluded to keep the sample non-dealbot). */ + payer: string; + /** Uniform-random 32-byte sort key as `0x`-prefixed hex. */ + sampleKey: string; + /** Inclusive lower bound on raw piece size in bytes (decimal string). */ + minSize: string; + /** Inclusive upper bound on raw piece size in bytes (decimal string). */ + maxSize: string; + /** Which pool to sample from. */ + pool: AnonPiecePool; +}; /** * Error thrown when data validation fails. 
@@ -21,8 +52,8 @@ class ValidationError extends Error { } @Injectable() -export class PDPSubgraphService { - private readonly logger: Logger = new Logger(PDPSubgraphService.name); +export class SubgraphService { + private readonly logger: Logger = new Logger(SubgraphService.name); private readonly blockchainConfig: IBlockchainConfig; private static readonly MAX_PROVIDERS_PER_QUERY = 100; @@ -45,14 +76,14 @@ export class PDPSubgraphService { * @throws Error if endpoint is not configured or after MAX_RETRIES attempts */ async fetchSubgraphMeta(attempt: number = 1): Promise { - if (!this.blockchainConfig.pdpSubgraphEndpoint) { + if (!this.blockchainConfig.subgraphEndpoint) { throw new Error("No PDP subgraph endpoint configured"); } try { await this.enforceRateLimit(); - const response = await fetch(this.blockchainConfig.pdpSubgraphEndpoint, { + const response = await fetch(this.blockchainConfig.subgraphEndpoint, { method: "POST", headers: { "Content-Type": "application/json", @@ -95,13 +126,13 @@ export class PDPSubgraphService { } // Retry on network/HTTP errors - if (attempt < PDPSubgraphService.MAX_RETRIES) { - const delay = PDPSubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + if (attempt < SubgraphService.MAX_RETRIES) { + const delay = SubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); this.logger.warn({ event: "subgraph_meta_request_retry", message: "Subgraph meta request failed. 
Retrying...", attempt, - maxRetries: PDPSubgraphService.MAX_RETRIES, + maxRetries: SubgraphService.MAX_RETRIES, retryDelayMs: delay, error: toStructuredError(error), }); @@ -112,11 +143,11 @@ export class PDPSubgraphService { this.logger.error({ event: "subgraph_meta_request_failed", message: "Subgraph meta request failed after maximum retries", - maxRetries: PDPSubgraphService.MAX_RETRIES, + maxRetries: SubgraphService.MAX_RETRIES, error: toStructuredError(error), }); throw new Error( - `Failed to fetch subgraph metadata after ${PDPSubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, + `Failed to fetch subgraph metadata after ${SubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, ); } } @@ -136,13 +167,154 @@ export class PDPSubgraphService { return []; } - if (addresses.length <= PDPSubgraphService.MAX_PROVIDERS_PER_QUERY) { + if (addresses.length <= SubgraphService.MAX_PROVIDERS_PER_QUERY) { return this.fetchWithRetry(blockNumber, addresses); } return this.fetchMultipleBatchesWithRateLimit(blockNumber, addresses); } + /** + * Draw a single random anonymous piece for retrieval testing. + * + * Uses the Root.sampleKey (keccak256 of the entity id) to pick the + * smallest key ≥ `params.sampleKey` that matches the filters — a uniform + * random pick when `sampleKey` is generated uniformly. Server-side filters + * cover SP, payer-exclusion, active status, size range, and optionally + * `withIPFSIndexing`. Returns null when no piece matches (callers should + * retry with a fresh sampleKey or relax the pool/bucket). + * + * `pdpPaymentEndEpoch` is returned to the caller for a cheap client-side + * epoch comparison — GraphQL filters on nullable BigInts are awkward. 
+ */ + async sampleAnonPiece(params: SampleAnonPieceParams): Promise<AnonCandidatePiece | null> { + if (!this.blockchainConfig.subgraphEndpoint) { + return null; + } + + const query = buildSampleAnonPieceQuery(params.pool); + const variables = { + serviceProvider: params.serviceProvider.toLowerCase(), + payer: params.payer.toLowerCase(), + sampleKey: params.sampleKey, + minSize: params.minSize, + maxSize: params.maxSize, + }; + + const validated = await this.executeQuery( + `sample_anon_piece_${params.pool}`, + query, + variables, + validateSampleAnonPieceResponse, + ); + + const root = validated.roots[0]; + if (!root) { + return null; + } + + try { + return { + pieceCid: decodePieceCid(root.cid), + pieceId: root.rootId, + dataSetId: root.proofSet.setId, + rawSize: root.rawSize, + withIPFSIndexing: root.proofSet.withIPFSIndexing, + ipfsRootCid: root.ipfsRootCID ?? null, + indexedAtBlock: validated._meta.block.number, + pdpPaymentEndEpoch: root.proofSet.pdpPaymentEndEpoch != null ? BigInt(root.proofSet.pdpPaymentEndEpoch) : null, + }; + } catch (error) { + this.logger.warn({ + event: "anon_piece_cid_decode_failed", + message: "Failed to decode piece CID from subgraph data", + dataSetId: root.proofSet.setId, + pieceId: root.rootId, + error: toStructuredError(error), + }); + return null; + } + } + + /** + * Generic single-query helper with retry and rate limiting. Used by queries that + * don't fit the batched provider-fetch shape. 
+ */ + private async executeQuery<T>( + operationName: string, + query: string, + variables: Record<string, unknown>, + transform: (data: unknown) => T, + attempt: number = 1, + ): Promise<T> { + if (!this.blockchainConfig.subgraphEndpoint) { + throw new Error("No PDP subgraph endpoint configured"); + } + + try { + await this.enforceRateLimit(); + + const response = await fetch(this.blockchainConfig.subgraphEndpoint, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ query, variables }), + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const result = (await response.json()) as GraphQLResponse; + + if (result.errors) { + const errorMessage = result.errors?.[0]?.message || "Unknown GraphQL error"; + throw new Error(`GraphQL error: ${errorMessage}`); + } + + try { + return transform(result.data); + } catch (validationError) { + const errorMessage = validationError instanceof Error ? validationError.message : "Unknown validation error"; + throw new ValidationError(`Data validation failed: ${errorMessage}`); + } + } catch (error) { + const errorMessage = error instanceof Error ? error.message : "Unknown error"; + + if (error instanceof ValidationError) { + this.logger.error({ + event: `subgraph_${operationName}_validation_failed`, + message: `Subgraph ${operationName} validation failed`, + error: toStructuredError(error), + }); + throw error; + } + + if (attempt < SubgraphService.MAX_RETRIES) { + const delay = SubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + this.logger.warn({ + event: `subgraph_${operationName}_request_retry`, + message: `Subgraph ${operationName} request failed. 
Retrying...`, + attempt, + maxRetries: SubgraphService.MAX_RETRIES, + retryDelayMs: delay, + error: toStructuredError(error), + }); + await new Promise((resolve) => setTimeout(resolve, delay)); + return this.executeQuery(operationName, query, variables, transform, attempt + 1); + } + + this.logger.error({ + event: `subgraph_${operationName}_request_failed`, + message: `Subgraph ${operationName} request failed after maximum retries`, + maxRetries: SubgraphService.MAX_RETRIES, + error: toStructuredError(error), + }); + throw new Error( + `Failed to fetch subgraph ${operationName} after ${SubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, + ); + } + } + /** * Fetch multiple batches with rate limiting and concurrency control */ @@ -151,15 +323,15 @@ export class PDPSubgraphService { addresses: string[], ): Promise { const batches: string[][] = []; - for (let i = 0; i < addresses.length; i += PDPSubgraphService.MAX_PROVIDERS_PER_QUERY) { - const addressesLimit = Math.min(addresses.length, i + PDPSubgraphService.MAX_PROVIDERS_PER_QUERY); + for (let i = 0; i < addresses.length; i += SubgraphService.MAX_PROVIDERS_PER_QUERY) { + const addressesLimit = Math.min(addresses.length, i + SubgraphService.MAX_PROVIDERS_PER_QUERY); batches.push(addresses.slice(i, addressesLimit)); } const allProviders: ProviderDataSetResponse["providers"] = []; - for (let i = 0; i < batches.length; i += PDPSubgraphService.MAX_CONCURRENT_REQUESTS) { - const batchGroup = batches.slice(i, i + PDPSubgraphService.MAX_CONCURRENT_REQUESTS); + for (let i = 0; i < batches.length; i += SubgraphService.MAX_CONCURRENT_REQUESTS) { + const batchGroup = batches.slice(i, i + SubgraphService.MAX_CONCURRENT_REQUESTS); const results = await Promise.all(batchGroup.map((batch) => this.fetchWithRetry(blockNumber, batch))); @@ -178,7 +350,7 @@ export class PDPSubgraphService { addresses: string[], attempt: number = 1, ): Promise { - if (!this.blockchainConfig.pdpSubgraphEndpoint) { + if 
(!this.blockchainConfig.subgraphEndpoint) { throw new Error("No PDP subgraph endpoint configured"); } @@ -190,7 +362,7 @@ export class PDPSubgraphService { try { await this.enforceRateLimit(); - const response = await fetch(this.blockchainConfig.pdpSubgraphEndpoint, { + const response = await fetch(this.blockchainConfig.subgraphEndpoint, { method: "POST", headers: { "Content-Type": "application/json", @@ -235,13 +407,13 @@ export class PDPSubgraphService { } // Retry on network/HTTP errors - if (attempt < PDPSubgraphService.MAX_RETRIES) { - const delay = PDPSubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + if (attempt < SubgraphService.MAX_RETRIES) { + const delay = SubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); this.logger.warn({ event: "subgraph_provider_request_retry", message: "Subgraph provider request failed. Retrying...", attempt, - maxRetries: PDPSubgraphService.MAX_RETRIES, + maxRetries: SubgraphService.MAX_RETRIES, retryDelayMs: delay, addressCount: addresses.length, error: toStructuredError(error), @@ -253,14 +425,12 @@ export class PDPSubgraphService { this.logger.error({ event: "subgraph_provider_request_failed", message: "Subgraph provider request failed after maximum retries", - maxRetries: PDPSubgraphService.MAX_RETRIES, + maxRetries: SubgraphService.MAX_RETRIES, blockNumber, addressCount: addresses.length, error: toStructuredError(error), }); - throw new Error( - `Failed to fetch provider data after ${PDPSubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, - ); + throw new Error(`Failed to fetch provider data after ${SubgraphService.MAX_RETRIES} attempts: ${errorMessage}`); } } @@ -270,18 +440,18 @@ export class PDPSubgraphService { * Read more here: https://docs.goldsky.com/subgraphs/graphql-endpoints#public-endpoints */ private async enforceRateLimit(requestCount: number = 1): Promise { - if (requestCount > PDPSubgraphService.MAX_CONCURRENT_REQUESTS) { + if (requestCount > 
SubgraphService.MAX_CONCURRENT_REQUESTS) { throw new Error( - `Cannot request ${requestCount} items; exceeds rate limit window of ${PDPSubgraphService.MAX_CONCURRENT_REQUESTS}`, + `Cannot request ${requestCount} items; exceeds rate limit window of ${SubgraphService.MAX_CONCURRENT_REQUESTS}`, ); } const now = Date.now(); - const windowStart = now - PDPSubgraphService.RATE_LIMIT_WINDOW_MS; + const windowStart = now - SubgraphService.RATE_LIMIT_WINDOW_MS; this.requestTimestamps = this.requestTimestamps.filter((timestamp) => timestamp > windowStart); - const availableSlots = PDPSubgraphService.MAX_CONCURRENT_REQUESTS - this.requestTimestamps.length; + const availableSlots = SubgraphService.MAX_CONCURRENT_REQUESTS - this.requestTimestamps.length; if (requestCount > availableSlots) { const requiredSlots = requestCount - availableSlots; @@ -290,7 +460,7 @@ export class PDPSubgraphService { const oldestTimestamp = this.requestTimestamps[index] || now; // wait time with 10ms buffer - const waitTime = oldestTimestamp + PDPSubgraphService.RATE_LIMIT_WINDOW_MS - now + 10; + const waitTime = oldestTimestamp + SubgraphService.RATE_LIMIT_WINDOW_MS - now + 10; if (waitTime > 0) { await new Promise((resolve) => setTimeout(resolve, waitTime)); diff --git a/apps/backend/src/pdp-subgraph/types.spec.ts b/apps/backend/src/subgraph/types.spec.ts similarity index 100% rename from apps/backend/src/pdp-subgraph/types.spec.ts rename to apps/backend/src/subgraph/types.spec.ts diff --git a/apps/backend/src/pdp-subgraph/types.ts b/apps/backend/src/subgraph/types.ts similarity index 58% rename from apps/backend/src/pdp-subgraph/types.ts rename to apps/backend/src/subgraph/types.ts index ad8dcdc4..3a89f360 100644 --- a/apps/backend/src/pdp-subgraph/types.ts +++ b/apps/backend/src/subgraph/types.ts @@ -1,4 +1,5 @@ import Joi from "joi"; +import { CID } from "multiformats/cid"; import { Hex, isAddress } from "viem"; // ----------------------------------------- @@ -54,6 +55,58 @@ export type 
ProviderDataSetResponse = { }[]; }; +/** A piece eligible for anonymous retrieval. */ +export type AnonCandidatePiece = { + /** Decoded piece CID string (e.g. "bafk..."). */ + pieceCid: string; + /** On-chain piece ID (rootId) as a decimal string. */ + pieceId: string; + /** On-chain dataset ID (setId) as a decimal string. */ + dataSetId: string; + /** Raw piece size in bytes, as a decimal string. */ + rawSize: string; + /** True iff the parent dataset declared withIPFSIndexing metadata. */ + withIPFSIndexing: boolean; + /** IPFS root CID declared by the client when uploading, or null. */ + ipfsRootCid: string | null; + /** Subgraph-indexed block number at query time. */ + indexedAtBlock: number; + /** pdpPaymentEndEpoch from the parent dataset, or null. */ + pdpPaymentEndEpoch: bigint | null; +}; + +/** + * Validated raw shape of the anonymous piece sampling subgraph response. + * At most one root is returned (`first: 1`). + */ +export type RawSampleAnonPieceResponse = { + _meta: { block: { number: number } }; + roots: Array<{ + rootId: string; + cid: string; + rawSize: string; + ipfsRootCID: string | null; + proofSet: { + setId: string; + withIPFSIndexing: boolean; + fwssPayer: string | null; + pdpPaymentEndEpoch: string | null; + }; + }>; +}; + +// ----------------------------------------- +// Helpers +// ----------------------------------------- + +/** + * Decodes a hex-encoded CID (0x...) into its string representation. 
+ */ +export function decodePieceCid(hexData: string): string { + const bytes = Buffer.from(hexData.slice(2), "hex"); + return CID.decode(new Uint8Array(bytes)).toString(); +} + // ----------------------------------------- // Joi Custom Schema Converters // ----------------------------------------- @@ -117,6 +170,41 @@ const providerDataSetResponseSchema = Joi.object({ .unknown(true) .required(); +const sampleRootProofSetSchema = Joi.object({ + setId: Joi.string().pattern(/^\d+$/).required(), + withIPFSIndexing: Joi.boolean().required(), + fwssPayer: Joi.string() + .pattern(/^0x[0-9a-fA-F]{40}$/) + .allow(null) + .optional(), + pdpPaymentEndEpoch: Joi.string().pattern(/^\d+$/).allow(null).optional(), +}).unknown(true); + +const sampleRootSchema = Joi.object({ + rootId: Joi.string().pattern(/^\d+$/).required(), + cid: Joi.string() + .pattern(/^0x[0-9a-fA-F]+$/) + .required(), + rawSize: Joi.string().pattern(/^\d+$/).required(), + ipfsRootCID: Joi.string().allow(null).optional(), + proofSet: sampleRootProofSetSchema.required(), +}).unknown(true); + +const sampleAnonPieceResponseSchema = Joi.object({ + _meta: Joi.object({ + block: Joi.object({ + number: Joi.number().integer().positive().required(), + }) + .unknown(true) + .required(), + }) + .unknown(true) + .required(), + roots: Joi.array().items(sampleRootSchema).max(1).required(), +}) + .unknown(true) + .required(); + // ----------------------------------------- // Validator Functions // ----------------------------------------- @@ -149,3 +237,16 @@ export function validateProviderDataSetResponse(value: unknown): ProviderDataSet } return validated as ProviderDataSetResponse; } + +/** + * Validates the raw sampleAnonPiece response from the subgraph. 
+ * + * @throws Error if validation fails + */ +export function validateSampleAnonPieceResponse(value: unknown): RawSampleAnonPieceResponse { + const { error, value: validated } = sampleAnonPieceResponseSchema.validate(value, { abortEarly: false }); + if (error) { + throw new Error(`Invalid sampleAnonPiece response format: ${error.message}`); + } + return validated as RawSampleAnonPieceResponse; +} diff --git a/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts b/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts index d6613a31..195db19f 100644 --- a/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts +++ b/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts @@ -18,7 +18,7 @@ const baseConfig: IBlockchainConfig = { checkDatasetCreationFees: false, useOnlyApprovedProviders: false, minNumDataSetsForChecks: 1, - pdpSubgraphEndpoint: "https://api.thegraph.com/subgraphs/filecoin/pdp", + subgraphEndpoint: "https://api.thegraph.com/subgraphs/filecoin/pdp", }; const makeProvider = (overrides: Partial): PDPProviderEx => diff --git a/docs/checks/data-retention.md b/docs/checks/data-retention.md index 605753e7..4eb7a912 100644 --- a/docs/checks/data-retention.md +++ b/docs/checks/data-retention.md @@ -27,7 +27,7 @@ Dealbot polls The Graph API endpoint for PDP (Proof of Data Possession) data at **Subgraph repository**: [FilOzone/pdp-explorer](https://github.com/FilOzone/pdp-explorer/blob/main/subgraph/src/pdp-verifier.ts) -**Subgraph endpoint**: Configured via `PDP_SUBGRAPH_ENDPOINT` environment variable (see [environment-variables.md](../environment-variables.md#pdp_subgraph_endpoint)) +**Subgraph endpoint**: Configured via `SUBGRAPH_ENDPOINT` environment variable (see [environment-variables.md](../environment-variables.md#subgraph_endpoint)) > **Note**: The production subgraph URL is currently being finalized [here](https://github.com/FilOzone/pdp-explorer/pull/86). 
@@ -48,7 +48,7 @@ From `GET_PROVIDERS_WITH_DATASETS` query for each provider: > **Note**: The subgraph query uses the field name `proofSets`, but this refers to "dataSets" in the current codebase. The terminology was updated from "proof set" to "data set" but the subgraph schema retains the old naming. -Source: [`pdp-subgraph.service.ts` (`fetchSubgraphMeta`, `fetchProvidersWithDatasets`)](../../apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts) +Source: [`subgraph.service.ts` (`fetchSubgraphMeta`, `fetchProvidersWithDatasets`)](../../apps/backend/src/subgraph/subgraph.service.ts) ### 2. Compute Challenge Totals and Overdue Estimates @@ -170,7 +170,7 @@ The PDP subgraph service enforces Goldsky's public endpoint rate limits: Rate limiting is enforced client-side to prevent 429 errors. -Source: [`pdp-subgraph.service.ts` (`enforceRateLimit`)](../../apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts) +Source: [`subgraph.service.ts` (`enforceRateLimit`)](../../apps/backend/src/subgraph/subgraph.service.ts) ## Metrics Recorded @@ -210,11 +210,11 @@ Key environment variables that control data retention check behavior: | Variable | Required | Default | Description | | ----------------------- | -------- | ------------ | ------------------------------------------------------------------------------------------------ | -| `PDP_SUBGRAPH_ENDPOINT` | No | Empty string | The Graph API endpoint for PDP subgraph queries. When empty, data retention checks are disabled. | +| `SUBGRAPH_ENDPOINT` | No | Empty string | The Graph API endpoint for PDP subgraph queries. When empty, data retention checks are disabled. | Source: [`app.config.ts`](../../apps/backend/src/config/app.config.ts) -See also: [`environment-variables.md`](../environment-variables.md#pdp_subgraph_endpoint) for the full configuration reference. +See also: [`environment-variables.md`](../environment-variables.md#subgraph_endpoint) for the full configuration reference. 
## Error Handling diff --git a/docs/checks/production-configuration-and-approval-methodology.md b/docs/checks/production-configuration-and-approval-methodology.md index 5566904d..3d956aa4 100644 --- a/docs/checks/production-configuration-and-approval-methodology.md +++ b/docs/checks/production-configuration-and-approval-methodology.md @@ -40,7 +40,7 @@ Relevant parameters include: | Parameter | Value | Notes | |-----------|-------|-------| -| [`PDP_SUBGRAPH_ENDPOINT`](../environment-variables.md#pdp_subgraph_endpoint) | TODO: fill this in | Uses the subgraph from [pdp-explorer](https://github.com/FilOzone/pdp-explorer). | +| [`SUBGRAPH_ENDPOINT`](../environment-variables.md#subgraph_endpoint) | TODO: fill this in | Uses the subgraph from [pdp-explorer](https://github.com/FilOzone/pdp-explorer). | | [`MIN_NUM_DATASETS_FOR_CHECKS`](../environment-variables.md#dataset-configuration) | 15 | Ensure there are enough datasets with pieces being added so that statistical significance for [Data Retention Fault Rate](#data-retention-fault-rate) can be achieved quicker. Note that on mainnet each dataset incurs 5 challenges[^1] per daily proof[^2]. With this many datasets, an SP can be approved for data retention after a faultless ~7 days even if the SP doesn't have other datasets. | See [How are data retention statistics/thresholds calculated?](#how-are-data-retention-statisticsthresholds-calculated) for more details. 
diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 359d86da..2f25943c 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -8,10 +8,10 @@ This document provides a comprehensive guide to all environment variables used b | ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | | [Application](#application-configuration) | `NODE_ENV`, `DEALBOT_PORT`, `DEALBOT_HOST`, `DEALBOT_RUN_MODE`, `DEALBOT_METRICS_PORT`, `DEALBOT_METRICS_HOST`, `DEALBOT_ALLOWED_ORIGINS`, `ENABLE_DEV_MODE` | | [Database](#database-configuration) | `DATABASE_HOST`, `DATABASE_PORT`, `DATABASE_POOL_MAX`, `DATABASE_USER`, `DATABASE_PASSWORD`, `DATABASE_NAME` | -| [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `PDP_SUBGRAPH_ENDPOINT` | +| [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `SUBGRAPH_ENDPOINT` | | [Dataset Versioning](#dataset-versioning) | `DEALBOT_DATASET_VERSION` | | [Scheduling](#scheduling-configuration) | `PROVIDERS_REFRESH_INTERVAL_SECONDS`, `DATA_RETENTION_POLL_INTERVAL_SECONDS`, `DEALBOT_MAINTENANCE_WINDOWS_UTC`, `DEALBOT_MAINTENANCE_WINDOW_MINUTES` | -| [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, `JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, `JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, `RETRIEVAL_JOB_TIMEOUT_SECONDS`, 
`IPFS_BLOCK_FETCH_CONCURRENCY` | +| [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, `JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, `JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, `RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, `IPFS_BLOCK_FETCH_CONCURRENCY` | | [Dataset](#dataset-configuration) | `DEALBOT_LOCAL_DATASETS_PATH`, `RANDOM_PIECE_SIZES` | | [ClickHouse](#clickhouse-configuration) | `CLICKHOUSE_URL`, `CLICKHOUSE_BATCH_SIZE`, `CLICKHOUSE_FLUSH_INTERVAL_MS`, `DEALBOT_PROBE_LOCATION` | | [Timeouts](#timeout-configuration) | `CONNECT_TIMEOUT_MS`, `HTTP_REQUEST_TIMEOUT_MS`, `HTTP2_REQUEST_TIMEOUT_MS`, `IPNI_VERIFICATION_TIMEOUT_MS`, `IPNI_VERIFICATION_POLLING_MS` | @@ -425,22 +425,25 @@ Session keys are scoped (only storage operations, not deposits or withdrawals) a --- -### `PDP_SUBGRAPH_ENDPOINT` +### `SUBGRAPH_ENDPOINT` - **Type**: `string` (URL) - **Required**: No - **Default**: Empty string (feature disabled) -**Role**: The Graph API endpoint for querying PDP (Proof of Data Possession) subgraph data. This endpoint is used to retrieve data retention info for provider data. +**Role**: The Graph API endpoint for querying PDP (Proof of Data Possession) subgraph data. Drives the overdue-periods metric and the anonymous-retrieval candidate-piece query. + +The dealbot-owned subgraph lives at `apps/subgraph/` (package `@dealbot/subgraph`) and is deployed to Goldsky. Point this variable at one of those slots; the exact slugs are documented in `apps/subgraph/README.md`. **When to update**: -- When switching between different Graph API endpoints +- When swapping between the dealbot-owned subgraph slots on Goldsky (mainnet vs calibnet). +- When deploying a new subgraph version. 
**Example**: ```bash -PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp +SUBGRAPH_ENDPOINT=https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn ``` --- @@ -784,6 +787,25 @@ Use this to stagger multiple dealbot deployments that are not sharing a database **Note**: This is independent of HTTP-level timeouts. The job timeout enforces end-to-end execution time of a Retrieval Check job. +--- + +### `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS` + +- **Type**: `number` +- **Required**: No +- **Default**: `360` (6 minutes) +- **Minimum**: `60` +- **Enforced**: Yes (config validation) + +**Role**: Maximum runtime for anonymous retrieval jobs before forced abort. Anonymous retrievals fetch arbitrary pieces (up to ~70 MiB) that were not produced by the dealbot, so this is typically larger than `RETRIEVAL_JOB_TIMEOUT_SECONDS`. When the timeout trips, partial metrics (`ttfb_ms`, `bytes_retrieved`, `response_code`) are still persisted so the abort is not silently lost. + +**When to update**: + +- Increase if large pieces are consistently being cut off mid-download +- Decrease to detect and fail stuck retrievals faster + +**Note**: This is independent of HTTP-level timeouts (`CONNECT_TIMEOUT_MS`, `HTTP2_REQUEST_TIMEOUT_MS`). The job timeout covers the end-to-end execution of an Anon Retrieval Check (piece selection, download, CommP validation, CAR/IPNI validation). 
+ --- ### `IPFS_BLOCK_FETCH_CONCURRENCY` diff --git a/kustomize/overlays/local/backend-configmap-local.yaml b/kustomize/overlays/local/backend-configmap-local.yaml index 9226d24e..b4febf61 100644 --- a/kustomize/overlays/local/backend-configmap-local.yaml +++ b/kustomize/overlays/local/backend-configmap-local.yaml @@ -26,7 +26,7 @@ data: PG_BOSS_LOCAL_CONCURRENCY: "3" JOB_WORKER_POLL_SECONDS: "60" RANDOM_PIECE_SIZES: "10485760" - PDP_SUBGRAPH_ENDPOINT: "https://api.goldsky.com/api/public/project_cmdfaaxeuz6us01u359yjdctw/subgraphs/pdp-explorer/calibration311a/gn" + SUBGRAPH_ENDPOINT: "https://api.goldsky.com/api/public/project_cmdfaaxeuz6us01u359yjdctw/subgraphs/pdp-explorer/calibration311a/gn" JOB_SCHEDULER_POLL_SECONDS: "60" CLICKHOUSE_URL: "http://default:@dealbot-clickhouse:8123/dealbot" DEALBOT_PROBE_LOCATION: "local" diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0495aa11..8089b756 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1513,24 +1513,24 @@ packages: engines: {node: ^14.18.0 || >=16.10.0, npm: '>=5.10.0'} hasBin: true - '@oclif/core@4.10.5': - resolution: {integrity: sha512-qcdCF7NrdWPfme6Kr34wwljRCXbCVpL1WVxiNy0Ep6vbWKjxAjFQwuhqkoyL0yjI+KdwtLcOCGn5z2yzdijc8w==} + '@oclif/core@4.10.6': + resolution: {integrity: sha512-ySCOYnPKZE3KACT1V9It99hWG9b8E5MpagbRdWxPNRO3beMqmbr4SLUQoFtZ9XRtW++kks1ZVwZOdpnR8rpb9A==} engines: {node: '>=18.0.0'} '@oclif/core@4.5.5': resolution: {integrity: sha512-iQzlaJQgPeUXrtrX71OzDwxPikQ7c2FhNd8U8rBB7BCtj2XYfmzBT/Hmbc+g9OKDIG/JkbJT0fXaWMMBrhi+1A==} engines: {node: '>=18.0.0'} - '@oclif/plugin-autocomplete@3.2.45': - resolution: {integrity: sha512-ENrUg8rbVCjh40uvi3MC9kGbiUoEf11nyqE59RBzegeeLpRXNo/Zp27L9j1tUmPEqGgfS2/wvHPihNzkpK1FDw==} + '@oclif/plugin-autocomplete@3.2.46': + resolution: {integrity: sha512-TFvuD6JlmqEVsEvMqunyj3cyCz/l2Q4MqCjp/XtlSLS9x3xTlam7PGlqWi4WAhxl/K8CtpYqVlMYFEnlLTHspw==} engines: {node: '>=18.0.0'} - '@oclif/plugin-not-found@3.2.80': - resolution: {integrity: 
sha512-yTLjWvR1r/Rd/cO2LxHdMCDoL5sQhBYRUcOMCmxZtWVWhx4rAZ8KVUPDVsb+SvjJDV5ADTDBgt1H52fFx7YWqg==} + '@oclif/plugin-not-found@3.2.81': + resolution: {integrity: sha512-M88tLONBH36hLAbkFbmCo1hoZPSdU5l8Px1xEIlIgSmGMam+CoAzx4kGqpLbokgfpaHeP8/Jx3QJ18u9ef/2Qw==} engines: {node: '>=18.0.0'} - '@oclif/plugin-warn-if-update-available@3.1.60': - resolution: {integrity: sha512-cRKBZm14IuA6G8W84dfd3iXj3BTAoxQ5o3pUE8DKEQ4n/tVha20t5nkVeD+ISC68e0Fuw5koTMvRwXb1lJSnzg==} + '@oclif/plugin-warn-if-update-available@3.1.61': + resolution: {integrity: sha512-4XcrTxcCs+brR/eZ0BPeuiREiH3USlJiaHbUqPhnIBuyxhhUSYVd8ZO6s5MQN7AXJq4SMQ+B5zLaHq+ep/afIw==} engines: {node: '>=18.0.0'} '@open-draft/deferred-promise@2.2.0': @@ -7599,9 +7599,9 @@ snapshots: dependencies: '@float-capital/float-subgraph-uncrashable': 0.0.0-internal-testing.5 '@oclif/core': 4.5.5 - '@oclif/plugin-autocomplete': 3.2.45 - '@oclif/plugin-not-found': 3.2.80(@types/node@25.6.2) - '@oclif/plugin-warn-if-update-available': 3.1.60 + '@oclif/plugin-autocomplete': 3.2.46 + '@oclif/plugin-not-found': 3.2.81(@types/node@25.2.3) + '@oclif/plugin-warn-if-update-available': 3.1.61 '@pinax/graph-networks-registry': 0.7.1 '@whatwg-node/fetch': 0.10.13 assemblyscript: 0.19.23 @@ -8937,7 +8937,7 @@ snapshots: dependencies: consola: 3.4.2 - '@oclif/core@4.10.5': + '@oclif/core@4.10.6': dependencies: ansi-escapes: 4.3.2 ansis: 3.17.0 @@ -8979,7 +8979,7 @@ snapshots: wordwrap: 1.0.0 wrap-ansi: 7.0.0 - '@oclif/plugin-autocomplete@3.2.45': + '@oclif/plugin-autocomplete@3.2.46': dependencies: '@oclif/core': 4.5.5 ansis: 3.17.0 @@ -8988,16 +8988,16 @@ snapshots: transitivePeerDependencies: - supports-color - '@oclif/plugin-not-found@3.2.80(@types/node@25.6.2)': + '@oclif/plugin-not-found@3.2.81(@types/node@25.2.3)': dependencies: - '@inquirer/prompts': 7.10.1(@types/node@25.6.2) - '@oclif/core': 4.10.5 + '@inquirer/prompts': 7.10.1(@types/node@25.2.3) + '@oclif/core': 4.10.6 ansis: 3.17.0 fast-levenshtein: 3.0.0 transitivePeerDependencies: - 
'@types/node' - '@oclif/plugin-warn-if-update-available@3.1.60': + '@oclif/plugin-warn-if-update-available@3.1.61': dependencies: '@oclif/core': 4.5.5 ansis: 3.17.0 @@ -11779,7 +11779,7 @@ snapshots: dependencies: foreground-child: 3.3.1 jackspeak: 4.2.3 - minimatch: 10.2.4 + minimatch: 10.2.5 minipass: 7.1.2 package-json-from-dist: 1.0.1 path-scurry: 2.0.1 From 96c82c66f050f5de83c2530f74dda0b18c68618a Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Wed, 29 Apr 2026 10:14:56 +0200 Subject: [PATCH 02/28] refactor(anon): only use clickhouse --- .../src/clickhouse/clickhouse.schema.ts | 36 ++++++ apps/backend/src/database/database.module.ts | 9 +- .../entities/anon-retrieval.entity.ts | 100 ----------------- .../1762000000000-CreateAnonRetrievals.ts | 64 ----------- .../anon-piece-selector.service.spec.ts | 49 ++++----- .../anon-piece-selector.service.ts | 54 ++++----- .../anon-retrieval.service.spec.ts | 98 ++++++++++------- .../retrieval-anon/anon-retrieval.service.ts | 104 ++++++++++-------- .../retrieval-anon/retrieval-anon.module.ts | 3 +- 9 files changed, 208 insertions(+), 309 deletions(-) delete mode 100644 apps/backend/src/database/entities/anon-retrieval.entity.ts delete mode 100644 apps/backend/src/database/migrations/1762000000000-CreateAnonRetrievals.ts diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index 85d91052..8af769d7 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -62,6 +62,42 @@ export function buildMigrations(database: string): string[] { PARTITION BY toStartOfMonth(timestamp) TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, + `CREATE TABLE IF NOT EXISTS ${database}.anon_retrieval_checks +( + timestamp DateTime64(3, 'UTC'), -- when the check completed + probe_location LowCardinality(String), -- dealbot location + sp_address String, -- storage provider address (lowercased) + sp_id Nullable(UInt64), -- storage 
provider numeric id + sp_name Nullable(String), -- storage provider name + + retrieval_id UUID, -- per-event correlation id (log/Prometheus join) + + piece_cid String, -- piece CID (v2/CommP) sampled from the subgraph + data_set_id UInt64, -- on-chain data set id + piece_id UInt64, -- on-chain piece id within the data set + raw_size UInt64, -- raw (unpadded) piece size, bytes + with_ipfs_indexing Bool, -- whether the piece advertises IPNI metadata + ipfs_root_cid Nullable(String), -- root CID of the contained DAG; null when not IPFS-indexed + + service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) + retrieval_endpoint String, -- URL probed (e.g. {spBaseUrl}/piece/{pieceCid}) + + status LowCardinality(String), -- RetrievalStatus: 'success' | 'failed' | 'pending' | 'in_progress' | 'timeout' + http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure + first_byte_ms Nullable(Float64), -- time to first response byte + last_byte_ms Nullable(Float64), -- time to last response byte + bytes_retrieved Nullable(UInt64), -- bytes received from /piece/{cid} + throughput_bps Nullable(UInt64), -- effective throughput, bytes per second + + commp_valid Nullable(Bool), -- null when retrieval failed before CommP could be hashed + car_valid Nullable(Bool), -- null when CAR validation was skipped (no IPFS indexing or piece fetch failed) + + error_message Nullable(String) -- failure reason; null on success +) ENGINE MergeTree() + PRIMARY KEY (probe_location, sp_address, timestamp) + PARTITION BY toStartOfMonth(timestamp) + TTL toDateTime(timestamp) + INTERVAL 1 YEAR`, + `CREATE TABLE IF NOT EXISTS ${database}.data_retention_challenges ( timestamp DateTime64(3, 'UTC'), -- when the poll ran and detected these periods diff --git a/apps/backend/src/database/database.module.ts b/apps/backend/src/database/database.module.ts index f3f9ed09..9249c3a9 100644 --- a/apps/backend/src/database/database.module.ts +++ 
b/apps/backend/src/database/database.module.ts @@ -7,7 +7,6 @@ import { fileURLToPath } from "url"; import { toStructuredError } from "../common/logging.js"; import { createPinoExitLogger } from "../common/pino.config.js"; import type { IAppConfig, IConfig, IDatabaseConfig } from "../config/app.config.js"; -import { AnonRetrieval } from "./entities/anon-retrieval.entity.js"; import { DataRetentionBaseline } from "./entities/data-retention-baseline.entity.js"; import { Deal } from "./entities/deal.entity.js"; import { JobScheduleState } from "./entities/job-schedule-state.entity.js"; @@ -50,7 +49,7 @@ function toSafeDataSourceContext(options: DataSourceOptions): Record { - await queryRunner.query(` - CREATE TABLE anon_retrievals ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - sp_address VARCHAR NOT NULL, - piece_cid VARCHAR NOT NULL, - data_set_id BIGINT NOT NULL, - piece_id BIGINT NOT NULL, - raw_size BIGINT NOT NULL, - with_ipfs_indexing BOOLEAN NOT NULL, - ipfs_root_cid VARCHAR NULL, - service_type VARCHAR NOT NULL DEFAULT 'direct_sp', - retrieval_endpoint VARCHAR NOT NULL, - status VARCHAR NOT NULL DEFAULT 'pending', - started_at TIMESTAMPTZ NOT NULL, - completed_at TIMESTAMPTZ NULL, - latency_ms INT NULL, - ttfb_ms INT NULL, - throughput_bps INT NULL, - bytes_retrieved BIGINT NULL, - response_code INT NULL, - error_message VARCHAR NULL, - commp_valid BOOLEAN NULL, - car_valid BOOLEAN NULL, - created_at TIMESTAMPTZ NOT NULL DEFAULT now(), - updated_at TIMESTAMPTZ NOT NULL DEFAULT now() - ) - `); - - // Per-SP dashboards. - await queryRunner.query(` - CREATE INDEX "IDX_anon_retrievals_sp_address" - ON anon_retrievals (sp_address) - `); - - // Used by the recent-dedup query in AnonPieceSelectorService — keeps the - // most-recently-tested CIDs out of the next selection. - await queryRunner.query(` - CREATE INDEX "IDX_anon_retrievals_piece_cid" - ON anon_retrievals (piece_cid) - `); - - // Supports "last N anonymous retrievals" ordering used by the selector. 
- await queryRunner.query(` - CREATE INDEX "IDX_anon_retrievals_created_at" - ON anon_retrievals (created_at DESC) - `); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE IF EXISTS anon_retrievals`); - } -} diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts index b822fe5f..6a787fbb 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts @@ -1,8 +1,6 @@ import type { ConfigService } from "@nestjs/config"; -import type { Repository } from "typeorm"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { IConfig } from "../config/app.config.js"; -import type { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import type { SampleAnonPieceParams, SubgraphService } from "../subgraph/subgraph.service.js"; import type { AnonCandidatePiece } from "../subgraph/types.js"; import { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; @@ -22,18 +20,6 @@ const makePiece = (overrides: Partial = {}): AnonCandidatePi ...overrides, }); -const makeRetrievalRepository = (recentPieceCids: string[]): Repository => { - const queryBuilder = { - select: vi.fn().mockReturnThis(), - orderBy: vi.fn().mockReturnThis(), - limit: vi.fn().mockReturnThis(), - getRawMany: vi.fn().mockResolvedValue(recentPieceCids.map((c) => ({ pieceCid: c }))), - }; - return { - createQueryBuilder: vi.fn().mockReturnValue(queryBuilder), - } as unknown as Repository; -}; - const makeConfigService = (): ConfigService => ({ get: vi.fn((key: string) => { @@ -55,7 +41,7 @@ describe("AnonPieceSelectorService", () => { it("returns null when every fallback attempt yields no piece", async () => { sampleAnonPiece.mockResolvedValue(null); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), 
makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); const result = await service.selectPieceForProvider(SP_ADDRESS); @@ -65,7 +51,7 @@ describe("AnonPieceSelectorService", () => { it("returns the sampled piece with SP address lowercased", async () => { sampleAnonPiece.mockResolvedValueOnce(makePiece({ pieceCid: "baga-the-one" })); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); const result = await service.selectPieceForProvider(SP_ADDRESS); @@ -76,7 +62,7 @@ describe("AnonPieceSelectorService", () => { it("passes the dealbot payer address to sampleAnonPiece for exclusion", async () => { sampleAnonPiece.mockResolvedValueOnce(makePiece()); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); await service.selectPieceForProvider(SP_ADDRESS); @@ -92,27 +78,30 @@ describe("AnonPieceSelectorService", () => { .mockResolvedValueOnce(makePiece({ pieceCid: staleCid, pdpPaymentEndEpoch: 100n, indexedAtBlock: 200 })) .mockResolvedValueOnce(makePiece({ pieceCid: freshCid, pdpPaymentEndEpoch: null })); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); const result = await service.selectPieceForProvider(SP_ADDRESS); expect(result?.pieceCid).toBe(freshCid); }); - it("redraws when the first sampled piece was recently tested", async () => { + it("redraws when the first sampled piece was recently selected by this process", async () => { const staleCid = "baga-stale"; const freshCid = "baga-fresh"; + + const service = new AnonPieceSelectorService(subgraphService, 
makeConfigService()); + + // Prime the in-memory ring buffer by first selecting `staleCid`. + sampleAnonPiece.mockResolvedValueOnce(makePiece({ pieceCid: staleCid })); + const first = await service.selectPieceForProvider(SP_ADDRESS); + expect(first?.pieceCid).toBe(staleCid); + + // Now the second selection should skip `staleCid` and use `freshCid`. sampleAnonPiece .mockResolvedValueOnce(makePiece({ pieceCid: staleCid })) .mockResolvedValueOnce(makePiece({ pieceCid: freshCid })); + const second = await service.selectPieceForProvider(SP_ADDRESS); - const service = new AnonPieceSelectorService( - subgraphService, - makeConfigService(), - makeRetrievalRepository([staleCid]), - ); - const result = await service.selectPieceForProvider(SP_ADDRESS); - - expect(result?.pieceCid).toBe(freshCid); + expect(second?.pieceCid).toBe(freshCid); }); it("falls back to the opposite pool when the preferred one is empty", async () => { @@ -120,7 +109,7 @@ describe("AnonPieceSelectorService", () => { const fresh = makePiece({ pieceCid: "baga-other-pool" }); sampleAnonPiece.mockResolvedValueOnce(null).mockResolvedValueOnce(null).mockResolvedValueOnce(fresh); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); const result = await service.selectPieceForProvider(SP_ADDRESS); expect(result?.pieceCid).toBe("baga-other-pool"); @@ -141,7 +130,7 @@ describe("AnonPieceSelectorService", () => { .mockResolvedValueOnce(null) .mockResolvedValueOnce(makePiece({ pieceCid: "baga-any-bucket" })); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); const result = await service.selectPieceForProvider(SP_ADDRESS); expect(result?.pieceCid).toBe("baga-any-bucket"); @@ -156,7 +145,7 @@ 
describe("AnonPieceSelectorService", () => { it("draws a fresh sampleKey for each subgraph call", async () => { sampleAnonPiece.mockResolvedValueOnce(null).mockResolvedValueOnce(makePiece()); - const service = new AnonPieceSelectorService(subgraphService, makeConfigService(), makeRetrievalRepository([])); + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); await service.selectPieceForProvider(SP_ADDRESS); const call1 = sampleAnonPiece.mock.calls[0][0] as SampleAnonPieceParams; diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts index acc19832..8de50fa3 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts @@ -1,10 +1,7 @@ import { randomBytes } from "node:crypto"; import { Injectable, Logger } from "@nestjs/common"; import { ConfigService } from "@nestjs/config"; -import { InjectRepository } from "@nestjs/typeorm"; -import type { Repository } from "typeorm"; import type { IConfig } from "../config/app.config.js"; -import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import type { AnonPiecePool, SampleAnonPieceParams } from "../subgraph/subgraph.service.js"; import { SubgraphService } from "../subgraph/subgraph.service.js"; import type { AnonCandidatePiece } from "../subgraph/types.js"; @@ -15,6 +12,9 @@ import type { AnonPiece } from "./types.js"; * to avoid immediately retesting the same piece. Piece CIDs are globally * unique and each one lives on a single SP's dataset, so scoping by CID * is equivalent to scoping by (SP, CID) for this workload. + * + * The buffer is process-local: a duplicate piece that gets retested shortly + * after a restart is harmless (still a valid measurement, just less diverse). 
*/ const RECENT_DEDUP_WINDOW = 500; @@ -44,7 +44,7 @@ const BUCKET_WEIGHTS: Record = { /** * Probability the primary draw targets the withIPFSIndexing pool. - * The rest of the time we sample across all FWSS pieces so SPs can't + * The rest of the time we sample across all FWSS pieces, so SPs can't * optimise only their CAR corpus. */ const IPFS_INDEXED_SAMPLE_RATE = 0.8; @@ -53,11 +53,13 @@ const IPFS_INDEXED_SAMPLE_RATE = 0.8; export class AnonPieceSelectorService { private readonly logger = new Logger(AnonPieceSelectorService.name); + /** Bounded FIFO of recently-selected piece CIDs. Process-local; lost on restart. */ + private readonly recentlyTested = new Set(); + private readonly recentlyTestedQueue: string[] = []; + constructor( private readonly subgraphService: SubgraphService, private readonly configService: ConfigService, - @InjectRepository(AnonRetrieval) - private readonly anonRetrievalRepository: Repository, ) {} /** @@ -75,14 +77,13 @@ export class AnonPieceSelectorService { */ async selectPieceForProvider(spAddress: string): Promise { const dealbotPayer = this.configService.get("blockchain", { infer: true }).walletAddress; - const recentlyTested = await this.loadRecentlyTestedPieceCids(); const bucket = this.pickBucket(); const pool: AnonPiecePool = Math.random() < IPFS_INDEXED_SAMPLE_RATE ? "indexed" : "any"; const attempts: Array<{ bucket: SizeBucket | "any"; pool: AnonPiecePool }> = [ - { bucket, pool }, - { bucket, pool: pool === "indexed" ? "any" : "indexed" }, + { bucket: bucket, pool: pool }, + { bucket: bucket, pool: pool === "indexed" ? 
"any" : "indexed" }, { bucket: "any", pool: "indexed" }, { bucket: "any", pool: "any" }, ]; @@ -93,10 +94,10 @@ export class AnonPieceSelectorService { dealbotPayer, bucket: attempt.bucket, pool: attempt.pool, - recentlyTested, }); if (piece) { + this.rememberRecent(piece.pieceCid); this.logger.log({ event: "anon_piece_selected", message: "Selected anonymous piece for retrieval test", @@ -107,6 +108,7 @@ export class AnonPieceSelectorService { bucket: attempt.bucket, pool: attempt.pool, }); + return { pieceCid: piece.pieceCid, dataSetId: piece.dataSetId, @@ -124,6 +126,7 @@ export class AnonPieceSelectorService { message: "No anonymous piece found after all fallbacks", spAddress, }); + return null; } @@ -136,7 +139,6 @@ export class AnonPieceSelectorService { dealbotPayer: string; bucket: SizeBucket | "any"; pool: AnonPiecePool; - recentlyTested: Set; }): Promise { const range = args.bucket === "any" ? fullRange() : SIZE_BUCKETS[args.bucket]; @@ -159,7 +161,7 @@ export class AnonPieceSelectorService { continue; } - if (args.recentlyTested.has(piece.pieceCid)) { + if (this.recentlyTested.has(piece.pieceCid)) { continue; } @@ -181,19 +183,21 @@ export class AnonPieceSelectorService { return "medium"; } - /** - * Return the set of piece CIDs tested in the last RECENT_DEDUP_WINDOW - * anonymous retrievals across all SPs. - */ - private async loadRecentlyTestedPieceCids(): Promise> { - const rows = await this.anonRetrievalRepository - .createQueryBuilder("r") - .select("r.piece_cid", "pieceCid") - .orderBy("r.created_at", "DESC") - .limit(RECENT_DEDUP_WINDOW) - .getRawMany<{ pieceCid: string }>(); - - return new Set(rows.map((row) => row.pieceCid)); + /** Push a CID into the bounded FIFO; evict the oldest when at capacity. 
*/ + private rememberRecent(pieceCid: string): void { + if (this.recentlyTested.has(pieceCid)) { + return; + } + + this.recentlyTested.add(pieceCid); + this.recentlyTestedQueue.push(pieceCid); + + while (this.recentlyTestedQueue.length > RECENT_DEDUP_WINDOW) { + const evicted = this.recentlyTestedQueue.shift(); + if (evicted !== undefined) { + this.recentlyTested.delete(evicted); + } + } } } diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index 61e97105..e6619e32 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -1,6 +1,6 @@ import type { Repository } from "typeorm"; import { beforeEach, describe, expect, it, vi } from "vitest"; -import type { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; +import type { ClickhouseService } from "../clickhouse/clickhouse.service.js"; import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { RetrievalStatus } from "../database/types.js"; import type { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; @@ -35,20 +35,18 @@ function makeProvider(): StorageProvider { function makeService(opts: { pieceResult: PieceRetrievalResult; fetchPieceImpl?: (signal?: AbortSignal) => Promise; + clickhouseEnabled?: boolean; }): { service: AnonRetrievalService; - saveSpy: ReturnType; + insertSpy: ReturnType; fetchSpy: ReturnType; } { - const saveSpy = vi.fn(async (entity: AnonRetrieval) => entity); - const createdEntities: Partial[] = []; - const anonRetrievalRepository = { - create: vi.fn((data: Partial) => { - createdEntities.push(data); - return data; - }), - save: saveSpy, - } as unknown as Repository; + const insertSpy = vi.fn(); + const clickhouseService = { + insert: insertSpy, + enabled: opts.clickhouseEnabled ?? 
true, + probeLocation: "test-location", + } as unknown as ClickhouseService; const spRepository = { findOne: vi.fn(async () => makeProvider()), @@ -89,11 +87,11 @@ function makeService(opts: { carValidationService, walletSdkService, metrics, - anonRetrievalRepository, + clickhouseService, spRepository, ); - return { service, saveSpy, fetchSpy }; + return { service, insertSpy, fetchSpy }; } describe("AnonRetrievalService", () => { @@ -101,7 +99,7 @@ describe("AnonRetrievalService", () => { vi.clearAllMocks(); }); - it("persists partial metrics when fetchPiece returns aborted=true", async () => { + it("emits a ClickHouse row with partial metrics when fetchPiece returns aborted=true", async () => { const partial: PieceRetrievalResult = { success: false, pieceCid: PIECE.pieceCid, @@ -116,22 +114,28 @@ describe("AnonRetrievalService", () => { aborted: true, }; - const { service, saveSpy } = makeService({ pieceResult: partial }); + const { service, insertSpy } = makeService({ pieceResult: partial }); await service.performForProvider(SP_ADDRESS); - expect(saveSpy).toHaveBeenCalledTimes(1); - const saved = saveSpy.mock.calls[0][0] as Partial; - expect(saved.status).toBe(RetrievalStatus.FAILED); - expect(saved.bytesRetrieved).toBe(524288); - expect(saved.ttfbMs).toBe(150); - expect(saved.latencyMs).toBe(42000); - expect(saved.throughputBps).toBe(12500); - expect(saved.responseCode).toBe(200); - expect(saved.errorMessage).toContain("Anon retrieval job timeout"); + expect(insertSpy).toHaveBeenCalledTimes(1); + const [table, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(table).toBe("anon_retrieval_checks"); + expect(row.status).toBe(RetrievalStatus.FAILED); + expect(row.bytes_retrieved).toBe(524288); + expect(row.first_byte_ms).toBe(150); + expect(row.last_byte_ms).toBe(42000); + expect(row.throughput_bps).toBe(12500); + expect(row.http_response_code).toBe(200); + expect(row.error_message).toContain("Anon retrieval job timeout"); + 
expect(row.piece_cid).toBe(PIECE.pieceCid); + expect(row.sp_address).toBe(SP_ADDRESS); + expect(row.sp_id).toBe(7); + expect(row.probe_location).toBe("test-location"); + expect(typeof row.retrieval_id).toBe("string"); }); - it("still saves a row when the signal aborts before fetchPiece runs", async () => { + it("still emits a row when the signal aborts before fetchPiece runs", async () => { const ac = new AbortController(); ac.abort(new Error("Anon retrieval job timeout (60s) for sp1")); @@ -147,20 +151,20 @@ describe("AnonRetrievalService", () => { commPValid: false, }; - const { service, saveSpy, fetchSpy } = makeService({ pieceResult: never }); + const { service, insertSpy, fetchSpy } = makeService({ pieceResult: never }); await service.performForProvider(SP_ADDRESS, ac.signal); expect(fetchSpy).not.toHaveBeenCalled(); - expect(saveSpy).toHaveBeenCalledTimes(1); - const saved = saveSpy.mock.calls[0][0] as Partial; - expect(saved.status).toBe(RetrievalStatus.FAILED); - expect(saved.errorMessage).toContain("Anon retrieval job timeout"); - expect(saved.bytesRetrieved).toBeNull(); - expect(saved.ttfbMs).toBeNull(); + expect(insertSpy).toHaveBeenCalledTimes(1); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.status).toBe(RetrievalStatus.FAILED); + expect(row.error_message).toContain("Anon retrieval job timeout"); + expect(row.bytes_retrieved).toBeNull(); + expect(row.first_byte_ms).toBeNull(); }); - it("still saves a row when fetchPiece throws unexpectedly", async () => { + it("still emits a row when fetchPiece throws unexpectedly", async () => { const never: PieceRetrievalResult = { success: false, pieceCid: PIECE.pieceCid, @@ -173,7 +177,7 @@ describe("AnonRetrievalService", () => { commPValid: false, }; - const { service, saveSpy } = makeService({ + const { service, insertSpy } = makeService({ pieceResult: never, fetchPieceImpl: async () => { throw new Error("network down"); @@ -182,8 +186,28 @@ describe("AnonRetrievalService", () => { 
await expect(service.performForProvider(SP_ADDRESS)).rejects.toThrow("network down"); - expect(saveSpy).toHaveBeenCalledTimes(1); - const saved = saveSpy.mock.calls[0][0] as Partial; - expect(saved.status).toBe(RetrievalStatus.FAILED); + expect(insertSpy).toHaveBeenCalledTimes(1); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.status).toBe(RetrievalStatus.FAILED); + }); + + it("skips ClickHouse insert when ClickHouse is disabled", async () => { + const ok: PieceRetrievalResult = { + success: true, + pieceCid: PIECE.pieceCid, + bytesReceived: 1024, + pieceBytes: null, + latencyMs: 100, + ttfbMs: 10, + throughputBps: 10240, + statusCode: 200, + commPValid: true, + }; + + const { service, insertSpy } = makeService({ pieceResult: ok, clickhouseEnabled: false }); + + await service.performForProvider(SP_ADDRESS); + + expect(insertSpy).not.toHaveBeenCalled(); }); }); diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index d40fe315..1d56d2f0 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -1,8 +1,9 @@ +import { randomUUID } from "node:crypto"; import { Injectable, Logger } from "@nestjs/common"; import { InjectRepository } from "@nestjs/typeorm"; import type { Repository } from "typeorm"; +import { ClickhouseService } from "../clickhouse/clickhouse.service.js"; import { type ProviderJobContext, toStructuredError } from "../common/logging.js"; -import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { RetrievalStatus, ServiceType } from "../database/types.js"; import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; @@ -13,6 +14,8 @@ import { CarValidationService } from "./car-validation.service.js"; import { PieceRetrievalService } from 
"./piece-retrieval.service.js"; import type { CarValidationResult, PieceRetrievalResult } from "./types.js"; +const ANON_RETRIEVAL_CHECKS_TABLE = "anon_retrieval_checks"; + @Injectable() export class AnonRetrievalService { private readonly logger = new Logger(AnonRetrievalService.name); @@ -23,17 +26,12 @@ export class AnonRetrievalService { private readonly carValidationService: CarValidationService, private readonly walletSdkService: WalletSdkService, private readonly metrics: AnonRetrievalCheckMetrics, - @InjectRepository(AnonRetrieval) - private readonly anonRetrievalRepository: Repository, + private readonly clickhouseService: ClickhouseService, @InjectRepository(StorageProvider) private readonly spRepository: Repository, ) {} - async performForProvider( - spAddress: string, - signal?: AbortSignal, - logContext?: ProviderJobContext, - ): Promise { + async performForProvider(spAddress: string, signal?: AbortSignal, logContext?: ProviderJobContext): Promise { // Build metric labels const provider = await this.spRepository.findOne({ where: { address: spAddress } }); const labels = buildCheckMetricLabels({ @@ -53,7 +51,7 @@ export class AnonRetrievalService { spAddress, }); this.metrics.recordStatus(labels, "failure.no_piece"); - return null; + return; } this.logger.log({ @@ -72,7 +70,6 @@ export class AnonRetrievalService { let pieceResult: PieceRetrievalResult | null = null; let carResult: CarValidationResult | null = null; - let saved: AnonRetrieval | null = null; try { // 2. Fetch the piece. fetchPiece never throws on abort — it returns a @@ -141,16 +138,15 @@ export class AnonRetrievalService { pieceResult.success ? "success" : pieceResult.aborted ? "failure.aborted" : "failure.http", ); } finally { - // Always save a record — even on abort or unexpected error — so we never - // lose the evidence (ttfb, bytes, response code) we already collected. 
+ // Always emit a ClickHouse row — even on abort or unexpected error — so + // we never lose the evidence (ttfb, bytes, response code) we already + // collected. pieceResult ??= buildAbortedPlaceholder(piece.pieceCid, signal?.reason); - saved = await this.saveRetrievalRecord(spAddress, piece, pieceResult, carResult, startedAt, logContext); + this.emitClickhouseRow(spAddress, piece, pieceResult, carResult, startedAt, provider, logContext); } - - return saved; } - private async saveRetrievalRecord( + private emitClickhouseRow( spAddress: string, piece: { pieceCid: string; @@ -163,52 +159,70 @@ export class AnonRetrievalService { pieceResult: PieceRetrievalResult, carResult: CarValidationResult | null, startedAt: Date, + provider: StorageProvider | null, logContext?: ProviderJobContext, - ): Promise { + ): void { + if (!this.clickhouseService.enabled) { + this.logger.debug({ + ...logContext, + event: "anon_retrieval_clickhouse_disabled", + message: "ClickHouse disabled — anon retrieval row not emitted", + pieceCid: piece.pieceCid, + spAddress, + }); + return; + } + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; - - const retrieval = this.anonRetrievalRepository.create({ - spAddress, - pieceCid: piece.pieceCid, - dataSetId: BigInt(piece.dataSetId), - pieceId: BigInt(piece.pieceId), - rawSize: BigInt(piece.rawSize), - withIpfsIndexing: piece.withIPFSIndexing, - ipfsRootCid: piece.ipfsRootCid, - serviceType: ServiceType.DIRECT_SP, - retrievalEndpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, - status: pieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED, - startedAt, - completedAt: new Date(), - latencyMs: pieceResult.latencyMs > 0 ? Math.round(pieceResult.latencyMs) : null, - ttfbMs: pieceResult.ttfbMs > 0 ? Math.round(pieceResult.ttfbMs) : null, - throughputBps: pieceResult.throughputBps > 0 ? 
Math.round(pieceResult.throughputBps) : null, - bytesRetrieved: pieceResult.bytesReceived > 0 ? pieceResult.bytesReceived : null, - responseCode: pieceResult.statusCode > 0 ? pieceResult.statusCode : null, - errorMessage: pieceResult.errorMessage ?? null, - commpValid: pieceResult.success ? pieceResult.commPValid : null, - carValid: carResult ? carResult.ipniValid !== false && carResult.blockFetchValid !== false : null, - }); + const status = pieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; + const carValid = carResult ? carResult.ipniValid !== false && carResult.blockFetchValid !== false : null; + const retrievalId = randomUUID(); try { - await this.anonRetrievalRepository.save(retrieval); + this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { + timestamp: startedAt.getTime(), + probe_location: this.clickhouseService.probeLocation, + sp_address: spAddress, + sp_id: provider?.providerId != null ? Number(provider.providerId) : null, + sp_name: provider?.name ?? null, + retrieval_id: retrievalId, + piece_cid: piece.pieceCid, + data_set_id: piece.dataSetId, + piece_id: piece.pieceId, + raw_size: piece.rawSize, + with_ipfs_indexing: piece.withIPFSIndexing, + ipfs_root_cid: piece.ipfsRootCid, + service_type: ServiceType.DIRECT_SP, + retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, + status, + http_response_code: pieceResult.statusCode > 0 ? pieceResult.statusCode : null, + first_byte_ms: pieceResult.ttfbMs > 0 ? pieceResult.ttfbMs : null, + last_byte_ms: pieceResult.latencyMs > 0 ? pieceResult.latencyMs : null, + bytes_retrieved: pieceResult.bytesReceived > 0 ? pieceResult.bytesReceived : null, + throughput_bps: pieceResult.throughputBps > 0 ? Math.round(pieceResult.throughputBps) : null, + commp_valid: pieceResult.success ? pieceResult.commPValid : null, + car_valid: carValid, + error_message: pieceResult.errorMessage ?? 
null, + }); } catch (error) { + // ClickhouseService.insert is buffered/non-throwing in normal operation, but + // guard against unexpected runtime errors so we don't break the probe cycle. this.logger.warn({ ...logContext, - event: "anon_retrieval_save_failed", - message: "Failed to save anonymous retrieval record", + event: "anon_retrieval_clickhouse_insert_failed", + message: "Failed to enqueue anonymous retrieval row to ClickHouse", pieceCid: piece.pieceCid, spAddress, error: toStructuredError(error), }); - return null; } this.logger.log({ ...logContext, event: "anon_retrieval_completed", message: "Anonymous retrieval test completed", + retrievalId, pieceCid: piece.pieceCid, spAddress, success: pieceResult.success, @@ -220,8 +234,6 @@ export class AnonRetrievalService { ipniValid: carResult?.ipniValid, blockFetchValid: carResult?.blockFetchValid, }); - - return retrieval; } } diff --git a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts index 4e9e38df..c05dcb5f 100644 --- a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts +++ b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts @@ -1,7 +1,6 @@ import { Module } from "@nestjs/common"; import { ConfigModule } from "@nestjs/config"; import { TypeOrmModule } from "@nestjs/typeorm"; -import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { HttpClientModule } from "../http-client/http-client.module.js"; import { IpniModule } from "../ipni/ipni.module.js"; @@ -15,7 +14,7 @@ import { PieceRetrievalService } from "./piece-retrieval.service.js"; @Module({ imports: [ ConfigModule, - TypeOrmModule.forFeature([AnonRetrieval, StorageProvider]), + TypeOrmModule.forFeature([StorageProvider]), SubgraphModule, WalletSdkModule, HttpClientModule, From 81a38b1fa9fa62e8cd6707e74058bb1b0454c084 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein 
Date: Wed, 29 Apr 2026 11:01:55 +0200 Subject: [PATCH 03/28] feat(retrieval-anon): track ipni metrics --- .../src/clickhouse/clickhouse.schema.ts | 68 +++++++++++-------- .../anon-retrieval.service.spec.ts | 6 +- .../retrieval-anon/anon-retrieval.service.ts | 18 +++-- .../retrieval-anon/car-validation.service.ts | 65 ++++++++++++++---- apps/backend/src/retrieval-anon/types.ts | 5 ++ 5 files changed, 112 insertions(+), 50 deletions(-) diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index 8af769d7..e30f6151 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -64,35 +64,45 @@ export function buildMigrations(database: string): string[] { `CREATE TABLE IF NOT EXISTS ${database}.anon_retrieval_checks ( - timestamp DateTime64(3, 'UTC'), -- when the check completed - probe_location LowCardinality(String), -- dealbot location - sp_address String, -- storage provider address (lowercased) - sp_id Nullable(UInt64), -- storage provider numeric id - sp_name Nullable(String), -- storage provider name - - retrieval_id UUID, -- per-event correlation id (log/Prometheus join) - - piece_cid String, -- piece CID (v2/CommP) sampled from the subgraph - data_set_id UInt64, -- on-chain data set id - piece_id UInt64, -- on-chain piece id within the data set - raw_size UInt64, -- raw (unpadded) piece size, bytes - with_ipfs_indexing Bool, -- whether the piece advertises IPNI metadata - ipfs_root_cid Nullable(String), -- root CID of the contained DAG; null when not IPFS-indexed - - service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) - retrieval_endpoint String, -- URL probed (e.g. 
{spBaseUrl}/piece/{pieceCid}) - - status LowCardinality(String), -- RetrievalStatus: 'success' | 'failed' | 'pending' | 'in_progress' | 'timeout' - http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure - first_byte_ms Nullable(Float64), -- time to first response byte - last_byte_ms Nullable(Float64), -- time to last response byte - bytes_retrieved Nullable(UInt64), -- bytes received from /piece/{cid} - throughput_bps Nullable(UInt64), -- effective throughput, bytes per second - - commp_valid Nullable(Bool), -- null when retrieval failed before CommP could be hashed - car_valid Nullable(Bool), -- null when CAR validation was skipped (no IPFS indexing or piece fetch failed) - - error_message Nullable(String) -- failure reason; null on success + timestamp DateTime64(3, 'UTC'), -- when the check completed + probe_location LowCardinality(String), -- dealbot location + sp_address String, -- storage provider address (lowercased) + sp_id Nullable(UInt64), -- storage provider numeric id + sp_name Nullable(String), -- storage provider name + + retrieval_id UUID, -- per-event correlation id (log/Prometheus join) + + piece_cid String, -- piece CID (v2/CommP) sampled from the subgraph + data_set_id UInt64, -- on-chain data set id + piece_id UInt64, -- on-chain piece id within the data set + raw_size UInt64, -- raw (unpadded) piece size, bytes + with_ipfs_indexing Bool, -- whether the piece advertises IPNI metadata + ipfs_root_cid Nullable(String), -- root CID of the contained DAG; null when not IPFS-indexed + + service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) + retrieval_endpoint String, -- URL probed (e.g. {spBaseUrl}/piece/{pieceCid}) + + piece_fetch_status LowCardinality(String), -- 'success' | 'failed' — outcome of GET /piece/ (HTTP 2xx AND CommP match). CAR/IPNI/block-fetch outcomes live in their own columns. 
+ http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure + first_byte_ms Nullable(Float64), -- time to first response byte + last_byte_ms Nullable(Float64), -- time to last response byte + bytes_retrieved Nullable(UInt64), -- bytes received from /piece/{cid} + throughput_bps Nullable(UInt64), -- effective throughput, bytes per second + + commp_valid Nullable(Bool), -- null when retrieval failed before CommP could be hashed + car_parseable Nullable(Bool), -- null when CAR validation was skipped (no IPFS indexing or piece fetch failed); true if bytes parsed as a CAR + car_block_count Nullable(UInt32), -- total number of blocks observed inside the CAR; null when skipped or unparseable + block_fetch_endpoint Nullable(String), -- gateway base URL probed for block fetch (e.g. {spBaseUrl}/ipfs/); null when skipped + block_fetch_valid Nullable(Bool), -- null when skipped; true if all sampled blocks fetched + hash-verified + block_fetch_sampled_count Nullable(UInt32), -- number of blocks sampled and probed via /ipfs/?format=raw + block_fetch_failed_count Nullable(UInt32), -- number of sampled blocks that failed (non-2xx, hash mismatch, unsupported codec, or transport error) + + ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' (mirrors data_storage_checks naming) + ipni_verify_ms Nullable(Float64), -- IPNI verification duration; null when skipped + ipni_verified_cids_count Nullable(UInt32), -- CIDs confirmed findable via IPNI + ipni_unverified_cids_count Nullable(UInt32), -- CIDs checked but not findable + + error_message Nullable(String) -- failure reason; null on success ) ENGINE MergeTree() PRIMARY KEY (probe_location, sp_address, timestamp) PARTITION BY toStartOfMonth(timestamp) diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index e6619e32..275a3de2 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ 
b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -121,7 +121,7 @@ describe("AnonRetrievalService", () => { expect(insertSpy).toHaveBeenCalledTimes(1); const [table, row] = insertSpy.mock.calls[0] as [string, Record]; expect(table).toBe("anon_retrieval_checks"); - expect(row.status).toBe(RetrievalStatus.FAILED); + expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); expect(row.bytes_retrieved).toBe(524288); expect(row.first_byte_ms).toBe(150); expect(row.last_byte_ms).toBe(42000); @@ -158,7 +158,7 @@ describe("AnonRetrievalService", () => { expect(fetchSpy).not.toHaveBeenCalled(); expect(insertSpy).toHaveBeenCalledTimes(1); const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.status).toBe(RetrievalStatus.FAILED); + expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); expect(row.error_message).toContain("Anon retrieval job timeout"); expect(row.bytes_retrieved).toBeNull(); expect(row.first_byte_ms).toBeNull(); @@ -188,7 +188,7 @@ describe("AnonRetrievalService", () => { expect(insertSpy).toHaveBeenCalledTimes(1); const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.status).toBe(RetrievalStatus.FAILED); + expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); }); it("skips ClickHouse insert when ClickHouse is disabled", async () => { diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 1d56d2f0..8f2e135a 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -175,8 +175,9 @@ export class AnonRetrievalService { const providerInfo = this.walletSdkService.getProviderInfo(spAddress); const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; - const status = pieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; - const carValid = carResult ? 
carResult.ipniValid !== false && carResult.blockFetchValid !== false : null; + const pieceFetchStatus = pieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; + const ipniStatus = + carResult == null || carResult.ipniValid === null ? "skipped" : carResult.ipniValid ? "valid" : "invalid"; const retrievalId = randomUUID(); try { @@ -195,14 +196,23 @@ export class AnonRetrievalService { ipfs_root_cid: piece.ipfsRootCid, service_type: ServiceType.DIRECT_SP, retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, - status, + piece_fetch_status: pieceFetchStatus, http_response_code: pieceResult.statusCode > 0 ? pieceResult.statusCode : null, first_byte_ms: pieceResult.ttfbMs > 0 ? pieceResult.ttfbMs : null, last_byte_ms: pieceResult.latencyMs > 0 ? pieceResult.latencyMs : null, bytes_retrieved: pieceResult.bytesReceived > 0 ? pieceResult.bytesReceived : null, throughput_bps: pieceResult.throughputBps > 0 ? Math.round(pieceResult.throughputBps) : null, commp_valid: pieceResult.success ? pieceResult.commPValid : null, - car_valid: carValid, + car_parseable: carResult ? carResult.carParseable : null, + car_block_count: carResult?.carParseable ? carResult.blockCount : null, + block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, + block_fetch_valid: carResult ? carResult.blockFetchValid : null, + block_fetch_sampled_count: carResult?.carParseable ? carResult.sampledCidCount : null, + block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, + ipni_status: ipniStatus, + ipni_verify_ms: carResult?.ipniVerifyMs ?? null, + ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, + ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, error_message: pieceResult.errorMessage ?? 
null, }); } catch (error) { diff --git a/apps/backend/src/retrieval-anon/car-validation.service.ts b/apps/backend/src/retrieval-anon/car-validation.service.ts index 8019b8df..017a38e8 100644 --- a/apps/backend/src/retrieval-anon/car-validation.service.ts +++ b/apps/backend/src/retrieval-anon/car-validation.service.ts @@ -48,7 +48,18 @@ export class CarValidationService { ): Promise { const blocks = await this.parseCar(pieceBytes, provider.address, ipfsRootCid); if (blocks === null) { - return { carParseable: false, blockCount: 0, sampledCidCount: 0, ipniValid: null, blockFetchValid: null }; + return { + carParseable: false, + blockCount: 0, + sampledCidCount: 0, + ipniValid: null, + ipniVerifyMs: null, + ipniVerifiedCidsCount: null, + ipniUnverifiedCidsCount: null, + blockFetchValid: null, + blockFetchFailedCount: null, + blockFetchEndpoint: null, + }; } if (blocks.length === 0) { return { @@ -56,7 +67,12 @@ export class CarValidationService { blockCount: 0, sampledCidCount: 0, ipniValid: null, + ipniVerifyMs: null, + ipniVerifiedCidsCount: null, + ipniUnverifiedCidsCount: null, blockFetchValid: null, + blockFetchFailedCount: null, + blockFetchEndpoint: null, errorMessage: "CAR contained no blocks", }; } @@ -65,15 +81,20 @@ export class CarValidationService { const shuffled = [...blocks].sort(() => Math.random() - 0.5); const sampledBlocks = shuffled.slice(0, sampleCount); - const ipniValid = await this.checkIpni(provider, ipfsRootCid, sampledBlocks, signal); + const ipni = await this.checkIpni(provider, ipfsRootCid, sampledBlocks, signal); const blockFetchResult = await this.checkBlockFetch(sampledBlocks, provider.address, signal); return { carParseable: true, blockCount: blocks.length, sampledCidCount: sampledBlocks.length, - ipniValid, + ipniValid: ipni.valid, + ipniVerifyMs: ipni.durationMs, + ipniVerifiedCidsCount: ipni.verifiedCount, + ipniUnverifiedCidsCount: ipni.unverifiedCount, blockFetchValid: blockFetchResult.valid, + blockFetchFailedCount: 
blockFetchResult.failedCount, + blockFetchEndpoint: blockFetchResult.endpoint, errorMessage: blockFetchResult.errorMessage, }; } @@ -111,7 +132,12 @@ export class CarValidationService { ipfsRootCid: string, sampledBlocks: ReadonlyArray<{ cid: CID }>, signal?: AbortSignal, - ): Promise { + ): Promise<{ + valid: boolean; + durationMs: number | null; + verifiedCount: number | null; + unverifiedCount: number | null; + }> { const timeouts = this.configService.get("timeouts", { infer: true }); let rootCid: CID; try { @@ -124,7 +150,7 @@ export class CarValidationService { providerAddress: provider.address, error: toStructuredError(error), }); - return false; + return { valid: false, durationMs: null, verifiedCount: null, unverifiedCount: null }; } const result = await this.ipniVerificationService.verify({ @@ -136,7 +162,12 @@ export class CarValidationService { signal, }); - return result.rootCIDVerified; + return { + valid: result.rootCIDVerified, + durationMs: result.durationMs, + verifiedCount: result.verified, + unverifiedCount: result.unverified, + }; } /** @@ -148,14 +179,20 @@ export class CarValidationService { sampledBlocks: ReadonlyArray<{ cid: CID; bytes: Uint8Array }>, spAddress: string, signal?: AbortSignal, - ): Promise<{ valid: boolean | null; errorMessage?: string }> { + ): Promise<{ valid: boolean | null; failedCount: number | null; endpoint: string | null; errorMessage?: string }> { const providerInfo = this.walletSdkService.getProviderInfo(spAddress); if (!providerInfo) { - return { valid: null, errorMessage: `Provider info not found for ${spAddress}` }; + return { + valid: null, + failedCount: null, + endpoint: null, + errorMessage: `Provider info not found for ${spAddress}`, + }; } const spBaseUrl = providerInfo.pdp.serviceURL.replace(/\/$/, ""); - let allValid = true; + const endpoint = `${spBaseUrl}/ipfs/`; + let failedCount = 0; for (const block of sampledBlocks) { signal?.throwIfAborted(); @@ -170,7 +207,7 @@ export class CarValidationService { 
}); if (resp.metrics.statusCode < 200 || resp.metrics.statusCode >= 300) { - allValid = false; + failedCount += 1; this.logger.warn({ event: "block_fetch_non_2xx", message: "Block fetch returned non-2xx status", @@ -188,7 +225,7 @@ export class CarValidationService { cid: cidStr, spAddress, }); - allValid = false; + failedCount += 1; continue; } @@ -200,14 +237,14 @@ export class CarValidationService { cid: cidStr, spAddress, }); - allValid = false; + failedCount += 1; continue; } // Hash-verifies and decodes; throws on mismatch await createBlock({ bytes: resp.data, cid: block.cid, hasher: sha256, codec }); } catch (error) { - allValid = false; + failedCount += 1; this.logger.warn({ event: "block_fetch_failed", message: "Block fetch or hash verification failed", @@ -218,6 +255,6 @@ export class CarValidationService { } } - return { valid: allValid }; + return { valid: failedCount === 0, failedCount, endpoint }; } } diff --git a/apps/backend/src/retrieval-anon/types.ts b/apps/backend/src/retrieval-anon/types.ts index 2c3384d5..3ba2b9f9 100644 --- a/apps/backend/src/retrieval-anon/types.ts +++ b/apps/backend/src/retrieval-anon/types.ts @@ -30,6 +30,11 @@ export type CarValidationResult = { blockCount: number; sampledCidCount: number; ipniValid: boolean | null; + ipniVerifyMs: number | null; + ipniVerifiedCidsCount: number | null; + ipniUnverifiedCidsCount: number | null; blockFetchValid: boolean | null; + blockFetchFailedCount: number | null; + blockFetchEndpoint: string | null; errorMessage?: string; }; From 072a096b44ca2194bf2607f96abbba66364aae11 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Wed, 29 Apr 2026 12:57:14 +0200 Subject: [PATCH 04/28] test(retrieval-anon): new ipni fields --- .../anon-retrieval.service.spec.ts | 157 +++++++++++++++++- 1 file changed, 153 insertions(+), 4 deletions(-) diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index 
275a3de2..812b8169 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -9,7 +9,7 @@ import type { AnonPieceSelectorService } from "./anon-piece-selector.service.js" import { AnonRetrievalService } from "./anon-retrieval.service.js"; import type { CarValidationService } from "./car-validation.service.js"; import type { PieceRetrievalService } from "./piece-retrieval.service.js"; -import type { PieceRetrievalResult } from "./types.js"; +import type { AnonPiece, CarValidationResult, PieceRetrievalResult } from "./types.js"; const SP_ADDRESS = "0xaaaa0000000000000000000000000000000000aa"; @@ -36,10 +36,13 @@ function makeService(opts: { pieceResult: PieceRetrievalResult; fetchPieceImpl?: (signal?: AbortSignal) => Promise; clickhouseEnabled?: boolean; + piece?: AnonPiece; + carResult?: CarValidationResult; }): { service: AnonRetrievalService; insertSpy: ReturnType; fetchSpy: ReturnType; + validateCarSpy: ReturnType; } { const insertSpy = vi.fn(); const clickhouseService = { @@ -53,7 +56,7 @@ function makeService(opts: { } as unknown as Repository; const anonPieceSelector = { - selectPieceForProvider: vi.fn(async () => PIECE), + selectPieceForProvider: vi.fn(async () => opts.piece ?? PIECE), } as unknown as AnonPieceSelectorService; const fetchSpy = vi.fn(opts.fetchPieceImpl ?? 
(async () => opts.pieceResult)); @@ -61,8 +64,9 @@ function makeService(opts: { fetchPiece: fetchSpy, } as unknown as PieceRetrievalService; + const validateCarSpy = vi.fn(async () => opts.carResult); const carValidationService = { - validateCarPiece: vi.fn(), + validateCarPiece: validateCarSpy, } as unknown as CarValidationService; const walletSdkService = { @@ -91,7 +95,7 @@ function makeService(opts: { spRepository, ); - return { service, insertSpy, fetchSpy }; + return { service, insertSpy, fetchSpy, validateCarSpy }; } describe("AnonRetrievalService", () => { @@ -133,6 +137,19 @@ describe("AnonRetrievalService", () => { expect(row.sp_id).toBe(7); expect(row.probe_location).toBe("test-location"); expect(typeof row.retrieval_id).toBe("string"); + + // CAR/IPNI/block-fetch were never run on a non-IPFS-indexed piece — every + // dimension column should explicitly say "skipped" (ipni_status) or null. + expect(row.car_parseable).toBeNull(); + expect(row.car_block_count).toBeNull(); + expect(row.block_fetch_endpoint).toBeNull(); + expect(row.block_fetch_valid).toBeNull(); + expect(row.block_fetch_sampled_count).toBeNull(); + expect(row.block_fetch_failed_count).toBeNull(); + expect(row.ipni_status).toBe("skipped"); + expect(row.ipni_verify_ms).toBeNull(); + expect(row.ipni_verified_cids_count).toBeNull(); + expect(row.ipni_unverified_cids_count).toBeNull(); }); it("still emits a row when the signal aborts before fetchPiece runs", async () => { @@ -210,4 +227,136 @@ describe("AnonRetrievalService", () => { expect(insertSpy).not.toHaveBeenCalled(); }); + + describe("with IPFS indexing", () => { + const INDEXED_PIECE: AnonPiece = { + ...PIECE, + withIPFSIndexing: true, + ipfsRootCid: "bafyrootcid", + }; + + function okPiece(bytes: Buffer): PieceRetrievalResult { + return { + success: true, + pieceCid: INDEXED_PIECE.pieceCid, + bytesReceived: bytes.length, + pieceBytes: bytes, + latencyMs: 200, + ttfbMs: 20, + throughputBps: 51200, + statusCode: 200, + commPValid: true, 
+ }; + } + + it("emits populated CAR/IPNI/block-fetch columns when validation fully succeeds", async () => { + const carResult: CarValidationResult = { + carParseable: true, + blockCount: 42, + sampledCidCount: 5, + ipniValid: true, + ipniVerifyMs: 137, + ipniVerifiedCidsCount: 6, + ipniUnverifiedCidsCount: 0, + blockFetchValid: true, + blockFetchFailedCount: 0, + blockFetchEndpoint: "https://sp.test/ipfs/", + }; + + const { service, insertSpy, validateCarSpy } = makeService({ + pieceResult: okPiece(Buffer.from("car-bytes")), + piece: INDEXED_PIECE, + carResult, + }); + + await service.performForProvider(SP_ADDRESS); + + expect(validateCarSpy).toHaveBeenCalledTimes(1); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); + expect(row.commp_valid).toBe(true); + expect(row.car_parseable).toBe(true); + expect(row.car_block_count).toBe(42); + expect(row.block_fetch_endpoint).toBe("https://sp.test/ipfs/"); + expect(row.block_fetch_valid).toBe(true); + expect(row.block_fetch_sampled_count).toBe(5); + expect(row.block_fetch_failed_count).toBe(0); + expect(row.ipni_status).toBe("valid"); + expect(row.ipni_verify_ms).toBe(137); + expect(row.ipni_verified_cids_count).toBe(6); + expect(row.ipni_unverified_cids_count).toBe(0); + }); + + it("distinguishes IPNI invalid from block-fetch failures with explicit counts", async () => { + const carResult: CarValidationResult = { + carParseable: true, + blockCount: 100, + sampledCidCount: 5, + ipniValid: false, + ipniVerifyMs: 250, + ipniVerifiedCidsCount: 0, + ipniUnverifiedCidsCount: 6, + blockFetchValid: false, + blockFetchFailedCount: 2, + blockFetchEndpoint: "https://sp.test/ipfs/", + }; + + const { service, insertSpy } = makeService({ + pieceResult: okPiece(Buffer.from("car-bytes")), + piece: INDEXED_PIECE, + carResult, + }); + + await service.performForProvider(SP_ADDRESS); + + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + // The 
piece-fetch path still succeeded — failures are surfaced as + // independent dimensions, not folded into piece_fetch_status. + expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); + expect(row.car_parseable).toBe(true); + expect(row.ipni_status).toBe("invalid"); + expect(row.ipni_verified_cids_count).toBe(0); + expect(row.ipni_unverified_cids_count).toBe(6); + expect(row.block_fetch_valid).toBe(false); + expect(row.block_fetch_sampled_count).toBe(5); + expect(row.block_fetch_failed_count).toBe(2); + }); + + it("emits car_parseable=false with skipped IPNI/block-fetch when bytes don't parse as CAR", async () => { + const carResult: CarValidationResult = { + carParseable: false, + blockCount: 0, + sampledCidCount: 0, + ipniValid: null, + ipniVerifyMs: null, + ipniVerifiedCidsCount: null, + ipniUnverifiedCidsCount: null, + blockFetchValid: null, + blockFetchFailedCount: null, + blockFetchEndpoint: null, + }; + + const { service, insertSpy } = makeService({ + pieceResult: okPiece(Buffer.from("not-a-car")), + piece: INDEXED_PIECE, + carResult, + }); + + await service.performForProvider(SP_ADDRESS); + + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.car_parseable).toBe(false); + // car_block_count and block_fetch_sampled_count are gated on carParseable + // so an unparseable CAR doesn't emit a misleading 0. 
+ expect(row.car_block_count).toBeNull(); + expect(row.block_fetch_sampled_count).toBeNull(); + expect(row.block_fetch_endpoint).toBeNull(); + expect(row.block_fetch_valid).toBeNull(); + expect(row.block_fetch_failed_count).toBeNull(); + expect(row.ipni_status).toBe("skipped"); + expect(row.ipni_verify_ms).toBeNull(); + expect(row.ipni_verified_cids_count).toBeNull(); + expect(row.ipni_unverified_cids_count).toBeNull(); + }); + }); }); From 1fcee6001cda14f6ead2117c68ee1c40b2b927ff Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Wed, 29 Apr 2026 13:10:13 +0200 Subject: [PATCH 05/28] refactor(retrieval-anon): function signatures --- .../retrieval-anon/anon-retrieval.service.ts | 171 ++++++++---------- .../retrieval-anon/car-validation.service.ts | 40 ++-- 2 files changed, 93 insertions(+), 118 deletions(-) diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 8f2e135a..4c6ade8a 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -141,109 +141,90 @@ export class AnonRetrievalService { // Always emit a ClickHouse row — even on abort or unexpected error — so // we never lose the evidence (ttfb, bytes, response code) we already // collected. - pieceResult ??= buildAbortedPlaceholder(piece.pieceCid, signal?.reason); - this.emitClickhouseRow(spAddress, piece, pieceResult, carResult, startedAt, provider, logContext); - } - } + const finalPieceResult = pieceResult ?? 
buildAbortedPlaceholder(piece.pieceCid, signal?.reason); + const retrievalId = randomUUID(); - private emitClickhouseRow( - spAddress: string, - piece: { - pieceCid: string; - dataSetId: string; - pieceId: string; - rawSize: string; - withIPFSIndexing: boolean; - ipfsRootCid: string | null; - }, - pieceResult: PieceRetrievalResult, - carResult: CarValidationResult | null, - startedAt: Date, - provider: StorageProvider | null, - logContext?: ProviderJobContext, - ): void { - if (!this.clickhouseService.enabled) { - this.logger.debug({ - ...logContext, - event: "anon_retrieval_clickhouse_disabled", - message: "ClickHouse disabled — anon retrieval row not emitted", - pieceCid: piece.pieceCid, - spAddress, - }); - return; - } + if (this.clickhouseService.enabled) { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; + const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; + const ipniStatus = + carResult == null || carResult.ipniValid === null ? "skipped" : carResult.ipniValid ? "valid" : "invalid"; - const providerInfo = this.walletSdkService.getProviderInfo(spAddress); - const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; - const pieceFetchStatus = pieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; - const ipniStatus = - carResult == null || carResult.ipniValid === null ? "skipped" : carResult.ipniValid ? "valid" : "invalid"; - const retrievalId = randomUUID(); + try { + this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { + timestamp: startedAt.getTime(), + probe_location: this.clickhouseService.probeLocation, + sp_address: spAddress, + sp_id: provider?.providerId != null ? Number(provider.providerId) : null, + sp_name: provider?.name ?? 
null, + retrieval_id: retrievalId, + piece_cid: piece.pieceCid, + data_set_id: piece.dataSetId, + piece_id: piece.pieceId, + raw_size: piece.rawSize, + with_ipfs_indexing: piece.withIPFSIndexing, + ipfs_root_cid: piece.ipfsRootCid, + service_type: ServiceType.DIRECT_SP, + retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, + piece_fetch_status: pieceFetchStatus, + http_response_code: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, + first_byte_ms: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, + last_byte_ms: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, + bytes_retrieved: finalPieceResult.bytesReceived > 0 ? finalPieceResult.bytesReceived : null, + throughput_bps: finalPieceResult.throughputBps > 0 ? Math.round(finalPieceResult.throughputBps) : null, + commp_valid: finalPieceResult.success ? finalPieceResult.commPValid : null, + car_parseable: carResult ? carResult.carParseable : null, + car_block_count: carResult != null && carResult.carParseable ? carResult.blockCount : null, + block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, + block_fetch_valid: carResult ? carResult.blockFetchValid : null, + block_fetch_sampled_count: carResult != null && carResult.carParseable ? carResult.sampledCidCount : null, + block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, + ipni_status: ipniStatus, + ipni_verify_ms: carResult?.ipniVerifyMs ?? null, + ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, + ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, + error_message: finalPieceResult.errorMessage ?? null, + }); + } catch (error) { + // ClickhouseService.insert is buffered/non-throwing in normal operation, but + // guard against unexpected runtime errors so we don't break the probe cycle. 
+ this.logger.warn({ + ...logContext, + event: "anon_retrieval_clickhouse_insert_failed", + message: "Failed to enqueue anonymous retrieval row to ClickHouse", + pieceCid: piece.pieceCid, + spAddress, + error: toStructuredError(error), + }); + } + } else { + this.logger.debug({ + ...logContext, + event: "anon_retrieval_clickhouse_disabled", + message: "ClickHouse disabled — anon retrieval row not emitted", + pieceCid: piece.pieceCid, + spAddress, + }); + } - try { - this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { - timestamp: startedAt.getTime(), - probe_location: this.clickhouseService.probeLocation, - sp_address: spAddress, - sp_id: provider?.providerId != null ? Number(provider.providerId) : null, - sp_name: provider?.name ?? null, - retrieval_id: retrievalId, - piece_cid: piece.pieceCid, - data_set_id: piece.dataSetId, - piece_id: piece.pieceId, - raw_size: piece.rawSize, - with_ipfs_indexing: piece.withIPFSIndexing, - ipfs_root_cid: piece.ipfsRootCid, - service_type: ServiceType.DIRECT_SP, - retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, - piece_fetch_status: pieceFetchStatus, - http_response_code: pieceResult.statusCode > 0 ? pieceResult.statusCode : null, - first_byte_ms: pieceResult.ttfbMs > 0 ? pieceResult.ttfbMs : null, - last_byte_ms: pieceResult.latencyMs > 0 ? pieceResult.latencyMs : null, - bytes_retrieved: pieceResult.bytesReceived > 0 ? pieceResult.bytesReceived : null, - throughput_bps: pieceResult.throughputBps > 0 ? Math.round(pieceResult.throughputBps) : null, - commp_valid: pieceResult.success ? pieceResult.commPValid : null, - car_parseable: carResult ? carResult.carParseable : null, - car_block_count: carResult?.carParseable ? carResult.blockCount : null, - block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, - block_fetch_valid: carResult ? carResult.blockFetchValid : null, - block_fetch_sampled_count: carResult?.carParseable ? 
carResult.sampledCidCount : null, - block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, - ipni_status: ipniStatus, - ipni_verify_ms: carResult?.ipniVerifyMs ?? null, - ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, - ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, - error_message: pieceResult.errorMessage ?? null, - }); - } catch (error) { - // ClickhouseService.insert is buffered/non-throwing in normal operation, but - // guard against unexpected runtime errors so we don't break the probe cycle. - this.logger.warn({ + this.logger.log({ ...logContext, - event: "anon_retrieval_clickhouse_insert_failed", - message: "Failed to enqueue anonymous retrieval row to ClickHouse", + event: "anon_retrieval_completed", + message: "Anonymous retrieval test completed", + retrievalId, pieceCid: piece.pieceCid, spAddress, - error: toStructuredError(error), + success: finalPieceResult.success, + aborted: finalPieceResult.aborted === true, + latencyMs: finalPieceResult.latencyMs, + ttfbMs: finalPieceResult.ttfbMs, + bytesRetrieved: finalPieceResult.bytesReceived, + carParseable: carResult?.carParseable, + ipniValid: carResult?.ipniValid, + blockFetchValid: carResult?.blockFetchValid, }); } - - this.logger.log({ - ...logContext, - event: "anon_retrieval_completed", - message: "Anonymous retrieval test completed", - retrievalId, - pieceCid: piece.pieceCid, - spAddress, - success: pieceResult.success, - aborted: pieceResult.aborted === true, - latencyMs: pieceResult.latencyMs, - ttfbMs: pieceResult.ttfbMs, - bytesRetrieved: pieceResult.bytesReceived, - carParseable: carResult?.carParseable, - ipniValid: carResult?.ipniValid, - blockFetchValid: carResult?.blockFetchValid, - }); } } diff --git a/apps/backend/src/retrieval-anon/car-validation.service.ts b/apps/backend/src/retrieval-anon/car-validation.service.ts index 017a38e8..789f5ba6 100644 --- a/apps/backend/src/retrieval-anon/car-validation.service.ts +++ 
b/apps/backend/src/retrieval-anon/car-validation.service.ts @@ -46,8 +46,17 @@ export class CarValidationService { ipfsRootCid: string, signal?: AbortSignal, ): Promise { - const blocks = await this.parseCar(pieceBytes, provider.address, ipfsRootCid); - if (blocks === null) { + let blocks: { cid: CID; bytes: Uint8Array }[]; + try { + blocks = await this.parseCar(pieceBytes); + } catch (error) { + this.logger.debug({ + event: "car_parse_failed", + message: "Failed to parse piece bytes as CAR - client fault, not SP", + spAddress: provider.address, + ipfsRootCid, + error: toStructuredError(error), + }); return { carParseable: false, blockCount: 0, @@ -99,28 +108,13 @@ export class CarValidationService { }; } - private async parseCar( - pieceBytes: Buffer, - spAddress: string, - ipfsRootCid: string, - ): Promise<{ cid: CID; bytes: Uint8Array }[] | null> { - try { - const reader = await CarReader.fromBytes(new Uint8Array(pieceBytes)); - const blocks: { cid: CID; bytes: Uint8Array }[] = []; - for await (const block of reader.blocks()) { - blocks.push({ cid: block.cid, bytes: block.bytes }); - } - return blocks; - } catch (error) { - this.logger.debug({ - event: "car_parse_failed", - message: "Failed to parse piece bytes as CAR - client fault, not SP", - spAddress, - ipfsRootCid, - error: toStructuredError(error), - }); - return null; + private async parseCar(pieceBytes: Buffer): Promise<{ cid: CID; bytes: Uint8Array }[]> { + const reader = await CarReader.fromBytes(new Uint8Array(pieceBytes)); + const blocks: { cid: CID; bytes: Uint8Array }[] = []; + for await (const block of reader.blocks()) { + blocks.push({ cid: block.cid, bytes: block.bytes }); } + return blocks; } /** From 4527d292c1cb537287274eb9638a46a5641eff21 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Wed, 29 Apr 2026 14:24:41 +0200 Subject: [PATCH 06/28] refactor(retrieval-anon): cleanup --- .../check-metrics.service.ts | 4 +- .../anon-piece-selector.service.spec.ts | 16 ++++ 
.../anon-piece-selector.service.ts | 11 +-- .../anon-retrieval.service.spec.ts | 53 +++++++++-- .../retrieval-anon/anon-retrieval.service.ts | 33 +++---- .../retrieval-anon/car-validation.service.ts | 1 - .../src/subgraph/subgraph.service.spec.ts | 10 ++- apps/backend/src/subgraph/subgraph.service.ts | 89 +++---------------- 8 files changed, 103 insertions(+), 114 deletions(-) diff --git a/apps/backend/src/metrics-prometheus/check-metrics.service.ts b/apps/backend/src/metrics-prometheus/check-metrics.service.ts index 85f1cdcf..8d4be313 100644 --- a/apps/backend/src/metrics-prometheus/check-metrics.service.ts +++ b/apps/backend/src/metrics-prometheus/check-metrics.service.ts @@ -303,11 +303,11 @@ export class AnonRetrievalCheckMetrics { this.carParseCounter.inc({ ...labels, value: parseable ? "parseable" : "not_parseable" }); } - recordIpniStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped"): void { + recordIpniStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped" | "error"): void { this.ipniCounter.inc({ ...labels, value }); } - recordBlockFetchStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped"): void { + recordBlockFetchStatus(labels: CheckMetricLabels, value: "valid" | "invalid" | "skipped" | "error"): void { this.blockFetchCounter.inc({ ...labels, value }); } } diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts index 6a787fbb..32d13719 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts @@ -84,6 +84,22 @@ describe("AnonPieceSelectorService", () => { expect(result?.pieceCid).toBe(freshCid); }); + it("treats payment-end exactly equal to current epoch as terminated (boundary)", async () => { + // pdpPaymentEndEpoch === indexedAtBlock should be rejected (<=, not <). 
+ // This guards against an off-by-one regression where pieces in the final + // payment epoch silently slip through. + const boundaryCid = "baga-boundary"; + const liveCid = "baga-still-live"; + sampleAnonPiece + .mockResolvedValueOnce(makePiece({ pieceCid: boundaryCid, pdpPaymentEndEpoch: 200n, indexedAtBlock: 200 })) + .mockResolvedValueOnce(makePiece({ pieceCid: liveCid, pdpPaymentEndEpoch: 201n, indexedAtBlock: 200 })); + + const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); + const result = await service.selectPieceForProvider(SP_ADDRESS); + + expect(result?.pieceCid).toBe(liveCid); + }); + it("redraws when the first sampled piece was recently selected by this process", async () => { const staleCid = "baga-stale"; const freshCid = "baga-fresh"; diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts index 8de50fa3..342a4780 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts @@ -8,13 +8,7 @@ import type { AnonCandidatePiece } from "../subgraph/types.js"; import type { AnonPiece } from "./types.js"; /** - * Number of most-recently-tested anonymous pieces to exclude from selection - * to avoid immediately retesting the same piece. Piece CIDs are globally - * unique and each one lives on a single SP's dataset, so scoping by CID - * is equivalent to scoping by (SP, CID) for this workload. - * - * The buffer is process-local: a duplicate piece that gets retested shortly - * after a restart is harmless (still a valid measurement, just less diverse). + * Number of most-recently-tested piece CIDs to exclude from re-selection. 
*/ const RECENT_DEDUP_WINDOW = 500; @@ -157,6 +151,9 @@ export class AnonPieceSelectorService { continue; } + // On Filecoin FEVM the EVM block number IS the chain epoch (one block per + // epoch), so the subgraph's indexedAtBlock is a safe proxy for "now" when + // checking if PDP payment for this piece has already terminated. if (piece.pdpPaymentEndEpoch != null && piece.pdpPaymentEndEpoch <= BigInt(piece.indexedAtBlock)) { continue; } diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index 812b8169..b5f17c57 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -36,13 +36,17 @@ function makeService(opts: { pieceResult: PieceRetrievalResult; fetchPieceImpl?: (signal?: AbortSignal) => Promise; clickhouseEnabled?: boolean; - piece?: AnonPiece; + piece?: AnonPiece | null; carResult?: CarValidationResult; + validateCarImpl?: () => Promise; }): { service: AnonRetrievalService; insertSpy: ReturnType; fetchSpy: ReturnType; validateCarSpy: ReturnType; + metricsRecordStatusSpy: ReturnType; + metricsRecordIpniSpy: ReturnType; + metricsRecordBlockFetchSpy: ReturnType; } { const insertSpy = vi.fn(); const clickhouseService = { @@ -56,7 +60,7 @@ function makeService(opts: { } as unknown as Repository; const anonPieceSelector = { - selectPieceForProvider: vi.fn(async () => opts.piece ?? PIECE), + selectPieceForProvider: vi.fn(async () => (opts.piece === null ? null : (opts.piece ?? PIECE))), } as unknown as AnonPieceSelectorService; const fetchSpy = vi.fn(opts.fetchPieceImpl ?? (async () => opts.pieceResult)); @@ -64,7 +68,7 @@ function makeService(opts: { fetchPiece: fetchSpy, } as unknown as PieceRetrievalService; - const validateCarSpy = vi.fn(async () => opts.carResult); + const validateCarSpy = vi.fn(opts.validateCarImpl ?? 
(async () => opts.carResult)); const carValidationService = { validateCarPiece: validateCarSpy, } as unknown as CarValidationService; @@ -73,16 +77,19 @@ function makeService(opts: { getProviderInfo: vi.fn(() => ({ pdp: { serviceURL: "https://sp.test/" } })), } as unknown as WalletSdkService; + const metricsRecordStatusSpy = vi.fn(); + const metricsRecordIpniSpy = vi.fn(); + const metricsRecordBlockFetchSpy = vi.fn(); const metrics = { observeFirstByteMs: vi.fn(), observeLastByteMs: vi.fn(), observeThroughput: vi.fn(), observeCheckDuration: vi.fn(), - recordStatus: vi.fn(), + recordStatus: metricsRecordStatusSpy, recordHttpResponseCode: vi.fn(), recordCarParseStatus: vi.fn(), - recordIpniStatus: vi.fn(), - recordBlockFetchStatus: vi.fn(), + recordIpniStatus: metricsRecordIpniSpy, + recordBlockFetchStatus: metricsRecordBlockFetchSpy, } as unknown as AnonRetrievalCheckMetrics; const service = new AnonRetrievalService( @@ -95,7 +102,15 @@ function makeService(opts: { spRepository, ); - return { service, insertSpy, fetchSpy, validateCarSpy }; + return { + service, + insertSpy, + fetchSpy, + validateCarSpy, + metricsRecordStatusSpy, + metricsRecordIpniSpy, + metricsRecordBlockFetchSpy, + }; } describe("AnonRetrievalService", () => { @@ -322,6 +337,30 @@ describe("AnonRetrievalService", () => { expect(row.block_fetch_failed_count).toBe(2); }); + it("emits ipni_status='error' (not 'skipped') when CAR validation throws on a successful piece", async () => { + // Distinguishes a real infra outage (e.g. IpniVerificationService down) + // from a piece that legitimately had no IPFS indexing. Without the + // distinction, an outage looks like normal non-IPFS volume in dashboards. 
+ const { service, insertSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy } = makeService({ + pieceResult: okPiece(Buffer.from("car-bytes")), + piece: INDEXED_PIECE, + validateCarImpl: async () => { + throw new Error("IpniVerificationService down"); + }, + }); + + await service.performForProvider(SP_ADDRESS); + + expect(metricsRecordIpniSpy).toHaveBeenCalledWith(expect.anything(), "error"); + expect(metricsRecordBlockFetchSpy).toHaveBeenCalledWith(expect.anything(), "error"); + + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.ipni_status).toBe("error"); + // Piece-fetch path itself succeeded — only the validation pipeline failed. + expect(row.commp_valid).toBe(true); + expect(row.car_parseable).toBeNull(); + }); + it("emits car_parseable=false with skipped IPNI/block-fetch when bytes don't parse as CAR", async () => { const carResult: CarValidationResult = { carParseable: false, diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 4c6ade8a..418ea8d2 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -70,6 +70,7 @@ export class AnonRetrievalService { let pieceResult: PieceRetrievalResult | null = null; let carResult: CarValidationResult | null = null; + let validatedCarPiece: boolean = false; try { // 2. Fetch the piece. fetchPiece never throws on abort — it returns a @@ -96,13 +97,24 @@ export class AnonRetrievalService { !signal?.aborted ) { try { + validatedCarPiece = true; carResult = await this.carValidationService.validateCarPiece( pieceResult.pieceBytes, provider, piece.ipfsRootCid, signal, ); + this.metrics.recordCarParseStatus(labels, carResult.carParseable); + this.metrics.recordIpniStatus(labels, ipniStatusFromResult(carResult)); + this.metrics.recordBlockFetchStatus( + labels, + carResult.blockFetchValid === null ? "skipped" : carResult.blockFetchValid ? 
"valid" : "invalid", + ); } catch (error) { + // Validation was attempted on a successful piece retrieval but threw. + this.metrics.recordCarParseStatus(labels, false); + this.metrics.recordIpniStatus(labels, "error"); + this.metrics.recordBlockFetchStatus(labels, "error"); this.logger.warn({ ...logContext, event: "anon_retrieval_car_validation_failed", @@ -112,19 +124,6 @@ export class AnonRetrievalService { error: toStructuredError(error), }); } - } - - // Emit CAR validation metrics - if (carResult) { - this.metrics.recordCarParseStatus(labels, carResult.carParseable); - this.metrics.recordIpniStatus( - labels, - carResult.ipniValid === null ? "skipped" : carResult.ipniValid ? "valid" : "invalid", - ); - this.metrics.recordBlockFetchStatus( - labels, - carResult.blockFetchValid === null ? "skipped" : carResult.blockFetchValid ? "valid" : "invalid", - ); } else if (!pieceResult.success) { // Piece retrieval failed — IPNI and block fetch were skipped this.metrics.recordIpniStatus(labels, "skipped"); @@ -148,8 +147,7 @@ export class AnonRetrievalService { const providerInfo = this.walletSdkService.getProviderInfo(spAddress); const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; - const ipniStatus = - carResult == null || carResult.ipniValid === null ? "skipped" : carResult.ipniValid ? "valid" : "invalid"; + const ipniStatus = !validatedCarPiece ? "skipped" : carResult ? ipniStatusFromResult(carResult) : "error"; try { this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { @@ -228,6 +226,11 @@ export class AnonRetrievalService { } } +function ipniStatusFromResult(result: CarValidationResult): "valid" | "invalid" | "skipped" { + if (result.ipniValid === null) return "skipped"; + return result.ipniValid ? 
"valid" : "invalid"; +} + function buildAbortedPlaceholder(pieceCid: string, reason: unknown): PieceRetrievalResult { const message = reason instanceof Error && reason.message ? reason.message : typeof reason === "string" ? reason : "aborted"; diff --git a/apps/backend/src/retrieval-anon/car-validation.service.ts b/apps/backend/src/retrieval-anon/car-validation.service.ts index 789f5ba6..27ec2744 100644 --- a/apps/backend/src/retrieval-anon/car-validation.service.ts +++ b/apps/backend/src/retrieval-anon/car-validation.service.ts @@ -189,7 +189,6 @@ export class CarValidationService { let failedCount = 0; for (const block of sampledBlocks) { - signal?.throwIfAborted(); const cidStr = block.cid.toString(); const blockUrl = `${spBaseUrl}/ipfs/${cidStr}?format=raw`; diff --git a/apps/backend/src/subgraph/subgraph.service.spec.ts b/apps/backend/src/subgraph/subgraph.service.spec.ts index 4dc2cd5e..8703b2c5 100644 --- a/apps/backend/src/subgraph/subgraph.service.spec.ts +++ b/apps/backend/src/subgraph/subgraph.service.spec.ts @@ -730,14 +730,18 @@ describe("SubgraphService", () => { }); describe("sampleAnonPiece", () => { - it("returns null when endpoint is not configured", async () => { + it("throws when endpoint is not configured (distinct from empty result)", async () => { + // Returning null here would make a misconfigured deployment indistinguishable + // from a genuinely empty candidate pool — every anon job would silently + // no-op forever. Fail loudly instead. 
const noEndpointConfig = { get: vi.fn(() => ({ subgraphEndpoint: "" })), } as unknown as ConfigService; const noEndpointService = new SubgraphService(noEndpointConfig); - const piece = await noEndpointService.sampleAnonPiece(defaultSampleParams); - expect(piece).toBeNull(); + await expect(noEndpointService.sampleAnonPiece(defaultSampleParams)).rejects.toThrow( + "No PDP subgraph endpoint configured", + ); expect(fetchMock).not.toHaveBeenCalled(); }); diff --git a/apps/backend/src/subgraph/subgraph.service.ts b/apps/backend/src/subgraph/subgraph.service.ts index 55359179..3067532c 100644 --- a/apps/backend/src/subgraph/subgraph.service.ts +++ b/apps/backend/src/subgraph/subgraph.service.ts @@ -69,87 +69,12 @@ export class SubgraphService { } /** - * Fetch subgraph metadata including the latest indexed block number + * Fetch subgraph metadata including the latest indexed block number. * - * @param attempt - Current retry attempt number (default: 1) - * @returns Subgraph metadata with block number * @throws Error if endpoint is not configured or after MAX_RETRIES attempts */ - async fetchSubgraphMeta(attempt: number = 1): Promise { - if (!this.blockchainConfig.subgraphEndpoint) { - throw new Error("No PDP subgraph endpoint configured"); - } - - try { - await this.enforceRateLimit(); - - const response = await fetch(this.blockchainConfig.subgraphEndpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - query: Queries.GET_SUBGRAPH_META, - }), - }); - - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${response.statusText}`); - } - - const result = (await response.json()) as GraphQLResponse; - - if (result.errors) { - const errorMessage = result.errors?.[0]?.message || "Unknown GraphQL error"; - throw new Error(`GraphQL error: ${errorMessage}`); - } - let validated: SubgraphMeta; - try { - validated = validateSubgraphMetaResponse(result.data); - } catch (validationError) { - const errorMessage = 
validationError instanceof Error ? validationError.message : "Unknown validation error"; - throw new ValidationError(`Data validation failed: ${errorMessage}`); - } - - return validated; - } catch (error) { - const errorMessage = error instanceof Error ? error.message : "Unknown error"; - - // No need to retry on validation errors - they indicate schema/data issues, not transient failures - if (error instanceof ValidationError) { - this.logger.error({ - event: "subgraph_meta_validation_failed", - message: "Subgraph data validation failed", - error: toStructuredError(error), - }); - throw error; - } - - // Retry on network/HTTP errors - if (attempt < SubgraphService.MAX_RETRIES) { - const delay = SubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); - this.logger.warn({ - event: "subgraph_meta_request_retry", - message: "Subgraph meta request failed. Retrying...", - attempt, - maxRetries: SubgraphService.MAX_RETRIES, - retryDelayMs: delay, - error: toStructuredError(error), - }); - await new Promise((resolve) => setTimeout(resolve, delay)); - return this.fetchSubgraphMeta(attempt + 1); - } - - this.logger.error({ - event: "subgraph_meta_request_failed", - message: "Subgraph meta request failed after maximum retries", - maxRetries: SubgraphService.MAX_RETRIES, - error: toStructuredError(error), - }); - throw new Error( - `Failed to fetch subgraph metadata after ${SubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, - ); - } + async fetchSubgraphMeta(): Promise { + return this.executeQuery("metadata", Queries.GET_SUBGRAPH_META, {}, validateSubgraphMetaResponse); } /** @@ -189,7 +114,13 @@ export class SubgraphService { */ async sampleAnonPiece(params: SampleAnonPieceParams): Promise { if (!this.blockchainConfig.subgraphEndpoint) { - return null; + // Surface misconfiguration distinctly so it does not look like an empty + // candidate pool (which silently no-ops every anon retrieval job). 
+ this.logger.error({ + event: "subgraph_endpoint_not_configured", + message: "Cannot sample anonymous piece — no PDP subgraph endpoint configured", + }); + throw new Error("No PDP subgraph endpoint configured"); } const query = buildSampleAnonPieceQuery(params.pool); From a797c15255549fe57510301da22e6010086f2989 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Wed, 29 Apr 2026 14:27:11 +0200 Subject: [PATCH 07/28] chore: format code --- apps/backend/src/retrieval-anon/anon-retrieval.service.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 418ea8d2..c11daa19 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -107,8 +107,8 @@ export class AnonRetrievalService { this.metrics.recordCarParseStatus(labels, carResult.carParseable); this.metrics.recordIpniStatus(labels, ipniStatusFromResult(carResult)); this.metrics.recordBlockFetchStatus( - labels, - carResult.blockFetchValid === null ? "skipped" : carResult.blockFetchValid ? "valid" : "invalid", + labels, + carResult.blockFetchValid === null ? "skipped" : carResult.blockFetchValid ? "valid" : "invalid", ); } catch (error) { // Validation was attempted on a successful piece retrieval but threw. 
From 54cc48719c1fb24222ce63ef7f78216061f9c8bc Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Thu, 30 Apr 2026 09:59:58 +0200 Subject: [PATCH 08/28] fix: biome checks --- apps/backend/src/retrieval-anon/anon-retrieval.service.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index c11daa19..5343d59a 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -173,10 +173,10 @@ export class AnonRetrievalService { throughput_bps: finalPieceResult.throughputBps > 0 ? Math.round(finalPieceResult.throughputBps) : null, commp_valid: finalPieceResult.success ? finalPieceResult.commPValid : null, car_parseable: carResult ? carResult.carParseable : null, - car_block_count: carResult != null && carResult.carParseable ? carResult.blockCount : null, + car_block_count: carResult?.carParseable ? carResult?.blockCount : null, block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, block_fetch_valid: carResult ? carResult.blockFetchValid : null, - block_fetch_sampled_count: carResult != null && carResult.carParseable ? carResult.sampledCidCount : null, + block_fetch_sampled_count: carResult?.carParseable ? carResult?.sampledCidCount : null, block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, ipni_status: ipniStatus, ipni_verify_ms: carResult?.ipniVerifyMs ?? 
null, From fcfe569e5c4bac09e96388cd78a90951e493ddfd Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Thu, 30 Apr 2026 10:00:28 +0200 Subject: [PATCH 09/28] fix(ipni): return actual verified/unverified counts --- .../src/ipni/ipni-verification.service.ts | 122 +++++++++++------- 1 file changed, 72 insertions(+), 50 deletions(-) diff --git a/apps/backend/src/ipni/ipni-verification.service.ts b/apps/backend/src/ipni/ipni-verification.service.ts index 3d7d52f9..51fcc8e0 100644 --- a/apps/backend/src/ipni/ipni-verification.service.ts +++ b/apps/backend/src/ipni/ipni-verification.service.ts @@ -3,7 +3,7 @@ import { PDPProvider } from "filecoin-pin"; import { waitForIpniProviderResults } from "filecoin-pin/core/utils"; import { CID } from "multiformats/cid"; import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import type { IPNIVerificationResult } from "../deal-addons/strategies/ipni.types.js"; +import type { FailedCID, IPNIVerificationResult } from "../deal-addons/strategies/ipni.types.js"; export type IpniVerificationInput = { rootCid: CID; @@ -44,7 +44,6 @@ export class IpniVerificationService { const expectedProviders = [this.buildExpectedProviderInfo(storageProvider as StorageProviderWithUrl)]; const timeoutSignal = AbortSignal.timeout(timeoutMs); const verificationSignal = signal ?
AbortSignal.any([signal, timeoutSignal]) : timeoutSignal; - let failureReason = "IPNI did not return expected provider results via filecoin-pin"; this.logger.log({ event: "ipni_verification_started", @@ -61,56 +60,69 @@ export class IpniVerificationService { }); const ipniVerificationStartTime = Date.now(); + const cidsToValidate: { cid: CID; isRoot: boolean }[] = [ + { cid: rootCid, isRoot: true }, + ...blockCids.map((cid) => ({ cid, isRoot: false })), + ]; - const ipniValidated = await waitForIpniProviderResults(rootCid, { - childBlocks: blockCids, - maxAttempts, - delayMs, - expectedProviders, - signal: verificationSignal, - }).catch((error) => { + let verified = 0; + const failedCIDs: FailedCID[] = []; + let rootCIDVerified = false; + + // waitForIpniProviderResults is all-or-nothing per call (throws on first failure), + // so we invoke it once per CID to get accurate per-CID verified/unverified counts. + // The shared verificationSignal bounds total wall-clock time across all CIDs. + for (const { cid, isRoot } of cidsToValidate) { if (signal?.aborted) { signal.throwIfAborted(); } + if (verificationSignal.aborted) { - failureReason = `IPNI verification timed out after ${timeoutMs}ms`; - this.logger.error({ - event: "ipni_verification_timed_out", - message: failureReason, - rootCID: rootCid.toString(), + failedCIDs.push({ cid: cid.toString(), reason: `IPNI verification timed out after ${timeoutMs}ms` }); + continue; + } + + try { + await waitForIpniProviderResults(cid, { + maxAttempts, + delayMs, + expectedProviders, + signal: verificationSignal, + }); + verified += 1; + if (isRoot) rootCIDVerified = true; + } catch (error) { + if (signal?.aborted) { + signal.throwIfAborted(); + } + + const reason = verificationSignal.aborted + ? `IPNI verification timed out after ${timeoutMs}ms` + : error instanceof Error + ? 
error.message + : String(error); + + failedCIDs.push({ cid: cid.toString(), reason }); + + this.logger.warn({ + event: "ipni_cid_verification_failed", + message: "IPNI verification failed for CID", + cid: cid.toString(), + isRoot, providerAddress: storageProvider.address, providerId: storageProvider.providerId, providerName: storageProvider.name, serviceUrl: storageProvider.serviceUrl, - blockCIDCount: blockCids.length, - timeoutMs, - pollIntervalMs: delayMs, - maxAttempts, + failureReason: reason, }); - return false; } - const errorMessage = error instanceof Error ? error.message : String(error); - failureReason = errorMessage; - this.logger.error({ - event: "ipni_verification_failed", - message: "IPNI verification failed", - rootCID: rootCid.toString(), - providerAddress: storageProvider.address, - providerId: storageProvider.providerId, - providerName: storageProvider.name, - serviceUrl: storageProvider.serviceUrl, - blockCIDCount: blockCids.length, - timeoutMs, - pollIntervalMs: delayMs, - maxAttempts, - failureReason, - }); - return false; - }); + } const ipniVerificationDurationMs = Date.now() - ipniVerificationStartTime; + const total = cidsToValidate.length; + const unverified = total - verified; - if (ipniValidated) { + if (verified === total) { this.logger.log({ event: "ipni_verification_succeeded", message: "IPNI verification succeeded", @@ -121,22 +133,32 @@ export class IpniVerificationService { verifyDurationMs: ipniVerificationDurationMs, blockCIDCount: blockCids.length, }); + } else { + this.logger.error({ + event: verificationSignal.aborted ? 
"ipni_verification_timed_out" : "ipni_verification_failed", + message: "IPNI verification did not fully succeed", + rootCID: rootCid.toString(), + providerAddress: storageProvider.address, + providerId: storageProvider.providerId, + providerName: storageProvider.name, + serviceUrl: storageProvider.serviceUrl, + blockCIDCount: blockCids.length, + timeoutMs, + pollIntervalMs: delayMs, + maxAttempts, + verified, + unverified, + total, + }); } return { - verified: ipniValidated ? 1 : 0, - unverified: ipniValidated ? 0 : 1, - total: 1, - rootCIDVerified: ipniValidated, + verified: verified, + unverified: unverified, + total: total, + rootCIDVerified: rootCIDVerified, durationMs: ipniVerificationDurationMs, - failedCIDs: ipniValidated - ? [] - : [ - { - cid: rootCid.toString(), - reason: failureReason, - }, - ], + failedCIDs: failedCIDs, verifiedAt: new Date().toISOString(), }; } From fb45bd076600779eac47999e0a8a26d45182c542 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Thu, 30 Apr 2026 13:17:03 +0200 Subject: [PATCH 10/28] refactor: store anon retrieval data primarily in postgres --- .../src/clickhouse/clickhouse.schema.ts | 14 +- apps/backend/src/database/database.module.ts | 9 +- .../entities/anon-retrieval.entity.ts | 120 +++++++++++ .../1776300000000-CreateAnonRetrievals.ts | 72 +++++++ apps/backend/src/database/types.ts | 12 ++ .../anon-retrieval.service.spec.ts | 203 +++++++++++------- .../retrieval-anon/anon-retrieval.service.ts | 172 +++++++++------ .../retrieval-anon/retrieval-anon.module.ts | 3 +- 8 files changed, 444 insertions(+), 161 deletions(-) create mode 100644 apps/backend/src/database/entities/anon-retrieval.entity.ts create mode 100644 apps/backend/src/database/migrations/1776300000000-CreateAnonRetrievals.ts diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index e30f6151..5a9a805e 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ 
b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -70,17 +70,12 @@ export function buildMigrations(database: string): string[] { sp_id Nullable(UInt64), -- storage provider numeric id sp_name Nullable(String), -- storage provider name - retrieval_id UUID, -- per-event correlation id (log/Prometheus join) + retrieval_id UUID, -- per-event correlation id (matches anon_retrievals.id in Postgres) - piece_cid String, -- piece CID (v2/CommP) sampled from the subgraph - data_set_id UInt64, -- on-chain data set id - piece_id UInt64, -- on-chain piece id within the data set raw_size UInt64, -- raw (unpadded) piece size, bytes with_ipfs_indexing Bool, -- whether the piece advertises IPNI metadata - ipfs_root_cid Nullable(String), -- root CID of the contained DAG; null when not IPFS-indexed service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) - retrieval_endpoint String, -- URL probed (e.g. {spBaseUrl}/piece/{pieceCid}) piece_fetch_status LowCardinality(String), -- 'success' | 'failed' — outcome of GET /piece/ (HTTP 2xx AND CommP match). CAR/IPNI/block-fetch outcomes live in their own columns. http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure @@ -92,17 +87,14 @@ export function buildMigrations(database: string): string[] { commp_valid Nullable(Bool), -- null when retrieval failed before CommP could be hashed car_parseable Nullable(Bool), -- null when CAR validation was skipped (no IPFS indexing or piece fetch failed); true if bytes parsed as a CAR car_block_count Nullable(UInt32), -- total number of blocks observed inside the CAR; null when skipped or unparseable - block_fetch_endpoint Nullable(String), -- gateway base URL probed for block fetch (e.g. 
{spBaseUrl}/ipfs/); null when skipped block_fetch_valid Nullable(Bool), -- null when skipped; true if all sampled blocks fetched + hash-verified block_fetch_sampled_count Nullable(UInt32), -- number of blocks sampled and probed via /ipfs/?format=raw block_fetch_failed_count Nullable(UInt32), -- number of sampled blocks that failed (non-2xx, hash mismatch, unsupported codec, or transport error) - ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' (mirrors data_storage_checks naming) + ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' | 'error' ipni_verify_ms Nullable(Float64), -- IPNI verification duration; null when skipped ipni_verified_cids_count Nullable(UInt32), -- CIDs confirmed findable via IPNI - ipni_unverified_cids_count Nullable(UInt32), -- CIDs checked but not findable - - error_message Nullable(String) -- failure reason; null on success + ipni_unverified_cids_count Nullable(UInt32) -- CIDs checked but not findable ) ENGINE MergeTree() PRIMARY KEY (probe_location, sp_address, timestamp) PARTITION BY toStartOfMonth(timestamp) diff --git a/apps/backend/src/database/database.module.ts b/apps/backend/src/database/database.module.ts index 9249c3a9..f3f9ed09 100644 --- a/apps/backend/src/database/database.module.ts +++ b/apps/backend/src/database/database.module.ts @@ -7,6 +7,7 @@ import { fileURLToPath } from "url"; import { toStructuredError } from "../common/logging.js"; import { createPinoExitLogger } from "../common/pino.config.js"; import type { IAppConfig, IConfig, IDatabaseConfig } from "../config/app.config.js"; +import { AnonRetrieval } from "./entities/anon-retrieval.entity.js"; import { DataRetentionBaseline } from "./entities/data-retention-baseline.entity.js"; import { Deal } from "./entities/deal.entity.js"; import { JobScheduleState } from "./entities/job-schedule-state.entity.js"; @@ -49,7 +50,7 @@ function toSafeDataSourceContext(options: DataSourceOptions): Record { + await queryRunner.query(` + 
CREATE TYPE anon_retrievals_piece_fetch_status_enum AS ENUM ('success', 'failed') + `); + await queryRunner.query(` + CREATE TYPE anon_retrievals_ipni_status_enum AS ENUM ('valid', 'invalid', 'skipped', 'error') + `); + await queryRunner.query(` + CREATE TYPE anon_retrievals_service_type_enum AS ENUM ('direct_sp', 'ipfs_pin') + `); + + await queryRunner.query(` + CREATE TABLE IF NOT EXISTS anon_retrievals ( + id UUID NOT NULL PRIMARY KEY DEFAULT gen_random_uuid(), + started_at TIMESTAMPTZ NOT NULL, + probe_location VARCHAR NOT NULL, + sp_address VARCHAR NOT NULL, + sp_id BIGINT, + sp_name VARCHAR, + piece_cid VARCHAR NOT NULL, + data_set_id BIGINT NOT NULL, + piece_id BIGINT NOT NULL, + raw_size BIGINT NOT NULL, + with_ipfs_indexing BOOLEAN NOT NULL, + ipfs_root_cid VARCHAR, + service_type anon_retrievals_service_type_enum NOT NULL DEFAULT 'direct_sp', + retrieval_endpoint VARCHAR NOT NULL, + piece_fetch_status anon_retrievals_piece_fetch_status_enum NOT NULL, + http_response_code INTEGER, + first_byte_ms DOUBLE PRECISION, + last_byte_ms DOUBLE PRECISION, + bytes_retrieved BIGINT, + throughput_bps BIGINT, + commp_valid BOOLEAN, + car_parseable BOOLEAN, + car_block_count INTEGER, + block_fetch_endpoint VARCHAR, + block_fetch_valid BOOLEAN, + block_fetch_sampled_count INTEGER, + block_fetch_failed_count INTEGER, + ipni_status anon_retrievals_ipni_status_enum NOT NULL, + ipni_verify_ms DOUBLE PRECISION, + ipni_verified_cids_count INTEGER, + ipni_unverified_cids_count INTEGER, + error_message VARCHAR, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `); + + await queryRunner.query(` + CREATE INDEX IF NOT EXISTS "IDX_anon_retrievals_sp_address_started_at" + ON anon_retrievals (sp_address, started_at) + `); + + await queryRunner.query(` + CREATE INDEX IF NOT EXISTS "IDX_anon_retrievals_started_at" + ON anon_retrievals (started_at) + `); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(`DROP TABLE IF EXISTS anon_retrievals 
CASCADE`); + await queryRunner.query(`DROP TYPE IF EXISTS anon_retrievals_service_type_enum`); + await queryRunner.query(`DROP TYPE IF EXISTS anon_retrievals_ipni_status_enum`); + await queryRunner.query(`DROP TYPE IF EXISTS anon_retrievals_piece_fetch_status_enum`); + } +} diff --git a/apps/backend/src/database/types.ts b/apps/backend/src/database/types.ts index 46fd5d28..e09d1dd3 100644 --- a/apps/backend/src/database/types.ts +++ b/apps/backend/src/database/types.ts @@ -28,6 +28,18 @@ export enum IpniStatus { FAILED = "failed", } +export enum PieceFetchStatus { + SUCCESS = "success", + FAILED = "failed", +} + +export enum IpniCheckStatus { + VALID = "valid", + INVALID = "invalid", + SKIPPED = "skipped", + ERROR = "error", +} + /** * Metadata schema for deal storage and retrieval */ diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index b5f17c57..4f775150 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -1,8 +1,9 @@ import type { Repository } from "typeorm"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { ClickhouseService } from "../clickhouse/clickhouse.service.js"; +import type { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { RetrievalStatus } from "../database/types.js"; +import { IpniCheckStatus, PieceFetchStatus } from "../database/types.js"; import type { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; import type { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import type { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; @@ -26,7 +27,7 @@ const PIECE = { function makeProvider(): StorageProvider { return { address: SP_ADDRESS, - providerId: 7, + 
providerId: 7n, name: "sp-test", isApproved: true, } as unknown as StorageProvider; @@ -39,6 +40,7 @@ function makeService(opts: { piece?: AnonPiece | null; carResult?: CarValidationResult; validateCarImpl?: () => Promise; + saveImpl?: (entity: AnonRetrieval) => Promise; }): { service: AnonRetrievalService; insertSpy: ReturnType; @@ -47,6 +49,7 @@ function makeService(opts: { metricsRecordStatusSpy: ReturnType; metricsRecordIpniSpy: ReturnType; metricsRecordBlockFetchSpy: ReturnType; + saveSpy: ReturnType; } { const insertSpy = vi.fn(); const clickhouseService = { @@ -59,6 +62,11 @@ function makeService(opts: { findOne: vi.fn(async () => makeProvider()), } as unknown as Repository; + const saveSpy = vi.fn(opts.saveImpl ?? (async (entity: AnonRetrieval) => entity)); + const anonRetrievalRepository = { + save: saveSpy, + } as unknown as Repository; + const anonPieceSelector = { selectPieceForProvider: vi.fn(async () => (opts.piece === null ? null : (opts.piece ?? PIECE))), } as unknown as AnonPieceSelectorService; @@ -100,6 +108,7 @@ function makeService(opts: { metrics, clickhouseService, spRepository, + anonRetrievalRepository, ); return { @@ -110,6 +119,7 @@ function makeService(opts: { metricsRecordStatusSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy, + saveSpy, }; } @@ -118,7 +128,7 @@ describe("AnonRetrievalService", () => { vi.clearAllMocks(); }); - it("emits a ClickHouse row with partial metrics when fetchPiece returns aborted=true", async () => { + it("persists a Postgres row with partial metrics when fetchPiece returns aborted=true", async () => { const partial: PieceRetrievalResult = { success: false, pieceCid: PIECE.pieceCid, @@ -133,41 +143,59 @@ describe("AnonRetrievalService", () => { aborted: true, }; - const { service, insertSpy } = makeService({ pieceResult: partial }); + const { service, saveSpy, insertSpy } = makeService({ pieceResult: partial }); await service.performForProvider(SP_ADDRESS); + expect(saveSpy).toHaveBeenCalledTimes(1); + 
const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.FAILED); + expect(entity.bytesRetrieved).toBe(524288n); + expect(entity.firstByteMs).toBe(150); + expect(entity.lastByteMs).toBe(42000); + expect(entity.throughputBps).toBe(12500n); + expect(entity.httpResponseCode).toBe(200); + expect(entity.errorMessage).toContain("Anon retrieval job timeout"); + expect(entity.pieceCid).toBe(PIECE.pieceCid); + expect(entity.spAddress).toBe(SP_ADDRESS); + expect(entity.spId).toBe(7n); + expect(entity.probeLocation).toBe("test-location"); + expect(entity.retrievalEndpoint).toBe(`https://sp.test/piece/${PIECE.pieceCid}`); + expect(typeof entity.id).toBe("string"); + + // CAR/IPNI/block-fetch were never run on a non-IPFS-indexed piece. + expect(entity.carParseable).toBeNull(); + expect(entity.carBlockCount).toBeNull(); + expect(entity.blockFetchEndpoint).toBeNull(); + expect(entity.blockFetchValid).toBeNull(); + expect(entity.blockFetchSampledCount).toBeNull(); + expect(entity.blockFetchFailedCount).toBeNull(); + expect(entity.ipniStatus).toBe(IpniCheckStatus.SKIPPED); + + // ClickHouse mirror is also written. 
expect(insertSpy).toHaveBeenCalledTimes(1); const [table, row] = insertSpy.mock.calls[0] as [string, Record]; expect(table).toBe("anon_retrieval_checks"); - expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); + expect(row.retrieval_id).toBe(entity.id); + expect(row.piece_fetch_status).toBe(PieceFetchStatus.FAILED); expect(row.bytes_retrieved).toBe(524288); expect(row.first_byte_ms).toBe(150); expect(row.last_byte_ms).toBe(42000); expect(row.throughput_bps).toBe(12500); expect(row.http_response_code).toBe(200); - expect(row.error_message).toContain("Anon retrieval job timeout"); - expect(row.piece_cid).toBe(PIECE.pieceCid); - expect(row.sp_address).toBe(SP_ADDRESS); - expect(row.sp_id).toBe(7); - expect(row.probe_location).toBe("test-location"); - expect(typeof row.retrieval_id).toBe("string"); - - // CAR/IPNI/block-fetch were never run on a non-IPFS-indexed piece — every - // dimension column should explicitly say "skipped" (ipni_status) or null. - expect(row.car_parseable).toBeNull(); - expect(row.car_block_count).toBeNull(); - expect(row.block_fetch_endpoint).toBeNull(); - expect(row.block_fetch_valid).toBeNull(); - expect(row.block_fetch_sampled_count).toBeNull(); - expect(row.block_fetch_failed_count).toBeNull(); - expect(row.ipni_status).toBe("skipped"); - expect(row.ipni_verify_ms).toBeNull(); - expect(row.ipni_verified_cids_count).toBeNull(); - expect(row.ipni_unverified_cids_count).toBeNull(); + expect(row.ipni_status).toBe(IpniCheckStatus.SKIPPED); + + // Trimmed CH columns must NOT appear (they live only in Postgres). 
+ expect(row).not.toHaveProperty("piece_cid"); + expect(row).not.toHaveProperty("data_set_id"); + expect(row).not.toHaveProperty("piece_id"); + expect(row).not.toHaveProperty("ipfs_root_cid"); + expect(row).not.toHaveProperty("retrieval_endpoint"); + expect(row).not.toHaveProperty("block_fetch_endpoint"); + expect(row).not.toHaveProperty("error_message"); }); - it("still emits a row when the signal aborts before fetchPiece runs", async () => { + it("still persists when the signal aborts before fetchPiece runs", async () => { const ac = new AbortController(); ac.abort(new Error("Anon retrieval job timeout (60s) for sp1")); @@ -183,20 +211,21 @@ describe("AnonRetrievalService", () => { commPValid: false, }; - const { service, insertSpy, fetchSpy } = makeService({ pieceResult: never }); + const { service, saveSpy, insertSpy, fetchSpy } = makeService({ pieceResult: never }); await service.performForProvider(SP_ADDRESS, ac.signal); expect(fetchSpy).not.toHaveBeenCalled(); + expect(saveSpy).toHaveBeenCalledTimes(1); + const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.FAILED); + expect(entity.errorMessage).toContain("Anon retrieval job timeout"); + expect(entity.bytesRetrieved).toBeNull(); + expect(entity.firstByteMs).toBeNull(); expect(insertSpy).toHaveBeenCalledTimes(1); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); - expect(row.error_message).toContain("Anon retrieval job timeout"); - expect(row.bytes_retrieved).toBeNull(); - expect(row.first_byte_ms).toBeNull(); }); - it("still emits a row when fetchPiece throws unexpectedly", async () => { + it("still persists when fetchPiece throws unexpectedly", async () => { const never: PieceRetrievalResult = { success: false, pieceCid: PIECE.pieceCid, @@ -209,7 +238,7 @@ describe("AnonRetrievalService", () => { commPValid: false, }; - const { service, insertSpy } = makeService({ + 
const { service, saveSpy } = makeService({ pieceResult: never, fetchPieceImpl: async () => { throw new Error("network down"); @@ -218,12 +247,12 @@ describe("AnonRetrievalService", () => { await expect(service.performForProvider(SP_ADDRESS)).rejects.toThrow("network down"); - expect(insertSpy).toHaveBeenCalledTimes(1); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); + expect(saveSpy).toHaveBeenCalledTimes(1); + const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.FAILED); }); - it("skips ClickHouse insert when ClickHouse is disabled", async () => { + it("does not throw when Postgres save fails and still attempts the CH insert", async () => { const ok: PieceRetrievalResult = { success: true, pieceCid: PIECE.pieceCid, @@ -236,11 +265,20 @@ describe("AnonRetrievalService", () => { commPValid: true, }; - const { service, insertSpy } = makeService({ pieceResult: ok, clickhouseEnabled: false }); + const { service, saveSpy, insertSpy } = makeService({ + pieceResult: ok, + saveImpl: async () => { + throw new Error("connection refused"); + }, + }); - await service.performForProvider(SP_ADDRESS); + await expect(service.performForProvider(SP_ADDRESS)).resolves.toBeUndefined(); - expect(insertSpy).not.toHaveBeenCalled(); + expect(saveSpy).toHaveBeenCalledTimes(1); + // CH still gets the row keyed by the client-side uuid. 
+ expect(insertSpy).toHaveBeenCalledTimes(1); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(typeof row.retrieval_id).toBe("string"); }); describe("with IPFS indexing", () => { @@ -264,7 +302,7 @@ describe("AnonRetrievalService", () => { }; } - it("emits populated CAR/IPNI/block-fetch columns when validation fully succeeds", async () => { + it("populates CAR/IPNI/block-fetch columns when validation fully succeeds", async () => { const carResult: CarValidationResult = { carParseable: true, blockCount: 42, @@ -278,7 +316,7 @@ describe("AnonRetrievalService", () => { blockFetchEndpoint: "https://sp.test/ipfs/", }; - const { service, insertSpy, validateCarSpy } = makeService({ + const { service, saveSpy, insertSpy, validateCarSpy } = makeService({ pieceResult: okPiece(Buffer.from("car-bytes")), piece: INDEXED_PIECE, carResult, @@ -287,19 +325,24 @@ describe("AnonRetrievalService", () => { await service.performForProvider(SP_ADDRESS); expect(validateCarSpy).toHaveBeenCalledTimes(1); + const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.SUCCESS); + expect(entity.commpValid).toBe(true); + expect(entity.carParseable).toBe(true); + expect(entity.carBlockCount).toBe(42); + expect(entity.blockFetchEndpoint).toBe("https://sp.test/ipfs/"); + expect(entity.blockFetchValid).toBe(true); + expect(entity.blockFetchSampledCount).toBe(5); + expect(entity.blockFetchFailedCount).toBe(0); + expect(entity.ipniStatus).toBe(IpniCheckStatus.VALID); + expect(entity.ipniVerifyMs).toBe(137); + expect(entity.ipniVerifiedCidsCount).toBe(6); + expect(entity.ipniUnverifiedCidsCount).toBe(0); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); - expect(row.commp_valid).toBe(true); + expect(row.piece_fetch_status).toBe(PieceFetchStatus.SUCCESS); expect(row.car_parseable).toBe(true); - expect(row.car_block_count).toBe(42); - 
expect(row.block_fetch_endpoint).toBe("https://sp.test/ipfs/"); - expect(row.block_fetch_valid).toBe(true); - expect(row.block_fetch_sampled_count).toBe(5); - expect(row.block_fetch_failed_count).toBe(0); - expect(row.ipni_status).toBe("valid"); - expect(row.ipni_verify_ms).toBe(137); - expect(row.ipni_verified_cids_count).toBe(6); - expect(row.ipni_unverified_cids_count).toBe(0); + expect(row.ipni_status).toBe(IpniCheckStatus.VALID); }); it("distinguishes IPNI invalid from block-fetch failures with explicit counts", async () => { @@ -316,7 +359,7 @@ describe("AnonRetrievalService", () => { blockFetchEndpoint: "https://sp.test/ipfs/", }; - const { service, insertSpy } = makeService({ + const { service, saveSpy } = makeService({ pieceResult: okPiece(Buffer.from("car-bytes")), piece: INDEXED_PIECE, carResult, @@ -324,24 +367,24 @@ describe("AnonRetrievalService", () => { await service.performForProvider(SP_ADDRESS); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; + const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; // The piece-fetch path still succeeded — failures are surfaced as // independent dimensions, not folded into piece_fetch_status. 
- expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); - expect(row.car_parseable).toBe(true); - expect(row.ipni_status).toBe("invalid"); - expect(row.ipni_verified_cids_count).toBe(0); - expect(row.ipni_unverified_cids_count).toBe(6); - expect(row.block_fetch_valid).toBe(false); - expect(row.block_fetch_sampled_count).toBe(5); - expect(row.block_fetch_failed_count).toBe(2); + expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.SUCCESS); + expect(entity.carParseable).toBe(true); + expect(entity.ipniStatus).toBe(IpniCheckStatus.INVALID); + expect(entity.ipniVerifiedCidsCount).toBe(0); + expect(entity.ipniUnverifiedCidsCount).toBe(6); + expect(entity.blockFetchValid).toBe(false); + expect(entity.blockFetchSampledCount).toBe(5); + expect(entity.blockFetchFailedCount).toBe(2); }); it("emits ipni_status='error' (not 'skipped') when CAR validation throws on a successful piece", async () => { // Distinguishes a real infra outage (e.g. IpniVerificationService down) // from a piece that legitimately had no IPFS indexing. Without the // distinction, an outage looks like normal non-IPFS volume in dashboards. - const { service, insertSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy } = makeService({ + const { service, saveSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy } = makeService({ pieceResult: okPiece(Buffer.from("car-bytes")), piece: INDEXED_PIECE, validateCarImpl: async () => { @@ -354,11 +397,11 @@ describe("AnonRetrievalService", () => { expect(metricsRecordIpniSpy).toHaveBeenCalledWith(expect.anything(), "error"); expect(metricsRecordBlockFetchSpy).toHaveBeenCalledWith(expect.anything(), "error"); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.ipni_status).toBe("error"); + const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + expect(entity.ipniStatus).toBe(IpniCheckStatus.ERROR); // Piece-fetch path itself succeeded — only the validation pipeline failed. 
- expect(row.commp_valid).toBe(true); - expect(row.car_parseable).toBeNull(); + expect(entity.commpValid).toBe(true); + expect(entity.carParseable).toBeNull(); }); it("emits car_parseable=false with skipped IPNI/block-fetch when bytes don't parse as CAR", async () => { @@ -375,7 +418,7 @@ describe("AnonRetrievalService", () => { blockFetchEndpoint: null, }; - const { service, insertSpy } = makeService({ + const { service, saveSpy } = makeService({ pieceResult: okPiece(Buffer.from("not-a-car")), piece: INDEXED_PIECE, carResult, @@ -383,19 +426,19 @@ describe("AnonRetrievalService", () => { await service.performForProvider(SP_ADDRESS); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.car_parseable).toBe(false); + const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + expect(entity.carParseable).toBe(false); // car_block_count and block_fetch_sampled_count are gated on carParseable // so an unparseable CAR doesn't emit a misleading 0. - expect(row.car_block_count).toBeNull(); - expect(row.block_fetch_sampled_count).toBeNull(); - expect(row.block_fetch_endpoint).toBeNull(); - expect(row.block_fetch_valid).toBeNull(); - expect(row.block_fetch_failed_count).toBeNull(); - expect(row.ipni_status).toBe("skipped"); - expect(row.ipni_verify_ms).toBeNull(); - expect(row.ipni_verified_cids_count).toBeNull(); - expect(row.ipni_unverified_cids_count).toBeNull(); + expect(entity.carBlockCount).toBeNull(); + expect(entity.blockFetchSampledCount).toBeNull(); + expect(entity.blockFetchEndpoint).toBeNull(); + expect(entity.blockFetchValid).toBeNull(); + expect(entity.blockFetchFailedCount).toBeNull(); + expect(entity.ipniStatus).toBe(IpniCheckStatus.SKIPPED); + expect(entity.ipniVerifyMs).toBeNull(); + expect(entity.ipniVerifiedCidsCount).toBeNull(); + expect(entity.ipniUnverifiedCidsCount).toBeNull(); }); }); }); diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 
5343d59a..d8298776 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -4,8 +4,9 @@ import { InjectRepository } from "@nestjs/typeorm"; import type { Repository } from "typeorm"; import { ClickhouseService } from "../clickhouse/clickhouse.service.js"; import { type ProviderJobContext, toStructuredError } from "../common/logging.js"; +import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { RetrievalStatus, ServiceType } from "../database/types.js"; +import { IpniCheckStatus, PieceFetchStatus, ServiceType } from "../database/types.js"; import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; import { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; @@ -29,6 +30,8 @@ export class AnonRetrievalService { private readonly clickhouseService: ClickhouseService, @InjectRepository(StorageProvider) private readonly spRepository: Repository, + @InjectRepository(AnonRetrieval) + private readonly anonRetrievalRepository: Repository, ) {} async performForProvider(spAddress: string, signal?: AbortSignal, logContext?: ProviderJobContext): Promise { @@ -137,80 +140,75 @@ export class AnonRetrievalService { pieceResult.success ? "success" : pieceResult.aborted ? "failure.aborted" : "failure.http", ); } finally { - // Always emit a ClickHouse row — even on abort or unexpected error — so - // we never lose the evidence (ttfb, bytes, response code) we already - // collected. + // Always persist a row — even on abort or unexpected error — so we never + // lose the evidence (ttfb, bytes, response code) we already collected. const finalPieceResult = pieceResult ?? 
buildAbortedPlaceholder(piece.pieceCid, signal?.reason); - const retrievalId = randomUUID(); + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; + const retrievalEndpoint = `${spBaseUrl}/piece/${piece.pieceCid}`; + const pieceFetchStatus = finalPieceResult.success ? PieceFetchStatus.SUCCESS : PieceFetchStatus.FAILED; + const ipniStatus: IpniCheckStatus = !validatedCarPiece + ? IpniCheckStatus.SKIPPED + : carResult + ? ipniStatusFromResult(carResult) + : IpniCheckStatus.ERROR; - if (this.clickhouseService.enabled) { - const providerInfo = this.walletSdkService.getProviderInfo(spAddress); - const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; - const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; - const ipniStatus = !validatedCarPiece ? "skipped" : carResult ? ipniStatusFromResult(carResult) : "error"; + const entity: AnonRetrieval = { + id: randomUUID(), + createdAt: startedAt, + startedAt, + probeLocation: this.clickhouseService.probeLocation, + spAddress, + spId: provider?.providerId ?? null, + spName: provider?.name ?? null, + pieceCid: piece.pieceCid, + dataSetId: BigInt(piece.dataSetId), + pieceId: BigInt(piece.pieceId), + rawSize: BigInt(piece.rawSize), + withIpfsIndexing: piece.withIPFSIndexing, + ipfsRootCid: piece.ipfsRootCid, + serviceType: ServiceType.DIRECT_SP, + retrievalEndpoint, + pieceFetchStatus, + httpResponseCode: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, + firstByteMs: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, + lastByteMs: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, + bytesRetrieved: finalPieceResult.bytesReceived > 0 ? BigInt(finalPieceResult.bytesReceived) : null, + throughputBps: finalPieceResult.throughputBps > 0 ? 
BigInt(Math.round(finalPieceResult.throughputBps)) : null, + commpValid: finalPieceResult.success ? finalPieceResult.commPValid : null, + carParseable: carResult ? carResult.carParseable : null, + carBlockCount: carResult?.carParseable ? carResult.blockCount : null, + blockFetchEndpoint: carResult?.blockFetchEndpoint ?? null, + blockFetchValid: carResult ? carResult.blockFetchValid : null, + blockFetchSampledCount: carResult?.carParseable ? carResult.sampledCidCount : null, + blockFetchFailedCount: carResult?.blockFetchFailedCount ?? null, + ipniStatus, + ipniVerifyMs: carResult?.ipniVerifyMs ?? null, + ipniVerifiedCidsCount: carResult?.ipniVerifiedCidsCount ?? null, + ipniUnverifiedCidsCount: carResult?.ipniUnverifiedCidsCount ?? null, + errorMessage: finalPieceResult.errorMessage ?? null, + }; - try { - this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { - timestamp: startedAt.getTime(), - probe_location: this.clickhouseService.probeLocation, - sp_address: spAddress, - sp_id: provider?.providerId != null ? Number(provider.providerId) : null, - sp_name: provider?.name ?? null, - retrieval_id: retrievalId, - piece_cid: piece.pieceCid, - data_set_id: piece.dataSetId, - piece_id: piece.pieceId, - raw_size: piece.rawSize, - with_ipfs_indexing: piece.withIPFSIndexing, - ipfs_root_cid: piece.ipfsRootCid, - service_type: ServiceType.DIRECT_SP, - retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, - piece_fetch_status: pieceFetchStatus, - http_response_code: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, - first_byte_ms: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, - last_byte_ms: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, - bytes_retrieved: finalPieceResult.bytesReceived > 0 ? finalPieceResult.bytesReceived : null, - throughput_bps: finalPieceResult.throughputBps > 0 ? Math.round(finalPieceResult.throughputBps) : null, - commp_valid: finalPieceResult.success ? 
finalPieceResult.commPValid : null, - car_parseable: carResult ? carResult.carParseable : null, - car_block_count: carResult?.carParseable ? carResult?.blockCount : null, - block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, - block_fetch_valid: carResult ? carResult.blockFetchValid : null, - block_fetch_sampled_count: carResult?.carParseable ? carResult?.sampledCidCount : null, - block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, - ipni_status: ipniStatus, - ipni_verify_ms: carResult?.ipniVerifyMs ?? null, - ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, - ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, - error_message: finalPieceResult.errorMessage ?? null, - }); - } catch (error) { - // ClickhouseService.insert is buffered/non-throwing in normal operation, but - // guard against unexpected runtime errors so we don't break the probe cycle. - this.logger.warn({ - ...logContext, - event: "anon_retrieval_clickhouse_insert_failed", - message: "Failed to enqueue anonymous retrieval row to ClickHouse", - pieceCid: piece.pieceCid, - spAddress, - error: toStructuredError(error), - }); - } - } else { - this.logger.debug({ + try { + await this.anonRetrievalRepository.save(entity); + } catch (error) { + this.logger.warn({ ...logContext, - event: "anon_retrieval_clickhouse_disabled", - message: "ClickHouse disabled — anon retrieval row not emitted", + event: "anon_retrieval_save_failed", + message: "Failed to persist anonymous retrieval row to Postgres", pieceCid: piece.pieceCid, spAddress, + error: toStructuredError(error), }); } + this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, toClickhouseRow(entity)); + this.logger.log({ ...logContext, event: "anon_retrieval_completed", message: "Anonymous retrieval test completed", - retrievalId, + retrievalId: entity.id, pieceCid: piece.pieceCid, spAddress, success: finalPieceResult.success, @@ -226,9 +224,53 @@ export class AnonRetrievalService { } } 
-function ipniStatusFromResult(result: CarValidationResult): "valid" | "invalid" | "skipped" { - if (result.ipniValid === null) return "skipped"; - return result.ipniValid ? "valid" : "invalid"; +function ipniStatusFromResult(result: CarValidationResult): IpniCheckStatus { + switch (result.ipniValid) { + case null: + return IpniCheckStatus.SKIPPED; + case true: + return IpniCheckStatus.VALID; + case false: + return IpniCheckStatus.INVALID; + default: + throw new Error(`Unexpected IPNI validation result: ${result.ipniValid}`); + } +} + +/** + * Project an AnonRetrieval entity to the chartable subset stored in ClickHouse. + * High-cardinality identifiers (piece_cid, data_set_id, piece_id, ipfs_root_cid), + * URLs (retrieval_endpoint, block_fetch_endpoint), and free-text columns + * (error_message) are intentionally dropped — they live only in Postgres. + */ +function toClickhouseRow(entity: AnonRetrieval): Record { + return { + timestamp: entity.startedAt.getTime(), + probe_location: entity.probeLocation, + sp_address: entity.spAddress, + sp_id: entity.spId != null ? Number(entity.spId) : null, + sp_name: entity.spName, + retrieval_id: entity.id, + raw_size: Number(entity.rawSize), + with_ipfs_indexing: entity.withIpfsIndexing, + service_type: entity.serviceType, + piece_fetch_status: entity.pieceFetchStatus, + http_response_code: entity.httpResponseCode, + first_byte_ms: entity.firstByteMs, + last_byte_ms: entity.lastByteMs, + bytes_retrieved: entity.bytesRetrieved != null ? Number(entity.bytesRetrieved) : null, + throughput_bps: entity.throughputBps != null ? 
Number(entity.throughputBps) : null, + commp_valid: entity.commpValid, + car_parseable: entity.carParseable, + car_block_count: entity.carBlockCount, + block_fetch_valid: entity.blockFetchValid, + block_fetch_sampled_count: entity.blockFetchSampledCount, + block_fetch_failed_count: entity.blockFetchFailedCount, + ipni_status: entity.ipniStatus, + ipni_verify_ms: entity.ipniVerifyMs, + ipni_verified_cids_count: entity.ipniVerifiedCidsCount, + ipni_unverified_cids_count: entity.ipniUnverifiedCidsCount, + }; } function buildAbortedPlaceholder(pieceCid: string, reason: unknown): PieceRetrievalResult { diff --git a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts index c05dcb5f..4e9e38df 100644 --- a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts +++ b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts @@ -1,6 +1,7 @@ import { Module } from "@nestjs/common"; import { ConfigModule } from "@nestjs/config"; import { TypeOrmModule } from "@nestjs/typeorm"; +import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { HttpClientModule } from "../http-client/http-client.module.js"; import { IpniModule } from "../ipni/ipni.module.js"; @@ -14,7 +15,7 @@ import { PieceRetrievalService } from "./piece-retrieval.service.js"; @Module({ imports: [ ConfigModule, - TypeOrmModule.forFeature([StorageProvider]), + TypeOrmModule.forFeature([AnonRetrieval, StorageProvider]), SubgraphModule, WalletSdkModule, HttpClientModule, From 92c40a85fb4798aa74ad03d8490ea4f1e0e62899 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 08:12:26 +0200 Subject: [PATCH 11/28] Revert "refactor: store anon retrieval data primarily in postgres" This reverts commit 6824f752b106f8bbd8e443aa2f74f680a8afe4c1. 
--- .../src/clickhouse/clickhouse.schema.ts | 14 +- apps/backend/src/database/database.module.ts | 9 +- .../entities/anon-retrieval.entity.ts | 120 ----------- .../1776300000000-CreateAnonRetrievals.ts | 72 ------- apps/backend/src/database/types.ts | 12 -- .../anon-retrieval.service.spec.ts | 203 +++++++----------- .../retrieval-anon/anon-retrieval.service.ts | 172 ++++++--------- .../retrieval-anon/retrieval-anon.module.ts | 3 +- 8 files changed, 161 insertions(+), 444 deletions(-) delete mode 100644 apps/backend/src/database/entities/anon-retrieval.entity.ts delete mode 100644 apps/backend/src/database/migrations/1776300000000-CreateAnonRetrievals.ts diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index 5a9a805e..e30f6151 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -70,12 +70,17 @@ export function buildMigrations(database: string): string[] { sp_id Nullable(UInt64), -- storage provider numeric id sp_name Nullable(String), -- storage provider name - retrieval_id UUID, -- per-event correlation id (matches anon_retrievals.id in Postgres) + retrieval_id UUID, -- per-event correlation id (log/Prometheus join) + piece_cid String, -- piece CID (v2/CommP) sampled from the subgraph + data_set_id UInt64, -- on-chain data set id + piece_id UInt64, -- on-chain piece id within the data set raw_size UInt64, -- raw (unpadded) piece size, bytes with_ipfs_indexing Bool, -- whether the piece advertises IPNI metadata + ipfs_root_cid Nullable(String), -- root CID of the contained DAG; null when not IPFS-indexed service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) + retrieval_endpoint String, -- URL probed (e.g. {spBaseUrl}/piece/{pieceCid}) piece_fetch_status LowCardinality(String), -- 'success' | 'failed' — outcome of GET /piece/ (HTTP 2xx AND CommP match). 
CAR/IPNI/block-fetch outcomes live in their own columns. http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure @@ -87,14 +92,17 @@ export function buildMigrations(database: string): string[] { commp_valid Nullable(Bool), -- null when retrieval failed before CommP could be hashed car_parseable Nullable(Bool), -- null when CAR validation was skipped (no IPFS indexing or piece fetch failed); true if bytes parsed as a CAR car_block_count Nullable(UInt32), -- total number of blocks observed inside the CAR; null when skipped or unparseable + block_fetch_endpoint Nullable(String), -- gateway base URL probed for block fetch (e.g. {spBaseUrl}/ipfs/); null when skipped block_fetch_valid Nullable(Bool), -- null when skipped; true if all sampled blocks fetched + hash-verified block_fetch_sampled_count Nullable(UInt32), -- number of blocks sampled and probed via /ipfs/?format=raw block_fetch_failed_count Nullable(UInt32), -- number of sampled blocks that failed (non-2xx, hash mismatch, unsupported codec, or transport error) - ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' | 'error' + ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' (mirrors data_storage_checks naming) ipni_verify_ms Nullable(Float64), -- IPNI verification duration; null when skipped ipni_verified_cids_count Nullable(UInt32), -- CIDs confirmed findable via IPNI - ipni_unverified_cids_count Nullable(UInt32) -- CIDs checked but not findable + ipni_unverified_cids_count Nullable(UInt32), -- CIDs checked but not findable + + error_message Nullable(String) -- failure reason; null on success ) ENGINE MergeTree() PRIMARY KEY (probe_location, sp_address, timestamp) PARTITION BY toStartOfMonth(timestamp) diff --git a/apps/backend/src/database/database.module.ts b/apps/backend/src/database/database.module.ts index f3f9ed09..9249c3a9 100644 --- a/apps/backend/src/database/database.module.ts +++ b/apps/backend/src/database/database.module.ts @@ -7,7 
+7,6 @@ import { fileURLToPath } from "url"; import { toStructuredError } from "../common/logging.js"; import { createPinoExitLogger } from "../common/pino.config.js"; import type { IAppConfig, IConfig, IDatabaseConfig } from "../config/app.config.js"; -import { AnonRetrieval } from "./entities/anon-retrieval.entity.js"; import { DataRetentionBaseline } from "./entities/data-retention-baseline.entity.js"; import { Deal } from "./entities/deal.entity.js"; import { JobScheduleState } from "./entities/job-schedule-state.entity.js"; @@ -50,7 +49,7 @@ function toSafeDataSourceContext(options: DataSourceOptions): Record { - await queryRunner.query(` - CREATE TYPE anon_retrievals_piece_fetch_status_enum AS ENUM ('success', 'failed') - `); - await queryRunner.query(` - CREATE TYPE anon_retrievals_ipni_status_enum AS ENUM ('valid', 'invalid', 'skipped', 'error') - `); - await queryRunner.query(` - CREATE TYPE anon_retrievals_service_type_enum AS ENUM ('direct_sp', 'ipfs_pin') - `); - - await queryRunner.query(` - CREATE TABLE IF NOT EXISTS anon_retrievals ( - id UUID NOT NULL PRIMARY KEY DEFAULT gen_random_uuid(), - started_at TIMESTAMPTZ NOT NULL, - probe_location VARCHAR NOT NULL, - sp_address VARCHAR NOT NULL, - sp_id BIGINT, - sp_name VARCHAR, - piece_cid VARCHAR NOT NULL, - data_set_id BIGINT NOT NULL, - piece_id BIGINT NOT NULL, - raw_size BIGINT NOT NULL, - with_ipfs_indexing BOOLEAN NOT NULL, - ipfs_root_cid VARCHAR, - service_type anon_retrievals_service_type_enum NOT NULL DEFAULT 'direct_sp', - retrieval_endpoint VARCHAR NOT NULL, - piece_fetch_status anon_retrievals_piece_fetch_status_enum NOT NULL, - http_response_code INTEGER, - first_byte_ms DOUBLE PRECISION, - last_byte_ms DOUBLE PRECISION, - bytes_retrieved BIGINT, - throughput_bps BIGINT, - commp_valid BOOLEAN, - car_parseable BOOLEAN, - car_block_count INTEGER, - block_fetch_endpoint VARCHAR, - block_fetch_valid BOOLEAN, - block_fetch_sampled_count INTEGER, - block_fetch_failed_count INTEGER, - ipni_status 
anon_retrievals_ipni_status_enum NOT NULL, - ipni_verify_ms DOUBLE PRECISION, - ipni_verified_cids_count INTEGER, - ipni_unverified_cids_count INTEGER, - error_message VARCHAR, - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() - ) - `); - - await queryRunner.query(` - CREATE INDEX IF NOT EXISTS "IDX_anon_retrievals_sp_address_started_at" - ON anon_retrievals (sp_address, started_at) - `); - - await queryRunner.query(` - CREATE INDEX IF NOT EXISTS "IDX_anon_retrievals_started_at" - ON anon_retrievals (started_at) - `); - } - - public async down(queryRunner: QueryRunner): Promise { - await queryRunner.query(`DROP TABLE IF EXISTS anon_retrievals CASCADE`); - await queryRunner.query(`DROP TYPE IF EXISTS anon_retrievals_service_type_enum`); - await queryRunner.query(`DROP TYPE IF EXISTS anon_retrievals_ipni_status_enum`); - await queryRunner.query(`DROP TYPE IF EXISTS anon_retrievals_piece_fetch_status_enum`); - } -} diff --git a/apps/backend/src/database/types.ts b/apps/backend/src/database/types.ts index e09d1dd3..46fd5d28 100644 --- a/apps/backend/src/database/types.ts +++ b/apps/backend/src/database/types.ts @@ -28,18 +28,6 @@ export enum IpniStatus { FAILED = "failed", } -export enum PieceFetchStatus { - SUCCESS = "success", - FAILED = "failed", -} - -export enum IpniCheckStatus { - VALID = "valid", - INVALID = "invalid", - SKIPPED = "skipped", - ERROR = "error", -} - /** * Metadata schema for deal storage and retrieval */ diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index 4f775150..b5f17c57 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -1,9 +1,8 @@ import type { Repository } from "typeorm"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { ClickhouseService } from "../clickhouse/clickhouse.service.js"; -import type { AnonRetrieval } from 
"../database/entities/anon-retrieval.entity.js"; import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { IpniCheckStatus, PieceFetchStatus } from "../database/types.js"; +import { RetrievalStatus } from "../database/types.js"; import type { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; import type { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import type { AnonPieceSelectorService } from "./anon-piece-selector.service.js"; @@ -27,7 +26,7 @@ const PIECE = { function makeProvider(): StorageProvider { return { address: SP_ADDRESS, - providerId: 7n, + providerId: 7, name: "sp-test", isApproved: true, } as unknown as StorageProvider; @@ -40,7 +39,6 @@ function makeService(opts: { piece?: AnonPiece | null; carResult?: CarValidationResult; validateCarImpl?: () => Promise; - saveImpl?: (entity: AnonRetrieval) => Promise; }): { service: AnonRetrievalService; insertSpy: ReturnType; @@ -49,7 +47,6 @@ function makeService(opts: { metricsRecordStatusSpy: ReturnType; metricsRecordIpniSpy: ReturnType; metricsRecordBlockFetchSpy: ReturnType; - saveSpy: ReturnType; } { const insertSpy = vi.fn(); const clickhouseService = { @@ -62,11 +59,6 @@ function makeService(opts: { findOne: vi.fn(async () => makeProvider()), } as unknown as Repository; - const saveSpy = vi.fn(opts.saveImpl ?? (async (entity: AnonRetrieval) => entity)); - const anonRetrievalRepository = { - save: saveSpy, - } as unknown as Repository; - const anonPieceSelector = { selectPieceForProvider: vi.fn(async () => (opts.piece === null ? null : (opts.piece ?? 
PIECE))), } as unknown as AnonPieceSelectorService; @@ -108,7 +100,6 @@ function makeService(opts: { metrics, clickhouseService, spRepository, - anonRetrievalRepository, ); return { @@ -119,7 +110,6 @@ function makeService(opts: { metricsRecordStatusSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy, - saveSpy, }; } @@ -128,7 +118,7 @@ describe("AnonRetrievalService", () => { vi.clearAllMocks(); }); - it("persists a Postgres row with partial metrics when fetchPiece returns aborted=true", async () => { + it("emits a ClickHouse row with partial metrics when fetchPiece returns aborted=true", async () => { const partial: PieceRetrievalResult = { success: false, pieceCid: PIECE.pieceCid, @@ -143,59 +133,41 @@ describe("AnonRetrievalService", () => { aborted: true, }; - const { service, saveSpy, insertSpy } = makeService({ pieceResult: partial }); + const { service, insertSpy } = makeService({ pieceResult: partial }); await service.performForProvider(SP_ADDRESS); - expect(saveSpy).toHaveBeenCalledTimes(1); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; - expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.FAILED); - expect(entity.bytesRetrieved).toBe(524288n); - expect(entity.firstByteMs).toBe(150); - expect(entity.lastByteMs).toBe(42000); - expect(entity.throughputBps).toBe(12500n); - expect(entity.httpResponseCode).toBe(200); - expect(entity.errorMessage).toContain("Anon retrieval job timeout"); - expect(entity.pieceCid).toBe(PIECE.pieceCid); - expect(entity.spAddress).toBe(SP_ADDRESS); - expect(entity.spId).toBe(7n); - expect(entity.probeLocation).toBe("test-location"); - expect(entity.retrievalEndpoint).toBe(`https://sp.test/piece/${PIECE.pieceCid}`); - expect(typeof entity.id).toBe("string"); - - // CAR/IPNI/block-fetch were never run on a non-IPFS-indexed piece. 
- expect(entity.carParseable).toBeNull(); - expect(entity.carBlockCount).toBeNull(); - expect(entity.blockFetchEndpoint).toBeNull(); - expect(entity.blockFetchValid).toBeNull(); - expect(entity.blockFetchSampledCount).toBeNull(); - expect(entity.blockFetchFailedCount).toBeNull(); - expect(entity.ipniStatus).toBe(IpniCheckStatus.SKIPPED); - - // ClickHouse mirror is also written. expect(insertSpy).toHaveBeenCalledTimes(1); const [table, row] = insertSpy.mock.calls[0] as [string, Record]; expect(table).toBe("anon_retrieval_checks"); - expect(row.retrieval_id).toBe(entity.id); - expect(row.piece_fetch_status).toBe(PieceFetchStatus.FAILED); + expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); expect(row.bytes_retrieved).toBe(524288); expect(row.first_byte_ms).toBe(150); expect(row.last_byte_ms).toBe(42000); expect(row.throughput_bps).toBe(12500); expect(row.http_response_code).toBe(200); - expect(row.ipni_status).toBe(IpniCheckStatus.SKIPPED); - - // Trimmed CH columns must NOT appear (they live only in Postgres). - expect(row).not.toHaveProperty("piece_cid"); - expect(row).not.toHaveProperty("data_set_id"); - expect(row).not.toHaveProperty("piece_id"); - expect(row).not.toHaveProperty("ipfs_root_cid"); - expect(row).not.toHaveProperty("retrieval_endpoint"); - expect(row).not.toHaveProperty("block_fetch_endpoint"); - expect(row).not.toHaveProperty("error_message"); + expect(row.error_message).toContain("Anon retrieval job timeout"); + expect(row.piece_cid).toBe(PIECE.pieceCid); + expect(row.sp_address).toBe(SP_ADDRESS); + expect(row.sp_id).toBe(7); + expect(row.probe_location).toBe("test-location"); + expect(typeof row.retrieval_id).toBe("string"); + + // CAR/IPNI/block-fetch were never run on a non-IPFS-indexed piece — every + // dimension column should explicitly say "skipped" (ipni_status) or null. 
+ expect(row.car_parseable).toBeNull(); + expect(row.car_block_count).toBeNull(); + expect(row.block_fetch_endpoint).toBeNull(); + expect(row.block_fetch_valid).toBeNull(); + expect(row.block_fetch_sampled_count).toBeNull(); + expect(row.block_fetch_failed_count).toBeNull(); + expect(row.ipni_status).toBe("skipped"); + expect(row.ipni_verify_ms).toBeNull(); + expect(row.ipni_verified_cids_count).toBeNull(); + expect(row.ipni_unverified_cids_count).toBeNull(); }); - it("still persists when the signal aborts before fetchPiece runs", async () => { + it("still emits a row when the signal aborts before fetchPiece runs", async () => { const ac = new AbortController(); ac.abort(new Error("Anon retrieval job timeout (60s) for sp1")); @@ -211,21 +183,20 @@ describe("AnonRetrievalService", () => { commPValid: false, }; - const { service, saveSpy, insertSpy, fetchSpy } = makeService({ pieceResult: never }); + const { service, insertSpy, fetchSpy } = makeService({ pieceResult: never }); await service.performForProvider(SP_ADDRESS, ac.signal); expect(fetchSpy).not.toHaveBeenCalled(); - expect(saveSpy).toHaveBeenCalledTimes(1); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; - expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.FAILED); - expect(entity.errorMessage).toContain("Anon retrieval job timeout"); - expect(entity.bytesRetrieved).toBeNull(); - expect(entity.firstByteMs).toBeNull(); expect(insertSpy).toHaveBeenCalledTimes(1); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); + expect(row.error_message).toContain("Anon retrieval job timeout"); + expect(row.bytes_retrieved).toBeNull(); + expect(row.first_byte_ms).toBeNull(); }); - it("still persists when fetchPiece throws unexpectedly", async () => { + it("still emits a row when fetchPiece throws unexpectedly", async () => { const never: PieceRetrievalResult = { success: false, pieceCid: PIECE.pieceCid, @@ -238,7 +209,7 @@ 
describe("AnonRetrievalService", () => { commPValid: false, }; - const { service, saveSpy } = makeService({ + const { service, insertSpy } = makeService({ pieceResult: never, fetchPieceImpl: async () => { throw new Error("network down"); @@ -247,12 +218,12 @@ describe("AnonRetrievalService", () => { await expect(service.performForProvider(SP_ADDRESS)).rejects.toThrow("network down"); - expect(saveSpy).toHaveBeenCalledTimes(1); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; - expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.FAILED); + expect(insertSpy).toHaveBeenCalledTimes(1); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); }); - it("does not throw when Postgres save fails and still attempts the CH insert", async () => { + it("skips ClickHouse insert when ClickHouse is disabled", async () => { const ok: PieceRetrievalResult = { success: true, pieceCid: PIECE.pieceCid, @@ -265,20 +236,11 @@ describe("AnonRetrievalService", () => { commPValid: true, }; - const { service, saveSpy, insertSpy } = makeService({ - pieceResult: ok, - saveImpl: async () => { - throw new Error("connection refused"); - }, - }); + const { service, insertSpy } = makeService({ pieceResult: ok, clickhouseEnabled: false }); - await expect(service.performForProvider(SP_ADDRESS)).resolves.toBeUndefined(); + await service.performForProvider(SP_ADDRESS); - expect(saveSpy).toHaveBeenCalledTimes(1); - // CH still gets the row keyed by the client-side uuid. 
- expect(insertSpy).toHaveBeenCalledTimes(1); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(typeof row.retrieval_id).toBe("string"); + expect(insertSpy).not.toHaveBeenCalled(); }); describe("with IPFS indexing", () => { @@ -302,7 +264,7 @@ describe("AnonRetrievalService", () => { }; } - it("populates CAR/IPNI/block-fetch columns when validation fully succeeds", async () => { + it("emits populated CAR/IPNI/block-fetch columns when validation fully succeeds", async () => { const carResult: CarValidationResult = { carParseable: true, blockCount: 42, @@ -316,7 +278,7 @@ describe("AnonRetrievalService", () => { blockFetchEndpoint: "https://sp.test/ipfs/", }; - const { service, saveSpy, insertSpy, validateCarSpy } = makeService({ + const { service, insertSpy, validateCarSpy } = makeService({ pieceResult: okPiece(Buffer.from("car-bytes")), piece: INDEXED_PIECE, carResult, @@ -325,24 +287,19 @@ describe("AnonRetrievalService", () => { await service.performForProvider(SP_ADDRESS); expect(validateCarSpy).toHaveBeenCalledTimes(1); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; - expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.SUCCESS); - expect(entity.commpValid).toBe(true); - expect(entity.carParseable).toBe(true); - expect(entity.carBlockCount).toBe(42); - expect(entity.blockFetchEndpoint).toBe("https://sp.test/ipfs/"); - expect(entity.blockFetchValid).toBe(true); - expect(entity.blockFetchSampledCount).toBe(5); - expect(entity.blockFetchFailedCount).toBe(0); - expect(entity.ipniStatus).toBe(IpniCheckStatus.VALID); - expect(entity.ipniVerifyMs).toBe(137); - expect(entity.ipniVerifiedCidsCount).toBe(6); - expect(entity.ipniUnverifiedCidsCount).toBe(0); - const [, row] = insertSpy.mock.calls[0] as [string, Record]; - expect(row.piece_fetch_status).toBe(PieceFetchStatus.SUCCESS); + expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); + expect(row.commp_valid).toBe(true); expect(row.car_parseable).toBe(true); - 
expect(row.ipni_status).toBe(IpniCheckStatus.VALID); + expect(row.car_block_count).toBe(42); + expect(row.block_fetch_endpoint).toBe("https://sp.test/ipfs/"); + expect(row.block_fetch_valid).toBe(true); + expect(row.block_fetch_sampled_count).toBe(5); + expect(row.block_fetch_failed_count).toBe(0); + expect(row.ipni_status).toBe("valid"); + expect(row.ipni_verify_ms).toBe(137); + expect(row.ipni_verified_cids_count).toBe(6); + expect(row.ipni_unverified_cids_count).toBe(0); }); it("distinguishes IPNI invalid from block-fetch failures with explicit counts", async () => { @@ -359,7 +316,7 @@ describe("AnonRetrievalService", () => { blockFetchEndpoint: "https://sp.test/ipfs/", }; - const { service, saveSpy } = makeService({ + const { service, insertSpy } = makeService({ pieceResult: okPiece(Buffer.from("car-bytes")), piece: INDEXED_PIECE, carResult, @@ -367,24 +324,24 @@ describe("AnonRetrievalService", () => { await service.performForProvider(SP_ADDRESS); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; + const [, row] = insertSpy.mock.calls[0] as [string, Record]; // The piece-fetch path still succeeded — failures are surfaced as // independent dimensions, not folded into piece_fetch_status. 
- expect(entity.pieceFetchStatus).toBe(PieceFetchStatus.SUCCESS); - expect(entity.carParseable).toBe(true); - expect(entity.ipniStatus).toBe(IpniCheckStatus.INVALID); - expect(entity.ipniVerifiedCidsCount).toBe(0); - expect(entity.ipniUnverifiedCidsCount).toBe(6); - expect(entity.blockFetchValid).toBe(false); - expect(entity.blockFetchSampledCount).toBe(5); - expect(entity.blockFetchFailedCount).toBe(2); + expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); + expect(row.car_parseable).toBe(true); + expect(row.ipni_status).toBe("invalid"); + expect(row.ipni_verified_cids_count).toBe(0); + expect(row.ipni_unverified_cids_count).toBe(6); + expect(row.block_fetch_valid).toBe(false); + expect(row.block_fetch_sampled_count).toBe(5); + expect(row.block_fetch_failed_count).toBe(2); }); it("emits ipni_status='error' (not 'skipped') when CAR validation throws on a successful piece", async () => { // Distinguishes a real infra outage (e.g. IpniVerificationService down) // from a piece that legitimately had no IPFS indexing. Without the // distinction, an outage looks like normal non-IPFS volume in dashboards. - const { service, saveSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy } = makeService({ + const { service, insertSpy, metricsRecordIpniSpy, metricsRecordBlockFetchSpy } = makeService({ pieceResult: okPiece(Buffer.from("car-bytes")), piece: INDEXED_PIECE, validateCarImpl: async () => { @@ -397,11 +354,11 @@ describe("AnonRetrievalService", () => { expect(metricsRecordIpniSpy).toHaveBeenCalledWith(expect.anything(), "error"); expect(metricsRecordBlockFetchSpy).toHaveBeenCalledWith(expect.anything(), "error"); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; - expect(entity.ipniStatus).toBe(IpniCheckStatus.ERROR); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.ipni_status).toBe("error"); // Piece-fetch path itself succeeded — only the validation pipeline failed. 
- expect(entity.commpValid).toBe(true); - expect(entity.carParseable).toBeNull(); + expect(row.commp_valid).toBe(true); + expect(row.car_parseable).toBeNull(); }); it("emits car_parseable=false with skipped IPNI/block-fetch when bytes don't parse as CAR", async () => { @@ -418,7 +375,7 @@ describe("AnonRetrievalService", () => { blockFetchEndpoint: null, }; - const { service, saveSpy } = makeService({ + const { service, insertSpy } = makeService({ pieceResult: okPiece(Buffer.from("not-a-car")), piece: INDEXED_PIECE, carResult, @@ -426,19 +383,19 @@ describe("AnonRetrievalService", () => { await service.performForProvider(SP_ADDRESS); - const entity = saveSpy.mock.calls[0]?.[0] as AnonRetrieval; - expect(entity.carParseable).toBe(false); + const [, row] = insertSpy.mock.calls[0] as [string, Record]; + expect(row.car_parseable).toBe(false); // car_block_count and block_fetch_sampled_count are gated on carParseable // so an unparseable CAR doesn't emit a misleading 0. - expect(entity.carBlockCount).toBeNull(); - expect(entity.blockFetchSampledCount).toBeNull(); - expect(entity.blockFetchEndpoint).toBeNull(); - expect(entity.blockFetchValid).toBeNull(); - expect(entity.blockFetchFailedCount).toBeNull(); - expect(entity.ipniStatus).toBe(IpniCheckStatus.SKIPPED); - expect(entity.ipniVerifyMs).toBeNull(); - expect(entity.ipniVerifiedCidsCount).toBeNull(); - expect(entity.ipniUnverifiedCidsCount).toBeNull(); + expect(row.car_block_count).toBeNull(); + expect(row.block_fetch_sampled_count).toBeNull(); + expect(row.block_fetch_endpoint).toBeNull(); + expect(row.block_fetch_valid).toBeNull(); + expect(row.block_fetch_failed_count).toBeNull(); + expect(row.ipni_status).toBe("skipped"); + expect(row.ipni_verify_ms).toBeNull(); + expect(row.ipni_verified_cids_count).toBeNull(); + expect(row.ipni_unverified_cids_count).toBeNull(); }); }); }); diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 
d8298776..5343d59a 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -4,9 +4,8 @@ import { InjectRepository } from "@nestjs/typeorm"; import type { Repository } from "typeorm"; import { ClickhouseService } from "../clickhouse/clickhouse.service.js"; import { type ProviderJobContext, toStructuredError } from "../common/logging.js"; -import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { IpniCheckStatus, PieceFetchStatus, ServiceType } from "../database/types.js"; +import { RetrievalStatus, ServiceType } from "../database/types.js"; import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; import { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; import { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; @@ -30,8 +29,6 @@ export class AnonRetrievalService { private readonly clickhouseService: ClickhouseService, @InjectRepository(StorageProvider) private readonly spRepository: Repository, - @InjectRepository(AnonRetrieval) - private readonly anonRetrievalRepository: Repository, ) {} async performForProvider(spAddress: string, signal?: AbortSignal, logContext?: ProviderJobContext): Promise { @@ -140,75 +137,80 @@ export class AnonRetrievalService { pieceResult.success ? "success" : pieceResult.aborted ? "failure.aborted" : "failure.http", ); } finally { - // Always persist a row — even on abort or unexpected error — so we never - // lose the evidence (ttfb, bytes, response code) we already collected. + // Always emit a ClickHouse row — even on abort or unexpected error — so + // we never lose the evidence (ttfb, bytes, response code) we already + // collected. const finalPieceResult = pieceResult ?? 
buildAbortedPlaceholder(piece.pieceCid, signal?.reason); - const providerInfo = this.walletSdkService.getProviderInfo(spAddress); - const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; - const retrievalEndpoint = `${spBaseUrl}/piece/${piece.pieceCid}`; - const pieceFetchStatus = finalPieceResult.success ? PieceFetchStatus.SUCCESS : PieceFetchStatus.FAILED; - const ipniStatus: IpniCheckStatus = !validatedCarPiece - ? IpniCheckStatus.SKIPPED - : carResult - ? ipniStatusFromResult(carResult) - : IpniCheckStatus.ERROR; + const retrievalId = randomUUID(); - const entity: AnonRetrieval = { - id: randomUUID(), - createdAt: startedAt, - startedAt, - probeLocation: this.clickhouseService.probeLocation, - spAddress, - spId: provider?.providerId ?? null, - spName: provider?.name ?? null, - pieceCid: piece.pieceCid, - dataSetId: BigInt(piece.dataSetId), - pieceId: BigInt(piece.pieceId), - rawSize: BigInt(piece.rawSize), - withIpfsIndexing: piece.withIPFSIndexing, - ipfsRootCid: piece.ipfsRootCid, - serviceType: ServiceType.DIRECT_SP, - retrievalEndpoint, - pieceFetchStatus, - httpResponseCode: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, - firstByteMs: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, - lastByteMs: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, - bytesRetrieved: finalPieceResult.bytesReceived > 0 ? BigInt(finalPieceResult.bytesReceived) : null, - throughputBps: finalPieceResult.throughputBps > 0 ? BigInt(Math.round(finalPieceResult.throughputBps)) : null, - commpValid: finalPieceResult.success ? finalPieceResult.commPValid : null, - carParseable: carResult ? carResult.carParseable : null, - carBlockCount: carResult?.carParseable ? carResult.blockCount : null, - blockFetchEndpoint: carResult?.blockFetchEndpoint ?? null, - blockFetchValid: carResult ? carResult.blockFetchValid : null, - blockFetchSampledCount: carResult?.carParseable ? 
carResult.sampledCidCount : null, - blockFetchFailedCount: carResult?.blockFetchFailedCount ?? null, - ipniStatus, - ipniVerifyMs: carResult?.ipniVerifyMs ?? null, - ipniVerifiedCidsCount: carResult?.ipniVerifiedCidsCount ?? null, - ipniUnverifiedCidsCount: carResult?.ipniUnverifiedCidsCount ?? null, - errorMessage: finalPieceResult.errorMessage ?? null, - }; + if (this.clickhouseService.enabled) { + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; + const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; + const ipniStatus = !validatedCarPiece ? "skipped" : carResult ? ipniStatusFromResult(carResult) : "error"; - try { - await this.anonRetrievalRepository.save(entity); - } catch (error) { - this.logger.warn({ + try { + this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { + timestamp: startedAt.getTime(), + probe_location: this.clickhouseService.probeLocation, + sp_address: spAddress, + sp_id: provider?.providerId != null ? Number(provider.providerId) : null, + sp_name: provider?.name ?? null, + retrieval_id: retrievalId, + piece_cid: piece.pieceCid, + data_set_id: piece.dataSetId, + piece_id: piece.pieceId, + raw_size: piece.rawSize, + with_ipfs_indexing: piece.withIPFSIndexing, + ipfs_root_cid: piece.ipfsRootCid, + service_type: ServiceType.DIRECT_SP, + retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, + piece_fetch_status: pieceFetchStatus, + http_response_code: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, + first_byte_ms: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, + last_byte_ms: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, + bytes_retrieved: finalPieceResult.bytesReceived > 0 ? finalPieceResult.bytesReceived : null, + throughput_bps: finalPieceResult.throughputBps > 0 ? 
Math.round(finalPieceResult.throughputBps) : null, + commp_valid: finalPieceResult.success ? finalPieceResult.commPValid : null, + car_parseable: carResult ? carResult.carParseable : null, + car_block_count: carResult?.carParseable ? carResult?.blockCount : null, + block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, + block_fetch_valid: carResult ? carResult.blockFetchValid : null, + block_fetch_sampled_count: carResult?.carParseable ? carResult?.sampledCidCount : null, + block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, + ipni_status: ipniStatus, + ipni_verify_ms: carResult?.ipniVerifyMs ?? null, + ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, + ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, + error_message: finalPieceResult.errorMessage ?? null, + }); + } catch (error) { + // ClickhouseService.insert is buffered/non-throwing in normal operation, but + // guard against unexpected runtime errors so we don't break the probe cycle. 
+ this.logger.warn({ + ...logContext, + event: "anon_retrieval_clickhouse_insert_failed", + message: "Failed to enqueue anonymous retrieval row to ClickHouse", + pieceCid: piece.pieceCid, + spAddress, + error: toStructuredError(error), + }); + } + } else { + this.logger.debug({ ...logContext, - event: "anon_retrieval_save_failed", - message: "Failed to persist anonymous retrieval row to Postgres", + event: "anon_retrieval_clickhouse_disabled", + message: "ClickHouse disabled — anon retrieval row not emitted", pieceCid: piece.pieceCid, spAddress, - error: toStructuredError(error), }); } - this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, toClickhouseRow(entity)); - this.logger.log({ ...logContext, event: "anon_retrieval_completed", message: "Anonymous retrieval test completed", - retrievalId: entity.id, + retrievalId, pieceCid: piece.pieceCid, spAddress, success: finalPieceResult.success, @@ -224,53 +226,9 @@ export class AnonRetrievalService { } } -function ipniStatusFromResult(result: CarValidationResult): IpniCheckStatus { - switch (result.ipniValid) { - case null: - return IpniCheckStatus.SKIPPED; - case true: - return IpniCheckStatus.VALID; - case false: - return IpniCheckStatus.INVALID; - default: - throw new Error(`Unexpected IPNI validation result: ${result.ipniValid}`); - } -} - -/** - * Project an AnonRetrieval entity to the chartable subset stored in ClickHouse. - * High-cardinality identifiers (piece_cid, data_set_id, piece_id, ipfs_root_cid), - * URLs (retrieval_endpoint, block_fetch_endpoint), and free-text columns - * (error_message) are intentionally dropped — they live only in Postgres. - */ -function toClickhouseRow(entity: AnonRetrieval): Record { - return { - timestamp: entity.startedAt.getTime(), - probe_location: entity.probeLocation, - sp_address: entity.spAddress, - sp_id: entity.spId != null ? 
Number(entity.spId) : null, - sp_name: entity.spName, - retrieval_id: entity.id, - raw_size: Number(entity.rawSize), - with_ipfs_indexing: entity.withIpfsIndexing, - service_type: entity.serviceType, - piece_fetch_status: entity.pieceFetchStatus, - http_response_code: entity.httpResponseCode, - first_byte_ms: entity.firstByteMs, - last_byte_ms: entity.lastByteMs, - bytes_retrieved: entity.bytesRetrieved != null ? Number(entity.bytesRetrieved) : null, - throughput_bps: entity.throughputBps != null ? Number(entity.throughputBps) : null, - commp_valid: entity.commpValid, - car_parseable: entity.carParseable, - car_block_count: entity.carBlockCount, - block_fetch_valid: entity.blockFetchValid, - block_fetch_sampled_count: entity.blockFetchSampledCount, - block_fetch_failed_count: entity.blockFetchFailedCount, - ipni_status: entity.ipniStatus, - ipni_verify_ms: entity.ipniVerifyMs, - ipni_verified_cids_count: entity.ipniVerifiedCidsCount, - ipni_unverified_cids_count: entity.ipniUnverifiedCidsCount, - }; +function ipniStatusFromResult(result: CarValidationResult): "valid" | "invalid" | "skipped" { + if (result.ipniValid === null) return "skipped"; + return result.ipniValid ? 
"valid" : "invalid"; } function buildAbortedPlaceholder(pieceCid: string, reason: unknown): PieceRetrievalResult { diff --git a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts index 4e9e38df..c05dcb5f 100644 --- a/apps/backend/src/retrieval-anon/retrieval-anon.module.ts +++ b/apps/backend/src/retrieval-anon/retrieval-anon.module.ts @@ -1,7 +1,6 @@ import { Module } from "@nestjs/common"; import { ConfigModule } from "@nestjs/config"; import { TypeOrmModule } from "@nestjs/typeorm"; -import { AnonRetrieval } from "../database/entities/anon-retrieval.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { HttpClientModule } from "../http-client/http-client.module.js"; import { IpniModule } from "../ipni/ipni.module.js"; @@ -15,7 +14,7 @@ import { PieceRetrievalService } from "./piece-retrieval.service.js"; @Module({ imports: [ ConfigModule, - TypeOrmModule.forFeature([AnonRetrieval, StorageProvider]), + TypeOrmModule.forFeature([StorageProvider]), SubgraphModule, WalletSdkModule, HttpClientModule, From d4f7d802f93a3c48cec49bc9f145bc28c2815ea3 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 08:29:28 +0200 Subject: [PATCH 12/28] refactor(retrieval-anon): introduce IpniCheckStatus enum and drop redundant clickhouse-enabled gate - Replace string literals ("valid"|"invalid"|"skipped"|"error") with IpniCheckStatus enum in anon-retrieval.service.ts - Drop the `if (clickhouseService.enabled)` wrapper around the insert call; ClickhouseService.insert is already a no-op when disabled, matching the pattern used by other retrieval flows - Fix outdated ipni_status schema comment to include the 'error' value --- .../src/clickhouse/clickhouse.schema.ts | 2 +- apps/backend/src/database/types.ts | 7 + .../anon-retrieval.service.spec.ts | 23 +-- .../retrieval-anon/anon-retrieval.service.ts | 133 +++++++++--------- 4 files changed, 74 insertions(+), 
91 deletions(-) diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index e30f6151..05684154 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -97,7 +97,7 @@ export function buildMigrations(database: string): string[] { block_fetch_sampled_count Nullable(UInt32), -- number of blocks sampled and probed via /ipfs/?format=raw block_fetch_failed_count Nullable(UInt32), -- number of sampled blocks that failed (non-2xx, hash mismatch, unsupported codec, or transport error) - ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' (mirrors data_storage_checks naming) + ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' | 'error' ipni_verify_ms Nullable(Float64), -- IPNI verification duration; null when skipped ipni_verified_cids_count Nullable(UInt32), -- CIDs confirmed findable via IPNI ipni_unverified_cids_count Nullable(UInt32), -- CIDs checked but not findable diff --git a/apps/backend/src/database/types.ts b/apps/backend/src/database/types.ts index 46fd5d28..c56b355a 100644 --- a/apps/backend/src/database/types.ts +++ b/apps/backend/src/database/types.ts @@ -28,6 +28,13 @@ export enum IpniStatus { FAILED = "failed", } +export enum IpniCheckStatus { + VALID = "valid", + INVALID = "invalid", + SKIPPED = "skipped", + ERROR = "error", +} + /** * Metadata schema for deal storage and retrieval */ diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index b5f17c57..c82eed76 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -35,7 +35,6 @@ function makeProvider(): StorageProvider { function makeService(opts: { pieceResult: PieceRetrievalResult; fetchPieceImpl?: (signal?: AbortSignal) => Promise; - clickhouseEnabled?: boolean; 
piece?: AnonPiece | null; carResult?: CarValidationResult; validateCarImpl?: () => Promise; @@ -51,7 +50,7 @@ function makeService(opts: { const insertSpy = vi.fn(); const clickhouseService = { insert: insertSpy, - enabled: opts.clickhouseEnabled ?? true, + enabled: true, probeLocation: "test-location", } as unknown as ClickhouseService; @@ -223,26 +222,6 @@ describe("AnonRetrievalService", () => { expect(row.piece_fetch_status).toBe(RetrievalStatus.FAILED); }); - it("skips ClickHouse insert when ClickHouse is disabled", async () => { - const ok: PieceRetrievalResult = { - success: true, - pieceCid: PIECE.pieceCid, - bytesReceived: 1024, - pieceBytes: null, - latencyMs: 100, - ttfbMs: 10, - throughputBps: 10240, - statusCode: 200, - commPValid: true, - }; - - const { service, insertSpy } = makeService({ pieceResult: ok, clickhouseEnabled: false }); - - await service.performForProvider(SP_ADDRESS); - - expect(insertSpy).not.toHaveBeenCalled(); - }); - describe("with IPFS indexing", () => { const INDEXED_PIECE: AnonPiece = { ...PIECE, diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 5343d59a..c1d08c0e 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -5,7 +5,7 @@ import type { Repository } from "typeorm"; import { ClickhouseService } from "../clickhouse/clickhouse.service.js"; import { type ProviderJobContext, toStructuredError } from "../common/logging.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { RetrievalStatus, ServiceType } from "../database/types.js"; +import { IpniCheckStatus, RetrievalStatus, ServiceType } from "../database/types.js"; import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; import { AnonRetrievalCheckMetrics } from "../metrics-prometheus/check-metrics.service.js"; import { WalletSdkService 
} from "../wallet-sdk/wallet-sdk.service.js"; @@ -108,13 +108,17 @@ export class AnonRetrievalService { this.metrics.recordIpniStatus(labels, ipniStatusFromResult(carResult)); this.metrics.recordBlockFetchStatus( labels, - carResult.blockFetchValid === null ? "skipped" : carResult.blockFetchValid ? "valid" : "invalid", + carResult.blockFetchValid === null + ? IpniCheckStatus.SKIPPED + : carResult.blockFetchValid + ? IpniCheckStatus.VALID + : IpniCheckStatus.INVALID, ); } catch (error) { // Validation was attempted on a successful piece retrieval but threw. this.metrics.recordCarParseStatus(labels, false); - this.metrics.recordIpniStatus(labels, "error"); - this.metrics.recordBlockFetchStatus(labels, "error"); + this.metrics.recordIpniStatus(labels, IpniCheckStatus.ERROR); + this.metrics.recordBlockFetchStatus(labels, IpniCheckStatus.ERROR); this.logger.warn({ ...logContext, event: "anon_retrieval_car_validation_failed", @@ -126,8 +130,8 @@ export class AnonRetrievalService { } } else if (!pieceResult.success) { // Piece retrieval failed — IPNI and block fetch were skipped - this.metrics.recordIpniStatus(labels, "skipped"); - this.metrics.recordBlockFetchStatus(labels, "skipped"); + this.metrics.recordIpniStatus(labels, IpniCheckStatus.SKIPPED); + this.metrics.recordBlockFetchStatus(labels, IpniCheckStatus.SKIPPED); } // Overall check duration and status @@ -139,70 +143,63 @@ export class AnonRetrievalService { } finally { // Always emit a ClickHouse row — even on abort or unexpected error — so // we never lose the evidence (ttfb, bytes, response code) we already - // collected. + // collected. ClickhouseService.insert is a no-op when disabled. const finalPieceResult = pieceResult ?? buildAbortedPlaceholder(piece.pieceCid, signal?.reason); const retrievalId = randomUUID(); - - if (this.clickhouseService.enabled) { - const providerInfo = this.walletSdkService.getProviderInfo(spAddress); - const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? 
spAddress; - const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; - const ipniStatus = !validatedCarPiece ? "skipped" : carResult ? ipniStatusFromResult(carResult) : "error"; - - try { - this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { - timestamp: startedAt.getTime(), - probe_location: this.clickhouseService.probeLocation, - sp_address: spAddress, - sp_id: provider?.providerId != null ? Number(provider.providerId) : null, - sp_name: provider?.name ?? null, - retrieval_id: retrievalId, - piece_cid: piece.pieceCid, - data_set_id: piece.dataSetId, - piece_id: piece.pieceId, - raw_size: piece.rawSize, - with_ipfs_indexing: piece.withIPFSIndexing, - ipfs_root_cid: piece.ipfsRootCid, - service_type: ServiceType.DIRECT_SP, - retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, - piece_fetch_status: pieceFetchStatus, - http_response_code: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, - first_byte_ms: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, - last_byte_ms: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, - bytes_retrieved: finalPieceResult.bytesReceived > 0 ? finalPieceResult.bytesReceived : null, - throughput_bps: finalPieceResult.throughputBps > 0 ? Math.round(finalPieceResult.throughputBps) : null, - commp_valid: finalPieceResult.success ? finalPieceResult.commPValid : null, - car_parseable: carResult ? carResult.carParseable : null, - car_block_count: carResult?.carParseable ? carResult?.blockCount : null, - block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, - block_fetch_valid: carResult ? carResult.blockFetchValid : null, - block_fetch_sampled_count: carResult?.carParseable ? carResult?.sampledCidCount : null, - block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, - ipni_status: ipniStatus, - ipni_verify_ms: carResult?.ipniVerifyMs ?? null, - ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? 
null, - ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, - error_message: finalPieceResult.errorMessage ?? null, - }); - } catch (error) { - // ClickhouseService.insert is buffered/non-throwing in normal operation, but - // guard against unexpected runtime errors so we don't break the probe cycle. - this.logger.warn({ - ...logContext, - event: "anon_retrieval_clickhouse_insert_failed", - message: "Failed to enqueue anonymous retrieval row to ClickHouse", - pieceCid: piece.pieceCid, - spAddress, - error: toStructuredError(error), - }); - } - } else { - this.logger.debug({ + const providerInfo = this.walletSdkService.getProviderInfo(spAddress); + const spBaseUrl = providerInfo?.pdp.serviceURL.replace(/\/$/, "") ?? spAddress; + const pieceFetchStatus = finalPieceResult.success ? RetrievalStatus.SUCCESS : RetrievalStatus.FAILED; + const ipniStatus: IpniCheckStatus = !validatedCarPiece + ? IpniCheckStatus.SKIPPED + : carResult + ? ipniStatusFromResult(carResult) + : IpniCheckStatus.ERROR; + + try { + this.clickhouseService.insert(ANON_RETRIEVAL_CHECKS_TABLE, { + timestamp: startedAt.getTime(), + probe_location: this.clickhouseService.probeLocation, + sp_address: spAddress, + sp_id: provider?.providerId != null ? Number(provider.providerId) : null, + sp_name: provider?.name ?? null, + retrieval_id: retrievalId, + piece_cid: piece.pieceCid, + data_set_id: piece.dataSetId, + piece_id: piece.pieceId, + raw_size: piece.rawSize, + with_ipfs_indexing: piece.withIPFSIndexing, + ipfs_root_cid: piece.ipfsRootCid, + service_type: ServiceType.DIRECT_SP, + retrieval_endpoint: `${spBaseUrl}/piece/${piece.pieceCid}`, + piece_fetch_status: pieceFetchStatus, + http_response_code: finalPieceResult.statusCode > 0 ? finalPieceResult.statusCode : null, + first_byte_ms: finalPieceResult.ttfbMs > 0 ? finalPieceResult.ttfbMs : null, + last_byte_ms: finalPieceResult.latencyMs > 0 ? finalPieceResult.latencyMs : null, + bytes_retrieved: finalPieceResult.bytesReceived > 0 ? 
finalPieceResult.bytesReceived : null, + throughput_bps: finalPieceResult.throughputBps > 0 ? Math.round(finalPieceResult.throughputBps) : null, + commp_valid: finalPieceResult.success ? finalPieceResult.commPValid : null, + car_parseable: carResult ? carResult.carParseable : null, + car_block_count: carResult?.carParseable ? carResult?.blockCount : null, + block_fetch_endpoint: carResult?.blockFetchEndpoint ?? null, + block_fetch_valid: carResult ? carResult.blockFetchValid : null, + block_fetch_sampled_count: carResult?.carParseable ? carResult?.sampledCidCount : null, + block_fetch_failed_count: carResult?.blockFetchFailedCount ?? null, + ipni_status: ipniStatus, + ipni_verify_ms: carResult?.ipniVerifyMs ?? null, + ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, + ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, + error_message: finalPieceResult.errorMessage ?? null, + }); + } catch (error) { + // ClickhouseService.insert is buffered/non-throwing in normal operation, but + // guard against unexpected runtime errors so we don't break the probe cycle. + this.logger.warn({ ...logContext, - event: "anon_retrieval_clickhouse_disabled", - message: "ClickHouse disabled — anon retrieval row not emitted", + event: "anon_retrieval_clickhouse_insert_failed", + message: "Failed to enqueue anonymous retrieval row to ClickHouse", pieceCid: piece.pieceCid, spAddress, + error: toStructuredError(error), }); } @@ -226,9 +223,9 @@ export class AnonRetrievalService { } } -function ipniStatusFromResult(result: CarValidationResult): "valid" | "invalid" | "skipped" { - if (result.ipniValid === null) return "skipped"; - return result.ipniValid ? "valid" : "invalid"; +function ipniStatusFromResult(result: CarValidationResult): IpniCheckStatus { + if (result.ipniValid === null) return IpniCheckStatus.SKIPPED; + return result.ipniValid ? 
IpniCheckStatus.VALID : IpniCheckStatus.INVALID; } function buildAbortedPlaceholder(pieceCid: string, reason: unknown): PieceRetrievalResult { From ab3748a047415581dbc1aa2ed09651ff4f11d80e Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 09:00:59 +0200 Subject: [PATCH 13/28] remove(retrieval-anon): dedup window logic --- .../anon-piece-selector.service.spec.ts | 20 ------------ .../anon-piece-selector.service.ts | 31 ------------------- 2 files changed, 51 deletions(-) diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts index 32d13719..30a04486 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.spec.ts @@ -100,26 +100,6 @@ describe("AnonPieceSelectorService", () => { expect(result?.pieceCid).toBe(liveCid); }); - it("redraws when the first sampled piece was recently selected by this process", async () => { - const staleCid = "baga-stale"; - const freshCid = "baga-fresh"; - - const service = new AnonPieceSelectorService(subgraphService, makeConfigService()); - - // Prime the in-memory ring buffer by first selecting `staleCid`. - sampleAnonPiece.mockResolvedValueOnce(makePiece({ pieceCid: staleCid })); - const first = await service.selectPieceForProvider(SP_ADDRESS); - expect(first?.pieceCid).toBe(staleCid); - - // Now the second selection should skip `staleCid` and use `freshCid`. - sampleAnonPiece - .mockResolvedValueOnce(makePiece({ pieceCid: staleCid })) - .mockResolvedValueOnce(makePiece({ pieceCid: freshCid })); - const second = await service.selectPieceForProvider(SP_ADDRESS); - - expect(second?.pieceCid).toBe(freshCid); - }); - it("falls back to the opposite pool when the preferred one is empty", async () => { // First pool call returns nothing twice (both attempts), second pool succeeds. 
const fresh = makePiece({ pieceCid: "baga-other-pool" }); diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts index 342a4780..0ee51fc7 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts @@ -7,11 +7,6 @@ import { SubgraphService } from "../subgraph/subgraph.service.js"; import type { AnonCandidatePiece } from "../subgraph/types.js"; import type { AnonPiece } from "./types.js"; -/** - * Number of most-recently-tested piece CIDs to exclude from re-selection. - */ -const RECENT_DEDUP_WINDOW = 500; - /** * Piece size buckets, in raw (unpadded) bytes. Weighted sampling across * these buckets keeps tests meaningful for bandwidth measurement without @@ -47,10 +42,6 @@ const IPFS_INDEXED_SAMPLE_RATE = 0.8; export class AnonPieceSelectorService { private readonly logger = new Logger(AnonPieceSelectorService.name); - /** Bounded FIFO of recently-selected piece CIDs. Process-local; lost on restart. */ - private readonly recentlyTested = new Set(); - private readonly recentlyTestedQueue: string[] = []; - constructor( private readonly subgraphService: SubgraphService, private readonly configService: ConfigService, @@ -91,7 +82,6 @@ export class AnonPieceSelectorService { }); if (piece) { - this.rememberRecent(piece.pieceCid); this.logger.log({ event: "anon_piece_selected", message: "Selected anonymous piece for retrieval test", @@ -158,10 +148,6 @@ export class AnonPieceSelectorService { continue; } - if (this.recentlyTested.has(piece.pieceCid)) { - continue; - } - return piece; } @@ -179,23 +165,6 @@ export class AnonPieceSelectorService { } return "medium"; } - - /** Push a CID into the bounded FIFO; evict the oldest when at capacity. 
*/ - private rememberRecent(pieceCid: string): void { - if (this.recentlyTested.has(pieceCid)) { - return; - } - - this.recentlyTested.add(pieceCid); - this.recentlyTestedQueue.push(pieceCid); - - while (this.recentlyTestedQueue.length > RECENT_DEDUP_WINDOW) { - const evicted = this.recentlyTestedQueue.shift(); - if (evicted !== undefined) { - this.recentlyTested.delete(evicted); - } - } - } } /** Uniform-random 32-byte sort key as `0x`-prefixed hex. */ From beffac7be083ae84e56bfba5818a48257d4b4922 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 09:09:36 +0200 Subject: [PATCH 14/28] revert(ipni): sequential block CID verification Context: https://github.com/filecoin-project/filecoin-pin/issues/417 --- .../src/clickhouse/clickhouse.schema.ts | 4 +- .../src/ipni/ipni-verification.service.ts | 122 +++++++----------- .../anon-retrieval.service.spec.ts | 16 +-- .../retrieval-anon/anon-retrieval.service.ts | 2 - .../retrieval-anon/car-validation.service.ts | 12 +- apps/backend/src/retrieval-anon/types.ts | 2 - 6 files changed, 53 insertions(+), 105 deletions(-) diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index 05684154..e8612056 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -97,10 +97,8 @@ export function buildMigrations(database: string): string[] { block_fetch_sampled_count Nullable(UInt32), -- number of blocks sampled and probed via /ipfs/?format=raw block_fetch_failed_count Nullable(UInt32), -- number of sampled blocks that failed (non-2xx, hash mismatch, unsupported codec, or transport error) - ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' | 'error' + ipni_status LowCardinality(String), -- 'valid' | 'invalid' | 'skipped' | 'error' — all-or-nothing across the root CID and the sampled child CIDs (filecoin-pin verifies them as a single batch) ipni_verify_ms Nullable(Float64), -- 
IPNI verification duration; null when skipped - ipni_verified_cids_count Nullable(UInt32), -- CIDs confirmed findable via IPNI - ipni_unverified_cids_count Nullable(UInt32), -- CIDs checked but not findable error_message Nullable(String) -- failure reason; null on success ) ENGINE MergeTree() diff --git a/apps/backend/src/ipni/ipni-verification.service.ts b/apps/backend/src/ipni/ipni-verification.service.ts index 51fcc8e0..3d7d52f9 100644 --- a/apps/backend/src/ipni/ipni-verification.service.ts +++ b/apps/backend/src/ipni/ipni-verification.service.ts @@ -3,7 +3,7 @@ import { PDPProvider } from "filecoin-pin"; import { waitForIpniProviderResults } from "filecoin-pin/core/utils"; import { CID } from "multiformats/cid"; import type { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import type { FailedCID, IPNIVerificationResult } from "../deal-addons/strategies/ipni.types.js"; +import type { IPNIVerificationResult } from "../deal-addons/strategies/ipni.types.js"; export type IpniVerificationInput = { rootCid: CID; @@ -44,6 +44,7 @@ export class IpniVerificationService { const expectedProviders = [this.buildExpectedProviderInfo(storageProvider as StorageProviderWithUrl)]; const timeoutSignal = AbortSignal.timeout(timeoutMs); const verificationSignal = signal ? 
AbortSignal.any([signal, timeoutSignal]) : timeoutSignal; + let failureReason = "IPNI did not return expected provider results via filecoin-pin"; this.logger.log({ event: "ipni_verification_started", @@ -60,69 +61,56 @@ export class IpniVerificationService { }); const ipniVerificationStartTime = Date.now(); - const cidsToValidate: { cid: CID; isRoot: boolean }[] = [ - { cid: rootCid, isRoot: true }, - ...blockCids.map((cid) => ({ cid, isRoot: false })), - ]; - let verified = 0; - const failedCIDs: FailedCID[] = []; - let rootCIDVerified = false; - - // waitForIpniProviderResults is all-or-nothing per call (throws on first failure), - // so we invoke it once per CID to get accurate per-CID verified/unverified counts. - // The shared verificationSignal bounds total wall-clock time across all CIDs. - for (const { cid, isRoot } of cidsToValidate) { + const ipniValidated = await waitForIpniProviderResults(rootCid, { + childBlocks: blockCids, + maxAttempts, + delayMs, + expectedProviders, + signal: verificationSignal, + }).catch((error) => { if (signal?.aborted) { signal.throwIfAborted(); } - if (verificationSignal.aborted) { - failedCIDs.push({ cid: cid.toString(), reason: `IPNI verification timed out after ${timeoutMs}ms` }); - continue; - } - - try { - await waitForIpniProviderResults(cid, { - maxAttempts, - delayMs, - expectedProviders, - signal: verificationSignal, - }); - verified += 1; - if (isRoot) rootCIDVerified = true; - } catch (error) { - if (signal?.aborted) { - signal.throwIfAborted(); - } - - const reason = verificationSignal.aborted - ? `IPNI verification timed out after ${timeoutMs}ms` - : error instanceof Error - ? 
error.message - : String(error); - - failedCIDs.push({ cid: cid.toString(), reason }); - - this.logger.warn({ - event: "ipni_cid_verification_failed", - message: "IPNI verification failed for CID", - cid: cid.toString(), - isRoot, + failureReason = `IPNI verification timed out after ${timeoutMs}ms`; + this.logger.error({ + event: "ipni_verification_timed_out", + message: failureReason, + rootCID: rootCid.toString(), providerAddress: storageProvider.address, providerId: storageProvider.providerId, providerName: storageProvider.name, serviceUrl: storageProvider.serviceUrl, - failureReason: reason, + blockCIDCount: blockCids.length, + timeoutMs, + pollIntervalMs: delayMs, + maxAttempts, }); + return false; } - } + const errorMessage = error instanceof Error ? error.message : String(error); + failureReason = errorMessage; + this.logger.error({ + event: "ipni_verification_failed", + message: "IPNI verification failed", + rootCID: rootCid.toString(), + providerAddress: storageProvider.address, + providerId: storageProvider.providerId, + providerName: storageProvider.name, + serviceUrl: storageProvider.serviceUrl, + blockCIDCount: blockCids.length, + timeoutMs, + pollIntervalMs: delayMs, + maxAttempts, + failureReason, + }); + return false; + }); const ipniVerificationDurationMs = Date.now() - ipniVerificationStartTime; - const total = cidsToValidate.length; - const unverified = total - verified; - if (verified === total) { + if (ipniValidated) { this.logger.log({ event: "ipni_verification_succeeded", message: "IPNI verification succeeded", @@ -133,32 +121,22 @@ export class IpniVerificationService { verifyDurationMs: ipniVerificationDurationMs, blockCIDCount: blockCids.length, }); - } else { - this.logger.error({ - event: verificationSignal.aborted ? 
"ipni_verification_timed_out" : "ipni_verification_failed", - message: "IPNI verification did not fully succeed", - rootCID: rootCid.toString(), - providerAddress: storageProvider.address, - providerId: storageProvider.providerId, - providerName: storageProvider.name, - serviceUrl: storageProvider.serviceUrl, - blockCIDCount: blockCids.length, - timeoutMs, - pollIntervalMs: delayMs, - maxAttempts, - verified, - unverified, - total, - }); } return { - verified: verified, - unverified: unverified, - total: total, - rootCIDVerified: rootCIDVerified, + verified: ipniValidated ? 1 : 0, + unverified: ipniValidated ? 0 : 1, + total: 1, + rootCIDVerified: ipniValidated, durationMs: ipniVerificationDurationMs, - failedCIDs: failedCIDs, + failedCIDs: ipniValidated + ? [] + : [ + { + cid: rootCid.toString(), + reason: failureReason, + }, + ], verifiedAt: new Date().toISOString(), }; } diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts index c82eed76..adc75920 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.spec.ts @@ -162,8 +162,6 @@ describe("AnonRetrievalService", () => { expect(row.block_fetch_failed_count).toBeNull(); expect(row.ipni_status).toBe("skipped"); expect(row.ipni_verify_ms).toBeNull(); - expect(row.ipni_verified_cids_count).toBeNull(); - expect(row.ipni_unverified_cids_count).toBeNull(); }); it("still emits a row when the signal aborts before fetchPiece runs", async () => { @@ -250,8 +248,6 @@ describe("AnonRetrievalService", () => { sampledCidCount: 5, ipniValid: true, ipniVerifyMs: 137, - ipniVerifiedCidsCount: 6, - ipniUnverifiedCidsCount: 0, blockFetchValid: true, blockFetchFailedCount: 0, blockFetchEndpoint: "https://sp.test/ipfs/", @@ -277,19 +273,15 @@ describe("AnonRetrievalService", () => { expect(row.block_fetch_failed_count).toBe(0); expect(row.ipni_status).toBe("valid"); 
expect(row.ipni_verify_ms).toBe(137); - expect(row.ipni_verified_cids_count).toBe(6); - expect(row.ipni_unverified_cids_count).toBe(0); }); - it("distinguishes IPNI invalid from block-fetch failures with explicit counts", async () => { + it("distinguishes IPNI invalid from block-fetch failures", async () => { const carResult: CarValidationResult = { carParseable: true, blockCount: 100, sampledCidCount: 5, ipniValid: false, ipniVerifyMs: 250, - ipniVerifiedCidsCount: 0, - ipniUnverifiedCidsCount: 6, blockFetchValid: false, blockFetchFailedCount: 2, blockFetchEndpoint: "https://sp.test/ipfs/", @@ -309,8 +301,6 @@ describe("AnonRetrievalService", () => { expect(row.piece_fetch_status).toBe(RetrievalStatus.SUCCESS); expect(row.car_parseable).toBe(true); expect(row.ipni_status).toBe("invalid"); - expect(row.ipni_verified_cids_count).toBe(0); - expect(row.ipni_unverified_cids_count).toBe(6); expect(row.block_fetch_valid).toBe(false); expect(row.block_fetch_sampled_count).toBe(5); expect(row.block_fetch_failed_count).toBe(2); @@ -347,8 +337,6 @@ describe("AnonRetrievalService", () => { sampledCidCount: 0, ipniValid: null, ipniVerifyMs: null, - ipniVerifiedCidsCount: null, - ipniUnverifiedCidsCount: null, blockFetchValid: null, blockFetchFailedCount: null, blockFetchEndpoint: null, @@ -373,8 +361,6 @@ describe("AnonRetrievalService", () => { expect(row.block_fetch_failed_count).toBeNull(); expect(row.ipni_status).toBe("skipped"); expect(row.ipni_verify_ms).toBeNull(); - expect(row.ipni_verified_cids_count).toBeNull(); - expect(row.ipni_unverified_cids_count).toBeNull(); }); }); }); diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index c1d08c0e..25b34e82 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -186,8 +186,6 @@ export class AnonRetrievalService { block_fetch_failed_count: 
carResult?.blockFetchFailedCount ?? null, ipni_status: ipniStatus, ipni_verify_ms: carResult?.ipniVerifyMs ?? null, - ipni_verified_cids_count: carResult?.ipniVerifiedCidsCount ?? null, - ipni_unverified_cids_count: carResult?.ipniUnverifiedCidsCount ?? null, error_message: finalPieceResult.errorMessage ?? null, }); } catch (error) { diff --git a/apps/backend/src/retrieval-anon/car-validation.service.ts b/apps/backend/src/retrieval-anon/car-validation.service.ts index 27ec2744..c3a6c717 100644 --- a/apps/backend/src/retrieval-anon/car-validation.service.ts +++ b/apps/backend/src/retrieval-anon/car-validation.service.ts @@ -63,8 +63,6 @@ export class CarValidationService { sampledCidCount: 0, ipniValid: null, ipniVerifyMs: null, - ipniVerifiedCidsCount: null, - ipniUnverifiedCidsCount: null, blockFetchValid: null, blockFetchFailedCount: null, blockFetchEndpoint: null, @@ -77,8 +75,6 @@ export class CarValidationService { sampledCidCount: 0, ipniValid: null, ipniVerifyMs: null, - ipniVerifiedCidsCount: null, - ipniUnverifiedCidsCount: null, blockFetchValid: null, blockFetchFailedCount: null, blockFetchEndpoint: null, @@ -99,8 +95,6 @@ export class CarValidationService { sampledCidCount: sampledBlocks.length, ipniValid: ipni.valid, ipniVerifyMs: ipni.durationMs, - ipniVerifiedCidsCount: ipni.verifiedCount, - ipniUnverifiedCidsCount: ipni.unverifiedCount, blockFetchValid: blockFetchResult.valid, blockFetchFailedCount: blockFetchResult.failedCount, blockFetchEndpoint: blockFetchResult.endpoint, @@ -129,8 +123,6 @@ export class CarValidationService { ): Promise<{ valid: boolean; durationMs: number | null; - verifiedCount: number | null; - unverifiedCount: number | null; }> { const timeouts = this.configService.get("timeouts", { infer: true }); let rootCid: CID; @@ -144,7 +136,7 @@ export class CarValidationService { providerAddress: provider.address, error: toStructuredError(error), }); - return { valid: false, durationMs: null, verifiedCount: null, unverifiedCount: null 
}; + return { valid: false, durationMs: null }; } const result = await this.ipniVerificationService.verify({ @@ -159,8 +151,6 @@ export class CarValidationService { return { valid: result.rootCIDVerified, durationMs: result.durationMs, - verifiedCount: result.verified, - unverifiedCount: result.unverified, }; } diff --git a/apps/backend/src/retrieval-anon/types.ts b/apps/backend/src/retrieval-anon/types.ts index 3ba2b9f9..9013a5ea 100644 --- a/apps/backend/src/retrieval-anon/types.ts +++ b/apps/backend/src/retrieval-anon/types.ts @@ -31,8 +31,6 @@ export type CarValidationResult = { sampledCidCount: number; ipniValid: boolean | null; ipniVerifyMs: number | null; - ipniVerifiedCidsCount: number | null; - ipniUnverifiedCidsCount: number | null; blockFetchValid: boolean | null; blockFetchFailedCount: number | null; blockFetchEndpoint: string | null; From f26744b8dfc661e779fae511cdc56a9985942e2c Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 11:18:50 +0200 Subject: [PATCH 15/28] docs(retrieval-anon): flow description and metrics definitions --- .../anon-piece-selector.service.ts | 6 +- docs/checks/README.md | 3 +- docs/checks/anon-retrievals.md | 145 ++++++++++++++++++ docs/checks/events-and-metrics.md | 20 +++ 4 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 docs/checks/anon-retrievals.md diff --git a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts index 0ee51fc7..d354a222 100644 --- a/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts +++ b/apps/backend/src/retrieval-anon/anon-piece-selector.service.ts @@ -18,13 +18,15 @@ type SizeRange = { min: bigint; max: bigint }; const MIB = 1024n * 1024n; // All downloads are buffered in-memory, so we need to keep piece sizes reasonable +// When changing these values, also update ./docs/checks/anon-retrievals.md#piece-selection const SIZE_BUCKETS: Record = { small: { min: 1n * MIB, max: 20n * 
MIB - 1n }, medium: { min: 20n * MIB, max: 100n * MIB - 1n }, large: { min: 100n * MIB, max: 500n * MIB - 1n }, }; -/** Weights for choosing a bucket per selection. Must sum to 1. */ +// Weights for choosing a bucket per selection. Must sum to 1. +// When changing these values, also update ./docs/checks/anon-retrievals.md#piece-selection const BUCKET_WEIGHTS: Record = { small: 0.2, medium: 0.5, @@ -35,6 +37,8 @@ const BUCKET_WEIGHTS: Record = { * Probability the primary draw targets the withIPFSIndexing pool. * The rest of the time we sample across all FWSS pieces, so SPs can't * optimise only their CAR corpus. + * + * When changing this value, also update ./docs/checks/anon-retrievals.md#piece-selection */ const IPFS_INDEXED_SAMPLE_RATE = 0.8; diff --git a/docs/checks/README.md b/docs/checks/README.md index 74b1a872..136349ee 100644 --- a/docs/checks/README.md +++ b/docs/checks/README.md @@ -4,6 +4,7 @@ The files are: - [production-configuration-and-approval-methodology.md](./production-configuration-and-approval-methodology.md): Defines the production configuration and approval methodology. - [data-storage.md](./data-storage.md): Defines the "data storage check" and how it is calculated. - [retrievals.md](./retrievals.md): Defines the "retrieval check" and how it is calculated. +- [anon-retrievals.md](./anon-retrievals.md): Defines the "anonymous retrieval check" (sampled public pieces, not dealbot-uploaded) and how it is calculated. - [data-retention.md](./data-retention.md): Defines the "data retention check" and how it is calculated. - [events-and-metrics.md](./events-and-metrics.md): Defines the events and metrics that are used to assess SP performance. @@ -14,7 +15,7 @@ DealBot creates synthetic traffic for SPs in the onchain SP registry and monitor ## Terminology ### Check -A "check" refers to a task type that dealbot performs on a SP. We currently have [Data Storage](./data-storage.md) and [Retrieval](./retrievals.md) checks. 
+A "check" refers to a task type that dealbot performs on an SP. We currently have [Data Storage](./data-storage.md), [Retrieval](./retrievals.md), [Anonymous Retrieval](./anon-retrievals.md), and [Data Retention](./data-retention.md) checks. ### Deal This is synonym for "Data Storage Check". This is covered in the [data-storage.md](./data-storage.md). diff --git a/docs/checks/anon-retrievals.md b/docs/checks/anon-retrievals.md new file mode 100644 index 00000000..0a303462 --- /dev/null +++ b/docs/checks/anon-retrievals.md @@ -0,0 +1,145 @@ +# Anonymous Retrieval Check + +This document is the **source of truth** for how dealbot's Anonymous Retrieval check works. + +Source code links throughout this document point to the current implementation. + +For event and metric definitions to be used by the dashboard, see [Dealbot Events & Metrics](./events-and-metrics.md). + +## Overview + +The Anonymous Retrieval check (sometimes referred to internally as [retrieval++](https://github.com/FilOzone/dealbot/pull/427)) tests publicly discoverable pieces on a storage provider (pieces that were *not* uploaded by dealbot). The intent is to measure SP retrievability against real-world tenant data, not just dealbot's own corpus. + +This is distinct from the [Retrieval check](./retrievals.md), which exercises pieces dealbot itself uploaded as part of a [Data Storage check](./data-storage.md). The Anonymous Retrieval check answers a different question: does the SP serve arbitrary pieces from its broader public corpus, with the same correctness and performance properties as dealbot's controlled pieces? + +### Definition of Successful Retrieval + +A successful anonymous retrieval requires: + +1. **Piece fetch** — `GET {spBaseUrl}/piece/{pieceCid}` returns HTTP 2xx and the response bytes hash to the declared CommP (piece CID). + +If the piece advertises IPFS indexing (`withIPFSIndexing = true` and a non-null `ipfsRootCid`), three additional dimensions are validated *independently*. 
Importantly, they do not gate the overall `piece_fetch_status`, and each is recorded as its own outcome column / metric: + +2. **CAR parseable:** the fetched bytes parse as a CAR file. +3. **IPNI:** the SP is advertised as a provider for the root CID and a sample of child CIDs via filecoinpin.contact. +4. **Block fetch:** a sample of CIDs from the parsed CAR is re-fetched via `{spBaseUrl}/ipfs/{cid}?format=raw` and each response is hash-verified against its declared CID. + +A piece without IPFS indexing is exercised only at step (1). + +Operational timeouts exist to prevent jobs from running indefinitely. If the job exceeds `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, it is aborted; a row is still emitted so that partial metrics (TTFB, bytes, response code) are not lost. + +## Piece Selection + +Unlike the [Retrieval check](./retrievals.md#piece-selection), dealbot does not retrieve from its own deals. Pieces are sampled from the on-chain subgraph of all FWSS-served pieces for the SP under test. + +Selection strategy (per scheduled job, per SP): + +1. **Pick a size bucket** by weighted random: + - `small` (1–20 MiB) — 20% + - `medium` (20–100 MiB) — 50% + - `large` (100–500 MiB) — 30% +2. **Pick a pool**: + - `indexed` (IPFS-indexed pieces) — 80% + - `any` (all FWSS pieces) — 20% +3. **Generate a uniform-random `sampleKey`** and query the subgraph for the smallest `Root.sampleKey ≥ $sampleKey` matching the SP, payer, size range, and pool filters. +4. **Drop the candidate** if `pdpPaymentEndEpoch` has passed. +5. **Fall back** through: (same bucket, opposite pool) → (any bucket, indexed) → (any bucket, any). + +The 80/20 split for `indexed` vs `any` exists so that SPs cannot optimize only their CAR corpus and still appear healthy on this check. + +> [!NOTE] +> The bucket sizes were chosen such that the whole file will still fit into memory. In the future we may implement a streaming verification and parsing. 
+ +Source: [`anon-piece-selector.service.ts`](../../apps/backend/src/retrieval-anon/anon-piece-selector.service.ts) + +## What Happens Each Cycle + +```mermaid +flowchart TD + Select["Sample anonymous piece for SP from subgraph"] --> Fetch["GET /piece/{pieceCid}"] + Fetch --> CommP["Hash bytes → verify CommP"] + CommP --> HasIpfs{"piece.withIPFSIndexing
and ipfsRootCid?"} + HasIpfs -- "no" --> Record["Persist row + metrics"] + HasIpfs -- "yes" --> ParseCar["Parse bytes as CAR"] + ParseCar --> SampleBlocks["Pick N random CIDs
(ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT)"] + SampleBlocks --> Ipni["IPNI: verify SP advertises root + sampled CIDs"] + SampleBlocks --> BlockFetch["GET /ipfs/{cid}?format=raw for each sampled CID"] + BlockFetch --> HashCheck["Hash-verify each response against its CID"] + Ipni --> Record + HashCheck --> Record +``` + +### Piece Fetch + +- **URL:** `{spBaseUrl}/piece/{pieceCid}` (HTTP/2) +- **Buffered in memory** — piece sizes are capped at 500 MiB by selection. +- **Validates CommP** — the CommP of the response bytes must match `pieceCid`. + +Source: [`piece-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/piece-retrieval.service.ts) + +### CAR Validation (only when piece advertises IPFS indexing) + +When the selected piece has `withIPFSIndexing = true` and a non-null `ipfsRootCid`, the fetched bytes are parsed as a CAR and a random sample of `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT` CIDs is exercised: + +- **IPNI check:** `IpniVerificationService.verify(rootCid, sampledCids, sp)` polls filecoinpin.contact until each CID resolves to the SP under test, the timeout fires, or `IPNI_VERIFICATION_TIMEOUT_MS` is reached. +- **Block fetch check:** for each sampled CID, fetch `{spBaseUrl}/ipfs/{cid}?format=raw` and hash-verify the response against the CID. Non-2xx, hash mismatch, unsupported codec, or transport errors all count as a single failed block. + +Source: [`car-validation.service.ts`](../../apps/backend/src/retrieval-anon/car-validation.service.ts) + +## What Gets Asserted + +| # | Assertion | How It's Checked | Retries | Relevant Metric | Implemented? 
| +|---|-----------|------------------|:---:|------------------|:---:| +| 1 | SP serves the piece | `GET /piece/{pieceCid}` returns HTTP 2xx | 0 | [`anonPieceRetrievalLastByteMs`](./events-and-metrics.md#anonPieceRetrievalLastByteMs) | Yes | +| 2 | Bytes match the declared CommP | Hash of response bytes equals `pieceCid` | 0 | [`anonRetrievalStatus`](./events-and-metrics.md#anonRetrievalStatus) | Yes | +| 3 | Bytes parse as a CAR (IPFS-indexed pieces only) | `@ipld/car` parses the response | 0 | [`anonCarParseStatus`](./events-and-metrics.md#anonCarParseStatus) | Yes | +| 4 | SP is advertised on IPNI for root + sampled CIDs | filecoinpin.contact returns provider records | polling until timeout | [`anonIpniStatus`](./events-and-metrics.md#anonIpniStatus) | Yes | +| 5 | Sampled blocks fetch + hash-verify | `/ipfs/{cid}?format=raw` for each sample | 0 | [`anonBlockFetchStatus`](./events-and-metrics.md#anonBlockFetchStatus) | Yes | + +## Result Recording + +Each anonymous retrieval attempt writes one row to the `anon_retrieval_checks` ClickHouse table. The row is emitted **even on abort or unexpected error** so that the partial evidence (TTFB, bytes, response code) is preserved. + +The DDL and column-level comments in [`clickhouse.schema.ts`](../../apps/backend/src/clickhouse/clickhouse.schema.ts) are authoritative. The summary below is for orientation. 
+ +| Column | Meaning | +|--------|---------| +| `timestamp` | When the check started (ms UTC) | +| `probe_location` | Dealbot probe location (`DEALBOT_PROBE_LOCATION`) | +| `sp_address`, `sp_id`, `sp_name` | SP identity | +| `retrieval_id` | Per-event UUID; correlates row to logs and Prometheus | +| `piece_cid`, `data_set_id`, `piece_id`, `raw_size` | Sampled piece identity | +| `with_ipfs_indexing`, `ipfs_root_cid` | Whether the piece advertises IPNI metadata | +| `service_type` | Always `direct_sp` today | +| `retrieval_endpoint` | URL probed for piece fetch | +| `piece_fetch_status` | `success` or `failed` — outcome of `/piece/{cid}` (HTTP 2xx **and** CommP match). CAR/IPNI/block-fetch outcomes live in their own columns and do **not** flip this status. | +| `http_response_code` | Raw HTTP status; null on transport failure | +| `first_byte_ms`, `last_byte_ms`, `bytes_retrieved`, `throughput_bps` | Piece-fetch performance | +| `commp_valid` | Null when retrieval failed before CommP could be hashed | +| `car_parseable`, `car_block_count` | Null when CAR validation was skipped (no IPFS indexing or piece fetch failed) | +| `block_fetch_endpoint`, `block_fetch_valid`, `block_fetch_sampled_count`, `block_fetch_failed_count` | Block-fetch outcomes; null when skipped | +| `ipni_status` | `valid` \| `invalid` \| `skipped` \| `error` — all-or-nothing across the root CID and the sampled child CIDs | +| `ipni_verify_ms` | IPNI verification duration; null when the IPNI check was skipped | +| `error_message` | Failure reason; null on success | + +Source: [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) + +## Metrics Recorded + +Anonymous-retrieval Prometheus metric definitions live in [Dealbot Events & Metrics](./events-and-metrics.md). All anon-retrieval metrics carry `checkType=anon_retrieval`. 
+ +## Configuration + +Key environment variables that control anonymous retrieval testing: + +| Variable | Description | +|----------|-------------| +| `RETRIEVALS_ANON_PER_SP_PER_HOUR` | Anonymous retrieval rate per SP. Falls back to `RETRIEVALS_PER_SP_PER_HOUR` when unset. | +| `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS` | Max end-to-end anon retrieval job runtime before forced abort (default 360s). | +| `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT` | Number of CIDs sampled from the parsed CAR for IPNI + block-fetch verification (default 5, max 50). | +| `IPNI_VERIFICATION_TIMEOUT_MS` | Max time to wait for IPNI provider verification (shared with the Retrieval check). | +| `IPNI_VERIFICATION_POLLING_MS` | Poll interval between IPNI verification attempts (shared). | +| `CONNECT_TIMEOUT_MS` | Connection/header timeout for HTTP requests. | +| `HTTP2_REQUEST_TIMEOUT_MS` | Total timeout for HTTP/2 retrieval requests. | + +See also: [`docs/environment-variables.md`](../environment-variables.md) for the full configuration reference. diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index 6c461f7f..f5d89b23 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -4,6 +4,16 @@ This document is the intended **source of truth** for the events emitted by deal > **Note on "events":** the entries in the [Event List](#event-list) are named **timing markers** used to define metric Timer Starts/Ends — they are not all emitted as discrete Prometheus events or log lines. Each marker is anchored in code (as a timestamp variable, log line, or status transition) and used to compute the metrics in the [Metrics](#metrics) section. +## Anonymous Retrieval Event Model + +The [Anonymous Retrieval check](./anon-retrievals.md) is a single-shot flow per piece: select → fetch piece → (optional) parse CAR + IPNI + block fetch → write one ClickHouse row. + +It is not modeled as a sequence of named lifecycle events. 
Instead it emits: + +- **Outcome metrics** when each step completes — see the [time](#time-related-metrics) and [status](#status-count-related-metrics) metric tables for `anonPieceRetrievalFirstByteMs`, `anonRetrievalCheckMs`, `anonRetrievalStatus`, `anonCarParseStatus`, `anonIpniStatus`, `anonBlockFetchStatus`, and friends. +- **One row per attempt** in the `anon_retrieval_checks` [ClickHouse table](#clickhouse-tables), emitted even on abort or unexpected error. +- **Structured log lines** (`anon_retrieval_started`, `anon_retrieval_completed`, `anon_retrieval_no_piece`, `anon_retrieval_car_validation_failed`, `anon_retrieval_clickhouse_insert_failed`) carrying a `retrievalId` so each row can be joined back to log evidence. + ## Data Storage Event Model Below are the sequence of events for a [Data Storage check](./data-storage.md). The Data Storage flow is used because it encapsulates a [Retrieval check](./retrievals.md) as well. @@ -87,6 +97,10 @@ sequenceDiagram | `dataStorageCheckMs` | Data Storage | [`uploadToSpStart`](#uploadToSpStart) | [`ipfsRetrievalIntegrityChecked`](#ipfsRetrievalIntegrityChecked) | Duration of a Data Storage check | | | `retrievalCheckMs` | Retrieval | Retrieval check start | [`ipfsRetrievalIntegrityChecked`](#ipfsRetrievalIntegrityChecked) | Duration of a Retrieval check | | | `dataSetCreationMs` | Data-Set Creation | Data-set creation uploadToSpStart | Data-set creation pieceConfirmed | Duration of one data-set creation with confirmed piece (all using `createDataSetWithPiece`) | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | +| `anonPieceRetrievalFirstByteMs` | Anonymous Retrieval | Piece fetch start | First byte received from `/piece/{pieceCid}` | Time to first byte for anonymous piece retrievals | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceRetrievalLastByteMs` | Anonymous Retrieval | Piece fetch start | Last byte received from `/piece/{pieceCid}` | 
Total time to retrieve an anonymous piece | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceRetrievalThroughputBps` | Anonymous Retrieval | n/a | n/a | `(bytesRetrieved / anonPieceRetrievalLastByteMs) * 1000` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonRetrievalCheckMs` | Anonymous Retrieval | Anon retrieval check start | After CAR/IPNI/block-fetch validation completes (or on abort) | End-to-end anonymous retrieval check duration | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | ### Status Count Related Metrics @@ -106,6 +120,11 @@ sequenceDiagram | `dataSetCreationStatus` | Data-Set Creation | Not tied to an [event above](#event-list) but rather to data-set creation start (`pending`) and completion (`success`/`failure.*`) | `pending`, `success`, `failure.timedout`, `failure.other` | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | | `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | | `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. 
| Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | +| `anonRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success`, `failure.http`, `failure.aborted`, `failure.no_piece` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceHttpResponseCode` | Anonymous Retrieval | After piece fetch completes | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` (same classifier as [`ipfsRetrievalHttpResponseCode`](#ipfsRetrievalHttpResponseCode)) | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonCarParseStatus` | Anonymous Retrieval | After CAR validation runs (skipped when piece fetch failed or piece is not IPFS-indexed) | `parseable`, `not_parseable` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonIpniStatus` | Anonymous Retrieval | After CAR validation runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonBlockFetchStatus` | Anonymous Retrieval | After block-fetch sampling runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | ## ClickHouse Tables @@ -115,6 +134,7 @@ When `CLICKHOUSE_URL` is configured, dealbot writes one row per check result to - **`data_storage_checks`** — one row written each time a deal is saved (on every status transition). Populated by [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts). - **`retrieval_checks`** — one row per retrieval attempt. 
Populated by [`retrieval.service.ts`](../../apps/backend/src/retrieval/retrieval.service.ts). +- **`anon_retrieval_checks`** — one row per [Anonymous Retrieval check](./anon-retrievals.md) attempt; emitted even on abort or unexpected error. Populated by [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts). See [Anonymous Retrieval § Result Recording](./anon-retrievals.md#result-recording) for column-level meanings. - **`data_retention_challenges`** — one row per provider per poll cycle. Populated by [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts). All tables share the primary key `(probe_location, sp_address, timestamp)`: From 5cee3ee85975342302fe8b1e418e8758c723aaf1 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 12:05:14 +0200 Subject: [PATCH 16/28] docs: add missing anonymous retrieval env vars --- docs/environment-variables.md | 40 ++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 2f25943c..e2b23735 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -11,7 +11,7 @@ This document provides a comprehensive guide to all environment variables used b | [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `SUBGRAPH_ENDPOINT` | | [Dataset Versioning](#dataset-versioning) | `DEALBOT_DATASET_VERSION` | | [Scheduling](#scheduling-configuration) | `PROVIDERS_REFRESH_INTERVAL_SECONDS`, `DATA_RETENTION_POLL_INTERVAL_SECONDS`, `DEALBOT_MAINTENANCE_WINDOWS_UTC`, `DEALBOT_MAINTENANCE_WINDOW_MINUTES` | -| [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, 
`JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, `JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, `RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, `IPFS_BLOCK_FETCH_CONCURRENCY` | +| [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, `RETRIEVALS_ANON_PER_SP_PER_HOUR`, `JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, `JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, `RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT`, `IPFS_BLOCK_FETCH_CONCURRENCY` | | [Dataset](#dataset-configuration) | `DEALBOT_LOCAL_DATASETS_PATH`, `RANDOM_PIECE_SIZES` | | [ClickHouse](#clickhouse-configuration) | `CLICKHOUSE_URL`, `CLICKHOUSE_BATCH_SIZE`, `CLICKHOUSE_FLUSH_INTERVAL_MS`, `DEALBOT_PROBE_LOCATION` | | [Timeouts](#timeout-configuration) | `CONNECT_TIMEOUT_MS`, `HTTP_REQUEST_TIMEOUT_MS`, `HTTP2_REQUEST_TIMEOUT_MS`, `IPNI_VERIFICATION_TIMEOUT_MS`, `IPNI_VERIFICATION_POLLING_MS` | @@ -622,6 +622,19 @@ rate-based (per hour) and persisted in Postgres so restarts do not reset timing. --- +### `RETRIEVALS_ANON_PER_SP_PER_HOUR` + +- **Type**: `number` +- **Required**: No +- **Default**: Falls back to `RETRIEVALS_PER_SP_PER_HOUR`, which itself defaults to `2` +- **Limits**: `0.001` – `20` + +**Role**: Target [anonymous retrieval](./checks/anon-retrievals.md) check rate per storage provider. Anonymous retrievals fetch arbitrary FWSS pieces sampled from the on-chain subgraph (not pieces dealbot uploaded), so this rate controls coverage of the SP's broader public corpus independently of the dealbot-owned [retrieval check](./checks/retrievals.md) rate. 
+ +**Notes**: Fractional values are supported. For example, `0.5` means one anon retrieval every 2 hours per storage provider. + +--- + ### `DATASET_CREATIONS_PER_SP_PER_HOUR` - **Type**: `number` @@ -806,6 +819,31 @@ Use this to stagger multiple dealbot deployments that are not sharing a database **Note**: This is independent of HTTP-level timeouts (`CONNECT_TIMEOUT_MS`, `HTTP2_REQUEST_TIMEOUT_MS`). The job timeout covers the end-to-end execution of an Anon Retrieval Check (piece selection, download, CommP validation, CAR/IPNI validation). +--- + +### `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT` + +- **Type**: `number` (integer) +- **Required**: No +- **Default**: `5` +- **Minimum**: `1` +- **Maximum**: `50` +- **Enforced**: Yes (config validation) + +**Role**: Number of CIDs randomly sampled from the parsed CAR for IPNI verification and block-fetch validation during an [anonymous retrieval check](./checks/anon-retrievals.md). Only applies to pieces with IPFS indexing enabled — pieces without an `ipfsRootCid` skip CAR validation entirely. + +For each sampled CID, dealbot: + +1. Confirms via filecoinpin.contact that the SP is advertised as a provider for the CID. +2. Re-fetches the block via `{spBaseUrl}/ipfs/{cid}?format=raw` and hash-verifies the response. + +**When to update**: + +- Increase for stronger statistical confidence that the SP serves the entire DAG correctly (more IPNI queries + per-block fetches per check) +- Decrease to reduce per-check load on the SP and on filecoinpin.contact + +**Note**: A higher sample count multiplies both IPNI traffic and block-fetch traffic per check. The IPNI step is all-or-nothing across the root CID and the sampled child CIDs — see [Anonymous Retrieval § CAR Validation](./checks/anon-retrievals.md#car-validation-only-when-piece-advertises-ipfs-indexing). 
+ --- ### `IPFS_BLOCK_FETCH_CONCURRENCY` From 95a2dff643b032ea02878251a0f9986a9a12f825 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 12:07:11 +0200 Subject: [PATCH 17/28] docs: fix obsolete reference to the pdp-explorer-owned subgraph --- .../production-configuration-and-approval-methodology.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/checks/production-configuration-and-approval-methodology.md b/docs/checks/production-configuration-and-approval-methodology.md index 3d956aa4..6b2859aa 100644 --- a/docs/checks/production-configuration-and-approval-methodology.md +++ b/docs/checks/production-configuration-and-approval-methodology.md @@ -40,8 +40,8 @@ Relevant parameters include: | Parameter | Value | Notes | |-----------|-------|-------| -| [`SUBGRAPH_ENDPOINT`](../environment-variables.md#subgraph_endpoint) | TODO: fill this in | Uses the subgraph from [pdp-explorer](https://github.com/FilOzone/pdp-explorer). | -| [`MIN_NUM_DATASETS_FOR_CHECKS`](../environment-variables.md#dataset-configuration) | 15 | Ensure there are enough datasets with pieces being added so that statistical significance for [Data Retention Fault Rate](#data-retention-fault-rate) can be achieved quicker. Note that on mainnet each dataset incurs 5 challenges[^1] per daily proof[^2]. With this many datasets, an SP can be approved for data retention after a faultless ~7 days even if the SP doesn't have other datasets. | +| [`SUBGRAPH_ENDPOINT`](../environment-variables.md#subgraph_endpoint) | - | Points at a Goldsky deployment of the dealbot-owned subgraph in [`apps/subgraph/`](../../apps/subgraph/) (package `@dealbot/subgraph`). | +| [`MIN_NUM_DATASETS_FOR_CHECKS`](../environment-variables.md#dataset-configuration) | 15 | Ensure there are enough datasets with pieces being added so that statistical significance for [Data Retention Fault Rate](#data-retention-fault-rate) can be achieved quicker. 
Note that on mainnet each dataset incurs 5 challenges[^1] per daily proof[^2]. With this many datasets, an SP can be approved for data retention after a faultless ~7 days even if the SP doesn't have other datasets. | See [How are data retention statistics/thresholds calculated?](#how-are-data-retention-statisticsthresholds-calculated) for more details. From cff31713aa28d97ce4ba41135c6e73d95ca2a17f Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Mon, 4 May 2026 12:30:56 +0200 Subject: [PATCH 18/28] improve: clarity around piece fetch status and commp validation --- apps/backend/src/clickhouse/clickhouse.schema.ts | 2 +- .../backend/src/retrieval-anon/anon-retrieval.service.ts | 9 ++++++++- docs/checks/events-and-metrics.md | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/apps/backend/src/clickhouse/clickhouse.schema.ts b/apps/backend/src/clickhouse/clickhouse.schema.ts index e8612056..b27ba0e2 100644 --- a/apps/backend/src/clickhouse/clickhouse.schema.ts +++ b/apps/backend/src/clickhouse/clickhouse.schema.ts @@ -82,7 +82,7 @@ export function buildMigrations(database: string): string[] { service_type LowCardinality(String), -- 'direct_sp' (only mode for anon retrievals today) retrieval_endpoint String, -- URL probed (e.g. {spBaseUrl}/piece/{pieceCid}) - piece_fetch_status LowCardinality(String), -- 'success' | 'failed' — outcome of GET /piece/ (HTTP 2xx AND CommP match). CAR/IPNI/block-fetch outcomes live in their own columns. + piece_fetch_status LowCardinality(String), -- 'success' | 'failed' — HTTP transport outcome of GET /piece/ (HTTP 2xx). CommP validity, CAR/IPNI/block-fetch outcomes live in their own columns. 
http_response_code Nullable(UInt16), -- raw HTTP status; null on transport failure first_byte_ms Nullable(Float64), -- time to first response byte last_byte_ms Nullable(Float64), -- time to last response byte diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index 25b34e82..eddc88f0 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -136,9 +136,16 @@ export class AnonRetrievalService { // Overall check duration and status this.metrics.observeCheckDuration(labels, Date.now() - checkStart); + const pieceServedCorrectly = pieceResult.success && pieceResult.commPValid; this.metrics.recordStatus( labels, - pieceResult.success ? "success" : pieceResult.aborted ? "failure.aborted" : "failure.http", + pieceServedCorrectly + ? "success" + : pieceResult.aborted + ? "failure.aborted" + : pieceResult.success + ? "failure.commp" + : "failure.http", ); } finally { // Always emit a ClickHouse row — even on abort or unexpected error — so diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index f5d89b23..fba8b003 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -118,9 +118,9 @@ sequenceDiagram | `ipfsRetrievalHttpResponseCode` | Data Storage, Retrieval | [`ipfsRetrievalLastByteReceived`](#ipfsRetrievalLastByteReceived) | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` | [`retrieval.service.ts`](../../apps/backend/src/retrieval/retrieval.service.ts) | | `retrievalStatus` | Data Storage, Retrieval | [`ipfsRetrievalIntegrityChecked`](#ipfsRetrievalIntegrityChecked) | `success`, `failure.timedout`, `failure.other` from [Data Storage Sub-status meanings](./data-storage.md#sub-status-meanings). 
 | |
 | `dataSetCreationStatus` | Data-Set Creation | Not tied to an [event above](#event-list) but rather to data-set creation start (`pending`) and completion (`success`/`failure.*`) | `pending`, `success`, `failure.timedout`, `failure.other` | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) |
-| `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) |
-| `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. | Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) |
-| `anonRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success`, `failure.http`, `failure.aborted`, `failure.no_piece` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) |
+| `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) |
+| `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. | Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) |
+| `anonRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` 
(HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.aborted`, `failure.no_piece`. | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonPieceHttpResponseCode` | Anonymous Retrieval | After piece fetch completes | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` (same classifier as [`ipfsRetrievalHttpResponseCode`](#ipfsRetrievalHttpResponseCode)) | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonCarParseStatus` | Anonymous Retrieval | After CAR validation runs (skipped when piece fetch failed or piece is not IPFS-indexed) | `parseable`, `not_parseable` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonIpniStatus` | Anonymous Retrieval | After CAR validation runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | From 3c2a69899944ca5d4aa8acfe6d8a95e26e2c454e Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Tue, 5 May 2026 09:04:48 +0200 Subject: [PATCH 19/28] refactor: let two subgraph endpoints coexist --- apps/backend/.env.example | 5 +- apps/backend/README.md | 3 +- apps/backend/src/config/app.config.ts | 13 +- .../data-retention/data-retention.module.ts | 4 +- .../data-retention.service.spec.ts | 184 ++--- .../data-retention/data-retention.service.ts | 16 +- .../src/pdp-subgraph/pdp-subgraph.module.ts | 8 + .../pdp-subgraph/pdp-subgraph.service.spec.ts | 694 ++++++++++++++++++ .../src/pdp-subgraph/pdp-subgraph.service.ts | 306 ++++++++ apps/backend/src/pdp-subgraph/queries.ts | 24 + apps/backend/src/pdp-subgraph/types.spec.ts | 245 +++++++ apps/backend/src/pdp-subgraph/types.ts | 151 ++++ apps/backend/src/subgraph/subgraph.service.ts | 15 + 
.../src/wallet-sdk/wallet-sdk.service.spec.ts | 2 +- docs/checks/data-retention.md | 10 +- ...-configuration-and-approval-methodology.md | 2 +- docs/environment-variables.md | 26 +- .../local/backend-configmap-local.yaml | 1 + 18 files changed, 1595 insertions(+), 114 deletions(-) create mode 100644 apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts create mode 100644 apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts create mode 100644 apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts create mode 100644 apps/backend/src/pdp-subgraph/queries.ts create mode 100644 apps/backend/src/pdp-subgraph/types.spec.ts create mode 100644 apps/backend/src/pdp-subgraph/types.ts diff --git a/apps/backend/.env.example b/apps/backend/.env.example index 26469c52..30556e7a 100644 --- a/apps/backend/.env.example +++ b/apps/backend/.env.example @@ -23,7 +23,10 @@ WALLET_ADDRESS=0x0000000000000000000000000000000000000000 WALLET_PRIVATE_KEY=your_private_key_here CHECK_DATASET_CREATION_FEES=true USE_ONLY_APPROVED_PROVIDERS=true -# Point at the dealbot-owned subgraph on Goldsky (see apps/subgraph/README.md). +# Upstream pdp-explorer subgraph — drives the data-retention / overdue-periods path. +PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp +# Dealbot-owned subgraph on Goldsky (see apps/subgraph/README.md) — drives only +# the new anonymous-retrieval candidate-piece query for now. SUBGRAPH_ENDPOINT=https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn # Minimum number of datasets per SP (default: 1). When > 1, a separate data_set_creation job provisions extra datasets. diff --git a/apps/backend/README.md b/apps/backend/README.md index 4805080f..e4dafd6e 100644 --- a/apps/backend/README.md +++ b/apps/backend/README.md @@ -104,7 +104,8 @@ All configuration is done via environment variables in `.env`. 
| `CHECK_DATASET_CREATION_FEES` | Check fees before dataset creation | `true` | | `ENABLE_IPNI_TESTING` | IPNI testing mode (`disabled`/`random`/`always`) | `always` | | `USE_ONLY_APPROVED_PROVIDERS` | Only use approved storage providers | `true` | -| `SUBGRAPH_ENDPOINT` | Subgraph GraphQL endpoint for PDP proof-set/data-retention and anon-retrieval queries | `https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn` | +| `PDP_SUBGRAPH_ENDPOINT` | PDP subgraph API endpoint for PDP proof-set/data-retention | `https://api.thegraph.com/subgraphs/filecoin/pdp` | +| `SUBGRAPH_ENDPOINT` | Subgraph GraphQL endpoint for anon-retrieval queries | `https://api.goldsky.com/api/public//subgraphs/dealbot-subgraph//gn` | ### Scheduling Configuration (pg-boss) diff --git a/apps/backend/src/config/app.config.ts b/apps/backend/src/config/app.config.ts index 4e49e4d8..7906be8c 100644 --- a/apps/backend/src/config/app.config.ts +++ b/apps/backend/src/config/app.config.ts @@ -56,6 +56,15 @@ export const configValidationSchema = Joi.object({ USE_ONLY_APPROVED_PROVIDERS: Joi.boolean().default(true), DEALBOT_DATASET_VERSION: Joi.string().optional(), MIN_NUM_DATASETS_FOR_CHECKS: Joi.number().integer().min(1).default(1), + // Two subgraph endpoints coexist intentionally to limit blast radius while we + // migrate off the upstream pdp-explorer subgraph: + // - PDP_SUBGRAPH_ENDPOINT drives the established overdue-periods / data + // retention path against the existing pdp-explorer subgraph. + // - SUBGRAPH_ENDPOINT drives only the new anonymous-retrieval candidate + // piece query against the dealbot-owned subgraph. + // Once the dealbot-owned subgraph has soaked in production we can drop + // PDP_SUBGRAPH_ENDPOINT and route everything through SUBGRAPH_ENDPOINT. 
+ PDP_SUBGRAPH_ENDPOINT: Joi.string().uri().optional().allow(""), SUBGRAPH_ENDPOINT: Joi.string().uri().optional().allow(""), // Scheduling @@ -177,7 +186,8 @@ export interface IBlockchainConfig { useOnlyApprovedProviders: boolean; dealbotDataSetVersion?: string; minNumDataSetsForChecks: number; - subgraphEndpoint?: string; + pdpSubgraphEndpoint?: string; + subgraphEndpoint?: string; // Endpoint of the dealbot-owned subgraph. Eventually replaces `pdpSubgraphEndpoint` } export interface ISchedulingConfig { @@ -437,6 +447,7 @@ export function loadConfig(): IConfig { useOnlyApprovedProviders: process.env.USE_ONLY_APPROVED_PROVIDERS !== "false", dealbotDataSetVersion: process.env.DEALBOT_DATASET_VERSION, minNumDataSetsForChecks: Number.parseInt(process.env.MIN_NUM_DATASETS_FOR_CHECKS || "1", 10), + pdpSubgraphEndpoint: process.env.PDP_SUBGRAPH_ENDPOINT || "", subgraphEndpoint: process.env.SUBGRAPH_ENDPOINT || "", }, scheduling: { diff --git a/apps/backend/src/data-retention/data-retention.module.ts b/apps/backend/src/data-retention/data-retention.module.ts index f0aec1ec..f459570a 100644 --- a/apps/backend/src/data-retention/data-retention.module.ts +++ b/apps/backend/src/data-retention/data-retention.module.ts @@ -2,12 +2,12 @@ import { Module } from "@nestjs/common"; import { TypeOrmModule } from "@nestjs/typeorm"; import { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; -import { SubgraphModule } from "../subgraph/subgraph.module.js"; +import { PdpSubgraphModule } from "../pdp-subgraph/pdp-subgraph.module.js"; import { WalletSdkModule } from "../wallet-sdk/wallet-sdk.module.js"; import { DataRetentionService } from "./data-retention.service.js"; @Module({ - imports: [WalletSdkModule, SubgraphModule, TypeOrmModule.forFeature([DataRetentionBaseline, StorageProvider])], + imports: [WalletSdkModule, PdpSubgraphModule, 
TypeOrmModule.forFeature([DataRetentionBaseline, StorageProvider])], providers: [DataRetentionService], exports: [DataRetentionService], }) diff --git a/apps/backend/src/data-retention/data-retention.service.spec.ts b/apps/backend/src/data-retention/data-retention.service.spec.ts index d2d539cf..3fde29e8 100644 --- a/apps/backend/src/data-retention/data-retention.service.spec.ts +++ b/apps/backend/src/data-retention/data-retention.service.spec.ts @@ -7,8 +7,8 @@ import type { IConfig } from "../config/app.config.js"; import type { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { buildCheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; -import type { SubgraphService } from "../subgraph/subgraph.service.js"; -import type { ProviderDataSetResponse } from "../subgraph/types.js"; +import type { PDPSubgraphService } from "../pdp-subgraph/pdp-subgraph.service.js"; +import type { ProviderDataSetResponse } from "../pdp-subgraph/types.js"; import type { WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import { DataRetentionService } from "./data-retention.service.js"; @@ -42,7 +42,7 @@ describe("DataRetentionService", () => { let walletSdkServiceMock: { getTestingProviders: ReturnType; }; - let subgraphServiceMock: { + let pdpSubgraphServiceMock: { fetchSubgraphMeta: ReturnType; fetchProvidersWithDatasets: ReturnType; }; @@ -69,7 +69,7 @@ describe("DataRetentionService", () => { configServiceMock = { get: vi.fn((key: keyof IConfig) => { if (key === "blockchain") { - return { subgraphEndpoint: "https://example.com/subgraph" }; + return { pdpSubgraphEndpoint: "https://example.com/subgraph" }; } if (key === "spBlocklists") { return { ids: new Set(), addresses: new Set() }; @@ -95,7 +95,7 @@ describe("DataRetentionService", () => { ]), }; - subgraphServiceMock = { + pdpSubgraphServiceMock = { fetchSubgraphMeta: 
vi.fn().mockResolvedValue({ _meta: { block: { @@ -146,7 +146,7 @@ describe("DataRetentionService", () => { service = new DataRetentionService( configServiceMock, walletSdkServiceMock as unknown as WalletSdkService, - subgraphServiceMock as unknown as SubgraphService, + pdpSubgraphServiceMock as unknown as PDPSubgraphService, mockBaselineRepository as unknown as Repository, mockSPRepository as unknown as Repository, counterMock as unknown as Counter, @@ -155,15 +155,15 @@ describe("DataRetentionService", () => { ); }); - it("returns early when subgraphEndpoint is empty", async () => { + it("returns early when pdpSubgraphEndpoint is empty", async () => { (configServiceMock.get as ReturnType).mockReturnValue({ - subgraphEndpoint: "", + pdpSubgraphEndpoint: "", }); await service.pollDataRetention(); - expect(subgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); - expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("returns early when no testing providers configured", async () => { @@ -171,31 +171,31 @@ describe("DataRetentionService", () => { await service.pollDataRetention(); - expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("returns early when all providers are blocked for data-retention", async () => { (configServiceMock.get as ReturnType).mockImplementation((key: string) => { - if (key === "blockchain") return { subgraphEndpoint: "https://example.com/subgraph" }; + if (key === "blockchain") return { pdpSubgraphEndpoint: "https://example.com/subgraph" }; if (key === "spBlocklists") return { ids: new Set(), addresses: new Set([PROVIDER_A, PROVIDER_B]) }; }); await service.pollDataRetention(); - 
expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("excludes blocked providers from data-retention polling while retaining unblocked ones", async () => { (configServiceMock.get as ReturnType).mockImplementation((key: string) => { - if (key === "blockchain") return { subgraphEndpoint: "https://example.com/subgraph" }; + if (key === "blockchain") return { pdpSubgraphEndpoint: "https://example.com/subgraph" }; if (key === "spBlocklists") return { ids: new Set(), addresses: new Set([PROVIDER_A]) }; }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); const allAddressesPolled: string[] = ( - subgraphServiceMock.fetchProvidersWithDatasets.mock.calls as [{ addresses: string[] }][] + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mock.calls as [{ addresses: string[] }][] ).flatMap(([{ addresses }]) => addresses); expect(allAddressesPolled).toContain(PROVIDER_B.toLowerCase()); expect(allAddressesPolled).not.toContain(PROVIDER_A.toLowerCase()); @@ -206,16 +206,16 @@ describe("DataRetentionService", () => { await service.pollDataRetention(); - expect(subgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).not.toHaveBeenCalled(); }); it("sets baseline on first poll without emitting counters (fresh deploy / new provider)", async () => { - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); - expect(subgraphServiceMock.fetchSubgraphMeta).toHaveBeenCalled(); - 
expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledWith({ + expect(pdpSubgraphServiceMock.fetchSubgraphMeta).toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledWith({ blockNumber: 1200, addresses: [PROVIDER_A, PROVIDER_B], }); @@ -239,20 +239,20 @@ describe("DataRetentionService", () => { it("computes deltas correctly on consecutive polls", async () => { // First poll: blockNumber=1200 - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); const firstCallCount = counterMock.labels.mock.calls.length; // Second poll: blockNumber=1300, provider totals changed - subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ + pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300, }, }, }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n, @@ -266,7 +266,7 @@ describe("DataRetentionService", () => { }); it("does not increment counters when deltas are zero", async () => { - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); // First poll await service.pollDataRetention(); @@ -288,7 +288,7 @@ describe("DataRetentionService", () => { const providerA = makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 5n }); const providerB = makeProvider({ address: PROVIDER_B, totalFaultedPeriods: 20n }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([providerA, providerB]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([providerA, providerB]); await service.pollDataRetention(); @@ 
-310,7 +310,7 @@ describe("DataRetentionService", () => { ]); const provider = makeProvider(); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); await service.pollDataRetention(); @@ -333,7 +333,7 @@ describe("DataRetentionService", () => { }); it("handles empty providers array without errors", async () => { - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([]); await service.pollDataRetention(); @@ -347,7 +347,7 @@ describe("DataRetentionService", () => { ]); const provider = makeProvider(); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); await service.pollDataRetention(); @@ -370,7 +370,7 @@ describe("DataRetentionService", () => { }); it("catches and logs errors without rethrowing", async () => { - subgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("subgraph down")); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("subgraph down")); // Should not throw await expect(service.pollDataRetention()).resolves.toBeUndefined(); @@ -378,14 +378,14 @@ describe("DataRetentionService", () => { it("resets baseline on negative deltas without incrementing counters", async () => { // First poll: high values - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 100n, totalProvingPeriods: 200n }), ]); await service.pollDataRetention(); counterMock.labels.mockClear(); // Second poll: lower values (e.g., chain reorg or subgraph correction) - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 50n, totalProvingPeriods: 100n }), ]); await service.pollDataRetention(); @@ -394,7 +394,7 @@ describe("DataRetentionService", () => { expect(counterMock.labels).not.toHaveBeenCalled(); // Third poll: values increase from new baseline - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 52n, totalProvingPeriods: 105n }), ]); await service.pollDataRetention(); @@ -412,7 +412,7 @@ describe("DataRetentionService", () => { { providerAddress: PROVIDER_A, faultedPeriods: "0", successPeriods: "0", lastBlockNumber: "1000" }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: largeValue, totalProvingPeriods: largeValue * 2n }), ]); @@ -436,7 +436,7 @@ describe("DataRetentionService", () => { { providerAddress: PROVIDER_A, faultedPeriods: "0", successPeriods: "0", lastBlockNumber: "1000" }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: maxSafeInt, totalProvingPeriods: maxSafeInt * 2n }), ]); @@ -456,7 +456,7 @@ describe("DataRetentionService", () => { totalFaultedPeriods: 5n, totalProvingPeriods: 50n, }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([provider]); await service.pollDataRetention(); @@ -475,18 +475,18 @@ describe("DataRetentionService", () => { })); walletSdkServiceMock.getTestingProviders.mockReturnValueOnce(manyProviders); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([]); + 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([]); await service.pollDataRetention(); // Should be called twice: once for first 50, once for remaining 25 - expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); - expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenNthCalledWith(1, { + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenNthCalledWith(1, { addresses: expect.arrayContaining([expect.any(String)]), blockNumber: 1200, }); - expect(subgraphServiceMock.fetchProvidersWithDatasets.mock.calls[0][0].addresses).toHaveLength(50); - expect(subgraphServiceMock.fetchProvidersWithDatasets.mock.calls[1][0].addresses).toHaveLength(25); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets.mock.calls[0][0].addresses).toHaveLength(50); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets.mock.calls[1][0].addresses).toHaveLength(25); }); it("continues processing next batch if one batch fails", async () => { @@ -499,20 +499,20 @@ describe("DataRetentionService", () => { walletSdkServiceMock.getTestingProviders.mockReturnValueOnce(manyProviders); // First batch fails, second succeeds - subgraphServiceMock.fetchProvidersWithDatasets + pdpSubgraphServiceMock.fetchProvidersWithDatasets .mockRejectedValueOnce(new Error("Subgraph timeout")) .mockResolvedValueOnce([]); await service.pollDataRetention(); // Both batches should be attempted - expect(subgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); + expect(pdpSubgraphServiceMock.fetchProvidersWithDatasets).toHaveBeenCalledTimes(2); }); it("logs error and skips counter update when provider not found in cache but returned from subgraph", async () => { // Provider C not in cache const PROVIDER_C = "0x1234567890123456789012345678901234567890"; - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: 
PROVIDER_C })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_C })]); await service.pollDataRetention(); @@ -523,7 +523,7 @@ describe("DataRetentionService", () => { describe("cleanupStaleProviders", () => { it("does not cleanup when no stale providers exist", async () => { // First poll establishes baseline for both providers - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A }), makeProvider({ address: PROVIDER_B }), ]); @@ -536,7 +536,7 @@ describe("DataRetentionService", () => { it("successfully cleans up stale provider with valid database entry", async () => { // First poll: establish baseline for PROVIDER_A - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: PROVIDER_A removed from active list, only PROVIDER_B active @@ -558,7 +558,7 @@ describe("DataRetentionService", () => { }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -589,7 +589,7 @@ describe("DataRetentionService", () => { it("skips cleanup entirely when database fetch fails", async () => { // First poll: establish baseline - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed, but DB fails @@ -604,7 +604,7 @@ 
describe("DataRetentionService", () => { mockSPRepository.find.mockRejectedValueOnce(new Error("Database connection failed")); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -624,7 +624,7 @@ describe("DataRetentionService", () => { }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -637,7 +637,7 @@ describe("DataRetentionService", () => { it("retains baseline when provider not found in database", async () => { // First poll: establish baseline - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed from active list @@ -653,7 +653,7 @@ describe("DataRetentionService", () => { // Database returns empty array (provider not found) mockSPRepository.find.mockResolvedValueOnce([]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -670,7 +670,7 @@ describe("DataRetentionService", () => { }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -683,7 +683,7 @@ describe("DataRetentionService", () => { it("retains 
baseline when provider has null providerId", async () => { // First poll: establish baseline - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed @@ -706,7 +706,7 @@ describe("DataRetentionService", () => { }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -716,7 +716,7 @@ describe("DataRetentionService", () => { it("retains baseline when counter removal throws error", async () => { // First poll: establish baseline - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed @@ -743,7 +743,7 @@ describe("DataRetentionService", () => { throw new Error("Counter removal failed"); }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -760,7 +760,7 @@ describe("DataRetentionService", () => { }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A, totalFaultedPeriods: 12n, totalProvingPeriods: 110n }), ]); @@ -781,7 +781,7 @@ describe("DataRetentionService", () => { { id: 3, serviceProvider: PROVIDER_C, name: "Provider C", 
isApproved: true }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_A }), makeProvider({ address: PROVIDER_B }), makeProvider({ address: PROVIDER_C }), @@ -799,7 +799,7 @@ describe("DataRetentionService", () => { { address: PROVIDER_C, name: "Provider C", providerId: 3, isApproved: true }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); @@ -815,7 +815,7 @@ describe("DataRetentionService", () => { it("skips cleanup when processing errors occurred", async () => { // First poll: establish baseline - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: provider removed, but processing has errors @@ -824,7 +824,7 @@ describe("DataRetentionService", () => { ]); // Simulate processing error - subgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("Processing failed")); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockRejectedValueOnce(new Error("Processing failed")); await service.pollDataRetention(); @@ -841,7 +841,7 @@ describe("DataRetentionService", () => { { id: 1, serviceProvider: PROVIDER_MIXED_CASE, name: "Provider A", isApproved: true }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ address: PROVIDER_MIXED_CASE.toLowerCase() as `0x${string}` }), ]); @@ -861,7 +861,7 @@ describe("DataRetentionService", () => { }, ]); - 
subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -885,7 +885,7 @@ describe("DataRetentionService", () => { // Subgraph returns same values: totalFaultedPeriods=10, totalProvingPeriods=100 // confirmedTotalSuccess = 100 - 10 = 90 // With DB baseline: faultedDelta = 10 - 10 = 0, successDelta = 90 - 90 = 0 - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); @@ -907,7 +907,7 @@ describe("DataRetentionService", () => { // Subgraph returns: totalFaultedPeriods=10, totalProvingPeriods=100 // confirmedTotalSuccess = 100 - 10 = 90 // faultedDelta = 10 - 8 = 2, successDelta = 90 - 85 = 5 - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); @@ -921,8 +921,8 @@ describe("DataRetentionService", () => { expect(incCalls).toEqual(expect.arrayContaining([[10], [25]])); }); - it("reloads baselines from DB on every poll", async () => { - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); + it("only loads baselines from DB once across multiple polls", async () => { + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); await service.pollDataRetention(); await service.pollDataRetention(); @@ -932,13 +932,13 @@ describe("DataRetentionService", () => { }); it("does not double-count when poll ownership alternates across worker pods", async () => { - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); const secondPod = new DataRetentionService( configServiceMock, walletSdkServiceMock as unknown as WalletSdkService, - subgraphServiceMock as unknown as SubgraphService, + pdpSubgraphServiceMock as unknown as PDPSubgraphService, mockBaselineRepository as unknown as Repository, mockSPRepository as unknown as Repository, counterMock as unknown as Counter, @@ -946,8 +946,8 @@ describe("DataRetentionService", () => { { insert: vi.fn(), probeLocation: "test" } as unknown as ClickhouseService, ); - subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 11n, totalProvingPeriods: 102n }), ]); await secondPod.pollDataRetention(); @@ -955,8 +955,8 @@ describe("DataRetentionService", () => { counterMock.labels.mockClear(); counterMock.inc.mockClear(); - subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 104n }), ]); await service.pollDataRetention(); @@ -972,8 +972,8 @@ describe("DataRetentionService", () => { ]; mockBaselineRepository.upsert.mockRejectedValueOnce(new Error("DB write failed")); - subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); - 
subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } } }); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -981,8 +981,8 @@ describe("DataRetentionService", () => { expect(counterMock.labels).not.toHaveBeenCalled(); - subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1400 } } }); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -1003,12 +1003,12 @@ describe("DataRetentionService", () => { }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValue([makeProvider()]); // First poll: DB load fails, poll bails out to avoid emitting bloated values await service.pollDataRetention(); expect(mockBaselineRepository.find).toHaveBeenCalledTimes(1); - expect(subgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); + expect(pdpSubgraphServiceMock.fetchSubgraphMeta).not.toHaveBeenCalled(); expect(counterMock.labels).not.toHaveBeenCalled(); // Second poll: DB load succeeds, baselines restored, normal delta computation @@ -1021,16 +1021,16 @@ describe("DataRetentionService", () => { it("emits real deltas on second poll after fresh deploy baseline-only first poll", async () => { // First poll: fresh deploy, no baselines in DB // Baseline set to: faultedPeriods=10, successPeriods=90 - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); counterMock.labels.mockClear(); counterMock.inc.mockClear(); // Second poll: values have increased - subgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ + pdpSubgraphServiceMock.fetchSubgraphMeta.mockResolvedValueOnce({ _meta: { block: { number: 1300 } }, }); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 105n }), ]); @@ -1044,7 +1044,7 @@ describe("DataRetentionService", () => { it("deletes baseline from DB when stale provider is cleaned up", async () => { // First poll: establish baseline for PROVIDER_A - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: PROVIDER_A removed from active list @@ -1056,7 +1056,7 @@ describe("DataRetentionService", () => { { address: PROVIDER_A, name: "Provider A", providerId: 1, isApproved: true }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); @@ -1069,7 +1069,7 @@ describe("DataRetentionService", () => { it("emits overdue gauge on first poll (baseline-only)", async () => { // Provider is overdue: currentBlock=1200, // estimatedOverduePeriods = (1200 - 901) / 100 = 2.99 -> 2 - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); @@ 
-1086,7 +1086,7 @@ describe("DataRetentionService", () => { it("emits overdue gauge = 0 when provider is not overdue", async () => { // nextDeadline=2000 > currentBlock=1200 - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ proofSets: [] })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ proofSets: [] })]); await service.pollDataRetention(); @@ -1095,7 +1095,7 @@ describe("DataRetentionService", () => { it("emits overdue gauge even on negative delta (baseline reset)", async () => { // First poll: high values - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 100n, totalProvingPeriods: 200n }), ]); await service.pollDataRetention(); @@ -1103,7 +1103,7 @@ describe("DataRetentionService", () => { gaugeMock.set.mockClear(); // Second poll: lower values (negative delta) but still overdue - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 50n, totalProvingPeriods: 100n }), ]); await service.pollDataRetention(); @@ -1115,7 +1115,7 @@ describe("DataRetentionService", () => { it("naturally resets gauge to 0 when subgraph catches up", async () => { // First poll: provider is overdue (currentBlock=1200, nextDeadline=1000) - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider()]); await service.pollDataRetention(); expect(gaugeMock.set).toHaveBeenCalledWith(2); @@ -1124,7 +1124,7 @@ describe("DataRetentionService", () => { gaugeMock.set.mockClear(); // Second poll: subgraph caught up, nextDeadline advanced past currentBlock - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ + 
pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([ makeProvider({ totalFaultedPeriods: 12n, totalProvingPeriods: 102n, @@ -1140,7 +1140,7 @@ describe("DataRetentionService", () => { it("removes overdue gauge when stale provider is cleaned up", async () => { // First poll: establish baseline for PROVIDER_A - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_A })]); await service.pollDataRetention(); // Second poll: PROVIDER_A removed from active list @@ -1152,7 +1152,7 @@ describe("DataRetentionService", () => { { address: PROVIDER_A, name: "Provider A", providerId: 1, isApproved: true }, ]); - subgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); + pdpSubgraphServiceMock.fetchProvidersWithDatasets.mockResolvedValueOnce([makeProvider({ address: PROVIDER_B })]); await service.pollDataRetention(); diff --git a/apps/backend/src/data-retention/data-retention.service.ts b/apps/backend/src/data-retention/data-retention.service.ts index 1422bbfd..c6ece7b5 100644 --- a/apps/backend/src/data-retention/data-retention.service.ts +++ b/apps/backend/src/data-retention/data-retention.service.ts @@ -11,8 +11,8 @@ import { IConfig } from "../config/app.config.js"; import { DataRetentionBaseline } from "../database/entities/data-retention-baseline.entity.js"; import { StorageProvider } from "../database/entities/storage-provider.entity.js"; import { buildCheckMetricLabels, CheckMetricLabels } from "../metrics-prometheus/check-metric-labels.js"; -import { SubgraphService } from "../subgraph/subgraph.service.js"; -import { type ProviderDataSetResponse } from "../subgraph/types.js"; +import { PDPSubgraphService } from "../pdp-subgraph/pdp-subgraph.service.js"; +import { type ProviderDataSetResponse } from "../pdp-subgraph/types.js"; import { 
WalletSdkService } from "../wallet-sdk/wallet-sdk.service.js"; import { type PDPProviderEx } from "../wallet-sdk/wallet-sdk.types.js"; @@ -41,7 +41,7 @@ export class DataRetentionService { constructor( private readonly configService: ConfigService, private readonly walletSdkService: WalletSdkService, - private readonly subgraphService: SubgraphService, + private readonly pdpSubgraphService: PDPSubgraphService, @InjectRepository(DataRetentionBaseline) private readonly baselineRepository: Repository, @InjectRepository(StorageProvider) @@ -59,10 +59,10 @@ export class DataRetentionService { * challenge delta since the last poll. */ async pollDataRetention(): Promise { - const subgraphEndpoint = this.configService.get("blockchain").subgraphEndpoint; - if (!subgraphEndpoint) { + const pdpSubgraphEndpoint = this.configService.get("blockchain").pdpSubgraphEndpoint; + if (!pdpSubgraphEndpoint) { this.logger.warn({ - event: "subgraph_endpoint_not_configured", + event: "pdp_subgraph_endpoint_not_configured", message: "No PDP subgraph endpoint configured", }); return; @@ -75,7 +75,7 @@ export class DataRetentionService { } try { - const subgraphMeta = await this.subgraphService.fetchSubgraphMeta(); + const subgraphMeta = await this.pdpSubgraphService.fetchSubgraphMeta(); const allProviderInfos = this.walletSdkService.getTestingProviders(); const spBlocklists = this.configService.get("spBlocklists"); const providerInfos = allProviderInfos?.filter((p) => !isSpBlocked(spBlocklists, p.serviceProvider, p.id)); @@ -104,7 +104,7 @@ export class DataRetentionService { ); try { - const providersFromSubgraph = await this.subgraphService.fetchProvidersWithDatasets({ + const providersFromSubgraph = await this.pdpSubgraphService.fetchProvidersWithDatasets({ blockNumber, addresses: batchAddresses, }); diff --git a/apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts b/apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts new file mode 100644 index 00000000..6e084fc1 --- /dev/null +++ 
b/apps/backend/src/pdp-subgraph/pdp-subgraph.module.ts @@ -0,0 +1,8 @@ +import { Module } from "@nestjs/common"; +import { PDPSubgraphService } from "./pdp-subgraph.service.js"; + +@Module({ + providers: [PDPSubgraphService], + exports: [PDPSubgraphService], +}) +export class PdpSubgraphModule {} diff --git a/apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts b/apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts new file mode 100644 index 00000000..cd3a1ea8 --- /dev/null +++ b/apps/backend/src/pdp-subgraph/pdp-subgraph.service.spec.ts @@ -0,0 +1,694 @@ +import type { ConfigService } from "@nestjs/config"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { IConfig } from "../config/app.config.js"; +import { PDPSubgraphService } from "./pdp-subgraph.service.js"; + +const VALID_ADDRESS = "0xd8da6bf26964af9d7eed9e03e53415d37aa96045" as const; +const SUBGRAPH_ENDPOINT = "https://api.thegraph.com/subgraphs/filecoin/pdp" as const; + +const makeSubgraphResponse = (providers: Record[] = []) => ({ + data: { providers }, +}); + +const makeValidProvider = (overrides: Record = {}) => ({ + address: VALID_ADDRESS, + totalFaultedPeriods: "10", + totalProvingPeriods: "100", + proofSets: [ + { + totalFaultedPeriods: "2", + currentDeadlineCount: "5", + nextDeadline: "1000", + maxProvingPeriod: "100", + }, + ], + ...overrides, +}); + +const makeSubgraphMetaResponse = (blockNumber = 12345) => ({ + data: { + _meta: { + block: { + number: blockNumber, + }, + }, + }, +}); + +describe("PDPSubgraphService", () => { + let service: PDPSubgraphService; + let fetchMock: ReturnType; + + beforeEach(() => { + const configService = { + get: vi.fn((key: keyof IConfig) => { + if (key === "blockchain") { + return { pdpSubgraphEndpoint: SUBGRAPH_ENDPOINT }; + } + return undefined; + }), + } as unknown as ConfigService; + + service = new PDPSubgraphService(configService); + + fetchMock = vi.fn(); + vi.stubGlobal("fetch", fetchMock); + + 
vi.useFakeTimers(); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.useRealTimers(); + }); + + describe("fetchProvidersWithDatasets", () => { + it("fetches and returns validated providers with bigint fields", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + const providers = await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + + expect(fetchMock).toHaveBeenCalledWith(SUBGRAPH_ENDPOINT, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: expect.stringContaining('"addresses"'), + }); + + expect(providers).toHaveLength(1); + expect(providers[0].address).toBe(VALID_ADDRESS); + expect(providers[0].totalFaultedPeriods).toBe(10n); + expect(providers[0].totalProvingPeriods).toBe(100n); + expect(providers[0].proofSets[0].maxProvingPeriod).toBe(100n); + }); + + it("returns empty array when no providers exist", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([]), + }); + + const providers = await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + expect(providers).toEqual([]); + }); + + it("returns empty array when addresses array is empty", async () => { + const providers = await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [], + }); + + expect(providers).toEqual([]); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + it("throws on HTTP error response", async () => { + fetchMock.mockResolvedValue({ + ok: false, + status: 500, + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + + // This stops Node.js from throwing an Unhandled Rejection during fast-forward. 
+ promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + await expect(promise).rejects.toThrow("Failed to fetch provider data after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("throws on GraphQL errors in response", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: null, + errors: [{ message: "Query failed" }], + }), + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch provider data after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + + it("throws on network failure", async () => { + fetchMock.mockRejectedValueOnce(new Error("Network error")); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch provider data after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); // Initial + 2 retries = 3 total + }); + + it("throws immediately on validation error without retrying", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { providers: [{ address: "invalid" }] }, + }), + }); + + await expect( + service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }), + ).rejects.toThrow("Data validation failed"); + + // Should only be called once - no retries for validation errors + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("throws immediately when response data is missing required fields", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { providers: [{ address: 
VALID_ADDRESS }] }, // Missing required fields + }), + }); + + await expect( + service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }), + ).rejects.toThrow("Data validation failed"); + + // Should only be called once - no retries for validation errors + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("sends blockNumber as string in the GraphQL variables", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + await service.fetchProvidersWithDatasets({ + blockNumber: 12345, + addresses: [VALID_ADDRESS], + }); + + const body = JSON.parse(fetchMock.mock.calls[0][1].body); + expect(body.variables.blockNumber).toBe("12345"); + }); + + it("retries network errors but not validation errors", async () => { + // First attempt: network error (should retry) + fetchMock.mockRejectedValueOnce(new Error("Network timeout")); + + // Second attempt: succeeds but validation fails (should not retry) + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { providers: [{ address: "invalid" }] }, + }), + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Data validation failed"); + + // Should be called twice: initial network error + 1 retry that fails validation + expect(fetchMock).toHaveBeenCalledTimes(2); + }); + + it("sends addresses array in the GraphQL variables", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + const addresses = [VALID_ADDRESS, "0xAb5801a7D398351b8bE11C439e05C5B3259aeC9B"]; + await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses, + }); + + const body = 
JSON.parse(fetchMock.mock.calls[0][1].body); + expect(body.variables.addresses).toEqual(addresses); + }); + + it("batches large address lists into chunks of MAX_PROVIDERS_PER_QUERY", async () => { + // Create 150 addresses (should be split into 2 batches: 100 + 50) + const addresses = Array.from({ length: 150 }, (_, i) => `0x${i.toString().padStart(40, "0")}`); + + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphResponse([]), + }); + + await service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses, + }); + + // Should make 2 requests + expect(fetchMock).toHaveBeenCalledTimes(2); + }); + + it("retries failed requests with exponential backoff", async () => { + // Fail on first attempt, succeed on second attempt (1 retry) + fetchMock.mockRejectedValueOnce(new Error("Network timeout")).mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphResponse([makeValidProvider()]), + }); + + const promise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, + addresses: [VALID_ADDRESS], + }); + + await vi.runAllTimersAsync(); + + // Now await the final promise to resolve + const providers = await promise; + + expect(fetchMock).toHaveBeenCalledTimes(2); // Initial attempt + 1 retry + expect(providers).toHaveLength(1); + }); + + it("processes batches with concurrency control", async () => { + // Create 120 addresses (split into 2 batches of 100 and 20, processed under the concurrency limit) + const addresses = Array.from({ length: 120 }, (_, i) => `0x${i.toString().padStart(40, "0")}`); + + let concurrentCalls = 0; + let maxConcurrentCalls = 0; + + fetchMock.mockImplementation(async () => { + concurrentCalls++; + maxConcurrentCalls = Math.max(maxConcurrentCalls, concurrentCalls); + await new Promise((resolve) => setTimeout(resolve, 10)); + concurrentCalls--; + return { + ok: true, + json: async () => makeSubgraphResponse([]), + }; + }); + + const fetchPromise = service.fetchProvidersWithDatasets({ + blockNumber: 5000, +
addresses, + }); + + await vi.runAllTimersAsync(); + + await fetchPromise; + + // Should respect MAX_CONCURRENT_REQUESTS (50) + expect(maxConcurrentCalls).toBeLessThanOrEqual(50); + expect(fetchMock).toHaveBeenCalledTimes(2); + }); + }); + + describe("fetchSubgraphMeta", () => { + it("fetches and returns subgraph metadata with block number", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + const meta = await service.fetchSubgraphMeta(); + + expect(fetchMock).toHaveBeenCalledWith(SUBGRAPH_ENDPOINT, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: expect.stringContaining("GetSubgraphMeta"), + }); + + expect(meta).toEqual({ + _meta: { + block: { + number: 12345, + }, + }, + }); + }); + + it("throws when PDP subgraph endpoint is not configured", async () => { + const configService = { + get: vi.fn(() => ({ pdpSubgraphEndpoint: "" })), + } as unknown as ConfigService; + + const serviceWithoutEndpoint = new PDPSubgraphService(configService); + + await expect(serviceWithoutEndpoint.fetchSubgraphMeta()).rejects.toThrow("No PDP subgraph endpoint configured"); + }); + + it("throws on HTTP error response", async () => { + fetchMock.mockResolvedValueOnce({ + ok: false, + status: 500, + statusText: "Internal Server Error", + }); + + const promise = service.fetchSubgraphMeta(); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch subgraph metadata after 3 attempts"); + }); + + it("throws on GraphQL errors in response", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + errors: [{ message: "Query timeout" }], + }), + }); + + const promise = service.fetchSubgraphMeta(); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await 
expect(promise).rejects.toThrow("Failed to fetch subgraph metadata after 3 attempts"); + }); + + it("throws on validation failure without retry", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { + _meta: { + block: { + number: "not-a-number", // Invalid - should be number + }, + }, + }, + }), + }); + + await expect(service.fetchSubgraphMeta()).rejects.toThrow("Data validation failed"); + expect(fetchMock).toHaveBeenCalledTimes(1); // Should not retry validation errors + }); + + it("throws on missing required fields", async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { + _meta: { + block: { + number: undefined, // missing required field + }, + }, + }, + }), + }); + + await expect(service.fetchSubgraphMeta()).rejects.toThrow("Data validation failed"); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("retries on network failures with exponential backoff", async () => { + fetchMock.mockRejectedValueOnce(new Error("Network timeout")).mockResolvedValueOnce({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + const promise = service.fetchSubgraphMeta(); + + await vi.runAllTimersAsync(); + + // Now await the second promise to resolve + const meta = await promise; + + expect(fetchMock).toHaveBeenCalledTimes(2); // Initial + 1 retry + expect(meta._meta.block.number).toBe(12345); + }); + + it("throws after MAX_RETRIES attempts on persistent network errors", async () => { + fetchMock.mockRejectedValue(new Error("Network timeout")); + + const promise = service.fetchSubgraphMeta(); + promise.catch(() => {}); + + await vi.runAllTimersAsync(); + + // Now await the final promise to catch the expected error + await expect(promise).rejects.toThrow("Failed to fetch subgraph metadata after 3 attempts"); + expect(fetchMock).toHaveBeenCalledTimes(3); + }); + }); + + describe("enforceRateLimit (sliding window)", () => { + it("allows requests when under the rate 
limit", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + const startTime = Date.now(); + + // Make 5 requests - should all go through immediately + const promises = Array.from({ length: 5 }, () => service.fetchSubgraphMeta()); + + await Promise.all(promises); + + const endTime = Date.now(); + const elapsed = endTime - startTime; + + // Should complete quickly (no waiting) + expect(elapsed).toBeLessThan(100); + expect(fetchMock).toHaveBeenCalledTimes(5); + }); + + it("enforces rate limit when exceeding MAX_CONCURRENT_REQUESTS", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill up the rate limit window with 50 requests + const initialPromises = Array.from({ length: 50 }, () => service.fetchSubgraphMeta()); + await Promise.all(initialPromises); + + fetchMock.mockClear(); + + // Try to make one more request - should wait for oldest to expire + const promise = service.fetchSubgraphMeta(); + + // Advance past the 10 second window + buffer + await vi.advanceTimersByTimeAsync(10010); + await promise; + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("throws error when requestCount exceeds MAX_CONCURRENT_REQUESTS", async () => { + // Access private method via type assertion for testing + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + + await expect(enforceRateLimit(51)).rejects.toThrow("Cannot request 51 items; exceeds rate limit window of 50"); + }); + + it("correctly calculates wait time for multiple required slots", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill 48 slots + const initialPromises = Array.from({ length: 48 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(initialPromises); + + fetchMock.mockClear(); + + // Request 5 more slots (need 3 to free up: 5 
- 2 available = 3) + // Should wait for the 3rd oldest timestamp to expire + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + const promise = enforceRateLimit(5); + + // The 3rd request should expire at ~10 seconds + await vi.advanceTimersByTimeAsync(10010); + await promise; + + // Verify slots were reserved + // After 10s, the first 48 expired, so we should only have the 5 new ones + const timestamps = (service as any).requestTimestamps; + expect(timestamps.length).toBe(5); // Only the 5 new slots remain + }); + + it("handles sliding window correctly as old requests expire", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Make 30 requests at t=0 + const batch1 = Array.from({ length: 30 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch1); + + // Advance 5 seconds + await vi.advanceTimersByTimeAsync(5000); + + // Make 20 more requests at t=5000 + const batch2 = Array.from({ length: 20 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch2); + + // Now at t=5000, we have 50 requests in the window + // Advance to t=10100 - first 30 should expire + await vi.advanceTimersByTimeAsync(5100); + + fetchMock.mockClear(); + + // Should be able to make 30 more requests immediately + const batch3 = Array.from({ length: 30 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch3); + + expect(fetchMock).toHaveBeenCalledTimes(30); + }); + + it("adds 10ms buffer to prevent timing edge cases", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill the window + const initialPromises = Array.from({ length: 50 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(initialPromises); + + fetchMock.mockClear(); + + const promise = 
service.fetchSubgraphMeta(); + + // Advance past the window + buffer + await vi.advanceTimersByTimeAsync(10010); + await promise; + + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + it("recursively waits when multiple batches need to expire", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill window with 50 requests + const batch1 = Array.from({ length: 50 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch1); + + // Advance 5 seconds + await vi.advanceTimersByTimeAsync(5000); + + fetchMock.mockClear(); + + // Try to request 30 slots (need to wait for 30 to expire) + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + const promise = enforceRateLimit(30); + + // First recursion: wait for 30th oldest to expire (~10s from start) + await vi.advanceTimersByTimeAsync(5010); + + // Should recursively check and complete + await promise; + + const timestamps = (service as any).requestTimestamps; + // After 10s from start, all 50 initial requests expired, only 30 new ones remain + expect(timestamps.length).toBe(30); // Only the 30 new slots + }); + + it("reserves slots immediately to prevent race conditions", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Fill 47 slots + const initial = Array.from({ length: 47 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(initial); + + // Now we have 3 available slots + const enforceRateLimit = (service as any).enforceRateLimit.bind(service); + + // Request 3 slots - should succeed immediately + await enforceRateLimit(3); + + const timestamps = (service as any).requestTimestamps; + expect(timestamps.length).toBe(50); // 47 + 3 = 50 (full) + + // Try to request 1 more - should need to wait + const promise = enforceRateLimit(1); + + // Advance time to free up a slot + 
await vi.advanceTimersByTimeAsync(10010); + await promise; + + // After waiting, the old slots expired and new one was added + const finalTimestamps = (service as any).requestTimestamps; + expect(finalTimestamps.length).toBe(1); // Only the new request remains + }); + + it("filters out expired timestamps from the sliding window", async () => { + fetchMock.mockResolvedValue({ + ok: true, + json: async () => makeSubgraphMetaResponse(12345), + }); + + // Make 20 requests + const batch1 = Array.from({ length: 20 }, () => service.fetchSubgraphMeta()); + await vi.runAllTimersAsync(); + await Promise.all(batch1); + + // Advance past the window + await vi.advanceTimersByTimeAsync(11000); + + fetchMock.mockClear(); + + // Make another request - should have full window available + await service.fetchSubgraphMeta(); + + const timestamps = (service as any).requestTimestamps; + // Should only have 1 timestamp (the new one), old ones filtered out + expect(timestamps.length).toBe(1); + }); + }); +}); diff --git a/apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts b/apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts new file mode 100644 index 00000000..aedd8bce --- /dev/null +++ b/apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts @@ -0,0 +1,306 @@ +import { Injectable, Logger } from "@nestjs/common"; +import { ConfigService } from "@nestjs/config"; +import { toStructuredError } from "../common/logging.js"; +import type { IBlockchainConfig, IConfig } from "../config/app.config.js"; +import { Queries } from "./queries.js"; +import type { GraphQLResponse, ProviderDataSetResponse, ProvidersWithDataSetsOptions, SubgraphMeta } from "./types.js"; +import { validateProviderDataSetResponse, validateSubgraphMetaResponse } from "./types.js"; + +/** + * Error thrown when data validation fails. + * These errors should not be retried as they indicate schema/data issues. 
+ */ +class ValidationError extends Error { + constructor(message: string) { + super(message); + this.name = "ValidationError"; + if (Error.captureStackTrace) { + Error.captureStackTrace(this, ValidationError); + } + } +} + +@Injectable() +export class PDPSubgraphService { + private readonly logger: Logger = new Logger(PDPSubgraphService.name); + private readonly blockchainConfig: IBlockchainConfig; + + private static readonly MAX_PROVIDERS_PER_QUERY = 100; + private static readonly MAX_CONCURRENT_REQUESTS = 50; + private static readonly RATE_LIMIT_WINDOW_MS = 10000; + private static readonly MAX_RETRIES = 3; + private static readonly INITIAL_RETRY_DELAY_MS = 1000; + + private requestTimestamps: number[] = []; + + constructor(private readonly configService: ConfigService) { + this.blockchainConfig = this.configService.get("blockchain"); + } + + /** + * Fetch subgraph metadata including the latest indexed block number + * + * @param attempt - Current retry attempt number (default: 1) + * @returns Subgraph metadata with block number + * @throws Error if endpoint is not configured or after MAX_RETRIES attempts + */ + async fetchSubgraphMeta(attempt: number = 1): Promise { + if (!this.blockchainConfig.pdpSubgraphEndpoint) { + throw new Error("No PDP subgraph endpoint configured"); + } + + try { + await this.enforceRateLimit(); + + const response = await fetch(this.blockchainConfig.pdpSubgraphEndpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + query: Queries.GET_SUBGRAPH_META, + }), + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const result = (await response.json()) as GraphQLResponse; + + if (result.errors) { + const errorMessage = result.errors?.[0]?.message || "Unknown GraphQL error"; + throw new Error(`GraphQL error: ${errorMessage}`); + } + let validated: SubgraphMeta; + try { + validated = validateSubgraphMetaResponse(result.data); + } catch 
(validationError) { + const errorMessage = validationError instanceof Error ? validationError.message : "Unknown validation error"; + throw new ValidationError(`Data validation failed: ${errorMessage}`); + } + + return validated; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : "Unknown error"; + + // No need to retry on validation errors - they indicate schema/data issues, not transient failures + if (error instanceof ValidationError) { + this.logger.error({ + event: "subgraph_meta_validation_failed", + message: "Subgraph data validation failed", + error: toStructuredError(error), + }); + throw error; + } + + // Retry on network/HTTP errors + if (attempt < PDPSubgraphService.MAX_RETRIES) { + const delay = PDPSubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + this.logger.warn({ + event: "subgraph_meta_request_retry", + message: "Subgraph meta request failed. Retrying...", + attempt, + maxRetries: PDPSubgraphService.MAX_RETRIES, + retryDelayMs: delay, + error: toStructuredError(error), + }); + await new Promise((resolve) => setTimeout(resolve, delay)); + return this.fetchSubgraphMeta(attempt + 1); + } + + this.logger.error({ + event: "subgraph_meta_request_failed", + message: "Subgraph meta request failed after maximum retries", + maxRetries: PDPSubgraphService.MAX_RETRIES, + error: toStructuredError(error), + }); + throw new Error( + `Failed to fetch subgraph metadata after ${PDPSubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, + ); + } + } + + /** + * Fetch provider-level totals from subgraph with batching, pagination, and rate limiting + * + * @param options - Options containing block number and provider addresses + * @returns Array of providers with their data sets currently proving + */ + async fetchProvidersWithDatasets( + options: ProvidersWithDataSetsOptions, + ): Promise { + const { blockNumber, addresses } = options; + + if (addresses.length === 0) { + return []; + } + + if (addresses.length <= 
PDPSubgraphService.MAX_PROVIDERS_PER_QUERY) { + return this.fetchWithRetry(blockNumber, addresses); + } + + return this.fetchMultipleBatchesWithRateLimit(blockNumber, addresses); + } + + /** + * Fetch multiple batches with rate limiting and concurrency control + */ + private async fetchMultipleBatchesWithRateLimit( + blockNumber: number, + addresses: string[], + ): Promise { + const batches: string[][] = []; + for (let i = 0; i < addresses.length; i += PDPSubgraphService.MAX_PROVIDERS_PER_QUERY) { + const addressesLimit = Math.min(addresses.length, i + PDPSubgraphService.MAX_PROVIDERS_PER_QUERY); + batches.push(addresses.slice(i, addressesLimit)); + } + + const allProviders: ProviderDataSetResponse["providers"] = []; + + for (let i = 0; i < batches.length; i += PDPSubgraphService.MAX_CONCURRENT_REQUESTS) { + const batchGroup = batches.slice(i, i + PDPSubgraphService.MAX_CONCURRENT_REQUESTS); + + const results = await Promise.all(batchGroup.map((batch) => this.fetchWithRetry(blockNumber, batch))); + + allProviders.push(...results.flat()); + } + + return allProviders; + } + + /** + * Fetch with exponential backoff retry. + * The initial request counts as the first attempt. + */ + private async fetchWithRetry( + blockNumber: number, + addresses: string[], + attempt: number = 1, + ): Promise { + if (!this.blockchainConfig.pdpSubgraphEndpoint) { + throw new Error("No PDP subgraph endpoint configured"); + } + + const variables = { + blockNumber: blockNumber.toString(), + addresses, + }; + + try { + await this.enforceRateLimit(); + + const response = await fetch(this.blockchainConfig.pdpSubgraphEndpoint, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + query: Queries.GET_PROVIDERS_WITH_DATASETS, + variables, + }), + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`); + } + + const result = (await response.json()) as GraphQLResponse; + + if (result.errors) { + const
errorMessage = result.errors?.[0]?.message || "Unknown GraphQL error"; + throw new Error(`GraphQL error: ${errorMessage}`); + } + + let validated: ProviderDataSetResponse; + try { + validated = validateProviderDataSetResponse(result.data); + } catch (validationError) { + const errorMessage = validationError instanceof Error ? validationError.message : "Unknown validation error"; + throw new ValidationError(`Data validation failed: ${errorMessage}`); + } + + return validated.providers; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : "Unknown error"; + + // No need to retry on validation errors - they indicate schema/data issues, not transient failures + if (error instanceof ValidationError) { + this.logger.error({ + event: "subgraph_provider_data_validation_failed", + message: "Subgraph data validation failed", + error: toStructuredError(error), + }); + throw error; + } + + // Retry on network/HTTP errors + if (attempt < PDPSubgraphService.MAX_RETRIES) { + const delay = PDPSubgraphService.INITIAL_RETRY_DELAY_MS * (1 << (attempt - 1)); + this.logger.warn({ + event: "subgraph_provider_request_retry", + message: "Subgraph provider request failed. 
Retrying...", + attempt, + maxRetries: PDPSubgraphService.MAX_RETRIES, + retryDelayMs: delay, + addressCount: addresses.length, + error: toStructuredError(error), + }); + await new Promise((resolve) => setTimeout(resolve, delay)); + return this.fetchWithRetry(blockNumber, addresses, attempt + 1); + } + + this.logger.error({ + event: "subgraph_provider_request_failed", + message: "Subgraph provider request failed after maximum retries", + maxRetries: PDPSubgraphService.MAX_RETRIES, + blockNumber, + addressCount: addresses.length, + error: toStructuredError(error), + }); + throw new Error( + `Failed to fetch provider data after ${PDPSubgraphService.MAX_RETRIES} attempts: ${errorMessage}`, + ); + } + } + + /** + * Enforce rate limiting: max 50 requests per 10 seconds + * This rate limit is applied by Goldsky on their public endpoints + * Read more here: https://docs.goldsky.com/subgraphs/graphql-endpoints#public-endpoints + */ + private async enforceRateLimit(requestCount: number = 1): Promise { + if (requestCount > PDPSubgraphService.MAX_CONCURRENT_REQUESTS) { + throw new Error( + `Cannot request ${requestCount} items; exceeds rate limit window of ${PDPSubgraphService.MAX_CONCURRENT_REQUESTS}`, + ); + } + + const now = Date.now(); + const windowStart = now - PDPSubgraphService.RATE_LIMIT_WINDOW_MS; + + this.requestTimestamps = this.requestTimestamps.filter((timestamp) => timestamp > windowStart); + + const availableSlots = PDPSubgraphService.MAX_CONCURRENT_REQUESTS - this.requestTimestamps.length; + + if (requestCount > availableSlots) { + const requiredSlots = requestCount - availableSlots; + + const index = Math.min(this.requestTimestamps.length, requiredSlots) - 1; + const oldestTimestamp = this.requestTimestamps[index] || now; + + // wait time with 10ms buffer + const waitTime = oldestTimestamp + PDPSubgraphService.RATE_LIMIT_WINDOW_MS - now + 10; + + if (waitTime > 0) { + await new Promise((resolve) => setTimeout(resolve, waitTime)); + return 
this.enforceRateLimit(requestCount); + } + } + + // Reserve the slots NOW + for (let i = 0; i < requestCount; i++) { + this.requestTimestamps.push(Date.now()); + } + } +} diff --git a/apps/backend/src/pdp-subgraph/queries.ts b/apps/backend/src/pdp-subgraph/queries.ts new file mode 100644 index 00000000..a21a3991 --- /dev/null +++ b/apps/backend/src/pdp-subgraph/queries.ts @@ -0,0 +1,24 @@ +export const Queries = { + GET_PROVIDERS_WITH_DATASETS: ` + query GetProvidersWithDataSet($addresses: [Bytes!], $blockNumber: BigInt!) { + providers(where: {address_in: $addresses}) { + address + totalFaultedPeriods + totalProvingPeriods + proofSets (where: {nextDeadline_lt: $blockNumber, status: PROVING}) { + nextDeadline + maxProvingPeriod + } + } + } + `, + GET_SUBGRAPH_META: ` + query GetSubgraphMeta { + _meta { + block { + number + } + } + } + `, +} as const; diff --git a/apps/backend/src/pdp-subgraph/types.spec.ts b/apps/backend/src/pdp-subgraph/types.spec.ts new file mode 100644 index 00000000..02e6eee0 --- /dev/null +++ b/apps/backend/src/pdp-subgraph/types.spec.ts @@ -0,0 +1,245 @@ +import { describe, expect, it } from "vitest"; +import { validateProviderDataSetResponse, validateSubgraphMetaResponse } from "./types.js"; + +// Subgraph stores addresses in lowercase +const VALID_ADDRESS = "0xd8da6bf26964af9d7eed9e03e53415d37aa96045" as const; + +const makeValidProvider = (overrides: Record = {}) => ({ + address: VALID_ADDRESS, + totalFaultedPeriods: "10", + totalProvingPeriods: "100", + proofSets: [ + { + nextDeadline: "1000", + maxProvingPeriod: "100", + }, + ], + ...overrides, +}); + +const makeValidResponse = (providers = [makeValidProvider()]) => ({ + providers, +}); + +describe("validateProviderDataSetResponse", () => { + it("validates and transforms a well-formed response", () => { + const result = validateProviderDataSetResponse(makeValidResponse()); + + expect(result.providers).toHaveLength(1); + const provider = result.providers[0]; + 
expect(provider.address).toBe(VALID_ADDRESS); + expect(provider.totalFaultedPeriods).toBe(10n); + expect(provider.totalProvingPeriods).toBe(100n); + + const proofSet = provider.proofSets[0]; + expect(proofSet.nextDeadline).toBe(1000n); + expect(proofSet.maxProvingPeriod).toBe(100n); + }); + + it("converts string numbers to bigint", () => { + const result = validateProviderDataSetResponse( + makeValidResponse([ + makeValidProvider({ + totalFaultedPeriods: "999999999999999999", + totalProvingPeriods: "1000000000000000000", + }), + ]), + ); + + expect(typeof result.providers[0].totalFaultedPeriods).toBe("bigint"); + expect(result.providers[0].totalFaultedPeriods).toBe(999999999999999999n); + expect(result.providers[0].totalProvingPeriods).toBe(1000000000000000000n); + }); + + it("accepts an empty providers array", () => { + const result = validateProviderDataSetResponse({ providers: [] }); + expect(result.providers).toEqual([]); + }); + + it("accepts a provider with empty proofSets", () => { + const result = validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ proofSets: [] })])); + expect(result.providers[0].proofSets).toEqual([]); + }); + + it("preserves unknown fields (schema uses .unknown(true))", () => { + const result = validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ extraField: "hello" })])); + expect((result.providers[0] as Record).extraField).toBe("hello"); + }); + + it("throws on missing providers field", () => { + expect(() => validateProviderDataSetResponse({})).toThrow("Invalid provider dataset response format"); + }); + + it("throws on null input", () => { + expect(() => validateProviderDataSetResponse(null)).toThrow("Invalid provider dataset response format"); + }); + + it("throws on missing required provider fields", () => { + expect(() => + validateProviderDataSetResponse({ + providers: [{ address: VALID_ADDRESS }], + }), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on invalid 
Ethereum address", () => { + expect(() => + validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ address: "not-an-address" })])), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on non-numeric string for bigint fields", () => { + expect(() => + validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ totalFaultedPeriods: "abc" })])), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on negative number string for bigint fields", () => { + expect(() => + validateProviderDataSetResponse(makeValidResponse([makeValidProvider({ totalFaultedPeriods: "-1" })])), + ).toThrow("Invalid provider dataset response format"); + }); + + it("throws on missing proofSet fields", () => { + expect(() => + validateProviderDataSetResponse( + makeValidResponse([ + makeValidProvider({ + proofSets: [{ totalFaultedPeriods: "1" }], + }), + ]), + ), + ).toThrow("Invalid provider dataset response format"); + }); + + it("validates multiple providers in a single response", () => { + const provider1 = makeValidProvider({ address: VALID_ADDRESS, totalFaultedPeriods: "5" }); + const provider2 = makeValidProvider({ + address: "0xAb5801a7D398351b8bE11C439e05C5B3259aeC9B", + totalFaultedPeriods: "15", + }); + + const result = validateProviderDataSetResponse(makeValidResponse([provider1, provider2])); + + expect(result.providers).toHaveLength(2); + expect(result.providers[0].totalFaultedPeriods).toBe(5n); + expect(result.providers[1].totalFaultedPeriods).toBe(15n); + }); + + it("handles zero values correctly", () => { + const result = validateProviderDataSetResponse( + makeValidResponse([ + makeValidProvider({ + totalFaultedPeriods: "0", + totalProvingPeriods: "0", + proofSets: [ + { + nextDeadline: "0", + maxProvingPeriod: "0", + }, + ], + }), + ]), + ); + + expect(result.providers[0].totalFaultedPeriods).toBe(0n); + expect(result.providers[0].totalProvingPeriods).toBe(0n); + 
expect(result.providers[0].proofSets[0].maxProvingPeriod).toBe(0n); + }); +}); + +describe("validateSubgraphMetaResponse", () => { + it("validates a well-formed subgraph meta response", () => { + const input = { + _meta: { + block: { + number: 12345, + }, + }, + }; + + const result = validateSubgraphMetaResponse(input); + + expect(result._meta.block.number).toBe(12345); + }); + + it("accepts large block numbers", () => { + const input = { + _meta: { + block: { + number: 999999999, + }, + }, + }; + + const result = validateSubgraphMetaResponse(input); + + expect(result._meta.block.number).toBe(999999999); + }); + + it("accepts numeric strings block number", () => { + const result = validateSubgraphMetaResponse({ + _meta: { + block: { + number: "12345", + }, + }, + }); + + expect(result._meta.block.number).toBe(12345); + }); + + it("throws on missing _meta field", () => { + expect(() => validateSubgraphMetaResponse({})).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on missing block field", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: {}, + }), + ).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on missing number field", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: { + block: {}, + }, + }), + ).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on null input", () => { + expect(() => validateSubgraphMetaResponse(null)).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on undefined input", () => { + expect(() => validateSubgraphMetaResponse(undefined)).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on negative block number", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: { + block: { + number: -1, + }, + }, + }), + ).toThrow("Invalid subgraph meta response format"); + }); + + it("throws on floating point block number", () => { + expect(() => + validateSubgraphMetaResponse({ + _meta: { + block: { + 
number: 123.45, + }, + }, + }), + ).toThrow("Invalid subgraph meta response format"); + }); +}); diff --git a/apps/backend/src/pdp-subgraph/types.ts b/apps/backend/src/pdp-subgraph/types.ts new file mode 100644 index 00000000..ad8dcdc4 --- /dev/null +++ b/apps/backend/src/pdp-subgraph/types.ts @@ -0,0 +1,151 @@ +import Joi from "joi"; +import { Hex, isAddress } from "viem"; + +// ----------------------------------------- +// Types +// ----------------------------------------- + +/** The response from the subgraph GraphQL query */ +export type GraphQLResponse = { + /** The data from the query */ + data?: unknown; + /** The errors from the query */ + errors?: { message: string }[]; +}; + +/** + * Options for fetching providers with data sets + */ +export type ProvidersWithDataSetsOptions = { + addresses: string[]; + blockNumber: number; +}; + +/** + * Validated response from the PDP subgraph meta query. + */ +export type SubgraphMeta = { + _meta: { + block: { + number: number; + }; + }; +}; + +/** + * A single proof set within a provider, representing deadline-related proving data. + * All numeric fields are bigints converted from the subgraph string representation. + */ +export type DataSet = { + nextDeadline: bigint; + maxProvingPeriod: bigint; +}; + +/** + * Validated and transformed response from the PDP subgraph providers query. + * Numeric fields are converted from subgraph string representation to bigint. + */ +export type ProviderDataSetResponse = { + providers: { + address: Hex; + totalFaultedPeriods: bigint; + totalProvingPeriods: bigint; + proofSets: DataSet[]; + }[]; +}; + +// ----------------------------------------- +// Joi Custom Schema Converters +// ----------------------------------------- + +/** Joi custom validator that converts a numeric string to bigint. 
*/ +const toBigInt = (value: unknown, helpers: Joi.CustomHelpers) => { + try { + return BigInt(value as string); + } catch { + return helpers.error("any.invalid", { + message: "Invalid bigint value", + }); + } +}; + +/** Joi custom validator to validate an Ethereum address and normalize to lowercase. */ +const toEthereumAddress = (value: unknown, helpers: Joi.CustomHelpers) => { + if (!isAddress(value as string)) { + return helpers.error("any.invalid", { message: "Invalid Ethereum address" }); + } + + // Normalize to lowercase for consistent key lookups + return (value as string).toLowerCase() as Hex; +}; + +// ----------------------------------------- +// Joi Schemas +// ----------------------------------------- + +const metaSchema = Joi.object({ + _meta: Joi.object({ + block: Joi.object({ + number: Joi.number().integer().positive().required(), + }) + .unknown(true) + .required(), + }) + .unknown(true) + .required(), +}) + .unknown(true) + .required(); + +const dataSetSchema = Joi.object({ + nextDeadline: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), + maxProvingPeriod: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), +}).unknown(true); + +const providerDataSetResponseSchema = Joi.object({ + providers: Joi.array() + .items( + Joi.object({ + address: Joi.string().required().custom(toEthereumAddress), + totalFaultedPeriods: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), + totalProvingPeriods: Joi.string().pattern(/^\d+$/).required().custom(toBigInt), + proofSets: Joi.array().items(dataSetSchema).required(), + }).unknown(true), + ) + .required(), +}) + .unknown(true) + .required(); + +// ----------------------------------------- +// Validator Functions +// ----------------------------------------- + +/** + * Validates a raw subgraph meta response into SubgraphMeta. 
+ * + * @param value - The raw parsed JSON from the subgraph + * @throws Error if validation fails + */ +export function validateSubgraphMetaResponse(value: unknown): SubgraphMeta { + const { error, value: validated } = metaSchema.validate(value, { abortEarly: false }); + if (error) { + throw new Error(`Invalid subgraph meta response format: ${error.message}`); + } + return validated as SubgraphMeta; +} + +/** + * Validates and transforms a raw subgraph response into ProviderDataSetResponse. + * Converts string fields to bigint. + * + * @param value - The raw parsed JSON from the subgraph + * @throws Error if validation fails + */ +export function validateProviderDataSetResponse(value: unknown): ProviderDataSetResponse { + const { error, value: validated } = providerDataSetResponseSchema.validate(value, { abortEarly: false }); + if (error) { + throw new Error(`Invalid provider dataset response format: ${error.message}`); + } + return validated as ProviderDataSetResponse; +} diff --git a/apps/backend/src/subgraph/subgraph.service.ts b/apps/backend/src/subgraph/subgraph.service.ts index 3067532c..97472c3c 100644 --- a/apps/backend/src/subgraph/subgraph.service.ts +++ b/apps/backend/src/subgraph/subgraph.service.ts @@ -51,6 +51,21 @@ class ValidationError extends Error { } } +/** + * Client for the dealbot-owned subgraph (driven by `SUBGRAPH_ENDPOINT`). + * + * Functionally a superset of `PDPSubgraphService`: it exposes the same + * `fetchSubgraphMeta` / `fetchProvidersWithDatasets` surface plus the new + * `sampleAnonPiece` query used by anonymous retrievals. + * + * The two services intentionally coexist while we migrate off the upstream + * pdp-explorer subgraph: `PDPSubgraphService` continues to drive the + * established data-retention path against `PDP_SUBGRAPH_ENDPOINT`, and + * `SubgraphService` is scoped to the new anonymous-retrieval flow only. 
+ * Once the dealbot-owned subgraph has soaked in production, this service + * should become the single drop-in replacement for `PDPSubgraphService` + * and `PDP_SUBGRAPH_ENDPOINT` can be retired. + */ @Injectable() export class SubgraphService { private readonly logger: Logger = new Logger(SubgraphService.name); diff --git a/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts b/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts index 195db19f..d6613a31 100644 --- a/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts +++ b/apps/backend/src/wallet-sdk/wallet-sdk.service.spec.ts @@ -18,7 +18,7 @@ const baseConfig: IBlockchainConfig = { checkDatasetCreationFees: false, useOnlyApprovedProviders: false, minNumDataSetsForChecks: 1, - subgraphEndpoint: "https://api.thegraph.com/subgraphs/filecoin/pdp", + pdpSubgraphEndpoint: "https://api.thegraph.com/subgraphs/filecoin/pdp", }; const makeProvider = (overrides: Partial): PDPProviderEx => diff --git a/docs/checks/data-retention.md b/docs/checks/data-retention.md index 4eb7a912..605753e7 100644 --- a/docs/checks/data-retention.md +++ b/docs/checks/data-retention.md @@ -27,7 +27,7 @@ Dealbot polls The Graph API endpoint for PDP (Proof of Data Possession) data at **Subgraph repository**: [FilOzone/pdp-explorer](https://github.com/FilOzone/pdp-explorer/blob/main/subgraph/src/pdp-verifier.ts) -**Subgraph endpoint**: Configured via `SUBGRAPH_ENDPOINT` environment variable (see [environment-variables.md](../environment-variables.md#subgraph_endpoint)) +**Subgraph endpoint**: Configured via `PDP_SUBGRAPH_ENDPOINT` environment variable (see [environment-variables.md](../environment-variables.md#pdp_subgraph_endpoint)) > **Note**: The production subgraph URL is currently being finalized [here](https://github.com/FilOzone/pdp-explorer/pull/86). 
@@ -48,7 +48,7 @@ From `GET_PROVIDERS_WITH_DATASETS` query for each provider: > **Note**: The subgraph query uses the field name `proofSets`, but this refers to "dataSets" in the current codebase. The terminology was updated from "proof set" to "data set" but the subgraph schema retains the old naming. -Source: [`subgraph.service.ts` (`fetchSubgraphMeta`, `fetchProvidersWithDatasets`)](../../apps/backend/src/subgraph/subgraph.service.ts) +Source: [`pdp-subgraph.service.ts` (`fetchSubgraphMeta`, `fetchProvidersWithDatasets`)](../../apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts) ### 2. Compute Challenge Totals and Overdue Estimates @@ -170,7 +170,7 @@ The PDP subgraph service enforces Goldsky's public endpoint rate limits: Rate limiting is enforced client-side to prevent 429 errors. -Source: [`subgraph.service.ts` (`enforceRateLimit`)](../../apps/backend/src/subgraph/subgraph.service.ts) +Source: [`pdp-subgraph.service.ts` (`enforceRateLimit`)](../../apps/backend/src/pdp-subgraph/pdp-subgraph.service.ts) ## Metrics Recorded @@ -210,11 +210,11 @@ Key environment variables that control data retention check behavior: | Variable | Required | Default | Description | | ----------------------- | -------- | ------------ | ------------------------------------------------------------------------------------------------ | -| `SUBGRAPH_ENDPOINT` | No | Empty string | The Graph API endpoint for PDP subgraph queries. When empty, data retention checks are disabled. | +| `PDP_SUBGRAPH_ENDPOINT` | No | Empty string | The Graph API endpoint for PDP subgraph queries. When empty, data retention checks are disabled. | Source: [`app.config.ts`](../../apps/backend/src/config/app.config.ts) -See also: [`environment-variables.md`](../environment-variables.md#subgraph_endpoint) for the full configuration reference. +See also: [`environment-variables.md`](../environment-variables.md#pdp_subgraph_endpoint) for the full configuration reference. 
## Error Handling diff --git a/docs/checks/production-configuration-and-approval-methodology.md b/docs/checks/production-configuration-and-approval-methodology.md index 6b2859aa..2e89a45d 100644 --- a/docs/checks/production-configuration-and-approval-methodology.md +++ b/docs/checks/production-configuration-and-approval-methodology.md @@ -40,7 +40,7 @@ Relevant parameters include: | Parameter | Value | Notes | |-----------|-------|-------| -| [`SUBGRAPH_ENDPOINT`](../environment-variables.md#subgraph_endpoint) | - | Points at a Goldsky deployment of the dealbot-owned subgraph in [`apps/subgraph/`](../../apps/subgraph/) (package `@dealbot/subgraph`). | +| [`PDP_SUBGRAPH_ENDPOINT`](../environment-variables.md#pdp_subgraph_endpoint) | TODO: fill this in | Uses the subgraph from [pdp-explorer](https://github.com/FilOzone/pdp-explorer). | | [`MIN_NUM_DATASETS_FOR_CHECKS`](../environment-variables.md#dataset-configuration) | 15 | Ensure there are enough datasets with pieces being added so that statistical significance for [Data Retention Fault Rate](#data-retention-fault-rate) can be achieved quicker. Note that on mainnet each dataset incurs 5 challenges[^1] per daily proof[^2]. With this many datasets, an SP can be approved for data retention after a faultless ~7 days even if the SP doesn't have other datasets. | See [How are data retention statistics/thresholds calculated?](#how-are-data-retention-statisticsthresholds-calculated) for more details. 
diff --git a/docs/environment-variables.md b/docs/environment-variables.md index e2b23735..91e28abc 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -8,7 +8,7 @@ This document provides a comprehensive guide to all environment variables used b | ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | | [Application](#application-configuration) | `NODE_ENV`, `DEALBOT_PORT`, `DEALBOT_HOST`, `DEALBOT_RUN_MODE`, `DEALBOT_METRICS_PORT`, `DEALBOT_METRICS_HOST`, `DEALBOT_ALLOWED_ORIGINS`, `ENABLE_DEV_MODE` | | [Database](#database-configuration) | `DATABASE_HOST`, `DATABASE_PORT`, `DATABASE_POOL_MAX`, `DATABASE_USER`, `DATABASE_PASSWORD`, `DATABASE_NAME` | -| [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `SUBGRAPH_ENDPOINT` | +| [Blockchain](#blockchain-configuration) | `NETWORK`, `RPC_URL`, `WALLET_ADDRESS`, `WALLET_PRIVATE_KEY`, `SESSION_KEY_PRIVATE_KEY`, `CHECK_DATASET_CREATION_FEES`, `USE_ONLY_APPROVED_PROVIDERS`, `PDP_SUBGRAPH_ENDPOINT`, `SUBGRAPH_ENDPOINT` | | [Dataset Versioning](#dataset-versioning) | `DEALBOT_DATASET_VERSION` | | [Scheduling](#scheduling-configuration) | `PROVIDERS_REFRESH_INTERVAL_SECONDS`, `DATA_RETENTION_POLL_INTERVAL_SECONDS`, `DEALBOT_MAINTENANCE_WINDOWS_UTC`, `DEALBOT_MAINTENANCE_WINDOW_MINUTES` | | [Jobs (pg-boss)](#jobs-pg-boss) | `DEALBOT_PGBOSS_SCHEDULER_ENABLED`, `DEALBOT_PGBOSS_POOL_MAX`, `DEALS_PER_SP_PER_HOUR`, `DATASET_CREATIONS_PER_SP_PER_HOUR`, `RETRIEVALS_PER_SP_PER_HOUR`, `RETRIEVALS_ANON_PER_SP_PER_HOUR`, `JOB_SCHEDULER_POLL_SECONDS`, `JOB_WORKER_POLL_SECONDS`, `PG_BOSS_LOCAL_CONCURRENCY`, `JOB_CATCHUP_MAX_ENQUEUE`, `JOB_SCHEDULE_PHASE_SECONDS`, `JOB_ENQUEUE_JITTER_SECONDS`, `DEAL_JOB_TIMEOUT_SECONDS`, 
`RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS`, `ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT`, `IPFS_BLOCK_FETCH_CONCURRENCY` | @@ -425,13 +425,35 @@ Session keys are scoped (only storage operations, not deposits or withdrawals) a --- +### `PDP_SUBGRAPH_ENDPOINT` + +- **Type**: `string` (URL) +- **Required**: No +- **Default**: Empty string (feature disabled) + +**Role**: The Graph API endpoint for the upstream pdp-explorer subgraph. Drives the data-retention overdue-periods metric. + +This variable is kept distinct from [`SUBGRAPH_ENDPOINT`](#subgraph_endpoint) so the dealbot-owned subgraph can be rolled out incrementally — only the new anonymous-retrieval flow points at the new endpoint while the established data-retention path stays on the upstream subgraph. + +**When to update**: + +- When switching between different Graph API endpoints for the pdp-explorer subgraph. + +**Example**: + +```bash +PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp +``` + +--- + ### `SUBGRAPH_ENDPOINT` - **Type**: `string` (URL) - **Required**: No - **Default**: Empty string (feature disabled) -**Role**: The Graph API endpoint for querying PDP (Proof of Data Possession) subgraph data. Drives the overdue-periods metric and the anonymous-retrieval candidate-piece query. +**Role**: The Graph API endpoint for the dealbot-owned subgraph. Currently drives only the [anonymous-retrieval](./checks/anon-retrievals.md) candidate-piece query. Once the dealbot-owned subgraph has soaked in production it is intended to replace [`PDP_SUBGRAPH_ENDPOINT`](#pdp_subgraph_endpoint). The dealbot-owned subgraph lives at `apps/subgraph/` (package `@dealbot/subgraph`) and is deployed to Goldsky. Point this variable at one of those slots; the exact slugs are documented in `apps/subgraph/README.md`. 
diff --git a/kustomize/overlays/local/backend-configmap-local.yaml b/kustomize/overlays/local/backend-configmap-local.yaml index b4febf61..52918aa2 100644 --- a/kustomize/overlays/local/backend-configmap-local.yaml +++ b/kustomize/overlays/local/backend-configmap-local.yaml @@ -26,6 +26,7 @@ data: PG_BOSS_LOCAL_CONCURRENCY: "3" JOB_WORKER_POLL_SECONDS: "60" RANDOM_PIECE_SIZES: "10485760" + PDP_SUBGRAPH_ENDPOINT: "https://api.goldsky.com/api/public/project_cmdfaaxeuz6us01u359yjdctw/subgraphs/pdp-explorer/calibration311a/gn" SUBGRAPH_ENDPOINT: "https://api.goldsky.com/api/public/project_cmdfaaxeuz6us01u359yjdctw/subgraphs/pdp-explorer/calibration311a/gn" JOB_SCHEDULER_POLL_SECONDS: "60" CLICKHOUSE_URL: "http://default:@dealbot-clickhouse:8123/dealbot" From d82222f530489c9a054e7166a2a61fcc86bbec5c Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Tue, 5 May 2026 09:17:25 +0200 Subject: [PATCH 20/28] refactor: reduce pr diff noise --- apps/backend/src/jobs/jobs.module.ts | 2 +- apps/backend/src/jobs/jobs.service.spec.ts | 52 +++++++++++----------- apps/backend/src/jobs/jobs.service.ts | 3 +- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/apps/backend/src/jobs/jobs.module.ts b/apps/backend/src/jobs/jobs.module.ts index 69f1edb1..fb708e09 100644 --- a/apps/backend/src/jobs/jobs.module.ts +++ b/apps/backend/src/jobs/jobs.module.ts @@ -18,10 +18,10 @@ import { JobScheduleRepository } from "./repositories/job-schedule.repository.js TypeOrmModule.forFeature([StorageProvider, JobScheduleState]), DealModule, RetrievalModule, - RetrievalAnonModule, WalletSdkModule, DataRetentionModule, PieceCleanupModule, + RetrievalAnonModule, ], providers: [JobsService, JobScheduleRepository], }) diff --git a/apps/backend/src/jobs/jobs.service.spec.ts b/apps/backend/src/jobs/jobs.service.spec.ts index c20d0890..8983c723 100644 --- a/apps/backend/src/jobs/jobs.service.spec.ts +++ b/apps/backend/src/jobs/jobs.service.spec.ts @@ -52,10 +52,10 @@ describe("JobsService 
schedule rows", () => { jobScheduleRepository: JobsServiceDeps[2]; dealService: JobsServiceDeps[3]; retrievalService: JobsServiceDeps[4]; - anonRetrievalService: JobsServiceDeps[5]; - walletSdkService: JobsServiceDeps[6]; - dataRetentionService: JobsServiceDeps[7]; - pieceCleanupService: JobsServiceDeps[8]; + walletSdkService: JobsServiceDeps[5]; + dataRetentionService: JobsServiceDeps[6]; + pieceCleanupService: JobsServiceDeps[7]; + anonRetrievalService: JobsServiceDeps[8]; jobsQueuedGauge: JobsServiceDeps[9]; jobsRetryScheduledGauge: JobsServiceDeps[10]; oldestQueuedAgeGauge: JobsServiceDeps[11]; @@ -160,10 +160,10 @@ describe("JobsService schedule rows", () => { overrides.jobScheduleRepository ?? (jobScheduleRepositoryMock as unknown as JobsServiceDeps[2]), overrides.dealService ?? ({} as JobsServiceDeps[3]), overrides.retrievalService ?? ({} as JobsServiceDeps[4]), - overrides.anonRetrievalService ?? ({} as JobsServiceDeps[5]), - overrides.walletSdkService ?? ({} as JobsServiceDeps[6]), - overrides.dataRetentionService ?? (dataRetentionServiceMock as unknown as JobsServiceDeps[7]), - overrides.pieceCleanupService ?? ({} as JobsServiceDeps[8]), + overrides.walletSdkService ?? ({} as JobsServiceDeps[5]), + overrides.dataRetentionService ?? (dataRetentionServiceMock as unknown as JobsServiceDeps[6]), + overrides.pieceCleanupService ?? ({} as JobsServiceDeps[7]), + overrides.anonRetrievalService ?? ({} as JobsServiceDeps[8]), overrides.jobsQueuedGauge ?? metricsMocks.jobsQueuedGauge, overrides.jobsRetryScheduledGauge ?? metricsMocks.jobsRetryScheduledGauge, overrides.oldestQueuedAgeGauge ?? 
metricsMocks.oldestQueuedAgeGauge, @@ -287,7 +287,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); // Trigger the timeout immediately by using fake timers @@ -346,7 +346,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, retrievalService: retrievalService as unknown as ConstructorParameters[4], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); vi.useFakeTimers(); @@ -385,7 +385,7 @@ describe("JobsService schedule rows", () => { service = buildService({ retrievalService: retrievalService as unknown as ConstructorParameters[4], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleRetrievalJob", { @@ -425,7 +425,7 @@ describe("JobsService schedule rows", () => { service = buildService({ retrievalService: retrievalService as unknown as ConstructorParameters[4], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await expect( @@ -928,7 +928,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleDealJob", { @@ -967,8 +967,8 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as 
ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], - pieceCleanupService: pieceCleanupService as unknown as JobsServiceDeps[8], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], + pieceCleanupService: pieceCleanupService as unknown as JobsServiceDeps[7], }); await callPrivate(service, "handleDealJob", { @@ -1000,7 +1000,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleDealJob", { @@ -1029,7 +1029,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1071,7 +1071,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1112,7 +1112,7 @@ describe("JobsService schedule rows", () => { service = buildService({ configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1157,7 +1157,7 @@ describe("JobsService schedule rows", () => { service = buildService({ 
configService, dealService: dealService as unknown as ConstructorParameters[3], - walletSdkService: walletSdkService as unknown as ConstructorParameters[6], + walletSdkService: walletSdkService as unknown as ConstructorParameters[5], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1330,7 +1330,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], }); await callPrivate(service, "handleDealJob", { @@ -1354,7 +1354,7 @@ describe("JobsService schedule rows", () => { service = buildService({ retrievalService: retrievalService as unknown as JobsServiceDeps[4], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], }); await callPrivate(service, "handleRetrievalJob", { @@ -1383,7 +1383,7 @@ describe("JobsService schedule rows", () => { service = buildService({ dealService: dealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], }); await callPrivate(service, "handleDataSetCreationJob", { @@ -1425,7 +1425,7 @@ describe("JobsService schedule rows", () => { intervalSeconds: 60, service: buildService({ dealService: dealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], }), expectCheckNotRun: () => expect(dealService.createDealForProvider).not.toHaveBeenCalled(), }, @@ -1435,7 +1435,7 @@ describe("JobsService schedule rows", () => { intervalSeconds: 60, service: buildService({ retrievalService: retrievalService as unknown as JobsServiceDeps[4], - walletSdkService: walletSdkService as unknown as 
JobsServiceDeps[6], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], }), expectCheckNotRun: () => expect(retrievalService.performRandomRetrievalForProvider).not.toHaveBeenCalled(), }, @@ -1445,7 +1445,7 @@ describe("JobsService schedule rows", () => { intervalSeconds: 3600, service: buildService({ dealService: dataSetDealService as unknown as JobsServiceDeps[3], - walletSdkService: walletSdkService as unknown as JobsServiceDeps[6], + walletSdkService: walletSdkService as unknown as JobsServiceDeps[5], }), expectCheckNotRun: () => expect(dataSetDealService.createDataSetWithPiece).not.toHaveBeenCalled(), }, diff --git a/apps/backend/src/jobs/jobs.service.ts b/apps/backend/src/jobs/jobs.service.ts index b070de5a..e09cf42c 100644 --- a/apps/backend/src/jobs/jobs.service.ts +++ b/apps/backend/src/jobs/jobs.service.ts @@ -72,10 +72,11 @@ export class JobsService implements OnModuleInit, OnApplicationShutdown { private readonly jobScheduleRepository: JobScheduleRepository, private readonly dealService: DealService, private readonly retrievalService: RetrievalService, - private readonly anonRetrievalService: AnonRetrievalService, private readonly walletSdkService: WalletSdkService, private readonly dataRetentionService: DataRetentionService, private readonly pieceCleanupService: PieceCleanupService, + private readonly anonRetrievalService: AnonRetrievalService, + @InjectMetric("jobs_queued") private readonly jobsQueuedGauge: Gauge, @InjectMetric("jobs_retry_scheduled") From 527283fcc8f082f8af2920fa8367f84e4f87cdb8 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Tue, 5 May 2026 09:33:25 +0200 Subject: [PATCH 21/28] remove: residual references to a pdp subgraph in the subgraph module --- apps/backend/src/subgraph/subgraph.service.spec.ts | 6 +++--- apps/backend/src/subgraph/subgraph.service.ts | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/backend/src/subgraph/subgraph.service.spec.ts 
b/apps/backend/src/subgraph/subgraph.service.spec.ts index 8703b2c5..64f28435 100644 --- a/apps/backend/src/subgraph/subgraph.service.spec.ts +++ b/apps/backend/src/subgraph/subgraph.service.spec.ts @@ -397,14 +397,14 @@ describe("SubgraphService", () => { }); }); - it("throws when PDP subgraph endpoint is not configured", async () => { + it("throws when subgraph endpoint is not configured", async () => { const configService = { get: vi.fn(() => ({ subgraphEndpoint: "" })), } as unknown as ConfigService; const serviceWithoutEndpoint = new SubgraphService(configService); - await expect(serviceWithoutEndpoint.fetchSubgraphMeta()).rejects.toThrow("No PDP subgraph endpoint configured"); + await expect(serviceWithoutEndpoint.fetchSubgraphMeta()).rejects.toThrow("No subgraph endpoint configured"); }); it("throws on HTTP error response", async () => { @@ -740,7 +740,7 @@ describe("SubgraphService", () => { const noEndpointService = new SubgraphService(noEndpointConfig); await expect(noEndpointService.sampleAnonPiece(defaultSampleParams)).rejects.toThrow( - "No PDP subgraph endpoint configured", + "No subgraph endpoint configured", ); expect(fetchMock).not.toHaveBeenCalled(); }); diff --git a/apps/backend/src/subgraph/subgraph.service.ts b/apps/backend/src/subgraph/subgraph.service.ts index 97472c3c..3d4e8370 100644 --- a/apps/backend/src/subgraph/subgraph.service.ts +++ b/apps/backend/src/subgraph/subgraph.service.ts @@ -133,9 +133,9 @@ export class SubgraphService { // candidate pool (which silently no-ops every anon retrieval job). 
this.logger.error({ event: "subgraph_endpoint_not_configured", - message: "Cannot sample anonymous piece — no PDP subgraph endpoint configured", + message: "Cannot sample anonymous piece — no subgraph endpoint configured", }); - throw new Error("No PDP subgraph endpoint configured"); + throw new Error("No subgraph endpoint configured"); } const query = buildSampleAnonPieceQuery(params.pool); @@ -194,7 +194,7 @@ export class SubgraphService { attempt: number = 1, ): Promise { if (!this.blockchainConfig.subgraphEndpoint) { - throw new Error("No PDP subgraph endpoint configured"); + throw new Error("No subgraph endpoint configured"); } try { @@ -297,7 +297,7 @@ export class SubgraphService { attempt: number = 1, ): Promise { if (!this.blockchainConfig.subgraphEndpoint) { - throw new Error("No PDP subgraph endpoint configured"); + throw new Error("No subgraph endpoint configured"); } const variables = { From 8dfb3ca9f2508cb24dea95f22d5380d65643d51c Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Fri, 15 May 2026 21:10:42 +0200 Subject: [PATCH 22/28] Apply suggestion from @BigLep Co-authored-by: Steve Loeppky --- docs/checks/anon-retrievals.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/checks/anon-retrievals.md b/docs/checks/anon-retrievals.md index 0a303462..2c15f3ed 100644 --- a/docs/checks/anon-retrievals.md +++ b/docs/checks/anon-retrievals.md @@ -30,7 +30,7 @@ Operational timeouts exist to prevent jobs from running indefinitely. If the job ## Piece Selection -Unlike the [Retrieval check](./retrievals.md#piece-selection), dealbot does not retrieve from its own deals. Pieces are sampled from the on-chain subgraph of all FWSS-served pieces for the SP under test. +Unlike the [Retrieval check](./retrievals.md#piece-selection), dealbot does not retrieve from its own deals. Pieces are sampled from the [on-chain subgraph](../../src/subgraph) of all FWSS-served pieces for the SP under test. 
Selection strategy (per scheduled job, per SP): @@ -59,7 +59,7 @@ flowchart TD Select["Sample anonymous piece for SP from subgraph"] --> Fetch["GET /piece/{pieceCid}"] Fetch --> CommP["Hash bytes → verify CommP"] CommP --> HasIpfs{"piece.withIPFSIndexing
and ipfsRootCid?"} - HasIpfs -- "no" --> Record["Persist row + metrics"] + HasIpfs -- "no" --> Record["Persist ClickHouse row + emit Prometheus metrics"] HasIpfs -- "yes" --> ParseCar["Parse bytes as CAR"] ParseCar --> SampleBlocks["Pick N random CIDs
(ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT)"] SampleBlocks --> Ipni["IPNI: verify SP advertises root + sampled CIDs"] From b8a2621ce5747d2d351066147d4d3487dbd56169 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Fri, 15 May 2026 22:01:13 +0200 Subject: [PATCH 23/28] chore: align pnpm-lock.yaml with main --- pnpm-lock.yaml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8089b756..0495aa11 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1513,24 +1513,24 @@ packages: engines: {node: ^14.18.0 || >=16.10.0, npm: '>=5.10.0'} hasBin: true - '@oclif/core@4.10.6': - resolution: {integrity: sha512-ySCOYnPKZE3KACT1V9It99hWG9b8E5MpagbRdWxPNRO3beMqmbr4SLUQoFtZ9XRtW++kks1ZVwZOdpnR8rpb9A==} + '@oclif/core@4.10.5': + resolution: {integrity: sha512-qcdCF7NrdWPfme6Kr34wwljRCXbCVpL1WVxiNy0Ep6vbWKjxAjFQwuhqkoyL0yjI+KdwtLcOCGn5z2yzdijc8w==} engines: {node: '>=18.0.0'} '@oclif/core@4.5.5': resolution: {integrity: sha512-iQzlaJQgPeUXrtrX71OzDwxPikQ7c2FhNd8U8rBB7BCtj2XYfmzBT/Hmbc+g9OKDIG/JkbJT0fXaWMMBrhi+1A==} engines: {node: '>=18.0.0'} - '@oclif/plugin-autocomplete@3.2.46': - resolution: {integrity: sha512-TFvuD6JlmqEVsEvMqunyj3cyCz/l2Q4MqCjp/XtlSLS9x3xTlam7PGlqWi4WAhxl/K8CtpYqVlMYFEnlLTHspw==} + '@oclif/plugin-autocomplete@3.2.45': + resolution: {integrity: sha512-ENrUg8rbVCjh40uvi3MC9kGbiUoEf11nyqE59RBzegeeLpRXNo/Zp27L9j1tUmPEqGgfS2/wvHPihNzkpK1FDw==} engines: {node: '>=18.0.0'} - '@oclif/plugin-not-found@3.2.81': - resolution: {integrity: sha512-M88tLONBH36hLAbkFbmCo1hoZPSdU5l8Px1xEIlIgSmGMam+CoAzx4kGqpLbokgfpaHeP8/Jx3QJ18u9ef/2Qw==} + '@oclif/plugin-not-found@3.2.80': + resolution: {integrity: sha512-yTLjWvR1r/Rd/cO2LxHdMCDoL5sQhBYRUcOMCmxZtWVWhx4rAZ8KVUPDVsb+SvjJDV5ADTDBgt1H52fFx7YWqg==} engines: {node: '>=18.0.0'} - '@oclif/plugin-warn-if-update-available@3.1.61': - resolution: {integrity: sha512-4XcrTxcCs+brR/eZ0BPeuiREiH3USlJiaHbUqPhnIBuyxhhUSYVd8ZO6s5MQN7AXJq4SMQ+B5zLaHq+ep/afIw==} 
+ '@oclif/plugin-warn-if-update-available@3.1.60': + resolution: {integrity: sha512-cRKBZm14IuA6G8W84dfd3iXj3BTAoxQ5o3pUE8DKEQ4n/tVha20t5nkVeD+ISC68e0Fuw5koTMvRwXb1lJSnzg==} engines: {node: '>=18.0.0'} '@open-draft/deferred-promise@2.2.0': @@ -7599,9 +7599,9 @@ snapshots: dependencies: '@float-capital/float-subgraph-uncrashable': 0.0.0-internal-testing.5 '@oclif/core': 4.5.5 - '@oclif/plugin-autocomplete': 3.2.46 - '@oclif/plugin-not-found': 3.2.81(@types/node@25.2.3) - '@oclif/plugin-warn-if-update-available': 3.1.61 + '@oclif/plugin-autocomplete': 3.2.45 + '@oclif/plugin-not-found': 3.2.80(@types/node@25.6.2) + '@oclif/plugin-warn-if-update-available': 3.1.60 '@pinax/graph-networks-registry': 0.7.1 '@whatwg-node/fetch': 0.10.13 assemblyscript: 0.19.23 @@ -8937,7 +8937,7 @@ snapshots: dependencies: consola: 3.4.2 - '@oclif/core@4.10.6': + '@oclif/core@4.10.5': dependencies: ansi-escapes: 4.3.2 ansis: 3.17.0 @@ -8979,7 +8979,7 @@ snapshots: wordwrap: 1.0.0 wrap-ansi: 7.0.0 - '@oclif/plugin-autocomplete@3.2.46': + '@oclif/plugin-autocomplete@3.2.45': dependencies: '@oclif/core': 4.5.5 ansis: 3.17.0 @@ -8988,16 +8988,16 @@ snapshots: transitivePeerDependencies: - supports-color - '@oclif/plugin-not-found@3.2.81(@types/node@25.2.3)': + '@oclif/plugin-not-found@3.2.80(@types/node@25.6.2)': dependencies: - '@inquirer/prompts': 7.10.1(@types/node@25.2.3) - '@oclif/core': 4.10.6 + '@inquirer/prompts': 7.10.1(@types/node@25.6.2) + '@oclif/core': 4.10.5 ansis: 3.17.0 fast-levenshtein: 3.0.0 transitivePeerDependencies: - '@types/node' - '@oclif/plugin-warn-if-update-available@3.1.61': + '@oclif/plugin-warn-if-update-available@3.1.60': dependencies: '@oclif/core': 4.5.5 ansis: 3.17.0 @@ -11779,7 +11779,7 @@ snapshots: dependencies: foreground-child: 3.3.1 jackspeak: 4.2.3 - minimatch: 10.2.5 + minimatch: 10.2.4 minipass: 7.1.2 package-json-from-dist: 1.0.1 path-scurry: 2.0.1 From 70af7c07ad194a71b8c82f05e391e386daba1827 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: 
Fri, 15 May 2026 22:16:25 +0200 Subject: [PATCH 24/28] fix: wrong reference to an old maximum anon retrieval piece size --- apps/backend/.env.example | 2 +- apps/backend/src/config/app.config.ts | 4 ++-- docs/environment-variables.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/backend/.env.example b/apps/backend/.env.example index 30556e7a..807de908 100644 --- a/apps/backend/.env.example +++ b/apps/backend/.env.example @@ -67,7 +67,7 @@ JOB_SCHEDULE_PHASE_SECONDS=0 JOB_ENQUEUE_JITTER_SECONDS=0 DEAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for deal jobs (TODO: reduce default to 3m) RETRIEVAL_JOB_TIMEOUT_SECONDS=60 # 1m: Max runtime for retrieval jobs (TODO: reduce default to 30s) -ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for anon retrieval jobs (pieces up to ~70 MiB) +ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS=360 # 6m: Max runtime for anon retrieval jobs (pieces up to ~500 MiB) IPFS_BLOCK_FETCH_CONCURRENCY=6 # Parallel block fetches when validating IPFS DAGs DEALBOT_PGBOSS_POOL_MAX=1 DEALBOT_PGBOSS_SCHEDULER_ENABLED=true diff --git a/apps/backend/src/config/app.config.ts b/apps/backend/src/config/app.config.ts index 7906be8c..49b55606 100644 --- a/apps/backend/src/config/app.config.ts +++ b/apps/backend/src/config/app.config.ts @@ -101,7 +101,7 @@ export const configValidationSchema = Joi.object({ JOB_ENQUEUE_JITTER_SECONDS: Joi.number().min(0).default(0), DEAL_JOB_TIMEOUT_SECONDS: Joi.number().min(120).default(360), // 6 minutes max runtime for data storage jobs (TODO: reduce default to 3 minutes) RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(60), // 1 minute max runtime for retrieval jobs (TODO: reduce default to 30 seconds) - ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(360), // 6 minutes max runtime for anon retrieval jobs (pieces can be up to ~70 MiB) + ANON_RETRIEVAL_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(360), // 6 minutes max runtime for anon retrieval jobs (pieces can be up 
to 500 MiB) DATA_SET_CREATION_JOB_TIMEOUT_SECONDS: Joi.number().min(60).default(300), // 5 minutes max runtime for dataset creation jobs IPFS_BLOCK_FETCH_CONCURRENCY: Joi.number().integer().min(1).max(32).default(6), ANON_RETRIEVAL_BLOCK_SAMPLE_COUNT: Joi.number().integer().min(1).max(50).default(5), @@ -281,7 +281,7 @@ export interface IJobsConfig { /** * Maximum runtime (seconds) for anonymous retrieval jobs before forced abort. * - * Anonymous retrievals fetch arbitrary pieces (up to ~70 MiB), so this is + * Anonymous retrievals fetch arbitrary pieces (up to ~500 MiB), so this is * typically larger than `retrievalJobTimeoutSeconds`. Uses AbortController * to actively cancel job execution while still persisting partial metrics. */ diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 91e28abc..547170ac 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -832,7 +832,7 @@ Use this to stagger multiple dealbot deployments that are not sharing a database - **Minimum**: `60` - **Enforced**: Yes (config validation) -**Role**: Maximum runtime for anonymous retrieval jobs before forced abort. Anonymous retrievals fetch arbitrary pieces (up to ~70 MiB) that were not produced by the dealbot, so this is typically larger than `RETRIEVAL_JOB_TIMEOUT_SECONDS`. When the timeout trips, partial metrics (`ttfb_ms`, `bytes_retrieved`, `response_code`) are still persisted so the abort is not silently lost. +**Role**: Maximum runtime for anonymous retrieval jobs before forced abort. Anonymous retrievals fetch arbitrary pieces (up to ~500 MiB) that were not produced by the dealbot, so this is typically larger than `RETRIEVAL_JOB_TIMEOUT_SECONDS`. When the timeout trips, partial metrics (`ttfb_ms`, `bytes_retrieved`, `response_code`) are still persisted so the abort is not silently lost. 
**When to update**: From b003d78250412cecf36a86fa5f0f78f60876cc47 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Fri, 15 May 2026 22:17:16 +0200 Subject: [PATCH 25/28] docs: improve anon retrieval documentation --- docs/checks/events-and-metrics.md | 2 +- docs/environment-variables.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index fba8b003..9c8a5ae0 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -4,7 +4,7 @@ This document is the intended **source of truth** for the events emitted by deal > **Note on "events":** the entries in the [Event List](#event-list) are named **timing markers** used to define metric Timer Starts/Ends — they are not all emitted as discrete Prometheus events or log lines. Each marker is anchored in code (as a timestamp variable, log line, or status transition) and used to compute the metrics in the [Metrics](#metrics) section. -## Anonymous Retrieval Event Model +## Anonymous Retrieval Event Model The [Anonymous Retrieval check](./anon-retrievals.md) is a single-shot flow per piece: select → fetch piece → (optional) parse CAR + IPNI + block fetch → write one ClickHouse row. diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 547170ac..72fadca0 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -431,9 +431,9 @@ Session keys are scoped (only storage operations, not deposits or withdrawals) a - **Required**: No - **Default**: Empty string (feature disabled) -**Role**: The Graph API endpoint for the upstream pdp-explorer subgraph. Drives the data-retention overdue-periods metric. +**Role**: The Graph API endpoint for querying PDP (Proof of Data Possession) subgraph data. This endpoint is used to retrieve data retention info for provider data.
-This variable is kept distinct from [`SUBGRAPH_ENDPOINT`](#subgraph_endpoint) so the dealbot-owned subgraph can be rolled out incrementally — only the new anonymous-retrieval flow points at the new endpoint while the established data-retention path stays on the upstream subgraph. +This variable is kept distinct from [`SUBGRAPH_ENDPOINT`](#subgraph_endpoint) so the [dealbot-owned subgraph](../apps/subgraph) can be rolled out incrementally. Only the newer [anonymous-retrieval check](./checks/anon-retrievals.md) points at the new endpoint while the established [data-retention check](./checks/data-retention.md) stays on the upstream subgraph. **When to update**: @@ -455,7 +455,7 @@ PDP_SUBGRAPH_ENDPOINT=https://api.thegraph.com/subgraphs/filecoin/pdp **Role**: The Graph API endpoint for the dealbot-owned subgraph. Currently drives only the [anonymous-retrieval](./checks/anon-retrievals.md) candidate-piece query. Once the dealbot-owned subgraph has soaked in production it is intended to replace [`PDP_SUBGRAPH_ENDPOINT`](#pdp_subgraph_endpoint). -The dealbot-owned subgraph lives at `apps/subgraph/` (package `@dealbot/subgraph`) and is deployed to Goldsky. Point this variable at one of those slots; the exact slugs are documented in `apps/subgraph/README.md`. +The dealbot-owned subgraph lives at [`apps/subgraph/`](../apps/subgraph) (package `@dealbot/subgraph`) and is deployed to [Goldsky](https://goldsky.com).
**When to update**: From 21b4f2d5045dc6261b915c7e14c75521ddb83d89 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Fri, 15 May 2026 22:54:51 +0200 Subject: [PATCH 26/28] docs: fix accidental changes to untouched event descriptions --- docs/checks/events-and-metrics.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index 9c8a5ae0..1e9d8583 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -118,8 +118,8 @@ sequenceDiagram | `ipfsRetrievalHttpResponseCode` | Data Storage, Retrieval | [`ipfsRetrievalLastByteReceived`](#ipfsRetrievalLastByteReceived) | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` | [`retrieval.service.ts`](../../apps/backend/src/retrieval/retrieval.service.ts) | | `retrievalStatus` | Data Storage, Retrieval | [`ipfsRetrievalIntegrityChecked`](#ipfsRetrievalIntegrityChecked) | `success`, `failure.timedout`, `failure.other` from [Data Storage Sub-status meanings](./data-storage.md#sub-status-meanings). 
| | | `dataSetCreationStatus` | Data-Set Creation | Not tied to an [event above](#event-list) but rather to data-set creation start (`pending`) and completion (`success`/`failure.*`) | `pending`, `success`, `failure.timedout`, `failure.other` | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | -| `dataSetChallengeStatus` | Data Retention | Not tied to an [event above](#event-list) but rather to the periodic chain-checking done in the [Data Retention Check](./data-retention.md) | `success`, `failure` | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | -| `pdp_provider_overdue_periods` | Data Retention | Emitted on every poll | Gauge value (estimated overdue periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | +| `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | +| `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. | Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | | `anonRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` (HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.aborted`, `failure.no_piece`. 
| [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonPieceHttpResponseCode` | Anonymous Retrieval | After piece fetch completes | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` (same classifier as [`ipfsRetrievalHttpResponseCode`](#ipfsRetrievalHttpResponseCode)) | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonCarParseStatus` | Anonymous Retrieval | After CAR validation runs (skipped when piece fetch failed or piece is not IPFS-indexed) | `parseable`, `not_parseable` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | From a4f0b38fdb789de01d1dbff7e8977434320c0008 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Fri, 15 May 2026 22:56:32 +0200 Subject: [PATCH 27/28] rename: metric anonRetrievalStatus to anonPieceRetrievalStatus https://github.com/FilOzone/dealbot/pull/487/changes#r3245245410 --- apps/backend/src/metrics-prometheus/check-metrics.service.ts | 2 +- .../src/metrics-prometheus/metrics-prometheus.module.ts | 4 ++-- docs/checks/anon-retrievals.md | 2 +- docs/checks/events-and-metrics.md | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/apps/backend/src/metrics-prometheus/check-metrics.service.ts b/apps/backend/src/metrics-prometheus/check-metrics.service.ts index 8d4be313..76a8ee31 100644 --- a/apps/backend/src/metrics-prometheus/check-metrics.service.ts +++ b/apps/backend/src/metrics-prometheus/check-metrics.service.ts @@ -260,7 +260,7 @@ export class AnonRetrievalCheckMetrics { private readonly throughputBps: Histogram, @InjectMetric("anonRetrievalCheckMs") private readonly checkMs: Histogram, - @InjectMetric("anonRetrievalStatus") + @InjectMetric("anonPieceRetrievalStatus") private readonly statusCounter: Counter, @InjectMetric("anonPieceHttpResponseCode") private readonly httpResponseCounter: Counter, diff --git 
a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts index 45f728b6..4ebeb01a 100644 --- a/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts +++ b/apps/backend/src/metrics-prometheus/metrics-prometheus.module.ts @@ -234,8 +234,8 @@ const metricProviders = [ buckets: [100, 500, 1000, 2000, 5000, 10000, 30000, 60000, 120000, 300000, 600000], }), makeCounterProvider({ - name: "anonRetrievalStatus", - help: "Anonymous retrieval overall outcome", + name: "anonPieceRetrievalStatus", + help: "Anonymous piece retrieval overall outcome", labelNames: ["checkType", "providerId", "providerName", "providerStatus", "value"] as const, }), makeCounterProvider({ diff --git a/docs/checks/anon-retrievals.md b/docs/checks/anon-retrievals.md index 2c15f3ed..c3b69610 100644 --- a/docs/checks/anon-retrievals.md +++ b/docs/checks/anon-retrievals.md @@ -91,7 +91,7 @@ Source: [`car-validation.service.ts`](../../apps/backend/src/retrieval-anon/car- | # | Assertion | How It's Checked | Retries | Relevant Metric | Implemented? 
| |---|-----------|------------------|:---:|------------------|:---:| | 1 | SP serves the piece | `GET /piece/{pieceCid}` returns HTTP 2xx | 0 | [`anonPieceRetrievalLastByteMs`](./events-and-metrics.md#anonPieceRetrievalLastByteMs) | Yes | -| 2 | Bytes match the declared CommP | Hash of response bytes equals `pieceCid` | 0 | [`anonRetrievalStatus`](./events-and-metrics.md#anonRetrievalStatus) | Yes | +| 2 | Bytes match the declared CommP | Hash of response bytes equals `pieceCid` | 0 | [`anonPieceRetrievalStatus`](./events-and-metrics.md#anonPieceRetrievalStatus) | Yes | | 3 | Bytes parse as a CAR (IPFS-indexed pieces only) | `@ipld/car` parses the response | 0 | [`anonCarParseStatus`](./events-and-metrics.md#anonCarParseStatus) | Yes | | 4 | SP is advertised on IPNI for root + sampled CIDs | filecoinpin.contact returns provider records | polling until timeout | [`anonIpniStatus`](./events-and-metrics.md#anonIpniStatus) | Yes | | 5 | Sampled blocks fetch + hash-verify | `/ipfs/{cid}?format=raw` for each sample | 0 | [`anonBlockFetchStatus`](./events-and-metrics.md#anonBlockFetchStatus) | Yes | diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index 1e9d8583..2421242c 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -10,7 +10,7 @@ The [Anonymous Retrieval check](./anon-retrievals.md) is a single-shot flow per It is not modeled as a sequence of named lifecycle events. Instead it emits: -- **Outcome metrics** when each step completes — see the [time](#time-related-metrics) and [status](#status-count-related-metrics) metric tables for `anonPieceRetrievalFirstByteMs`, `anonRetrievalCheckMs`, `anonRetrievalStatus`, `anonCarParseStatus`, `anonIpniStatus`, `anonBlockFetchStatus`, and friends. 
+- **Outcome metrics** when each step completes — see the [time](#time-related-metrics) and [status](#status-count-related-metrics) metric tables for `anonPieceRetrievalFirstByteMs`, `anonRetrievalCheckMs`, `anonPieceRetrievalStatus`, `anonCarParseStatus`, `anonIpniStatus`, `anonBlockFetchStatus`, and friends. - **One row per attempt** in the `anon_retrieval_checks` [ClickHouse table](#clickhouse-tables), emitted even on abort or unexpected error. - **Structured log lines** (`anon_retrieval_started`, `anon_retrieval_completed`, `anon_retrieval_no_piece`, `anon_retrieval_car_validation_failed`, `anon_retrieval_clickhouse_insert_failed`) carrying a `retrievalId` so each row can be joined back to log evidence. @@ -120,7 +120,7 @@ sequenceDiagram | `dataSetCreationStatus` | Data-Set Creation | Not tied to an [event above](#event-list) but rather to data-set creation start (`pending`) and completion (`success`/`failure.*`) | `pending`, `success`, `failure.timedout`, `failure.other` | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | | `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | | `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. 
| Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | -| `anonRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` (HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.aborted`, `failure.no_piece`. | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` (HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.aborted`, `failure.no_piece`. | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonPieceHttpResponseCode` | Anonymous Retrieval | After piece fetch completes | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` (same classifier as [`ipfsRetrievalHttpResponseCode`](#ipfsRetrievalHttpResponseCode)) | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonCarParseStatus` | Anonymous Retrieval | After CAR validation runs (skipped when piece fetch failed or piece is not IPFS-indexed) | `parseable`, `not_parseable` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonIpniStatus` | Anonymous Retrieval | After CAR validation runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | From 1a32373e29e7868c39f19d68ded25e3d238b9858 Mon Sep 17 00:00:00 2001 From: Dennis Trautwein Date: Fri, 15 May 2026 22:58:07 +0200 Subject: [PATCH 28/28] fix: interpret abort signal as timed out for metric --- 
apps/backend/src/retrieval-anon/anon-retrieval.service.ts | 2 +- docs/checks/events-and-metrics.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts index eddc88f0..a74c2bf0 100644 --- a/apps/backend/src/retrieval-anon/anon-retrieval.service.ts +++ b/apps/backend/src/retrieval-anon/anon-retrieval.service.ts @@ -142,7 +142,7 @@ export class AnonRetrievalService { pieceServedCorrectly ? "success" : pieceResult.aborted - ? "failure.aborted" + ? "failure.timedout" : pieceResult.success ? "failure.commp" : "failure.http", diff --git a/docs/checks/events-and-metrics.md b/docs/checks/events-and-metrics.md index 2421242c..37761e89 100644 --- a/docs/checks/events-and-metrics.md +++ b/docs/checks/events-and-metrics.md @@ -120,7 +120,7 @@ sequenceDiagram | `dataSetCreationStatus` | Data-Set Creation | Not tied to an [event above](#event-list) but rather to data-set creation start (`pending`) and completion (`success`/`failure.*`) | `pending`, `success`, `failure.timedout`, `failure.other` | [`deal.service.ts`](../../apps/backend/src/deal/deal.service.ts) | | `dataSetChallengeStatus` | Data Retention | Emitted on each [Data Retention Check](./data-retention.md) poll when a provider's confirmed proving-period totals advance (strictly positive deltas). Unit: **challenges** (period delta × `CHALLENGES_PER_PROVING_PERIOD = 5`). | `success` (challenges in successfully-proven periods), `failure` (challenges in faulted periods) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | | `pdp_provider_estimated_overdue_periods` | Data Retention | Emitted on every [Data Retention Check](./data-retention.md) poll for every successfully processed provider. 
| Gauge value in proving periods (non-negative integer) | [`data-retention.service.ts`](../../apps/backend/src/data-retention/data-retention.service.ts) | -| `anonPieceRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` (HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.aborted`, `failure.no_piece`. | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | +| `anonPieceRetrievalStatus` | Anonymous Retrieval | After piece fetch completes (or on abort) | `success` (HTTP 2xx **and** CommP matches), `failure.http`, `failure.commp` (HTTP 2xx but bytes hashed to a different CID), `failure.timedout`, `failure.no_piece`. | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonPieceHttpResponseCode` | Anonymous Retrieval | After piece fetch completes | `200`, `500`, `2xxSuccess`, `4xxClientError`, `5xxServerError`, `otherHttpStatusCodes`, `failure` (same classifier as [`ipfsRetrievalHttpResponseCode`](#ipfsRetrievalHttpResponseCode)) | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonCarParseStatus` | Anonymous Retrieval | After CAR validation runs (skipped when piece fetch failed or piece is not IPFS-indexed) | `parseable`, `not_parseable` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) | | `anonIpniStatus` | Anonymous Retrieval | After CAR validation runs, **or** when piece fetch failed (records `skipped`) | `valid`, `invalid`, `skipped`, `error` | [`anon-retrieval.service.ts`](../../apps/backend/src/retrieval-anon/anon-retrieval.service.ts) |