Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/google/uuid v1.6.0
github.com/onsi/gomega v1.38.2
github.com/sei-protocol/sei-config v0.0.5
github.com/sei-protocol/seictl v0.0.12
github.com/sei-protocol/seictl v0.0.13
k8s.io/api v0.35.0
k8s.io/apimachinery v0.35.0
k8s.io/client-go v0.35.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sei-protocol/sei-config v0.0.5 h1:edMsQk0/WijGwbZIccSGC2FtPkw0N9XIWDSGgsDeAFw=
github.com/sei-protocol/sei-config v0.0.5/go.mod h1:IEAv5ynYw8Gu2F2qNfE4MQR0PPihAT6g7RWLpWdw5O0=
github.com/sei-protocol/seictl v0.0.12 h1:BH6EXSrCSjMT45q/wWqskkR1ph+V9YxIgbXg4Cv80tI=
github.com/sei-protocol/seictl v0.0.12/go.mod h1:Tf6AISrbFK0i9/BYHB4pkDrLrk5KAfuFuTkz/fKfY9w=
github.com/sei-protocol/seictl v0.0.13 h1:AoJNfA8lo0cQLbqyWJVCKRIauAoDvi4UOnJwfux7S/I=
github.com/sei-protocol/seictl v0.0.13/go.mod h1:Tf6AISrbFK0i9/BYHB4pkDrLrk5KAfuFuTkz/fKfY9w=
github.com/spf13/cobra v1.10.0 h1:a5/WeUlSDCvV5a45ljW2ZFtV0bTDpkfSAj3uqB6Sc+0=
github.com/spf13/cobra v1.10.0/go.mod h1:9dhySC7dnTtEiqzmqfkLj47BslqLCUPMXjG2lj/NgoE=
github.com/spf13/pflag v1.0.8/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
Expand Down
4 changes: 3 additions & 1 deletion internal/controller/node/task_builders.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ func taskBuilderForNode(node *seiv1alpha1.SeiNode, taskType string) sidecar.Task
case taskConfigureGenesis:
return configureGenesisBuilder(node)
case taskConfigureStateSync:
return sidecar.ConfigureStateSyncTask{}
return sidecar.ConfigureStateSyncTask{
UseLocalSnapshot: hasLocalSnapshot(node),
}
case taskConfigApply:
return configApplyBuilder(node)
case taskConfigValidate:
Expand Down
13 changes: 12 additions & 1 deletion manifests/samples/pacific-1-replay.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ spec:
command: ["seid"]
args: ["start", "--home", "/sei"]

# Node configuration overrides for this sample.
# Keys are dotted "<section>.<key>" paths and every value is a quoted string.
# NOTE(review): presumably applied by the controller's config-apply task — confirm.
config:
overrides:
# Match sei-infra production settings
# chain.* keys
chain.concurrency_workers: "500"
chain.occ_enabled: "true"
# self_remediation.* keys
# NOTE(review): the *_seconds suffixes suggest second-granularity values — confirm units for blocks_behind_threshold.
self_remediation.blocks_behind_threshold: "300"
self_remediation.blocks_behind_check_interval_seconds: "60"
self_remediation.restart_cooldown_seconds: "300"
self_remediation.p2p_no_peers_restart_window_seconds: "120"
self_remediation.statesync_no_peers_restart_window_seconds: "90"

genesis:
chainId: pacific-1
s3:
Expand All @@ -27,7 +38,7 @@ spec:
region: eu-central-1
tags:
ChainIdentifier: pacific-1
Component: snapshotter
Component: state-syncer

stateSync:
trustPeriod: "9999h0m0s"
Expand Down
146 changes: 123 additions & 23 deletions manifests/samples/pacific-1-shadow-replay.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# Shadow Replay Job
# Shadow Replay Job — Phase 1
#
# Re-executes mainnet blocks through the Giga engine and compares outcomes
# against canonical results from an archival source node.
# Output: NDJSON ComparisonRecords to stdout.
#
# Output: NDJSON BlockComparison records to stdout + /sei/shadow/output/
# Metrics: Prometheus on :9090
#
# Prerequisites:
# 1. A seid image built from ftr-shadow (with shadow-replay command)
# 2. An archival source node (min-retain-blocks=0) reachable via RPC
# 2. An archival source node reachable via RPC
# 3. The seid-node ServiceAccount with IRSA for S3 access
---
apiVersion: v1
Expand All @@ -27,14 +29,15 @@ metadata:
name: pacific-1-shadow-replay
namespace: default
spec:
backoffLimit: 2
backoffLimit: 5
activeDeadlineSeconds: 21600
template:
metadata:
labels:
sei.io/workload: shadow-replay
sei.io/chain: pacific-1
spec:
restartPolicy: OnFailure
restartPolicy: Never
serviceAccountName: seid-node
tolerations:
- key: sei.io/workload
Expand All @@ -50,9 +53,9 @@ spec:
values: [sei-node]

initContainers:
# 1. Bootstrap the seid home directory.
- name: seid-init
image: &seidImage ghcr.io/bdchatham/sei-shadow:ftr-shadow
imagePullPolicy: Always
command: ["/bin/sh", "-c"]
args:
- |
Expand All @@ -61,12 +64,11 @@ spec:
else
seid init pacific-1 --chain-id pacific-1 --home /sei --overwrite
fi
mkdir -p /sei/tmp
mkdir -p /sei/tmp /sei/shadow/output
volumeMounts:
- name: data
mountPath: /sei

# 2. Download snapshot from S3 and extract into data directory.
- name: snapshot-restore
image: amazon/aws-cli:2.27.31
command: ["/bin/sh", "-c"]
Expand All @@ -84,21 +86,23 @@ spec:
exit 0
fi

yum install -y tar gzip >/dev/null 2>&1

echo "resolving latest snapshot..."
HEIGHT=$(aws s3 cp "s3://${BUCKET}/${PREFIX}latest.txt" - --region ${REGION} | tr -d '[:space:]')
KEY="${PREFIX}snapshot_${HEIGHT}_${CHAIN_ID}_${REGION}.tar.gz"

echo "downloading s3://${BUCKET}/${KEY}"
echo "downloading and extracting s3://${BUCKET}/${KEY}"
mkdir -p ${DEST}
aws s3 cp "s3://${BUCKET}/${KEY}" - --region ${REGION} | tar xzf - -C ${DEST}

echo "${HEIGHT}" > /sei/shadow/snapshot-height.txt
echo "snapshot restored at height ${HEIGHT}"
touch /sei/.snapshot-done
volumeMounts:
- name: data
mountPath: /sei

# 3. Download the real mainnet genesis.json (overwrites the dummy from seid init).
- name: genesis
image: amazon/aws-cli:2.27.31
command: ["/bin/sh", "-c"]
Expand All @@ -112,26 +116,122 @@ spec:
- name: data
mountPath: /sei

# Derive the first block to replay (snapshot height + 1) from the height that
# the snapshot-restore init container writes to /sei/shadow/snapshot-height.txt,
# and persist it to /sei/shadow/start-height.txt for the shadow-replay container.
- name: resolve-start-height
  image: *seidImage
  imagePullPolicy: Always
  command: ["/bin/sh", "-c"]
  args:
    - |
      HEIGHT_FILE=/sei/shadow/snapshot-height.txt
      if [ ! -f "${HEIGHT_FILE}" ]; then
        echo "ERROR: snapshot-height.txt not found"
        exit 1
      fi

      HEIGHT=$(tr -d '[:space:]' < "${HEIGHT_FILE}")
      # Reject empty or non-numeric content up front: shell arithmetic would
      # otherwise either fail with a cryptic error or quietly evaluate to 0,
      # producing a bogus start height of 1.
      case "${HEIGHT}" in
        ''|*[!0-9]*)
          echo "ERROR: snapshot-height.txt does not contain a valid height: '${HEIGHT}'"
          exit 1
          ;;
      esac

      START=$((HEIGHT + 1))
      echo "${START}" > /sei/shadow/start-height.txt
      echo "start height resolved: ${START}"
  volumeMounts:
    - name: data
      mountPath: /sei

# Apply the restored snapshot by running seid once with state-sync pointed at
# the local snapshot, waiting until the local RPC reports a height at or above
# the snapshot height, then stopping seid and disabling state-sync again.
# The /sei/.bootstrap-done marker makes the step a no-op on later runs against
# the same volume.
- name: bootstrap-state
  image: *seidImage
  imagePullPolicy: Always
  command: ["/bin/sh", "-c"]
  args:
    - |
      if [ -f /sei/.bootstrap-done ]; then
        echo "state already bootstrapped, skipping"
        exit 0
      fi

      CONFIG=/sei/config/config.toml
      # RPC endpoints for state-sync; the first one is also queried for the trust hash.
      TRUST_RPC="18.194.110.34:26657"
      RPC_SERVERS="${TRUST_RPC},63.180.172.82:26657"

      HEIGHT=$(tr -d '[:space:]' < /sei/shadow/snapshot-height.txt)
      # Fail fast on a missing/empty/corrupt height instead of writing an
      # invalid trust-height into config.toml and starting seid with it.
      case "${HEIGHT}" in
        ''|*[!0-9]*)
          echo "ERROR: invalid snapshot height '${HEIGHT}' in /sei/shadow/snapshot-height.txt"
          exit 1
          ;;
      esac
      echo "bootstrapping state from local snapshot at height ${HEIGHT}..."

      # Configure state-sync to use the local snapshot
      sed -i 's/^enable = .*/enable = true/' ${CONFIG}
      sed -i 's/^use-local-snapshot = .*/use-local-snapshot = true/' ${CONFIG}
      sed -i "s/^trust-height = .*/trust-height = ${HEIGHT}/" ${CONFIG}
      sed -i "s|^rpc-servers = .*|rpc-servers = \"${RPC_SERVERS}\"|" ${CONFIG}
      sed -i 's/^backfill-blocks = .*/backfill-blocks = 0/' ${CONFIG}
      sed -i 's/^trust-period = .*/trust-period = "9999h0m0s"/' ${CONFIG}

      # Fetch trust hash for the snapshot height.
      # --max-time bounds the call so a stalled endpoint cannot hang the init container.
      TRUST_HASH=$(curl -s --max-time 30 "http://${TRUST_RPC}/block?height=${HEIGHT}" | \
        jq -r '.block_id.hash // .result.block_id.hash' 2>/dev/null)
      if [ -n "${TRUST_HASH}" ] && [ "${TRUST_HASH}" != "null" ]; then
        sed -i "s/^trust-hash = .*/trust-hash = \"${TRUST_HASH}\"/" ${CONFIG}
        echo "trust hash: ${TRUST_HASH}"
      else
        echo "WARNING: could not fetch trust hash, using empty"
      fi

      # Start seid in background
      seid start --home /sei &
      SEID_PID=$!

      echo "waiting for state-sync to apply snapshot (pid=${SEID_PID})..."
      # 7200 iterations * 2s = 4 hours max (plus the bounded time of each status probe)
      for i in $(seq 1 7200); do
        sleep 2
        # If seid crashed, exit early and surface its exit code
        if ! kill -0 ${SEID_PID} 2>/dev/null; then
          wait ${SEID_PID} 2>/dev/null
          SEID_RC=$?
          echo "ERROR: seid process exited unexpectedly"
          echo "seid exit code: ${SEID_RC}"
          exit 1
        fi
        # Check RPC (only available after state-sync completes)
        CURRENT=$(curl -s --max-time 5 http://localhost:26657/status 2>/dev/null | \
          jq -r '.sync_info.latest_block_height // .result.sync_info.latest_block_height' 2>/dev/null)
        # stderr of the numeric test is silenced because CURRENT may not be a number yet
        if [ -n "${CURRENT}" ] && [ "${CURRENT}" != "null" ] && [ "${CURRENT}" -ge "${HEIGHT}" ] 2>/dev/null; then
          echo "state bootstrapped at height ${CURRENT}"
          kill ${SEID_PID} 2>/dev/null
          wait ${SEID_PID} 2>/dev/null
          # Turn state-sync back off so later seid runs start from the applied state
          sed -i 's/^enable = true/enable = false/' ${CONFIG}
          touch /sei/.bootstrap-done
          exit 0
        fi
        # Progress log every 5 minutes (150 iterations * 2s)
        if [ $((i % 150)) -eq 0 ]; then
          ELAPSED=$((i * 2 / 60))
          echo "still waiting... ${ELAPSED}m elapsed (current height: ${CURRENT:-unknown})"
        fi
      done
      echo "ERROR: timed out waiting for state-sync (4h)"
      kill ${SEID_PID} 2>/dev/null
      # Reap seid so it finishes shutting down before the container exits
      wait ${SEID_PID} 2>/dev/null
      exit 1
  env:
    - name: TMPDIR
      value: /sei/tmp
  volumeMounts:
    - name: data
      mountPath: /sei

containers:
- name: shadow-replay
image: *seidImage
command: ["seid"]
imagePullPolicy: Always
command: ["/bin/sh", "-c"]
args:
- "shadow-replay"
- "--home"
- "/sei"
- "--source-rpc"
# TODO: replace with the archive node's RPC endpoint
- "http://pacific-replay.sei-network.svc.cluster.local:26657"
- "--start-height"
# Must equal snapshot height + 1. Current snapshots are ~197875000.
- "197875001"
- "--end-height"
# 50k blocks for initial validation. Set to "0" for continuous replay to tip.
- "197925000"
- |
START=$(cat /sei/shadow/start-height.txt | tr -d '[:space:]')
END=$((START + 49999))
echo "replaying blocks ${START} to ${END}"
exec seid shadow-replay \
--home /sei \
--source-rpc http://18.194.110.34:26657 \
--start-height "${START}" \
--end-height "${END}" \
--checkpoint /sei/shadow/checkpoint.json \
--output-dir /sei/shadow/output \
--metrics-addr :9090 \
--chain-id pacific-1
env:
- name: TMPDIR
value: /sei/tmp
ports:
- name: metrics
containerPort: 9090
volumeMounts:
- name: data
mountPath: /sei
Expand Down
15 changes: 13 additions & 2 deletions manifests/samples/pacific-1-snapshotter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
namespace: default
spec:
chainId: pacific-1
mode: full
mode: archive
image: 189176372795.dkr.ecr.us-east-2.amazonaws.com/sei/sei-chain:837ba922db3f5313a474fbe0c7bba4cbec466cdc

sidecar:
Expand All @@ -15,6 +15,17 @@ spec:
command: ["seid"]
args: ["start", "--home", "/sei"]

# Node configuration overrides for this sample.
# Keys are dotted "<section>.<key>" paths and every value is a quoted string.
# NOTE(review): presumably applied by the controller's config-apply task — confirm.
config:
overrides:
# Match sei-infra production settings
# chain.* keys
chain.concurrency_workers: "500"
chain.occ_enabled: "true"
# self_remediation.* keys
# NOTE(review): the *_seconds suffixes suggest second-granularity values — confirm units for blocks_behind_threshold.
self_remediation.blocks_behind_threshold: "300"
self_remediation.blocks_behind_check_interval_seconds: "60"
self_remediation.restart_cooldown_seconds: "300"
self_remediation.p2p_no_peers_restart_window_seconds: "120"
self_remediation.statesync_no_peers_restart_window_seconds: "90"

genesis:
chainId: pacific-1
s3:
Expand All @@ -27,7 +38,7 @@ spec:
region: eu-central-1
tags:
ChainIdentifier: pacific-1
Component: snapshotter
Component: state-syncer

stateSync:
trustPeriod: "9999h0m0s"
Expand Down
Loading