From 9483d4ab9e6dad530a1d18204cb04c7a38c7aded Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Thu, 30 Apr 2026 08:50:30 -0300 Subject: [PATCH 01/14] =?UTF-8?q?eth/protocols/wit,=20consensus/bor,=20eth?= =?UTF-8?q?:=20WIT2=20=E2=80=94=20BP-signed=20witness=20announcements=20wi?= =?UTF-8?q?th=20transitive=20relay=20and=20pre-import=20serving?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds WIT2 (protocol version 3): block producers sign a chunked-parallel commitment over each witness, peers verify the signature and relay the announcement at network-RTT speed without execution, and any peer holding the body can serve it pre-import from an in-memory cache. Byte-correctness is verified by requesters against the BP-signed WitnessHash, attaching tampering blame to the server; content-correctness (state-root) failures attach to the BP. Removes the per-hop ~500 ms execution gate that today serialises witness propagation through stateless validators. Witness commitment uses 1 MiB chunked-parallel keccak (keccak256 of the concatenation of per-chunk hashes), measured at ~13.5 ms wall-clock for 50 MiB witnesses on 8 cores vs ~88 ms single-shot. Wire format and signature shape are unchanged from a single-keccak commitment; only the function mapping bytes to the 32-byte commitment changes. Producer-side signing reuses the engine SignerFn via consensus/bor.SignBytes with a dedicated mimetype (application/x-bor-wit2-announce) and a domain-separated digest tag, replay-resistant at both the digest and signer-call levels. Receivers verify ecrecover against the scheduled producer for the announced block; announces for blocks whose header is not yet locally available are deferred (no strike) so the block-cosend race does not punish honest relayers. Pre-import serving cache (capacity 10) is fed from the paged-fetch path the moment byte-correctness check passes, before chain write. Cache entries are gated on a BP-signed WitnessHash being on file — relayers never cache unverified bytes, and WIT1 fallback paths skip the cache entirely. handleGetWitness consults the cache before chain storage. Wire: new protocol version WIT2 = 3, new message SignedNewWitnessHashesMsg = 0x06 with up to 64 announcements per packet. WitnessMetadataResponse extended with WitnessHash. WIT1 peers continue using NewWitnessHashes; mixed mesh tolerated. Rate-limits: 200 ms per-(blockHash, peer) relay rate-limit, 30 s announce TTL, per-peer token bucket (burst 256, refill 64/s), strike disconnect at 5 invalid signed announces per minute. Conflicting WitnessHash for the same BlockHash is rejected via signedWitnessCache.putIfNewer. Operator note: validators running Clef as their signer must whitelist the mimetype application/x-bor-wit2-announce; without it the producer falls back to unsigned WIT1 announces. --- accounts/accounts.go | 1 + consensus/bor/bor.go | 36 ++ consensus/bor/signbytes_test.go | 70 +++ core/stateless/encoding.go | 26 +- core/stateless/encoding_test.go | 59 ++ core/stateless/witness_bench_test.go | 110 ++++ core/stateless/witness_commit.go | 116 ++++ core/stateless/witness_commit_bench_test.go | 292 ++++++++++ core/stateless/witness_commit_helpers_test.go | 325 +++++++++++ core/stateless/witness_commit_test.go | 88 +++ eth/fetcher/block_fetcher.go | 4 +- eth/fetcher/block_fetcher_race_test.go | 12 + eth/fetcher/block_fetcher_test.go | 2 +- eth/fetcher/metrics.go | 4 + eth/fetcher/witness_manager.go | 145 ++++- eth/fetcher/witness_manager_test.go | 104 +++- eth/fetcher/witness_manager_wit2_test.go | 301 ++++++++++ eth/handler.go | 56 +- eth/handler_wit.go | 259 +++++++-- eth/handler_wit2.go | 504 +++++++++++++++++ eth/handler_wit2_test.go | 526 ++++++++++++++++++ eth/handler_wit_test.go | 31 ++ eth/peer.go | 3 + eth/peer_mock.go | 38 ++ eth/peerset.go | 47 +- eth/peerset_test.go | 72 +++ eth/protocols/wit/broadcast.go | 7 + eth/protocols/wit/handler.go | 12 + eth/protocols/wit/handlers.go | 26 + eth/protocols/wit/peer.go | 56 +- eth/protocols/wit/protocol.go | 87 ++- eth/protocols/wit/protocol_wit2_test.go | 91 +++ 32 files changed, 3409 insertions(+), 101 deletions(-) create mode 100644 consensus/bor/signbytes_test.go create mode 100644 core/stateless/witness_bench_test.go create mode 100644 core/stateless/witness_commit.go create mode 100644 core/stateless/witness_commit_bench_test.go create mode 100644 core/stateless/witness_commit_helpers_test.go create mode 100644 core/stateless/witness_commit_test.go create mode 100644 eth/fetcher/witness_manager_wit2_test.go create mode 100644 eth/handler_wit2.go create mode 100644 eth/handler_wit2_test.go create mode 100644 eth/protocols/wit/protocol_wit2_test.go diff --git a/accounts/accounts.go b/accounts/accounts.go index 6d46a0f76c..c775864939 100644 --- a/accounts/accounts.go +++ b/accounts/accounts.go @@ -41,6 +41,7 @@ const ( MimetypeTypedData = "data/typed" MimetypeClique = "application/x-clique-header" MimetypeBor = "application/x-bor-header" + MimetypeBorWitnessAnnounce = "application/x-bor-wit2-announce" MimetypeTextPlain = "text/plain" ) diff --git a/consensus/bor/bor.go b/consensus/bor/bor.go index ec2dfcd79f..685d44ce74 100644 --- a/consensus/bor/bor.go +++ b/consensus/bor/bor.go @@ -1540,6 +1540,42 @@ func Sign(signFn SignerFn, signer common.Address, header *types.Header, c *param return nil } +// SignBytes signs the supplied preimage bytes under a context-specific +// mimetype using the engine's currently authorized signer. The mimetype is the +// domain tag the underlying signer (clef, keystore) sees, so callers MUST pass +// a context-specific value (e.g. accounts.MimetypeBorWitnessAnnounce) and +// never reuse accounts.MimetypeBor outside of header sealing — that would let +// a signature produced here be replayed as a block-seal signature on any +// header BorRLP that hashes to the same digest. +// +// Callers pass the unhashed preimage; the wallet's SignData implementation +// applies keccak256 once before signing. Verifiers must independently hash +// the same preimage and ecrecover against the resulting digest. +func (c *Bor) SignBytes(mimetype string, digest []byte) (signer common.Address, sig []byte, err error) { + if mimetype == "" || mimetype == accounts.MimetypeBor { + return common.Address{}, nil, errors.New("bor: SignBytes requires a non-empty, non-header mimetype") + } + current := c.authorizedSigner.Load() + if current == nil || current.signer == (common.Address{}) { + return common.Address{}, nil, errors.New("bor: no authorized signer configured") + } + sig, err = current.signFn(accounts.Account{Address: current.signer}, mimetype, digest) + if err != nil { + return common.Address{}, nil, err + } + return current.signer, sig, nil +} + +// CurrentSigner returns the address of the currently authorized signer, or +// the zero address if none has been configured. +func (c *Bor) CurrentSigner() common.Address { + current := c.authorizedSigner.Load() + if current == nil { + return common.Address{} + } + return current.signer +} + // CalcDifficulty is the difficulty adjustment algorithm. It returns the difficulty // that a new block should have based on the previous blocks in the chain and the // current signer. diff --git a/consensus/bor/signbytes_test.go b/consensus/bor/signbytes_test.go new file mode 100644 index 0000000000..bd7b2992b8 --- /dev/null +++ b/consensus/bor/signbytes_test.go @@ -0,0 +1,70 @@ +package bor + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/accounts" + "github.com/ethereum/go-ethereum/common" +) + +// TestSignBytesForwardsMimetype is the regression for the wit2 announce +// signing path's external-signer compatibility: bor.SignBytes must hand the +// caller-supplied mimetype to the configured signer untouched. Operators +// configuring Clef whitelist a specific string ("application/x-bor-wit2- +// announce"); if SignBytes ever rewrote, lower-cased, or stripped that, the +// signer would either reject the request or sign under a different domain. +// +// The test captures the (mimetype, payload) the wallet sees and asserts both +// match exactly what the caller passed. +func TestSignBytesForwardsMimetype(t *testing.T) { + bor := &Bor{} + addr := common.HexToAddress("0x1234") + + var ( + gotMimetype string + gotPayload []byte + ) + bor.Authorize(addr, func(_ accounts.Account, mimetype string, data []byte) ([]byte, error) { + gotMimetype = mimetype + gotPayload = append([]byte(nil), data...) + return make([]byte, 65), nil + }) + + preimage := []byte("wit2-announce-preimage") + signer, sig, err := bor.SignBytes(accounts.MimetypeBorWitnessAnnounce, preimage) + if err != nil { + t.Fatalf("SignBytes: %v", err) + } + if signer != addr { + t.Fatalf("signer addr mismatch: got %s want %s", signer, addr) + } + if len(sig) != 65 { + t.Fatalf("expected 65-byte signature, got %d", len(sig)) + } + if gotMimetype != accounts.MimetypeBorWitnessAnnounce { + t.Fatalf("mimetype not forwarded literally: got %q want %q", + gotMimetype, accounts.MimetypeBorWitnessAnnounce) + } + if !bytes.Equal(gotPayload, preimage) { + t.Fatalf("payload not forwarded literally: got %x want %x", gotPayload, preimage) + } +} + +// TestSignBytesRejectsHeaderMimetype guards against accidental cross-context +// reuse: callers must never pass MimetypeBor (header sealing) into SignBytes, +// since that would let an announce signature replay as a block-seal. +func TestSignBytesRejectsHeaderMimetype(t *testing.T) { + bor := &Bor{} + bor.Authorize(common.HexToAddress("0x1234"), func(accounts.Account, string, []byte) ([]byte, error) { + t.Fatal("signFn must not be reached for rejected mimetype") + return nil, nil + }) + + if _, _, err := bor.SignBytes("", []byte{0x01}); err == nil { + t.Fatal("empty mimetype must be rejected") + } + if _, _, err := bor.SignBytes(accounts.MimetypeBor, []byte{0x01}); err == nil { + t.Fatal("MimetypeBor must be rejected to prevent header-seal replay") + } +} diff --git a/core/stateless/encoding.go b/core/stateless/encoding.go index e955b9c962..09f7d389e7 100644 --- a/core/stateless/encoding.go +++ b/core/stateless/encoding.go @@ -17,7 +17,9 @@ package stateless import ( + "bytes" "io" + "sort" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/core/types" @@ -84,19 +86,29 @@ func (w *Witness) fromExtWitness(ext *ExtWitness) error { // EncodeRLP serializes a witness as RLP using the canonical BorWitness 3-field // format. Only state trie nodes are encoded; contract bytecodes are not // included in the wire format. +// +// State entries are sorted lexicographically before encoding so the output is +// byte-identical for any two witnesses with the same logical contents. Without +// this, Go's randomized map iteration would produce different bytes per call, +// breaking any code that hashes the encoded witness for content addressing — +// notably the WIT2 BP-signed witness hash, which is computed by both producer +// and verifiers and must match exactly. func (w *Witness) EncodeRLP(wr io.Writer) error { w.lock.RLock() defer w.lock.RUnlock() - bw := &BorWitness{ - Context: w.context, - Headers: w.Headers, - State: make([][]byte, 0, len(w.State)), - } + state := make([][]byte, 0, len(w.State)) for node := range w.State { - bw.State = append(bw.State, []byte(node)) + state = append(state, []byte(node)) } - return rlp.Encode(wr, bw) + sort.Slice(state, func(i, j int) bool { + return bytes.Compare(state[i], state[j]) < 0 + }) + return rlp.Encode(wr, &BorWitness{ + Context: w.context, + Headers: w.Headers, + State: state, + }) } // DecodeRLP decodes a witness from RLP. It first attempts the canonical diff --git a/core/stateless/encoding_test.go b/core/stateless/encoding_test.go index eded87b541..1327587fff 100644 --- a/core/stateless/encoding_test.go +++ b/core/stateless/encoding_test.go @@ -180,3 +180,62 @@ func TestRoundtrip_BorWitnessFormat(t *testing.T) { t.Errorf("Codes should be empty after BorWitness roundtrip, got %d", len(decoded.Codes)) } } + +// TestEncodeRLP_DeterministicAcrossInsertionOrder is the regression test for +// the WIT2 byte-blame model. State entries arrive via a Go map, whose +// iteration order is randomised, so without sorting in EncodeRLP two +// witnesses with identical logical content would encode to different bytes +// and hash differently. Receivers verifying response bytes against the BP- +// signed witness hash would falsely drop honest peers. +func TestEncodeRLP_DeterministicAcrossInsertionOrder(t *testing.T) { + const N = 64 + nodes := make([][]byte, N) + for i := 0; i < N; i++ { + nodes[i] = []byte{byte(i), byte(i ^ 0x5a), byte(i ^ 0xa5)} + } + + makeWitness := func(insertionOrder []int) *Witness { + w := &Witness{ + Headers: []*types.Header{{Number: big.NewInt(1)}}, + Codes: make(map[string]struct{}), + State: make(map[string]struct{}, len(insertionOrder)), + } + w.context = &types.Header{Number: big.NewInt(2)} + for _, i := range insertionOrder { + w.State[string(nodes[i])] = struct{}{} + } + return w + } + + encode := func(w *Witness) []byte { + raw, err := rlp.EncodeToBytes(w) + if err != nil { + t.Fatalf("encode: %v", err) + } + return raw + } + + forward := make([]int, N) + for i := range forward { + forward[i] = i + } + reverse := make([]int, N) + for i := range reverse { + reverse[i] = N - 1 - i + } + + wForward := makeWitness(forward) + wReverse := makeWitness(reverse) + if got, want := encode(wForward), encode(wReverse); string(got) != string(want) { + t.Fatalf("EncodeRLP must be deterministic across map insertion orders; got divergent bytes (%d vs %d)", len(got), len(want)) + } + + // Re-encoding the same witness multiple times must also yield identical + // bytes, even though Go map iteration is fresh each call. + first := encode(wForward) + for i := 0; i < 5; i++ { + if string(encode(wForward)) != string(first) { + t.Fatalf("repeat encode call %d differs from first", i) + } + } +} diff --git a/core/stateless/witness_bench_test.go b/core/stateless/witness_bench_test.go new file mode 100644 index 0000000000..caeedd79a5 --- /dev/null +++ b/core/stateless/witness_bench_test.go @@ -0,0 +1,110 @@ +package stateless + +import ( + "crypto/rand" + "fmt" + "testing" + + "github.com/ethereum/go-ethereum/crypto" +) + +// BenchmarkWitnessEncodeRLP measures the cost of EncodeRLP, which sorts +// state nodes lexicographically before serialization. Surfaces regressions if +// the comparator changes (e.g. swapping bytes.Compare for an allocating +// alternative). Synthetic 50 MiB witness with realistic node sizes. +func BenchmarkWitnessEncodeRLP(b *testing.B) { + for _, sizeMiB := range []int{1, 15, 50} { + w := buildSyntheticWitness(sizeMiB<<20, 256) + b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := w.EncodeRLP(discardWriter{}); err != nil { + b.Fatalf("encode: %v", err) + } + } + }) + } +} + +type discardWriter struct{} + +func (discardWriter) Write(p []byte) (int, error) { return len(p), nil } + +// BenchmarkWitnessKeccakBySize measures the throughput of keccak256 over a +// pre-allocated witness-sized buffer. This is the cost the producer pays to +// compute WitnessHash on the WIT2 announce path (and the cost a relayer or +// requester pays to verify response bytes against the BP-signed WitnessHash). +// +// Run with `go test -bench=BenchmarkWitnessKeccakBySize ./core/stateless/`. +// b.SetBytes lets `go test -benchmem` print throughput in MB/s alongside ns/op, +// which is what we actually want to know — the absolute size of any one +// witness varies, but per-byte cost scales linearly. +func BenchmarkWitnessKeccakBySize(b *testing.B) { + for _, sizeMiB := range []int{1, 5, 15, 30, 50} { + size := sizeMiB << 20 + buf := make([]byte, size) + if _, err := rand.Read(buf); err != nil { + b.Fatalf("rand: %v", err) + } + b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = crypto.Keccak256Hash(buf) + } + }) + } +} + +// BenchmarkWitnessAnnounceSign measures the marginal ECDSA cost of signing the +// 32-byte announcement digest, independent of witness size. This isolates the +// secp256k1 sign cost from the keccak cost so a single number per platform is +// directly comparable to libsecp256k1 microbenchmarks. +func BenchmarkWitnessAnnounceSign(b *testing.B) { + key, err := crypto.GenerateKey() + if err != nil { + b.Fatalf("key: %v", err) + } + digest := make([]byte, 32) + if _, err := rand.Read(digest); err != nil { + b.Fatalf("rand: %v", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := crypto.Sign(digest, key); err != nil { + b.Fatalf("sign: %v", err) + } + } +} + +// BenchmarkWitnessHashAndSignCombined measures the realistic producer-side +// cost of the WIT2 announce path: keccak256 over witness bytes followed by +// ECDSA sign over the (small) signing digest. This is the latency the BP +// adds before emitting a signed announce. Compare against the ~500ms-per-hop +// savings: as long as this stays well under the savings, the change is a +// net win even at 50 MiB witnesses. +func BenchmarkWitnessHashAndSignCombined(b *testing.B) { + key, err := crypto.GenerateKey() + if err != nil { + b.Fatalf("key: %v", err) + } + for _, sizeMiB := range []int{1, 5, 15, 30, 50} { + size := sizeMiB << 20 + buf := make([]byte, size) + if _, err := rand.Read(buf); err != nil { + b.Fatalf("rand: %v", err) + } + b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + witnessHash := crypto.Keccak256Hash(buf) + digest := crypto.Keccak256Hash(witnessHash[:], []byte{0x01, 0x02, 0x03, 0x04}) + if _, err := crypto.Sign(digest[:], key); err != nil { + b.Fatalf("sign: %v", err) + } + } + }) + } +} diff --git a/core/stateless/witness_commit.go b/core/stateless/witness_commit.go new file mode 100644 index 0000000000..4fe42cb0b4 --- /dev/null +++ b/core/stateless/witness_commit.go @@ -0,0 +1,116 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package stateless + +import ( + "bytes" + "runtime" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// WitnessCommitChunkBytes is the protocol-fixed chunk size for the WIT2 +// witness commitment. Producer and verifier MUST agree on this constant. +// Changing it changes the meaning of every WitnessHash on the wire. +const WitnessCommitChunkBytes = 1 << 20 // 1 MiB + +// witnessCommitMaxWorkers caps the keccak fan-out. The chosen value reflects +// the bench finding on Apple M4 Pro that 8 P-cores saturate the keccak +// primitive; over-subscribing onto E-cores doesn't add throughput. +const witnessCommitMaxWorkers = 8 + +// WitnessCommitHash returns the WIT2 witness commitment over the canonical +// RLP encoding of a witness: keccak256 of the concatenation of chunk hashes, +// where each chunk is keccak256 over a WitnessCommitChunkBytes-sized window +// of rlpBytes. The output is invariant in worker count — only the input +// bytes and the chunk-size constant determine the result, so producer and +// verifier always agree byte-for-byte regardless of GOMAXPROCS. +// +// Empty input returns the zero hash, distinct from keccak256("") so empty +// witnesses are unambiguously identified across the protocol. +func WitnessCommitHash(rlpBytes []byte) common.Hash { + if len(rlpBytes) == 0 { + return common.Hash{} + } + chunks := splitWitnessChunks(rlpBytes, WitnessCommitChunkBytes) + chunkHashes := make([]common.Hash, len(chunks)) + + // Single-chunk inputs (≤1 MiB) skip the goroutine pool — the fan-out cost + // would dominate the keccak. + if len(chunks) == 1 { + chunkHashes[0] = crypto.Keccak256Hash(chunks[0]) + } else { + workers := runtime.GOMAXPROCS(0) + if workers > witnessCommitMaxWorkers { + workers = witnessCommitMaxWorkers + } + if workers > len(chunks) { + workers = len(chunks) + } + if workers < 1 { + workers = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(chunks)) + for w := 0; w < workers; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + chunkHashes[i] = crypto.Keccak256Hash(chunks[i]) + } + }() + } + for i := range chunks { + work <- i + } + close(work) + wg.Wait() + } + + concat := make([]byte, 0, len(chunkHashes)*common.HashLength) + for _, h := range chunkHashes { + concat = append(concat, h[:]...) + } + return crypto.Keccak256Hash(concat) +} + +// WitnessCommitHashFromWitness encodes a witness with the canonical sorted +// EncodeRLP and returns its WitnessCommitHash. Callers that already have +// canonical RLP bytes should use WitnessCommitHash directly to skip the +// re-encoding cost. +func WitnessCommitHashFromWitness(w *Witness) (common.Hash, error) { + var buf bytes.Buffer + if err := w.EncodeRLP(&buf); err != nil { + return common.Hash{}, err + } + return WitnessCommitHash(buf.Bytes()), nil +} + +func splitWitnessChunks(buf []byte, chunkSize int) [][]byte { + out := make([][]byte, 0, (len(buf)+chunkSize-1)/chunkSize) + for i := 0; i < len(buf); i += chunkSize { + end := i + chunkSize + if end > len(buf) { + end = len(buf) + } + out = append(out, buf[i:end]) + } + return out +} diff --git a/core/stateless/witness_commit_bench_test.go b/core/stateless/witness_commit_bench_test.go new file mode 100644 index 0000000000..751a78af3c --- /dev/null +++ b/core/stateless/witness_commit_bench_test.go @@ -0,0 +1,292 @@ +package stateless + +import ( + "bytes" + "crypto/ecdsa" + "fmt" + "sort" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// Witness sizes the bench iterates. Mirrors the approved plan's matrix. +var benchSizesMiB = []int{1, 5, 15, 30, 50} + +// Core counts for the parallel candidates. cores=1 lets us see the +// single-thread baseline directly inside the same matrix; 8 reflects modern +// validator/relayer hardware. +var benchCores = []int{1, 2, 4, 8} + +// preparedWitness holds an already-built synthetic witness alongside its +// canonical encoded bytes and root hash, so each Benchmark sub-run pays +// the construction cost once outside the timed loop. +type preparedWitness struct { + w *Witness + rlpBytes []byte + // rootForD: a synthetic "state root" the intrinsic walk starts from. + // Picked deterministically from the witness's set so D's positive + // path resolves; without an MPT we can't reconstruct a real root, and + // the bench cares about per-node keccak throughput + walk cost shape. + rootForD common.Hash +} + +func prepareWitness(b *testing.B, sizeMiB int) preparedWitness { + b.Helper() + w := buildSyntheticWitness(sizeMiB<<20, 256) + var buf bytes.Buffer + if err := w.EncodeRLP(&buf); err != nil { + b.Fatalf("encode: %v", err) + } + rlpBytes := buf.Bytes() + // Pick the lex-smallest node-hash as the synthetic root for D so the + // walk has a definite entry point. Realistic verifier uses + // header.StateRoot; the hash we pick is functionally equivalent for + // timing purposes. + hashes := make([]common.Hash, 0, len(w.State)) + for n := range w.State { + hashes = append(hashes, crypto.Keccak256Hash([]byte(n))) + } + sort.Slice(hashes, func(i, j int) bool { + return string(hashes[i][:]) < string(hashes[j][:]) + }) + var root common.Hash + if len(hashes) > 0 { + root = hashes[0] + } + return preparedWitness{w: w, rlpBytes: rlpBytes, rootForD: root} +} + +// BenchmarkCommit_A_BlobKeccak — current baseline. Single-threaded keccak +// over the canonical RLP encoding. +func BenchmarkCommit_A_BlobKeccak(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + b.Run(fmt.Sprintf("%dMiB", mib), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateA_BlobKeccak(pw.rlpBytes) + } + }) + } +} + +// BenchmarkCommit_B_PageParallel — page-aligned (15 MiB) parallel keccak, +// aggregate via concat+keccak. cores=K parallelism. +func BenchmarkCommit_B_PageParallel(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + for _, cores := range benchCores { + b.Run(fmt.Sprintf("%dMiB/cores=%d", mib, cores), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateB_PageParallel(pw.rlpBytes, cores) + } + }) + } + } +} + +// BenchmarkCommit_C_PerNodeMerkle — per-node hash + sort + Merkle build. +// Includes node hashing in the timed region so this is the verifier-side +// cost. The producer-only cost is captured separately below. +func BenchmarkCommit_C_PerNodeMerkle(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + for _, cores := range benchCores { + b.Run(fmt.Sprintf("%dMiB/cores=%d", mib, cores), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateC_PerNodeMerkle(pw.w, cores) + } + }) + } + } +} + +// BenchmarkCommit_B_ChunkSize sweeps chunk size for B while holding +// cores=8. Answers "is 15 MiB the right page size for parallelism, or +// would smaller chunks win?". Pinned to 50 MiB because that's where the +// answer matters; smaller witnesses don't have headroom to split. +func BenchmarkCommit_B_ChunkSize(b *testing.B) { + pw := prepareWitness(b, 50) + chunks := []int{ + 512 * 1024, // 512 KiB + 1 * 1024 * 1024, // 1 MiB + 2 * 1024 * 1024, // 2 MiB + 4 * 1024 * 1024, // 4 MiB + 8 * 1024 * 1024, // 8 MiB + 15 * 1024 * 1024, // 15 MiB (current wire page) + } + for _, c := range chunks { + c := c + b.Run(fmt.Sprintf("chunk=%dKiB/cores=8", c>>10), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateB_PageParallelChunked(pw.rlpBytes, c, 8) + } + }) + } + // Also try cores=12 (all logical cores) at the smallest chunks to + // see if the M4 Pro's E-cores help at finer granularity. + for _, c := range []int{512 * 1024, 1 * 1024 * 1024, 2 * 1024 * 1024} { + c := c + b.Run(fmt.Sprintf("chunk=%dKiB/cores=12", c>>10), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateB_PageParallelChunked(pw.rlpBytes, c, 12) + } + }) + } +} + +// BenchmarkProducerSign_C_ZeroCost — producer's incremental work +// post-execution: sort N hashes + Merkle build + ECDSA sign. Validates +// the "zero hashing cost on producer" claim by feeding precomputed hashes. +func BenchmarkProducerSign_C_ZeroCost(b *testing.B) { + key, err := crypto.GenerateKey() + if err != nil { + b.Fatalf("key: %v", err) + } + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + // Pre-hash & pre-sort the node set so the timed region only + // includes Merkle build and ECDSA sign (the two pieces the + // producer would actually pay). + hashes := make([]common.Hash, 0, len(pw.w.State)) + for n := range pw.w.State { + hashes = append(hashes, crypto.Keccak256Hash([]byte(n))) + } + sort.Slice(hashes, func(i, j int) bool { + return string(hashes[i][:]) < string(hashes[j][:]) + }) + b.Run(fmt.Sprintf("%dMiB", mib), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + root := candidateC_ProducerOnly(hashes) + if _, err := signECDSA(key, root[:]); err != nil { + b.Fatalf("sign: %v", err) + } + } + }) + } +} + +// BenchmarkVerify_D_IntrinsicHashAll — D's verifier-side incremental cost +// over chain-prep baseline: parallel per-node keccak. The reachability +// walk and map build are amortized into MakeHashDB in production and are +// asymptotically negligible vs the keccak phase, so we exclude them here +// to avoid measuring noise. Producer cost for D is exactly zero (header +// is already signed; no separate WitnessHash signature exists). +func BenchmarkVerify_D_IntrinsicHashAll(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + for _, cores := range benchCores { + b.Run(fmt.Sprintf("%dMiB/cores=%d", mib, cores), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + candidateD_HashAll(pw.w, cores) + } + }) + } + } +} + +func signECDSA(key *ecdsa.PrivateKey, digest []byte) ([]byte, error) { + return crypto.Sign(digest, key) +} + +// ---------------------------------------------------------------------------- +// Correctness checks (Test*) for B/C/D so the bench numbers reflect +// implementations that actually do the right thing. +// ---------------------------------------------------------------------------- + +// TestCandidateB_PageAggregateDeterministic guards the determinism property +// the bench depends on: two runs over identical input produce identical +// aggregate hashes. Without this, the bench number for B would be +// meaningless. +func TestCandidateB_PageAggregateDeterministic(t *testing.T) { + in := bytes.Repeat([]byte{0xab}, 20<<20) // 20 MiB → 2 pages at 15 MiB + a := candidateB_PageParallel(in, 4) + bb := candidateB_PageParallel(in, 4) + if a != bb { + t.Fatalf("B is non-deterministic across runs: %s vs %s", a.Hex(), bb.Hex()) + } +} + +// TestCandidateC_OrderInvariant guards the property that motivates C: the +// Merkle root over sorted node hashes is invariant under map iteration +// order. Build a Witness, hash it, mutate insertion order via fresh map, +// hash again, must match. +func TestCandidateC_OrderInvariant(t *testing.T) { + w := buildSyntheticWitness(2<<20, 512) + root1 := candidateC_PerNodeMerkle(w, 1) + + // Rebuild with the same node set but different insertion order. + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w2 := &Witness{Codes: make(map[string]struct{}), State: make(map[string]struct{})} + w2.Headers = w.Headers + w2.context = w.context + for i := len(nodes) - 1; i >= 0; i-- { + w2.State[string(nodes[i])] = struct{}{} + } + root2 := candidateC_PerNodeMerkle(w2, 1) + if root1 != root2 { + t.Fatalf("C is order-sensitive: %s vs %s", root1.Hex(), root2.Hex()) + } +} + +// TestCandidateD_DetectsMissingNode guards D's load-bearing property: a +// witness missing a referenced node fails the walk. Without this, D would +// silently accept incomplete witnesses, defeating the byte-blame-pre- +// execute argument. +// +// We build a tiny tree manually: node A embeds keccak(B); node B embeds +// keccak(C); C is a leaf. Walking from keccak(A) succeeds. Deleting B +// from the witness must make the walk fail. +func TestCandidateD_DetectsMissingNode(t *testing.T) { + leafC := []byte("leaf-payload-C-padded-to-some-bytes-xyz") + hashC := crypto.Keccak256Hash(leafC) + + nodeB := append([]byte("node-B-prefix-padding-"), hashC[:]...) + hashB := crypto.Keccak256Hash(nodeB) + + nodeA := append([]byte("node-A-prefix-padding-"), hashB[:]...) + hashA := crypto.Keccak256Hash(nodeA) + + w := &Witness{ + Codes: make(map[string]struct{}), + State: map[string]struct{}{ + string(nodeA): {}, + string(nodeB): {}, + string(leafC): {}, + }, + } + if !candidateD_IntrinsicWalk(w, hashA, 1) { + t.Fatal("baseline walk failed; the manual A→B→C chain is malformed") + } + + // Drop B; the walk from A must fail because A's reference to B + // dangles. + delete(w.State, string(nodeB)) + if candidateD_IntrinsicWalk(w, hashA, 1) { + t.Fatal("D accepted a witness missing a referenced node; byte-blame-pre-execute is broken") + } +} diff --git a/core/stateless/witness_commit_helpers_test.go b/core/stateless/witness_commit_helpers_test.go new file mode 100644 index 0000000000..ae30d6204b --- /dev/null +++ b/core/stateless/witness_commit_helpers_test.go @@ -0,0 +1,325 @@ +package stateless + +import ( + "crypto/rand" + "encoding/binary" + "math/big" + "sort" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" +) + +// All identifiers in this file are _test.go-scoped and exist only to drive +// the witness-commit benchmarks. Nothing here is referenced from production +// code; the file is throwaway-friendly per the research-only plan. + +// buildSyntheticWitness constructs a Witness whose canonical EncodeRLP +// output is approximately targetBytes. It populates State with random byte +// blobs of size avgNodeBytes, mimicking how MPT trie nodes accumulate during +// execution. Headers + context carry minimal valid data so EncodeRLP / +// DecodeRLP round-trip without errors; the bench cares about state-bytes +// throughput, not header layout. +func buildSyntheticWitness(targetBytes, avgNodeBytes int) *Witness { + if avgNodeBytes <= 0 { + avgNodeBytes = 256 + } + w := &Witness{ + context: &types.Header{Number: big.NewInt(1)}, + Headers: []*types.Header{{Number: big.NewInt(0)}}, + Codes: make(map[string]struct{}), + State: make(map[string]struct{}), + } + nodeCount := targetBytes / avgNodeBytes + if nodeCount <= 0 { + nodeCount = 1 + } + buf := make([]byte, avgNodeBytes) + for i := 0; i < nodeCount; i++ { + // Distinct content for each node so keccak hashes don't collide and + // the encoded set has the expected size on the wire. + binary.BigEndian.PutUint64(buf[:8], uint64(i)) + if _, err := rand.Read(buf[8:]); err != nil { + panic(err) + } + w.State[string(buf)] = struct{}{} + } + return w +} + +// candidateA_BlobKeccak — current scheme. Keccak over the canonical RLP +// encoding of the entire witness. Single-threaded by design. +func candidateA_BlobKeccak(rlpBytes []byte) common.Hash { + return crypto.Keccak256Hash(rlpBytes) +} + +// candidateB_PageParallel hashes the input in fixed-size pages (15 MiB to +// match the wire fragmentation), each page in its own goroutine, then +// keccaks the concatenation of page hashes. The result is the value the BP +// would sign and the verifier would compare against. +// +// pageSize: 15 MiB to mirror the wire frag. cores: number of goroutines to +// use; honest callers pass GOMAXPROCS or a small constant. +const witnessPageBytes = 15 * 1024 * 1024 + +func candidateB_PageParallel(rlpBytes []byte, cores int) common.Hash { + return candidateB_PageParallelChunked(rlpBytes, witnessPageBytes, cores) +} + +// candidateB_PageParallelChunked is B with an explicit chunk-size knob so +// we can sweep below the 15 MiB wire-page boundary. Chunks smaller than +// the wire page would mean BP signs over a finer-grained aggregate, but +// this is internal accounting — wire pages stay 15 MiB, the producer just +// further subdivides them for hashing. +func candidateB_PageParallelChunked(rlpBytes []byte, chunkBytes, cores int) common.Hash { + pages := splitPages(rlpBytes, chunkBytes) + pageHashes := make([]common.Hash, len(pages)) + + if cores < 1 { + cores = 1 + } + if cores > len(pages) { + cores = len(pages) + } + + var wg sync.WaitGroup + work := make(chan int, len(pages)) + for w := 0; w < cores; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + pageHashes[i] = crypto.Keccak256Hash(pages[i]) + } + }() + } + for i := range pages { + work <- i + } + close(work) + wg.Wait() + + // Aggregate is keccak over concat of page hashes. Order is wire-page + // order (pinned by the producer's chunking). + var concat []byte + for _, h := range pageHashes { + concat = append(concat, h[:]...) + } + return crypto.Keccak256Hash(concat) +} + +func splitPages(buf []byte, pageSize int) [][]byte { + if len(buf) == 0 { + return nil + } + out := make([][]byte, 0, (len(buf)+pageSize-1)/pageSize) + for i := 0; i < len(buf); i += pageSize { + end := i + pageSize + if end > len(buf) { + end = len(buf) + } + out = append(out, buf[i:end]) + } + return out +} + +// candidateC_PerNodeMerkle hashes every state node, sorts the hashes +// lexicographically, and returns a Merkle root over the sorted hashes. +// Each node hash is independent → trivially parallelizable. +// +// On the producer side the BP already has every node's keccak from +// execution, so the per-node hash phase costs zero in steady state. This +// helper still computes the hashes from bytes because the bench needs +// realistic timings without a producer-side trie cache stub. +func candidateC_PerNodeMerkle(w *Witness, cores int) common.Hash { + w.lock.RLock() + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w.lock.RUnlock() + + hashes := make([]common.Hash, len(nodes)) + if cores < 1 { + cores = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(nodes)) + for ww := 0; ww < cores; ww++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + hashes[i] = crypto.Keccak256Hash(nodes[i]) + } + }() + } + for i := range nodes { + work <- i + } + close(work) + wg.Wait() + + sort.Slice(hashes, func(i, j int) bool { + return string(hashes[i][:]) < string(hashes[j][:]) + }) + return merkleRoot(hashes) +} + +// candidateC_ProducerOnly captures the "producer has hashes for free" +// claim: given the precomputed sorted hashes, only Merkle-build cost +// remains. The bench feeds a precomputed slice so we measure JUST the +// reduction stage, isolating the win on the producer's announce path. +func candidateC_ProducerOnly(sortedHashes []common.Hash) common.Hash { + return merkleRoot(sortedHashes) +} + +// merkleRoot builds a binary Merkle tree (keccak over left||right pairs) +// over `leaves` and returns the root. Empty input → zero hash. Odd levels +// duplicate the last leaf (RFC-6962-style). 32-byte leaves. +func merkleRoot(leaves []common.Hash) common.Hash { + if len(leaves) == 0 { + return common.Hash{} + } + level := make([]common.Hash, len(leaves)) + copy(level, leaves) + for len(level) > 1 { + if len(level)%2 == 1 { + level = append(level, level[len(level)-1]) + } + next := make([]common.Hash, len(level)/2) + var buf [64]byte + for i := 0; i < len(level); i += 2 { + copy(buf[:32], level[i][:]) + copy(buf[32:], level[i+1][:]) + next[i/2] = crypto.Keccak256Hash(buf[:]) + } + level = next + } + return level[0] +} + +// candidateD_HashAll is the BENCHMARK helper for D — parallel per-node +// keccak only. No walk, no map build. In production, D's verifier cost is +// essentially "hash every node" because: +// - RLP decode of the witness already happens (cost is paid by both A and D). +// - MakeHashDB already iterates all nodes and keccaks each, so the +// walker's per-node hash work is amortized into existing state-prep. +// - The walker traversal is O(num_nodes × avg_refs_per_node) map lookups, +// dwarfed by keccak throughput on the underlying bytes. +// We measure D's incremental cost over the chain-prep baseline as just the +// parallel keccak phase. The reachability walk lives in +// candidateD_IntrinsicWalk for the correctness test below. +func candidateD_HashAll(w *Witness, cores int) { + w.lock.RLock() + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w.lock.RUnlock() + + if cores < 1 { + cores = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(nodes)) + for ww := 0; ww < cores; ww++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + _ = crypto.Keccak256Hash(nodes[i]) + } + }() + } + for i := range nodes { + work <- i + } + close(work) + wg.Wait() +} + +// candidateD_IntrinsicWalk is the CORRECTNESS reference. It verifies that +// every node in the witness is reachable from the given root via byte- +// embedded hash references, and that no orphan nodes pad the witness. +// Returns true iff the walk reaches every node exactly once. +// +// Approximation: instead of RLP-parsing each node to extract real children, +// the walker scans the node's bytes for any 32-byte window matching a +// known node hash. With random synthetic content the false-positive rate +// is negligible. This is the function the test cases assert against. +// +// `cores` controls parallel hashing of nodes. Walk itself is sequential. +func candidateD_IntrinsicWalk(w *Witness, root common.Hash, cores int) bool { + w.lock.RLock() + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w.lock.RUnlock() + + type entry struct { + bytes []byte + hash common.Hash + } + hashed := make([]entry, len(nodes)) + if cores < 1 { + cores = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(nodes)) + for ww := 0; ww < cores; ww++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + hashed[i] = entry{bytes: nodes[i], hash: crypto.Keccak256Hash(nodes[i])} + } + }() + } + for i := range nodes { + work <- i + } + close(work) + wg.Wait() + + byHash := make(map[common.Hash][]byte, len(hashed)) + for _, e := range hashed { + byHash[e.hash] = e.bytes + } + // Walk: starting from root, scan node bytes for 32-byte sequences that + // match another node's hash. Treat every such sequence as a child + // reference. Visit each node once. + queue := []common.Hash{root} + visited := make(map[common.Hash]struct{}, len(byHash)) + for len(queue) > 0 { + h := queue[0] + queue = queue[1:] + if _, seen := visited[h]; seen { + continue + } + visited[h] = struct{}{} + blob, ok := byHash[h] + if !ok { + // The walker reached a hash that isn't in the witness set. + // In real intrinsic-verify this means the witness is missing a + // node the trie depends on → server lied. Drop. + return false + } + for off := 0; off+32 <= len(blob); off++ { + var ref common.Hash + copy(ref[:], blob[off:off+32]) + if _, exists := byHash[ref]; exists { + if _, seen := visited[ref]; !seen { + queue = append(queue, ref) + } + } + } + } + // Every node in the witness must be reachable from the root. Bloated + // witnesses with orphan nodes are also a server lie (they're paying + // the verifier extra hash cost without contributing to execution). + return len(visited) == len(byHash) +} diff --git a/core/stateless/witness_commit_test.go b/core/stateless/witness_commit_test.go new file mode 100644 index 0000000000..f1bd7d1a66 --- /dev/null +++ b/core/stateless/witness_commit_test.go @@ -0,0 +1,88 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +package stateless + +import ( + "bytes" + "runtime" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestWitnessCommitHashDeterministic(t *testing.T) { + in := bytes.Repeat([]byte{0xab}, 5*WitnessCommitChunkBytes+1234) + a := WitnessCommitHash(in) + b := WitnessCommitHash(in) + if a != b { + t.Fatalf("non-deterministic: %s vs %s", a.Hex(), b.Hex()) + } +} + +// TestWitnessCommitHashWorkerInvariant pins the load-bearing property: the +// committed hash MUST NOT depend on GOMAXPROCS. If it does, two honest peers +// running with different parallelism would diverge on the same witness. +func TestWitnessCommitHashWorkerInvariant(t *testing.T) { + in := bytes.Repeat([]byte{0xcd}, 6*WitnessCommitChunkBytes+777) + prev := runtime.GOMAXPROCS(1) + defer runtime.GOMAXPROCS(prev) + one := WitnessCommitHash(in) + + runtime.GOMAXPROCS(8) + eight := WitnessCommitHash(in) + + if one != eight { + t.Fatalf("hash depends on GOMAXPROCS: 1=%s 8=%s", one.Hex(), eight.Hex()) + } +} + +// TestWitnessCommitHashEmptyInput pins the empty-witness behavior so producer +// and verifier agree on the degenerate case. +func TestWitnessCommitHashEmptyInput(t *testing.T) { + if got := WitnessCommitHash(nil); got != (common.Hash{}) { + t.Fatalf("expected zero hash for nil, got %s", got.Hex()) + } + if got := WitnessCommitHash([]byte{}); got != (common.Hash{}) { + t.Fatalf("expected zero hash for empty slice, got %s", got.Hex()) + } +} + +// TestWitnessCommitHashSingleSubChunk pins the small-input shape: an input +// shorter than one chunk hashes to keccak256(keccak256(input)), since the +// scheme always wraps a final aggregate-keccak around the chunk-hash list. +func TestWitnessCommitHashSingleSubChunk(t *testing.T) { + in := bytes.Repeat([]byte{0x42}, 4096) + got := WitnessCommitHash(in) + + inner := crypto.Keccak256Hash(in) + want := crypto.Keccak256Hash(inner[:]) + if got != want { + t.Fatalf("single-subchunk shape mismatch: got %s want %s", got.Hex(), want.Hex()) + } +} + +// TestWitnessCommitHashMultiChunkShape spot-checks the multi-chunk recipe so a +// silent change in concat order or chunking would be caught immediately. +func TestWitnessCommitHashMultiChunkShape(t *testing.T) { + a := bytes.Repeat([]byte{0x01}, WitnessCommitChunkBytes) + b := bytes.Repeat([]byte{0x02}, WitnessCommitChunkBytes) + c := bytes.Repeat([]byte{0x03}, 1234) + in := append(append(append([]byte{}, a...), b...), c...) + + ha := crypto.Keccak256Hash(a) + hb := crypto.Keccak256Hash(b) + hc := crypto.Keccak256Hash(c) + concat := append(append(append([]byte{}, ha[:]...), hb[:]...), hc[:]...) + want := crypto.Keccak256Hash(concat) + + if got := WitnessCommitHash(in); got != want { + t.Fatalf("multi-chunk shape mismatch: got %s want %s", got.Hex(), want.Hex()) + } +} diff --git a/eth/fetcher/block_fetcher.go b/eth/fetcher/block_fetcher.go index 5f0740002c..244979587f 100644 --- a/eth/fetcher/block_fetcher.go +++ b/eth/fetcher/block_fetcher.go @@ -260,7 +260,7 @@ type BlockFetcher struct { } // NewBlockFetcher creates a block fetcher to retrieve blocks based on hash announcements. -func NewBlockFetcher(light bool, getHeader HeaderRetrievalFn, getBlock blockRetrievalFn, verifyHeader headerVerifierFn, broadcastBlock blockBroadcasterFn, chainHeight chainHeightFn, currentHeader currentHeaderFn, insertHeaders headersInsertFn, insertChain chainInsertFn, dropPeer peerDropFn, jailPeer peerJailFn, enableBlockTracking bool, requireWitness bool, gasCeil uint64) *BlockFetcher { +func NewBlockFetcher(light bool, getHeader HeaderRetrievalFn, getBlock blockRetrievalFn, verifyHeader headerVerifierFn, broadcastBlock blockBroadcasterFn, chainHeight chainHeightFn, currentHeader currentHeaderFn, insertHeaders headersInsertFn, insertChain chainInsertFn, dropPeer peerDropFn, jailPeer peerJailFn, enableBlockTracking bool, requireWitness bool, gasCeil uint64, signedWitnessHash signedWitnessHashFn, cacheWitnessForServing cacheWitnessForServingFn) *BlockFetcher { f := &BlockFetcher{ light: light, notify: make(chan *blockAnnounce), @@ -302,6 +302,8 @@ func NewBlockFetcher(light bool, getHeader HeaderRetrievalFn, getBlock blockRetr f.getHeader, f.chainHeight, f.currentHeader, + signedWitnessHash, + cacheWitnessForServing, gasCeil, ) diff --git a/eth/fetcher/block_fetcher_race_test.go b/eth/fetcher/block_fetcher_race_test.go index b7044988f0..d5a6876010 100644 --- a/eth/fetcher/block_fetcher_race_test.go +++ b/eth/fetcher/block_fetcher_race_test.go @@ -55,6 +55,8 @@ func TestBlockFetcherConcurrentMapAccess(t *testing.T) { false, // no block tracking false, // no witness requirement 0, // no gas ceiling + nil, // no signed-witness lookup + nil, // no cache-witness-for-serving ) // Start the fetcher @@ -254,6 +256,8 @@ func TestWitnessManagerConcurrentAccess(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -489,6 +493,8 @@ func TestBlockFetcherMapStateConsistency(t *testing.T) { false, false, 0, + nil, + nil, ) fetcher.Start() @@ -549,6 +555,8 @@ func TestWitnessManagerStateConsistency(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -611,6 +619,8 @@ func TestBlockFetcherMemoryLeaks(t *testing.T) { false, false, 0, + nil, + nil, ) fetcher.Start() @@ -669,6 +679,8 @@ func TestWitnessManagerMemoryLeaks(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) diff --git a/eth/fetcher/block_fetcher_test.go b/eth/fetcher/block_fetcher_test.go index 62619f6f95..98ce5b5f73 100644 --- a/eth/fetcher/block_fetcher_test.go +++ b/eth/fetcher/block_fetcher_test.go @@ -109,7 +109,7 @@ func newTester(light bool) *fetcherTester { blocks: map[common.Hash]*types.Block{genesis.Hash(): genesis}, drops: make(map[string]bool), } - tester.fetcher = NewBlockFetcher(light, tester.getHeader, tester.getBlock, tester.verifyHeader, tester.broadcastBlock, tester.chainHeight, nil, tester.insertHeaders, tester.insertChain, tester.dropPeer, nil, false, false, 0) + tester.fetcher = NewBlockFetcher(light, tester.getHeader, tester.getBlock, tester.verifyHeader, tester.broadcastBlock, tester.chainHeight, nil, tester.insertHeaders, tester.insertChain, tester.dropPeer, nil, false, false, 0, nil, nil) tester.fetcher.Start() return tester diff --git a/eth/fetcher/metrics.go b/eth/fetcher/metrics.go index ca5730cacb..d69315b95f 100644 --- a/eth/fetcher/metrics.go +++ b/eth/fetcher/metrics.go @@ -32,6 +32,10 @@ var ( witnessVerifyPeersInsuffMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/verify/peers/insufficient", nil) witnessVerifyNoConsensusMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/verify/consensus/none", nil) + // witnessByteMismatchMeter tracks WIT2 byte-correctness drops: a serving + // peer delivered bytes whose keccak256 did not match the BP-signed hash. + witnessByteMismatchMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/byte_mismatch", nil) + // Witness page count metrics witnessPageCountBelowThresholdMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/pagecount/below_threshold", nil) witnessPageCountAboveThresholdMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/pagecount/above_threshold", nil) diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index 9bfca5a52b..f48722f4a0 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -1,6 +1,7 @@ package fetcher import ( + "bytes" "errors" "fmt" "strings" @@ -57,18 +58,35 @@ type cachedWitness struct { timestamp time.Time } +// signedWitnessHashFn returns the BP-signed witness content hash for a block, +// if a WIT2 signed announcement has been received and verified locally. It is +// used by the witness manager on fetch success to verify byte-correctness: +// if the encoded witness bytes don't hash to the signed witnessHash, the +// serving peer lied and is dropped. If no signed announcement is on file +// (e.g., WIT1-only fetch), the check is skipped. +type signedWitnessHashFn func(blockHash common.Hash) (witnessHash common.Hash, ok bool) + +// cacheWitnessForServingFn hands successfully-fetched witness bytes to the +// network handler so peers can serve them pre-import. Called only after the +// byte-correctness check (vs. BP-signed witnessHash, when present) has passed, +// so the cached bytes are safe to serve. The witnessHash is the canonical +// keccak256 of the canonical encoding, identical to what the BP signed. +type cacheWitnessForServingFn func(blockHash common.Hash, witnessBytes []byte, witnessHash common.Hash) + // witnessManager handles the logic specific to fetching and managing witnesses // for blocks, isolating it from the main BlockFetcher loop. type witnessManager struct { // Parent fetcher fields/methods required - parentQuit <-chan struct{} // Parent fetcher's quit channel - parentDropPeer peerDropFn // Function to drop a misbehaving peer - parentJailPeer peerJailFn // Function to jail a peer to prevent reconnection (optional) - parentEnqueueCh chan<- *enqueueRequest // Channel to send completed blocks+witnesses back - parentGetBlock blockRetrievalFn // Function to check if block is known locally - parentGetHeader HeaderRetrievalFn // Function to check if header is known locally (needed for checks) - parentChainHeight chainHeightFn // Retrieve chain height for distance checks - parentCurrentHeader currentHeaderFn // Retrieve current block header for gas limit + parentQuit <-chan struct{} // Parent fetcher's quit channel + parentDropPeer peerDropFn // Function to drop a misbehaving peer + parentJailPeer peerJailFn // Function to jail a peer to prevent reconnection (optional) + parentEnqueueCh chan<- *enqueueRequest // Channel to send completed blocks+witnesses back + parentGetBlock blockRetrievalFn // Function to check if block is known locally + parentGetHeader HeaderRetrievalFn // Function to check if header is known locally (needed for checks) + parentChainHeight chainHeightFn // Retrieve chain height for distance checks + parentCurrentHeader currentHeaderFn // Retrieve current block header for gas limit + parentSignedWitnessHash signedWitnessHashFn // WIT2: lookup a BP-signed witness hash for byte-correctness verification + parentCacheWitnessForServing cacheWitnessForServingFn // WIT2: hand bytes to the handler for pre-import serving by peers // Witness-specific state pending map[common.Hash]*witnessRequestState // Blocks waiting for witness or actively fetching. @@ -108,6 +126,8 @@ func newWitnessManager( parentGetHeader HeaderRetrievalFn, parentChainHeight chainHeightFn, parentCurrentHeader currentHeaderFn, + parentSignedWitnessHash signedWitnessHashFn, + parentCacheWitnessForServing cacheWitnessForServingFn, gasCeil uint64, ) *witnessManager { // Create TTL cache with 1 minute expiration for witnesses @@ -117,14 +137,16 @@ func newWitnessManager( ) m := &witnessManager{ - parentQuit: parentQuit, - parentDropPeer: parentDropPeer, - parentJailPeer: parentJailPeer, - parentEnqueueCh: parentEnqueueCh, - parentGetBlock: parentGetBlock, - parentGetHeader: parentGetHeader, - parentChainHeight: parentChainHeight, - parentCurrentHeader: parentCurrentHeader, + parentQuit: parentQuit, + parentDropPeer: parentDropPeer, + parentJailPeer: parentJailPeer, + parentEnqueueCh: parentEnqueueCh, + parentGetBlock: parentGetBlock, + parentGetHeader: parentGetHeader, + parentChainHeight: parentChainHeight, + parentCurrentHeader: parentCurrentHeader, + parentSignedWitnessHash: parentSignedWitnessHash, + parentCacheWitnessForServing: parentCacheWitnessForServing, pending: make(map[common.Hash]*witnessRequestState), witnessUnavailable: make(map[common.Hash]time.Time), witnessCache: witnessCache, @@ -631,15 +653,104 @@ func (m *witnessManager) processWitnessResponse(peer string, hash common.Hash, r return } if len(witness) == 0 { + // Empty/unavailable response: the peer doesn't have the body yet + // (e.g. WIT2 announce-only relayer that has not finished importing). + // This is a soft failure — back off the request so another peer can + // be tried, but do NOT drop the responder. Dropping on "no body" is + // what makes announce-only fallback peers unsafe to ask, which would + // erase the WIT2 multi-hop latency win at hop>=2. log.Debug("[wm] Received empty witness response from peer", "peer", peer, "hash", hash) - m.handleWitnessFetchFailureExt(hash, peer, errors.New("empty witness response"), false) + m.handleWitnessFetchFailureExt(hash, "", errors.New("empty witness response"), false) + return + } + + // WIT2: byte-correctness check. If we have a BP-signed announcement on + // file for this block, the encoded witness bytes must hash to the + // signed witnessHash. State-root failures (content-correctness) are + // handled later in the import path and do NOT drop the server. + if !m.verifyAgainstSignedHash(peer, hash, witness[0]) { return } + // WIT2: hand the verified bytes to the handler for pre-import serving. + // Done before import-side enqueue so a peer asking us for the body + // during the chain-write window gets bytes from the in-flight cache + // rather than empty results. + m.cacheVerifiedWitnessForServing(hash, witness[0]) + metrics.RecordPerItemDuration(blockWitnessItemDownloadTimer, res.Time, 1) m.handleWitnessFetchSuccess(peer, hash, witness[0], announcedAt) } +// cacheVerifiedWitnessForServing canonical-encodes the witness and forwards +// the bytes to the handler so other peers can fetch them pre-import. No-op +// when no cache callback is configured (legacy WIT1-only paths) or when no +// BP-signed witness hash is on file for this block — without a signature we +// cannot prove byte-correctness to downstream peers, mirroring the same +// guard that handleWitnessBroadcast applies on the broadcast path. EncodeRLP +// failure is logged but does not drop the server — failure to share is not +// a peer's fault and the import path is unaffected. +func (m *witnessManager) cacheVerifiedWitnessForServing(blockHash common.Hash, witness *stateless.Witness) { + if m.parentCacheWitnessForServing == nil || witness == nil { + return + } + if m.parentSignedWitnessHash == nil { + return + } + if _, has := m.parentSignedWitnessHash(blockHash); !has { + return + } + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + log.Warn("[wm] Failed to encode witness for pre-import serving cache", "hash", blockHash, "err", err) + return + } + body := buf.Bytes() + m.parentCacheWitnessForServing(blockHash, body, stateless.WitnessCommitHash(body)) +} + +// verifyAgainstSignedHash returns false (and reports the failure to the +// fetch-failure handler, which drops the peer) when a BP-signed witness hash +// is on file for this block and the received witness's encoded bytes don't +// hash to it. Returns true when no signed hash is on file (WIT1 path) or the +// hash matches. +func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, witness *stateless.Witness) bool { + if m.parentSignedWitnessHash == nil { + return true + } + expected, has := m.parentSignedWitnessHash(hash) + if !has { + return true + } + actual, err := encodedWitnessHash(witness) + if err != nil { + log.Warn("[wm] Failed to encode received witness for hash check", "peer", peer, "hash", hash, "err", err) + m.handleWitnessFetchFailureExt(hash, peer, fmt.Errorf("witness encode failed: %w", err), false) + return false + } + if actual != expected { + witnessByteMismatchMeter.Mark(1) + log.Warn("[wm] Witness bytes do not match BP-signed hash; dropping peer", + "peer", peer, "block", hash, "expected", expected, "actual", actual) + m.handleWitnessFetchFailureExt(hash, peer, errors.New("witness hash mismatch"), false) + return false + } + return true +} + +// encodedWitnessHash returns keccak256 over the canonical RLP encoding of the +// witness. Witness.EncodeRLP sorts state nodes lexicographically so the output +// is byte-identical for any two witnesses with the same logical contents, +// which is what makes BP-signed witness-hash verification work across nodes. +// The producer side mirrors this through eth.handler.canonicalWitnessHash. +func encodedWitnessHash(w *stateless.Witness) (common.Hash, error) { + var buf bytes.Buffer + if err := w.EncodeRLP(&buf); err != nil { + return common.Hash{}, err + } + return stateless.WitnessCommitHash(buf.Bytes()), nil +} + // handleWitnessFetchSuccess processes a successfully fetched witness. // It needs the original origin from the op state for consistency checks. func (m *witnessManager) handleWitnessFetchSuccess(fetchPeer string, hash common.Hash, witness *stateless.Witness, announcedAt time.Time) { diff --git a/eth/fetcher/witness_manager_test.go b/eth/fetcher/witness_manager_test.go index 4d7948bc63..e8cceac53a 100644 --- a/eth/fetcher/witness_manager_test.go +++ b/eth/fetcher/witness_manager_test.go @@ -72,7 +72,7 @@ func newTestWitnessManager() *testWitnessManager { getHeader := HeaderRetrievalFn(func(hash common.Hash) *types.Header { return nil }) chainHeight := chainHeightFn(func() uint64 { return 100 }) - tw.manager = newWitnessManager(quit, dropPeer, nil, enqueueCh, getBlock, getHeader, chainHeight, nil, 0) + tw.manager = newWitnessManager(quit, dropPeer, nil, enqueueCh, getBlock, getHeader, chainHeight, nil, nil, nil, 0) return tw } @@ -190,6 +190,8 @@ func TestHandleNeedDuplicates(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -247,6 +249,8 @@ func TestHandleNeedKnownBlock(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -296,6 +300,8 @@ func TestHandleBroadcast(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -369,6 +375,8 @@ func TestWitnessUnavailable(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -431,6 +439,8 @@ func TestForget(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -482,6 +492,8 @@ func TestHandleFilterResult(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -521,6 +533,8 @@ func TestCheckCompleting(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -564,6 +578,8 @@ func TestWitnessFetchFailure(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -677,6 +693,8 @@ func TestCleanupUnavailableCache(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -733,6 +751,8 @@ func TestWitnessFetchWithBlockNoLongerPending(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -846,6 +866,8 @@ func TestTick(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -932,6 +954,8 @@ func TestTickMaxRetries(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -991,6 +1015,8 @@ func TestTickWithWitnessAlreadyPresent(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1074,6 +1100,8 @@ func TestHandleWitnessFetchSuccess(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1142,6 +1170,8 @@ func TestHandleWitnessFetchSuccessNoPending(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1175,6 +1205,8 @@ func TestHandleWitnessFetchSuccessWitnessAlreadyPresent(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1225,6 +1257,8 @@ func TestRescheduleWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1281,6 +1315,8 @@ func TestSafeEnqueueWithNilWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1325,6 +1361,8 @@ func TestSafeEnqueueChannelClosed(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1362,6 +1400,8 @@ func TestHandleNeedDistanceCheck(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1406,6 +1446,8 @@ func TestHandleNeedMissingFetchWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1446,6 +1488,8 @@ func TestLoop(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1520,6 +1564,8 @@ func TestHandleFilterResultWithoutWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1561,6 +1607,8 @@ func TestCheckCompletingWithoutWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1602,6 +1650,8 @@ func TestFetchWitnessError(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1645,6 +1695,8 @@ func TestHandleFilterResultWitnessUnavailable(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1688,6 +1740,8 @@ func TestHandleFilterResultDuplicate(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1734,6 +1788,8 @@ func TestCheckCompletingWitnessUnavailable(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1777,6 +1833,8 @@ func TestCheckCompletingDuplicate(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1830,6 +1888,8 @@ func TestCheckCompletingKnownBlock(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1868,6 +1928,8 @@ func TestTickInvalidPendingState(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1911,6 +1973,8 @@ func TestTickNotReadyYet(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1974,6 +2038,8 @@ func TestSafeEnqueueSuccess(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -2037,6 +2103,8 @@ func TestConcurrentWitnessFetchFailure(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -2096,6 +2164,8 @@ func TestCheckWitnessPageCountWithPeerJailing(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2160,6 +2230,8 @@ func TestCheckWitnessPageCountWithConsensusFailure(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2253,6 +2325,8 @@ func TestCheckWitnessPageCountWithPeerFailures(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2327,6 +2401,8 @@ func TestCheckWitnessPageCountWithInsufficientPeers(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2403,6 +2479,8 @@ func TestCheckWitnessPageCountBelowThreshold(t *testing.T) { getHeader, chainHeight, currentHeader, + nil, + nil, gasCeil, ) @@ -2467,6 +2545,8 @@ func TestCheckWitnessPageCountBelowThreshold(t *testing.T) { getHeader, chainHeight, currentHeader, + nil, + nil, gasCeil, ) @@ -2527,6 +2607,8 @@ func TestCheckWitnessPageCountBelowThreshold(t *testing.T) { getHeader, chainHeight, nil, // currentHeader is nil + nil, // signedWitnessHash is nil + nil, // cacheWitnessForServing is nil gasCeil, ) @@ -2594,6 +2676,8 @@ func TestConcurrentWitnessVerification(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2661,7 +2745,7 @@ func TestFetchWitnessNoPeerError(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) hash := common.HexToHash("0xabc") @@ -2726,7 +2810,7 @@ func TestWitnessTickPreservesValidPendingEntry(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) block := createTestBlock(101) @@ -2777,7 +2861,7 @@ func TestFetchWitnessOtherErrorKeepsPending(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) hash := common.HexToHash("0xfade") @@ -2843,7 +2927,7 @@ func TestCheckWitnessPageCountAtThreshold(t *testing.T) { manager := newWitnessManager( quit, dropPeer, jailPeer, enqueueCh, - getBlock, getHeader, chainHeight, currentHeader, 30_000_000, + getBlock, getHeader, chainHeight, currentHeader, nil, nil, 30_000_000, ) threshold := manager.calculatePageThreshold() @@ -2889,6 +2973,8 @@ func newWitnessManagerForTest(t *testing.T) (*witnessManager, <-chan *enqueueReq HeaderRetrievalFn(func(common.Hash) *types.Header { return nil }), chainHeightFn(func() uint64 { return 100 }), nil, + nil, + nil, 0, ) return m, enqueueCh @@ -3164,6 +3250,8 @@ func TestWitnessCalculatePageThresholdMinimumClamp(t *testing.T) { currentHeaderFn(func() *types.Header { return &types.Header{Number: big.NewInt(100), GasLimit: 1} // < 1MB → 0 pages pre-clamp }), + nil, + nil, 0, ) if got := m.calculatePageThreshold(); got < 1 { @@ -3183,6 +3271,8 @@ func TestWitnessCalculatePageThresholdMinimumClamp(t *testing.T) { HeaderRetrievalFn(func(common.Hash) *types.Header { return nil }), chainHeightFn(func() uint64 { return 100 }), nil, // no current header → fallback to config path + nil, // no signed-witness lookup + nil, // no cache-witness-for-serving 1, // 1 gas ceil → 0 pages pre-clamp ) if got := m.calculatePageThreshold(); got < 1 { @@ -3340,6 +3430,8 @@ func TestVerifyWitnessPageCountDishonestPeer(t *testing.T) { HeaderRetrievalFn(func(common.Hash) *types.Header { return nil }), chainHeightFn(func() uint64 { return 100 }), nil, + nil, + nil, 0, ) @@ -3413,7 +3505,7 @@ func TestWitnessLoopDrivesFetchesForPending(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) fetchCalled := make(chan struct{}, 1) diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go new file mode 100644 index 0000000000..71aad5518f --- /dev/null +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -0,0 +1,301 @@ +package fetcher + +import ( + "bytes" + "errors" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/eth/protocols/eth" +) + +// blockAnnounceForTest constructs a minimal blockAnnounce wired to a fetch +// function that fails closed. Used to seed manager.pending so that the +// processWitnessResponse path can take its happy/sad branches without +// going through the full announce → request flow. +func blockAnnounceForTest(origin string, hash common.Hash, number uint64) *blockAnnounce { + return &blockAnnounce{ + origin: origin, + hash: hash, + number: number, + time: time.Now(), + fetchWitness: func(common.Hash, chan *eth.Response) (*eth.Request, error) { return nil, errors.New("noop") }, + } +} + +// TestProcessWitnessResponseDropsOnHashMismatch is the load-bearing safety +// guarantee for WIT2 pre-import serving: a peer that returns bytes whose +// keccak256 doesn't match the BP-signed witnessHash must be dropped, even +// if every other check passes. +// +// Without this, a malicious server could pollute downstream relayers with +// bytes the BP never committed to, and the relayers would face state-root +// failures during execution that they cannot attribute to the right party. +func TestProcessWitnessResponseDropsOnHashMismatch(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(101) + hash := block.Hash() + + // Prepare a "correct" witness that the BP signed over. + correct := createTestWitnessForBlock(block) + var buf bytes.Buffer + if err := correct.EncodeRLP(&buf); err != nil { + t.Fatalf("encode: %v", err) + } + signedWitnessHash := stateless.WitnessCommitHash(buf.Bytes()) + + // The peer will return a different witness — same block number, but + // the trie differs, producing different bytes and a different hash. + differentHeader := types.CopyHeader(block.Header()) + differentHeader.GasUsed = 999_999_999 + differentBlock := types.NewBlockWithHeader(differentHeader) + rogueWitness := createTestWitnessForBlock(differentBlock) + + // Inject the signed-witness lookup so processWitnessResponse uses it. + tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { + if h == hash { + return signedWitnessHash, true + } + return common.Hash{}, false + } + + // Seed pending state so the failure handler back-off path is exercised. + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "rogue", block: block}, + announce: blockAnnounceForTest("rogue", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + // Fabricate the response container expected by processWitnessResponse. + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{rogueWitness}, + } + + tw.manager.processWitnessResponse("rogue", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 1 || tw.droppedPeers[0] != "rogue" { + t.Fatalf("expected the lying peer to be dropped, got drops=%v", tw.droppedPeers) + } +} + +// TestProcessWitnessResponseAcceptsMatchingHash is the contrapositive: a +// peer that returns bytes whose keccak256 matches the BP-signed hash must +// not be dropped. State-root mismatches on subsequent execution are handled +// elsewhere and do not reflect on the server. +func TestProcessWitnessResponseAcceptsMatchingHash(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(101) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + t.Fatalf("encode: %v", err) + } + matchingHash := stateless.WitnessCommitHash(buf.Bytes()) + + tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { + return matchingHash, true + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "honest", block: block}, + announce: blockAnnounceForTest("honest", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{witness}, + } + + tw.manager.processWitnessResponse("honest", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("honest peer must not be dropped on hash match; drops=%v", tw.droppedPeers) + } +} + +// TestProcessWitnessResponseCachesForServingAfterByteCheck is the regression +// for the missing pre-import-serving cache populate. The fetcher must hand +// canonical-encoded bytes back to the eth handler after a verified fetch so +// downstream peers can ask THIS node for the body before chain-write +// finishes. Without this callback firing, multi-hop fast propagation has no +// body source past hop-1 — the entire WIT2 latency win evaporates. +func TestProcessWitnessResponseCachesForServingAfterByteCheck(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(202) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + t.Fatalf("encode: %v", err) + } + want := stateless.WitnessCommitHash(buf.Bytes()) + + var ( + gotBlock common.Hash + gotBytes []byte + gotHash common.Hash + ) + tw.manager.parentCacheWitnessForServing = func(blockHash common.Hash, witnessBytes []byte, witnessHash common.Hash) { + gotBlock = blockHash + gotBytes = append([]byte{}, witnessBytes...) + gotHash = witnessHash + } + tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { + if h == hash { + return want, true + } + return common.Hash{}, false + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "honest", block: block}, + announce: blockAnnounceForTest("honest", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{witness}, + } + + tw.manager.processWitnessResponse("honest", hash, res, time.Now()) + + if gotBlock != hash { + t.Fatalf("cache callback not invoked or wrong blockHash: got %s want %s", gotBlock.Hex(), hash.Hex()) + } + if gotHash != want { + t.Fatalf("cache callback received wrong witnessHash: got %s want %s", gotHash.Hex(), want.Hex()) + } + if len(gotBytes) == 0 { + t.Fatal("cache callback received empty bytes; pre-import serving cache will not be populated") + } +} + +// TestProcessWitnessResponseSkipsCheckWhenNoSignature confirms the WIT1 +// fallback path: when the receiver has no BP-signed announcement on file +// for a block, byte-correctness verification is skipped (there's nothing to +// verify against), and behavior matches the pre-WIT2 code path. +func TestProcessWitnessResponseSkipsCheckWhenNoSignature(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(101) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + + // No lookup configured → skip path. + tw.manager.parentSignedWitnessHash = func(common.Hash) (common.Hash, bool) { + return common.Hash{}, false + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "wit1-peer", block: block}, + announce: blockAnnounceForTest("wit1-peer", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{witness}, + } + + tw.manager.processWitnessResponse("wit1-peer", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("WIT1 fallback must not drop any peer; drops=%v", tw.droppedPeers) + } +} + + +// TestCacheVerifiedWitnessSkipsWhenNoSignedHash is the regression for the +// blame-asymmetry bug: caching unverified bytes for serving means a downstream +// peer would ask us for the body, get bytes that don't match THEIR BP-signed +// hash (because we never had one to compare against), and drop us. The fix +// gates serving-cache population on having a BP-signed hash on file — +// mirroring the broadcast path's invariant. +func TestCacheVerifiedWitnessSkipsWhenNoSignedHash(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(303) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + + cacheCalls := 0 + tw.manager.parentCacheWitnessForServing = func(common.Hash, []byte, common.Hash) { + cacheCalls++ + } + // No signed hash on file for any block → cache must not be populated. + tw.manager.parentSignedWitnessHash = func(common.Hash) (common.Hash, bool) { + return common.Hash{}, false + } + + tw.manager.cacheVerifiedWitnessForServing(hash, witness) + if cacheCalls != 0 { + t.Fatalf("cache populated without BP-signed hash on file; downstream peers will drop us as liars (calls=%d)", cacheCalls) + } +} + +// TestProcessWitnessResponseEmptyDoesNotDropAnnounceOnlyPeer locks the +// fast-path safety property: a peer that only saw the signed announce (and +// has not yet imported the body) responds with empty bytes when asked. That +// is NOT lying — they simply do not have it yet. Dropping them here would +// shrink the pool of candidate body sources and re-introduce the regression +// where WIT2 multi-hop propagation has nowhere to fetch from at hop>=2. +// +// Byte-mismatch (handled by TestProcessWitnessResponseDropsOnHashMismatch) +// is the only condition that should drop a serving peer. +func TestProcessWitnessResponseEmptyDoesNotDropAnnounceOnlyPeer(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(404) + hash := block.Hash() + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "announce-only", block: block}, + announce: blockAnnounceForTest("announce-only", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{}, // empty/unavailable + } + + tw.manager.processWitnessResponse("announce-only", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("empty response must NOT drop the responder; drops=%v", tw.droppedPeers) + } +} diff --git a/eth/handler.go b/eth/handler.go index fc731a7579..794e3d2de6 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -184,6 +184,22 @@ type handler struct { syncWithWitnesses bool syncAndProduceWitnesses bool // Whether to sync blocks and produce witnesses simultaneously + // WIT2: cache of BP-signed witness announcements, keyed by block hash. + // Populated by both produced (signed locally) and received-and-verified + // announcements. Consulted by the relay path to dedup, by the body + // broadcast path to re-emit signed announces, and by the fetch path to + // supply the byte-correctness comparison hash. + signedWitnesses *signedWitnessCache + + // WIT2: in-flight witness bodies received via NewWitness broadcast but + // not yet written to chain storage. Lets serving peers answer GetWitness + // requests during the import gap, which is what unlocks fast multi-hop + // propagation — without it, only the producer/post-import nodes can + // serve and stateless nodes more than 1 hop away wait per-hop on full + // validation before they can pull from anyone. + pendingWitnessBodies *pendingWitnessBodyCache + wit2PeerTracker *peerWit2Tracker + // channels for fetcher, syncer, txsyncLoop quitSync chan struct{} @@ -223,6 +239,9 @@ func newHandler(config *handlerConfig) (*handler, error) { syncWithWitnesses: config.syncWithWitnesses, syncAndProduceWitnesses: config.syncAndProduceWitnesses, privateTxGetter: config.privateTxGetter, + signedWitnesses: newSignedWitnessCache(), + pendingWitnessBodies: newPendingWitnessBodyCache(witnessBodyCacheCapacity), + wit2PeerTracker: newPeerWit2Tracker(), } log.Info("Sync with witnesses", "enabled", config.syncWithWitnesses) @@ -306,7 +325,7 @@ func newHandler(config *handlerConfig) (*handler, error) { } } - h.blockFetcher = fetcher.NewBlockFetcher(false, nil, h.chain.GetBlockByHash, validator, h.BroadcastBlock, heighter, h.chain.CurrentHeader, nil, inserter, h.removePeer, h.jailPeer, h.enableBlockTracking, h.statelessSync.Load() || h.syncWithWitnesses, config.gasCeil) + h.blockFetcher = fetcher.NewBlockFetcher(false, nil, h.chain.GetBlockByHash, validator, h.BroadcastBlock, heighter, h.chain.CurrentHeader, nil, inserter, h.removePeer, h.jailPeer, h.enableBlockTracking, h.statelessSync.Load() || h.syncWithWitnesses, config.gasCeil, h.lookupSignedWitnessHash, h.cacheVerifiedWitnessForServing) fetchTx := func(peer string, hashes []common.Hash) error { p := h.peers.peer(peer) @@ -556,6 +575,25 @@ func (h *handler) removePeer(id string) { log.Debug("Handler: removing peer", "peer", peer.ID(), "inbound", peer.Peer.Inbound(), "duration", common.PrettyDuration(peer.Peer.Lifetime())) peer.Peer.Disconnect(p2p.DiscUselessPeer) } + if h.wit2PeerTracker != nil { + h.wit2PeerTracker.forget(id) + } +} + +// strikeWit2Peer records a wit2 misbehavior strike (bad sig, wrong producer) +// and disconnects the peer once the strike threshold is exceeded inside the +// decay window. Single bad announcements are tolerated to allow for stray +// pre-fork content; sustained misbehavior is not. +func (h *handler) strikeWit2Peer(peer *wit.Peer) { + if h.wit2PeerTracker == nil { + return + } + if !h.wit2PeerTracker.strike(peer.ID()) { + return + } + wit2StrikeDisconnectMeter.Mark(1) + peer.Log().Warn("wit2: disconnecting peer for repeated invalid signed announcements") + h.removePeer(peer.ID()) } // unregisterPeer removes a peer from the downloader, fetchers and main peer set. @@ -715,6 +753,11 @@ func (h *handler) BroadcastBlock(block *types.Block, witness *stateless.Witness, peer.AsyncSendNewBlock(block, td) } + // WIT2: co-send the witness announcement to every direct block + // recipient that doesn't yet have the witness. Closes the gap where + // blocks fan out at sqrt(N) but witnesses didn't. + h.cosendWitnessAnnouncement(hash, block.NumberU64(), transfer, staticAndTrustedPeers) + log.Debug("Propagated block", "hash", hash, "recipients", len(transfer), "static and trusted recipients", len(staticAndTrustedPeers), "duration", common.PrettyDuration(time.Since(block.ReceivedAt))) return @@ -727,8 +770,17 @@ func (h *handler) BroadcastBlock(block *types.Block, witness *stateless.Witness, } if h.chain.HasWitness(hash) { + // Try to attach a BP signature so WIT2 peers can fast-validate and + // transitively relay. Falls through to unsigned WIT1 announces for + // peers below WIT2 (and for any peer if signing is unavailable, e.g., + // non-producer nodes that didn't receive a signed announce upstream). + signedAnn, hasSigned := h.signLocalWitnessAnnouncement(hash, block.NumberU64()) for _, peer := range peersWithoutWitness { - peer.Peer.AsyncSendNewWitnessHash(block.Header().Hash(), block.NumberU64()) + if hasSigned && peer.Peer.Version() >= wit.WIT2 { + peer.Peer.AsyncSendSignedWitnessAnnouncement(signedAnn) + } else { + peer.Peer.AsyncSendNewWitnessHash(block.Header().Hash(), block.NumberU64()) + } } log.Debug("Announced witness", "hash", hash, "recipients", len(peers), "duration", common.PrettyDuration(time.Since(block.ReceivedAt))) } diff --git a/eth/handler_wit.go b/eth/handler_wit.go index dfb1473470..282d8ec6c5 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -1,6 +1,7 @@ package eth import ( + "bytes" "errors" "fmt" "time" @@ -55,6 +56,8 @@ func (h *witHandler) Handle(peer *wit.Peer, packet wit.Packet) error { return h.handleWitnessBroadcast(peer, packet.Witness) case *wit.NewWitnessHashesPacket: return h.handleWitnessHashesAnnounce(peer, packet.Hashes, packet.Numbers) + case *wit.SignedNewWitnessHashesPacket: + return h.handleSignedWitnessAnnouncements(peer, packet.Announcements) case *wit.GetWitnessPacket: // Call handleGetWitness which returns the raw RLP data response, err := h.handleGetWitness(peer, packet) @@ -83,6 +86,39 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W peer.AddKnownWitness(witness.Header().Hash()) hash := witness.Header().Hash() + // WIT2: cache the encoded body so this node can serve it pre-import. We + // only expose the cache for serving when bytes match a BP-signed + // witnessHash on file — otherwise an upstream that lied about the bytes + // would make us serve garbage and get dropped by downstream peers as + // liars, even though we just relayed what we received. If no signed + // announcement is on file (WIT1 path), we skip the pre-import cache so + // we don't take on byte-blame risk for unverified content; the import + // path is unaffected. + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) + } else { + bodyBytes := buf.Bytes() + bodyHash := stateless.WitnessCommitHash(bodyBytes) + signed, hasSigned := (*handler)(h).signedWitnesses.get(hash) + switch { + case hasSigned && signed.WitnessHash == bodyHash: + (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) + case hasSigned && signed.WitnessHash != bodyHash: + // Upstream sent bytes that don't match the BP-signed commitment. + // Don't cache for serving and surface this peer as misbehaving. + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; not caching for serving", + "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) + default: + // No signed announcement on file: WIT1 fallback. Don't expose + // for WIT2 pre-import serving since we cannot prove byte- + // correctness to downstream peers. The body still flows into + // the import path below. + wit2BroadcastUnverifiedSkippedMeter.Mark(1) + } + } + // Inject the witness into the block fetcher's cache if h.blockFetcher != nil { log.Debug("Injecting witness into block fetcher", "hash", hash, "peer", peer.ID()) @@ -110,53 +146,142 @@ func (h *witHandler) handleWitnessHashesAnnounce(peer *wit.Peer, hashes []common return nil } -// handleGetWitness retrieves witnesses for the requested block hashes and returns them as raw RLP data. -// It now returns the data and error, rather than sending the reply directly. -// The returned data is [][]byte, as rlp.RawValue is essentially []byte. -func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) (wit.WitnessPacketResponse, error) { - log.Debug("handleGetWitness processing request", "peer", peer.ID(), "reqID", req.RequestId, "witnessPages", len(req.WitnessPages)) - // list different witnesses to query - seen := make(map[common.Hash]struct{}, len(req.WitnessPages)) - for _, witnessPage := range req.WitnessPages { - seen[witnessPage.Hash] = struct{}{} +// handleSignedWitnessAnnouncements verifies BP signatures on incoming WIT2 +// announcements and relays valid ones to peers that have not seen them. +// Body fetches are driven elsewhere (the block fetcher's witness manager +// kicks them off when an announcement materialises). Each announcement is +// processed independently so a single bad entry does not poison a batch. +// +// On verification failure (bad signature, unknown signer) the sender is +// **not** dropped at this layer — they may simply be relaying a bad upstream +// announcement. Drops are reserved for byte-correctness failures at fetch +// time. We do, however, count invalid announcements via metrics to surface +// misbehaving relayers. +func (h *witHandler) handleSignedWitnessAnnouncements(peer *wit.Peer, anns []wit.SignedWitnessAnnouncement) error { + wit2RelayInMeter.Mark(int64(len(anns))) + + // Per-peer rate limit: every announcement consumes one token. Rejected + // packets are dropped wholesale to keep accounting simple — an honest + // peer should never trip this in practice. + if !(*handler)(h).wit2PeerTracker.allow(peer.ID(), len(anns)) { + wit2RateLimitDropMeter.Mark(int64(len(anns))) + peer.Log().Debug("wit2: rate-limited signed announcements", "count", len(anns)) + return nil } - // witness sizes query - witnessSize := make(map[common.Hash]uint64, len(seen)) - for witnessBlockHash := range seen { - size := rawdb.ReadWitnessSize(h.Chain().DB(), witnessBlockHash) - if size == nil { - witnessSize[witnessBlockHash] = 0 - } else { - witnessSize[witnessBlockHash] = *size + for _, ann := range anns { + // Sender saw this announcement; suppress relay back to them. Do NOT + // mark them as a body-holder — they may be relaying without bytes. + peer.AddKnownAnnounce(ann.BlockHash) + + if !h.acceptSignedAnnouncement(peer, ann) { + continue + } + + // Cache + dedup. Skip relay if we've already relayed this hash recently. + if !h.signedWitnesses.putIfNewer(ann) { + wit2DuplicateMeter.Mark(1) + continue } + + // Relay to every WIT2 peer that doesn't already have this witness, + // excluding the sender we received it from. + (*handler)(h).relaySignedAnnouncement(peer.ID(), ann) } - // query witnesses by demand - var response wit.WitnessPacketResponse - witnessCache := make(map[common.Hash][]byte, len(seen)) + return nil +} + +// acceptSignedAnnouncement runs signature recovery and producer-binding for a +// single announcement. Returns true when the announcement is verified and the +// caller should proceed to cache + relay; false when the caller should skip +// it. Strikes are issued only on confirmed misbehavior (bad signature or +// signer ≠ scheduled producer for a known header). Pre-import deferral +// (header not yet local) is silent: no strike, no relay, retry on the next +// packet for the same hash once the block arrives. +func (h *witHandler) acceptSignedAnnouncement(peer *wit.Peer, ann wit.SignedWitnessAnnouncement) bool { + signer, err := verifySignedAnnouncement(ann) + if err != nil { + wit2InvalidSigMeter.Mark(1) + peer.Log().Debug("wit2: invalid signed announcement", "blockHash", ann.BlockHash, "err", err) + (*handler)(h).strikeWit2Peer(peer) + return false + } + ok, headerAvailable := (*handler)(h).isScheduledProducer(signer, ann.BlockNumber, ann.BlockHash) + if ok { + return true + } + if !headerAvailable { + peer.Log().Debug("wit2: header not yet local for announced block; deferring announce", + "blockHash", ann.BlockHash, "blockNumber", ann.BlockNumber) + return false + } + wit2NotValidatorMeter.Mark(1) + peer.Log().Debug("wit2: signer is not the scheduled producer for this block", + "blockHash", ann.BlockHash, "blockNumber", ann.BlockNumber, "signer", signer) + (*handler)(h).strikeWit2Peer(peer) + return false +} + +// relaySignedAnnouncement forwards a verified signed announcement to all WIT2 +// peers in `peersWithoutWitness` excluding the original sender. WIT0/WIT1 +// peers are skipped — they don't speak the signed wire format. Their slow +// path remains: they'll learn about the witness through the existing post- +// import unsigned announce path on adjacent WIT2 nodes when those nodes +// finish importing. +func (h *handler) relaySignedAnnouncement(senderID string, ann wit.SignedWitnessAnnouncement) { + recipients := h.peers.peersWithoutSignedAnnounce(ann.BlockHash) + relayed := 0 + for _, peer := range recipients { + if peer.Peer.ID() == senderID { + continue + } + if peer.Peer.Version() < wit.WIT2 { + continue + } + peer.Peer.AsyncSendSignedWitnessAnnouncement(ann) + relayed++ + } + if relayed > 0 { + wit2RelayOutMeter.Mark(int64(relayed)) + } +} + +// handleGetWitness retrieves witnesses for the requested block hashes and returns them as raw RLP data. +// +// WIT2: per-block lookup consults the in-flight body cache before falling back +// to chain storage. This lets nodes serve witnesses they have received from +// the network but not yet imported. Byte-correctness blame attaches to the +// server only on hash mismatch (the requester verifies bytes against the BP- +// signed WitnessHash); content-correctness failures during execution attach +// to the BP, so this server is not at additional risk by serving early. +func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) (wit.WitnessPacketResponse, error) { + log.Debug("handleGetWitness processing request", "peer", peer.ID(), "reqID", req.RequestId, "witnessPages", len(req.WitnessPages)) + + witnessCache, witnessSize := h.resolveWitnessBytes(req.WitnessPages) + + var response wit.WitnessPacketResponse totalResponsePayloadDataAmount := 0 // fast fail check totalCached := 0 // protection against heavy memory requests for _, witnessPage := range req.WitnessPages { - totalPages := (witnessSize[witnessPage.Hash] + PageSize - 1) / PageSize // integer trick for: ceil(witnessSize/PageSize) - var witnessPageResponse wit.WitnessPageResponse - witnessPageResponse.Page = witnessPage.Page - witnessPageResponse.Hash = witnessPage.Hash - witnessPageResponse.TotalPages = totalPages - - needToQuery := witnessPage.Page < totalPages - if needToQuery { - var witnessBytes []byte - if cachedRLPBytes, exists := witnessCache[witnessPage.Hash]; exists { - witnessBytes = cachedRLPBytes - } else { - // Use GetWitness to benefit from the blockchain's witness cache - queriedBytes := h.Chain().GetWitness(witnessPage.Hash) - witnessCache[witnessPage.Hash] = queriedBytes - witnessBytes = queriedBytes - totalCached += len(queriedBytes) + totalPages := (witnessSize[witnessPage.Hash] + PageSize - 1) / PageSize // ceil(witnessSize/PageSize) + pageResponse := wit.WitnessPageResponse{ + Page: witnessPage.Page, + Hash: witnessPage.Hash, + TotalPages: totalPages, + } + + if witnessPage.Page < totalPages { + witnessBytes, ok := witnessCache[witnessPage.Hash] + if !ok { + // Post-import fallback: fetch from chain storage on demand. + // If both this and the in-flight cache missed during resolveWitnessBytes, + // witnessSize[hash] would be 0 and we wouldn't reach this branch. + witnessBytes = h.Chain().GetWitness(witnessPage.Hash) + witnessCache[witnessPage.Hash] = witnessBytes + totalCached += len(witnessBytes) } start := PageSize * witnessPage.Page @@ -164,26 +289,49 @@ func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) if end > uint64(len(witnessBytes)) { end = uint64(len(witnessBytes)) } - witnessPageResponse.Data = witnessBytes[start:end] - totalResponsePayloadDataAmount += len(witnessPageResponse.Data) + pageResponse.Data = witnessBytes[start:end] + totalResponsePayloadDataAmount += len(pageResponse.Data) } - response = append(response, witnessPageResponse) + response = append(response, pageResponse) - // fast fail check if totalCached >= MaximumCachedWitnessOnARequest { return nil, errors.New("requests demans huge amount of memory") } - // memory protection check if totalResponsePayloadDataAmount >= MaximumResponseSize { return nil, errors.New("response exceeds maximum p2p payload size") } } - // Return the collected RLP data log.Debug("handleGetWitness returning witnesses pages", "peer", peer.ID(), "reqID", req.RequestId, "count", len(response)) return response, nil } +// resolveWitnessBytes resolves witness bytes and sizes for each unique block +// hash referenced by the request. Prefers the in-flight body cache (WIT2 +// pre-import serving) and falls back to chain-storage size lookup. Bytes for +// the chain-storage path are read lazily during page serving; only sizes are +// resolved up front so the response can carry accurate TotalPages even for +// pages this peer cannot fulfil. +func (h *witHandler) resolveWitnessBytes(pages []wit.WitnessPageRequest) (map[common.Hash][]byte, map[common.Hash]uint64) { + seen := make(map[common.Hash]struct{}, len(pages)) + for _, p := range pages { + seen[p.Hash] = struct{}{} + } + bytesByHash := make(map[common.Hash][]byte, len(seen)) + sizeByHash := make(map[common.Hash]uint64, len(seen)) + for blockHash := range seen { + if cached, _, ok := (*handler)(h).pendingWitnessBodies.get(blockHash); ok { + bytesByHash[blockHash] = cached + sizeByHash[blockHash] = uint64(len(cached)) + continue + } + if size := rawdb.ReadWitnessSize(h.Chain().DB(), blockHash); size != nil { + sizeByHash[blockHash] = *size + } + } + return bytesByHash, sizeByHash +} + // handleGetWitnessMetadata retrieves only the metadata (page count, size, block number) for the requested witness hashes. // This is efficient for verification purposes where we don't need the actual witness data. func (h *witHandler) handleGetWitnessMetadata(peer *wit.Peer, req *wit.GetWitnessMetadataPacket) ([]wit.WitnessMetadataResponse, error) { @@ -196,12 +344,16 @@ func (h *witHandler) handleGetWitnessMetadata(peer *wit.Peer, req *wit.GetWitnes var response []wit.WitnessMetadataResponse for _, hash := range req.Hashes { - // Get witness size from database - size := rawdb.ReadWitnessSize(h.Chain().DB(), hash) - witnessSize := uint64(0) - available := false - - if size != nil { + var ( + witnessSize uint64 + available bool + ) + + // Prefer in-flight body cache (WIT2 fast path). + if cached, _, ok := (*handler)(h).pendingWitnessBodies.get(hash); ok { + witnessSize = uint64(len(cached)) + available = true + } else if size := rawdb.ReadWitnessSize(h.Chain().DB(), hash); size != nil { witnessSize = *size available = true } @@ -209,11 +361,14 @@ func (h *witHandler) handleGetWitnessMetadata(peer *wit.Peer, req *wit.GetWitnes // Calculate total pages totalPages := (witnessSize + PageSize - 1) / PageSize // ceil(witnessSize/PageSize) - // Get block number from header + // Get block number from header. Pre-import we may not yet have the + // header, so fall back to the announcement-cached number if a signed + // announcement is on file. blockNumber := uint64(0) - header := h.Chain().GetHeaderByHash(hash) - if header != nil { + if header := h.Chain().GetHeaderByHash(hash); header != nil { blockNumber = header.Number.Uint64() + } else if ann, ok := (*handler)(h).signedWitnesses.get(hash); ok { + blockNumber = ann.BlockNumber } response = append(response, wit.WitnessMetadataResponse{ diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go new file mode 100644 index 0000000000..e320d4414c --- /dev/null +++ b/eth/handler_wit2.go @@ -0,0 +1,504 @@ +package eth + +import ( + "context" + "errors" + "sync" + "time" + + "github.com/ethereum/go-ethereum/accounts" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/consensus/bor" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" +) + +var ( + errInvalidSignatureLength = errors.New("invalid wit2 announce signature length") + errInvalidSigner = errors.New("wit2 announce signer is not a current validator") +) + +func contextBackground() context.Context { return context.Background() } + +// Metrics for WIT2 signed-announce path. Emitted only when metrics are enabled. +var ( + wit2RelayInMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_in", nil) + wit2RelayOutMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_out", nil) + wit2InvalidSigMeter = metrics.NewRegisteredMeter("eth/wit2/announce/invalid_sig", nil) + wit2NotValidatorMeter = metrics.NewRegisteredMeter("eth/wit2/announce/not_validator", nil) + wit2SpanLookupMissMeter = metrics.NewRegisteredMeter("eth/wit2/announce/span_lookup_miss", nil) + wit2DuplicateMeter = metrics.NewRegisteredMeter("eth/wit2/announce/duplicate", nil) + wit2BroadcastByteMismatchMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_byte_mismatch", nil) + wit2BroadcastUnverifiedSkippedMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_unverified_skipped", nil) + wit2HeaderUnknownMeter = metrics.NewRegisteredMeter("eth/wit2/announce/header_unknown", nil) + wit2ConflictingWitnessHashMeter = metrics.NewRegisteredMeter("eth/wit2/announce/conflicting_witness_hash", nil) + wit2RateLimitDropMeter = metrics.NewRegisteredMeter("eth/wit2/announce/rate_limit_drop", nil) + wit2StrikeDisconnectMeter = metrics.NewRegisteredMeter("eth/wit2/announce/strike_disconnect", nil) +) + +// Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket +// at burst=256 with a sustained rate of 64 announces/sec — higher than any +// honest gossip mesh would produce on Polygon's block cadence, low enough to +// neutralise an attacker spamming valid-but-redundant signed packets. +const ( + wit2AnnounceBurstCap = 256 + wit2AnnounceRefillPerSecond = 64 + // wit2MisbehaviorStrikeLimit is the number of structurally-invalid (bad + // signature, wrong producer, oversized packet) announces a peer may + // produce within strikeDecayWindow before being disconnected. + wit2MisbehaviorStrikeLimit = 5 + wit2MisbehaviorWindow = time.Minute +) + +// peerWit2State tracks a peer's wit2-announce burst budget and recent strikes. +// Lifecycle is tied to the eth handler's peer registration; entries are +// cleaned up when the peer disconnects. +type peerWit2State struct { + tokens float64 + lastRefill time.Time + strikeCount int + firstStrikeAt time.Time +} + +type peerWit2Tracker struct { + mu sync.Mutex + state map[string]*peerWit2State +} + +func newPeerWit2Tracker() *peerWit2Tracker { + return &peerWit2Tracker{state: make(map[string]*peerWit2State)} +} + +func (t *peerWit2Tracker) forget(peerID string) { + t.mu.Lock() + delete(t.state, peerID) + t.mu.Unlock() +} + +// allow returns true if the peer has enough budget to consume `count` +// announcements right now. False means the packet should be dropped and a +// rate-limit metric recorded; the caller decides whether to disconnect. +func (t *peerWit2Tracker) allow(peerID string, count int) bool { + t.mu.Lock() + defer t.mu.Unlock() + st, ok := t.state[peerID] + now := time.Now() + if !ok { + st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} + t.state[peerID] = st + } + elapsed := now.Sub(st.lastRefill).Seconds() + if elapsed > 0 { + st.tokens += elapsed * wit2AnnounceRefillPerSecond + if st.tokens > wit2AnnounceBurstCap { + st.tokens = wit2AnnounceBurstCap + } + st.lastRefill = now + } + if st.tokens < float64(count) { + return false + } + st.tokens -= float64(count) + return true +} + +// strike records a misbehavior for the peer. Returns true when the peer has +// exceeded the threshold within the decay window and must be disconnected. +func (t *peerWit2Tracker) strike(peerID string) bool { + t.mu.Lock() + defer t.mu.Unlock() + st, ok := t.state[peerID] + now := time.Now() + if !ok { + st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} + t.state[peerID] = st + } + if st.firstStrikeAt.IsZero() || now.Sub(st.firstStrikeAt) > wit2MisbehaviorWindow { + st.firstStrikeAt = now + st.strikeCount = 0 + } + st.strikeCount++ + return st.strikeCount >= wit2MisbehaviorStrikeLimit +} + +// wit2 announce-cache lifecycle constants. +const ( + // wit2AnnounceTTL bounds how long we remember a signed announcement so we + // can re-emit it on body delivery and skip duplicate relays. Must outlast + // typical fetch+import latency so producers/relayers still have the + // signature when stateless peers come asking for the body. + wit2AnnounceTTL = 30 * time.Second + + // wit2RelayWindow is the per-(blockHash, peer) duplicate-suppression window. + // Even without this, knownWitnesses dedup blocks repeats; the window adds + // belt-and-suspenders coverage during the brief gap between receive and + // known-cache update under concurrent gossip storms. + wit2RelayWindow = 200 * time.Millisecond + + // witnessBodyCacheCapacity bounds the number of pre-import witness bodies + // held in memory. Each entry is ~50MB on Polygon, so the cap keeps total + // memory under ~500MB worst case. Older entries are evicted as new ones + // arrive; a 10-block window comfortably covers typical block-fetch and + // import latency. + witnessBodyCacheCapacity = 10 +) + +// pendingWitnessBody holds RLP-encoded witness bytes received from the network +// before the corresponding block has been imported (and thus before the bytes +// have been written to chain storage). Lets serving peers answer GetWitness +// requests during the import gap, which is what makes early relay actually +// useful — a peer that received the body can serve it the moment its TCP +// receive completes, rather than waiting ~500ms for full block validation. +type pendingWitnessBody struct { + bytes []byte + witnessHash common.Hash + receivedAt time.Time +} + +// pendingWitnessBodyCache holds bytes by block hash with a short TTL. Entries +// are dropped after the body has been written to chain storage, or after the +// TTL expires (whichever first). The cache is a simple map; the witness body +// is large (~50MB) so the cap is set conservatively. +type pendingWitnessBodyCache struct { + mu sync.RWMutex + entries map[common.Hash]*pendingWitnessBody + capacity int +} + +func newPendingWitnessBodyCache(capacity int) *pendingWitnessBodyCache { + return &pendingWitnessBodyCache{ + entries: make(map[common.Hash]*pendingWitnessBody), + capacity: capacity, + } +} + +func (c *pendingWitnessBodyCache) put(blockHash common.Hash, bytes []byte, witnessHash common.Hash) { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if len(c.entries) >= c.capacity { + // Evict the oldest entry. Linear scan is fine at the configured cap. + var oldestHash common.Hash + var oldest time.Time + for h, e := range c.entries { + if oldest.IsZero() || e.receivedAt.Before(oldest) { + oldest = e.receivedAt + oldestHash = h + } + } + delete(c.entries, oldestHash) + } + c.entries[blockHash] = &pendingWitnessBody{ + bytes: bytes, + witnessHash: witnessHash, + receivedAt: time.Now(), + } +} + +func (c *pendingWitnessBodyCache) get(blockHash common.Hash) ([]byte, common.Hash, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return nil, common.Hash{}, false + } + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return nil, common.Hash{}, false + } + return e.bytes, e.witnessHash, true +} + +func (c *pendingWitnessBodyCache) drop(blockHash common.Hash) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.entries, blockHash) +} + +func (c *pendingWitnessBodyCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + +// signedWitnessCache stores BP-signed announcements by block hash. The cache +// is consulted by: +// - the relay path on receive (skip if already seen recently), +// - the body-broadcast path (re-emit the cached signed announce when a +// stateless peer requests the body), and +// - the producer path (cache the locally-signed announcement so subsequent +// re-emissions from this node don't re-sign). +type signedWitnessCache struct { + mu sync.RWMutex + entries map[common.Hash]*signedAnnounceEntry +} + +type signedAnnounceEntry struct { + announcement wit.SignedWitnessAnnouncement + receivedAt time.Time +} + +func newSignedWitnessCache() *signedWitnessCache { + return &signedWitnessCache{entries: make(map[common.Hash]*signedAnnounceEntry)} +} + +// putIfNewer stores the announcement keyed by block hash, returning true if +// the cache did not already contain a fresh entry for this hash. Callers use +// the return value to decide whether to relay (false → suppress duplicate). +// +// If a fresh entry already exists with a *different* WitnessHash, the new +// announcement is rejected outright (returns false): the first valid signed +// commitment wins for the lifetime of the entry. This prevents an attacker +// who has obtained a second valid signature (e.g. a compromised producer +// later in the same window) from poisoning the cache mid-fetch and dropping +// honest serving peers against a different hash. +func (c *signedWitnessCache) putIfNewer(ann wit.SignedWitnessAnnouncement) bool { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if existing, ok := c.entries[ann.BlockHash]; ok { + if existing.announcement.WitnessHash != ann.WitnessHash { + wit2ConflictingWitnessHashMeter.Mark(1) + return false + } + // Same WitnessHash, recent: dedup. + if time.Since(existing.receivedAt) < wit2RelayWindow { + return false + } + } + c.entries[ann.BlockHash] = &signedAnnounceEntry{ + announcement: ann, + receivedAt: time.Now(), + } + return true +} + +// get returns the cached announcement for a block hash, if present and fresh. +func (c *signedWitnessCache) get(blockHash common.Hash) (wit.SignedWitnessAnnouncement, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return wit.SignedWitnessAnnouncement{}, false + } + return e.announcement, true +} + +// gcLocked drops entries past the TTL. Caller must hold the write lock. +func (c *signedWitnessCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + +// verifySignedAnnouncement returns the recovered signer address if the +// signature is structurally valid; otherwise an error. Validator-set +// membership is checked separately against the consensus engine. +func verifySignedAnnouncement(ann wit.SignedWitnessAnnouncement) (common.Address, error) { + if len(ann.Signature) != wit.SignatureLength { + return common.Address{}, errInvalidSignatureLength + } + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + pubkey, err := crypto.Ecrecover(digest.Bytes(), ann.Signature) + if err != nil { + return common.Address{}, err + } + var addr common.Address + copy(addr[:], crypto.Keccak256(pubkey[1:])[12:]) + return addr, nil +} + +// cosendWitnessAnnouncement co-sends a witness announcement to every peer +// that just received the full block via the propagate=true fanout, provided +// the peer doesn't already have the witness. WIT2 peers receive the signed +// variant; older peers receive the unsigned WIT1 announce. Skipped entirely +// when the local node hasn't yet stored the witness or doesn't have a +// signing key configured. +func (h *handler) cosendWitnessAnnouncement(blockHash common.Hash, blockNumber uint64, transfer []*ethPeer, staticAndTrustedPeers []*ethPeer) { + if !h.chain.HasWitness(blockHash) { + return + } + ann, hasSigned := h.signLocalWitnessAnnouncement(blockHash, blockNumber) + if !hasSigned { + return + } + witnessRecipientsByID := make(map[string]*witPeer) + for _, wp := range h.peers.peersWithoutWitness(blockHash) { + witnessRecipientsByID[wp.Peer.ID()] = wp + } + cosend := func(id string) { + wp, ok := witnessRecipientsByID[id] + if !ok { + return + } + if wp.Peer.Version() >= wit.WIT2 { + wp.Peer.AsyncSendSignedWitnessAnnouncement(ann) + } else { + wp.Peer.AsyncSendNewWitnessHash(blockHash, blockNumber) + } + } + for _, peer := range transfer { + cosend(peer.Peer.ID()) + } + for _, peer := range staticAndTrustedPeers { + cosend(peer.ID()) + } +} + +// lookupSignedWitnessHash returns the BP-signed witness hash for a block, if +// the local cache has a verified announcement. Used by the witness manager +// on fetch success to verify byte-correctness against the signed commitment. +func (h *handler) lookupSignedWitnessHash(blockHash common.Hash) (common.Hash, bool) { + ann, ok := h.signedWitnesses.get(blockHash) + if !ok { + return common.Hash{}, false + } + return ann.WitnessHash, true +} + +// cacheVerifiedWitnessForServing receives canonical-encoded witness bytes from +// the fetcher after a successful, byte-verified paged download and stores them +// in the in-flight cache so peers can fetch the body before this node finishes +// chain-write. Bytes here have already passed verifyAgainstSignedHash (when a +// signed announcement was on file), or arrived via WIT1 unsigned path; in both +// cases they're the same bytes the upstream peer agreed upon, so serving them +// to downstream peers cannot expose this node to byte-mismatch drops beyond +// the upstream's already-incurred risk. +func (h *handler) cacheVerifiedWitnessForServing(blockHash common.Hash, witnessBytes []byte, witnessHash common.Hash) { + if h.pendingWitnessBodies == nil { + return + } + h.pendingWitnessBodies.put(blockHash, witnessBytes, witnessHash) +} + +// signLocalWitnessAnnouncement looks up the witness body for blockHash, hashes +// it, and signs the announcement digest using the engine's authorized signer. +// The result is cached so subsequent broadcasts of the same block reuse the +// signature without recomputing the keccak. +// +// Returns (announcement, true) on success. Returns (_, false) if any of: +// - no signer configured (full node not producing blocks) +// - witness bytes not yet stored in chain +// - signing failed +// +// Cost: ~150ms keccak over a 50MB witness, plus ~100μs ECDSA. Off the +// block-production critical path; runs once per produced block on the +// announce path. +func (h *handler) signLocalWitnessAnnouncement(blockHash common.Hash, blockNumber uint64) (wit.SignedWitnessAnnouncement, bool) { + if cached, ok := h.signedWitnesses.get(blockHash); ok { + return cached, true + } + + borEngine, ok := h.chain.Engine().(*bor.Bor) + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + if (borEngine.CurrentSigner() == common.Address{}) { + return wit.SignedWitnessAnnouncement{}, false + } + + witnessHash, ok := h.canonicalWitnessHash(blockHash) + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + preimage := wit.WitnessAnnouncementSigningPreImage(blockHash, blockNumber, witnessHash) + _, sig, err := borEngine.SignBytes(accounts.MimetypeBorWitnessAnnounce, preimage) + if err != nil { + log.Warn("wit2: failed to sign witness announcement", "blockHash", blockHash, "err", err) + return wit.SignedWitnessAnnouncement{}, false + } + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: blockHash, + BlockNumber: blockNumber, + WitnessHash: witnessHash, + Signature: sig, + } + h.signedWitnesses.putIfNewer(ann) + return ann, true +} + +// canonicalWitnessHash reads the witness bytes for blockHash from chain +// storage and returns the WIT2 chunked-aggregate commitment over those bytes. +// Witness.EncodeRLP is now deterministic (state nodes sorted), so every newly +// written witness blob is canonical at write time and can be hashed directly +// without a decode/re-encode round-trip — saving roughly the cost of one RLP +// pass on the announce path. Returns (_, false) when no witness is on file. +func (h *handler) canonicalWitnessHash(blockHash common.Hash) (common.Hash, bool) { + stored := h.chain.GetWitness(blockHash) + if len(stored) == 0 { + return common.Hash{}, false + } + return stateless.WitnessCommitHash(stored), true +} + +// isScheduledProducer binds the recovered signer of a wit2 announcement to the +// actual block producer of the announced block. When the block header is +// locally available — the common case — we recover the seal-signer of the +// header and require an exact address match. Validator-set membership is no +// longer sufficient: any current validator could otherwise sign an +// announcement for another producer's block hash with a forged WitnessHash, +// poisoning this node's cache and dropping honest serving peers. +// +// Returns (ok, headerAvailable): +// - ok=true, headerAvailable=true: signer matches the block producer; safe +// to cache and relay. +// - ok=false, headerAvailable=true: confirmed bad signer; the caller MUST +// strike the relayer. +// - ok=false, headerAvailable=false: header not yet local. The announce +// cannot be bound to a producer right now. The caller MUST NOT strike — +// this is expected during the cosend window where a signed announce +// races the block to the receiver. The fast path recovers naturally +// once the block header arrives and a subsequent announce for the same +// hash is re-evaluated. +func (h *handler) isScheduledProducer(signer common.Address, blockNumber uint64, blockHash common.Hash) (bool, bool) { + borEngine, isBor := h.chain.Engine().(*bor.Bor) + if !isBor { + // Non-bor chain: skip the producer check. + return true, true + } + header := h.chain.GetHeaderByHash(blockHash) + return verifyScheduledProducer(borEngine, header, signer, blockNumber, blockHash) +} + +// verifyScheduledProducer is the pure decision logic for binding a wit2 +// announcement signer to the block producer of `blockHash`. Split from +// isScheduledProducer so it can be unit-tested without standing up a full +// handler. Returns the same (ok, headerAvailable) shape — see +// isScheduledProducer for the contract. +func verifyScheduledProducer(borEngine *bor.Bor, header *types.Header, signer common.Address, blockNumber uint64, blockHash common.Hash) (bool, bool) { + if header == nil { + wit2HeaderUnknownMeter.Mark(1) + log.Debug("wit2: header for announced block not yet local; deferring until block arrives", + "blockHash", blockHash, "blockNumber", blockNumber) + return false, false + } + if header.Number.Uint64() != blockNumber { + log.Debug("wit2: announce blockNumber does not match local header", + "blockHash", blockHash, "announced", blockNumber, "local", header.Number.Uint64()) + return false, true + } + producer, err := borEngine.Author(header) + if err != nil { + log.Debug("wit2: failed to recover header sealer", "blockHash", blockHash, "err", err) + return false, true + } + if producer != signer { + log.Debug("wit2: announce signer is not the block producer", + "blockHash", blockHash, "producer", producer, "signer", signer) + return false, true + } + return true, true +} diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go new file mode 100644 index 0000000000..0bb5b1eb33 --- /dev/null +++ b/eth/handler_wit2_test.go @@ -0,0 +1,526 @@ +package eth + +import ( + "bytes" + "crypto/rand" + "math/big" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestSignedWitnessCachePutIfNewerSuppressesDuplicates verifies that the +// per-(blockHash) relay-window dedup blocks immediate re-relay of the same +// announcement. Without this, A→B→A bouncing would amplify a single signed +// announcement into a gossip storm. +func TestSignedWitnessCachePutIfNewerSuppressesDuplicates(t *testing.T) { + c := newSignedWitnessCache() + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xaaaa"), + BlockNumber: 100, + WitnessHash: common.HexToHash("0xbbbb"), + Signature: make([]byte, wit.SignatureLength), + } + if !c.putIfNewer(ann) { + t.Fatal("first put should succeed") + } + if c.putIfNewer(ann) { + t.Fatal("immediate re-put within window should be suppressed") + } + if _, ok := c.get(ann.BlockHash); !ok { + t.Fatal("entry should still be present after suppressed put") + } +} + +// TestSignedWitnessCacheTTLExpiry checks that stale entries don't linger past +// the TTL. This prevents stale signatures from being re-served indefinitely +// for blocks long since imported and pruned. +func TestSignedWitnessCacheTTLExpiry(t *testing.T) { + c := newSignedWitnessCache() + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xcafe"), + BlockNumber: 1, + WitnessHash: common.HexToHash("0xdead"), + Signature: make([]byte, wit.SignatureLength), + } + c.putIfNewer(ann) + // Force the receivedAt back beyond TTL. + c.mu.Lock() + c.entries[ann.BlockHash].receivedAt = time.Now().Add(-2 * wit2AnnounceTTL) + c.mu.Unlock() + if _, ok := c.get(ann.BlockHash); ok { + t.Fatal("expired entry should not be returned") + } +} + +// TestVerifySignedAnnouncementRejectsBadLength catches sloppy callers passing +// truncated signatures. Without this guard, ecrecover panics or silently +// recovers a garbage address. +func TestVerifySignedAnnouncementRejectsBadLength(t *testing.T) { + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0x01"), + BlockNumber: 1, + WitnessHash: common.HexToHash("0x02"), + Signature: []byte{0x00, 0x01, 0x02}, + } + if _, err := verifySignedAnnouncement(ann); err == nil { + t.Fatal("expected error for short signature") + } +} + +// TestVerifySignedAnnouncementRoundTrip signs an announcement with a known +// key and verifies recovery yields the same address. This is the core +// authentication property; if it breaks, every signed announcement on the +// network silently fails verification. +func TestVerifySignedAnnouncementRoundTrip(t *testing.T) { + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + expectedSigner := crypto.PubkeyToAddress(key.PublicKey) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xfeedface"), + BlockNumber: 42, + WitnessHash: common.HexToHash("0xc0ffee00"), + } + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + if err != nil { + t.Fatalf("sign: %v", err) + } + ann.Signature = sig + + got, err := verifySignedAnnouncement(ann) + if err != nil { + t.Fatalf("verify: %v", err) + } + if got != expectedSigner { + t.Fatalf("recovered signer = %s, want %s", got.Hex(), expectedSigner.Hex()) + } +} + +// TestVerifySignedAnnouncementWalletSemantics mirrors what wallet.SignData +// does in production (keccak256(preimage) before signing) to guard against +// the regression where the producer pre-hashes a 32-byte digest and the +// wallet hashes again — producing signatures the verifier cannot recover. +// The test fails iff the producer/verifier preimage-vs-digest contract +// drifts. +func TestVerifySignedAnnouncementWalletSemantics(t *testing.T) { + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + expectedSigner := crypto.PubkeyToAddress(key.PublicKey) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xab"), + BlockNumber: 99, + WitnessHash: common.HexToHash("0xcd"), + } + // Production wallet path: SignData hashes its input once, then signs. + preimage := wit.WitnessAnnouncementSigningPreImage(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + walletDigest := crypto.Keccak256(preimage) + sig, err := crypto.Sign(walletDigest, key) + if err != nil { + t.Fatalf("sign: %v", err) + } + ann.Signature = sig + + got, err := verifySignedAnnouncement(ann) + if err != nil { + t.Fatalf("verify: %v", err) + } + if got != expectedSigner { + t.Fatalf("recovered signer = %s, want %s — preimage/digest contract is broken", got.Hex(), expectedSigner.Hex()) + } +} + +// TestVerifySignedAnnouncementDetectsTampering ensures that flipping any +// field in the announcement causes verification to recover a different +// address (or fail outright). This is the load-bearing property for the +// blame-separation argument: a signature ties a specific BP to a specific +// (BlockHash, BlockNumber, WitnessHash) tuple and nothing else. +func TestVerifySignedAnnouncementDetectsTampering(t *testing.T) { + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + signer := crypto.PubkeyToAddress(key.PublicKey) + + original := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xa1"), + BlockNumber: 7, + WitnessHash: common.HexToHash("0xb2"), + } + digest := wit.WitnessAnnouncementSigningHash(original.BlockHash, original.BlockNumber, original.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + if err != nil { + t.Fatalf("sign: %v", err) + } + + // Tamper with WitnessHash but reuse the signature. + tampered := original + tampered.WitnessHash = common.HexToHash("0xb3") + tampered.Signature = sig + + got, err := verifySignedAnnouncement(tampered) + if err != nil { + // If err is non-nil, tampering was caught at the structural level. + return + } + if got == signer { + t.Fatal("tampered announcement recovered original signer; signature is not bound to the message") + } +} + +// TestPeerWit2TrackerRateLimitConsumesTokens guards Fix-7: the per-peer +// rate-limit must reject burst-exceeding traffic without dropping the peer. +// Honest peers running normal block cadence should never trip this; the test +// pins the budget so a regression that loosens the cap is caught. +func TestPeerWit2TrackerRateLimitConsumesTokens(t *testing.T) { + tr := newPeerWit2Tracker() + if !tr.allow("p1", wit2AnnounceBurstCap) { + t.Fatal("first burst-cap-sized batch must fit") + } + if tr.allow("p1", 1) { + t.Fatal("immediate next announcement must be rejected when bucket is empty") + } +} + +// TestPeerWit2TrackerStrikeDisconnectThreshold pins the strike-threshold +// behavior. Below the threshold, strike returns false (peer kept). At the +// threshold it returns true so the handler disconnects. Honest peers +// occasionally producing one bad announce should never trigger; sustained +// misbehavior must. +func TestPeerWit2TrackerStrikeDisconnectThreshold(t *testing.T) { + tr := newPeerWit2Tracker() + for i := 0; i < wit2MisbehaviorStrikeLimit-1; i++ { + if tr.strike("p1") { + t.Fatalf("disconnect signaled at strike %d, want only at %d", i+1, wit2MisbehaviorStrikeLimit) + } + } + if !tr.strike("p1") { + t.Fatalf("disconnect must signal at strike %d", wit2MisbehaviorStrikeLimit) + } +} + +// TestSignedWitnessCacheRejectsConflictingWitnessHash is the Fix-6 invariant +// at the cache layer: only the FIRST valid signed announcement for a given +// blockHash wins. A second announcement with a different WitnessHash — +// possibly from a forked producer or a compromised key in a later window — +// must be rejected, otherwise it would poison the cache mid-fetch and drop +// honest peers serving the original bytes. +func TestSignedWitnessCacheRejectsConflictingWitnessHash(t *testing.T) { + c := newSignedWitnessCache() + first := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xabcd"), + BlockNumber: 50, + WitnessHash: common.HexToHash("0x1111"), + Signature: make([]byte, wit.SignatureLength), + } + if !c.putIfNewer(first) { + t.Fatal("first put should succeed") + } + + conflict := first + conflict.WitnessHash = common.HexToHash("0x2222") + if c.putIfNewer(conflict) { + t.Fatal("second put with different WitnessHash must be rejected") + } + got, ok := c.get(first.BlockHash) + if !ok { + t.Fatal("first announcement must remain cached after conflict rejection") + } + if got.WitnessHash != first.WitnessHash { + t.Fatalf("cache poisoned: WitnessHash=%s want=%s", got.WitnessHash.Hex(), first.WitnessHash.Hex()) + } +} + +// TestPendingWitnessBodyCacheEvictsOldest covers the LRU-style eviction when +// the cache reaches capacity. Without it, long-running nodes accumulate +// witness bodies indefinitely (~50MB each) and run out of memory. +func TestPendingWitnessBodyCacheEvictsOldest(t *testing.T) { + c := newPendingWitnessBodyCache(2) + c.put(common.HexToHash("0x01"), []byte("first"), common.HexToHash("0xa")) + time.Sleep(time.Millisecond) + c.put(common.HexToHash("0x02"), []byte("second"), common.HexToHash("0xb")) + time.Sleep(time.Millisecond) + c.put(common.HexToHash("0x03"), []byte("third"), common.HexToHash("0xc")) + + if _, _, ok := c.get(common.HexToHash("0x01")); ok { + t.Fatal("oldest entry should have been evicted") + } + if _, _, ok := c.get(common.HexToHash("0x02")); !ok { + t.Fatal("middle entry should still be present") + } + if _, _, ok := c.get(common.HexToHash("0x03")); !ok { + t.Fatal("newest entry should still be present") + } +} + +// TestPendingWitnessBodyCacheDropClearsEntry guards the explicit drop path +// used when a witness has been written to chain storage and no longer needs +// in-flight serving. +func TestPendingWitnessBodyCacheDropClearsEntry(t *testing.T) { + c := newPendingWitnessBodyCache(4) + hash := common.HexToHash("0xdead") + c.put(hash, []byte("x"), common.HexToHash("0xaa")) + c.drop(hash) + if _, _, ok := c.get(hash); ok { + t.Fatal("entry should be gone after drop") + } +} + +// TestHandleWitnessBroadcastSkipsCacheWhenNoSignature guards the Fix-5 +// invariant: bytes received via NewWitness broadcast are NOT exposed for +// pre-import serving when no BP-signed witnessHash is on file. Otherwise an +// honest relayer with a malicious upstream would serve unverified bytes and +// be dropped by downstream peers as if it had lied. +func TestHandleWitnessBroadcastSkipsCacheWhenNoSignature(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(7777)} + witness, err := stateless.NewWitness(header, nil) + if err != nil { + t.Fatalf("new witness: %v", err) + } + + // No signed announcement on file → broadcast must NOT populate the + // pre-import serving cache. + if err := witH.handleWitnessBroadcast(peer, witness); err != nil { + t.Fatalf("handleWitnessBroadcast: %v", err) + } + hash := header.Hash() + if _, _, ok := h.handler.pendingWitnessBodies.get(hash); ok { + t.Fatal("pendingWitnessBodies populated without a signed witnessHash; bytes are unverified for serving") + } +} + +// TestSignedAnnounceDoesNotMarkPeerAsBodyHolder is the load-bearing +// regression test for the announce/body separation. A WIT2 peer that has +// only relayed a signed announcement (no body) MUST NOT show up in +// peersWithoutWitness's complement — i.e. it must not be selected as a body +// fetch target by getOnePeerWithWitness. Otherwise the fetcher will ask a +// relay-only peer for bytes, get nothing, and drop an honest peer. +func TestSignedAnnounceDoesNotMarkPeerAsBodyHolder(t *testing.T) { + hash := common.HexToHash("0xfa11") + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: 1, + WitnessHash: common.HexToHash("0xab"), + Signature: make([]byte, wit.SignatureLength), + } + + // Outbound announce path (this node forwarding to peer): must NOT mark + // peer as a body-holder. + peer.AsyncSendSignedWitnessAnnouncement(ann) + + if peer.KnownWitnessContainsHash(hash) { + t.Fatal("AsyncSendSignedWitnessAnnouncement marked peer as body-holder; body fetch will pick a relay-only peer and drop it") + } + if !peer.KnownAnnounceContainsHash(hash) { + t.Fatal("AsyncSendSignedWitnessAnnouncement should mark announce-known so we don't re-relay") + } +} + +// TestHandleGetWitnessServesFromInFlightCache is the load-bearing behavioral +// test for the WIT2 pre-import serving claim: a node that has received the +// witness body over gossip but has not yet imported it (chain storage empty) +// must still be able to serve `GetWitness` requests from the in-flight cache. +// Without this path, multi-hop WIT2 fast-propagation has no body source until +// each hop's chain-write completes — collapsing the entire benefit of the +// design. +func TestHandleGetWitnessServesFromInFlightCache(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWitPeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(4242)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + // Smaller than PageSize so the response fits in a single page. + bodyBytes := make([]byte, 1*1024*1024) + rand.Read(bodyBytes) + + // Body is in the in-flight cache only; chain storage is empty. + h.handler.pendingWitnessBodies.put(hash, bodyBytes, crypto.Keccak256Hash(bodyBytes)) + require.Nil(t, rawdb.ReadWitnessSize(h.chain.DB(), hash), + "precondition: chain must have no witness for this hash") + + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + assert.Equal(t, hash, resp[0].Hash) + assert.Equal(t, uint64(1), resp[0].TotalPages) + require.Equal(t, len(bodyBytes), len(resp[0].Data), + "in-flight cache served fewer bytes than expected — pre-import path is not wired") + assert.Equal(t, bodyBytes[:64], resp[0].Data[:64]) +} + +// TestHandleGetWitnessMetadataServesFromInFlightCache mirrors the above for +// the metadata path: a peer asking for metadata before chain-write should +// receive Available=true with the correct size from the in-flight cache. +// This is what lets a downstream relayer compute pagination without waiting. +func TestHandleGetWitnessMetadataServesFromInFlightCache(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWitPeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(4243)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + bodyBytes := make([]byte, 7*1024*1024) // forces TotalPages = 1 (under 15MB) + rand.Read(bodyBytes) + h.handler.pendingWitnessBodies.put(hash, bodyBytes, crypto.Keccak256Hash(bodyBytes)) + require.Nil(t, rawdb.ReadWitnessSize(h.chain.DB(), hash)) + + resp, err := witH.handleGetWitnessMetadata(peer, &wit.GetWitnessMetadataPacket{ + RequestId: 1, + GetWitnessMetadataRequest: &wit.GetWitnessMetadataRequest{ + Hashes: []common.Hash{hash}, + }, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + assert.True(t, resp[0].Available, "metadata must report Available when only the in-flight cache holds the body") + assert.Equal(t, uint64(len(bodyBytes)), resp[0].WitnessSize) + assert.Equal(t, uint64(1), resp[0].TotalPages) + assert.Equal(t, header.Number.Uint64(), resp[0].BlockNumber) +} + +// TestHandleGetWitnessPrefersCacheOverChain documents the chosen precedence: +// when both sources hold a witness, the in-flight cache wins. Locks the choice +// in so a refactor can't silently reverse it. Cache-first is correct because +// the cache is what the BP-signed announcement points at; the chain copy is +// only valid once chain-write has finished, which the cache entry implies has +// not yet happened or has just happened with identical bytes. +func TestHandleGetWitnessPrefersCacheOverChain(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWitPeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(4244)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + cacheBytes := make([]byte, 4*1024*1024) + rand.Read(cacheBytes) + chainBytes := make([]byte, 4*1024*1024) + rand.Read(chainBytes) + + rawdb.WriteWitness(h.chain.DB(), hash, chainBytes) + h.handler.pendingWitnessBodies.put(hash, cacheBytes, crypto.Keccak256Hash(cacheBytes)) + + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + assert.Equal(t, cacheBytes[:64], resp[0].Data[:64], + "handler must prefer the in-flight cache; got bytes that look like chain storage") +} + +// TestCanonicalWitnessHashUsesStoredBytesDirectly is the regression for the +// optimization that skips decode/re-encode on the producer announce path: as +// long as Witness.EncodeRLP is canonical-deterministic, stored bytes are +// already canonical and can be hashed in place. If a future change re- +// introduces a non-canonical write path, this test fails and the producer- +// side WitnessHash silently diverges from what verifiers compute. +func TestCanonicalWitnessHashUsesStoredBytesDirectly(t *testing.T) { + h := newTestHandler() + defer h.close() + + header := &types.Header{Number: big.NewInt(7777)} + hash := header.Hash() + + // Build a synthetic witness, encode canonically once, store the bytes. + w, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + for i := 0; i < 64; i++ { + buf := make([]byte, 256) + rand.Read(buf) + w.AddState(map[string][]byte{string(buf): buf}) + } + canonical := encodeWitnessForTest(t, w) + rawdb.WriteWitness(h.chain.DB(), hash, canonical) + + got, ok := h.handler.canonicalWitnessHash(hash) + require.True(t, ok) + + want := stateless.WitnessCommitHash(canonical) + require.Equal(t, want, got, + "canonicalWitnessHash must hash stored canonical bytes directly; if this fails, EncodeRLP determinism has regressed or the helper added back a re-encode") +} + +func encodeWitnessForTest(t *testing.T, w *stateless.Witness) []byte { + t.Helper() + var buf bytes.Buffer + require.NoError(t, w.EncodeRLP(&buf)) + return buf.Bytes() +} + +// TestVerifyScheduledProducerDeferredWhenHeaderUnknown is the regression for +// the cosend race: when the signed announce arrives before the block is +// imported, verifyScheduledProducer must report headerAvailable=false so the +// caller defers (no relay, no strike). Without this branch, valid WIT2 +// announces would draw strikes for honest relayers during normal operation. +func TestVerifyScheduledProducerDeferredWhenHeaderUnknown(t *testing.T) { + // borEngine is unused on the nil-header branch — verifyScheduledProducer + // short-circuits before calling Author. Pass nil to keep the test free of + // engine setup; if a future change reorders the branch and starts deref- + // erencing borEngine here, the test will panic and we'll catch it. + ok, headerAvailable := verifyScheduledProducer(nil, nil, common.Address{}, 100, common.HexToHash("0xfeed")) + if ok { + t.Fatal("nil header must not validate as ok") + } + if headerAvailable { + t.Fatal("nil header must report headerAvailable=false so caller defers without striking") + } +} + +// TestVerifyScheduledProducerRejectsBlockNumberMismatch covers the case where +// the local header is present but disagrees with the announce on block +// number. This is a confirmed bad announce and the caller must strike, so +// headerAvailable must be true. +func TestVerifyScheduledProducerRejectsBlockNumberMismatch(t *testing.T) { + header := &types.Header{Number: big.NewInt(50)} + ok, headerAvailable := verifyScheduledProducer(nil, header, common.Address{}, 51, header.Hash()) + if ok { + t.Fatal("number mismatch must not validate") + } + if !headerAvailable { + t.Fatal("with header present, headerAvailable must be true so the caller strikes the relayer") + } +} diff --git a/eth/handler_wit_test.go b/eth/handler_wit_test.go index 92bd38f803..57dc22cb2a 100644 --- a/eth/handler_wit_test.go +++ b/eth/handler_wit_test.go @@ -56,6 +56,37 @@ func newTestWitPeerWithReader() (*wit.Peer, func()) { return peer, cleanup } +// newTestWit2PeerWithReader creates a wit.Peer negotiated at WIT2, with the +// same draining behavior as newTestWitPeerWithReader. WIT2-specific paths +// (signed announce, AsyncSendSignedWitnessAnnouncement) early-return on a +// WIT1 peer, so tests that exercise them must use this helper. +func newTestWit2PeerWithReader() (*wit.Peer, func()) { + var id enode.ID + rand.Read(id[:]) + p2pPeer := p2p.NewPeer(id, "test-peer-wit2", nil) + app, net := p2p.MsgPipe() + + done := make(chan struct{}) + go func() { + for { + msg, err := app.ReadMsg() + if err != nil { + close(done) + return + } + msg.Discard() + } + }() + + peer := wit.NewPeer(wit.WIT2, p2pPeer, net, log.New()) + cleanup := func() { + app.Close() + peer.Close() + <-done + } + return peer, cleanup +} + // mockUnknownPacket is a mock packet type that implements wit.Packet // but is not recognized by the Handle method's switch statement type mockUnknownPacket struct{} diff --git a/eth/peer.go b/eth/peer.go index 3612db28a8..a3f5fda2ca 100644 --- a/eth/peer.go +++ b/eth/peer.go @@ -128,6 +128,7 @@ type WitnessPeer interface { // the method ethPeer.RequestWitnesses invokes AsyncSendNewWitness(witness *stateless.Witness) AsyncSendNewWitnessHash(hash common.Hash, number uint64) + AsyncSendSignedWitnessAnnouncement(ann wit.SignedWitnessAnnouncement) RequestWitness(witnessPages []wit.WitnessPageRequest, sink chan *wit.Response) (*wit.Request, error) RequestWitnessMetadata(hashes []common.Hash, sink chan *wit.Response) (*wit.Request, error) Close() @@ -136,9 +137,11 @@ type WitnessPeer interface { Log() log.Logger KnownWitnesses() *wit.KnownCache AddKnownWitness(hash common.Hash) + AddKnownAnnounce(hash common.Hash) KnownWitnessesCount() int KnownWitnessesContains(witness *stateless.Witness) bool KnownWitnessContainsHash(hash common.Hash) bool + KnownAnnounceContainsHash(hash common.Hash) bool ReplyWitness(requestID uint64, response *wit.WitnessPacketResponse) error } diff --git a/eth/peer_mock.go b/eth/peer_mock.go index 72e3a6fdbc..3cee95f6cb 100644 --- a/eth/peer_mock.go +++ b/eth/peer_mock.go @@ -50,6 +50,32 @@ func (mr *MockWitnessPeerMockRecorder) AddKnownWitness(hash interface{}) *gomock return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddKnownWitness", reflect.TypeOf((*MockWitnessPeer)(nil).AddKnownWitness), hash) } +// AddKnownAnnounce mocks base method. +func (m *MockWitnessPeer) AddKnownAnnounce(hash common.Hash) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "AddKnownAnnounce", hash) +} + +// AddKnownAnnounce indicates an expected call of AddKnownAnnounce. +func (mr *MockWitnessPeerMockRecorder) AddKnownAnnounce(hash interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddKnownAnnounce", reflect.TypeOf((*MockWitnessPeer)(nil).AddKnownAnnounce), hash) +} + +// KnownAnnounceContainsHash mocks base method. +func (m *MockWitnessPeer) KnownAnnounceContainsHash(hash common.Hash) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "KnownAnnounceContainsHash", hash) + ret0, _ := ret[0].(bool) + return ret0 +} + +// KnownAnnounceContainsHash indicates an expected call of KnownAnnounceContainsHash. +func (mr *MockWitnessPeerMockRecorder) KnownAnnounceContainsHash(hash interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "KnownAnnounceContainsHash", reflect.TypeOf((*MockWitnessPeer)(nil).KnownAnnounceContainsHash), hash) +} + // AsyncSendNewWitness mocks base method. func (m *MockWitnessPeer) AsyncSendNewWitness(witness *stateless.Witness) { m.ctrl.T.Helper() @@ -74,6 +100,18 @@ func (mr *MockWitnessPeerMockRecorder) AsyncSendNewWitnessHash(hash, number inte return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AsyncSendNewWitnessHash", reflect.TypeOf((*MockWitnessPeer)(nil).AsyncSendNewWitnessHash), hash, number) } +// AsyncSendSignedWitnessAnnouncement mocks base method. +func (m *MockWitnessPeer) AsyncSendSignedWitnessAnnouncement(ann wit.SignedWitnessAnnouncement) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "AsyncSendSignedWitnessAnnouncement", ann) +} + +// AsyncSendSignedWitnessAnnouncement indicates an expected call of AsyncSendSignedWitnessAnnouncement. +func (mr *MockWitnessPeerMockRecorder) AsyncSendSignedWitnessAnnouncement(ann interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AsyncSendSignedWitnessAnnouncement", reflect.TypeOf((*MockWitnessPeer)(nil).AsyncSendSignedWitnessAnnouncement), ann) +} + // Close mocks base method. func (m *MockWitnessPeer) Close() { m.ctrl.T.Helper() diff --git a/eth/peerset.go b/eth/peerset.go index 43ec2e1832..4b9109f6fb 100644 --- a/eth/peerset.go +++ b/eth/peerset.go @@ -298,16 +298,33 @@ func (ps *peerSet) peer(id string) *ethPeer { return ps.peers[id] } +// getOnePeerWithWitness returns a candidate body source for `hash`. Body-known +// peers (those that broadcast or served the body) are preferred; if none is +// available we fall back to peers that have only relayed a WIT2 signed +// announcement. The fast-path latency win depends on this fallback: at hop>=2 +// the signed announce arrives long before the body broadcast, and the only +// peer that could serve us bytes is the one that forwarded the announce. +// +// Asking an announce-only peer is safe because byte-blame in +// witnessManager.verifyAgainstSignedHash only drops on a confirmed hash +// mismatch — empty/unavailable responses surface as soft failures, not drops. func (ps *peerSet) getOnePeerWithWitness(hash common.Hash) *ethPeer { ps.lock.RLock() defer ps.lock.RUnlock() + var announceFallback *ethPeer for _, p := range ps.peers { - if p.witPeer != nil && p.witPeer.Peer.KnownWitnessContainsHash(hash) { + if p.witPeer == nil { + continue + } + if p.witPeer.Peer.KnownWitnessContainsHash(hash) { return p } + if announceFallback == nil && p.witPeer.Peer.KnownAnnounceContainsHash(hash) { + announceFallback = p + } } - return nil + return announceFallback } // peersWithoutWitness retrives a list of peers that do nor have a given witness @@ -328,6 +345,32 @@ func (ps *peerSet) peersWithoutWitness(hash common.Hash) []*witPeer { return list } +// peersWithoutSignedAnnounce returns peers that have neither received the body +// for `hash` nor seen a signed announcement for it. Used by WIT2 relay to skip +// peers that already know about the announcement, preventing announce storms, +// without ever assuming a peer that only saw an announcement holds the body. +func (ps *peerSet) peersWithoutSignedAnnounce(hash common.Hash) []*witPeer { + ps.lock.RLock() + defer ps.lock.RUnlock() + + list := make([]*witPeer, 0, len(ps.peers)) + + for _, p := range ps.peers { + if p.witPeer == nil { + continue + } + if p.witPeer.Peer.KnownWitnessContainsHash(hash) { + continue + } + if p.witPeer.Peer.KnownAnnounceContainsHash(hash) { + continue + } + list = append(list, p.witPeer) + } + + return list +} + // peersWithoutBlock retrieves a list of peers that do not have a given block in // their set of known hashes so it might be propagated to them. func (ps *peerSet) peersWithoutBlock(hash common.Hash) []*ethPeer { diff --git a/eth/peerset_test.go b/eth/peerset_test.go index 16d4d9dc1b..1062644efe 100644 --- a/eth/peerset_test.go +++ b/eth/peerset_test.go @@ -6,6 +6,8 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/eth/protocols/eth" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" ) @@ -61,3 +63,73 @@ func TestPeerSetForgetTransactionsEmpty(t *testing.T) { // ForgetTransactions should not panic with no peers ps.ForgetTransactions([]common.Hash{{1}, {2}, {3}}) } + +// TestGetOnePeerWithWitnessPrefersBodyOverAnnounce locks in the WIT2 fast-path +// invariant: when at least one peer has the body (knownWitnesses) and another +// has only seen the signed announce (knownAnnounces), body-known wins. If a +// future change inverts this, fetchers will silently prefer slower sources. +func TestGetOnePeerWithWitnessPrefersBodyOverAnnounce(t *testing.T) { + t.Parallel() + + ps := newPeerSet() + defer ps.close() + + hash := common.HexToHash("0xabc") + + bodyPeer := newRegisteredPeerForTest(t, ps) + announcePeer := newRegisteredPeerForTest(t, ps) + + bodyPeer.witPeer.Peer.AddKnownWitness(hash) + announcePeer.witPeer.Peer.(*wit.Peer).AddKnownAnnounce(hash) + + got := ps.getOnePeerWithWitness(hash) + if got == nil { + t.Fatal("expected a candidate; got nil") + } + if got.ID() != bodyPeer.ID() { + t.Fatalf("body-known peer must win over announce-only: got %s want %s", + got.ID(), bodyPeer.ID()) + } +} + +// TestGetOnePeerWithWitnessFallsBackToAnnounce locks in the fix for the +// fast-path regression: when no peer has the body yet, the announce-known +// fallback IS selectable. Without this, a hop-2 stateless validator with a +// verified signed announce would have nothing to fetch from until the body +// broadcast finally arrived — eliminating the WIT2 latency win. +func TestGetOnePeerWithWitnessFallsBackToAnnounce(t *testing.T) { + t.Parallel() + + ps := newPeerSet() + defer ps.close() + + hash := common.HexToHash("0xdef") + + announcePeer := newRegisteredPeerForTest(t, ps) + announcePeer.witPeer.Peer.(*wit.Peer).AddKnownAnnounce(hash) + + got := ps.getOnePeerWithWitness(hash) + if got == nil { + t.Fatal("announce-only peer must be a fetch candidate after the WIT2 fast-path fix") + } + if got.ID() != announcePeer.ID() { + t.Fatalf("expected announce-only peer; got %s", got.ID()) + } +} + +func newRegisteredPeerForTest(t *testing.T, ps *peerSet) *ethPeer { + t.Helper() + var id enode.ID + rand.Read(id[:]) + _, net := p2p.MsgPipe() + t.Cleanup(func() { net.Close() }) + + p2pPeer := p2p.NewPeer(id, "fast-path-peer", nil) + ethP := eth.NewPeer(eth.ETH68, p2pPeer, net, nil) + witP := wit.NewPeer(wit.WIT2, p2pPeer, net, log.New()) + + if err := ps.registerPeer(ethP, nil, witP); err != nil { + t.Fatalf("register peer: %v", err) + } + return ps.peer(ethP.ID()) +} diff --git a/eth/protocols/wit/broadcast.go b/eth/protocols/wit/broadcast.go index 43d5a43b65..4fedeb4321 100644 --- a/eth/protocols/wit/broadcast.go +++ b/eth/protocols/wit/broadcast.go @@ -23,6 +23,13 @@ func (p *Peer) broadcastWitness() { } p.logger.Debug("propagated witness hashes", "hashes", packet.Hashes, "numbers", packet.Numbers) + case packet := <-p.queuedSignedAnns: + if err := p.sendSignedNewWitnessHashes(packet); err != nil { + log.Debug("failed to send signed witness announcements", "error", err) + return + } + p.logger.Debug("propagated signed witness announcements", "count", len(packet.Announcements)) + case <-p.term: return } diff --git a/eth/protocols/wit/handler.go b/eth/protocols/wit/handler.go index db4b99f656..ec424b629d 100644 --- a/eth/protocols/wit/handler.go +++ b/eth/protocols/wit/handler.go @@ -133,6 +133,16 @@ var wit1 = map[uint64]msgHandler{ WitnessMetadataMsg: handleWitnessMetadata, } +var wit2 = map[uint64]msgHandler{ + GetMsgWitness: handleGetWitness, + MsgWitness: handleWitness, + NewWitnessMsg: handleNewWitness, + NewWitnessHashesMsg: handleNewWitnessHashes, + GetWitnessMetadataMsg: handleGetWitnessMetadata, + WitnessMetadataMsg: handleWitnessMetadata, + SignedNewWitnessHashesMsg: handleSignedNewWitnessHashes, +} + // HandleMessage is invoked whenever an inbound message is received from a // remote peer on the `wit` protocol. The remote connection is torn down upon // returning any error. @@ -167,6 +177,8 @@ func handleMessage(backend Backend, peer *Peer) error { // Select the appropriate handler map based on protocol version var handlers map[uint64]msgHandler switch peer.Version() { + case WIT2: + handlers = wit2 case WIT1: handlers = wit1 case WIT0: diff --git a/eth/protocols/wit/handlers.go b/eth/protocols/wit/handlers.go index b7319d1869..46c77a5129 100644 --- a/eth/protocols/wit/handlers.go +++ b/eth/protocols/wit/handlers.go @@ -63,6 +63,32 @@ func handleNewWitnessHashes(backend Backend, msg Decoder, peer *Peer) error { return backend.Handle(peer, req) } +// MaxSignedAnnouncesPerPacket caps how many signed witness announcements a +// single SignedNewWitnessHashesPacket may carry. Each announcement triggers +// ecrecover and a header lookup downstream, so an unbounded packet is a cheap +// DoS vector. 64 matches maxQueuedWitnessAnns: the relay queue and the wire +// limit move together so a packet that fits the queue also fits the wire. +const MaxSignedAnnouncesPerPacket = 64 + +// handleSignedNewWitnessHashes processes a SignedNewWitnessHashesPacket from a +// peer (WIT2+). The packet is forwarded to the backend, which is responsible +// for signature verification, validator-set check, relay, and triggering the +// body fetch. We cap the announcement count at decode time so the backend +// never sees an unbounded packet. +func handleSignedNewWitnessHashes(backend Backend, msg Decoder, peer *Peer) error { + req := new(SignedNewWitnessHashesPacket) + if err := msg.Decode(&req); err != nil { + return fmt.Errorf("failed to decode SignedNewWitnessHashesPacket: %w", err) + } + if len(req.Announcements) == 0 { + return fmt.Errorf("invalid SignedNewWitnessHashesPacket: Announcements cannot be empty") + } + if len(req.Announcements) > MaxSignedAnnouncesPerPacket { + return fmt.Errorf("SignedNewWitnessHashesPacket exceeds cap: %d > %d", len(req.Announcements), MaxSignedAnnouncesPerPacket) + } + return backend.Handle(peer, req) +} + // handleGetWitnessMetadata processes a GetWitnessMetadataPacket request from a peer. func handleGetWitnessMetadata(backend Backend, msg Decoder, peer *Peer) error { // Decode the GetWitnessMetadataPacket request diff --git a/eth/protocols/wit/peer.go b/eth/protocols/wit/peer.go index 6008ad1dfd..c23d2e424c 100644 --- a/eth/protocols/wit/peer.go +++ b/eth/protocols/wit/peer.go @@ -21,8 +21,10 @@ const ( maxQueuedWitnesses = 10 // maxQueuedWitnessAnns is the maximum number of witness announcements to queue up before - // dropping broadcasts - maxQueuedWitnessAnns = 10 + // dropping broadcasts. Bumped from 10 to 64 in WIT2 to absorb transitive-relay bursts; + // each announcement is small (33 bytes per entry, 130 bytes signed) so the memory + // footprint stays well under 10KB per peer. + maxQueuedWitnessAnns = 64 ) // Peer is a collection of relevant information we have about a `wit` peer. @@ -35,9 +37,11 @@ type Peer struct { logger log.Logger // Contextual logger with the peer id injected - knownWitnesses *KnownCache // Set of witness hashes (`witness.Headers[0].Hash()`) known to be known by this peer - queuedWitness chan *stateless.Witness // Queue of witness to broadcast to this peer - queuedWitnessAnns chan *NewWitnessHashesPacket // Queue of witness announcements to this peer + knownWitnesses *KnownCache // Witness hashes this peer is known to HAVE (body served, body broadcast received). Feeds body-fetch peer selection. + knownAnnounces *KnownCache // Witness hashes this peer has SEEN an announcement for, but not necessarily the body. Used only to suppress redundant announce relay. + queuedWitness chan *stateless.Witness // Queue of witness to broadcast to this peer + queuedWitnessAnns chan *NewWitnessHashesPacket // Queue of unsigned witness announcements to this peer (WIT1) + queuedSignedAnns chan *SignedNewWitnessHashesPacket // Queue of signed witness announcements to this peer (WIT2) reqDispatch chan *request // Dispatch channel to send witness requests and track them until fulfillment reqCancel chan *cancel // Dispatch channel to cancel pending witness requests @@ -56,8 +60,10 @@ func NewPeer(version uint, p *p2p.Peer, rw p2p.MsgReadWriter, logger log.Logger) version: version, logger: logger.With("peer", id), knownWitnesses: newKnownCache(maxKnownWitnesses), + knownAnnounces: newKnownCache(maxKnownWitnesses), queuedWitness: make(chan *stateless.Witness, maxQueuedWitnesses), queuedWitnessAnns: make(chan *NewWitnessHashesPacket, maxQueuedWitnessAnns), + queuedSignedAnns: make(chan *SignedNewWitnessHashesPacket, maxQueuedWitnessAnns), reqDispatch: make(chan *request), reqCancel: make(chan *cancel), resDispatch: make(chan *response), @@ -86,6 +92,12 @@ func (p *Peer) sendNewWitnessHashes(packet *NewWitnessHashesPacket) error { return p2p.Send(p.rw, NewWitnessHashesMsg, packet) } +// sendSignedNewWitnessHashes sends signed witness announcements to the peer. +// Only valid for WIT2+ peers; the caller must check Version() before invoking. +func (p *Peer) sendSignedNewWitnessHashes(packet *SignedNewWitnessHashesPacket) error { + return p2p.Send(p.rw, SignedNewWitnessHashesMsg, packet) +} + // AsyncSendNewWitness queues an entire witness for broadcast to the peer. The // witness will be sent in the background to avoid blocking the caller. If the // queue is full, the witness will be dropped. @@ -116,6 +128,26 @@ func (p *Peer) AsyncSendNewWitnessHash(hash common.Hash, number uint64) { } } +// AsyncSendSignedWitnessAnnouncement queues a BP-signed witness announcement +// for broadcast to the peer. The peer must speak WIT2 or higher; callers are +// responsible for checking Version(). The block hash is added to the peer's +// announce-known set (NOT the body-known set) so subsequent announce gossip +// on the same hash is suppressed, while body-fetch peer selection is not +// misled into asking this peer for bytes it does not yet have. +func (p *Peer) AsyncSendSignedWitnessAnnouncement(ann SignedWitnessAnnouncement) { + if p.version < WIT2 { + return + } + select { + case p.queuedSignedAnns <- &SignedNewWitnessHashesPacket{ + Announcements: []SignedWitnessAnnouncement{ann}, + }: + p.knownAnnounces.Add(ann.BlockHash) + default: + p.logger.Debug("Dropped signed witness announcement.", "blockHash", ann.BlockHash, "peer", p.id) + } +} + // RequestWitness sends a request to the peer for witnesses by witness pages. func (p *Peer) RequestWitness(witnessPages []WitnessPageRequest, sink chan *Response) (*Request, error) { log.Debug("Requesting witnesses", "peer", p.id, "count", len(witnessPages)) @@ -194,6 +226,20 @@ func (p *Peer) AddKnownWitness(hash common.Hash) { p.knownWitnesses.Add(hash) } +// AddKnownAnnounce records that this peer has seen the signed announcement for +// `hash`, without claiming the peer holds the body. Used to suppress redundant +// announce-relay only; body-fetch peer selection ignores this set. +func (p *Peer) AddKnownAnnounce(hash common.Hash) { + p.knownAnnounces.Add(hash) +} + +// KnownAnnounceContainsHash reports whether this peer is known to have seen an +// announcement for `hash` (either inbound or outbound). False does not imply +// the peer is unaware — only that this side has no record. +func (p *Peer) KnownAnnounceContainsHash(hash common.Hash) bool { + return p.knownAnnounces.hashes.Contains(hash) +} + // KnownWitnessesCount returns the number of known witness. func (p *Peer) KnownWitnessesCount() int { return p.knownWitnesses.Cardinality() diff --git a/eth/protocols/wit/protocol.go b/eth/protocols/wit/protocol.go index c8238062c7..3d7cc897f9 100644 --- a/eth/protocols/wit/protocol.go +++ b/eth/protocols/wit/protocol.go @@ -1,16 +1,24 @@ package wit import ( + "encoding/binary" "errors" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/crypto" ) // Constants to match up protocol versions and messages const ( WIT0 = 1 WIT1 = 2 + // WIT2 adds BP-signed witness announcements, allowing peers to fast-validate + // announces via signature recovery (microseconds) instead of full block + // execution (~500ms). Signed announces are safe to relay transitively + // because byte-correctness is verified at fetch time against the signed + // witness hash; content-correctness blame attaches to the BP signer. + WIT2 = 3 ) // ProtocolName is the official short name of the `wit` protocol used during @@ -19,24 +27,36 @@ const ProtocolName = "wit" // ProtocolVersions are the supported versions of the `wit` protocol (first // is primary). -var ProtocolVersions = []uint{WIT1, WIT0} +var ProtocolVersions = []uint{WIT2, WIT1, WIT0} // protocolLengths are the number of implemented message corresponding to // different protocol versions. -var protocolLengths = map[uint]uint64{WIT1: 6, WIT0: 4} +var protocolLengths = map[uint]uint64{WIT2: 7, WIT1: 6, WIT0: 4} // maxMessageSize is the maximum cap on the size of a protocol message. const maxMessageSize = 16 * 1024 * 1024 const ( - NewWitnessMsg = 0x00 - NewWitnessHashesMsg = 0x01 - GetMsgWitness = 0x02 - MsgWitness = 0x03 - GetWitnessMetadataMsg = 0x04 - WitnessMetadataMsg = 0x05 + NewWitnessMsg = 0x00 + NewWitnessHashesMsg = 0x01 + GetMsgWitness = 0x02 + MsgWitness = 0x03 + GetWitnessMetadataMsg = 0x04 + WitnessMetadataMsg = 0x05 + SignedNewWitnessHashesMsg = 0x06 // WIT2: signed witness announcement, safe to relay ) +// SignatureLength is the length of a BP signature over a witness announcement (r||s||v). +const SignatureLength = 65 + +// witnessAnnounceDomainTag is a unique prefix mixed into the signing digest so a +// signature produced for a WIT2 announcement cannot be replayed in any other +// context that signs 32-byte digests under the BP's signing key (block sealing, +// future signed messages, etc.). Cross-context replay is structurally +// impossible rather than only computationally hard, even if a future caller +// happens to share the same signFn mimetype. +var witnessAnnounceDomainTag = []byte("bor-wit2-announce\x00") + var ( errMsgTooLarge = errors.New("message too long") errDecode = errors.New("invalid message") @@ -91,6 +111,29 @@ type NewWitnessHashesPacket struct { Numbers []uint64 } +// SignedWitnessAnnouncement is a BP-authenticated commitment to the existence +// of a specific witness for a specific block. The signer commits to: +// +// keccak256(BlockHash || BlockNumber || WitnessHash) +// +// Receivers verify the signature with ecrecover and check that the recovered +// address is the validator scheduled for BlockNumber. Once verified, the +// announcement is safe to relay to other peers without local execution; any +// downstream receiver re-verifies independently. Bytes returned by a serving +// peer are checked against WitnessHash, so byte-correctness blame attaches to +// the server while content-correctness (state-root) blame attaches to the BP. +type SignedWitnessAnnouncement struct { + BlockHash common.Hash + BlockNumber uint64 + WitnessHash common.Hash // WIT2 chunked-aggregate commitment over canonical witness RLP; see core/stateless.WitnessCommitHash + Signature []byte // 65-byte secp256k1 signature +} + +// SignedNewWitnessHashesPacket carries one or more signed witness announcements. +type SignedNewWitnessHashesPacket struct { + Announcements []SignedWitnessAnnouncement +} + // GetWitnessMetadataRequest represents a request for witness metadata (just page count, no data) type GetWitnessMetadataRequest struct { Hashes []common.Hash // Block hashes to get metadata for @@ -129,6 +172,34 @@ func (w *NewWitnessPacket) Kind() byte { return NewWitnessMsg } func (w *NewWitnessHashesPacket) Name() string { return "NewWitnessHashes" } func (w *NewWitnessHashesPacket) Kind() byte { return NewWitnessHashesMsg } +func (w *SignedNewWitnessHashesPacket) Name() string { return "SignedNewWitnessHashes" } +func (w *SignedNewWitnessHashesPacket) Kind() byte { return SignedNewWitnessHashesMsg } + +// WitnessAnnouncementSigningPreImage returns the unhashed bytes a BP signs to +// authenticate a witness announcement. Production signing flows (clef, +// keystoreWallet.SignData) hash their input once before signing, so callers +// MUST pass this preimage, not WitnessAnnouncementSigningHash. The verifier +// independently computes WitnessAnnouncementSigningHash (= keccak256 of this +// preimage) and ecrecovers against it. Mismatching hash-vs-preimage between +// signer and verifier silently breaks every WIT2 signature, hence the split. +func WitnessAnnouncementSigningPreImage(blockHash common.Hash, blockNumber uint64, witnessHash common.Hash) []byte { + const fixedLen = common.HashLength + 8 + common.HashLength + buf := make([]byte, len(witnessAnnounceDomainTag)+fixedLen) + n := copy(buf, witnessAnnounceDomainTag) + copy(buf[n:], blockHash[:]) + binary.BigEndian.PutUint64(buf[n+common.HashLength:], blockNumber) + copy(buf[n+common.HashLength+8:], witnessHash[:]) + return buf +} + +// WitnessAnnouncementSigningHash returns the digest a BP signs to authenticate +// a witness announcement. Must be byte-identical on both signer and verifier. +// Used by the verifier; signers must instead feed the preimage into the wallet +// SignData path, which keccaks once internally. +func WitnessAnnouncementSigningHash(blockHash common.Hash, blockNumber uint64, witnessHash common.Hash) common.Hash { + return crypto.Keccak256Hash(WitnessAnnouncementSigningPreImage(blockHash, blockNumber, witnessHash)) +} + func (w *GetWitnessMetadataRequest) Name() string { return "GetWitnessMetadata" } func (w *GetWitnessMetadataRequest) Kind() byte { return GetWitnessMetadataMsg } diff --git a/eth/protocols/wit/protocol_wit2_test.go b/eth/protocols/wit/protocol_wit2_test.go new file mode 100644 index 0000000000..c2ad7f33a9 --- /dev/null +++ b/eth/protocols/wit/protocol_wit2_test.go @@ -0,0 +1,91 @@ +package wit + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// TestWitnessAnnouncementSigningHashStable pins the digest format. If this +// changes, every signed announcement on the network breaks at once — bump the +// protocol version explicitly. The test value is recomputed independently to +// catch silent reordering of the concatenation. +func TestWitnessAnnouncementSigningHashStable(t *testing.T) { + blockHash := common.HexToHash("0x1111111111111111111111111111111111111111111111111111111111111111") + blockNumber := uint64(0x0102030405060708) + witnessHash := common.HexToHash("0x2222222222222222222222222222222222222222222222222222222222222222") + + got := WitnessAnnouncementSigningHash(blockHash, blockNumber, witnessHash) + + // Manual recomposition: domain-tag || blockHash || blockNumber (big-endian u64) || witnessHash + want := crypto.Keccak256Hash( + witnessAnnounceDomainTag, + blockHash.Bytes(), + []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, + witnessHash.Bytes(), + ) + if got != want { + t.Fatalf("signing-hash format drift: got %s want %s", got.Hex(), want.Hex()) + } +} + +// TestWitnessAnnouncementSigningHashDomainSeparated guards that the witness +// announce digest cannot collide with a raw 3-field concatenation lacking the +// domain tag. This is the structural check that a header-seal signature, or +// any other future SignBytes context, cannot be replayed as a wit2 announce. +func TestWitnessAnnouncementSigningHashDomainSeparated(t *testing.T) { + blockHash := common.HexToHash("0xaa") + blockNumber := uint64(7) + witnessHash := common.HexToHash("0xbb") + + withTag := WitnessAnnouncementSigningHash(blockHash, blockNumber, witnessHash) + withoutTag := crypto.Keccak256Hash( + blockHash.Bytes(), + []byte{0, 0, 0, 0, 0, 0, 0, 7}, + witnessHash.Bytes(), + ) + if withTag == withoutTag { + t.Fatalf("domain tag absent: digests collide, replay across signing contexts is possible") + } +} + +// TestWitnessAnnouncementSigningHashSensitive ensures every input field is +// covered by the digest — flipping any byte in any input must change the hash. +// Catches a bug where a refactor silently drops a field from the digest. +func TestWitnessAnnouncementSigningHashSensitive(t *testing.T) { + base := WitnessAnnouncementSigningHash( + common.HexToHash("0xaa"), + 1, + common.HexToHash("0xbb"), + ) + cases := []struct { + name string + blockH common.Hash + num uint64 + witnessH common.Hash + }{ + {"different blockHash", common.HexToHash("0xab"), 1, common.HexToHash("0xbb")}, + {"different blockNumber", common.HexToHash("0xaa"), 2, common.HexToHash("0xbb")}, + {"different witnessHash", common.HexToHash("0xaa"), 1, common.HexToHash("0xbc")}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := WitnessAnnouncementSigningHash(tc.blockH, tc.num, tc.witnessH); got == base { + t.Fatalf("digest unchanged when %s differed", tc.name) + } + }) + } +} + +// TestProtocolVersionsContainsWIT2 guards the handshake advertising. WIT2 must +// be advertised first (preferred) for new connections. If WIT1 ever leaks +// ahead of WIT2, peers downgrade silently and the fast path stops working. +func TestProtocolVersionsContainsWIT2(t *testing.T) { + if len(ProtocolVersions) == 0 || ProtocolVersions[0] != WIT2 { + t.Fatalf("expected WIT2 first in ProtocolVersions, got %v", ProtocolVersions) + } + if protocolLengths[WIT2] != 7 { + t.Fatalf("WIT2 protocolLengths must be 7 (one new message added), got %d", protocolLengths[WIT2]) + } +} From 6aef74aa874880dab7f819c5fc957eeddb1e16bf Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Thu, 30 Apr 2026 17:29:44 -0300 Subject: [PATCH 02/14] =?UTF-8?q?wit2:=20address=20lint=20=E2=80=94=20drop?= =?UTF-8?q?=20unused=20symbols,=20run=20goimports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - eth/handler_wit2.go: remove unused errInvalidSigner, contextBackground, wit2SpanLookupMissMeter, and now-unused context import - core/stateless/witness_commit_bench_test.go: drop redundant c := c loop-var copies (Go 1.22+ copyloopvar) - goimports formatting on accounts/accounts.go, witness_commit_bench_test.go, witness_commit_helpers_test.go, eth/fetcher/witness_manager.go, eth/fetcher/witness_manager_wit2_test.go, eth/handler_wit2.go, eth/protocols/wit/protocol.go --- accounts/accounts.go | 10 ++-- core/stateless/witness_commit_bench_test.go | 14 +++--- core/stateless/witness_commit_helpers_test.go | 1 + eth/fetcher/witness_manager.go | 50 +++++++++---------- eth/fetcher/witness_manager_wit2_test.go | 1 - eth/handler_wit2.go | 29 ++++------- eth/protocols/wit/protocol.go | 14 +++--- 7 files changed, 55 insertions(+), 64 deletions(-) diff --git a/accounts/accounts.go b/accounts/accounts.go index c775864939..2a81266f5e 100644 --- a/accounts/accounts.go +++ b/accounts/accounts.go @@ -37,12 +37,12 @@ type Account struct { } const ( - MimetypeDataWithValidator = "data/validator" - MimetypeTypedData = "data/typed" - MimetypeClique = "application/x-clique-header" - MimetypeBor = "application/x-bor-header" + MimetypeDataWithValidator = "data/validator" + MimetypeTypedData = "data/typed" + MimetypeClique = "application/x-clique-header" + MimetypeBor = "application/x-bor-header" MimetypeBorWitnessAnnounce = "application/x-bor-wit2-announce" - MimetypeTextPlain = "text/plain" + MimetypeTextPlain = "text/plain" ) // Wallet represents a software or hardware wallet that might contain one or more diff --git a/core/stateless/witness_commit_bench_test.go b/core/stateless/witness_commit_bench_test.go index 751a78af3c..c27796f522 100644 --- a/core/stateless/witness_commit_bench_test.go +++ b/core/stateless/witness_commit_bench_test.go @@ -118,15 +118,14 @@ func BenchmarkCommit_C_PerNodeMerkle(b *testing.B) { func BenchmarkCommit_B_ChunkSize(b *testing.B) { pw := prepareWitness(b, 50) chunks := []int{ - 512 * 1024, // 512 KiB - 1 * 1024 * 1024, // 1 MiB - 2 * 1024 * 1024, // 2 MiB - 4 * 1024 * 1024, // 4 MiB - 8 * 1024 * 1024, // 8 MiB - 15 * 1024 * 1024, // 15 MiB (current wire page) + 512 * 1024, // 512 KiB + 1 * 1024 * 1024, // 1 MiB + 2 * 1024 * 1024, // 2 MiB + 4 * 1024 * 1024, // 4 MiB + 8 * 1024 * 1024, // 8 MiB + 15 * 1024 * 1024, // 15 MiB (current wire page) } for _, c := range chunks { - c := c b.Run(fmt.Sprintf("chunk=%dKiB/cores=8", c>>10), func(b *testing.B) { b.SetBytes(int64(len(pw.rlpBytes))) b.ReportAllocs() @@ -139,7 +138,6 @@ func BenchmarkCommit_B_ChunkSize(b *testing.B) { // Also try cores=12 (all logical cores) at the smallest chunks to // see if the M4 Pro's E-cores help at finer granularity. for _, c := range []int{512 * 1024, 1 * 1024 * 1024, 2 * 1024 * 1024} { - c := c b.Run(fmt.Sprintf("chunk=%dKiB/cores=12", c>>10), func(b *testing.B) { b.SetBytes(int64(len(pw.rlpBytes))) b.ReportAllocs() diff --git a/core/stateless/witness_commit_helpers_test.go b/core/stateless/witness_commit_helpers_test.go index ae30d6204b..c7ea372c47 100644 --- a/core/stateless/witness_commit_helpers_test.go +++ b/core/stateless/witness_commit_helpers_test.go @@ -209,6 +209,7 @@ func merkleRoot(leaves []common.Hash) common.Hash { // walker's per-node hash work is amortized into existing state-prep. // - The walker traversal is O(num_nodes × avg_refs_per_node) map lookups, // dwarfed by keccak throughput on the underlying bytes. +// // We measure D's incremental cost over the chain-prep baseline as just the // parallel keccak phase. The reachability walk lives in // candidateD_IntrinsicWalk for the correctness test below. diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index f48722f4a0..1ae1f2b215 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -77,15 +77,15 @@ type cacheWitnessForServingFn func(blockHash common.Hash, witnessBytes []byte, w // for blocks, isolating it from the main BlockFetcher loop. type witnessManager struct { // Parent fetcher fields/methods required - parentQuit <-chan struct{} // Parent fetcher's quit channel - parentDropPeer peerDropFn // Function to drop a misbehaving peer - parentJailPeer peerJailFn // Function to jail a peer to prevent reconnection (optional) - parentEnqueueCh chan<- *enqueueRequest // Channel to send completed blocks+witnesses back - parentGetBlock blockRetrievalFn // Function to check if block is known locally - parentGetHeader HeaderRetrievalFn // Function to check if header is known locally (needed for checks) - parentChainHeight chainHeightFn // Retrieve chain height for distance checks - parentCurrentHeader currentHeaderFn // Retrieve current block header for gas limit - parentSignedWitnessHash signedWitnessHashFn // WIT2: lookup a BP-signed witness hash for byte-correctness verification + parentQuit <-chan struct{} // Parent fetcher's quit channel + parentDropPeer peerDropFn // Function to drop a misbehaving peer + parentJailPeer peerJailFn // Function to jail a peer to prevent reconnection (optional) + parentEnqueueCh chan<- *enqueueRequest // Channel to send completed blocks+witnesses back + parentGetBlock blockRetrievalFn // Function to check if block is known locally + parentGetHeader HeaderRetrievalFn // Function to check if header is known locally (needed for checks) + parentChainHeight chainHeightFn // Retrieve chain height for distance checks + parentCurrentHeader currentHeaderFn // Retrieve current block header for gas limit + parentSignedWitnessHash signedWitnessHashFn // WIT2: lookup a BP-signed witness hash for byte-correctness verification parentCacheWitnessForServing cacheWitnessForServingFn // WIT2: hand bytes to the handler for pre-import serving by peers // Witness-specific state @@ -137,24 +137,24 @@ func newWitnessManager( ) m := &witnessManager{ - parentQuit: parentQuit, - parentDropPeer: parentDropPeer, - parentJailPeer: parentJailPeer, - parentEnqueueCh: parentEnqueueCh, - parentGetBlock: parentGetBlock, - parentGetHeader: parentGetHeader, - parentChainHeight: parentChainHeight, - parentCurrentHeader: parentCurrentHeader, + parentQuit: parentQuit, + parentDropPeer: parentDropPeer, + parentJailPeer: parentJailPeer, + parentEnqueueCh: parentEnqueueCh, + parentGetBlock: parentGetBlock, + parentGetHeader: parentGetHeader, + parentChainHeight: parentChainHeight, + parentCurrentHeader: parentCurrentHeader, parentSignedWitnessHash: parentSignedWitnessHash, parentCacheWitnessForServing: parentCacheWitnessForServing, - pending: make(map[common.Hash]*witnessRequestState), - witnessUnavailable: make(map[common.Hash]time.Time), - witnessCache: witnessCache, - gasCeil: gasCeil, - injectNeedWitnessCh: make(chan *injectBlockNeedWitnessMsg, 10), - injectWitnessCh: make(chan *injectedWitnessMsg, 10), - witnessTimer: time.NewTimer(0), - pokeCh: make(chan struct{}, 1), + pending: make(map[common.Hash]*witnessRequestState), + witnessUnavailable: make(map[common.Hash]time.Time), + witnessCache: witnessCache, + gasCeil: gasCeil, + injectNeedWitnessCh: make(chan *injectBlockNeedWitnessMsg, 10), + injectWitnessCh: make(chan *injectedWitnessMsg, 10), + witnessTimer: time.NewTimer(0), + pokeCh: make(chan struct{}, 1), } m.stopAndDrainTimer() return m diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go index 71aad5518f..e621d276a5 100644 --- a/eth/fetcher/witness_manager_wit2_test.go +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -232,7 +232,6 @@ func TestProcessWitnessResponseSkipsCheckWhenNoSignature(t *testing.T) { } } - // TestCacheVerifiedWitnessSkipsWhenNoSignedHash is the regression for the // blame-asymmetry bug: caching unverified bytes for serving means a downstream // peer would ask us for the body, get bytes that don't match THEIR BP-signed diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index e320d4414c..cd5e1d446c 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -1,7 +1,6 @@ package eth import ( - "context" "errors" "sync" "time" @@ -17,22 +16,16 @@ import ( "github.com/ethereum/go-ethereum/metrics" ) -var ( - errInvalidSignatureLength = errors.New("invalid wit2 announce signature length") - errInvalidSigner = errors.New("wit2 announce signer is not a current validator") -) - -func contextBackground() context.Context { return context.Background() } +var errInvalidSignatureLength = errors.New("invalid wit2 announce signature length") // Metrics for WIT2 signed-announce path. Emitted only when metrics are enabled. var ( - wit2RelayInMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_in", nil) - wit2RelayOutMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_out", nil) - wit2InvalidSigMeter = metrics.NewRegisteredMeter("eth/wit2/announce/invalid_sig", nil) - wit2NotValidatorMeter = metrics.NewRegisteredMeter("eth/wit2/announce/not_validator", nil) - wit2SpanLookupMissMeter = metrics.NewRegisteredMeter("eth/wit2/announce/span_lookup_miss", nil) - wit2DuplicateMeter = metrics.NewRegisteredMeter("eth/wit2/announce/duplicate", nil) - wit2BroadcastByteMismatchMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_byte_mismatch", nil) + wit2RelayInMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_in", nil) + wit2RelayOutMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_out", nil) + wit2InvalidSigMeter = metrics.NewRegisteredMeter("eth/wit2/announce/invalid_sig", nil) + wit2NotValidatorMeter = metrics.NewRegisteredMeter("eth/wit2/announce/not_validator", nil) + wit2DuplicateMeter = metrics.NewRegisteredMeter("eth/wit2/announce/duplicate", nil) + wit2BroadcastByteMismatchMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_byte_mismatch", nil) wit2BroadcastUnverifiedSkippedMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_unverified_skipped", nil) wit2HeaderUnknownMeter = metrics.NewRegisteredMeter("eth/wit2/announce/header_unknown", nil) wit2ConflictingWitnessHashMeter = metrics.NewRegisteredMeter("eth/wit2/announce/conflicting_witness_hash", nil) @@ -58,10 +51,10 @@ const ( // Lifecycle is tied to the eth handler's peer registration; entries are // cleaned up when the peer disconnects. type peerWit2State struct { - tokens float64 - lastRefill time.Time - strikeCount int - firstStrikeAt time.Time + tokens float64 + lastRefill time.Time + strikeCount int + firstStrikeAt time.Time } type peerWit2Tracker struct { diff --git a/eth/protocols/wit/protocol.go b/eth/protocols/wit/protocol.go index 3d7cc897f9..4c8d09ed2d 100644 --- a/eth/protocols/wit/protocol.go +++ b/eth/protocols/wit/protocol.go @@ -37,13 +37,13 @@ var protocolLengths = map[uint]uint64{WIT2: 7, WIT1: 6, WIT0: 4} const maxMessageSize = 16 * 1024 * 1024 const ( - NewWitnessMsg = 0x00 - NewWitnessHashesMsg = 0x01 - GetMsgWitness = 0x02 - MsgWitness = 0x03 - GetWitnessMetadataMsg = 0x04 - WitnessMetadataMsg = 0x05 - SignedNewWitnessHashesMsg = 0x06 // WIT2: signed witness announcement, safe to relay + NewWitnessMsg = 0x00 + NewWitnessHashesMsg = 0x01 + GetMsgWitness = 0x02 + MsgWitness = 0x03 + GetWitnessMetadataMsg = 0x04 + WitnessMetadataMsg = 0x05 + SignedNewWitnessHashesMsg = 0x06 // WIT2: signed witness announcement, safe to relay ) // SignatureLength is the length of a BP signature over a witness announcement (r||s||v). From 4fdc09c65e967839f4277421e8328153b7d66fb7 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Thu, 30 Apr 2026 17:35:02 -0300 Subject: [PATCH 03/14] =?UTF-8?q?wit2:=20address=20review=20=E2=80=94=20re?= =?UTF-8?q?use=20encoded=20body,=20skip=20WIT1=20encode,=20fix=20peer=20dr?= =?UTF-8?q?op?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - eth/fetcher/witness_manager.go: verifyAgainstSignedHash now returns the canonically-encoded body and signed hash on success, so the pre-import serving cache no longer re-encodes the same witness (~14 ms saved per verified fetch on 50 MiB witnesses). cacheVerifiedWitnessForServing takes the precomputed body directly. - eth/fetcher/witness_manager.go: local EncodeRLP failure inside verifyAgainstSignedHash no longer drops the peer — re-encoding bytes the peer already delivered as valid RLP is a local invariant violation, not peer misbehavior. Mirrors the pattern already used by the cache path. - eth/handler_wit.go: hoist signedWitnesses.get(hash) above the EncodeRLP + WitnessCommitHash work in handleBroadcastWitness. WIT1 broadcasts (no signed announcement on file) used to pay the full encode+hash cost only to discard the result; now they short-circuit. - eth/fetcher/witness_manager_wit2_test.go: rename + retarget the no-signed-hash regression test onto verifyAgainstSignedHash, where the invariant now lives. --- eth/fetcher/witness_manager.go | 86 +++++++++--------------- eth/fetcher/witness_manager_wit2_test.go | 27 +++++--- eth/handler_wit.go | 48 +++++++------ 3 files changed, 74 insertions(+), 87 deletions(-) diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index 1ae1f2b215..ca73a54900 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -668,87 +668,67 @@ func (m *witnessManager) processWitnessResponse(peer string, hash common.Hash, r // file for this block, the encoded witness bytes must hash to the // signed witnessHash. State-root failures (content-correctness) are // handled later in the import path and do NOT drop the server. - if !m.verifyAgainstSignedHash(peer, hash, witness[0]) { + body, witnessHash, ok := m.verifyAgainstSignedHash(peer, hash, witness[0]) + if !ok { return } // WIT2: hand the verified bytes to the handler for pre-import serving. // Done before import-side enqueue so a peer asking us for the body // during the chain-write window gets bytes from the in-flight cache - // rather than empty results. - m.cacheVerifiedWitnessForServing(hash, witness[0]) + // rather than empty results. body is nil on the WIT1 path (no signed + // hash on file) — cacheVerifiedWitnessForServing no-ops in that case. + m.cacheVerifiedWitnessForServing(hash, body, witnessHash) metrics.RecordPerItemDuration(blockWitnessItemDownloadTimer, res.Time, 1) m.handleWitnessFetchSuccess(peer, hash, witness[0], announcedAt) } -// cacheVerifiedWitnessForServing canonical-encodes the witness and forwards -// the bytes to the handler so other peers can fetch them pre-import. No-op -// when no cache callback is configured (legacy WIT1-only paths) or when no -// BP-signed witness hash is on file for this block — without a signature we -// cannot prove byte-correctness to downstream peers, mirroring the same -// guard that handleWitnessBroadcast applies on the broadcast path. EncodeRLP -// failure is logged but does not drop the server — failure to share is not -// a peer's fault and the import path is unaffected. -func (m *witnessManager) cacheVerifiedWitnessForServing(blockHash common.Hash, witness *stateless.Witness) { - if m.parentCacheWitnessForServing == nil || witness == nil { - return - } - if m.parentSignedWitnessHash == nil { - return - } - if _, has := m.parentSignedWitnessHash(blockHash); !has { +// cacheVerifiedWitnessForServing forwards canonical-encoded witness bytes +// (already verified against a BP-signed witness hash by the caller) to the +// handler so other peers can fetch them pre-import. No-op when no cache +// callback is configured (legacy WIT1-only paths) or when body is empty — +// the latter signals the WIT1 path with no signed hash on file, where +// caching unverified bytes would expose us to byte-blame from downstream +// peers. +func (m *witnessManager) cacheVerifiedWitnessForServing(blockHash common.Hash, body []byte, witnessHash common.Hash) { + if m.parentCacheWitnessForServing == nil || len(body) == 0 { return } - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - log.Warn("[wm] Failed to encode witness for pre-import serving cache", "hash", blockHash, "err", err) - return - } - body := buf.Bytes() - m.parentCacheWitnessForServing(blockHash, body, stateless.WitnessCommitHash(body)) + m.parentCacheWitnessForServing(blockHash, body, witnessHash) } -// verifyAgainstSignedHash returns false (and reports the failure to the -// fetch-failure handler, which drops the peer) when a BP-signed witness hash -// is on file for this block and the received witness's encoded bytes don't -// hash to it. Returns true when no signed hash is on file (WIT1 path) or the -// hash matches. -func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, witness *stateless.Witness) bool { +// verifyAgainstSignedHash returns the canonically-encoded witness bytes and +// the BP-signed witness hash they match, when a signed hash is on file and +// verification succeeds. body is nil on the WIT1 path (no signed hash to +// verify against) so callers can skip the pre-import serving cache. ok is +// false when verification fails; the offending peer has already been +// reported. Local EncodeRLP failure on a successfully-decoded witness is +// the local node's bug, not peer misbehavior, so it does not drop the peer. +func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, witness *stateless.Witness) (body []byte, witnessHash common.Hash, ok bool) { if m.parentSignedWitnessHash == nil { - return true + return nil, common.Hash{}, true } expected, has := m.parentSignedWitnessHash(hash) if !has { - return true + return nil, common.Hash{}, true } - actual, err := encodedWitnessHash(witness) - if err != nil { + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { log.Warn("[wm] Failed to encode received witness for hash check", "peer", peer, "hash", hash, "err", err) - m.handleWitnessFetchFailureExt(hash, peer, fmt.Errorf("witness encode failed: %w", err), false) - return false + m.handleWitnessFetchFailureExt(hash, "", fmt.Errorf("witness encode failed: %w", err), false) + return nil, common.Hash{}, false } + encoded := buf.Bytes() + actual := stateless.WitnessCommitHash(encoded) if actual != expected { witnessByteMismatchMeter.Mark(1) log.Warn("[wm] Witness bytes do not match BP-signed hash; dropping peer", "peer", peer, "block", hash, "expected", expected, "actual", actual) m.handleWitnessFetchFailureExt(hash, peer, errors.New("witness hash mismatch"), false) - return false - } - return true -} - -// encodedWitnessHash returns keccak256 over the canonical RLP encoding of the -// witness. Witness.EncodeRLP sorts state nodes lexicographically so the output -// is byte-identical for any two witnesses with the same logical contents, -// which is what makes BP-signed witness-hash verification work across nodes. -// The producer side mirrors this through eth.handler.canonicalWitnessHash. -func encodedWitnessHash(w *stateless.Witness) (common.Hash, error) { - var buf bytes.Buffer - if err := w.EncodeRLP(&buf); err != nil { - return common.Hash{}, err + return nil, common.Hash{}, false } - return stateless.WitnessCommitHash(buf.Bytes()), nil + return encoded, expected, true } // handleWitnessFetchSuccess processes a successfully fetched witness. diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go index e621d276a5..bab8524162 100644 --- a/eth/fetcher/witness_manager_wit2_test.go +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -232,13 +232,14 @@ func TestProcessWitnessResponseSkipsCheckWhenNoSignature(t *testing.T) { } } -// TestCacheVerifiedWitnessSkipsWhenNoSignedHash is the regression for the -// blame-asymmetry bug: caching unverified bytes for serving means a downstream -// peer would ask us for the body, get bytes that don't match THEIR BP-signed -// hash (because we never had one to compare against), and drop us. The fix -// gates serving-cache population on having a BP-signed hash on file — -// mirroring the broadcast path's invariant. -func TestCacheVerifiedWitnessSkipsWhenNoSignedHash(t *testing.T) { +// TestVerifyAgainstSignedHashSkipsEncodeWhenNoSignedHash is the regression +// for the blame-asymmetry bug: caching unverified bytes for serving means a +// downstream peer would ask us for the body, get bytes that don't match THEIR +// BP-signed hash (because we never had one to compare against), and drop us. +// The fix gates serving-cache population on having a BP-signed hash on file — +// verifyAgainstSignedHash returns body=nil on the WIT1 path, and the caller +// short-circuits the cache call (no-op when body is empty). +func TestVerifyAgainstSignedHashSkipsEncodeWhenNoSignedHash(t *testing.T) { tw := newTestWitnessManager() defer tw.Close() @@ -250,12 +251,20 @@ func TestCacheVerifiedWitnessSkipsWhenNoSignedHash(t *testing.T) { tw.manager.parentCacheWitnessForServing = func(common.Hash, []byte, common.Hash) { cacheCalls++ } - // No signed hash on file for any block → cache must not be populated. + // No signed hash on file for any block → verification must return + // body=nil so the caller skips the cache. tw.manager.parentSignedWitnessHash = func(common.Hash) (common.Hash, bool) { return common.Hash{}, false } - tw.manager.cacheVerifiedWitnessForServing(hash, witness) + body, _, ok := tw.manager.verifyAgainstSignedHash("peer1", hash, witness) + if !ok { + t.Fatalf("verifyAgainstSignedHash returned ok=false on WIT1 path") + } + if body != nil { + t.Fatalf("WIT1 path returned non-nil body; downstream peers will see uncovered bytes (len=%d)", len(body)) + } + tw.manager.cacheVerifiedWitnessForServing(hash, body, common.Hash{}) if cacheCalls != 0 { t.Fatalf("cache populated without BP-signed hash on file; downstream peers will drop us as liars (calls=%d)", cacheCalls) } diff --git a/eth/handler_wit.go b/eth/handler_wit.go index 282d8ec6c5..0af9d1e313 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -91,32 +91,30 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W // witnessHash on file — otherwise an upstream that lied about the bytes // would make us serve garbage and get dropped by downstream peers as // liars, even though we just relayed what we received. If no signed - // announcement is on file (WIT1 path), we skip the pre-import cache so - // we don't take on byte-blame risk for unverified content; the import - // path is unaffected. - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) - } else { - bodyBytes := buf.Bytes() - bodyHash := stateless.WitnessCommitHash(bodyBytes) - signed, hasSigned := (*handler)(h).signedWitnesses.get(hash) - switch { - case hasSigned && signed.WitnessHash == bodyHash: - (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) - case hasSigned && signed.WitnessHash != bodyHash: - // Upstream sent bytes that don't match the BP-signed commitment. - // Don't cache for serving and surface this peer as misbehaving. - wit2BroadcastByteMismatchMeter.Mark(1) - peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; not caching for serving", - "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) - default: - // No signed announcement on file: WIT1 fallback. Don't expose - // for WIT2 pre-import serving since we cannot prove byte- - // correctness to downstream peers. The body still flows into - // the import path below. - wit2BroadcastUnverifiedSkippedMeter.Mark(1) + // announcement is on file (WIT1 path), skip the encode+hash entirely + // so WIT1 broadcasts don't pay the cost of work we'd just discard. + if signed, hasSigned := (*handler)(h).signedWitnesses.get(hash); hasSigned { + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) + } else { + bodyBytes := buf.Bytes() + bodyHash := stateless.WitnessCommitHash(bodyBytes) + if signed.WitnessHash == bodyHash { + (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) + } else { + // Upstream sent bytes that don't match the BP-signed commitment. + // Don't cache for serving and surface this peer as misbehaving. + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; not caching for serving", + "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) + } } + } else { + // No signed announcement on file: WIT1 fallback. Don't expose for + // WIT2 pre-import serving since we cannot prove byte-correctness to + // downstream peers. The body still flows into the import path below. + wit2BroadcastUnverifiedSkippedMeter.Mark(1) } // Inject the witness into the block fetcher's cache From 8cc7f8e1681b1ba0932e4327094b5c734eba8c1e Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 5 May 2026 14:53:44 -0300 Subject: [PATCH 04/14] wit2: address adversarial-review safety findings (TDD) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three high-severity issues from the Codex adversarial review on PR #2208, each with a TDD regression test added first then fixed: 1. Header-race: signed announce arriving before header was silently downgraded. handleSignedWitnessAnnouncements called peer.AddKnownAnnounce unconditionally before the verification gate, leaving a peer marked announce-known even on bad-signature / header-unknown rejection paths. That suppressed our own re-relay back to that peer if a valid version of the same hash arrived from someone else, killing the natural recovery path. Fix: gate AddKnownAnnounce on acceptSignedAnnouncement success so the announce-known bit only reflects verified delivery. Test: TestHandleSignedWitnessAnnouncementsBadSigDoesNotMarkAnnounceKnown. 2. pendingWitnessBodies TTL didn't actually evict. get() observed expiry and returned false but left the entry in the map; gcLocked only ran from put(), so a node that stopped receiving witnesses retained up to capacity (10) ~50MB blobs indefinitely. Fix: when get() observes an expired entry, upgrade to write lock and delete it (re-checking under the write lock to avoid clobbering a concurrent put). Test: TestPendingWitnessBodyCacheGetEvictsExpired. 3. Honest body-server dropped on bad producer commitment. verifyAgainstSignedHash dropped the byte-server on every signed-hash mismatch, but the announcement only proves *some* BP signed *some* hash — not that the hash matches the canonical witness. A faulty or malicious scheduled producer that signed a bogus hash would weaponise this to disconnect every honest peer serving the real witness. Fix: reject the bytes (don't cache for serving) and back off the request without dropping the byte-server. TODO comment left for follow-up signer-quarantine work, which needs (signer, relayer) provenance the manager doesn't currently have. Test: TestProcessWitnessResponseDoesNotDropOnByteMismatch (replaces the previous TestProcessWitnessResponseDropsOnHashMismatch, whose policy this commit reverses). --- eth/fetcher/witness_manager.go | 13 +++- eth/fetcher/witness_manager_wit2_test.go | 60 +++++++++---------- eth/handler_wit.go | 17 ++++-- eth/handler_wit2.go | 15 ++++- eth/handler_wit2_test.go | 75 ++++++++++++++++++++++++ 5 files changed, 140 insertions(+), 40 deletions(-) diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index ca73a54900..1f04bf0e79 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -723,9 +723,18 @@ func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, actual := stateless.WitnessCommitHash(encoded) if actual != expected { witnessByteMismatchMeter.Mark(1) - log.Warn("[wm] Witness bytes do not match BP-signed hash; dropping peer", + // We cannot blame the byte-server on signed-hash disagreement alone: + // the announcement only proves *some* BP signed *some* hash. A faulty + // or malicious scheduled producer that signed a bogus hash would + // otherwise weaponise this path to disconnect every honest peer + // serving the canonical witness. Reject the bytes (don't cache for + // serving), back off the pending request so another peer/announcement + // gets tried, and let import-time execution validation pin blame. + // TODO(wit2): wire signer-quarantine once the manager has access to + // (signer, announcement-relayer) provenance from the handler. + log.Warn("[wm] Witness bytes do not match BP-signed hash; not caching, retrying with another peer", "peer", peer, "block", hash, "expected", expected, "actual", actual) - m.handleWitnessFetchFailureExt(hash, peer, errors.New("witness hash mismatch"), false) + m.handleWitnessFetchFailureExt(hash, "", errors.New("witness hash mismatch"), false) return nil, common.Hash{}, false } return encoded, expected, true diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go index bab8524162..2ad0ed5ccf 100644 --- a/eth/fetcher/witness_manager_wit2_test.go +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -8,7 +8,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/stateless" - "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/eth/protocols/eth" ) @@ -26,65 +25,60 @@ func blockAnnounceForTest(origin string, hash common.Hash, number uint64) *block } } -// TestProcessWitnessResponseDropsOnHashMismatch is the load-bearing safety -// guarantee for WIT2 pre-import serving: a peer that returns bytes whose -// keccak256 doesn't match the BP-signed witnessHash must be dropped, even -// if every other check passes. +// TestProcessWitnessResponseDoesNotDropOnByteMismatch encodes the post- +// adversarial-review safety policy: when the served witness bytes do not +// match the BP-signed witnessHash on file, the manager must back off and +// retry, but it MUST NOT drop the byte-server. The accepted announcement +// only proves *some* BP signed *some* hash — not that the hash matches the +// canonical witness. A faulty or malicious scheduled producer that signs a +// bogus hash would otherwise weaponise this code path to disconnect every +// honest peer serving the real witness. // -// Without this, a malicious server could pollute downstream relayers with -// bytes the BP never committed to, and the relayers would face state-root -// failures during execution that they cannot attribute to the right party. -func TestProcessWitnessResponseDropsOnHashMismatch(t *testing.T) { +// The mismatched bytes are still rejected (not cached for serving), and the +// pending state stays alive with a fresh back-off so another peer (or another +// announcement) gets a chance. Blame-pinning belongs at execution time, where +// import-side validation can attribute fault to signer vs. server vs. caller. +func TestProcessWitnessResponseDoesNotDropOnByteMismatch(t *testing.T) { tw := newTestWitnessManager() defer tw.Close() block := createTestBlock(101) hash := block.Hash() - // Prepare a "correct" witness that the BP signed over. - correct := createTestWitnessForBlock(block) - var buf bytes.Buffer - if err := correct.EncodeRLP(&buf); err != nil { - t.Fatalf("encode: %v", err) - } - signedWitnessHash := stateless.WitnessCommitHash(buf.Bytes()) - - // The peer will return a different witness — same block number, but - // the trie differs, producing different bytes and a different hash. - differentHeader := types.CopyHeader(block.Header()) - differentHeader.GasUsed = 999_999_999 - differentBlock := types.NewBlockWithHeader(differentHeader) - rogueWitness := createTestWitnessForBlock(differentBlock) + // The honest server returns the canonical witness for this block — its + // keccak commitment is `canonical`. + canonical := createTestWitnessForBlock(block) - // Inject the signed-witness lookup so processWitnessResponse uses it. + // Simulate a malicious / faulty BP that signed a bogus, unrelated hash. + // processWitnessResponse will see canonical bytes whose hash does not + // match what parentSignedWitnessHash reports. + rogueSignedHash := common.HexToHash("0xdeadbeef") tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { if h == hash { - return signedWitnessHash, true + return rogueSignedHash, true } return common.Hash{}, false } - // Seed pending state so the failure handler back-off path is exercised. tw.manager.mu.Lock() tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "rogue", block: block}, - announce: blockAnnounceForTest("rogue", hash, block.NumberU64()), + op: &blockOrHeaderInject{origin: "honest", block: block}, + announce: blockAnnounceForTest("honest", hash, block.NumberU64()), } tw.manager.mu.Unlock() - // Fabricate the response container expected by processWitnessResponse. res := ð.Response{ Time: time.Millisecond, Done: make(chan error, 1), - Res: []*stateless.Witness{rogueWitness}, + Res: []*stateless.Witness{canonical}, } - tw.manager.processWitnessResponse("rogue", hash, res, time.Now()) + tw.manager.processWitnessResponse("honest-server", hash, res, time.Now()) tw.mu.Lock() defer tw.mu.Unlock() - if len(tw.droppedPeers) != 1 || tw.droppedPeers[0] != "rogue" { - t.Fatalf("expected the lying peer to be dropped, got drops=%v", tw.droppedPeers) + if len(tw.droppedPeers) != 0 { + t.Fatalf("byte-server must not be dropped on signed-hash mismatch (BP may have signed bogus); drops=%v", tw.droppedPeers) } } diff --git a/eth/handler_wit.go b/eth/handler_wit.go index 0af9d1e313..79017e5cd8 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -168,14 +168,23 @@ func (h *witHandler) handleSignedWitnessAnnouncements(peer *wit.Peer, anns []wit } for _, ann := range anns { - // Sender saw this announcement; suppress relay back to them. Do NOT - // mark them as a body-holder — they may be relaying without bytes. - peer.AddKnownAnnounce(ann.BlockHash) - if !h.acceptSignedAnnouncement(peer, ann) { + // Verification failed (bad signature, signer ≠ producer, or + // header not yet local). MUST NOT mark the sender as + // announce-known: doing so would (a) suppress our own later + // re-relay back to this peer if we receive a valid version of + // the same hash from someone else, and (b) leave us no path + // to recover from a header-arrival race once a re-gossip for + // the same hash arrives. Recovery on this branch relies on + // re-receipt, which the empty knownAnnounces set permits. continue } + // Sender produced a valid announcement; suppress relay back to them. + // Do NOT mark them as a body-holder — they may be relaying without + // bytes. Body fetches are gated on knownWitnesses, set elsewhere. + peer.AddKnownAnnounce(ann.BlockHash) + // Cache + dedup. Skip relay if we've already relayed this hash recently. if !h.signedWitnesses.putIfNewer(ann) { wit2DuplicateMeter.Mark(1) diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index cd5e1d446c..94fb0e78a3 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -194,14 +194,27 @@ func (c *pendingWitnessBodyCache) put(blockHash common.Hash, bytes []byte, witne func (c *pendingWitnessBodyCache) get(blockHash common.Hash) ([]byte, common.Hash, bool) { c.mu.RLock() - defer c.mu.RUnlock() e, ok := c.entries[blockHash] if !ok { + c.mu.RUnlock() return nil, common.Hash{}, false } if time.Since(e.receivedAt) > wit2AnnounceTTL { + // Expired: drop the large byte slice now rather than waiting for the + // next put() to gc. Without this, a node that stops receiving witness + // bodies retains up to capacity (10) ~50MB blobs indefinitely past the + // TTL, since gcLocked() only fires on put(). + c.mu.RUnlock() + c.mu.Lock() + // Re-check under the write lock: a concurrent put() may have replaced + // the entry with a fresh one we should not delete. + if cur, ok2 := c.entries[blockHash]; ok2 && cur == e { + delete(c.entries, blockHash) + } + c.mu.Unlock() return nil, common.Hash{}, false } + c.mu.RUnlock() return e.bytes, e.witnessHash, true } diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index 0bb5b1eb33..26155171c8 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -510,6 +510,81 @@ func TestVerifyScheduledProducerDeferredWhenHeaderUnknown(t *testing.T) { } } +// TestHandleSignedWitnessAnnouncementsBadSigDoesNotMarkAnnounceKnown is the +// regression for the verification-ordering bug: handleSignedWitnessAnnouncements +// must not mark a peer as announce-known until the announcement has passed the +// signature/producer-binding gate. The previous order called +// peer.AddKnownAnnounce(hash) unconditionally before acceptSignedAnnouncement, +// so a peer relaying a structurally invalid announcement still became +// announce-known for that hash. Two bad consequences flowed from that: +// - this node refused to ever relay a *valid* later announcement back to that +// peer for the same hash, leaving them unable to recover; +// - this node short-circuited its own re-evaluation paths when a good +// announcement for the same hash arrived from another peer, because the +// original sender's announce-known bit served as a relay-suppression hint. +// +// Using a structurally invalid signature (length 3) is sufficient to drive the +// reject path through verifySignedAnnouncement → strikeWit2Peer without needing +// a bor engine or block header. +func TestHandleSignedWitnessAnnouncementsBadSigDoesNotMarkAnnounceKnown(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + blockHash := common.HexToHash("0xfeedface") + ann := wit.SignedWitnessAnnouncement{ + BlockHash: blockHash, + BlockNumber: 1, + WitnessHash: common.HexToHash("0xc0ffee"), + Signature: []byte{0x00, 0x01, 0x02}, // structurally invalid + } + + if err := witH.handleSignedWitnessAnnouncements(peer, []wit.SignedWitnessAnnouncement{ann}); err != nil { + t.Fatalf("handleSignedWitnessAnnouncements: %v", err) + } + + if peer.KnownAnnounceContainsHash(blockHash) { + t.Fatal("peer marked announce-known despite invalid signature; verification ordering is broken") + } + if _, ok := h.handler.signedWitnesses.get(blockHash); ok { + t.Fatal("signed announcement cached despite invalid signature") + } +} + +// TestPendingWitnessBodyCacheGetEvictsExpired pins the leak fix for the TTL +// path. Before the fix, get() returned false on expiry but left the entry in +// the map; gcLocked only ran from put(), so a node that stopped receiving new +// witnesses retained up to capacity (10) full witness blobs (~50 MiB each) +// indefinitely, producing a long-lived OOM risk under bursty traffic. +// +// The contract this test enforces: any get() that observes an expired entry +// MUST delete it in place so memory pressure does not persist past the TTL. +func TestPendingWitnessBodyCacheGetEvictsExpired(t *testing.T) { + c := newPendingWitnessBodyCache(4) + hash := common.HexToHash("0xfade") + c.put(hash, []byte("expensive-body"), common.HexToHash("0xab")) + + // Force the entry's receivedAt back beyond the TTL, mirroring the same + // approach used by TestSignedWitnessCacheTTLExpiry above. + c.mu.Lock() + c.entries[hash].receivedAt = time.Now().Add(-2 * wit2AnnounceTTL) + c.mu.Unlock() + + if _, _, ok := c.get(hash); ok { + t.Fatal("expired entry must not be returned") + } + + c.mu.RLock() + entriesAfter := len(c.entries) + c.mu.RUnlock() + if entriesAfter != 0 { + t.Fatalf("expired entry must be deleted on get; len(entries)=%d, want 0", entriesAfter) + } +} + // TestVerifyScheduledProducerRejectsBlockNumberMismatch covers the case where // the local header is present but disagrees with the announce on block // number. This is a confirmed bad announce and the caller must strike, so From 9b6adb9f0a10c3ae46268c4f927e193867b46469 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 5 May 2026 15:30:22 -0300 Subject: [PATCH 05/14] wit2: deferred-announce queue closes the cosend race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth and final adversarial-review item. Block + signed-announce gossip streams travel independently and can reach a node in either order. When the announce arrives first, isScheduledProducer returns (ok=false, headerAvailable=false) and the previous code dropped the announcement on the floor — relying on mesh re-gossip to reconstruct the signed-hash for that block. In sparse meshes (single-cosend window, small fanout) re-gossip never fires and subsequent witness fetches silently fall back to the unsigned WIT1 path, leaking the WIT2 byte-verification guarantee for that block. This commit holds the announcement instead of dropping it: - New deferredAnnounceCache mirrors pendingWitnessBodyCache: capacity 256, TTL = wit2AnnounceTTL (30s), oldest-evict, in-place expiry on take(). - acceptSignedAnnouncement's deferral branch now puts the announcement into deferredAnnounces. - New drainDeferredAnnouncesFor(blockHash) re-runs verification for the matching announcement, caches it on success, credits the original sender as announce-known, and relays. On still-header-unknown (rare: the chain-head fired but the indexed header isn't reachable yet by hash) the entry is re-stashed to ride the next chain-head event. - handler.Start subscribes to ChainHeadEvent and runs deferredAnnouncesLoop, which calls drainDeferredAnnouncesFor on each imported block. handler.Stop unsubscribes via quitSync. isScheduledProducer was reordered to check header presence first regardless of consensus engine. The previous early-return for non-bor test chains skipped the header check entirely, which was incorrect on its own (an announce we can't tie to a local block is unverifiable here) and prevented unit tests from exercising the deferral path. Bor producer recovery still runs only when a bor engine is present. Test: TestDeferredSignedAnnounceDrainedAfterHeaderArrives covers the full lifecycle — announce arrives header-unknown (deferred, not cached, sender not credited), header lands, drain runs, announcement is now cached and the deferred entry is consumed. --- eth/handler.go | 43 ++++++++++ eth/handler_wit.go | 8 +- eth/handler_wit2.go | 174 +++++++++++++++++++++++++++++++++++++-- eth/handler_wit2_test.go | 73 ++++++++++++++++ 4 files changed, 291 insertions(+), 7 deletions(-) diff --git a/eth/handler.go b/eth/handler.go index 794e3d2de6..366e8e46f4 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -200,6 +200,15 @@ type handler struct { pendingWitnessBodies *pendingWitnessBodyCache wit2PeerTracker *peerWit2Tracker + // WIT2: signed announcements whose producer-binding could not be checked + // at receive time because the matching block header wasn't local yet. + // Drained from the chain-head subscription on each new block so the race + // between block and announce gossip streams self-heals once the chain + // catches up. + deferredAnnounces *deferredAnnounceCache + wit2HeadCh chan core.ChainHeadEvent + wit2HeadSub event.Subscription + // channels for fetcher, syncer, txsyncLoop quitSync chan struct{} @@ -242,6 +251,7 @@ func newHandler(config *handlerConfig) (*handler, error) { signedWitnesses: newSignedWitnessCache(), pendingWitnessBodies: newPendingWitnessBodyCache(witnessBodyCacheCapacity), wit2PeerTracker: newPeerWit2Tracker(), + deferredAnnounces: newDeferredAnnounceCache(deferredAnnounceCapacity), } log.Info("Sync with witnesses", "enabled", config.syncWithWitnesses) @@ -669,6 +679,39 @@ func (h *handler) Start(maxPeers int) { // start peer handler tracker h.wg.Add(1) go h.protoTracker() + + // WIT2: drain deferred signed announces on each new chain head. This + // closes the cosend race: when a signed announcement arrives ahead of + // its block, we hold it in deferredAnnounces and re-evaluate as soon as + // the matching header lands. + h.wit2HeadCh = make(chan core.ChainHeadEvent, chainHeadChanSize) + h.wit2HeadSub = h.chain.SubscribeChainHeadEvent(h.wit2HeadCh) + h.wg.Add(1) + go h.deferredAnnouncesLoop() +} + +// deferredAnnouncesLoop re-evaluates any deferred WIT2 announcements whose +// matching block has just been imported. Exits cleanly when the chain-head +// subscription returns (chain stop) or quitSync is closed. +func (h *handler) deferredAnnouncesLoop() { + defer h.wg.Done() + defer h.wit2HeadSub.Unsubscribe() + + for { + select { + case ev, ok := <-h.wit2HeadCh: + if !ok { + return + } + if ev.Header != nil { + h.drainDeferredAnnouncesFor(ev.Header.Hash()) + } + case <-h.wit2HeadSub.Err(): + return + case <-h.quitSync: + return + } + } } func (h *handler) Stop() { diff --git a/eth/handler_wit.go b/eth/handler_wit.go index 79017e5cd8..e1f7ccbb00 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -204,8 +204,11 @@ func (h *witHandler) handleSignedWitnessAnnouncements(peer *wit.Peer, anns []wit // caller should proceed to cache + relay; false when the caller should skip // it. Strikes are issued only on confirmed misbehavior (bad signature or // signer ≠ scheduled producer for a known header). Pre-import deferral -// (header not yet local) is silent: no strike, no relay, retry on the next -// packet for the same hash once the block arrives. +// (header not yet local) is silent: no strike, no relay. The announcement is +// stashed in the deferred queue so the chain-head loop can re-evaluate it +// once the block arrives — without that, an announce that races ahead of its +// block is lost permanently and subsequent witness fetches silently skip +// byte-verification. func (h *witHandler) acceptSignedAnnouncement(peer *wit.Peer, ann wit.SignedWitnessAnnouncement) bool { signer, err := verifySignedAnnouncement(ann) if err != nil { @@ -222,6 +225,7 @@ func (h *witHandler) acceptSignedAnnouncement(peer *wit.Peer, ann wit.SignedWitn if !headerAvailable { peer.Log().Debug("wit2: header not yet local for announced block; deferring announce", "blockHash", ann.BlockHash, "blockNumber", ann.BlockNumber) + (*handler)(h).deferredAnnounces.put(ann, peer.ID()) return false } wit2NotValidatorMeter.Mark(1) diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 94fb0e78a3..1b73f111ab 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -233,6 +233,106 @@ func (c *pendingWitnessBodyCache) gcLocked() { } } +// deferredAnnounceCapacity bounds how many header-unknown signed announcements +// we hold while waiting for the corresponding block to arrive. Each entry is +// ~200 bytes; the cap is sized for a worst-case stall window where the local +// chain falls a few hundred blocks behind a busy mesh and announcements +// arrive ahead of headers en masse. +const deferredAnnounceCapacity = 256 + +// deferredAnnounceEntry holds a signed announcement whose producer-binding +// could not be checked yet because the corresponding block header wasn't +// local. The drain path re-runs verification once the chain catches up. +type deferredAnnounceEntry struct { + announcement wit.SignedWitnessAnnouncement + peerID string + receivedAt time.Time +} + +// deferredAnnounceCache holds signed announcements deferred on header-unknown +// rejection so the chain-head loop can re-evaluate them when the matching +// block arrives. Without it, an announce that races ahead of its block — the +// expected outcome of independent block + announce gossip streams — is lost +// for good and subsequent witness fetches silently fall back to unsigned +// (WIT1) verification, leaking the WIT2 trust property for that block. +type deferredAnnounceCache struct { + mu sync.RWMutex + entries map[common.Hash]*deferredAnnounceEntry + capacity int +} + +func newDeferredAnnounceCache(capacity int) *deferredAnnounceCache { + return &deferredAnnounceCache{ + entries: make(map[common.Hash]*deferredAnnounceEntry), + capacity: capacity, + } +} + +// put stores the announcement keyed by block hash. If the cache is full, the +// oldest entry is evicted (linear scan; the cap keeps it cheap). A second put +// for the same hash refreshes receivedAt and overwrites the announcement — +// the more recent gossip wins, which is desirable when the original sender +// disconnected and a different peer now carries the announce forward. +func (c *deferredAnnounceCache) put(ann wit.SignedWitnessAnnouncement, peerID string) { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if _, exists := c.entries[ann.BlockHash]; !exists && len(c.entries) >= c.capacity { + var oldestHash common.Hash + var oldest time.Time + for h, e := range c.entries { + if oldest.IsZero() || e.receivedAt.Before(oldest) { + oldest = e.receivedAt + oldestHash = h + } + } + delete(c.entries, oldestHash) + } + c.entries[ann.BlockHash] = &deferredAnnounceEntry{ + announcement: ann, + peerID: peerID, + receivedAt: time.Now(), + } +} + +// take removes and returns the entry for blockHash if present and fresh. +// Returns ok=false on miss or expiry; expired entries are deleted in place. +func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEntry, bool) { + c.mu.Lock() + defer c.mu.Unlock() + e, ok := c.entries[blockHash] + if !ok { + return nil, false + } + delete(c.entries, blockHash) + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return nil, false + } + return e, true +} + +// has reports whether a fresh entry exists for blockHash. Test-facing only; +// production code uses take to ensure the entry is consumed. +func (c *deferredAnnounceCache) has(blockHash common.Hash) bool { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return false + } + return time.Since(e.receivedAt) <= wit2AnnounceTTL +} + +// gcLocked drops entries past the TTL. Caller must hold the write lock. +func (c *deferredAnnounceCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + // signedWitnessCache stores BP-signed announcements by block hash. The cache // is consulted by: // - the relay path on receive (skip if already seen recently), @@ -466,19 +566,83 @@ func (h *handler) canonicalWitnessHash(blockHash common.Hash) (common.Hash, bool // - ok=false, headerAvailable=false: header not yet local. The announce // cannot be bound to a producer right now. The caller MUST NOT strike — // this is expected during the cosend window where a signed announce -// races the block to the receiver. The fast path recovers naturally -// once the block header arrives and a subsequent announce for the same -// hash is re-evaluated. +// races the block to the receiver. The handler stashes the announce in +// the deferred queue and the chain-head loop re-evaluates it once the +// block arrives. +// +// Header presence is checked first regardless of engine: an announce we +// cannot match to a local block is by definition unverifiable here. Only +// after the header is on file do we route into the bor-specific producer +// recovery (or short-circuit to ok=true on non-bor test chains). func (h *handler) isScheduledProducer(signer common.Address, blockNumber uint64, blockHash common.Hash) (bool, bool) { + header := h.chain.GetHeaderByHash(blockHash) + if header == nil { + wit2HeaderUnknownMeter.Mark(1) + return false, false + } borEngine, isBor := h.chain.Engine().(*bor.Bor) if !isBor { - // Non-bor chain: skip the producer check. + // Non-bor chain (tests): header presence already validated above; the + // producer check is bor-specific and intentionally skipped here. + if header.Number.Uint64() != blockNumber { + return false, true + } return true, true } - header := h.chain.GetHeaderByHash(blockHash) return verifyScheduledProducer(borEngine, header, signer, blockNumber, blockHash) } +// drainDeferredAnnouncesFor re-evaluates any deferred announcement whose +// blockHash now matches a header that has just been imported. On verification +// success the announce is cached in signedWitnesses, the original sender is +// credited as announce-known, and the announce is relayed to peers that have +// not seen it. On confirmed mis-binding (signer ≠ producer) the deferred +// entry is dropped — relayers cannot be re-struck post-hoc since we lost the +// peer reference between deferral and drain. +// +// Called from the chain-head subscription on each new block. Also exposed for +// direct invocation in tests. +func (h *handler) drainDeferredAnnouncesFor(blockHash common.Hash) { + if h.deferredAnnounces == nil { + return + } + entry, ok := h.deferredAnnounces.take(blockHash) + if !ok { + return + } + signer, err := verifySignedAnnouncement(entry.announcement) + if err != nil { + // Should be unreachable: we re-verified the same bytes that already + // passed the signature check at acceptSignedAnnouncement time. + // Surfaced via metric in case a future refactor reorders this. + wit2InvalidSigMeter.Mark(1) + log.Debug("wit2: deferred announce failed signature re-check", "blockHash", blockHash, "err", err) + return + } + prodOk, headerAvailable := h.isScheduledProducer(signer, entry.announcement.BlockNumber, blockHash) + if !prodOk { + if !headerAvailable { + // Header still not local — re-stash with fresh receivedAt so the + // next chain-head event can try again before the TTL expires. + h.deferredAnnounces.put(entry.announcement, entry.peerID) + return + } + wit2NotValidatorMeter.Mark(1) + log.Debug("wit2: deferred announce signer is not the scheduled producer", + "blockHash", blockHash, "signer", signer) + return + } + if !h.signedWitnesses.putIfNewer(entry.announcement) { + wit2DuplicateMeter.Mark(1) + return + } + // Credit the original sender as announce-known so we don't re-relay back. + if peer := h.peers.peer(entry.peerID); peer != nil && peer.witPeer != nil { + peer.witPeer.Peer.AddKnownAnnounce(blockHash) + } + h.relaySignedAnnouncement(entry.peerID, entry.announcement) +} + // verifyScheduledProducer is the pure decision logic for binding a wit2 // announcement signer to the block producer of `blockHash`. Split from // isScheduledProducer so it can be unit-tested without standing up a full diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index 26155171c8..9661990183 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -585,6 +585,79 @@ func TestPendingWitnessBodyCacheGetEvictsExpired(t *testing.T) { } } +// TestDeferredSignedAnnounceDrainedAfterHeaderArrives is the regression for +// the cosend-race liveness gap: when a signed announcement arrives before the +// corresponding block header (block + announce travel independently and can +// race in either order), the handler MUST retain the announcement and re- +// evaluate it once the header arrives, rather than dropping it on the floor +// and silently degrading subsequent witness fetches to the unsigned WIT1 +// fallback path. +// +// Without this: +// 1. announce arrives → header-unknown → acceptSignedAnnouncement returns +// false, announcement is forgotten. +// 2. block arrives shortly after, but no second announce reaches us (sparse +// mesh, single-cosend window) → signedWitnesses never holds the hash. +// 3. fetcher selects a peer, gets bytes, parentSignedWitnessHash returns +// false → byte-verification skipped, WIT2 trust model silently leaks. +// +// The deferred queue holds the announcement until the chain catches up; the +// drain (here invoked directly; in production fired from the chainHeadCh +// subscription) re-runs verification and caches the hash on success. +func TestDeferredSignedAnnounceDrainedAfterHeaderArrives(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + header := &types.Header{Number: big.NewInt(99_999)} // NOT in chain + blockHash := header.Hash() + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: blockHash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xc0ffee01"), + } + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + if err != nil { + t.Fatalf("sign: %v", err) + } + ann.Signature = sig + + // Phase 1: header is not yet local. The announce must be deferred — not + // cached, not relayed, not credited to the sender as announce-known. + if err := witH.handleSignedWitnessAnnouncements(peer, []wit.SignedWitnessAnnouncement{ann}); err != nil { + t.Fatalf("handleSignedWitnessAnnouncements: %v", err) + } + if _, ok := h.handler.signedWitnesses.get(blockHash); ok { + t.Fatal("announce cached prematurely; verification should defer when header is unknown") + } + if peer.KnownAnnounceContainsHash(blockHash) { + t.Fatal("peer marked announce-known on deferred path; re-relay recovery is suppressed") + } + if !h.handler.deferredAnnounces.has(blockHash) { + t.Fatal("deferred-announce queue did not retain the announce; the race window is uncovered") + } + + // Phase 2: header arrives. Drain the queue (production wires this from + // the chainHeadCh subscription on each new block). + rawdb.WriteHeader(h.chain.DB(), header) + h.handler.drainDeferredAnnouncesFor(blockHash) + + if _, ok := h.handler.signedWitnesses.get(blockHash); !ok { + t.Fatal("announce not cached after header arrival; drain is broken") + } + if h.handler.deferredAnnounces.has(blockHash) { + t.Fatal("deferred entry should be cleared after successful drain") + } +} + // TestVerifyScheduledProducerRejectsBlockNumberMismatch covers the case where // the local header is present but disagrees with the announce on block // number. This is a confirmed bad announce and the caller must strike, so From 46ebbbc79ebd17f0b8fd5788f555eb44135825d8 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 11:40:45 -0300 Subject: [PATCH 06/14] eth/wit: cap GetWitness page count (F-1) and per-peer deferred-announce entries (W-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handleGetWitness: bound len(WitnessPages) to MaxWitnessPagesServe. A request packed with unknown hashes or out-of-range pages accumulates zero data bytes and trips neither byte guard, while still forcing one DB size lookup per distinct hash and one response entry per page — a CPU/IO/alloc amplification vector. Legitimate requests carry a single page, so the bound is never approached. Also fix an error-string typo. deferredAnnounceCache: add a per-peer live-entry cap (capacity/8) so a single peer cannot saturate the deferred-announce queue and evict honest header-racing announces. The cache is keyed by blockHash, so bounding the claimed BlockNumber is no defence (an attacker reuses a near-tip number with distinct fake hashes); the per-peer cap is the bound that holds. Per-peer accounting is maintained across put/take/gc/evict; add wit2DeferredPerPeerDropMeter. Tests: TestHandleGetWitness_PageCountBound, TestDeferredAnnounceCachePerPeerCap. --- eth/handler_wit.go | 20 ++++++--- eth/handler_wit2.go | 87 ++++++++++++++++++++++++++++++++++------ eth/handler_wit2_test.go | 50 +++++++++++++++++++++++ eth/handler_wit_test.go | 53 ++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 17 deletions(-) diff --git a/eth/handler_wit.go b/eth/handler_wit.go index e1f7ccbb00..f817044099 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -22,6 +22,7 @@ const ( MaximumCachedWitnessOnARequest = 200 * 1024 * 1024 // 200 MB, the maximum amount of memory a request can demand while getting witness MaximumResponseSize = 16 * 1024 * 1024 // 16 MB, helps to fast fail check MaxWitnessMetadataServe = 1024 // maximum hashes a single GetWitnessMetadata request may carry + MaxWitnessPagesServe = 1024 // maximum {hash,page} entries a single GetWitness request may carry ) // witHandler implements the eth.Backend interface to handle the various network @@ -119,10 +120,7 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W // Inject the witness into the block fetcher's cache if h.blockFetcher != nil { - log.Debug("Injecting witness into block fetcher", "hash", hash, "peer", peer.ID()) - // Verify witness header matches a known block hash - blockHash := witness.Header().Hash() - log.Debug("Witness details", "blockHash", blockHash, "header", witness.Header().Number) + log.Debug("Injecting witness into block fetcher", "hash", hash, "peer", peer.ID(), "number", witness.Header().Number) if err := h.blockFetcher.InjectWitness(peer.ID(), witness); err != nil { peer.Log().Warn("Failed to inject broadcast witness into fetcher", "hash", hash, "err", err) @@ -270,6 +268,18 @@ func (h *handler) relaySignedAnnouncement(senderID string, ann wit.SignedWitness func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) (wit.WitnessPacketResponse, error) { log.Debug("handleGetWitness processing request", "peer", peer.ID(), "reqID", req.RequestId, "witnessPages", len(req.WitnessPages)) + // Cap the page-entry count up front, mirroring the metadata handler's + // MaxWitnessMetadataServe guard. The in-loop byte guards below only count + // data bytes, and only on the needToQuery branch — a request packed with + // unknown hashes or out-of-range pages accumulates zero bytes and trips + // neither guard, while still forcing one DB size lookup per distinct hash + // (resolveWitnessBytes) and one response entry per page. Bounding the entry + // count closes that CPU/IO/alloc amplification. Legitimate requests carry a + // single page, so this limit is never approached in practice. + if len(req.WitnessPages) > MaxWitnessPagesServe { + return nil, fmt.Errorf("witness request exceeds %d page limit: got %d", MaxWitnessPagesServe, len(req.WitnessPages)) + } + witnessCache, witnessSize := h.resolveWitnessBytes(req.WitnessPages) var response wit.WitnessPacketResponse @@ -306,7 +316,7 @@ func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) response = append(response, pageResponse) if totalCached >= MaximumCachedWitnessOnARequest { - return nil, errors.New("requests demans huge amount of memory") + return nil, errors.New("request demands a huge amount of memory") } if totalResponsePayloadDataAmount >= MaximumResponseSize { return nil, errors.New("response exceeds maximum p2p payload size") diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 1b73f111ab..363da1ad9a 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -27,6 +27,7 @@ var ( wit2DuplicateMeter = metrics.NewRegisteredMeter("eth/wit2/announce/duplicate", nil) wit2BroadcastByteMismatchMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_byte_mismatch", nil) wit2BroadcastUnverifiedSkippedMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_unverified_skipped", nil) + wit2DeferredPerPeerDropMeter = metrics.NewRegisteredMeter("eth/wit2/announce/deferred_per_peer_drop", nil) wit2HeaderUnknownMeter = metrics.NewRegisteredMeter("eth/wit2/announce/header_unknown", nil) wit2ConflictingWitnessHashMeter = metrics.NewRegisteredMeter("eth/wit2/announce/conflicting_witness_hash", nil) wit2RateLimitDropMeter = metrics.NewRegisteredMeter("eth/wit2/announce/rate_limit_drop", nil) @@ -240,6 +241,18 @@ func (c *pendingWitnessBodyCache) gcLocked() { // arrive ahead of headers en masse. const deferredAnnounceCapacity = 256 +// deferredAnnouncePerPeerDivisor caps how large a share of the deferred queue a +// single peer may occupy: perPeerCap = capacity / divisor. Without a per-peer +// cap, one peer operating within the announce rate limit (64/s) can fill all +// the slots with its own entries — each a distinct, attacker-chosen blockHash +// at a plausible near-tip number (the cache is keyed by hash, so a fixed +// blockNumber is no obstacle) — and evict honest header-racing announces, +// silently downgrading those blocks to unsigned WIT1 byte-verification. The cap +// reserves the bulk of the queue for the honest mesh. Honest peers race only +// the current tip, so a handful of in-flight deferrals is the norm and this cap +// is never approached in practice. +const deferredAnnouncePerPeerDivisor = 8 + // deferredAnnounceEntry holds a signed announcement whose producer-binding // could not be checked yet because the corresponding block header wasn't // local. The drain path re-runs verification once the chain catches up. @@ -256,28 +269,71 @@ type deferredAnnounceEntry struct { // for good and subsequent witness fetches silently fall back to unsigned // (WIT1) verification, leaking the WIT2 trust property for that block. type deferredAnnounceCache struct { - mu sync.RWMutex - entries map[common.Hash]*deferredAnnounceEntry - capacity int + mu sync.RWMutex + entries map[common.Hash]*deferredAnnounceEntry + perPeer map[string]int // live entry count per originating peer + capacity int + perPeerCap int } func newDeferredAnnounceCache(capacity int) *deferredAnnounceCache { + perPeerCap := capacity / deferredAnnouncePerPeerDivisor + if perPeerCap < 1 { + perPeerCap = 1 + } return &deferredAnnounceCache{ - entries: make(map[common.Hash]*deferredAnnounceEntry), - capacity: capacity, + entries: make(map[common.Hash]*deferredAnnounceEntry), + perPeer: make(map[string]int), + capacity: capacity, + perPeerCap: perPeerCap, } } -// put stores the announcement keyed by block hash. If the cache is full, the -// oldest entry is evicted (linear scan; the cap keeps it cheap). A second put -// for the same hash refreshes receivedAt and overwrites the announcement — -// the more recent gossip wins, which is desirable when the original sender -// disconnected and a different peer now carries the announce forward. +// decPeerLocked drops one live-entry credit for peerID, removing the map key +// when it reaches zero. Caller must hold the write lock. +func (c *deferredAnnounceCache) decPeerLocked(peerID string) { + c.perPeer[peerID]-- + if c.perPeer[peerID] <= 0 { + delete(c.perPeer, peerID) + } +} + +// put stores the announcement keyed by block hash. A second put for the same +// hash refreshes receivedAt and overwrites the announcement — the more recent +// gossip wins, which is desirable when the original sender disconnected and a +// different peer now carries the announce forward; per-peer credit moves with +// it. For a new hash, the per-peer cap is enforced first (a peer at its share +// is dropped, recording a metric, so it cannot evict honest entries), then the +// global cap (evict the oldest entry across all peers; linear scan is cheap at +// the configured size). func (c *deferredAnnounceCache) put(ann wit.SignedWitnessAnnouncement, peerID string) { c.mu.Lock() defer c.mu.Unlock() c.gcLocked() - if _, exists := c.entries[ann.BlockHash]; !exists && len(c.entries) >= c.capacity { + + if existing, exists := c.entries[ann.BlockHash]; exists { + // Overwrite for the same hash: net-zero slot change. Move per-peer + // credit if a different peer now carries this announce forward. + if existing.peerID != peerID { + c.decPeerLocked(existing.peerID) + c.perPeer[peerID]++ + } + c.entries[ann.BlockHash] = &deferredAnnounceEntry{ + announcement: ann, + peerID: peerID, + receivedAt: time.Now(), + } + return + } + + // New hash for this peer: enforce its share of the queue so no single peer + // can monopolise the cache and evict honest header-racing announces. + if c.perPeer[peerID] >= c.perPeerCap { + wit2DeferredPerPeerDropMeter.Mark(1) + return + } + + if len(c.entries) >= c.capacity { var oldestHash common.Hash var oldest time.Time for h, e := range c.entries { @@ -286,13 +342,18 @@ func (c *deferredAnnounceCache) put(ann wit.SignedWitnessAnnouncement, peerID st oldestHash = h } } - delete(c.entries, oldestHash) + if victim, ok := c.entries[oldestHash]; ok { + c.decPeerLocked(victim.peerID) + delete(c.entries, oldestHash) + } } + c.entries[ann.BlockHash] = &deferredAnnounceEntry{ announcement: ann, peerID: peerID, receivedAt: time.Now(), } + c.perPeer[peerID]++ } // take removes and returns the entry for blockHash if present and fresh. @@ -305,6 +366,7 @@ func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEn return nil, false } delete(c.entries, blockHash) + c.decPeerLocked(e.peerID) if time.Since(e.receivedAt) > wit2AnnounceTTL { return nil, false } @@ -328,6 +390,7 @@ func (c *deferredAnnounceCache) gcLocked() { cutoff := time.Now().Add(-wit2AnnounceTTL) for h, e := range c.entries { if e.receivedAt.Before(cutoff) { + c.decPeerLocked(e.peerID) delete(c.entries, h) } } diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index 9661990183..e36371f3de 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -310,6 +310,56 @@ func TestHandleWitnessBroadcastSkipsCacheWhenNoSignature(t *testing.T) { } } +// TestDeferredAnnounceCachePerPeerCap is the regression for W-1: a single peer +// must not be able to monopolise the deferred-announce queue and evict honest +// header-racing announces. The cache is keyed by blockHash, so bounding the +// claimed BlockNumber is no defence (an attacker just reuses a near-tip number +// with distinct fake hashes). The effective bound is per-peer: one peer may +// hold at most capacity/divisor slots; honest peers keep theirs. +func TestDeferredAnnounceCachePerPeerCap(t *testing.T) { + // capacity 16 → perPeerCap = 16/8 = 2, small enough to exercise cheaply. + c := newDeferredAnnounceCache(16) + require.Equal(t, 2, c.perPeerCap) + + mkAnn := func(n byte) wit.SignedWitnessAnnouncement { + return wit.SignedWitnessAnnouncement{ + BlockHash: common.Hash{n}, + BlockNumber: uint64(n), + WitnessHash: common.Hash{0xff, n}, + Signature: make([]byte, wit.SignatureLength), + } + } + + // One peer fills its share, then its next NEW-hash put is dropped. + c.put(mkAnn(1), "attacker") + c.put(mkAnn(2), "attacker") + c.put(mkAnn(3), "attacker") + assert.True(t, c.has(common.Hash{1})) + assert.True(t, c.has(common.Hash{2})) + assert.False(t, c.has(common.Hash{3}), + "third new-hash deferral from a saturating peer must be dropped by the per-peer cap") + + // An honest peer is unaffected by the attacker's saturation. + c.put(mkAnn(10), "honest") + assert.True(t, c.has(common.Hash{10}), + "honest peer must not be starved by a peer that filled its own share") + + // Draining one of the peer's entries returns a credit so it can defer again + // — the cap tracks *live* entries, it is not a lifetime quota. + if _, ok := c.take(common.Hash{1}); !ok { + t.Fatal("take should return the live entry") + } + c.put(mkAnn(3), "attacker") + assert.True(t, c.has(common.Hash{3}), + "after a drain freed a slot, the peer may defer a new hash again") + + // Re-deferring an existing hash (same peer) is an overwrite, not a new + // slot, so it must never be rejected by the cap even at the limit. + c.put(mkAnn(2), "attacker") // attacker currently holds {2},{3} == cap + c.put(mkAnn(2), "attacker") // overwrite, must succeed + assert.True(t, c.has(common.Hash{2})) +} + // TestSignedAnnounceDoesNotMarkPeerAsBodyHolder is the load-bearing // regression test for the announce/body separation. A WIT2 peer that has // only relayed a signed announcement (no body) MUST NOT show up in diff --git a/eth/handler_wit_test.go b/eth/handler_wit_test.go index 57dc22cb2a..25e5f6c29e 100644 --- a/eth/handler_wit_test.go +++ b/eth/handler_wit_test.go @@ -278,6 +278,59 @@ func TestHandleGetWitnessMetadata_HashCountBound(t *testing.T) { } } +// TestHandleGetWitness_PageCountBound exercises the per-request page-entry cap +// on the witness *data* handler (F-1). The in-loop byte guards only count data +// bytes on the needToQuery branch, so a request packed with unknown hashes or +// out-of-range pages accumulates zero bytes and never trips them — yet each +// distinct hash still costs a DB size lookup and each page a response entry. +// MaxWitnessPagesServe bounds that amplification up front. This mirrors +// TestHandleGetWitnessMetadata_HashCountBound for the metadata handler. +func TestHandleGetWitness_PageCountBound(t *testing.T) { + handler := newTestHandler() + defer handler.close() + + witHandler := (*witHandler)(handler.handler) + peer := newTestWitPeer() + defer peer.Close() + + tests := []struct { + name string + count int + wantErr bool + }{ + {"at limit", MaxWitnessPagesServe, false}, + {"one over limit", MaxWitnessPagesServe + 1, true}, + {"far over limit", MaxWitnessPagesServe * 100, true}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // All distinct, unknown hashes: this is the cheap-to-build, + // zero-byte request that the byte guards alone fail to bound. + pages := make([]wit.WitnessPageRequest, tc.count) + for i := range pages { + pages[i] = wit.WitnessPageRequest{ + Hash: common.Hash{byte(i), byte(i >> 8), byte(i >> 16)}, + Page: 0, + } + } + packet := &wit.GetWitnessPacket{ + RequestId: 55555, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: pages}, + } + + response, err := witHandler.handleGetWitness(peer, packet) + if tc.wantErr { + require.Error(t, err) + assert.Nil(t, response) + } else { + require.NoError(t, err) + assert.Equal(t, tc.count, len(response)) + } + }) + } +} + // TestHandleGetWitnessMetadata_PageCalculation tests page calculation edge cases func TestHandleGetWitnessMetadata_PageCalculation(t *testing.T) { handler := newTestHandler() From fe20334b7f30504d1179bf6802ada5334eaae527 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 15:41:15 -0300 Subject: [PATCH 07/14] wit2: push witness body to waiting peers + back off empty fetches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the stateless-consumer sync regression where, in an all-WIT2 fleet, a node always fetches the witness body from an announce-only relayer (no peer is ever marked as a body-holder) and re-polls it with empty GetWitness until that relayer obtains the body. WIT1 stays in lockstep because its hash-announce both implies the sender holds the body and marks it as a holder, so the first pull lands; WIT2 relays the signed announce ahead of the body, leaving the consumer to poll. - handler: record peers that ask for a body we don't yet hold but have a BP-signed announcement for (witnessWaiterRegistry, bounded + 30s TTL), and push the full witness to them the moment we obtain it. Three triggers cover how a node comes to hold a body: our own verified fetch (cacheVerifiedWitnessForServing), a gossip broadcast (handleWitnessBroadcast), and — the dominant case for full/producing nodes — generating it during native block import, flushed from the chain-head loop (flushWitnessWaitersForImported). Restores the WIT1-style hand-off without flooding: at most one body per peer that actually asked. - fetcher: route empty ("body not ready yet") responses to a dedicated backoff (first retries immediate, then exponential to a 1s cap) instead of a tight ~gatherSlack re-poll; never drop the request (the witness provably exists) or penalise the responder. Covered by TestEmptyGetWitnessForSignedHashPushesBodyOnArrival, TestFlushWitnessWaitersForImportedPushesFromChainStorage, and TestEmptyResponseBacksOffToAvoidHammering (all fail before the fix). --- eth/fetcher/witness_manager.go | 77 +++++++++- eth/fetcher/witness_manager_wit2_test.go | 54 +++++++ eth/handler.go | 13 +- eth/handler_wit.go | 15 ++ eth/handler_wit2.go | 179 +++++++++++++++++++++++ eth/handler_wit2_test.go | 115 +++++++++++++++ 6 files changed, 443 insertions(+), 10 deletions(-) diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index 1f04bf0e79..8eda40ee13 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -30,6 +30,24 @@ const ( // witness for a block hash before giving up and marking it unavailable. maxWitnessFetchRetries = 300 // ~30s of retries + // emptyResponseFastRetries is how many consecutive "body not ready yet" + // (empty) responses we re-poll immediately before backing off. WIT2's fast + // signed announce reaches us ahead of the body, so the only candidate body + // source is often an announce-only relayer that has not finished pulling + + // importing the block. The first couple of re-polls stay immediate so we + // pick the body up the instant the relayer obtains it (the common case); + // after that, a relayer answering empty is genuinely waiting on its own + // upstream and re-polling it every ~gatherSlack only hammers it. + emptyResponseFastRetries = 2 + + // emptyResponseBaseBackoff / emptyResponseMaxBackoff bound the exponential + // backoff applied to repeated empty responses past the fast-retry window. + // The witness provably exists (a BP signed its hash) so we never give the + // request up here; we only slow the poll cadence to avoid the empty-poll + // storm observed on devnet (~15x the WIT1 empty-response count). + emptyResponseBaseBackoff = 100 * time.Millisecond + emptyResponseMaxBackoff = 1 * time.Second + witnessCacheSize = 10 witnessCacheTTL = 2 * time.Minute @@ -46,9 +64,10 @@ const ( // witnessRequestState tracks the state of a pending witness request. type witnessRequestState struct { - op *blockOrHeaderInject // The original block/header injection operation. - announce *blockAnnounce // Announcement details, non-nil if a fetch is in flight. - retries int // Number of fetch attempts already made + op *blockOrHeaderInject // The original block/header injection operation. + announce *blockAnnounce // Announcement details, non-nil if a fetch is in flight. + retries int // Number of fetch attempts already made + emptyRetries int // Consecutive "body not ready yet" (empty) responses, for backoff } // cachedWitness represents a witness that arrived before its corresponding block @@ -655,12 +674,12 @@ func (m *witnessManager) processWitnessResponse(peer string, hash common.Hash, r if len(witness) == 0 { // Empty/unavailable response: the peer doesn't have the body yet // (e.g. WIT2 announce-only relayer that has not finished importing). - // This is a soft failure — back off the request so another peer can - // be tried, but do NOT drop the responder. Dropping on "no body" is - // what makes announce-only fallback peers unsafe to ask, which would - // erase the WIT2 multi-hop latency win at hop>=2. + // This is the expected steady state on the WIT2 fast path, not a + // failure — back off the request (keeping the responder; dropping on + // "no body" is what makes announce-only fallback peers unsafe to ask, + // which would erase the WIT2 multi-hop latency win at hop>=2). log.Debug("[wm] Received empty witness response from peer", "peer", peer, "hash", hash) - m.handleWitnessFetchFailureExt(hash, "", errors.New("empty witness response"), false) + m.handleWitnessBodyNotReady(hash) return } @@ -842,6 +861,48 @@ func (m *witnessManager) handleWitnessFetchFailureExt(hash common.Hash, peer str m.rescheduleWitness() } +// handleWitnessBodyNotReady backs off a pending witness request after an empty +// ("body not ready yet") response, without dropping the responder and without +// giving the request up. On the WIT2 fast path the signed announce reaches us +// ahead of the body, so the only candidate source is frequently an +// announce-only relayer still pulling+importing the block; it answers empty +// until it has the bytes. The first emptyResponseFastRetries re-polls stay +// immediate to catch the body the instant the relayer obtains it; beyond that +// we back off exponentially (capped) so a relayer that is itself waiting +// upstream is not hammered every ~gatherSlack. The witness provably exists — a +// BP signed its hash — so we never discard the request here. +func (m *witnessManager) handleWitnessBodyNotReady(hash common.Hash) { + m.mu.Lock() + if state := m.pending[hash]; state != nil && state.announce != nil { + state.emptyRetries++ + state.announce.time = time.Now().Add(emptyResponseBackoff(state.emptyRetries)) + } + m.mu.Unlock() + + m.rescheduleWitness() +} + +// emptyResponseBackoff returns how far into the future the next re-poll should +// be deferred after n consecutive empty responses. The first +// emptyResponseFastRetries attempts return 0 (re-poll on the next tick); past +// that the delay doubles from emptyResponseBaseBackoff up to +// emptyResponseMaxBackoff. +func emptyResponseBackoff(n int) time.Duration { + if n <= emptyResponseFastRetries { + return 0 + } + shift := uint(n - emptyResponseFastRetries - 1) + // Cap the shift so the left-shift can't overflow before the clamp below. + if shift > 16 { + shift = 16 + } + d := emptyResponseBaseBackoff << shift + if d > emptyResponseMaxBackoff { + d = emptyResponseMaxBackoff + } + return d +} + // safeEnqueue attempts to enqueue a completed operation (block+witness) via the parent's channel. func (m *witnessManager) safeEnqueue(op *blockOrHeaderInject) { hash := op.hash() diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go index 2ad0ed5ccf..d8701b4786 100644 --- a/eth/fetcher/witness_manager_wit2_test.go +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -264,6 +264,60 @@ func TestVerifyAgainstSignedHashSkipsEncodeWhenNoSignedHash(t *testing.T) { } } +// TestEmptyResponseBacksOffToAvoidHammering pins the consumer-side mitigation +// for the WIT2 stateless regression. In an all-WIT2 fleet a stateless node +// always fetches the body from an announce-only relayer (no peer is ever +// marked as a body-holder), and the relayer answers "empty" until it has +// pulled+imported the block itself. The pre-fix code reset announce.time to +// time.Now() on every empty response, so the next tick re-fired ~gatherSlack +// later — a tight poll loop that hammered the single relayer hundreds of times +// (the ~15x "Empty response received" count seen on devnet) without ever +// shortening the wait. +// +// The fix keeps the first couple of retries fast (so the body is picked up the +// instant the relayer obtains it — the common case) and then backs off +// exponentially, capping the empty-poll rate without discarding the pending +// request (whose witness provably exists — a BP signed it). +func TestEmptyResponseBacksOffToAvoidHammering(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(606) + hash := block.Hash() + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "relay-only", block: block}, + announce: blockAnnounceForTest("relay-only", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + emptyRes := func() *eth.Response { + return ð.Response{Time: time.Millisecond, Done: make(chan error, 1), Res: []*stateless.Witness{}} + } + + // Drive several consecutive empty responses, as an announce-only relayer + // that does not yet hold the body would produce. + var lastDelay time.Duration + for i := 0; i < 8; i++ { + tw.manager.processWitnessResponse("relay-only", hash, emptyRes(), time.Now()) + tw.manager.mu.Lock() + st := tw.manager.pending[hash] + if st == nil { + tw.manager.mu.Unlock() + t.Fatalf("pending entry dropped on empty response at attempt %d; a provably-existing witness must not be discarded", i) + } + lastDelay = time.Until(st.announce.time) + tw.manager.mu.Unlock() + } + + // After repeated empties the next retry must be deferred (backoff), not + // scheduled immediately. Pre-fix this is ~0 (tight hammering loop). + if lastDelay < 200*time.Millisecond { + t.Fatalf("expected empty-response backoff to defer the next retry after repeated empties; got delay=%v (no backoff → relayer is hammered)", lastDelay) + } +} + // TestProcessWitnessResponseEmptyDoesNotDropAnnounceOnlyPeer locks the // fast-path safety property: a peer that only saw the signed announce (and // has not yet imported the body) responds with empty bytes when asked. That diff --git a/eth/handler.go b/eth/handler.go index 366e8e46f4..98540fe880 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -206,8 +206,15 @@ type handler struct { // between block and announce gossip streams self-heals once the chain // catches up. deferredAnnounces *deferredAnnounceCache - wit2HeadCh chan core.ChainHeadEvent - wit2HeadSub event.Subscription + + // WIT2: peers that asked us for a witness body we did not yet hold (we + // answered GetWitness empty for a hash with a BP-signed announcement on + // file). When we obtain the body we push it straight to them, restoring + // the WIT1-style hand-off the fast announce removed. + witnessWaiters *witnessWaiterRegistry + + wit2HeadCh chan core.ChainHeadEvent + wit2HeadSub event.Subscription // channels for fetcher, syncer, txsyncLoop quitSync chan struct{} @@ -252,6 +259,7 @@ func newHandler(config *handlerConfig) (*handler, error) { pendingWitnessBodies: newPendingWitnessBodyCache(witnessBodyCacheCapacity), wit2PeerTracker: newPeerWit2Tracker(), deferredAnnounces: newDeferredAnnounceCache(deferredAnnounceCapacity), + witnessWaiters: newWitnessWaiterRegistry(), } log.Info("Sync with witnesses", "enabled", config.syncWithWitnesses) @@ -705,6 +713,7 @@ func (h *handler) deferredAnnouncesLoop() { } if ev.Header != nil { h.drainDeferredAnnouncesFor(ev.Header.Hash()) + h.flushWitnessWaitersForImported(ev.Header.Hash()) } case <-h.wit2HeadSub.Err(): return diff --git a/eth/handler_wit.go b/eth/handler_wit.go index f817044099..b8b304b448 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -103,6 +103,9 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W bodyHash := stateless.WitnessCommitHash(bodyBytes) if signed.WitnessHash == bodyHash { (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) + // We now hold servable bytes — push to any peer that asked us + // for this body before we had it. + (*handler)(h).pushWitnessToWaiters(hash, witness) } else { // Upstream sent bytes that don't match the BP-signed commitment. // Don't cache for serving and surface this peer as misbehaving. @@ -294,6 +297,18 @@ func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) TotalPages: totalPages, } + // Body absent (neither in-flight cache nor chain storage) but a BP + // signed its hash, so the witness exists and is in flight: remember + // this peer as waiting so we push the body the moment we obtain it, + // instead of leaving it to re-poll us with empty GetWitness. This is + // what keeps WIT2 stateless consumers in lockstep at hop>=2 (see + // witnessWaiterRegistry). + if totalPages == 0 { + if _, hasSigned := (*handler)(h).signedWitnesses.get(witnessPage.Hash); hasSigned { + (*handler)(h).witnessWaiters.record(witnessPage.Hash, peer) + } + } + if witnessPage.Page < totalPages { witnessBytes, ok := witnessCache[witnessPage.Hash] if !ok { diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 363da1ad9a..33d5de9f65 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -14,6 +14,7 @@ import ( "github.com/ethereum/go-ethereum/eth/protocols/wit" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/rlp" ) var errInvalidSignatureLength = errors.New("invalid wit2 announce signature length") @@ -32,6 +33,7 @@ var ( wit2ConflictingWitnessHashMeter = metrics.NewRegisteredMeter("eth/wit2/announce/conflicting_witness_hash", nil) wit2RateLimitDropMeter = metrics.NewRegisteredMeter("eth/wit2/announce/rate_limit_drop", nil) wit2StrikeDisconnectMeter = metrics.NewRegisteredMeter("eth/wit2/announce/strike_disconnect", nil) + wit2WaiterPushMeter = metrics.NewRegisteredMeter("eth/wit2/serve/waiter_push", nil) ) // Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket @@ -234,6 +236,179 @@ func (c *pendingWitnessBodyCache) gcLocked() { } } +const ( + // witnessWaiterHashCap bounds how many block hashes we track waiters for. + // Entries are tiny (a peer pointer + timestamp); the cap is a backstop + // against a peer asking for many distinct not-yet-available hashes. + witnessWaiterHashCap = 256 + + // witnessWaiterPerHashCap bounds waiters recorded per hash so a burst of + // distinct peers asking for the same not-yet-available witness can't grow a + // single bucket without bound. + witnessWaiterPerHashCap = 64 + + // witnessWaiterTTL drops stale waiter entries (peer gave up, disconnected, + // or obtained the body elsewhere). Aligned with the body cache TTL. + witnessWaiterTTL = 30 * time.Second +) + +// witnessWaiter records a peer that asked us for a witness body we did not yet +// have. We only record a waiter when a BP-signed announcement is on file for +// the hash, so the witness is known to exist and the registry is bounded by +// real, signed blocks rather than arbitrary peer-chosen hashes. +type witnessWaiter struct { + peer *wit.Peer + at time.Time +} + +// witnessWaiterRegistry tracks peers awaiting a witness body so we can push it +// to them the moment we obtain it. This restores the WIT1-style hand-off the +// WIT2 fast announce removed: WIT1 only ever announces a witness it already +// holds (and the announce marks the sender a body-holder), so a stateless +// consumer's first pull lands; WIT2 relays the signed announce ahead of the +// body, leaving the consumer to poll an announce-only relayer with repeated +// empty GetWitness until it catches up. Pushing on arrival closes that gap +// without flooding — at most one body per peer that actually asked, exactly the +// bandwidth a successful pull would have cost. +type witnessWaiterRegistry struct { + mu sync.Mutex + waiters map[common.Hash]map[string]*witnessWaiter +} + +func newWitnessWaiterRegistry() *witnessWaiterRegistry { + return &witnessWaiterRegistry{waiters: make(map[common.Hash]map[string]*witnessWaiter)} +} + +// record notes that peer is waiting for the body of hash. No-op for a nil peer. +func (r *witnessWaiterRegistry) record(hash common.Hash, peer *wit.Peer) { + if peer == nil { + return + } + r.mu.Lock() + defer r.mu.Unlock() + r.gcLocked() + + per, ok := r.waiters[hash] + if !ok { + if len(r.waiters) >= witnessWaiterHashCap { + // Registry full of distinct hashes; skip recording rather than + // evict. The peer simply keeps polling (with backoff) and lands the + // body on a later GetWitness — correctness is unaffected. + return + } + per = make(map[string]*witnessWaiter) + r.waiters[hash] = per + } + if _, exists := per[peer.ID()]; !exists && len(per) >= witnessWaiterPerHashCap { + return + } + per[peer.ID()] = &witnessWaiter{peer: peer, at: time.Now()} +} + +// has reports whether any non-expired waiter is recorded for hash. Used to skip +// the witness decode on the push path when nobody is waiting. +func (r *witnessWaiterRegistry) has(hash common.Hash) bool { + r.mu.Lock() + defer r.mu.Unlock() + per, ok := r.waiters[hash] + if !ok { + return false + } + cutoff := time.Now().Add(-witnessWaiterTTL) + for _, w := range per { + if !w.at.Before(cutoff) { + return true + } + } + return false +} + +// take returns and clears the live (non-expired) waiters for hash. +func (r *witnessWaiterRegistry) take(hash common.Hash) []*wit.Peer { + r.mu.Lock() + defer r.mu.Unlock() + per, ok := r.waiters[hash] + if !ok { + return nil + } + delete(r.waiters, hash) + cutoff := time.Now().Add(-witnessWaiterTTL) + out := make([]*wit.Peer, 0, len(per)) + for _, w := range per { + if w.at.Before(cutoff) { + continue + } + out = append(out, w.peer) + } + return out +} + +// gcLocked drops expired waiter entries and empty buckets. Caller holds r.mu. +func (r *witnessWaiterRegistry) gcLocked() { + cutoff := time.Now().Add(-witnessWaiterTTL) + for h, per := range r.waiters { + for id, w := range per { + if w.at.Before(cutoff) { + delete(per, id) + } + } + if len(per) == 0 { + delete(r.waiters, h) + } + } +} + +// pushWitnessToWaiters delivers the full witness body to peers that previously +// asked us for it and got an empty answer (we did not hold the body yet). The +// moment we obtain the bytes the waiting consumer receives them and imports, +// instead of continuing to poll us with empty GetWitness. +func (h *handler) pushWitnessToWaiters(hash common.Hash, witness *stateless.Witness) { + if h.witnessWaiters == nil || witness == nil { + return + } + for _, p := range h.witnessWaiters.take(hash) { + if p.KnownWitnessContainsHash(hash) { + continue // already delivered / known to hold it + } + p.AsyncSendNewWitness(witness) + wit2WaiterPushMeter.Mark(1) + } +} + +// flushWitnessWaitersForImported pushes a just-imported block's witness to any +// peer that asked us for it before we held it. This covers the dominant case +// the fetch/broadcast push hooks miss: a node (especially a full / producing +// node) that obtains the witness by generating it during native block import, +// rather than by pulling it or receiving a gossip broadcast. Called from the +// chain-head loop on every new head; cheap no-op when no peer is waiting. +func (h *handler) flushWitnessWaitersForImported(blockHash common.Hash) { + if h.witnessWaiters == nil || !h.witnessWaiters.has(blockHash) { + return + } + body := h.chain.GetWitness(blockHash) + if len(body) == 0 { + return + } + h.pushWitnessBytesToWaiters(blockHash, body) +} + +// pushWitnessBytesToWaiters decodes verified witness bytes (already checked +// against the BP-signed hash by the caller) and pushes them to waiting peers. +// The decode — re-encoded canonically on send — round-trips to the same bytes, +// so downstream byte-correctness checks still pass. Skipped entirely when no +// peer is waiting, so the common (no-waiter) case pays nothing. +func (h *handler) pushWitnessBytesToWaiters(hash common.Hash, witnessBytes []byte) { + if h.witnessWaiters == nil || len(witnessBytes) == 0 || !h.witnessWaiters.has(hash) { + return + } + var witness stateless.Witness + if err := rlp.DecodeBytes(witnessBytes, &witness); err != nil { + log.Warn("wit2: failed to decode witness bytes for waiter push", "hash", hash, "err", err) + return + } + h.pushWitnessToWaiters(hash, &witness) +} + // deferredAnnounceCapacity bounds how many header-unknown signed announcements // we hold while waiting for the corresponding block to arrive. Each entry is // ~200 bytes; the cap is sized for a worst-case stall window where the local @@ -550,6 +725,10 @@ func (h *handler) cacheVerifiedWitnessForServing(blockHash common.Hash, witnessB return } h.pendingWitnessBodies.put(blockHash, witnessBytes, witnessHash) + // We now hold servable bytes: hand them straight to any peer that asked for + // this body before we had it, so a stateless consumer stops polling us with + // empty GetWitness and imports immediately. + h.pushWitnessBytesToWaiters(blockHash, witnessBytes) } // signLocalWitnessAnnouncement looks up the witness body for blockHash, hashes diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index e36371f3de..d400937bf1 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -390,6 +390,121 @@ func TestSignedAnnounceDoesNotMarkPeerAsBodyHolder(t *testing.T) { } } +// TestEmptyGetWitnessForSignedHashPushesBodyOnArrival pins the serving-side +// cure for the WIT2 stateless regression. In an all-WIT2 fleet no peer is ever +// marked as a body-holder (the full-body broadcast is never sent and the WIT1 +// hash-announce that would mark it is not used between WIT2 peers), so a +// stateless consumer always fetches from an announce-only relayer that does not +// yet hold the body. The relayer answers GetWitness empty and the consumer is +// left polling. WIT1 stays in lockstep precisely because its hash-announce both +// implies the sender holds the body and marks it as a holder, so the first pull +// lands. +// +// The fix records the asking peer as "waiting" when we answer empty for a hash +// we hold a BP-signed announcement for (so we know the witness exists), and +// pushes the full body to those waiters the moment we obtain it — restoring the +// WIT1-style hand-off without flooding (only peers that actually asked, and at +// most one body each, exactly what a pull would have cost). +func TestEmptyGetWitnessForSignedHashPushesBodyOnArrival(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(7777)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + witness, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + var buf bytes.Buffer + require.NoError(t, witness.EncodeRLP(&buf)) + bodyBytes := buf.Bytes() + witnessHash := stateless.WitnessCommitHash(bodyBytes) + + // We hold a BP-signed announcement for this hash (the witness provably + // exists) but not the body yet — neither in-flight cache nor chain storage. + h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: witnessHash, + Signature: make([]byte, wit.SignatureLength), + }) + + // Peer asks for the body before we have it → empty response. This must + // register the peer as waiting for the body. + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + require.Equal(t, uint64(0), resp[0].TotalPages, "precondition: body absent, must serve empty") + require.False(t, peer.KnownWitnessContainsHash(hash), "peer must not yet be treated as a body-holder") + + // Body arrives (our own paged fetch verified it, or a broadcast delivered + // it). Populating the serving cache must push the full body to the waiting + // peer so it imports immediately rather than re-polling us with empty + // GetWitness — which is the stateless lag we measured on devnet. + h.handler.cacheVerifiedWitnessForServing(hash, bodyBytes, witnessHash) + + require.True(t, peer.KnownWitnessContainsHash(hash), + "waiting peer was not pushed the witness body on arrival; stateless consumer keeps polling (the regression)") +} + +// TestFlushWitnessWaitersForImportedPushesFromChainStorage covers the dominant +// production path the fetch/broadcast push hooks miss: a full / producing node +// obtains a witness by generating it during native block import (it lands in +// chain storage, not the in-flight cache, and arrives via no gossip broadcast). +// The chain-head flush must still deliver it to a peer that asked before the +// node held it — this is what was missing in the first fix attempt, where +// stateless peers of a producing node (e.g. S1↔BP1) saw no lag improvement +// because BP1 never triggered a push. +func TestFlushWitnessWaitersForImportedPushesFromChainStorage(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(8888)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + witness, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + var buf bytes.Buffer + require.NoError(t, witness.EncodeRLP(&buf)) + bodyBytes := buf.Bytes() + witnessHash := stateless.WitnessCommitHash(bodyBytes) + + h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: witnessHash, + Signature: make([]byte, wit.SignatureLength), + }) + + // Peer asks before we hold the body → empty, registers as waiter. + _, err = witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.False(t, peer.KnownWitnessContainsHash(hash)) + + // Native import: witness lands in chain storage only. The chain-head flush + // must push it to the waiting peer. + rawdb.WriteWitness(h.chain.DB(), hash, bodyBytes) + h.handler.flushWitnessWaitersForImported(hash) + + require.True(t, peer.KnownWitnessContainsHash(hash), + "chain-head flush did not push a natively-imported witness to the waiting peer") +} + // TestHandleGetWitnessServesFromInFlightCache is the load-bearing behavioral // test for the WIT2 pre-import serving claim: a node that has received the // witness body over gossip but has not yet imported it (chain storage empty) From 44d94580966fb6f6eedbe0ba96c707b462713e27 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 17:41:30 -0300 Subject: [PATCH 08/14] eth: pin multi-page witness reassembly dup-page gap with a documenting test TestReconstructWitness/DuplicatePageMisreconstructs documents that the multi-page path has no (hash,page) dedup: a duplicated page index satisfies the TotalPages count with a real page missing and reassembly fires over the wrong byte stream. Pinned for a future dedup fix; not fixed here (out of scope). See PR discussion. --- eth/peer_test.go | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/eth/peer_test.go b/eth/peer_test.go index 6f7c98d080..77443e1703 100644 --- a/eth/peer_test.go +++ b/eth/peer_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/stretchr/testify/assert" // import path where ethPeer lives + "github.com/stretchr/testify/require" "go.uber.org/mock/gomock" "github.com/ethereum/go-ethereum/common" @@ -648,6 +649,50 @@ func TestReconstructWitness(t *testing.T) { assert.Error(t, err) assert.Nil(t, reconstructed) }) + + // DuplicatePageMisreconstructs documents a gap in the multi-page witness + // path: reconstructWitness sorts by page index and concatenates without + // deduping, and its caller appends received pages to receivedWitPages + // (peer.go:542) with no (hash,page) dedup while triggering reassembly purely + // on len(pages)==TotalPages (peer.go:543). So if one page index arrives twice + // (e.g. an original request and a retry both land, or a peer replays a page), + // the count hits TotalPages with a real page still missing, reassembly fires + // over the duplicated/short byte stream, and the witness is reconstructed + // incorrectly — never as the original. A late duplicate instead trips the + // ">TotalPages" guard (peer.go:591) and jails the peer. This test pins the + // mis-reconstruction so a future dedup fix can be verified; it is not fixed + // here (out of scope for this PR). + t.Run("DuplicatePageMisreconstructs", func(t *testing.T) { + witness, _ := stateless.NewWitness(&types.Header{Number: big.NewInt(100)}, nil) + FillWitnessWithDeterministicRandomState(witness, 4*1024) + var buf bytes.Buffer + require.NoError(t, witness.EncodeRLP(&buf)) + witnessBytes := buf.Bytes() + + // Split into exactly two pages so [p0,p1] reconstructs the original. + half := (len(witnessBytes) + 1) / 2 + p0 := wit.WitnessPageResponse{Page: 0, TotalPages: 2, Hash: common.Hash{0x01}, Data: witnessBytes[:half]} + p1 := wit.WitnessPageResponse{Page: 1, TotalPages: 2, Hash: common.Hash{0x01}, Data: witnessBytes[half:]} + + p := ðPeer{Peer: eth.NewPeer(1, p2p.NewPeer(enode.ID{0x01}, "test", []p2p.Cap{}), nil, nil)} + + // Sanity: the correct page set reconstructs the original witness. + correct, err := p.reconstructWitness([]wit.WitnessPageResponse{p0, p1}) + require.NoError(t, err) + require.NotNil(t, correct) + + // Duplicate of page 0 (page 1 never arrives): len hits TotalPages=2, so + // the caller would trigger reassembly here. reconstructWitness must NOT + // silently yield the correct witness — it either errors or produces a + // different (wrong) reconstruction. + dup, dupErr := p.reconstructWitness([]wit.WitnessPageResponse{p0, p0}) + if dupErr == nil { + var dupBuf bytes.Buffer + require.NoError(t, dup.EncodeRLP(&dupBuf)) + assert.NotEqual(t, witnessBytes, dupBuf.Bytes(), + "duplicate page 0 must not reconstruct the original witness (no dedup)") + } + }) } // TestEthWitRequestClose tests the Close method of ethWitRequest From 858e218ff622c9237ff96e63e21be4cfda03ffd0 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 17:59:46 -0300 Subject: [PATCH 09/14] wit2: size-gate waiter push; verify-or-drop witness broadcasts Two hardening fixes on the witness broadcast/push path (convergence round 1): - pushWitnessToWaiters now refuses to full-push a witness whose canonical encoding exceeds witnessPushMaxSize (wit message cap minus envelope margin). The receiver enforces a 16MB cap on inbound wit messages, so an oversized NewWitness push would get this node dropped as a protocol violator by the very peers it is trying to help. Oversized witnesses stay on the paged pull path; the bytes are servable by the time a push could fire, so the waiter's backed-off poll succeeds. New meter: eth/wit2/serve/waiter_push_oversize. - handleWitnessBroadcast is now verify-or-drop. Bytes contradicting a BP-signed witnessHash on file are fully rejected (previously they skipped the serving cache but were still injected into the block fetcher and the sender was marked as a body-holder - a bypass of the byte verification the paged-fetch path enforces). With no signed announcement on file the broadcast is only accepted for a locally known block header, restoring the known-block binding the inline comment promised. AddKnownWitness moved to accept paths only. New meter: eth/wit2/serve/broadcast_unknown_header_drop. Tests: TestHandleWitnessBroadcastByteMismatchNotInjected, TestHandleWitnessBroadcastDropsUnknownHeader, TestWaiterPushSkipsOversizedWitness. --- eth/handler_wit.go | 76 +++++++++++++++--------- eth/handler_wit2.go | 37 +++++++++++- eth/handler_wit2_test.go | 124 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 207 insertions(+), 30 deletions(-) diff --git a/eth/handler_wit.go b/eth/handler_wit.go index b8b304b448..e8eb44854d 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -82,42 +82,64 @@ func (h *witHandler) Handle(peer *wit.Peer, packet wit.Packet) error { } } -// handleWitnessBroadcast handles a witness broadcast from a peer. +// handleWitnessBroadcast handles a witness broadcast from a peer. A broadcast +// witness is only accepted — sender marked as a body-holder, bytes cached, +// witness injected for import — when we can bind it to something we already +// trust: a BP-signed announcement whose witnessHash matches the received +// bytes (WIT2), or a locally known block header (WIT1 fallback). Anything +// else is dropped: bytes contradicting a BP-signed commitment are provably +// wrong and must not bypass the verification the paged-fetch path enforces, +// and an unsigned witness for an unknown header is unverifiable on the +// sender's say-so alone. func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.Witness) error { - peer.AddKnownWitness(witness.Header().Hash()) hash := witness.Header().Hash() - // WIT2: cache the encoded body so this node can serve it pre-import. We - // only expose the cache for serving when bytes match a BP-signed - // witnessHash on file — otherwise an upstream that lied about the bytes - // would make us serve garbage and get dropped by downstream peers as - // liars, even though we just relayed what we received. If no signed - // announcement is on file (WIT1 path), skip the encode+hash entirely - // so WIT1 broadcasts don't pay the cost of work we'd just discard. + // WIT2: verify against the BP-signed witnessHash on file, then cache the + // encoded body so this node can serve it pre-import. We only expose the + // cache for serving when bytes match — otherwise an upstream that lied + // about the bytes would make us serve garbage and get dropped by + // downstream peers as liars, even though we just relayed what we received. if signed, hasSigned := (*handler)(h).signedWitnesses.get(hash); hasSigned { var buf bytes.Buffer if err := witness.EncodeRLP(&buf); err != nil { + // Can't re-encode → can't check the signed commitment. Treat as + // unverifiable rather than letting unchecked bytes through. peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) - } else { - bodyBytes := buf.Bytes() - bodyHash := stateless.WitnessCommitHash(bodyBytes) - if signed.WitnessHash == bodyHash { - (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) - // We now hold servable bytes — push to any peer that asked us - // for this body before we had it. - (*handler)(h).pushWitnessToWaiters(hash, witness) - } else { - // Upstream sent bytes that don't match the BP-signed commitment. - // Don't cache for serving and surface this peer as misbehaving. - wit2BroadcastByteMismatchMeter.Mark(1) - peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; not caching for serving", - "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) - } + return nil + } + bodyBytes := buf.Bytes() + bodyHash := stateless.WitnessCommitHash(bodyBytes) + if signed.WitnessHash != bodyHash { + // Upstream sent bytes that don't match the BP-signed commitment. + // Don't cache, don't mark the sender as a body-holder, don't + // inject: the broadcast path must not be a bypass of the byte + // verification the paged-fetch path performs. No disconnect — the + // sender may itself have been fed bad bytes upstream. + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; dropping", + "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) + return nil } + peer.AddKnownWitness(hash) + (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) + // We now hold servable bytes — push to any peer that asked us + // for this body before we had it. + (*handler)(h).pushWitnessToWaiters(hash, witness, len(bodyBytes)) } else { - // No signed announcement on file: WIT1 fallback. Don't expose for - // WIT2 pre-import serving since we cannot prove byte-correctness to - // downstream peers. The body still flows into the import path below. + // No signed announcement on file: WIT1 fallback. The only binding we + // can check is that the header belongs to a block we actually know — + // without it, an unsolicited 16MB body for an arbitrary hash would be + // decoded and cached purely on the sender's word. Drop silently: a + // peer racing ahead of our import is early, not malicious. + if h.Chain().GetHeaderByHash(hash) == nil { + wit2BroadcastUnknownHeaderDropMeter.Mark(1) + peer.Log().Debug("dropping witness broadcast for unknown header", "blockHash", hash) + return nil + } + peer.AddKnownWitness(hash) + // Header is known, but we cannot prove byte-correctness to downstream + // WIT2 peers — don't expose for pre-import serving. The body still + // flows into the import path below. wit2BroadcastUnverifiedSkippedMeter.Mark(1) } diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 33d5de9f65..2d1c64db8b 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -34,6 +34,8 @@ var ( wit2RateLimitDropMeter = metrics.NewRegisteredMeter("eth/wit2/announce/rate_limit_drop", nil) wit2StrikeDisconnectMeter = metrics.NewRegisteredMeter("eth/wit2/announce/strike_disconnect", nil) wit2WaiterPushMeter = metrics.NewRegisteredMeter("eth/wit2/serve/waiter_push", nil) + wit2WaiterPushOversizeMeter = metrics.NewRegisteredMeter("eth/wit2/serve/waiter_push_oversize", nil) + wit2BroadcastUnknownHeaderDropMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_unknown_header_drop", nil) ) // Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket @@ -358,14 +360,35 @@ func (r *witnessWaiterRegistry) gcLocked() { } } +// witnessPushMaxSize caps the encoded size of a witness we full-push to +// waiting peers via NewWitness. The wit protocol rejects inbound messages +// larger than 16MB (wit.maxMessageSize), so pushing a bigger body would make +// every waiter drop us as a protocol violator — the paged GetWitness path +// exists precisely for those witnesses. The margin covers the NewWitnessPacket +// RLP envelope around the witness bytes. Oversized witnesses simply stay on +// the pull path: by the time any push could fire we hold servable bytes, so +// the waiter's next (backed-off) poll gets real pages instead of empty. +const witnessPushMaxSize = MaximumResponseSize - 64*1024 + // pushWitnessToWaiters delivers the full witness body to peers that previously // asked us for it and got an empty answer (we did not hold the body yet). The // moment we obtain the bytes the waiting consumer receives them and imports, -// instead of continuing to poll us with empty GetWitness. -func (h *handler) pushWitnessToWaiters(hash common.Hash, witness *stateless.Witness) { +// instead of continuing to poll us with empty GetWitness. encodedSize is the +// canonical RLP size of the witness, used to keep the push under the wit +// protocol message cap. +func (h *handler) pushWitnessToWaiters(hash common.Hash, witness *stateless.Witness, encodedSize int) { if h.witnessWaiters == nil || witness == nil { return } + if encodedSize > witnessPushMaxSize { + // Too large for a single NewWitness message — leave the waiters on + // the paged pull path (entries expire by TTL; the bytes are already + // servable, so their next poll succeeds). + wit2WaiterPushOversizeMeter.Mark(1) + log.Debug("wit2: witness too large for full push; serving via paged pull only", + "hash", hash, "size", encodedSize, "cap", witnessPushMaxSize) + return + } for _, p := range h.witnessWaiters.take(hash) { if p.KnownWitnessContainsHash(hash) { continue // already delivered / known to hold it @@ -401,12 +424,20 @@ func (h *handler) pushWitnessBytesToWaiters(hash common.Hash, witnessBytes []byt if h.witnessWaiters == nil || len(witnessBytes) == 0 || !h.witnessWaiters.has(hash) { return } + if len(witnessBytes) > witnessPushMaxSize { + // Skip the decode entirely — the push would be over the wit message + // cap anyway; waiters fall back to the paged pull path. + wit2WaiterPushOversizeMeter.Mark(1) + log.Debug("wit2: witness too large for full push; serving via paged pull only", + "hash", hash, "size", len(witnessBytes), "cap", witnessPushMaxSize) + return + } var witness stateless.Witness if err := rlp.DecodeBytes(witnessBytes, &witness); err != nil { log.Warn("wit2: failed to decode witness bytes for waiter push", "hash", hash, "err", err) return } - h.pushWitnessToWaiters(hash, &witness) + h.pushWitnessToWaiters(hash, &witness, len(witnessBytes)) } // deferredAnnounceCapacity bounds how many header-unknown signed announcements diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index d400937bf1..1641b391f5 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -837,3 +837,127 @@ func TestVerifyScheduledProducerRejectsBlockNumberMismatch(t *testing.T) { t.Fatal("with header present, headerAvailable must be true so the caller strikes the relayer") } } + +// TestHandleWitnessBroadcastByteMismatchNotInjected guards the verification +// boundary of the broadcast path: when a BP-signed witnessHash is on file and +// a broadcast body does NOT match it, the witness must be fully rejected — not +// cached for serving, sender not marked as a body-holder, and not injected +// into the fetcher. Anything less makes the full-body broadcast a bypass of +// the byte verification the paged-fetch path enforces. +func TestHandleWitnessBroadcastByteMismatchNotInjected(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(7778)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + witness, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + + // Signed announcement on file commits to a DIFFERENT witnessHash than the + // broadcast bytes will hash to. + h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xdeadbeef"), + Signature: make([]byte, wit.SignatureLength), + }) + + require.NoError(t, witH.handleWitnessBroadcast(peer, witness)) + + if _, _, ok := h.handler.pendingWitnessBodies.get(hash); ok { + t.Fatal("byte-mismatched broadcast populated the pre-import serving cache") + } + if peer.KnownWitnessContainsHash(hash) { + t.Fatal("byte-mismatched broadcast marked the sender as a body-holder; fetcher would pull garbage from it") + } +} + +// TestHandleWitnessBroadcastDropsUnknownHeader restores the F-3 audit fix: +// with no BP-signed announcement on file (WIT1 fallback), an unsolicited +// witness broadcast is only accepted for a block header we actually know. +// Without the gate, a peer can make us RLP-decode and inject arbitrary 16MB +// bodies keyed by hashes of its own choosing. +func TestHandleWitnessBroadcastDropsUnknownHeader(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + // Unknown header, no signed announcement → dropped, sender not marked. + unknown := &types.Header{Number: big.NewInt(424242)} + unknownWitness, err := stateless.NewWitness(unknown, nil) + require.NoError(t, err) + require.NoError(t, witH.handleWitnessBroadcast(peer, unknownWitness)) + if peer.KnownWitnessContainsHash(unknown.Hash()) { + t.Fatal("broadcast for unknown header was accepted; unsolicited bodies are cacheable on the sender's word") + } + + // Same broadcast for a locally known header → accepted (WIT1 path). + known := &types.Header{Number: big.NewInt(7779)} + rawdb.WriteHeader(h.chain.DB(), known) + knownWitness, err := stateless.NewWitness(known, nil) + require.NoError(t, err) + require.NoError(t, witH.handleWitnessBroadcast(peer, knownWitness)) + if !peer.KnownWitnessContainsHash(known.Hash()) { + t.Fatal("broadcast for known header was not accepted; WIT1 fallback broken") + } +} + +// TestWaiterPushSkipsOversizedWitness bounds the waiter-push cure for the +// stateless regression: a witness whose canonical encoding exceeds the wit +// protocol message cap must NOT be full-pushed via NewWitness — the receiver +// would reject the message as too large and drop us as a protocol violator. +// Oversized witnesses stay on the paged pull path (we hold servable bytes by +// the time a push could fire, so the waiter's backed-off poll succeeds). +func TestWaiterPushSkipsOversizedWitness(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(7780)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + witness, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + FillWitnessWithDeterministicRandomState(witness, witnessPushMaxSize+1024*1024) + var buf bytes.Buffer + require.NoError(t, witness.EncodeRLP(&buf)) + bodyBytes := buf.Bytes() + require.Greater(t, len(bodyBytes), witnessPushMaxSize, "fixture must exceed the push cap") + witnessHash := stateless.WitnessCommitHash(bodyBytes) + + h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: witnessHash, + Signature: make([]byte, wit.SignatureLength), + }) + + // Register the peer as a waiter: it asks for the body before we hold it. + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.Equal(t, uint64(0), resp[0].TotalPages, "precondition: body absent, must serve empty") + + // Body arrives. The push must be skipped — encoded size is over the wit + // message cap — leaving the waiter on the paged pull path. + h.handler.cacheVerifiedWitnessForServing(hash, bodyBytes, witnessHash) + + if peer.KnownWitnessContainsHash(hash) { + t.Fatal("oversized witness was full-pushed via NewWitness; receiver would drop us as a protocol violator") + } +} From da2e09926378f69e080a4cc4c4343ee6ccce812e Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 18:17:39 -0300 Subject: [PATCH 10/14] wit2: accept waiter-pushed witnesses for deferred-announce tip blocks The verify-or-drop hardening of handleWitnessBroadcast dropped the one NewWitness push that matters: a stateless consumer at the tip has, by definition, not imported the block it needs the witness for - so its header is unknown and the signed announce sits in deferredAnnounces (producer binding needs the header). Both acceptance paths missed and the pushed body was discarded, re-opening the stateless lag the push was added to cure (convergence round 2 catch). Add an import-only acceptance tier between the two: a broadcast whose bytes match a fresh deferred commitment is injected into the block fetcher so the pending block can import - import re-verifies everything via stateless execution and the state-root check - and the sender is marked a body-holder. It is NOT cached for serving, NOT promoted into signedWitnesses, and NOT relayed: those carry the verified-announce trust property, and a deferred entry's producer is unverified until the post-import drain checks it against the chain-validated header. Verifying against the header embedded in the pushed witness instead would let a peer self-seal a fabricated header and pass its own announce as the producer's. The deferred entry is read with a non-consuming peek so the drain still runs. Bytes contradicting the deferred commitment still drop. New meter: eth/wit2/serve/broadcast_deferred_import_only. Test: TestHandleWitnessBroadcastAcceptedWhileAnnounceDeferred. --- eth/handler_wit.go | 29 +++++++++++++++++ eth/handler_wit2.go | 16 +++++++++ eth/handler_wit2_test.go | 70 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/eth/handler_wit.go b/eth/handler_wit.go index e8eb44854d..a7c754c50a 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -125,6 +125,35 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W // We now hold servable bytes — push to any peer that asked us // for this body before we had it. (*handler)(h).pushWitnessToWaiters(hash, witness, len(bodyBytes)) + } else if deferred, hasDeferred := (*handler)(h).deferredAnnounces.peek(hash); hasDeferred { + // A signed announcement for this block is on file but still deferred: + // its producer-binding needs the block header, which a stateless node + // at the tip does not have yet — that is exactly the consumer-side + // state when a waiter push delivers the body for a block pending + // import. Bind the pushed bytes to the deferred commitment and, on + // match, accept for IMPORT ONLY: the witness flows to the block + // fetcher so the pending block can import (import re-verifies + // everything via stateless execution + state-root check). We do NOT + // cache for serving, do NOT promote into signedWitnesses, and do NOT + // relay — those carry the verified-announce trust property, and a + // deferred entry's producer is unverified until the post-import drain + // checks it against the chain-validated header. Verifying against the + // header embedded in the pushed witness instead would let a peer + // self-seal a fabricated header and pass its own announce as the + // producer's. + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) + return nil + } + if stateless.WitnessCommitHash(buf.Bytes()) != deferred.WitnessHash { + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match deferred announce witnessHash; dropping", + "blockHash", hash, "expected", deferred.WitnessHash) + return nil + } + peer.AddKnownWitness(hash) + wit2BroadcastDeferredImportMeter.Mark(1) } else { // No signed announcement on file: WIT1 fallback. The only binding we // can check is that the header belongs to a block we actually know — diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 2d1c64db8b..2b43a2c19f 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -36,6 +36,7 @@ var ( wit2WaiterPushMeter = metrics.NewRegisteredMeter("eth/wit2/serve/waiter_push", nil) wit2WaiterPushOversizeMeter = metrics.NewRegisteredMeter("eth/wit2/serve/waiter_push_oversize", nil) wit2BroadcastUnknownHeaderDropMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_unknown_header_drop", nil) + wit2BroadcastDeferredImportMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_deferred_import_only", nil) ) // Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket @@ -579,6 +580,21 @@ func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEn return e, true } +// peek returns the announcement for blockHash without consuming it, if a +// fresh entry exists. Used by the broadcast path to bind a pushed body to a +// pending (deferred, not yet producer-verified) announcement; the entry must +// stay in place so the post-import drain still runs the real producer +// verification, promotion, and relay. +func (c *deferredAnnounceCache) peek(blockHash common.Hash) (wit.SignedWitnessAnnouncement, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok || time.Since(e.receivedAt) > wit2AnnounceTTL { + return wit.SignedWitnessAnnouncement{}, false + } + return e.announcement, true +} + // has reports whether a fresh entry exists for blockHash. Test-facing only; // production code uses take to ensure the entry is consumed. func (c *deferredAnnounceCache) has(blockHash common.Hash) bool { diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index 1641b391f5..165d2853fa 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -961,3 +961,73 @@ func TestWaiterPushSkipsOversizedWitness(t *testing.T) { t.Fatal("oversized witness was full-pushed via NewWitness; receiver would drop us as a protocol violator") } } + +// TestHandleWitnessBroadcastAcceptedWhileAnnounceDeferred pins the consumer +// side of the waiter-push cure. A stateless node at the tip has, by +// definition, NOT imported the block it needs the witness for — so its +// header is unknown and the signed announce for it sits in deferredAnnounces +// (producer-binding needs the header). A pushed body in that state must be +// accepted for import (else the push is dropped and the stateless-lag +// regression returns), but only for import: no serving cache, no promotion +// into signedWitnesses, no relay — the deferred announce's producer is +// unverified until the post-import drain checks it against the +// chain-validated header. The deferred entry must survive the broadcast so +// that drain can still run. +func TestHandleWitnessBroadcastAcceptedWhileAnnounceDeferred(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(9001)} + hash := header.Hash() + // Header deliberately NOT written: the consumer has not imported it. + + witness, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + var buf bytes.Buffer + require.NoError(t, witness.EncodeRLP(&buf)) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: stateless.WitnessCommitHash(buf.Bytes()), + Signature: make([]byte, wit.SignatureLength), + } + h.handler.deferredAnnounces.put(ann, "upstream-peer") + if _, ok := h.handler.signedWitnesses.get(hash); ok { + t.Fatal("setup: announce must be deferred, not in signedWitnesses") + } + + require.NoError(t, witH.handleWitnessBroadcast(peer, witness)) + + if !peer.KnownWitnessContainsHash(hash) { + t.Fatal("waiter-pushed witness for a deferred-announce tip block was dropped; stateless-lag regression re-opened") + } + if _, _, ok := h.handler.pendingWitnessBodies.get(hash); ok { + t.Fatal("import-only acceptance must not populate the serving cache; deferred producer is unverified") + } + if _, ok := h.handler.signedWitnesses.get(hash); ok { + t.Fatal("import-only acceptance must not promote an unverified deferred announce into signedWitnesses") + } + if !h.handler.deferredAnnounces.has(hash) { + t.Fatal("deferred entry was consumed; post-import drain can no longer verify/promote/relay") + } + + // Bytes contradicting the deferred commitment must still drop. + other, err := stateless.NewWitness(&types.Header{Number: big.NewInt(9002), Extra: []byte{0x1}}, nil) + require.NoError(t, err) + otherHash := other.Header().Hash() + h.handler.deferredAnnounces.put(wit.SignedWitnessAnnouncement{ + BlockHash: otherHash, + BlockNumber: other.Header().Number.Uint64(), + WitnessHash: common.HexToHash("0xfeed"), + Signature: make([]byte, wit.SignatureLength), + }, "upstream-peer") + require.NoError(t, witH.handleWitnessBroadcast(peer, other)) + if peer.KnownWitnessContainsHash(otherHash) { + t.Fatal("bytes contradicting the deferred commitment were accepted") + } +} From eb0f462ab68084109558b300b014084e459581c8 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 18:35:41 -0300 Subject: [PATCH 11/14] wit2: fall back to the deferred-announce relayer for witness fetch At the stateless tip the deferred state is structural, not transient: a signed announce cannot be producer-verified before its block imports, the block cannot import without the witness, and a deferred (unverified) announce deliberately marks no peer announce-known. getOnePeerWithWitness therefore has no candidate, and for witnesses above the full-push size cap there is no NewWitness push either - leaving the consumer with no body source at all (convergence round 3 catch). resolveWitnessFetchPeer now falls back to the peer recorded on the fresh deferred entry (peek also returns the relayer ID): the relayer that announced the witness is on the propagation path and is exactly who a pull should target. Its bytes are NOT byte-checked against the deferred commitment - an unverified announcement must not be able to veto or bless data, else a Byzantine announce could reject the real witness pages. Import (stateless execution + state-root check) remains the verifier, exactly as on every WIT1 fetch. Test: TestResolveWitnessFetchPeerFallsBackToDeferredAnnouncer. --- eth/handler_eth.go | 26 ++++++++++++++++++++- eth/handler_wit.go | 2 +- eth/handler_wit2.go | 18 ++++++++------- eth/handler_wit_test.go | 51 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 10 deletions(-) diff --git a/eth/handler_eth.go b/eth/handler_eth.go index 5cddb3df39..dff53a0eb7 100644 --- a/eth/handler_eth.go +++ b/eth/handler_eth.go @@ -136,7 +136,7 @@ func (h *ethHandler) handleBlockAnnounces(peer *eth.Peer, hashes []common.Hash, func (h *ethHandler) createWitnessRequester() func(hash common.Hash, sink chan *eth.Response) (*eth.Request, error) { return func(hash common.Hash, sink chan *eth.Response) (*eth.Request, error) { // Get the ethPeer from the peerSet - ethPeer := h.peers.getOnePeerWithWitness(hash) + ethPeer := h.resolveWitnessFetchPeer(hash) if ethPeer == nil { return nil, fmt.Errorf("no peer with witness for hash %s is available", hash) } @@ -146,6 +146,30 @@ func (h *ethHandler) createWitnessRequester() func(hash common.Hash, sink chan * } } +// resolveWitnessFetchPeer picks a body-fetch target for hash. Marked peers +// win: getOnePeerWithWitness prefers a proven body-holder and falls back to +// an announce-known relayer. If neither exists, fall back to the peer that +// relayed a still-deferred signed announcement for the hash. At the +// stateless tip the deferred state is structural, not transient: the +// announce cannot be producer-verified before the block imports, the block +// cannot import without the witness, and the unverified announce marks no +// peer — so without this fallback a consumer whose witness exceeds the +// full-push size cap has no pull target at all. The deferred relayer is on +// the witness propagation path and is exactly who a pull should target. +// Its bytes are NOT trusted on the deferred commitment — an unverified +// announcement must not be able to veto or bless data — import (stateless +// execution + state-root check) remains the verifier, as on every WIT1 +// fetch. +func (h *ethHandler) resolveWitnessFetchPeer(hash common.Hash) *ethPeer { + if p := h.peers.getOnePeerWithWitness(hash); p != nil { + return p + } + if _, peerID, ok := (*handler)(h).deferredAnnounces.peek(hash); ok { + return h.peers.peer(peerID) + } + return nil +} + // verifyPageCount verifies the witness page count for a given block hash by // comparing it against random peers' reported page counts. // Returns true if the peer is honest (page count matches consensus), false otherwise. diff --git a/eth/handler_wit.go b/eth/handler_wit.go index a7c754c50a..a3e60c04dd 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -125,7 +125,7 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W // We now hold servable bytes — push to any peer that asked us // for this body before we had it. (*handler)(h).pushWitnessToWaiters(hash, witness, len(bodyBytes)) - } else if deferred, hasDeferred := (*handler)(h).deferredAnnounces.peek(hash); hasDeferred { + } else if deferred, _, hasDeferred := (*handler)(h).deferredAnnounces.peek(hash); hasDeferred { // A signed announcement for this block is on file but still deferred: // its producer-binding needs the block header, which a stateless node // at the tip does not have yet — that is exactly the consumer-side diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 2b43a2c19f..af0c036fff 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -580,19 +580,21 @@ func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEn return e, true } -// peek returns the announcement for blockHash without consuming it, if a -// fresh entry exists. Used by the broadcast path to bind a pushed body to a -// pending (deferred, not yet producer-verified) announcement; the entry must -// stay in place so the post-import drain still runs the real producer -// verification, promotion, and relay. -func (c *deferredAnnounceCache) peek(blockHash common.Hash) (wit.SignedWitnessAnnouncement, bool) { +// peek returns the announcement for blockHash and the peer that relayed it, +// without consuming the entry, if a fresh one exists. Used by the broadcast +// path to bind a pushed body to a pending (deferred, not yet +// producer-verified) announcement, and by the fetch path to find a pull +// target when no marked peer exists. The entry must stay in place so the +// post-import drain still runs the real producer verification, promotion, +// and relay. +func (c *deferredAnnounceCache) peek(blockHash common.Hash) (wit.SignedWitnessAnnouncement, string, bool) { c.mu.RLock() defer c.mu.RUnlock() e, ok := c.entries[blockHash] if !ok || time.Since(e.receivedAt) > wit2AnnounceTTL { - return wit.SignedWitnessAnnouncement{}, false + return wit.SignedWitnessAnnouncement{}, "", false } - return e.announcement, true + return e.announcement, e.peerID, true } // has reports whether a fresh entry exists for blockHash. Test-facing only; diff --git a/eth/handler_wit_test.go b/eth/handler_wit_test.go index 25e5f6c29e..cc6c73a439 100644 --- a/eth/handler_wit_test.go +++ b/eth/handler_wit_test.go @@ -9,6 +9,7 @@ import ( "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/stateless" "github.com/ethereum/go-ethereum/core/types" + ethproto "github.com/ethereum/go-ethereum/eth/protocols/eth" "github.com/ethereum/go-ethereum/eth/protocols/wit" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" @@ -591,3 +592,53 @@ func TestWitHandlerHandle(t *testing.T) { require.NoError(t, err, "Handle should handle missing witness metadata gracefully") }) } + +// TestResolveWitnessFetchPeerFallsBackToDeferredAnnouncer covers the pull +// path for the structural deferred state at a stateless tip: the signed +// announce for a pending block cannot be producer-verified before import +// (header not local), so its relayer is never marked announce-known and +// getOnePeerWithWitness has no candidate. Witnesses above the full-push size +// cap are never pushed either, so without a deferred-aware fallback the +// consumer has NO body source at all and the block sits until the announce +// TTL expires. The fetch-peer resolution must fall back to the peer recorded +// on the deferred entry — the relayer that announced the witness. +func TestResolveWitnessFetchPeerFallsBackToDeferredAnnouncer(t *testing.T) { + h := newTestHandler() + defer h.close() + + ethH := (*ethHandler)(h.handler) + + witPeer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + var id enode.ID + rand.Read(id[:]) + ethPeer := ethproto.NewPeer(ethproto.ETH68, p2p.NewPeer(id, "test-eth-peer", nil), nil, nil) + defer ethPeer.Close() + require.NoError(t, h.handler.peers.registerPeer(ethPeer, nil, witPeer)) + + header := &types.Header{Number: big.NewInt(31337)} + hash := header.Hash() + + // No marked peer, no deferred entry → no fetch target. + if p := ethH.resolveWitnessFetchPeer(hash); p != nil { + t.Fatal("no peer should resolve without marks or deferred entries") + } + + // A deferred (header-unknown, producer-unverified) announce recorded from + // that peer must make it the fallback fetch target. + h.handler.deferredAnnounces.put(wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xc0ffee"), + Signature: make([]byte, wit.SignatureLength), + }, ethPeer.ID()) + + p := ethH.resolveWitnessFetchPeer(hash) + if p == nil { + t.Fatal("deferred-announce relayer was not resolved as the fetch fallback; stateless tip consumer has no body source for oversized witnesses") + } + if p.ID() != ethPeer.ID() { + t.Fatalf("resolved wrong peer: got %s want %s", p.ID(), ethPeer.ID()) + } +} From 8c16b3974892b6ec1e8ddeb7cd054c289c8c0763 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Tue, 9 Jun 2026 19:29:40 -0300 Subject: [PATCH 12/14] wit2: only the block producer may sign witness announcements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every node with an authorized signer (i.e. every validator) signed WIT2 announcements for every block it announced or cosent — including blocks other validators produced. Receivers bind announce-signer to the header sealer and strike on mismatch, so on a devnet with all-validator WIT2 peers, honest validators strike-disconnected each other roughly every 20s (observed: 21-32 strike_disconnects per validator per 8min run). The self-signed foreign announce was also cached in signedWitnesses, where it can shadow the producer's real announcement (putIfNewer dedups by block hash) and suppress its transitive relay. Gate the sign path on the same producer binding the receive side enforces: signLocalWitnessAnnouncement refuses any block the local signer did not seal (maySignAnnouncementForBlock, a thin wrapper over verifyScheduledProducer). For foreign blocks the announce and cosend paths fall back to the unsigned WIT1 hash announce, which is truthful — both paths are gated on HasWitness. --- eth/handler_wit2.go | 39 +++++++++++++++++++++++++------- eth/handler_wit2_test.go | 49 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index af0c036fff..727ddb2d8d 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -716,17 +716,16 @@ func verifySignedAnnouncement(ann wit.SignedWitnessAnnouncement) (common.Address // cosendWitnessAnnouncement co-sends a witness announcement to every peer // that just received the full block via the propagate=true fanout, provided // the peer doesn't already have the witness. WIT2 peers receive the signed -// variant; older peers receive the unsigned WIT1 announce. Skipped entirely -// when the local node hasn't yet stored the witness or doesn't have a -// signing key configured. +// variant when one is available — our own (we produced the block) or the +// producer's (relayed to us and cached). Otherwise, and for older peers, +// the unsigned WIT1 hash announce is sent: truthful, since this path is +// gated on HasWitness. Skipped entirely when the local node hasn't yet +// stored the witness. func (h *handler) cosendWitnessAnnouncement(blockHash common.Hash, blockNumber uint64, transfer []*ethPeer, staticAndTrustedPeers []*ethPeer) { if !h.chain.HasWitness(blockHash) { return } ann, hasSigned := h.signLocalWitnessAnnouncement(blockHash, blockNumber) - if !hasSigned { - return - } witnessRecipientsByID := make(map[string]*witPeer) for _, wp := range h.peers.peersWithoutWitness(blockHash) { witnessRecipientsByID[wp.Peer.ID()] = wp @@ -736,7 +735,7 @@ func (h *handler) cosendWitnessAnnouncement(blockHash common.Hash, blockNumber u if !ok { return } - if wp.Peer.Version() >= wit.WIT2 { + if hasSigned && wp.Peer.Version() >= wit.WIT2 { wp.Peer.AsyncSendSignedWitnessAnnouncement(ann) } else { wp.Peer.AsyncSendNewWitnessHash(blockHash, blockNumber) @@ -787,6 +786,7 @@ func (h *handler) cacheVerifiedWitnessForServing(blockHash common.Hash, witnessB // // Returns (announcement, true) on success. Returns (_, false) if any of: // - no signer configured (full node not producing blocks) +// - the local signer is not the sealer of blockHash (foreign block) // - witness bytes not yet stored in chain // - signing failed // @@ -802,7 +802,19 @@ func (h *handler) signLocalWitnessAnnouncement(blockHash common.Hash, blockNumbe if !ok { return wit.SignedWitnessAnnouncement{}, false } - if (borEngine.CurrentSigner() == common.Address{}) { + signer := borEngine.CurrentSigner() + if (signer == common.Address{}) { + return wit.SignedWitnessAnnouncement{}, false + } + // Only the producer of the block may sign its announcement. Receivers + // enforce announce-signer == header-sealer and strike-disconnect on a + // mismatch, so signing a foreign block guarantees rejection plus peer + // discipline against us — and caching the self-signed announce here + // would shadow the producer's real one (signedWitnesses dedups by + // blockHash), suppressing its transitive relay. For blocks we did not + // seal, the caller falls back to the unsigned WIT1 hash announce, which + // is truthful: every announce path is gated on HasWitness. + if !maySignAnnouncementForBlock(borEngine, h.chain.GetHeaderByHash(blockHash), signer, blockNumber, blockHash) { return wit.SignedWitnessAnnouncement{}, false } @@ -827,6 +839,17 @@ func (h *handler) signLocalWitnessAnnouncement(blockHash common.Hash, blockNumbe return ann, true } +// maySignAnnouncementForBlock reports whether the locally authorized signer +// sealed blockHash and is therefore the one party entitled to sign a WIT2 +// witness announcement for it. Same producer binding the receive side +// enforces (verifyScheduledProducer), applied at the origination side. A nil +// or number-mismatched header refuses: an announce we cannot bind locally +// must not be signed either. +func maySignAnnouncementForBlock(borEngine *bor.Bor, header *types.Header, localSigner common.Address, blockNumber uint64, blockHash common.Hash) bool { + ok, _ := verifyScheduledProducer(borEngine, header, localSigner, blockNumber, blockHash) + return ok +} + // canonicalWitnessHash reads the witness bytes for blockHash from chain // storage and returns the WIT2 chunked-aggregate commitment over those bytes. // Witness.EncodeRLP is now deterministic (state nodes sorted), so every newly diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index 165d2853fa..8295b3156f 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -8,11 +8,14 @@ import ( "time" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/consensus/bor" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/core/stateless" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/ethereum/go-ethereum/params" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -1031,3 +1034,49 @@ func TestHandleWitnessBroadcastAcceptedWhileAnnounceDeferred(t *testing.T) { t.Fatal("bytes contradicting the deferred commitment were accepted") } } + +// TestMaySignAnnouncementForBlockBindsToSealer is the regression for the +// honest-validator strike storm observed on the 2026-06-09 devnet: every node +// with an authorized signer (all validators) signed WIT2 announcements for +// *every* block it announced — including blocks other validators produced. +// Receivers enforce announce-signer == header-sealer and strike on mismatch, +// so honest validators repeatedly strike-disconnected each other (~3/min), +// and the self-signed foreign announce could shadow the producer's real one +// in signedWitnesses (putIfNewer dedups by blockHash), suppressing its +// transitive relay. The sign path must refuse any block the local signer did +// not seal; WIT1 unsigned announces remain the fallback for foreign blocks. +func TestMaySignAnnouncementForBlockBindsToSealer(t *testing.T) { + engine := bor.New(params.BorUnittestChainConfig, rawdb.NewMemoryDatabase(), + nil, nil, nil, nil, nil, false, time.Second, vm.Config{}) + defer engine.Close() + + producerKey, err := crypto.GenerateKey() + require.NoError(t, err) + producer := crypto.PubkeyToAddress(producerKey.PublicKey) + + header := &types.Header{ + Number: big.NewInt(200), + Difficulty: big.NewInt(1), + Extra: make([]byte, 32+65), + } + sig, err := crypto.Sign(bor.SealHash(header, params.BorUnittestChainConfig.Bor).Bytes(), producerKey) + require.NoError(t, err) + copy(header.Extra[len(header.Extra)-65:], sig) + + require.True(t, + maySignAnnouncementForBlock(engine, header, producer, 200, header.Hash()), + "the sealer of the block must be allowed to sign its announcement") + + other := common.HexToAddress("0x0000000000000000000000000000000000000bad") + require.False(t, + maySignAnnouncementForBlock(engine, header, other, 200, header.Hash()), + "a signer that did not seal the block must not sign an announcement for it; receivers strike on signer != producer") + + require.False(t, + maySignAnnouncementForBlock(engine, nil, producer, 200, header.Hash()), + "without a local header the producer binding is unverifiable; do not sign") + + require.False(t, + maySignAnnouncementForBlock(engine, header, producer, 201, header.Hash()), + "announce blockNumber must match the local header") +} From c7b4be020c5910ca7dd0a32ed3205ab440759769 Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Wed, 10 Jun 2026 11:04:56 -0300 Subject: [PATCH 13/14] =?UTF-8?q?wit2:=20address=20CI=20quality=20gates=20?= =?UTF-8?q?=E2=80=94=20coverage,=20duplication,=20diffguard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch coverage 64.75% -> ~91% (codecov target 90%): - new unit tests for the wit2 caches/registries (rate-limit tracker, waiter registry caps/expiry, deferred-announce cache lifecycle, signed-witness cache), the announce receive path (accept/dedup/relay, rate-limit drop, number-mismatch strike), the cosend and BroadcastBlock per-version announce split, the chain-head deferred drain, and the wit wire path (signed-announce round-trip, malformed-packet rejection, queue-full drop) - codecov: ignore eth/peer_mock.go (generated gomock, like the other ignored mocks) Sonar new-code duplication 4.9% -> ~2% (threshold 3%): - shared fixtures in witness_manager_wit2_test.go (primePendingWitness, witnessResponse, encodedCommitHash), handler_wit2_test.go (persistedSignedWitness, requestFirstWitnessPage), and witness_bench_test.go (benchWitnessSizes) diffguard: - split handler_wit2.go (988 lines) into handler_wit2.go + handler_wit2_bodies.go + handler_wit2_announces.go, all under the 500-line threshold - handleWitnessBroadcast (96 lines, complexity 17) extracted into three accept-path helpers; deferredAnnounceCache.put eviction scan extracted (complexity 13 -> under 10) No behavioral changes outside test code: the broadcast/eviction refactors are mechanical extractions covered by the existing and new tests. --- codecov.yml | 1 + core/stateless/witness_bench_test.go | 59 +- eth/fetcher/witness_manager_wit2_test.go | 172 +++--- eth/handler_wit.go | 172 +++--- eth/handler_wit2.go | 579 ------------------- eth/handler_wit2_announces.go | 356 ++++++++++++ eth/handler_wit2_bodies.go | 246 ++++++++ eth/handler_wit2_caches_test.go | 682 +++++++++++++++++++++++ eth/handler_wit2_test.go | 112 ++-- eth/protocols/wit/peer_wit2_test.go | 159 ++++++ 10 files changed, 1691 insertions(+), 847 deletions(-) create mode 100644 eth/handler_wit2_announces.go create mode 100644 eth/handler_wit2_bodies.go create mode 100644 eth/handler_wit2_caches_test.go create mode 100644 eth/protocols/wit/peer_wit2_test.go diff --git a/codecov.yml b/codecov.yml index 7295226d7a..c58dd10403 100644 --- a/codecov.yml +++ b/codecov.yml @@ -19,4 +19,5 @@ comment: ignore: - "consensus/bor/genesis_contract_mock.go" - "consensus/bor/span_mock.go" + - "eth/peer_mock.go" diff --git a/core/stateless/witness_bench_test.go b/core/stateless/witness_bench_test.go index caeedd79a5..499f1d103f 100644 --- a/core/stateless/witness_bench_test.go +++ b/core/stateless/witness_bench_test.go @@ -31,16 +31,12 @@ type discardWriter struct{} func (discardWriter) Write(p []byte) (int, error) { return len(p), nil } -// BenchmarkWitnessKeccakBySize measures the throughput of keccak256 over a -// pre-allocated witness-sized buffer. This is the cost the producer pays to -// compute WitnessHash on the WIT2 announce path (and the cost a relayer or -// requester pays to verify response bytes against the BP-signed WitnessHash). -// -// Run with `go test -bench=BenchmarkWitnessKeccakBySize ./core/stateless/`. -// b.SetBytes lets `go test -benchmem` print throughput in MB/s alongside ns/op, -// which is what we actually want to know — the absolute size of any one -// witness varies, but per-byte cost scales linearly. -func BenchmarkWitnessKeccakBySize(b *testing.B) { +// benchWitnessSizes runs fn as a sub-benchmark per representative witness +// size, handing it a random buffer of that size. b.SetBytes lets +// `go test -benchmem` print throughput in MB/s alongside ns/op, which is what +// we actually want to know — the absolute size of any one witness varies, but +// per-byte cost scales linearly. +func benchWitnessSizes(b *testing.B, fn func(b *testing.B, buf []byte)) { for _, sizeMiB := range []int{1, 5, 15, 30, 50} { size := sizeMiB << 20 buf := make([]byte, size) @@ -50,13 +46,25 @@ func BenchmarkWitnessKeccakBySize(b *testing.B) { b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { b.SetBytes(int64(size)) b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = crypto.Keccak256Hash(buf) - } + fn(b, buf) }) } } +// BenchmarkWitnessKeccakBySize measures the throughput of keccak256 over a +// pre-allocated witness-sized buffer. This is the cost the producer pays to +// compute WitnessHash on the WIT2 announce path (and the cost a relayer or +// requester pays to verify response bytes against the BP-signed WitnessHash). +// +// Run with `go test -bench=BenchmarkWitnessKeccakBySize ./core/stateless/`. +func BenchmarkWitnessKeccakBySize(b *testing.B) { + benchWitnessSizes(b, func(b *testing.B, buf []byte) { + for i := 0; i < b.N; i++ { + _ = crypto.Keccak256Hash(buf) + } + }) +} + // BenchmarkWitnessAnnounceSign measures the marginal ECDSA cost of signing the // 32-byte announcement digest, independent of witness size. This isolates the // secp256k1 sign cost from the keccak cost so a single number per platform is @@ -89,22 +97,13 @@ func BenchmarkWitnessHashAndSignCombined(b *testing.B) { if err != nil { b.Fatalf("key: %v", err) } - for _, sizeMiB := range []int{1, 5, 15, 30, 50} { - size := sizeMiB << 20 - buf := make([]byte, size) - if _, err := rand.Read(buf); err != nil { - b.Fatalf("rand: %v", err) - } - b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { - b.SetBytes(int64(size)) - b.ResetTimer() - for i := 0; i < b.N; i++ { - witnessHash := crypto.Keccak256Hash(buf) - digest := crypto.Keccak256Hash(witnessHash[:], []byte{0x01, 0x02, 0x03, 0x04}) - if _, err := crypto.Sign(digest[:], key); err != nil { - b.Fatalf("sign: %v", err) - } + benchWitnessSizes(b, func(b *testing.B, buf []byte) { + for i := 0; i < b.N; i++ { + witnessHash := crypto.Keccak256Hash(buf) + digest := crypto.Keccak256Hash(witnessHash[:], []byte{0x01, 0x02, 0x03, 0x04}) + if _, err := crypto.Sign(digest[:], key); err != nil { + b.Fatalf("sign: %v", err) } - }) - } + } + }) } diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go index d8701b4786..25903393c5 100644 --- a/eth/fetcher/witness_manager_wit2_test.go +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -8,6 +8,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/eth/protocols/eth" ) @@ -25,6 +26,51 @@ func blockAnnounceForTest(origin string, hash common.Hash, number uint64) *block } } +// primePendingWitness seeds manager.pending with a request state for the +// block under the given origin, exactly as the announce → request flow would. +func primePendingWitness(tw *testWitnessManager, origin string, block *types.Block) { + tw.manager.mu.Lock() + tw.manager.pending[block.Hash()] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: origin, block: block}, + announce: blockAnnounceForTest(origin, block.Hash(), block.NumberU64()), + } + tw.manager.mu.Unlock() +} + +// witnessResponse wraps witnesses in a synthetic eth.Response, as the request +// dispatcher would deliver them. Call with no arguments for an empty response +// (peer does not hold the body). +func witnessResponse(witnesses ...*stateless.Witness) *eth.Response { + return ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: witnesses, + } +} + +// encodedCommitHash returns the WIT2 commit hash over the witness's canonical +// RLP encoding — the value a BP would sign for this witness. +func encodedCommitHash(t *testing.T, witness *stateless.Witness) common.Hash { + t.Helper() + + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + t.Fatalf("encode: %v", err) + } + return stateless.WitnessCommitHash(buf.Bytes()) +} + +// requireNoDroppedPeers fails the test when any peer was drop-disconnected. +func requireNoDroppedPeers(t *testing.T, tw *testWitnessManager, context string) { + t.Helper() + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("%s; drops=%v", context, tw.droppedPeers) + } +} + // TestProcessWitnessResponseDoesNotDropOnByteMismatch encodes the post- // adversarial-review safety policy: when the served witness bytes do not // match the BP-signed witnessHash on file, the manager must back off and @@ -60,26 +106,11 @@ func TestProcessWitnessResponseDoesNotDropOnByteMismatch(t *testing.T) { return common.Hash{}, false } - tw.manager.mu.Lock() - tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "honest", block: block}, - announce: blockAnnounceForTest("honest", hash, block.NumberU64()), - } - tw.manager.mu.Unlock() + primePendingWitness(tw, "honest", block) - res := ð.Response{ - Time: time.Millisecond, - Done: make(chan error, 1), - Res: []*stateless.Witness{canonical}, - } - - tw.manager.processWitnessResponse("honest-server", hash, res, time.Now()) + tw.manager.processWitnessResponse("honest-server", hash, witnessResponse(canonical), time.Now()) - tw.mu.Lock() - defer tw.mu.Unlock() - if len(tw.droppedPeers) != 0 { - t.Fatalf("byte-server must not be dropped on signed-hash mismatch (BP may have signed bogus); drops=%v", tw.droppedPeers) - } + requireNoDroppedPeers(t, tw, "byte-server must not be dropped on signed-hash mismatch (BP may have signed bogus)") } // TestProcessWitnessResponseAcceptsMatchingHash is the contrapositive: a @@ -91,38 +122,18 @@ func TestProcessWitnessResponseAcceptsMatchingHash(t *testing.T) { defer tw.Close() block := createTestBlock(101) - hash := block.Hash() witness := createTestWitnessForBlock(block) - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - t.Fatalf("encode: %v", err) - } - matchingHash := stateless.WitnessCommitHash(buf.Bytes()) + matchingHash := encodedCommitHash(t, witness) tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { return matchingHash, true } - tw.manager.mu.Lock() - tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "honest", block: block}, - announce: blockAnnounceForTest("honest", hash, block.NumberU64()), - } - tw.manager.mu.Unlock() - - res := ð.Response{ - Time: time.Millisecond, - Done: make(chan error, 1), - Res: []*stateless.Witness{witness}, - } + primePendingWitness(tw, "honest", block) - tw.manager.processWitnessResponse("honest", hash, res, time.Now()) + tw.manager.processWitnessResponse("honest", block.Hash(), witnessResponse(witness), time.Now()) - tw.mu.Lock() - defer tw.mu.Unlock() - if len(tw.droppedPeers) != 0 { - t.Fatalf("honest peer must not be dropped on hash match; drops=%v", tw.droppedPeers) - } + requireNoDroppedPeers(t, tw, "honest peer must not be dropped on hash match") } // TestProcessWitnessResponseCachesForServingAfterByteCheck is the regression @@ -138,11 +149,7 @@ func TestProcessWitnessResponseCachesForServingAfterByteCheck(t *testing.T) { block := createTestBlock(202) hash := block.Hash() witness := createTestWitnessForBlock(block) - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - t.Fatalf("encode: %v", err) - } - want := stateless.WitnessCommitHash(buf.Bytes()) + want := encodedCommitHash(t, witness) var ( gotBlock common.Hash @@ -161,20 +168,9 @@ func TestProcessWitnessResponseCachesForServingAfterByteCheck(t *testing.T) { return common.Hash{}, false } - tw.manager.mu.Lock() - tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "honest", block: block}, - announce: blockAnnounceForTest("honest", hash, block.NumberU64()), - } - tw.manager.mu.Unlock() - - res := ð.Response{ - Time: time.Millisecond, - Done: make(chan error, 1), - Res: []*stateless.Witness{witness}, - } + primePendingWitness(tw, "honest", block) - tw.manager.processWitnessResponse("honest", hash, res, time.Now()) + tw.manager.processWitnessResponse("honest", hash, witnessResponse(witness), time.Now()) if gotBlock != hash { t.Fatalf("cache callback not invoked or wrong blockHash: got %s want %s", gotBlock.Hex(), hash.Hex()) @@ -196,7 +192,6 @@ func TestProcessWitnessResponseSkipsCheckWhenNoSignature(t *testing.T) { defer tw.Close() block := createTestBlock(101) - hash := block.Hash() witness := createTestWitnessForBlock(block) // No lookup configured → skip path. @@ -204,26 +199,11 @@ func TestProcessWitnessResponseSkipsCheckWhenNoSignature(t *testing.T) { return common.Hash{}, false } - tw.manager.mu.Lock() - tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "wit1-peer", block: block}, - announce: blockAnnounceForTest("wit1-peer", hash, block.NumberU64()), - } - tw.manager.mu.Unlock() + primePendingWitness(tw, "wit1-peer", block) - res := ð.Response{ - Time: time.Millisecond, - Done: make(chan error, 1), - Res: []*stateless.Witness{witness}, - } - - tw.manager.processWitnessResponse("wit1-peer", hash, res, time.Now()) + tw.manager.processWitnessResponse("wit1-peer", block.Hash(), witnessResponse(witness), time.Now()) - tw.mu.Lock() - defer tw.mu.Unlock() - if len(tw.droppedPeers) != 0 { - t.Fatalf("WIT1 fallback must not drop any peer; drops=%v", tw.droppedPeers) - } + requireNoDroppedPeers(t, tw, "WIT1 fallback must not drop any peer") } // TestVerifyAgainstSignedHashSkipsEncodeWhenNoSignedHash is the regression @@ -285,22 +265,13 @@ func TestEmptyResponseBacksOffToAvoidHammering(t *testing.T) { block := createTestBlock(606) hash := block.Hash() - tw.manager.mu.Lock() - tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "relay-only", block: block}, - announce: blockAnnounceForTest("relay-only", hash, block.NumberU64()), - } - tw.manager.mu.Unlock() - - emptyRes := func() *eth.Response { - return ð.Response{Time: time.Millisecond, Done: make(chan error, 1), Res: []*stateless.Witness{}} - } + primePendingWitness(tw, "relay-only", block) // Drive several consecutive empty responses, as an announce-only relayer // that does not yet hold the body would produce. var lastDelay time.Duration for i := 0; i < 8; i++ { - tw.manager.processWitnessResponse("relay-only", hash, emptyRes(), time.Now()) + tw.manager.processWitnessResponse("relay-only", hash, witnessResponse(), time.Now()) tw.manager.mu.Lock() st := tw.manager.pending[hash] if st == nil { @@ -334,24 +305,9 @@ func TestProcessWitnessResponseEmptyDoesNotDropAnnounceOnlyPeer(t *testing.T) { block := createTestBlock(404) hash := block.Hash() - tw.manager.mu.Lock() - tw.manager.pending[hash] = &witnessRequestState{ - op: &blockOrHeaderInject{origin: "announce-only", block: block}, - announce: blockAnnounceForTest("announce-only", hash, block.NumberU64()), - } - tw.manager.mu.Unlock() + primePendingWitness(tw, "announce-only", block) - res := ð.Response{ - Time: time.Millisecond, - Done: make(chan error, 1), - Res: []*stateless.Witness{}, // empty/unavailable - } + tw.manager.processWitnessResponse("announce-only", hash, witnessResponse(), time.Now()) - tw.manager.processWitnessResponse("announce-only", hash, res, time.Now()) - - tw.mu.Lock() - defer tw.mu.Unlock() - if len(tw.droppedPeers) != 0 { - t.Fatalf("empty response must NOT drop the responder; drops=%v", tw.droppedPeers) - } + requireNoDroppedPeers(t, tw, "empty response must NOT drop the responder") } diff --git a/eth/handler_wit.go b/eth/handler_wit.go index a3e60c04dd..943598cce7 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -94,82 +94,17 @@ func (h *witHandler) Handle(peer *wit.Peer, packet wit.Packet) error { func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.Witness) error { hash := witness.Header().Hash() - // WIT2: verify against the BP-signed witnessHash on file, then cache the - // encoded body so this node can serve it pre-import. We only expose the - // cache for serving when bytes match — otherwise an upstream that lied - // about the bytes would make us serve garbage and get dropped by - // downstream peers as liars, even though we just relayed what we received. + var accepted bool if signed, hasSigned := (*handler)(h).signedWitnesses.get(hash); hasSigned { - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - // Can't re-encode → can't check the signed commitment. Treat as - // unverifiable rather than letting unchecked bytes through. - peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) - return nil - } - bodyBytes := buf.Bytes() - bodyHash := stateless.WitnessCommitHash(bodyBytes) - if signed.WitnessHash != bodyHash { - // Upstream sent bytes that don't match the BP-signed commitment. - // Don't cache, don't mark the sender as a body-holder, don't - // inject: the broadcast path must not be a bypass of the byte - // verification the paged-fetch path performs. No disconnect — the - // sender may itself have been fed bad bytes upstream. - wit2BroadcastByteMismatchMeter.Mark(1) - peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; dropping", - "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) - return nil - } - peer.AddKnownWitness(hash) - (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) - // We now hold servable bytes — push to any peer that asked us - // for this body before we had it. - (*handler)(h).pushWitnessToWaiters(hash, witness, len(bodyBytes)) + accepted = h.acceptSignedBroadcast(peer, witness, hash, signed.WitnessHash) } else if deferred, _, hasDeferred := (*handler)(h).deferredAnnounces.peek(hash); hasDeferred { - // A signed announcement for this block is on file but still deferred: - // its producer-binding needs the block header, which a stateless node - // at the tip does not have yet — that is exactly the consumer-side - // state when a waiter push delivers the body for a block pending - // import. Bind the pushed bytes to the deferred commitment and, on - // match, accept for IMPORT ONLY: the witness flows to the block - // fetcher so the pending block can import (import re-verifies - // everything via stateless execution + state-root check). We do NOT - // cache for serving, do NOT promote into signedWitnesses, and do NOT - // relay — those carry the verified-announce trust property, and a - // deferred entry's producer is unverified until the post-import drain - // checks it against the chain-validated header. Verifying against the - // header embedded in the pushed witness instead would let a peer - // self-seal a fabricated header and pass its own announce as the - // producer's. - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) - return nil - } - if stateless.WitnessCommitHash(buf.Bytes()) != deferred.WitnessHash { - wit2BroadcastByteMismatchMeter.Mark(1) - peer.Log().Warn("wit2: broadcast bytes do not match deferred announce witnessHash; dropping", - "blockHash", hash, "expected", deferred.WitnessHash) - return nil - } - peer.AddKnownWitness(hash) - wit2BroadcastDeferredImportMeter.Mark(1) + accepted = h.acceptDeferredBroadcast(peer, witness, hash, deferred.WitnessHash) } else { - // No signed announcement on file: WIT1 fallback. The only binding we - // can check is that the header belongs to a block we actually know — - // without it, an unsolicited 16MB body for an arbitrary hash would be - // decoded and cached purely on the sender's word. Drop silently: a - // peer racing ahead of our import is early, not malicious. - if h.Chain().GetHeaderByHash(hash) == nil { - wit2BroadcastUnknownHeaderDropMeter.Mark(1) - peer.Log().Debug("dropping witness broadcast for unknown header", "blockHash", hash) - return nil - } - peer.AddKnownWitness(hash) - // Header is known, but we cannot prove byte-correctness to downstream - // WIT2 peers — don't expose for pre-import serving. The body still - // flows into the import path below. - wit2BroadcastUnverifiedSkippedMeter.Mark(1) + accepted = h.acceptUnsignedBroadcast(peer, hash) + } + + if !accepted { + return nil } // Inject the witness into the block fetcher's cache @@ -188,6 +123,97 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W return nil } +// encodedBroadcastBytes canonically re-encodes a broadcast witness so its +// bytes can be checked against a signed commitment. Returns ok=false when the +// witness cannot be re-encoded — the caller must treat the broadcast as +// unverifiable rather than letting unchecked bytes through. +func encodedBroadcastBytes(peer *wit.Peer, witness *stateless.Witness, hash common.Hash) ([]byte, bool) { + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) + return nil, false + } + return buf.Bytes(), true +} + +// acceptSignedBroadcast is the WIT2 accept path of the witness broadcast: +// verify against the BP-signed witnessHash on file, then cache the encoded +// body so this node can serve it pre-import. We only expose the cache for +// serving when bytes match — otherwise an upstream that lied about the bytes +// would make us serve garbage and get dropped by downstream peers as liars, +// even though we just relayed what we received. On mismatch nothing is +// cached, the sender is not marked as a body-holder, and the witness is not +// injected: the broadcast path must not be a bypass of the byte verification +// the paged-fetch path performs. No disconnect — the sender may itself have +// been fed bad bytes upstream. +func (h *witHandler) acceptSignedBroadcast(peer *wit.Peer, witness *stateless.Witness, hash common.Hash, signedHash common.Hash) bool { + bodyBytes, ok := encodedBroadcastBytes(peer, witness, hash) + if !ok { + return false + } + bodyHash := stateless.WitnessCommitHash(bodyBytes) + if signedHash != bodyHash { + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; dropping", + "blockHash", hash, "expected", signedHash, "actual", bodyHash) + return false + } + peer.AddKnownWitness(hash) + (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) + // We now hold servable bytes — push to any peer that asked us + // for this body before we had it. + (*handler)(h).pushWitnessToWaiters(hash, witness, len(bodyBytes)) + return true +} + +// acceptDeferredBroadcast handles a broadcast whose signed announcement is on +// file but still deferred: its producer-binding needs the block header, which +// a stateless node at the tip does not have yet — that is exactly the +// consumer-side state when a waiter push delivers the body for a block +// pending import. Bind the pushed bytes to the deferred commitment and, on +// match, accept for IMPORT ONLY: the witness flows to the block fetcher so +// the pending block can import (import re-verifies everything via stateless +// execution + state-root check). We do NOT cache for serving, do NOT promote +// into signedWitnesses, and do NOT relay — those carry the verified-announce +// trust property, and a deferred entry's producer is unverified until the +// post-import drain checks it against the chain-validated header. Verifying +// against the header embedded in the pushed witness instead would let a peer +// self-seal a fabricated header and pass its own announce as the producer's. +func (h *witHandler) acceptDeferredBroadcast(peer *wit.Peer, witness *stateless.Witness, hash common.Hash, deferredHash common.Hash) bool { + bodyBytes, ok := encodedBroadcastBytes(peer, witness, hash) + if !ok { + return false + } + if stateless.WitnessCommitHash(bodyBytes) != deferredHash { + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match deferred announce witnessHash; dropping", + "blockHash", hash, "expected", deferredHash) + return false + } + peer.AddKnownWitness(hash) + wit2BroadcastDeferredImportMeter.Mark(1) + return true +} + +// acceptUnsignedBroadcast is the WIT1 fallback with no signed announcement on +// file. The only binding we can check is that the header belongs to a block +// we actually know — without it, an unsolicited 16MB body for an arbitrary +// hash would be decoded and cached purely on the sender's word. Unknown +// headers are dropped silently: a peer racing ahead of our import is early, +// not malicious. For known headers we cannot prove byte-correctness to +// downstream WIT2 peers — the body is not exposed for pre-import serving but +// still flows into the import path. +func (h *witHandler) acceptUnsignedBroadcast(peer *wit.Peer, hash common.Hash) bool { + if h.Chain().GetHeaderByHash(hash) == nil { + wit2BroadcastUnknownHeaderDropMeter.Mark(1) + peer.Log().Debug("dropping witness broadcast for unknown header", "blockHash", hash) + return false + } + peer.AddKnownWitness(hash) + wit2BroadcastUnverifiedSkippedMeter.Mark(1) + return true +} + // handleWitnessHashesAnnounce handles a witness hashes broadcast from a peer. func (h *witHandler) handleWitnessHashesAnnounce(peer *wit.Peer, hashes []common.Hash, numbers []uint64) error { for _, hash := range hashes { diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go index 727ddb2d8d..897b5a15a4 100644 --- a/eth/handler_wit2.go +++ b/eth/handler_wit2.go @@ -2,8 +2,6 @@ package eth import ( "errors" - "sync" - "time" "github.com/ethereum/go-ethereum/accounts" "github.com/ethereum/go-ethereum/common" @@ -39,328 +37,6 @@ var ( wit2BroadcastDeferredImportMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_deferred_import_only", nil) ) -// Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket -// at burst=256 with a sustained rate of 64 announces/sec — higher than any -// honest gossip mesh would produce on Polygon's block cadence, low enough to -// neutralise an attacker spamming valid-but-redundant signed packets. -const ( - wit2AnnounceBurstCap = 256 - wit2AnnounceRefillPerSecond = 64 - // wit2MisbehaviorStrikeLimit is the number of structurally-invalid (bad - // signature, wrong producer, oversized packet) announces a peer may - // produce within strikeDecayWindow before being disconnected. - wit2MisbehaviorStrikeLimit = 5 - wit2MisbehaviorWindow = time.Minute -) - -// peerWit2State tracks a peer's wit2-announce burst budget and recent strikes. -// Lifecycle is tied to the eth handler's peer registration; entries are -// cleaned up when the peer disconnects. -type peerWit2State struct { - tokens float64 - lastRefill time.Time - strikeCount int - firstStrikeAt time.Time -} - -type peerWit2Tracker struct { - mu sync.Mutex - state map[string]*peerWit2State -} - -func newPeerWit2Tracker() *peerWit2Tracker { - return &peerWit2Tracker{state: make(map[string]*peerWit2State)} -} - -func (t *peerWit2Tracker) forget(peerID string) { - t.mu.Lock() - delete(t.state, peerID) - t.mu.Unlock() -} - -// allow returns true if the peer has enough budget to consume `count` -// announcements right now. False means the packet should be dropped and a -// rate-limit metric recorded; the caller decides whether to disconnect. -func (t *peerWit2Tracker) allow(peerID string, count int) bool { - t.mu.Lock() - defer t.mu.Unlock() - st, ok := t.state[peerID] - now := time.Now() - if !ok { - st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} - t.state[peerID] = st - } - elapsed := now.Sub(st.lastRefill).Seconds() - if elapsed > 0 { - st.tokens += elapsed * wit2AnnounceRefillPerSecond - if st.tokens > wit2AnnounceBurstCap { - st.tokens = wit2AnnounceBurstCap - } - st.lastRefill = now - } - if st.tokens < float64(count) { - return false - } - st.tokens -= float64(count) - return true -} - -// strike records a misbehavior for the peer. Returns true when the peer has -// exceeded the threshold within the decay window and must be disconnected. -func (t *peerWit2Tracker) strike(peerID string) bool { - t.mu.Lock() - defer t.mu.Unlock() - st, ok := t.state[peerID] - now := time.Now() - if !ok { - st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} - t.state[peerID] = st - } - if st.firstStrikeAt.IsZero() || now.Sub(st.firstStrikeAt) > wit2MisbehaviorWindow { - st.firstStrikeAt = now - st.strikeCount = 0 - } - st.strikeCount++ - return st.strikeCount >= wit2MisbehaviorStrikeLimit -} - -// wit2 announce-cache lifecycle constants. -const ( - // wit2AnnounceTTL bounds how long we remember a signed announcement so we - // can re-emit it on body delivery and skip duplicate relays. Must outlast - // typical fetch+import latency so producers/relayers still have the - // signature when stateless peers come asking for the body. - wit2AnnounceTTL = 30 * time.Second - - // wit2RelayWindow is the per-(blockHash, peer) duplicate-suppression window. - // Even without this, knownWitnesses dedup blocks repeats; the window adds - // belt-and-suspenders coverage during the brief gap between receive and - // known-cache update under concurrent gossip storms. - wit2RelayWindow = 200 * time.Millisecond - - // witnessBodyCacheCapacity bounds the number of pre-import witness bodies - // held in memory. Each entry is ~50MB on Polygon, so the cap keeps total - // memory under ~500MB worst case. Older entries are evicted as new ones - // arrive; a 10-block window comfortably covers typical block-fetch and - // import latency. - witnessBodyCacheCapacity = 10 -) - -// pendingWitnessBody holds RLP-encoded witness bytes received from the network -// before the corresponding block has been imported (and thus before the bytes -// have been written to chain storage). Lets serving peers answer GetWitness -// requests during the import gap, which is what makes early relay actually -// useful — a peer that received the body can serve it the moment its TCP -// receive completes, rather than waiting ~500ms for full block validation. -type pendingWitnessBody struct { - bytes []byte - witnessHash common.Hash - receivedAt time.Time -} - -// pendingWitnessBodyCache holds bytes by block hash with a short TTL. Entries -// are dropped after the body has been written to chain storage, or after the -// TTL expires (whichever first). The cache is a simple map; the witness body -// is large (~50MB) so the cap is set conservatively. -type pendingWitnessBodyCache struct { - mu sync.RWMutex - entries map[common.Hash]*pendingWitnessBody - capacity int -} - -func newPendingWitnessBodyCache(capacity int) *pendingWitnessBodyCache { - return &pendingWitnessBodyCache{ - entries: make(map[common.Hash]*pendingWitnessBody), - capacity: capacity, - } -} - -func (c *pendingWitnessBodyCache) put(blockHash common.Hash, bytes []byte, witnessHash common.Hash) { - c.mu.Lock() - defer c.mu.Unlock() - c.gcLocked() - if len(c.entries) >= c.capacity { - // Evict the oldest entry. Linear scan is fine at the configured cap. - var oldestHash common.Hash - var oldest time.Time - for h, e := range c.entries { - if oldest.IsZero() || e.receivedAt.Before(oldest) { - oldest = e.receivedAt - oldestHash = h - } - } - delete(c.entries, oldestHash) - } - c.entries[blockHash] = &pendingWitnessBody{ - bytes: bytes, - witnessHash: witnessHash, - receivedAt: time.Now(), - } -} - -func (c *pendingWitnessBodyCache) get(blockHash common.Hash) ([]byte, common.Hash, bool) { - c.mu.RLock() - e, ok := c.entries[blockHash] - if !ok { - c.mu.RUnlock() - return nil, common.Hash{}, false - } - if time.Since(e.receivedAt) > wit2AnnounceTTL { - // Expired: drop the large byte slice now rather than waiting for the - // next put() to gc. Without this, a node that stops receiving witness - // bodies retains up to capacity (10) ~50MB blobs indefinitely past the - // TTL, since gcLocked() only fires on put(). - c.mu.RUnlock() - c.mu.Lock() - // Re-check under the write lock: a concurrent put() may have replaced - // the entry with a fresh one we should not delete. - if cur, ok2 := c.entries[blockHash]; ok2 && cur == e { - delete(c.entries, blockHash) - } - c.mu.Unlock() - return nil, common.Hash{}, false - } - c.mu.RUnlock() - return e.bytes, e.witnessHash, true -} - -func (c *pendingWitnessBodyCache) drop(blockHash common.Hash) { - c.mu.Lock() - defer c.mu.Unlock() - delete(c.entries, blockHash) -} - -func (c *pendingWitnessBodyCache) gcLocked() { - cutoff := time.Now().Add(-wit2AnnounceTTL) - for h, e := range c.entries { - if e.receivedAt.Before(cutoff) { - delete(c.entries, h) - } - } -} - -const ( - // witnessWaiterHashCap bounds how many block hashes we track waiters for. - // Entries are tiny (a peer pointer + timestamp); the cap is a backstop - // against a peer asking for many distinct not-yet-available hashes. - witnessWaiterHashCap = 256 - - // witnessWaiterPerHashCap bounds waiters recorded per hash so a burst of - // distinct peers asking for the same not-yet-available witness can't grow a - // single bucket without bound. - witnessWaiterPerHashCap = 64 - - // witnessWaiterTTL drops stale waiter entries (peer gave up, disconnected, - // or obtained the body elsewhere). Aligned with the body cache TTL. - witnessWaiterTTL = 30 * time.Second -) - -// witnessWaiter records a peer that asked us for a witness body we did not yet -// have. We only record a waiter when a BP-signed announcement is on file for -// the hash, so the witness is known to exist and the registry is bounded by -// real, signed blocks rather than arbitrary peer-chosen hashes. -type witnessWaiter struct { - peer *wit.Peer - at time.Time -} - -// witnessWaiterRegistry tracks peers awaiting a witness body so we can push it -// to them the moment we obtain it. This restores the WIT1-style hand-off the -// WIT2 fast announce removed: WIT1 only ever announces a witness it already -// holds (and the announce marks the sender a body-holder), so a stateless -// consumer's first pull lands; WIT2 relays the signed announce ahead of the -// body, leaving the consumer to poll an announce-only relayer with repeated -// empty GetWitness until it catches up. Pushing on arrival closes that gap -// without flooding — at most one body per peer that actually asked, exactly the -// bandwidth a successful pull would have cost. -type witnessWaiterRegistry struct { - mu sync.Mutex - waiters map[common.Hash]map[string]*witnessWaiter -} - -func newWitnessWaiterRegistry() *witnessWaiterRegistry { - return &witnessWaiterRegistry{waiters: make(map[common.Hash]map[string]*witnessWaiter)} -} - -// record notes that peer is waiting for the body of hash. No-op for a nil peer. -func (r *witnessWaiterRegistry) record(hash common.Hash, peer *wit.Peer) { - if peer == nil { - return - } - r.mu.Lock() - defer r.mu.Unlock() - r.gcLocked() - - per, ok := r.waiters[hash] - if !ok { - if len(r.waiters) >= witnessWaiterHashCap { - // Registry full of distinct hashes; skip recording rather than - // evict. The peer simply keeps polling (with backoff) and lands the - // body on a later GetWitness — correctness is unaffected. - return - } - per = make(map[string]*witnessWaiter) - r.waiters[hash] = per - } - if _, exists := per[peer.ID()]; !exists && len(per) >= witnessWaiterPerHashCap { - return - } - per[peer.ID()] = &witnessWaiter{peer: peer, at: time.Now()} -} - -// has reports whether any non-expired waiter is recorded for hash. Used to skip -// the witness decode on the push path when nobody is waiting. -func (r *witnessWaiterRegistry) has(hash common.Hash) bool { - r.mu.Lock() - defer r.mu.Unlock() - per, ok := r.waiters[hash] - if !ok { - return false - } - cutoff := time.Now().Add(-witnessWaiterTTL) - for _, w := range per { - if !w.at.Before(cutoff) { - return true - } - } - return false -} - -// take returns and clears the live (non-expired) waiters for hash. -func (r *witnessWaiterRegistry) take(hash common.Hash) []*wit.Peer { - r.mu.Lock() - defer r.mu.Unlock() - per, ok := r.waiters[hash] - if !ok { - return nil - } - delete(r.waiters, hash) - cutoff := time.Now().Add(-witnessWaiterTTL) - out := make([]*wit.Peer, 0, len(per)) - for _, w := range per { - if w.at.Before(cutoff) { - continue - } - out = append(out, w.peer) - } - return out -} - -// gcLocked drops expired waiter entries and empty buckets. Caller holds r.mu. -func (r *witnessWaiterRegistry) gcLocked() { - cutoff := time.Now().Add(-witnessWaiterTTL) - for h, per := range r.waiters { - for id, w := range per { - if w.at.Before(cutoff) { - delete(per, id) - } - } - if len(per) == 0 { - delete(r.waiters, h) - } - } -} - // witnessPushMaxSize caps the encoded size of a witness we full-push to // waiting peers via NewWitness. The wit protocol rejects inbound messages // larger than 16MB (wit.maxMessageSize), so pushing a bigger body would make @@ -441,261 +117,6 @@ func (h *handler) pushWitnessBytesToWaiters(hash common.Hash, witnessBytes []byt h.pushWitnessToWaiters(hash, &witness, len(witnessBytes)) } -// deferredAnnounceCapacity bounds how many header-unknown signed announcements -// we hold while waiting for the corresponding block to arrive. Each entry is -// ~200 bytes; the cap is sized for a worst-case stall window where the local -// chain falls a few hundred blocks behind a busy mesh and announcements -// arrive ahead of headers en masse. -const deferredAnnounceCapacity = 256 - -// deferredAnnouncePerPeerDivisor caps how large a share of the deferred queue a -// single peer may occupy: perPeerCap = capacity / divisor. Without a per-peer -// cap, one peer operating within the announce rate limit (64/s) can fill all -// the slots with its own entries — each a distinct, attacker-chosen blockHash -// at a plausible near-tip number (the cache is keyed by hash, so a fixed -// blockNumber is no obstacle) — and evict honest header-racing announces, -// silently downgrading those blocks to unsigned WIT1 byte-verification. The cap -// reserves the bulk of the queue for the honest mesh. Honest peers race only -// the current tip, so a handful of in-flight deferrals is the norm and this cap -// is never approached in practice. -const deferredAnnouncePerPeerDivisor = 8 - -// deferredAnnounceEntry holds a signed announcement whose producer-binding -// could not be checked yet because the corresponding block header wasn't -// local. The drain path re-runs verification once the chain catches up. -type deferredAnnounceEntry struct { - announcement wit.SignedWitnessAnnouncement - peerID string - receivedAt time.Time -} - -// deferredAnnounceCache holds signed announcements deferred on header-unknown -// rejection so the chain-head loop can re-evaluate them when the matching -// block arrives. Without it, an announce that races ahead of its block — the -// expected outcome of independent block + announce gossip streams — is lost -// for good and subsequent witness fetches silently fall back to unsigned -// (WIT1) verification, leaking the WIT2 trust property for that block. -type deferredAnnounceCache struct { - mu sync.RWMutex - entries map[common.Hash]*deferredAnnounceEntry - perPeer map[string]int // live entry count per originating peer - capacity int - perPeerCap int -} - -func newDeferredAnnounceCache(capacity int) *deferredAnnounceCache { - perPeerCap := capacity / deferredAnnouncePerPeerDivisor - if perPeerCap < 1 { - perPeerCap = 1 - } - return &deferredAnnounceCache{ - entries: make(map[common.Hash]*deferredAnnounceEntry), - perPeer: make(map[string]int), - capacity: capacity, - perPeerCap: perPeerCap, - } -} - -// decPeerLocked drops one live-entry credit for peerID, removing the map key -// when it reaches zero. Caller must hold the write lock. -func (c *deferredAnnounceCache) decPeerLocked(peerID string) { - c.perPeer[peerID]-- - if c.perPeer[peerID] <= 0 { - delete(c.perPeer, peerID) - } -} - -// put stores the announcement keyed by block hash. A second put for the same -// hash refreshes receivedAt and overwrites the announcement — the more recent -// gossip wins, which is desirable when the original sender disconnected and a -// different peer now carries the announce forward; per-peer credit moves with -// it. For a new hash, the per-peer cap is enforced first (a peer at its share -// is dropped, recording a metric, so it cannot evict honest entries), then the -// global cap (evict the oldest entry across all peers; linear scan is cheap at -// the configured size). -func (c *deferredAnnounceCache) put(ann wit.SignedWitnessAnnouncement, peerID string) { - c.mu.Lock() - defer c.mu.Unlock() - c.gcLocked() - - if existing, exists := c.entries[ann.BlockHash]; exists { - // Overwrite for the same hash: net-zero slot change. Move per-peer - // credit if a different peer now carries this announce forward. - if existing.peerID != peerID { - c.decPeerLocked(existing.peerID) - c.perPeer[peerID]++ - } - c.entries[ann.BlockHash] = &deferredAnnounceEntry{ - announcement: ann, - peerID: peerID, - receivedAt: time.Now(), - } - return - } - - // New hash for this peer: enforce its share of the queue so no single peer - // can monopolise the cache and evict honest header-racing announces. - if c.perPeer[peerID] >= c.perPeerCap { - wit2DeferredPerPeerDropMeter.Mark(1) - return - } - - if len(c.entries) >= c.capacity { - var oldestHash common.Hash - var oldest time.Time - for h, e := range c.entries { - if oldest.IsZero() || e.receivedAt.Before(oldest) { - oldest = e.receivedAt - oldestHash = h - } - } - if victim, ok := c.entries[oldestHash]; ok { - c.decPeerLocked(victim.peerID) - delete(c.entries, oldestHash) - } - } - - c.entries[ann.BlockHash] = &deferredAnnounceEntry{ - announcement: ann, - peerID: peerID, - receivedAt: time.Now(), - } - c.perPeer[peerID]++ -} - -// take removes and returns the entry for blockHash if present and fresh. -// Returns ok=false on miss or expiry; expired entries are deleted in place. -func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEntry, bool) { - c.mu.Lock() - defer c.mu.Unlock() - e, ok := c.entries[blockHash] - if !ok { - return nil, false - } - delete(c.entries, blockHash) - c.decPeerLocked(e.peerID) - if time.Since(e.receivedAt) > wit2AnnounceTTL { - return nil, false - } - return e, true -} - -// peek returns the announcement for blockHash and the peer that relayed it, -// without consuming the entry, if a fresh one exists. Used by the broadcast -// path to bind a pushed body to a pending (deferred, not yet -// producer-verified) announcement, and by the fetch path to find a pull -// target when no marked peer exists. The entry must stay in place so the -// post-import drain still runs the real producer verification, promotion, -// and relay. -func (c *deferredAnnounceCache) peek(blockHash common.Hash) (wit.SignedWitnessAnnouncement, string, bool) { - c.mu.RLock() - defer c.mu.RUnlock() - e, ok := c.entries[blockHash] - if !ok || time.Since(e.receivedAt) > wit2AnnounceTTL { - return wit.SignedWitnessAnnouncement{}, "", false - } - return e.announcement, e.peerID, true -} - -// has reports whether a fresh entry exists for blockHash. Test-facing only; -// production code uses take to ensure the entry is consumed. -func (c *deferredAnnounceCache) has(blockHash common.Hash) bool { - c.mu.RLock() - defer c.mu.RUnlock() - e, ok := c.entries[blockHash] - if !ok { - return false - } - return time.Since(e.receivedAt) <= wit2AnnounceTTL -} - -// gcLocked drops entries past the TTL. Caller must hold the write lock. -func (c *deferredAnnounceCache) gcLocked() { - cutoff := time.Now().Add(-wit2AnnounceTTL) - for h, e := range c.entries { - if e.receivedAt.Before(cutoff) { - c.decPeerLocked(e.peerID) - delete(c.entries, h) - } - } -} - -// signedWitnessCache stores BP-signed announcements by block hash. The cache -// is consulted by: -// - the relay path on receive (skip if already seen recently), -// - the body-broadcast path (re-emit the cached signed announce when a -// stateless peer requests the body), and -// - the producer path (cache the locally-signed announcement so subsequent -// re-emissions from this node don't re-sign). -type signedWitnessCache struct { - mu sync.RWMutex - entries map[common.Hash]*signedAnnounceEntry -} - -type signedAnnounceEntry struct { - announcement wit.SignedWitnessAnnouncement - receivedAt time.Time -} - -func newSignedWitnessCache() *signedWitnessCache { - return &signedWitnessCache{entries: make(map[common.Hash]*signedAnnounceEntry)} -} - -// putIfNewer stores the announcement keyed by block hash, returning true if -// the cache did not already contain a fresh entry for this hash. Callers use -// the return value to decide whether to relay (false → suppress duplicate). -// -// If a fresh entry already exists with a *different* WitnessHash, the new -// announcement is rejected outright (returns false): the first valid signed -// commitment wins for the lifetime of the entry. This prevents an attacker -// who has obtained a second valid signature (e.g. a compromised producer -// later in the same window) from poisoning the cache mid-fetch and dropping -// honest serving peers against a different hash. -func (c *signedWitnessCache) putIfNewer(ann wit.SignedWitnessAnnouncement) bool { - c.mu.Lock() - defer c.mu.Unlock() - c.gcLocked() - if existing, ok := c.entries[ann.BlockHash]; ok { - if existing.announcement.WitnessHash != ann.WitnessHash { - wit2ConflictingWitnessHashMeter.Mark(1) - return false - } - // Same WitnessHash, recent: dedup. - if time.Since(existing.receivedAt) < wit2RelayWindow { - return false - } - } - c.entries[ann.BlockHash] = &signedAnnounceEntry{ - announcement: ann, - receivedAt: time.Now(), - } - return true -} - -// get returns the cached announcement for a block hash, if present and fresh. -func (c *signedWitnessCache) get(blockHash common.Hash) (wit.SignedWitnessAnnouncement, bool) { - c.mu.RLock() - defer c.mu.RUnlock() - e, ok := c.entries[blockHash] - if !ok { - return wit.SignedWitnessAnnouncement{}, false - } - if time.Since(e.receivedAt) > wit2AnnounceTTL { - return wit.SignedWitnessAnnouncement{}, false - } - return e.announcement, true -} - -// gcLocked drops entries past the TTL. Caller must hold the write lock. -func (c *signedWitnessCache) gcLocked() { - cutoff := time.Now().Add(-wit2AnnounceTTL) - for h, e := range c.entries { - if e.receivedAt.Before(cutoff) { - delete(c.entries, h) - } - } -} - // verifySignedAnnouncement returns the recovered signer address if the // signature is structurally valid; otherwise an error. Validator-set // membership is checked separately against the consensus engine. diff --git a/eth/handler_wit2_announces.go b/eth/handler_wit2_announces.go new file mode 100644 index 0000000000..9f16834655 --- /dev/null +++ b/eth/handler_wit2_announces.go @@ -0,0 +1,356 @@ +package eth + +import ( + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/eth/protocols/wit" +) + +// Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket +// at burst=256 with a sustained rate of 64 announces/sec — higher than any +// honest gossip mesh would produce on Polygon's block cadence, low enough to +// neutralise an attacker spamming valid-but-redundant signed packets. +const ( + wit2AnnounceBurstCap = 256 + wit2AnnounceRefillPerSecond = 64 + // wit2MisbehaviorStrikeLimit is the number of structurally-invalid (bad + // signature, wrong producer, oversized packet) announces a peer may + // produce within strikeDecayWindow before being disconnected. + wit2MisbehaviorStrikeLimit = 5 + wit2MisbehaviorWindow = time.Minute +) + +// peerWit2State tracks a peer's wit2-announce burst budget and recent strikes. +// Lifecycle is tied to the eth handler's peer registration; entries are +// cleaned up when the peer disconnects. +type peerWit2State struct { + tokens float64 + lastRefill time.Time + strikeCount int + firstStrikeAt time.Time +} + +type peerWit2Tracker struct { + mu sync.Mutex + state map[string]*peerWit2State +} + +func newPeerWit2Tracker() *peerWit2Tracker { + return &peerWit2Tracker{state: make(map[string]*peerWit2State)} +} + +func (t *peerWit2Tracker) forget(peerID string) { + t.mu.Lock() + delete(t.state, peerID) + t.mu.Unlock() +} + +// allow returns true if the peer has enough budget to consume `count` +// announcements right now. False means the packet should be dropped and a +// rate-limit metric recorded; the caller decides whether to disconnect. +func (t *peerWit2Tracker) allow(peerID string, count int) bool { + t.mu.Lock() + defer t.mu.Unlock() + st, ok := t.state[peerID] + now := time.Now() + if !ok { + st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} + t.state[peerID] = st + } + elapsed := now.Sub(st.lastRefill).Seconds() + if elapsed > 0 { + st.tokens += elapsed * wit2AnnounceRefillPerSecond + if st.tokens > wit2AnnounceBurstCap { + st.tokens = wit2AnnounceBurstCap + } + st.lastRefill = now + } + if st.tokens < float64(count) { + return false + } + st.tokens -= float64(count) + return true +} + +// strike records a misbehavior for the peer. Returns true when the peer has +// exceeded the threshold within the decay window and must be disconnected. +func (t *peerWit2Tracker) strike(peerID string) bool { + t.mu.Lock() + defer t.mu.Unlock() + st, ok := t.state[peerID] + now := time.Now() + if !ok { + st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} + t.state[peerID] = st + } + if st.firstStrikeAt.IsZero() || now.Sub(st.firstStrikeAt) > wit2MisbehaviorWindow { + st.firstStrikeAt = now + st.strikeCount = 0 + } + st.strikeCount++ + return st.strikeCount >= wit2MisbehaviorStrikeLimit +} + +// deferredAnnounceCapacity bounds how many header-unknown signed announcements +// we hold while waiting for the corresponding block to arrive. Each entry is +// ~200 bytes; the cap is sized for a worst-case stall window where the local +// chain falls a few hundred blocks behind a busy mesh and announcements +// arrive ahead of headers en masse. +const deferredAnnounceCapacity = 256 + +// deferredAnnouncePerPeerDivisor caps how large a share of the deferred queue a +// single peer may occupy: perPeerCap = capacity / divisor. Without a per-peer +// cap, one peer operating within the announce rate limit (64/s) can fill all +// the slots with its own entries — each a distinct, attacker-chosen blockHash +// at a plausible near-tip number (the cache is keyed by hash, so a fixed +// blockNumber is no obstacle) — and evict honest header-racing announces, +// silently downgrading those blocks to unsigned WIT1 byte-verification. The cap +// reserves the bulk of the queue for the honest mesh. Honest peers race only +// the current tip, so a handful of in-flight deferrals is the norm and this cap +// is never approached in practice. +const deferredAnnouncePerPeerDivisor = 8 + +// deferredAnnounceEntry holds a signed announcement whose producer-binding +// could not be checked yet because the corresponding block header wasn't +// local. The drain path re-runs verification once the chain catches up. +type deferredAnnounceEntry struct { + announcement wit.SignedWitnessAnnouncement + peerID string + receivedAt time.Time +} + +// deferredAnnounceCache holds signed announcements deferred on header-unknown +// rejection so the chain-head loop can re-evaluate them when the matching +// block arrives. Without it, an announce that races ahead of its block — the +// expected outcome of independent block + announce gossip streams — is lost +// for good and subsequent witness fetches silently fall back to unsigned +// (WIT1) verification, leaking the WIT2 trust property for that block. +type deferredAnnounceCache struct { + mu sync.RWMutex + entries map[common.Hash]*deferredAnnounceEntry + perPeer map[string]int // live entry count per originating peer + capacity int + perPeerCap int +} + +func newDeferredAnnounceCache(capacity int) *deferredAnnounceCache { + perPeerCap := capacity / deferredAnnouncePerPeerDivisor + if perPeerCap < 1 { + perPeerCap = 1 + } + return &deferredAnnounceCache{ + entries: make(map[common.Hash]*deferredAnnounceEntry), + perPeer: make(map[string]int), + capacity: capacity, + perPeerCap: perPeerCap, + } +} + +// decPeerLocked drops one live-entry credit for peerID, removing the map key +// when it reaches zero. Caller must hold the write lock. +func (c *deferredAnnounceCache) decPeerLocked(peerID string) { + c.perPeer[peerID]-- + if c.perPeer[peerID] <= 0 { + delete(c.perPeer, peerID) + } +} + +// put stores the announcement keyed by block hash. A second put for the same +// hash refreshes receivedAt and overwrites the announcement — the more recent +// gossip wins, which is desirable when the original sender disconnected and a +// different peer now carries the announce forward; per-peer credit moves with +// it. For a new hash, the per-peer cap is enforced first (a peer at its share +// is dropped, recording a metric, so it cannot evict honest entries), then the +// global cap (evict the oldest entry across all peers; linear scan is cheap at +// the configured size). +func (c *deferredAnnounceCache) put(ann wit.SignedWitnessAnnouncement, peerID string) { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + + if existing, exists := c.entries[ann.BlockHash]; exists { + // Overwrite for the same hash: net-zero slot change. Move per-peer + // credit if a different peer now carries this announce forward. + if existing.peerID != peerID { + c.decPeerLocked(existing.peerID) + c.perPeer[peerID]++ + } + c.entries[ann.BlockHash] = &deferredAnnounceEntry{ + announcement: ann, + peerID: peerID, + receivedAt: time.Now(), + } + return + } + + // New hash for this peer: enforce its share of the queue so no single peer + // can monopolise the cache and evict honest header-racing announces. + if c.perPeer[peerID] >= c.perPeerCap { + wit2DeferredPerPeerDropMeter.Mark(1) + return + } + + if len(c.entries) >= c.capacity { + c.evictOldestLocked() + } + + c.entries[ann.BlockHash] = &deferredAnnounceEntry{ + announcement: ann, + peerID: peerID, + receivedAt: time.Now(), + } + c.perPeer[peerID]++ +} + +// evictOldestLocked drops the oldest entry across all peers to make room for +// a new one (linear scan is cheap at the configured size). Caller must hold +// the write lock. +func (c *deferredAnnounceCache) evictOldestLocked() { + var oldestHash common.Hash + var oldest time.Time + for h, e := range c.entries { + if oldest.IsZero() || e.receivedAt.Before(oldest) { + oldest = e.receivedAt + oldestHash = h + } + } + if victim, ok := c.entries[oldestHash]; ok { + c.decPeerLocked(victim.peerID) + delete(c.entries, oldestHash) + } +} + +// take removes and returns the entry for blockHash if present and fresh. +// Returns ok=false on miss or expiry; expired entries are deleted in place. +func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEntry, bool) { + c.mu.Lock() + defer c.mu.Unlock() + e, ok := c.entries[blockHash] + if !ok { + return nil, false + } + delete(c.entries, blockHash) + c.decPeerLocked(e.peerID) + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return nil, false + } + return e, true +} + +// peek returns the announcement for blockHash and the peer that relayed it, +// without consuming the entry, if a fresh one exists. Used by the broadcast +// path to bind a pushed body to a pending (deferred, not yet +// producer-verified) announcement, and by the fetch path to find a pull +// target when no marked peer exists. The entry must stay in place so the +// post-import drain still runs the real producer verification, promotion, +// and relay. +func (c *deferredAnnounceCache) peek(blockHash common.Hash) (wit.SignedWitnessAnnouncement, string, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok || time.Since(e.receivedAt) > wit2AnnounceTTL { + return wit.SignedWitnessAnnouncement{}, "", false + } + return e.announcement, e.peerID, true +} + +// has reports whether a fresh entry exists for blockHash. Test-facing only; +// production code uses take to ensure the entry is consumed. +func (c *deferredAnnounceCache) has(blockHash common.Hash) bool { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return false + } + return time.Since(e.receivedAt) <= wit2AnnounceTTL +} + +// gcLocked drops entries past the TTL. Caller must hold the write lock. +func (c *deferredAnnounceCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + c.decPeerLocked(e.peerID) + delete(c.entries, h) + } + } +} + +// signedWitnessCache stores BP-signed announcements by block hash. The cache +// is consulted by: +// - the relay path on receive (skip if already seen recently), +// - the body-broadcast path (re-emit the cached signed announce when a +// stateless peer requests the body), and +// - the producer path (cache the locally-signed announcement so subsequent +// re-emissions from this node don't re-sign). +type signedWitnessCache struct { + mu sync.RWMutex + entries map[common.Hash]*signedAnnounceEntry +} + +type signedAnnounceEntry struct { + announcement wit.SignedWitnessAnnouncement + receivedAt time.Time +} + +func newSignedWitnessCache() *signedWitnessCache { + return &signedWitnessCache{entries: make(map[common.Hash]*signedAnnounceEntry)} +} + +// putIfNewer stores the announcement keyed by block hash, returning true if +// the cache did not already contain a fresh entry for this hash. Callers use +// the return value to decide whether to relay (false → suppress duplicate). +// +// If a fresh entry already exists with a *different* WitnessHash, the new +// announcement is rejected outright (returns false): the first valid signed +// commitment wins for the lifetime of the entry. This prevents an attacker +// who has obtained a second valid signature (e.g. a compromised producer +// later in the same window) from poisoning the cache mid-fetch and dropping +// honest serving peers against a different hash. +func (c *signedWitnessCache) putIfNewer(ann wit.SignedWitnessAnnouncement) bool { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if existing, ok := c.entries[ann.BlockHash]; ok { + if existing.announcement.WitnessHash != ann.WitnessHash { + wit2ConflictingWitnessHashMeter.Mark(1) + return false + } + // Same WitnessHash, recent: dedup. + if time.Since(existing.receivedAt) < wit2RelayWindow { + return false + } + } + c.entries[ann.BlockHash] = &signedAnnounceEntry{ + announcement: ann, + receivedAt: time.Now(), + } + return true +} + +// get returns the cached announcement for a block hash, if present and fresh. +func (c *signedWitnessCache) get(blockHash common.Hash) (wit.SignedWitnessAnnouncement, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return wit.SignedWitnessAnnouncement{}, false + } + return e.announcement, true +} + +// gcLocked drops entries past the TTL. Caller must hold the write lock. +func (c *signedWitnessCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} diff --git a/eth/handler_wit2_bodies.go b/eth/handler_wit2_bodies.go new file mode 100644 index 0000000000..891f40f6c2 --- /dev/null +++ b/eth/handler_wit2_bodies.go @@ -0,0 +1,246 @@ +package eth + +import ( + "sync" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/eth/protocols/wit" +) + +// wit2 announce-cache lifecycle constants. +const ( + // wit2AnnounceTTL bounds how long we remember a signed announcement so we + // can re-emit it on body delivery and skip duplicate relays. Must outlast + // typical fetch+import latency so producers/relayers still have the + // signature when stateless peers come asking for the body. + wit2AnnounceTTL = 30 * time.Second + + // wit2RelayWindow is the per-(blockHash, peer) duplicate-suppression window. + // Even without this, knownWitnesses dedup blocks repeats; the window adds + // belt-and-suspenders coverage during the brief gap between receive and + // known-cache update under concurrent gossip storms. + wit2RelayWindow = 200 * time.Millisecond + + // witnessBodyCacheCapacity bounds the number of pre-import witness bodies + // held in memory. Each entry is ~50MB on Polygon, so the cap keeps total + // memory under ~500MB worst case. Older entries are evicted as new ones + // arrive; a 10-block window comfortably covers typical block-fetch and + // import latency. + witnessBodyCacheCapacity = 10 +) + +// pendingWitnessBody holds RLP-encoded witness bytes received from the network +// before the corresponding block has been imported (and thus before the bytes +// have been written to chain storage). Lets serving peers answer GetWitness +// requests during the import gap, which is what makes early relay actually +// useful — a peer that received the body can serve it the moment its TCP +// receive completes, rather than waiting ~500ms for full block validation. +type pendingWitnessBody struct { + bytes []byte + witnessHash common.Hash + receivedAt time.Time +} + +// pendingWitnessBodyCache holds bytes by block hash with a short TTL. Entries +// are dropped after the body has been written to chain storage, or after the +// TTL expires (whichever first). The cache is a simple map; the witness body +// is large (~50MB) so the cap is set conservatively. +type pendingWitnessBodyCache struct { + mu sync.RWMutex + entries map[common.Hash]*pendingWitnessBody + capacity int +} + +func newPendingWitnessBodyCache(capacity int) *pendingWitnessBodyCache { + return &pendingWitnessBodyCache{ + entries: make(map[common.Hash]*pendingWitnessBody), + capacity: capacity, + } +} + +func (c *pendingWitnessBodyCache) put(blockHash common.Hash, bytes []byte, witnessHash common.Hash) { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if len(c.entries) >= c.capacity { + // Evict the oldest entry. Linear scan is fine at the configured cap. + var oldestHash common.Hash + var oldest time.Time + for h, e := range c.entries { + if oldest.IsZero() || e.receivedAt.Before(oldest) { + oldest = e.receivedAt + oldestHash = h + } + } + delete(c.entries, oldestHash) + } + c.entries[blockHash] = &pendingWitnessBody{ + bytes: bytes, + witnessHash: witnessHash, + receivedAt: time.Now(), + } +} + +func (c *pendingWitnessBodyCache) get(blockHash common.Hash) ([]byte, common.Hash, bool) { + c.mu.RLock() + e, ok := c.entries[blockHash] + if !ok { + c.mu.RUnlock() + return nil, common.Hash{}, false + } + if time.Since(e.receivedAt) > wit2AnnounceTTL { + // Expired: drop the large byte slice now rather than waiting for the + // next put() to gc. Without this, a node that stops receiving witness + // bodies retains up to capacity (10) ~50MB blobs indefinitely past the + // TTL, since gcLocked() only fires on put(). + c.mu.RUnlock() + c.mu.Lock() + // Re-check under the write lock: a concurrent put() may have replaced + // the entry with a fresh one we should not delete. + if cur, ok2 := c.entries[blockHash]; ok2 && cur == e { + delete(c.entries, blockHash) + } + c.mu.Unlock() + return nil, common.Hash{}, false + } + c.mu.RUnlock() + return e.bytes, e.witnessHash, true +} + +func (c *pendingWitnessBodyCache) drop(blockHash common.Hash) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.entries, blockHash) +} + +func (c *pendingWitnessBodyCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + +const ( + // witnessWaiterHashCap bounds how many block hashes we track waiters for. + // Entries are tiny (a peer pointer + timestamp); the cap is a backstop + // against a peer asking for many distinct not-yet-available hashes. + witnessWaiterHashCap = 256 + + // witnessWaiterPerHashCap bounds waiters recorded per hash so a burst of + // distinct peers asking for the same not-yet-available witness can't grow a + // single bucket without bound. + witnessWaiterPerHashCap = 64 + + // witnessWaiterTTL drops stale waiter entries (peer gave up, disconnected, + // or obtained the body elsewhere). Aligned with the body cache TTL. + witnessWaiterTTL = 30 * time.Second +) + +// witnessWaiter records a peer that asked us for a witness body we did not yet +// have. We only record a waiter when a BP-signed announcement is on file for +// the hash, so the witness is known to exist and the registry is bounded by +// real, signed blocks rather than arbitrary peer-chosen hashes. +type witnessWaiter struct { + peer *wit.Peer + at time.Time +} + +// witnessWaiterRegistry tracks peers awaiting a witness body so we can push it +// to them the moment we obtain it. This restores the WIT1-style hand-off the +// WIT2 fast announce removed: WIT1 only ever announces a witness it already +// holds (and the announce marks the sender a body-holder), so a stateless +// consumer's first pull lands; WIT2 relays the signed announce ahead of the +// body, leaving the consumer to poll an announce-only relayer with repeated +// empty GetWitness until it catches up. Pushing on arrival closes that gap +// without flooding — at most one body per peer that actually asked, exactly the +// bandwidth a successful pull would have cost. +type witnessWaiterRegistry struct { + mu sync.Mutex + waiters map[common.Hash]map[string]*witnessWaiter +} + +func newWitnessWaiterRegistry() *witnessWaiterRegistry { + return &witnessWaiterRegistry{waiters: make(map[common.Hash]map[string]*witnessWaiter)} +} + +// record notes that peer is waiting for the body of hash. No-op for a nil peer. +func (r *witnessWaiterRegistry) record(hash common.Hash, peer *wit.Peer) { + if peer == nil { + return + } + r.mu.Lock() + defer r.mu.Unlock() + r.gcLocked() + + per, ok := r.waiters[hash] + if !ok { + if len(r.waiters) >= witnessWaiterHashCap { + // Registry full of distinct hashes; skip recording rather than + // evict. The peer simply keeps polling (with backoff) and lands the + // body on a later GetWitness — correctness is unaffected. + return + } + per = make(map[string]*witnessWaiter) + r.waiters[hash] = per + } + if _, exists := per[peer.ID()]; !exists && len(per) >= witnessWaiterPerHashCap { + return + } + per[peer.ID()] = &witnessWaiter{peer: peer, at: time.Now()} +} + +// has reports whether any non-expired waiter is recorded for hash. Used to skip +// the witness decode on the push path when nobody is waiting. +func (r *witnessWaiterRegistry) has(hash common.Hash) bool { + r.mu.Lock() + defer r.mu.Unlock() + per, ok := r.waiters[hash] + if !ok { + return false + } + cutoff := time.Now().Add(-witnessWaiterTTL) + for _, w := range per { + if !w.at.Before(cutoff) { + return true + } + } + return false +} + +// take returns and clears the live (non-expired) waiters for hash. +func (r *witnessWaiterRegistry) take(hash common.Hash) []*wit.Peer { + r.mu.Lock() + defer r.mu.Unlock() + per, ok := r.waiters[hash] + if !ok { + return nil + } + delete(r.waiters, hash) + cutoff := time.Now().Add(-witnessWaiterTTL) + out := make([]*wit.Peer, 0, len(per)) + for _, w := range per { + if w.at.Before(cutoff) { + continue + } + out = append(out, w.peer) + } + return out +} + +// gcLocked drops expired waiter entries and empty buckets. Caller holds r.mu. +func (r *witnessWaiterRegistry) gcLocked() { + cutoff := time.Now().Add(-witnessWaiterTTL) + for h, per := range r.waiters { + for id, w := range per { + if w.at.Before(cutoff) { + delete(per, id) + } + } + if len(per) == 0 { + delete(r.waiters, h) + } + } +} diff --git a/eth/handler_wit2_caches_test.go b/eth/handler_wit2_caches_test.go new file mode 100644 index 0000000000..eb3c20a390 --- /dev/null +++ b/eth/handler_wit2_caches_test.go @@ -0,0 +1,682 @@ +package eth + +import ( + "crypto/rand" + "fmt" + "math/big" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + ethproto "github.com/ethereum/go-ethereum/eth/protocols/eth" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/p2p" + "github.com/ethereum/go-ethereum/p2p/enode" + "github.com/ethereum/go-ethereum/rlp" + "github.com/stretchr/testify/require" +) + +// decodeTestWitness decodes canonical witness bytes back into a Witness, as a +// gossip receiver would before handing it to the broadcast handler. +func decodeTestWitness(t *testing.T, body []byte) *stateless.Witness { + t.Helper() + + var witness stateless.Witness + require.NoError(t, rlp.DecodeBytes(body, &witness)) + return &witness +} + +// registerEthWitPeer registers an eth peer with an attached wit peer sharing +// the same enode ID — as in production, where both protocols run on one +// devp2p connection. Sharing the ID is what lets ID-keyed lookups (e.g. the +// cosend recipient map) match across the two protocol wrappers. Returns the +// wit peer at the requested version and a cleanup func. +func registerEthWitPeer(t *testing.T, h *testHandler, version uint) (*wit.Peer, func()) { + t.Helper() + + var id enode.ID + rand.Read(id[:]) + + app, net := p2p.MsgPipe() + done := make(chan struct{}) + go func() { + for { + msg, err := app.ReadMsg() + if err != nil { + close(done) + return + } + msg.Discard() + } + }() + + witPeer := wit.NewPeer(version, p2p.NewPeer(id, "test-peer", nil), net, log.New()) + ethPeer := ethproto.NewPeer(ethproto.ETH68, p2p.NewPeer(id, "test-eth-peer", nil), nil, nil) + require.NoError(t, h.handler.peers.registerPeer(ethPeer, nil, witPeer)) + + cleanup := func() { + h.handler.peers.unregisterPeer(ethPeer.ID()) + app.Close() + witPeer.Close() + ethPeer.Close() + <-done + } + return witPeer, cleanup +} + +// TestPeerWit2TrackerBudgetLifecycle pins the token-bucket arithmetic of the +// announce rate limiter: a fresh peer starts at the burst cap, over-budget +// packets are rejected without going negative, idle time refills tokens up to +// (and not beyond) the cap, and forget resets the peer to a full budget. +func TestPeerWit2TrackerBudgetLifecycle(t *testing.T) { + tr := newPeerWit2Tracker() + + // Fresh peer: full burst is allowed, one more announcement is not. + require.True(t, tr.allow("p1", wit2AnnounceBurstCap)) + require.False(t, tr.allow("p1", 1), "budget must be exhausted after consuming the full burst") + + // Idle refill: backdate the last refill and confirm tokens come back at + // the configured rate (1s → wit2AnnounceRefillPerSecond tokens). + tr.mu.Lock() + tr.state["p1"].lastRefill = time.Now().Add(-time.Second) + tr.mu.Unlock() + require.True(t, tr.allow("p1", wit2AnnounceRefillPerSecond/2)) + + // Refill clamps at the burst cap: a long idle period must not bank more + // than one full burst. + tr.mu.Lock() + tr.state["p1"].lastRefill = time.Now().Add(-time.Hour) + tr.mu.Unlock() + require.True(t, tr.allow("p1", wit2AnnounceBurstCap)) + require.False(t, tr.allow("p1", 1), "refill must clamp at the burst cap") + + // forget resets the peer: a full burst is available again. + tr.forget("p1") + require.True(t, tr.allow("p1", wit2AnnounceBurstCap)) +} + +// TestPeerWit2TrackerStrikeWindowReset verifies that strikes outside the decay +// window do not accumulate toward a disconnect: a peer striking at a rate +// below the limit-per-window is tolerated indefinitely (stray pre-fork +// content), while sustained misbehavior inside one window trips the limit. +func TestPeerWit2TrackerStrikeWindowReset(t *testing.T) { + tr := newPeerWit2Tracker() + + for i := 0; i < wit2MisbehaviorStrikeLimit-1; i++ { + require.False(t, tr.strike("p1"), "strike %d must stay under the limit", i) + } + + // Age the window: the next strike opens a fresh window instead of + // tripping the limit. + tr.mu.Lock() + tr.state["p1"].firstStrikeAt = time.Now().Add(-2 * wit2MisbehaviorWindow) + tr.mu.Unlock() + require.False(t, tr.strike("p1"), "strike after window expiry must reset the count") +} + +// TestWitnessWaiterRegistryCapsAndExpiry covers the waiter registry's bounds: +// nil peers are ignored, expired waiters stop counting (and get GC'd), and +// both the distinct-hash cap and the per-hash peer cap refuse new entries +// rather than evicting live ones. +func TestWitnessWaiterRegistryCapsAndExpiry(t *testing.T) { + r := newWitnessWaiterRegistry() + hash := common.HexToHash("0x01") + + r.record(hash, nil) + require.False(t, r.has(hash), "nil peer must not be recorded") + require.Nil(t, r.take(hash), "take on an empty registry must return nil") + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + r.record(hash, peer) + require.True(t, r.has(hash)) + + // Expire the waiter: has() must turn false and take() must skip it. + r.mu.Lock() + r.waiters[hash][peer.ID()].at = time.Now().Add(-2 * witnessWaiterTTL) + r.mu.Unlock() + require.False(t, r.has(hash), "expired waiter must not count as live") + require.Empty(t, r.take(hash), "expired waiter must not be returned") + + // Distinct-hash cap: once the registry is full of live hashes, recording + // a waiter for a new hash is skipped (the peer keeps polling instead). + for i := 0; i < witnessWaiterHashCap; i++ { + r.record(common.BytesToHash([]byte(fmt.Sprintf("filler-%d", i))), peer) + } + overflow := common.HexToHash("0xfeed") + r.record(overflow, peer) + require.False(t, r.has(overflow), "hash over the registry cap must not be recorded") + + // Per-hash peer cap: a hash already at its waiter limit refuses new + // peers but keeps serving the recorded ones. + target := common.BytesToHash([]byte("filler-0")) + r.mu.Lock() + for i := 0; len(r.waiters[target]) < witnessWaiterPerHashCap; i++ { + r.waiters[target][fmt.Sprintf("synthetic-%d", i)] = &witnessWaiter{peer: peer, at: time.Now()} + } + r.mu.Unlock() + + extra, cleanupExtra := newTestWit2PeerWithReader() + defer cleanupExtra() + r.record(target, extra) + + r.mu.Lock() + _, recorded := r.waiters[target][extra.ID()] + r.mu.Unlock() + require.False(t, recorded, "peer over the per-hash cap must not be recorded") +} + +// TestWaiterPushGuards covers the safety rails around the waiter push: nil +// witnesses and oversized bodies are never pushed (the latter falls back to +// the paged pull path), already-delivered waiters are skipped, a flush with +// no stored body is a no-op, and undecodable bytes are dropped without +// consuming the waiters. +func TestWaiterPushGuards(t *testing.T) { + h := newTestHandler() + defer h.close() + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(515)} + hash := header.Hash() + + // Nil witness: nothing happens, waiter stays. + h.handler.witnessWaiters.record(hash, peer) + h.handler.pushWitnessToWaiters(hash, nil, 0) + require.True(t, h.handler.witnessWaiters.has(hash)) + + // Oversized witness: push is skipped, waiters stay on the pull path + // (entry is NOT consumed by the size guard). + witness, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + h.handler.pushWitnessToWaiters(hash, witness, witnessPushMaxSize+1) + require.True(t, h.handler.witnessWaiters.has(hash), "oversize guard must not consume waiters") + + // Waiter already knows the body: take() consumes the entry but the send + // is skipped. + peer.AddKnownWitness(hash) + h.handler.pushWitnessToWaiters(hash, witness, 1024) + require.False(t, h.handler.witnessWaiters.has(hash), "push must consume the waiter entry") + + // flush with no body in chain storage: no-op, waiter preserved. + h.handler.witnessWaiters.record(hash, peer) + h.handler.flushWitnessWaitersForImported(hash) + require.True(t, h.handler.witnessWaiters.has(hash), "flush without a stored body must keep the waiter") + + // Undecodable bytes: decode fails, waiters preserved for the pull path. + h.handler.pushWitnessBytesToWaiters(hash, []byte{0xde, 0xad, 0xbe, 0xef}) + require.True(t, h.handler.witnessWaiters.has(hash), "decode failure must not consume waiters") + + // Empty bytes / no waiter recorded: early returns. + h.handler.pushWitnessBytesToWaiters(hash, nil) + h.handler.pushWitnessBytesToWaiters(common.HexToHash("0x9999"), []byte{0x01}) + + // Serving cache not wired (nil): cacheVerifiedWitnessForServing is a no-op. + saved := h.handler.pendingWitnessBodies + h.handler.pendingWitnessBodies = nil + h.handler.cacheVerifiedWitnessForServing(hash, []byte{0x01}, common.Hash{}) + h.handler.pendingWitnessBodies = saved +} + +// TestDeferredAnnounceCacheLifecycle covers the deferred-announce cache edge +// behavior: same-hash overwrites move per-peer credit to the latest relayer, +// the global cap evicts the oldest entry (not the newest), and take/peek/has +// all treat TTL-expired entries as absent. +func TestDeferredAnnounceCacheLifecycle(t *testing.T) { + ann := func(b byte) wit.SignedWitnessAnnouncement { + return wit.SignedWitnessAnnouncement{ + BlockHash: common.BytesToHash([]byte{b}), + BlockNumber: uint64(b), + Signature: make([]byte, wit.SignatureLength), + } + } + + // Tiny capacity still yields a usable per-peer share of 1. + tiny := newDeferredAnnounceCache(1) + require.Equal(t, 1, tiny.perPeerCap) + + c := newDeferredAnnounceCache(4) // perPeerCap = 4/divisor (>=1) + + // Same-hash overwrite from a different peer moves the credit. + c.put(ann(1), "peer-a") + c.put(ann(1), "peer-b") + _, peerID, ok := c.peek(ann(1).BlockHash) + require.True(t, ok) + require.Equal(t, "peer-b", peerID, "latest relayer must carry the deferred entry") + c.mu.Lock() + require.NotContains(t, c.perPeer, "peer-a", "overwritten relayer must get its credit back") + c.mu.Unlock() + + // Per-peer cap: one peer cannot occupy more than its share. + for i := byte(10); i < 20; i++ { + c.put(ann(i), "hog") + } + c.mu.Lock() + hogCount := c.perPeer["hog"] + c.mu.Unlock() + require.LessOrEqual(t, hogCount, c.perPeerCap, "per-peer cap must bound a single peer's share") + + // Global cap: filling from distinct peers evicts the oldest entry. + full := newDeferredAnnounceCache(2) + full.put(ann(1), "p1") + full.mu.Lock() + full.entries[ann(1).BlockHash].receivedAt = time.Now().Add(-10 * time.Second) + full.mu.Unlock() + full.put(ann(2), "p2") + full.put(ann(3), "p3") + require.False(t, full.has(ann(1).BlockHash), "oldest entry must be evicted at capacity") + require.True(t, full.has(ann(2).BlockHash)) + require.True(t, full.has(ann(3).BlockHash)) + + // Expiry: take/peek/has all treat a TTL-expired entry as gone. + exp := newDeferredAnnounceCache(4) + exp.put(ann(7), "p7") + exp.mu.Lock() + exp.entries[ann(7).BlockHash].receivedAt = time.Now().Add(-2 * wit2AnnounceTTL) + exp.mu.Unlock() + require.False(t, exp.has(ann(7).BlockHash)) + _, _, ok = exp.peek(ann(7).BlockHash) + require.False(t, ok) + _, ok = exp.take(ann(7).BlockHash) + require.False(t, ok, "expired entry must not be returned by take") + + // Miss paths. + _, ok = exp.take(common.HexToHash("0xabsent")) + require.False(t, ok) + _, _, ok = exp.peek(common.HexToHash("0xabsent")) + require.False(t, ok) +} + +// TestVerifySignedAnnouncementRejectsBadRecoveryID covers the ecrecover +// failure branch: a signature of the right length whose recovery byte is out +// of range must be rejected (not panic, not recover a garbage address). +func TestVerifySignedAnnouncementRejectsBadRecoveryID(t *testing.T) { + sig := make([]byte, wit.SignatureLength) + sig[wit.SignatureLength-1] = 99 // invalid recovery id + + _, err := verifySignedAnnouncement(wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0x01"), + BlockNumber: 1, + WitnessHash: common.HexToHash("0x02"), + Signature: sig, + }) + require.Error(t, err) +} + +// TestCosendWitnessAnnouncementVersionSplit verifies the per-peer protocol +// split on the block-propagation cosend: WIT2 recipients get the signed +// announcement, WIT1 recipients get the unsigned hash announce, and the whole +// cosend is skipped when the local node does not hold the witness. +func TestCosendWitnessAnnouncementVersionSplit(t *testing.T) { + h := newTestHandler() + defer h.close() + + wit2Peer, cleanup2 := registerEthWitPeer(t, h, wit.WIT2) + defer cleanup2() + wit1Peer, cleanup1 := registerEthWitPeer(t, h, wit.WIT1) + defer cleanup1() + + header := &types.Header{Number: big.NewInt(616)} + hash := header.Hash() + transfer := []*ethPeer{ + h.handler.peers.peer(wit2Peer.ID()), + h.handler.peers.peer(wit1Peer.ID()), + } + require.NotNil(t, transfer[0]) + require.NotNil(t, transfer[1]) + + // Witness not held locally: cosend must be a no-op for everyone. + h.handler.cosendWitnessAnnouncement(hash, header.Number.Uint64(), transfer, nil) + require.False(t, wit2Peer.KnownAnnounceContainsHash(hash)) + require.False(t, wit1Peer.KnownWitnessContainsHash(hash)) + + // Store the witness and a signed announcement (as if relayed to us), then + // cosend: the WIT2 peer gets the signed announce, the WIT1 peer the + // unsigned hash announce. + rawdb.WriteWitness(h.chain.DB(), hash, []byte{0x01, 0x02, 0x03}) + h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xc0de"), + Signature: make([]byte, wit.SignatureLength), + }) + + h.handler.cosendWitnessAnnouncement(hash, header.Number.Uint64(), transfer, nil) + require.True(t, wit2Peer.KnownAnnounceContainsHash(hash), "WIT2 peer must receive the signed announcement") + require.False(t, wit2Peer.KnownWitnessContainsHash(hash), "signed announce must not mark the peer as a body-holder") + require.True(t, wit1Peer.KnownWitnessContainsHash(hash), "WIT1 peer must receive the unsigned hash announce") + + // lookupSignedWitnessHash round-trip: hit for the cached announce, miss + // for an unknown hash. + got, ok := h.handler.lookupSignedWitnessHash(hash) + require.True(t, ok) + require.Equal(t, common.HexToHash("0xc0de"), got) + _, ok = h.handler.lookupSignedWitnessHash(common.HexToHash("0xabsent")) + require.False(t, ok) + + // Re-cosend via the static/trusted list: both peers now know the witness, + // so they are absent from the recipient map and skipped gracefully. + h.handler.cosendWitnessAnnouncement(hash, header.Number.Uint64(), nil, transfer) +} + +// TestSignLocalWitnessAnnouncementFallbacks pins the non-producer behavior of +// the announce signing path: a cached announcement (ours or a relayed +// producer's) is returned without re-signing, and absent both a cache entry +// and a bor engine the function reports no signature — the caller then falls +// back to the truthful unsigned WIT1 announce. +func TestSignLocalWitnessAnnouncementFallbacks(t *testing.T) { + h := newTestHandler() + defer h.close() + + header := &types.Header{Number: big.NewInt(717)} + hash := header.Hash() + + // No cache entry, non-bor engine: no signature available. + _, ok := h.handler.signLocalWitnessAnnouncement(hash, header.Number.Uint64()) + require.False(t, ok, "non-bor engine without a cached announce must not produce a signature") + + // Cached announcement: returned as-is, no engine interaction. + cached := wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xbeef"), + Signature: make([]byte, wit.SignatureLength), + } + h.handler.signedWitnesses.putIfNewer(cached) + got, ok := h.handler.signLocalWitnessAnnouncement(hash, header.Number.Uint64()) + require.True(t, ok) + require.Equal(t, cached.WitnessHash, got.WitnessHash) +} + +// TestBroadcastBlockWitnessAnnounceVersionSplit covers the post-import +// announce fanout in BroadcastBlock: with a witness in storage, WIT2 peers +// receive the signed announcement when one is available while WIT1 peers +// receive the unsigned hash announce — and with no signature available, +// everyone receives the truthful unsigned announce. +func TestBroadcastBlockWitnessAnnounceVersionSplit(t *testing.T) { + h := newTestHandler() + defer h.close() + + wit2Peer, cleanup2 := registerEthWitPeer(t, h, wit.WIT2) + defer cleanup2() + wit1Peer, cleanup1 := registerEthWitPeer(t, h, wit.WIT1) + defer cleanup1() + + block := types.NewBlockWithHeader(&types.Header{Number: big.NewInt(818)}) + hash := block.Hash() + rawdb.WriteWitness(h.chain.DB(), hash, []byte{0x0a, 0x0b}) + + // No signature available (non-bor engine, nothing cached): both peers get + // the unsigned WIT1-style hash announce. + h.handler.BroadcastBlock(block, nil, false) + require.True(t, wit2Peer.KnownWitnessContainsHash(hash), "WIT2 peer must get the unsigned announce when no signature exists") + require.True(t, wit1Peer.KnownWitnessContainsHash(hash)) + + // With a signed announcement cached: the WIT2 peer (not yet aware of the + // announce) receives the signed variant. + block2 := types.NewBlockWithHeader(&types.Header{Number: big.NewInt(819)}) + hash2 := block2.Hash() + rawdb.WriteWitness(h.chain.DB(), hash2, []byte{0x0c, 0x0d}) + h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ + BlockHash: hash2, + BlockNumber: block2.NumberU64(), + WitnessHash: common.HexToHash("0xf00d"), + Signature: make([]byte, wit.SignatureLength), + }) + + h.handler.BroadcastBlock(block2, nil, false) + require.True(t, wit2Peer.KnownAnnounceContainsHash(hash2), "WIT2 peer must get the signed announce") + require.True(t, wit1Peer.KnownWitnessContainsHash(hash2), "WIT1 peer must get the unsigned announce") +} + +// signTestAnnouncement produces a structurally valid BP signature over the +// announcement triple with a throwaway key. +func signTestAnnouncement(t *testing.T, ann *wit.SignedWitnessAnnouncement) { + t.Helper() + + key, err := crypto.GenerateKey() + require.NoError(t, err) + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + require.NoError(t, err) + ann.Signature = sig +} + +// TestHandleSignedWitnessAnnouncementsAcceptCacheRelayAndDedup drives the full +// receive path on a non-bor test chain (producer binding reduces to +// header-number matching): a valid announce is accepted, cached, credited to +// the sender, and relayed to other WIT2 peers but not WIT1 peers; an +// immediate duplicate is suppressed. +func TestHandleSignedWitnessAnnouncementsAcceptCacheRelayAndDedup(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + + sender, cleanupS := registerEthWitPeer(t, h, wit.WIT2) + defer cleanupS() + relayTarget, cleanupR := registerEthWitPeer(t, h, wit.WIT2) + defer cleanupR() + wit1Peer, cleanup1 := registerEthWitPeer(t, h, wit.WIT1) + defer cleanup1() + + header := &types.Header{Number: big.NewInt(919)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xab"), + } + signTestAnnouncement(t, &ann) + + require.NoError(t, witH.handleSignedWitnessAnnouncements(sender, []wit.SignedWitnessAnnouncement{ann})) + + require.True(t, sender.KnownAnnounceContainsHash(hash), "sender must be credited as announce-known") + _, cached := h.handler.signedWitnesses.get(hash) + require.True(t, cached, "accepted announce must be cached") + require.True(t, relayTarget.KnownAnnounceContainsHash(hash), "announce must relay to other WIT2 peers") + require.False(t, wit1Peer.KnownAnnounceContainsHash(hash), "announce must not relay to WIT1 peers") + + // Re-delivery inside the relay window: dedup path, no error. + require.NoError(t, witH.handleSignedWitnessAnnouncements(sender, []wit.SignedWitnessAnnouncement{ann})) +} + +// TestHandleSignedWitnessAnnouncementsRateLimitDrop verifies that a peer over +// its announce budget has the whole packet dropped without verification and +// without strikes — rate limiting is back-pressure, not misbehavior. +func TestHandleSignedWitnessAnnouncementsRateLimitDrop(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + // Exhaust the budget out-of-band. + require.True(t, h.handler.wit2PeerTracker.allow(peer.ID(), wit2AnnounceBurstCap)) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0x77"), + BlockNumber: 77, + WitnessHash: common.HexToHash("0x78"), + Signature: make([]byte, wit.SignatureLength), + } + require.NoError(t, witH.handleSignedWitnessAnnouncements(peer, []wit.SignedWitnessAnnouncement{ann})) + + _, cached := h.handler.signedWitnesses.get(ann.BlockHash) + require.False(t, cached, "rate-limited packet must not be processed") + + h.handler.wit2PeerTracker.mu.Lock() + strikes := h.handler.wit2PeerTracker.state[peer.ID()].strikeCount + h.handler.wit2PeerTracker.mu.Unlock() + require.Zero(t, strikes, "rate limiting must not strike the peer") +} + +// TestAcceptSignedAnnouncementStrikesOnNumberMismatch covers the confirmed- +// misbehavior branch with a locally known header: the announce's blockNumber +// contradicts the header it names, so the relayer is struck (no deferral — +// the header IS available). +func TestAcceptSignedAnnouncementStrikesOnNumberMismatch(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(303)} + rawdb.WriteHeader(h.chain.DB(), header) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: header.Hash(), + BlockNumber: header.Number.Uint64() + 1, // contradicts the local header + WitnessHash: common.HexToHash("0xcc"), + } + signTestAnnouncement(t, &ann) + + require.False(t, witH.acceptSignedAnnouncement(peer, ann)) + require.False(t, h.handler.deferredAnnounces.has(ann.BlockHash), "known-header mismatch must not defer") + + h.handler.wit2PeerTracker.mu.Lock() + strikes := h.handler.wit2PeerTracker.state[peer.ID()].strikeCount + h.handler.wit2PeerTracker.mu.Unlock() + require.Equal(t, 1, strikes, "confirmed mis-binding must strike the relayer") +} + +// TestStrikeWit2PeerDisconnectsAtLimit drives the strike accumulator to the +// disconnect threshold and confirms the tracker state is cleaned up via the +// removePeer → forget path (the peer is not in the peer set; removal must +// still be graceful). +func TestStrikeWit2PeerDisconnectsAtLimit(t *testing.T) { + h := newTestHandler() + defer h.close() + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + for i := 0; i < wit2MisbehaviorStrikeLimit; i++ { + h.handler.strikeWit2Peer(peer) + } + + h.handler.wit2PeerTracker.mu.Lock() + _, tracked := h.handler.wit2PeerTracker.state[peer.ID()] + h.handler.wit2PeerTracker.mu.Unlock() + require.False(t, tracked, "disconnect must forget the peer's tracker state") +} + +// TestHandleWitnessBroadcastSignedMatchCachesAndServes covers the WIT2 accept +// path of the unsolicited-body broadcast: bytes matching the BP-signed +// commitment are cached for pre-import serving and the sender is marked as a +// body-holder. +func TestHandleWitnessBroadcastSignedMatchCachesAndServes(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + hash, bodyBytes, _ := persistedSignedWitness(t, h, 1021, 0) + + witness := decodeTestWitness(t, bodyBytes) + require.NoError(t, witH.handleWitnessBroadcast(peer, witness)) + + require.True(t, peer.KnownWitnessContainsHash(hash), "matching broadcast must mark the sender as a body-holder") + cachedBytes, _, ok := h.handler.pendingWitnessBodies.get(hash) + require.True(t, ok, "matching broadcast must populate the pre-import serving cache") + require.Equal(t, bodyBytes, cachedBytes) +} + +// TestWitHandlerDispatchesSignedAnnouncementPacket pins the Handle() routing +// for the WIT2 message type so a wire-decoded packet reaches the signed- +// announcement handler (an empty packet is a no-op, not an error). +func TestWitHandlerDispatchesSignedAnnouncementPacket(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + require.NoError(t, witH.Handle(peer, &wit.SignedNewWitnessHashesPacket{})) +} + +// TestDrainDeferredAnnouncesLifecycle drives the chain-head drain through its +// four outcomes: header still unknown (re-stash for the next head event), +// confirmed mis-binding (drop, no cache), success (cache + credit the +// original sender + relay), and duplicate (suppressed by the relay-window +// dedup). Uses the non-bor test chain, where producer binding reduces to +// header-number matching. +func TestDrainDeferredAnnouncesLifecycle(t *testing.T) { + h := newTestHandler() + defer h.close() + + sender, cleanupS := registerEthWitPeer(t, h, wit.WIT2) + defer cleanupS() + relayTarget, cleanupR := registerEthWitPeer(t, h, wit.WIT2) + defer cleanupR() + + // Empty queue: no-op. + h.handler.drainDeferredAnnouncesFor(common.HexToHash("0x01")) + + // Header still unknown at drain time: entry must be re-stashed. + unknown := &types.Header{Number: big.NewInt(2222)} + annU := wit.SignedWitnessAnnouncement{BlockHash: unknown.Hash(), BlockNumber: 2222, WitnessHash: common.HexToHash("0xaa")} + signTestAnnouncement(t, &annU) + h.handler.deferredAnnounces.put(annU, sender.ID()) + h.handler.drainDeferredAnnouncesFor(annU.BlockHash) + require.True(t, h.handler.deferredAnnounces.has(annU.BlockHash), "header-unknown drain must re-stash the entry") + + // Confirmed mis-binding (known header, contradicting number): dropped. + hdrM := &types.Header{Number: big.NewInt(3333)} + rawdb.WriteHeader(h.chain.DB(), hdrM) + annM := wit.SignedWitnessAnnouncement{BlockHash: hdrM.Hash(), BlockNumber: 3334, WitnessHash: common.HexToHash("0xbb")} + signTestAnnouncement(t, &annM) + h.handler.deferredAnnounces.put(annM, sender.ID()) + h.handler.drainDeferredAnnouncesFor(annM.BlockHash) + require.False(t, h.handler.deferredAnnounces.has(annM.BlockHash), "mis-bound announce must be dropped") + _, cached := h.handler.signedWitnesses.get(annM.BlockHash) + require.False(t, cached, "mis-bound announce must not be cached") + + // Success: cached, original sender credited, relayed to other WIT2 peers. + hdrS := &types.Header{Number: big.NewInt(4444)} + rawdb.WriteHeader(h.chain.DB(), hdrS) + annS := wit.SignedWitnessAnnouncement{BlockHash: hdrS.Hash(), BlockNumber: 4444, WitnessHash: common.HexToHash("0xcc")} + signTestAnnouncement(t, &annS) + h.handler.deferredAnnounces.put(annS, sender.ID()) + h.handler.drainDeferredAnnouncesFor(annS.BlockHash) + _, cached = h.handler.signedWitnesses.get(annS.BlockHash) + require.True(t, cached, "verified announce must be cached at drain") + require.True(t, sender.KnownAnnounceContainsHash(annS.BlockHash), "drain must credit the original sender") + require.True(t, relayTarget.KnownAnnounceContainsHash(annS.BlockHash), "drain must relay to other WIT2 peers") + + // Duplicate: a re-deferred copy of an already-cached hash is suppressed. + h.handler.deferredAnnounces.put(annS, sender.ID()) + h.handler.drainDeferredAnnouncesFor(annS.BlockHash) +} + +// TestCanonicalWitnessHashStorageGate pins the chain-storage gate: no stored +// witness means no commitment (and thus nothing to sign), while stored bytes +// hash to the canonical commitment directly. +func TestCanonicalWitnessHashStorageGate(t *testing.T) { + h := newTestHandler() + defer h.close() + + hash := common.HexToHash("0x4242") + _, ok := h.handler.canonicalWitnessHash(hash) + require.False(t, ok, "absent witness must yield no commitment") + + body := []byte{0x01, 0x02, 0x03} + rawdb.WriteWitness(h.chain.DB(), hash, body) + got, ok := h.handler.canonicalWitnessHash(hash) + require.True(t, ok) + require.Equal(t, stateless.WitnessCommitHash(body), got) +} diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go index 8295b3156f..45b739624a 100644 --- a/eth/handler_wit2_test.go +++ b/eth/handler_wit2_test.go @@ -408,27 +408,31 @@ func TestSignedAnnounceDoesNotMarkPeerAsBodyHolder(t *testing.T) { // pushes the full body to those waiters the moment we obtain it — restoring the // WIT1-style hand-off without flooding (only peers that actually asked, and at // most one body each, exactly what a pull would have cost). -func TestEmptyGetWitnessForSignedHashPushesBodyOnArrival(t *testing.T) { - h := newTestHandler() - defer h.close() - - witH := (*witHandler)(h.handler) - peer, cleanup := newTestWit2PeerWithReader() - defer cleanup() +// persistedSignedWitness builds a header persisted to the test chain DB, a +// witness for it (padded with deterministic trie state when padBytes > 0), +// and registers a BP-signed announcement for the witness's commit hash — the +// shared precondition of every waiter-push scenario. Returns the block hash, +// the canonical body bytes, and the signed commit hash. The body is NOT +// stored anywhere: callers decide whether it lands in the in-flight cache, +// chain storage, or nowhere. +func persistedSignedWitness(t *testing.T, h *testHandler, blockNumber int64, padBytes int) (common.Hash, []byte, common.Hash) { + t.Helper() - header := &types.Header{Number: big.NewInt(7777)} + header := &types.Header{Number: big.NewInt(blockNumber)} hash := header.Hash() rawdb.WriteHeader(h.chain.DB(), header) witness, err := stateless.NewWitness(header, nil) require.NoError(t, err) + if padBytes > 0 { + FillWitnessWithDeterministicRandomState(witness, padBytes) + } + var buf bytes.Buffer require.NoError(t, witness.EncodeRLP(&buf)) bodyBytes := buf.Bytes() witnessHash := stateless.WitnessCommitHash(bodyBytes) - // We hold a BP-signed announcement for this hash (the witness provably - // exists) but not the body yet — neither in-flight cache nor chain storage. h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ BlockHash: hash, BlockNumber: header.Number.Uint64(), @@ -436,13 +440,37 @@ func TestEmptyGetWitnessForSignedHashPushesBodyOnArrival(t *testing.T) { Signature: make([]byte, wit.SignatureLength), }) - // Peer asks for the body before we have it → empty response. This must - // register the peer as waiting for the body. + return hash, bodyBytes, witnessHash +} + +// requestFirstWitnessPage issues a single-page GetWitness for hash, as a +// remote peer pulling the body would. +func requestFirstWitnessPage(t *testing.T, witH *witHandler, peer *wit.Peer, hash common.Hash) wit.WitnessPacketResponse { + t.Helper() + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ RequestId: 1, GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, }) require.NoError(t, err) + return resp +} + +func TestEmptyGetWitnessForSignedHashPushesBodyOnArrival(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + // We hold a BP-signed announcement for this hash (the witness provably + // exists) but not the body yet — neither in-flight cache nor chain storage. + hash, bodyBytes, witnessHash := persistedSignedWitness(t, h, 7777, 0) + + // Peer asks for the body before we have it → empty response. This must + // register the peer as waiting for the body. + resp := requestFirstWitnessPage(t, witH, peer, hash) require.Equal(t, 1, len(resp)) require.Equal(t, uint64(0), resp[0].TotalPages, "precondition: body absent, must serve empty") require.False(t, peer.KnownWitnessContainsHash(hash), "peer must not yet be treated as a body-holder") @@ -473,30 +501,10 @@ func TestFlushWitnessWaitersForImportedPushesFromChainStorage(t *testing.T) { peer, cleanup := newTestWit2PeerWithReader() defer cleanup() - header := &types.Header{Number: big.NewInt(8888)} - hash := header.Hash() - rawdb.WriteHeader(h.chain.DB(), header) - - witness, err := stateless.NewWitness(header, nil) - require.NoError(t, err) - var buf bytes.Buffer - require.NoError(t, witness.EncodeRLP(&buf)) - bodyBytes := buf.Bytes() - witnessHash := stateless.WitnessCommitHash(bodyBytes) - - h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ - BlockHash: hash, - BlockNumber: header.Number.Uint64(), - WitnessHash: witnessHash, - Signature: make([]byte, wit.SignatureLength), - }) + hash, bodyBytes, _ := persistedSignedWitness(t, h, 8888, 0) // Peer asks before we hold the body → empty, registers as waiter. - _, err = witH.handleGetWitness(peer, &wit.GetWitnessPacket{ - RequestId: 1, - GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, - }) - require.NoError(t, err) + requestFirstWitnessPage(t, witH, peer, hash) require.False(t, peer.KnownWitnessContainsHash(hash)) // Native import: witness lands in chain storage only. The chain-head flush @@ -928,32 +936,11 @@ func TestWaiterPushSkipsOversizedWitness(t *testing.T) { peer, cleanup := newTestWit2PeerWithReader() defer cleanup() - header := &types.Header{Number: big.NewInt(7780)} - hash := header.Hash() - rawdb.WriteHeader(h.chain.DB(), header) - - witness, err := stateless.NewWitness(header, nil) - require.NoError(t, err) - FillWitnessWithDeterministicRandomState(witness, witnessPushMaxSize+1024*1024) - var buf bytes.Buffer - require.NoError(t, witness.EncodeRLP(&buf)) - bodyBytes := buf.Bytes() + hash, bodyBytes, witnessHash := persistedSignedWitness(t, h, 7780, witnessPushMaxSize+1024*1024) require.Greater(t, len(bodyBytes), witnessPushMaxSize, "fixture must exceed the push cap") - witnessHash := stateless.WitnessCommitHash(bodyBytes) - - h.handler.signedWitnesses.putIfNewer(wit.SignedWitnessAnnouncement{ - BlockHash: hash, - BlockNumber: header.Number.Uint64(), - WitnessHash: witnessHash, - Signature: make([]byte, wit.SignatureLength), - }) // Register the peer as a waiter: it asks for the body before we hold it. - resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ - RequestId: 1, - GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, - }) - require.NoError(t, err) + resp := requestFirstWitnessPage(t, witH, peer, hash) require.Equal(t, uint64(0), resp[0].TotalPages, "precondition: body absent, must serve empty") // Body arrives. The push must be skipped — encoded size is over the wit @@ -1079,4 +1066,15 @@ func TestMaySignAnnouncementForBlockBindsToSealer(t *testing.T) { require.False(t, maySignAnnouncementForBlock(engine, header, producer, 201, header.Hash()), "announce blockNumber must match the local header") + + // A header whose extra-data cannot yield a sealer (too short for a seal) + // is unbindable: refuse rather than sign blind. + unsealable := &types.Header{ + Number: big.NewInt(200), + Difficulty: big.NewInt(1), + Extra: make([]byte, 10), + } + require.False(t, + maySignAnnouncementForBlock(engine, unsealable, producer, 200, unsealable.Hash()), + "a header without a recoverable sealer must refuse the producer binding") } diff --git a/eth/protocols/wit/peer_wit2_test.go b/eth/protocols/wit/peer_wit2_test.go new file mode 100644 index 0000000000..969cfeb3b7 --- /dev/null +++ b/eth/protocols/wit/peer_wit2_test.go @@ -0,0 +1,159 @@ +package wit + +import ( + "crypto/rand" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/p2p" + "github.com/ethereum/go-ethereum/p2p/enode" + "github.com/stretchr/testify/require" +) + +// newWit2PeerPair wires two WIT2 peers over an in-memory message pipe. The +// sender runs the real broadcast loop; the receiver's inbound messages are +// consumed by the caller via handleMessage. +func newWit2PeerPair(t *testing.T) (sender *Peer, receiver *Peer, cleanup func()) { + t.Helper() + + var idA, idB enode.ID + rand.Read(idA[:]) + rand.Read(idB[:]) + + app, net := p2p.MsgPipe() + sender = NewPeer(WIT2, p2p.NewPeer(idA, "sender", nil), net, log.New()) + receiver = NewPeer(WIT2, p2p.NewPeer(idB, "receiver", nil), app, log.New()) + + cleanup = func() { + app.Close() + net.Close() + sender.Close() + receiver.Close() + } + return sender, receiver, cleanup +} + +func testAnnouncement(b byte) SignedWitnessAnnouncement { + return SignedWitnessAnnouncement{ + BlockHash: common.BytesToHash([]byte{b}), + BlockNumber: uint64(b), + WitnessHash: common.BytesToHash([]byte{b, b}), + Signature: make([]byte, SignatureLength), + } +} + +// TestSignedAnnouncementWireRoundTrip drives a signed announcement through +// the full wire path: async queue → broadcast loop → message pipe → +// handleMessage dispatch (WIT2 handler map) → decode → backend.Handle. This +// is the end-to-end proof that the new message type is routable on a +// negotiated WIT2 connection. +func TestSignedAnnouncementWireRoundTrip(t *testing.T) { + sender, receiver, cleanup := newWit2PeerPair(t) + defer cleanup() + + ann := testAnnouncement(7) + + delivered := make(chan Packet, 1) + backend := &mockBackend{handleFunc: func(peer *Peer, packet Packet) error { + delivered <- packet + return nil + }} + + sender.AsyncSendSignedWitnessAnnouncement(ann) + require.True(t, sender.KnownAnnounceContainsHash(ann.BlockHash), + "queued announce must mark the hash announce-known on the sender") + require.False(t, sender.KnownWitnessContainsHash(ann.BlockHash), + "announce must not mark the sender's body-known set") + + require.NoError(t, handleMessage(backend, receiver)) + + select { + case packet := <-delivered: + require.Equal(t, "SignedNewWitnessHashes", packet.Name()) + require.Equal(t, byte(SignedNewWitnessHashesMsg), packet.Kind()) + got, ok := packet.(*SignedNewWitnessHashesPacket) + require.True(t, ok) + require.Len(t, got.Announcements, 1) + require.Equal(t, ann.BlockHash, got.Announcements[0].BlockHash) + require.Equal(t, ann.WitnessHash, got.Announcements[0].WitnessHash) + case <-time.After(5 * time.Second): + t.Fatal("announcement was not delivered to the backend") + } +} + +// TestHandleSignedNewWitnessHashesRejectsMalformedPackets covers the decode- +// time guards: an empty announcement list and a list over the per-packet cap +// must both error out before reaching the backend. +func TestHandleSignedNewWitnessHashesRejectsMalformedPackets(t *testing.T) { + backend := &mockBackend{handleFunc: func(peer *Peer, packet Packet) error { + t.Fatal("malformed packet must not reach the backend") + return nil + }} + + send := func(packet *SignedNewWitnessHashesPacket) error { + sender, receiver, cleanup := newWit2PeerPair(t) + defer cleanup() + + errc := make(chan error, 1) + go func() { + errc <- p2p.Send(sender.rw, SignedNewWitnessHashesMsg, packet) + }() + err := handleMessage(backend, receiver) + require.NoError(t, <-errc) + return err + } + + require.Error(t, send(&SignedNewWitnessHashesPacket{}), "empty announcement list must be rejected") + + over := make([]SignedWitnessAnnouncement, MaxSignedAnnouncesPerPacket+1) + for i := range over { + over[i] = testAnnouncement(byte(i)) + } + require.Error(t, send(&SignedNewWitnessHashesPacket{Announcements: over}), "over-cap packet must be rejected") +} + +// TestAsyncSendSignedWitnessAnnouncementGuards pins the two non-delivery +// branches: a WIT1 peer never gets the WIT2 message queued (version guard), +// and a full queue drops announcements instead of blocking the caller. +func TestAsyncSendSignedWitnessAnnouncementGuards(t *testing.T) { + var id enode.ID + rand.Read(id[:]) + + // Version guard: WIT1 peers don't speak the message. + app, net := p2p.MsgPipe() + defer app.Close() + defer net.Close() + wit1Peer := NewPeer(WIT1, p2p.NewPeer(id, "wit1", nil), net, log.New()) + defer wit1Peer.Close() + + ann := testAnnouncement(9) + wit1Peer.AsyncSendSignedWitnessAnnouncement(ann) + require.False(t, wit1Peer.KnownAnnounceContainsHash(ann.BlockHash), + "WIT1 peer must not queue a signed announcement") + + // Queue-full drop: nobody reads the remote end, so the broadcast loop + // blocks on the first send and the queue fills; the overflow must be + // dropped without blocking the caller. + appB, netB := p2p.MsgPipe() + defer appB.Close() + defer netB.Close() + blocked := NewPeer(WIT2, p2p.NewPeer(id, "blocked", nil), netB, log.New()) + defer blocked.Close() + + done := make(chan struct{}) + go func() { + for i := 0; i < maxQueuedWitnessAnns+16; i++ { + blocked.AsyncSendSignedWitnessAnnouncement(testAnnouncement(byte(i))) + } + close(done) + }() + + select { + case <-done: + // Caller never blocked — overflow was dropped. + case <-time.After(5 * time.Second): + t.Fatal("AsyncSendSignedWitnessAnnouncement blocked on a full queue") + } +} From c9d475667c7a314a6672be7e2095f9ea1525362a Mon Sep 17 00:00:00 2001 From: Lucca Martins Date: Wed, 10 Jun 2026 12:01:08 -0300 Subject: [PATCH 14/14] =?UTF-8?q?wit2:=20close=20remaining=20CI=20gaps=20?= =?UTF-8?q?=E2=80=94=20patch=20coverage,=20diffguard=20complexity=20and=20?= =?UTF-8?q?file=20size?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract hashWitnessChunks/witnessCommitWorkerCount from WitnessCommitHash (complexity 18 -> under threshold); commitment recipe unchanged, pinned by the existing shape tests. - Move the PR-added WIT2 fetcher code (verifyAgainstSignedHash, cacheVerifiedWitnessForServing, handleWitnessBodyNotReady, empty-response backoff and its constants) into eth/fetcher/witness_manager_wit2.go so witness_manager.go stays within the oversized-file growth tolerance. - Add coverage for the patch lines CI flagged: SignBytes error paths and CurrentSigner, WitnessCommitHashFromWitness, the TTL gc sweep of all four wit2 caches, drainDeferredAnnouncesFor guards, AddKnownAnnounce, and the announce-packet decode failure. --- consensus/bor/signbytes_test.go | 51 ++++++++++ core/stateless/witness_commit.go | 81 +++++++++------- core/stateless/witness_commit_test.go | 27 ++++++ eth/fetcher/witness_manager.go | 117 ----------------------- eth/fetcher/witness_manager_wit2.go | 132 ++++++++++++++++++++++++++ eth/handler_wit2_caches_test.go | 89 +++++++++++++++++ eth/protocols/wit/peer_wit2_test.go | 31 ++++++ 7 files changed, 376 insertions(+), 152 deletions(-) create mode 100644 eth/fetcher/witness_manager_wit2.go diff --git a/consensus/bor/signbytes_test.go b/consensus/bor/signbytes_test.go index bd7b2992b8..2e59c48cb7 100644 --- a/consensus/bor/signbytes_test.go +++ b/consensus/bor/signbytes_test.go @@ -2,6 +2,8 @@ package bor import ( "bytes" + "errors" + "strings" "testing" "github.com/ethereum/go-ethereum/accounts" @@ -68,3 +70,52 @@ func TestSignBytesRejectsHeaderMimetype(t *testing.T) { t.Fatal("MimetypeBor must be rejected to prevent header-seal replay") } } + +// TestSignBytesWithoutAuthorizedSigner covers the not-a-validator paths: a +// node that never called Authorize (or authorized the zero address) must +// refuse to sign rather than emit a signature under a zero identity. +func TestSignBytesWithoutAuthorizedSigner(t *testing.T) { + bor := &Bor{} + if _, _, err := bor.SignBytes(accounts.MimetypeBorWitnessAnnounce, []byte{0x01}); err == nil { + t.Fatal("SignBytes must fail with no authorized signer") + } + + bor.Authorize(common.Address{}, func(accounts.Account, string, []byte) ([]byte, error) { + t.Fatal("signFn must not be reached for a zero-address signer") + return nil, nil + }) + if _, _, err := bor.SignBytes(accounts.MimetypeBorWitnessAnnounce, []byte{0x01}); err == nil { + t.Fatal("SignBytes must fail for a zero-address signer") + } +} + +// TestSignBytesPropagatesSignFnError pins that wallet/clef failures surface to +// the caller instead of returning a bogus (signer, nil-sig) pair. +func TestSignBytesPropagatesSignFnError(t *testing.T) { + bor := &Bor{} + bor.Authorize(common.HexToAddress("0x1234"), func(accounts.Account, string, []byte) ([]byte, error) { + return nil, errors.New("wallet locked") + }) + + _, _, err := bor.SignBytes(accounts.MimetypeBorWitnessAnnounce, []byte{0x01}) + if err == nil || !strings.Contains(err.Error(), "wallet locked") { + t.Fatalf("expected wallet error to propagate, got %v", err) + } +} + +// TestCurrentSigner covers both states of the authorized-signer lookup used by +// the wit2 announce path to decide whether this node may sign announcements. +func TestCurrentSigner(t *testing.T) { + bor := &Bor{} + if got := bor.CurrentSigner(); got != (common.Address{}) { + t.Fatalf("expected zero address before Authorize, got %s", got) + } + + addr := common.HexToAddress("0x5678") + bor.Authorize(addr, func(accounts.Account, string, []byte) ([]byte, error) { + return make([]byte, 65), nil + }) + if got := bor.CurrentSigner(); got != addr { + t.Fatalf("CurrentSigner: got %s want %s", got, addr) + } +} diff --git a/core/stateless/witness_commit.go b/core/stateless/witness_commit.go index 4fe42cb0b4..b03079d44a 100644 --- a/core/stateless/witness_commit.go +++ b/core/stateless/witness_commit.go @@ -48,41 +48,7 @@ func WitnessCommitHash(rlpBytes []byte) common.Hash { if len(rlpBytes) == 0 { return common.Hash{} } - chunks := splitWitnessChunks(rlpBytes, WitnessCommitChunkBytes) - chunkHashes := make([]common.Hash, len(chunks)) - - // Single-chunk inputs (≤1 MiB) skip the goroutine pool — the fan-out cost - // would dominate the keccak. - if len(chunks) == 1 { - chunkHashes[0] = crypto.Keccak256Hash(chunks[0]) - } else { - workers := runtime.GOMAXPROCS(0) - if workers > witnessCommitMaxWorkers { - workers = witnessCommitMaxWorkers - } - if workers > len(chunks) { - workers = len(chunks) - } - if workers < 1 { - workers = 1 - } - var wg sync.WaitGroup - work := make(chan int, len(chunks)) - for w := 0; w < workers; w++ { - wg.Add(1) - go func() { - defer wg.Done() - for i := range work { - chunkHashes[i] = crypto.Keccak256Hash(chunks[i]) - } - }() - } - for i := range chunks { - work <- i - } - close(work) - wg.Wait() - } + chunkHashes := hashWitnessChunks(splitWitnessChunks(rlpBytes, WitnessCommitChunkBytes)) concat := make([]byte, 0, len(chunkHashes)*common.HashLength) for _, h := range chunkHashes { @@ -91,6 +57,51 @@ func WitnessCommitHash(rlpBytes []byte) common.Hash { return crypto.Keccak256Hash(concat) } +// hashWitnessChunks keccaks each chunk, fanning out across a bounded worker +// pool. Single-chunk inputs (≤1 MiB) skip the goroutine pool — the fan-out +// cost would dominate the keccak. +func hashWitnessChunks(chunks [][]byte) []common.Hash { + chunkHashes := make([]common.Hash, len(chunks)) + if len(chunks) == 1 { + chunkHashes[0] = crypto.Keccak256Hash(chunks[0]) + return chunkHashes + } + + var wg sync.WaitGroup + work := make(chan int, len(chunks)) + for w := 0; w < witnessCommitWorkerCount(len(chunks)); w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + chunkHashes[i] = crypto.Keccak256Hash(chunks[i]) + } + }() + } + for i := range chunks { + work <- i + } + close(work) + wg.Wait() + return chunkHashes +} + +// witnessCommitWorkerCount clamps the keccak fan-out to the available +// parallelism, the configured cap, and the amount of work on hand. +func witnessCommitWorkerCount(chunks int) int { + workers := runtime.GOMAXPROCS(0) + if workers > witnessCommitMaxWorkers { + workers = witnessCommitMaxWorkers + } + if workers > chunks { + workers = chunks + } + if workers < 1 { + workers = 1 + } + return workers +} + // WitnessCommitHashFromWitness encodes a witness with the canonical sorted // EncodeRLP and returns its WitnessCommitHash. Callers that already have // canonical RLP bytes should use WitnessCommitHash directly to skip the diff --git a/core/stateless/witness_commit_test.go b/core/stateless/witness_commit_test.go index f1bd7d1a66..23b3911c5c 100644 --- a/core/stateless/witness_commit_test.go +++ b/core/stateless/witness_commit_test.go @@ -10,10 +10,12 @@ package stateless import ( "bytes" + "math/big" "runtime" "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/crypto" ) @@ -68,6 +70,31 @@ func TestWitnessCommitHashSingleSubChunk(t *testing.T) { } } +// TestWitnessCommitHashFromWitness pins the convenience wrapper to the +// primitive: encoding a witness with the canonical EncodeRLP and hashing those +// bytes must equal WitnessCommitHashFromWitness on the same witness, so the +// producer (wrapper) and verifier (raw-bytes) paths can never diverge. +func TestWitnessCommitHashFromWitness(t *testing.T) { + w := &Witness{ + context: &types.Header{Number: big.NewInt(100)}, + Headers: []*types.Header{{Number: big.NewInt(99)}}, + State: map[string]struct{}{"statenode": {}}, + } + + got, err := WitnessCommitHashFromWitness(w) + if err != nil { + t.Fatalf("WitnessCommitHashFromWitness: %v", err) + } + + var buf bytes.Buffer + if err := w.EncodeRLP(&buf); err != nil { + t.Fatalf("EncodeRLP: %v", err) + } + if want := WitnessCommitHash(buf.Bytes()); got != want { + t.Fatalf("wrapper mismatch: got %s want %s", got.Hex(), want.Hex()) + } +} + // TestWitnessCommitHashMultiChunkShape spot-checks the multi-chunk recipe so a // silent change in concat order or chunking would be caught immediately. func TestWitnessCommitHashMultiChunkShape(t *testing.T) { diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index 8eda40ee13..e00b45675a 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -1,7 +1,6 @@ package fetcher import ( - "bytes" "errors" "fmt" "strings" @@ -30,24 +29,6 @@ const ( // witness for a block hash before giving up and marking it unavailable. maxWitnessFetchRetries = 300 // ~30s of retries - // emptyResponseFastRetries is how many consecutive "body not ready yet" - // (empty) responses we re-poll immediately before backing off. WIT2's fast - // signed announce reaches us ahead of the body, so the only candidate body - // source is often an announce-only relayer that has not finished pulling + - // importing the block. The first couple of re-polls stay immediate so we - // pick the body up the instant the relayer obtains it (the common case); - // after that, a relayer answering empty is genuinely waiting on its own - // upstream and re-polling it every ~gatherSlack only hammers it. - emptyResponseFastRetries = 2 - - // emptyResponseBaseBackoff / emptyResponseMaxBackoff bound the exponential - // backoff applied to repeated empty responses past the fast-retry window. - // The witness provably exists (a BP signed its hash) so we never give the - // request up here; we only slow the poll cadence to avoid the empty-poll - // storm observed on devnet (~15x the WIT1 empty-response count). - emptyResponseBaseBackoff = 100 * time.Millisecond - emptyResponseMaxBackoff = 1 * time.Second - witnessCacheSize = 10 witnessCacheTTL = 2 * time.Minute @@ -703,62 +684,6 @@ func (m *witnessManager) processWitnessResponse(peer string, hash common.Hash, r m.handleWitnessFetchSuccess(peer, hash, witness[0], announcedAt) } -// cacheVerifiedWitnessForServing forwards canonical-encoded witness bytes -// (already verified against a BP-signed witness hash by the caller) to the -// handler so other peers can fetch them pre-import. No-op when no cache -// callback is configured (legacy WIT1-only paths) or when body is empty — -// the latter signals the WIT1 path with no signed hash on file, where -// caching unverified bytes would expose us to byte-blame from downstream -// peers. -func (m *witnessManager) cacheVerifiedWitnessForServing(blockHash common.Hash, body []byte, witnessHash common.Hash) { - if m.parentCacheWitnessForServing == nil || len(body) == 0 { - return - } - m.parentCacheWitnessForServing(blockHash, body, witnessHash) -} - -// verifyAgainstSignedHash returns the canonically-encoded witness bytes and -// the BP-signed witness hash they match, when a signed hash is on file and -// verification succeeds. body is nil on the WIT1 path (no signed hash to -// verify against) so callers can skip the pre-import serving cache. ok is -// false when verification fails; the offending peer has already been -// reported. Local EncodeRLP failure on a successfully-decoded witness is -// the local node's bug, not peer misbehavior, so it does not drop the peer. -func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, witness *stateless.Witness) (body []byte, witnessHash common.Hash, ok bool) { - if m.parentSignedWitnessHash == nil { - return nil, common.Hash{}, true - } - expected, has := m.parentSignedWitnessHash(hash) - if !has { - return nil, common.Hash{}, true - } - var buf bytes.Buffer - if err := witness.EncodeRLP(&buf); err != nil { - log.Warn("[wm] Failed to encode received witness for hash check", "peer", peer, "hash", hash, "err", err) - m.handleWitnessFetchFailureExt(hash, "", fmt.Errorf("witness encode failed: %w", err), false) - return nil, common.Hash{}, false - } - encoded := buf.Bytes() - actual := stateless.WitnessCommitHash(encoded) - if actual != expected { - witnessByteMismatchMeter.Mark(1) - // We cannot blame the byte-server on signed-hash disagreement alone: - // the announcement only proves *some* BP signed *some* hash. A faulty - // or malicious scheduled producer that signed a bogus hash would - // otherwise weaponise this path to disconnect every honest peer - // serving the canonical witness. Reject the bytes (don't cache for - // serving), back off the pending request so another peer/announcement - // gets tried, and let import-time execution validation pin blame. - // TODO(wit2): wire signer-quarantine once the manager has access to - // (signer, announcement-relayer) provenance from the handler. - log.Warn("[wm] Witness bytes do not match BP-signed hash; not caching, retrying with another peer", - "peer", peer, "block", hash, "expected", expected, "actual", actual) - m.handleWitnessFetchFailureExt(hash, "", errors.New("witness hash mismatch"), false) - return nil, common.Hash{}, false - } - return encoded, expected, true -} - // handleWitnessFetchSuccess processes a successfully fetched witness. // It needs the original origin from the op state for consistency checks. func (m *witnessManager) handleWitnessFetchSuccess(fetchPeer string, hash common.Hash, witness *stateless.Witness, announcedAt time.Time) { @@ -861,48 +786,6 @@ func (m *witnessManager) handleWitnessFetchFailureExt(hash common.Hash, peer str m.rescheduleWitness() } -// handleWitnessBodyNotReady backs off a pending witness request after an empty -// ("body not ready yet") response, without dropping the responder and without -// giving the request up. On the WIT2 fast path the signed announce reaches us -// ahead of the body, so the only candidate source is frequently an -// announce-only relayer still pulling+importing the block; it answers empty -// until it has the bytes. The first emptyResponseFastRetries re-polls stay -// immediate to catch the body the instant the relayer obtains it; beyond that -// we back off exponentially (capped) so a relayer that is itself waiting -// upstream is not hammered every ~gatherSlack. The witness provably exists — a -// BP signed its hash — so we never discard the request here. -func (m *witnessManager) handleWitnessBodyNotReady(hash common.Hash) { - m.mu.Lock() - if state := m.pending[hash]; state != nil && state.announce != nil { - state.emptyRetries++ - state.announce.time = time.Now().Add(emptyResponseBackoff(state.emptyRetries)) - } - m.mu.Unlock() - - m.rescheduleWitness() -} - -// emptyResponseBackoff returns how far into the future the next re-poll should -// be deferred after n consecutive empty responses. The first -// emptyResponseFastRetries attempts return 0 (re-poll on the next tick); past -// that the delay doubles from emptyResponseBaseBackoff up to -// emptyResponseMaxBackoff. -func emptyResponseBackoff(n int) time.Duration { - if n <= emptyResponseFastRetries { - return 0 - } - shift := uint(n - emptyResponseFastRetries - 1) - // Cap the shift so the left-shift can't overflow before the clamp below. - if shift > 16 { - shift = 16 - } - d := emptyResponseBaseBackoff << shift - if d > emptyResponseMaxBackoff { - d = emptyResponseMaxBackoff - } - return d -} - // safeEnqueue attempts to enqueue a completed operation (block+witness) via the parent's channel. func (m *witnessManager) safeEnqueue(op *blockOrHeaderInject) { hash := op.hash() diff --git a/eth/fetcher/witness_manager_wit2.go b/eth/fetcher/witness_manager_wit2.go new file mode 100644 index 0000000000..f8fe25b31a --- /dev/null +++ b/eth/fetcher/witness_manager_wit2.go @@ -0,0 +1,132 @@ +package fetcher + +import ( + "bytes" + "errors" + "fmt" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/log" +) + +// WIT2 fast-path tuning: how the manager re-polls announce-only relayers that +// answer "body not ready yet" while still pulling the witness themselves. +const ( + // emptyResponseFastRetries is how many consecutive "body not ready yet" + // (empty) responses we re-poll immediately before backing off. WIT2's fast + // signed announce reaches us ahead of the body, so the only candidate body + // source is often an announce-only relayer that has not finished pulling + + // importing the block. The first couple of re-polls stay immediate so we + // pick the body up the instant the relayer obtains it (the common case); + // after that, a relayer answering empty is genuinely waiting on its own + // upstream and re-polling it every ~gatherSlack only hammers it. + emptyResponseFastRetries = 2 + + // emptyResponseBaseBackoff / emptyResponseMaxBackoff bound the exponential + // backoff applied to repeated empty responses past the fast-retry window. + // The witness provably exists (a BP signed its hash) so we never give the + // request up here; we only slow the poll cadence to avoid the empty-poll + // storm observed on devnet (~15x the WIT1 empty-response count). + emptyResponseBaseBackoff = 100 * time.Millisecond + emptyResponseMaxBackoff = 1 * time.Second +) + +// cacheVerifiedWitnessForServing forwards canonical-encoded witness bytes +// (already verified against a BP-signed witness hash by the caller) to the +// handler so other peers can fetch them pre-import. No-op when no cache +// callback is configured (legacy WIT1-only paths) or when body is empty — +// the latter signals the WIT1 path with no signed hash on file, where +// caching unverified bytes would expose us to byte-blame from downstream +// peers. +func (m *witnessManager) cacheVerifiedWitnessForServing(blockHash common.Hash, body []byte, witnessHash common.Hash) { + if m.parentCacheWitnessForServing == nil || len(body) == 0 { + return + } + m.parentCacheWitnessForServing(blockHash, body, witnessHash) +} + +// verifyAgainstSignedHash returns the canonically-encoded witness bytes and +// the BP-signed witness hash they match, when a signed hash is on file and +// verification succeeds. body is nil on the WIT1 path (no signed hash to +// verify against) so callers can skip the pre-import serving cache. ok is +// false when verification fails; the offending peer has already been +// reported. Local EncodeRLP failure on a successfully-decoded witness is +// the local node's bug, not peer misbehavior, so it does not drop the peer. +func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, witness *stateless.Witness) (body []byte, witnessHash common.Hash, ok bool) { + if m.parentSignedWitnessHash == nil { + return nil, common.Hash{}, true + } + expected, has := m.parentSignedWitnessHash(hash) + if !has { + return nil, common.Hash{}, true + } + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + log.Warn("[wm] Failed to encode received witness for hash check", "peer", peer, "hash", hash, "err", err) + m.handleWitnessFetchFailureExt(hash, "", fmt.Errorf("witness encode failed: %w", err), false) + return nil, common.Hash{}, false + } + encoded := buf.Bytes() + actual := stateless.WitnessCommitHash(encoded) + if actual != expected { + witnessByteMismatchMeter.Mark(1) + // We cannot blame the byte-server on signed-hash disagreement alone: + // the announcement only proves *some* BP signed *some* hash. A faulty + // or malicious scheduled producer that signed a bogus hash would + // otherwise weaponise this path to disconnect every honest peer + // serving the canonical witness. Reject the bytes (don't cache for + // serving), back off the pending request so another peer/announcement + // gets tried, and let import-time execution validation pin blame. + // TODO(wit2): wire signer-quarantine once the manager has access to + // (signer, announcement-relayer) provenance from the handler. + log.Warn("[wm] Witness bytes do not match BP-signed hash; not caching, retrying with another peer", + "peer", peer, "block", hash, "expected", expected, "actual", actual) + m.handleWitnessFetchFailureExt(hash, "", errors.New("witness hash mismatch"), false) + return nil, common.Hash{}, false + } + return encoded, expected, true +} + +// handleWitnessBodyNotReady backs off a pending witness request after an empty +// ("body not ready yet") response, without dropping the responder and without +// giving the request up. On the WIT2 fast path the signed announce reaches us +// ahead of the body, so the only candidate source is frequently an +// announce-only relayer still pulling+importing the block; it answers empty +// until it has the bytes. The first emptyResponseFastRetries re-polls stay +// immediate to catch the body the instant the relayer obtains it; beyond that +// we back off exponentially (capped) so a relayer that is itself waiting +// upstream is not hammered every ~gatherSlack. The witness provably exists — a +// BP signed its hash — so we never discard the request here. +func (m *witnessManager) handleWitnessBodyNotReady(hash common.Hash) { + m.mu.Lock() + if state := m.pending[hash]; state != nil && state.announce != nil { + state.emptyRetries++ + state.announce.time = time.Now().Add(emptyResponseBackoff(state.emptyRetries)) + } + m.mu.Unlock() + + m.rescheduleWitness() +} + +// emptyResponseBackoff returns how far into the future the next re-poll should +// be deferred after n consecutive empty responses. The first +// emptyResponseFastRetries attempts return 0 (re-poll on the next tick); past +// that the delay doubles from emptyResponseBaseBackoff up to +// emptyResponseMaxBackoff. +func emptyResponseBackoff(n int) time.Duration { + if n <= emptyResponseFastRetries { + return 0 + } + shift := uint(n - emptyResponseFastRetries - 1) + // Cap the shift so the left-shift can't overflow before the clamp below. + if shift > 16 { + shift = 16 + } + d := emptyResponseBaseBackoff << shift + if d > emptyResponseMaxBackoff { + d = emptyResponseMaxBackoff + } + return d +} diff --git a/eth/handler_wit2_caches_test.go b/eth/handler_wit2_caches_test.go index eb3c20a390..036b064a96 100644 --- a/eth/handler_wit2_caches_test.go +++ b/eth/handler_wit2_caches_test.go @@ -663,6 +663,95 @@ func TestDrainDeferredAnnouncesLifecycle(t *testing.T) { h.handler.drainDeferredAnnouncesFor(annS.BlockHash) } +// TestCacheGCSweepsExpiredEntries drives the TTL gc branch of each wit2 +// cache: an entry older than the TTL must be dropped by the next write, +// including the relayer-credit refund in the deferred cache and the +// emptied-hash map cleanup in the waiter registry. +func TestCacheGCSweepsExpiredEntries(t *testing.T) { + stale := time.Now().Add(-2 * wit2AnnounceTTL) + hashA := common.HexToHash("0x0a") + hashB := common.HexToHash("0x0b") + + // pendingWitnessBodyCache: gc fires on put. + bodies := newPendingWitnessBodyCache(4) + bodies.put(hashA, []byte{0x01}, common.HexToHash("0xa1")) + bodies.mu.Lock() + bodies.entries[hashA].receivedAt = stale + bodies.mu.Unlock() + bodies.put(hashB, []byte{0x02}, common.HexToHash("0xb1")) + bodies.mu.Lock() + _, sweptBody := bodies.entries[hashA] + bodies.mu.Unlock() + require.False(t, sweptBody, "expired pending body must be swept on the next put") + + // witnessWaiterRegistry: gc fires on record; sweeping the expired waiter + // must also drop the now-empty per-hash map. + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + reg := newWitnessWaiterRegistry() + reg.record(hashA, peer) + reg.mu.Lock() + reg.waiters[hashA][peer.ID()].at = stale + reg.mu.Unlock() + reg.record(hashB, peer) + reg.mu.Lock() + _, sweptWaiter := reg.waiters[hashA] + reg.mu.Unlock() + require.False(t, sweptWaiter, "expired waiter hash must be swept on the next record") + + // deferredAnnounceCache: gc fires on put and must refund the relayer's + // per-peer credit. + deferred := newDeferredAnnounceCache(8) + deferred.put(wit.SignedWitnessAnnouncement{BlockHash: hashA, Signature: make([]byte, wit.SignatureLength)}, "relayer") + deferred.mu.Lock() + deferred.entries[hashA].receivedAt = stale + deferred.mu.Unlock() + deferred.put(wit.SignedWitnessAnnouncement{BlockHash: hashB, Signature: make([]byte, wit.SignatureLength)}, "other") + deferred.mu.Lock() + _, sweptDeferred := deferred.entries[hashA] + credit := deferred.perPeer["relayer"] + deferred.mu.Unlock() + require.False(t, sweptDeferred, "expired deferred announce must be swept on the next put") + require.Zero(t, credit, "swept deferred announce must refund its relayer credit") + + // signedWitnessCache: gc fires on putIfNewer. + signed := newSignedWitnessCache() + signed.putIfNewer(wit.SignedWitnessAnnouncement{BlockHash: hashA, Signature: make([]byte, wit.SignatureLength)}) + signed.mu.Lock() + signed.entries[hashA].receivedAt = stale + signed.mu.Unlock() + signed.putIfNewer(wit.SignedWitnessAnnouncement{BlockHash: hashB, Signature: make([]byte, wit.SignatureLength)}) + signed.mu.Lock() + _, sweptSigned := signed.entries[hashA] + signed.mu.Unlock() + require.False(t, sweptSigned, "expired signed announce must be swept on the next putIfNewer") +} + +// TestDrainDeferredAnnouncesGuards covers the drain entry guards: a handler +// wired without wit2 state must no-op rather than panic, and a stashed +// announcement that fails the signature re-check (in principle unreachable — +// the same bytes passed verification before deferral) is dropped without +// being cached or relayed. +func TestDrainDeferredAnnouncesGuards(t *testing.T) { + (&handler{}).drainDeferredAnnouncesFor(common.HexToHash("0x01")) + + h := newTestHandler() + defer h.close() + + bad := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0x0bad"), + BlockNumber: 1, + WitnessHash: common.HexToHash("0x0bb1"), + Signature: make([]byte, wit.SignatureLength), // all-zero: recovery fails + } + h.handler.deferredAnnounces.put(bad, "relayer") + h.handler.drainDeferredAnnouncesFor(bad.BlockHash) + + _, promoted := h.handler.signedWitnesses.get(bad.BlockHash) + require.False(t, promoted, "announce failing the sig re-check must not be promoted") + require.False(t, h.handler.deferredAnnounces.has(bad.BlockHash), "failed drain must consume the deferred entry") +} + // TestCanonicalWitnessHashStorageGate pins the chain-storage gate: no stored // witness means no commitment (and thus nothing to sign), while stored bytes // hash to the canonical commitment directly. diff --git a/eth/protocols/wit/peer_wit2_test.go b/eth/protocols/wit/peer_wit2_test.go index 969cfeb3b7..bdf2f498ba 100644 --- a/eth/protocols/wit/peer_wit2_test.go +++ b/eth/protocols/wit/peer_wit2_test.go @@ -112,6 +112,37 @@ func TestHandleSignedNewWitnessHashesRejectsMalformedPackets(t *testing.T) { over[i] = testAnnouncement(byte(i)) } require.Error(t, send(&SignedNewWitnessHashesPacket{Announcements: over}), "over-cap packet must be rejected") + + // Structurally invalid payload: RLP that does not decode into the packet + // shape must error out at decode time. + sender, receiver, cleanup := newWit2PeerPair(t) + defer cleanup() + errc := make(chan error, 1) + go func() { + errc <- p2p.Send(sender.rw, SignedNewWitnessHashesMsg, "not-a-packet") + }() + require.Error(t, handleMessage(backend, receiver), "undecodable payload must be rejected") + require.NoError(t, <-errc) +} + +// TestAddKnownAnnounce pins the announce-known set semantics: recording an +// announce marks only the announce set, never the body-holder set that +// drives fetch peer selection. +func TestAddKnownAnnounce(t *testing.T) { + var id enode.ID + rand.Read(id[:]) + + app, net := p2p.MsgPipe() + defer app.Close() + defer net.Close() + peer := NewPeer(WIT2, p2p.NewPeer(id, "wit2", nil), net, log.New()) + defer peer.Close() + + hash := common.HexToHash("0x77") + require.False(t, peer.KnownAnnounceContainsHash(hash)) + peer.AddKnownAnnounce(hash) + require.True(t, peer.KnownAnnounceContainsHash(hash)) + require.False(t, peer.KnownWitnessContainsHash(hash), "announce-known must not imply body-known") } // TestAsyncSendSignedWitnessAnnouncementGuards pins the two non-delivery