decred · davecgh · May 13, 2024 · Nov 2, 2023 · Nov 2, 2023 · Nov 2, 2023
diff --git a/gcs/blockcf2/maxsize_test.go b/gcs/blockcf2/maxsize_test.go
@@ -0,0 +1,289 @@
+// Copyright (c) 2024 The Decred developers
+// Use of this source code is governed by an ISC
+// license that can be found in the LICENSE file.
+
+package blockcf2
+
+import (
+	"encoding/binary"
+	"math/bits"
+	"math/rand"
+	"sort"
+	"testing"
+	"time"
+
+	"github.com/dchest/siphash"
+	"github.com/decred/dcrd/chaincfg/v3"
+	"github.com/decred/dcrd/gcs/v4"
+	"github.com/decred/dcrd/wire"
+)
+
+// TestMaxSize verifies the max size of blockcf2 filters for various
+// parameters.
+//
+// This test is meant to nail down the values generated by various
+// package-level constants to known good hardcoded values such that if any of
+// the values change, this test breaks and any code that relies on assumptions
+// about the max filter size having a particular value are reviewed.
+func TestMaxSize(t *testing.T) {
+	// entrySizeForSize returns the size of scripts that ensures that every
+	// script added to a filter maximizes the passed maximum size.
+	//
+	// When adding scripts to build the filter, every input script should
+	// be unique in order to maximize the chances of unique values after
+	// the siphash and modulo reduction stages.  When the total size
+	// available to build input scripts is less than or equal to 2^8, then
+	// a single byte is sufficient (because each byte will have a unique
+	// value), and so on when the max size is 2^16, 2^24 and 2^32.
+	//
+	// In practice, for the current network constants, the script size that
+	// will maximize all tests will be 3.
+	entrySizeForSize := func(size uint32) uint32 {
+		switch {
+		case size < 1<<8:
+			return 1
+		case size < 1<<16:
+			return 2
+		case size < 1<<24:
+			return 3
+		default:
+			return 4
+		}
+	}
+
+	// mainnetMaxBlockSize is the max size of mainnet blocks according to
+	// the most recent consensus rules.
+	mainnetBlockSizes := chaincfg.MainNetParams().MaximumBlockSizes
+	mainnetMaxBlockSize := uint32(mainnetBlockSizes[len(mainnetBlockSizes)-1])
+
+	const (
+		// minTxSize is the minimum transaction size with one input
+		// (and no outputs).
+		minTxSize = uint32(4 + 51 + 18)
+
+		// txOutSize is the size of an output, plus one byte for the
+		// pkscript length encoded as a varint.
+		txOutSize = uint32(10 + 1)
+
+		// p2shSize is the size of a P2SH script (the smallest standard
+		// script that may be added without limits to a transaction).
+		p2shSize = uint32(23)
+	)
+
+	tests := []struct {
+		name string
+		n    uint32
+		want uint64
+	}{{
+		// This test asserts the size of a filter when the source
+		// data for the filter fills an entire P2P message.
+		name: "filter from data that fills an entire P2P msg",
+		n:    wire.MaxMessagePayload / entrySizeForSize(wire.MaxMessagePayload),
+		want: 22541387,
+	}, {
+		// This test asserts the size of a filter when the source data
+		// for the filter fills as many bytes as the maximum block
+		// payload size for a P2P message.
+		name: "filter from data that fills the max block size",
+		n:    wire.MaxBlockPayload / entrySizeForSize(wire.MaxBlockPayload),
+		want: 1174034,
+	}, {
+		// This test asserts the size of a filter when the source data
+		// for the filter fills as many bytes as the maximum consensus
+		// enforced block size for mainnet.
+		name: "filter from data that fills the max mainnet block size",
+		n:    mainnetMaxBlockSize / entrySizeForSize(mainnetMaxBlockSize),
+		want: 352214,
+	}, {
+		// This test asserts the size of a filter when the source data
+		// for the filter is a single tx that has as many OP_RETURN
+		// outputs as necessary to fill a block.
+		name: "filter from tx filled with OP_RETURN outputs",
+		n:    wire.MaxBlockPayload / (txOutSize + entrySizeForSize(wire.MaxBlockPayload)),
+		want: 251581,
+	}, {
+		// This test asserts the size of a filter when the source data
+		// is a set of transactions that have 4 OP_RETURN outputs, as
+		// enforced by the standardness policy checks of the mempool.
+		name: "filter from standard OP_RETURN tx",
+		n:    wire.MaxBlockPayload / (minTxSize + (txOutSize+entrySizeForSize(wire.MaxBlockPayload))*4),
+		want: 27305,
+	}, {
+		// This test asserts the size of a filter when the source data
+		// is a single transaction with as many P2SH outputs as
+		// necessary to fill the largest block of any network.
+		name: "filter from P2SH outputs tx",
+		n:    wire.MaxBlockPayload / (txOutSize + p2shSize),
+		want: 103593,
+	}, {
+		// This test asserts the size of a filter when the source data
+		// is a single transaction with as many P2SH outputs as
+		// necessary to fill the largest block on mainnet.
+		name: "filter from P2SH outputs tx on mainnet",
+		n:    mainnetMaxBlockSize / (txOutSize + p2shSize),
+		want: 31080,
+	}}
+
+	for i := range tests {
+		tc := tests[i]
+		t.Run(tc.name, func(t *testing.T) {
+			got := gcs.MaxFilterV2Size(B, M, tc.n)
+			if tc.want != got {
+				t.Logf("N: %d", tc.n)
+				t.Fatalf("Unexpected max filter size: got %d, want %d",
+					got, tc.want)
+			}
+		})
+	}
+}
+
+func fastReduce(x, N uint64) uint64 {
+	hi, _ := bits.Mul64(x, N)
+	return hi
+}
+
+// TestWorstMaxSize generates the largest possible cfilter for a given random
+// key.
+func TestWorstMaxSize(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Skipping due to -test.short")
+	}
+
+	const scriptSize = 3
+	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
+	nbScripts := wire.MaxBlockPayload / scriptSize
+
+	endian := binary.BigEndian
+	aux := make([]byte, 4)
+
+	var key [gcs.KeySize]byte
+	rnd.Read(key[:])
+	k0 := binary.LittleEndian.Uint64(key[0:8])
+	k1 := binary.LittleEndian.Uint64(key[8:16])
+
+	modulusNM := uint64(nbScripts * M)
+
+	// Generate every 3-byte script (2^24) and create a map of the reduced
+	// value to the script values that produce that reduced value (as a
+	// uint32).
+	maxNb := 1 << (8 * scriptSize)
+	seenSip := make(map[uint64]struct{}, maxNb)
+	seenReduced := make(map[uint64][]uint32, maxNb)
+	keys := make([]uint64, 0, maxNb)
+	for c := uint32(0); c < uint32(maxNb); c++ {
+		endian.PutUint32(aux, c)
+		v := siphash.Hash(k0, k1, aux[1:])
+		if _, ok := seenSip[v]; ok {
+			continue
+		}
+		seenSip[v] = struct{}{}
+		vv := fastReduce(v, modulusNM)
+		seenReduced[vv] = append(seenReduced[vv], c)
+		keys = append(keys, vv)
+	}
+
+	// Sort the reduced values in ascending order.
+	sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
+	for i := 0; i < 10; i++ {
+		t.Logf("smallest seen %d: %d (%v)", i, keys[i], seenReduced[keys[i]])
+	}
+	for i := len(keys) - 10; i < len(keys); i++ {
+		t.Logf("largest seen %d: %d (%v)", i, keys[i], seenReduced[keys[i]])
+	}
+
+	// Create the scripts. The first N-1 scripts will generate the smallest
+	// possible reduced values.
+	data := make([][]byte, nbScripts)
+	for i := 0; i < nbScripts-1; i++ {
+		values := seenReduced[keys[i]]
+		if len(values) > 1 {
+			// Report on different input entries that generate
+			// different siphash values that nevertheless reduce to
+			// the same final value.
+			sips := make([]uint64, len(values))
+			mods := make([]uint64, len(values))
+			for j := range values {
+				endian.PutUint32(aux, values[j])
+				sips[j] = siphash.Hash(k0, k1, aux[1:])
+				mods[j] = fastReduce(sips[j], modulusNM)
+			}
+			t.Logf("Collision on modulo reduction for script %d: "+
+				"values %v, siphashes %v, modulos %v", i, values,
+				sips, mods)
+		}
+		data[i] = make([]byte, scriptSize)
+		endian.PutUint32(aux, values[0])
+		copy(data[i], aux[1:])
+		seenReduced[keys[i]] = values[1:]
+	}
+
+	// The last script will generate the largest possible reduced value. The
+	// difference between the last and second-to-last scripts is the largest
+	// difference possible (when using this specific random key).
+	i := nbScripts - 1
+	data[i] = make([]byte, scriptSize)
+	endian.PutUint32(aux, seenReduced[keys[len(keys)-1]][0])
+	copy(data[i], aux[1:])
+
+	// Create filter and report results.
+	filter, err := gcs.NewFilterV2(B, M, key, data)
+	if err != nil {
+		t.Fatal(err)
+	}
+	sz := uint64(len(filter.Bytes()))
+	t.Logf("Key: %x", key)
+	t.Logf("Number of values: %d", len(seenReduced))
+	t.Logf("Scripts: %d", len(data))
+
+	// The maximum possible quotient happens when the first nbScripts-1
+	// scripts generate values from 0..nbScripts-2 and the last script
+	// generates the value modulusNM -1.
+	N := uint64(nbScripts)
+	maxQuotient := ((modulusNM - 1) - (N - 1)) >> 19
+	t.Logf("Max Quotient: %d", maxQuotient)
+
+	// The max possible size can be calculated by assuming every script
+	// will generate a value (i.e. no removed duplicates) and the last
+	// script will generate the max possible quotient.
+	maxPossible := gcs.MaxFilterV2Size(B, M, uint32(nbScripts))
+	t.Logf("Max Possible size: %d, actual size: %d", maxPossible, sz)
+}
+
+// TestRandomFilterSize generates filters with random data.
+func TestRandomFilterSize(t *testing.T) {
+	// Use a unique random seed each test instance and log it if the tests
+	// fail.
+	seed := time.Now().UnixNano()
+	rng := rand.New(rand.NewSource(seed))
+	defer func(t *testing.T, seed int64) {
+		if t.Failed() {
+			t.Logf("random seed: %d", seed)
+		}
+	}(t, seed)
+	scriptSize := 3
+	nbScripts := wire.MaxBlockPayload / scriptSize
+	data := make([][]byte, nbScripts)
+	for i := range data {
+		data[i] = make([]byte, scriptSize)
+	}
+	var key [gcs.KeySize]byte
+
+	// Generate a random filter and key.
+	for i := range data {
+		rng.Read(data[i])
+	}
+	rng.Read(key[:])
+
+	// Check if it's larger than possible.
+	filter, err := gcs.NewFilterV2(B, M, key, data)
+	if err != nil {
+		t.Fatal(err)
+	}
+	sz := uint64(len(filter.Bytes()))
+	maxPossible := gcs.MaxFilterV2Size(B, M, uint32(nbScripts))
+	t.Logf("Max Possible size: %d, actual size: %d", maxPossible, sz)
+	if sz > maxPossible {
+		t.Fatalf("Found a random filter with max size %d > max possible size %d",
+			sz, maxPossible)
+	}
+}
diff --git a/gcs/gcs.go b/gcs/gcs.go
@@ -549,3 +549,46 @@ func MakeHeaderForFilter(filter *FilterV1, prevHeader *chainhash.Hash) chainhash
 	// The final filter hash is the blake256 of the hash computed above.
 	return chainhash.Hash(blake256.Sum256(filterTip[:]))
 }
+
+// MaxFilterV2Size returns the maximum filter size possible for the given filter
+// parameters.
+func MaxFilterV2Size(B uint8, M uint64, N uint32) uint64 {
+	// The maximum (i.e. worst) filter size for V2 filters happens when the
+	// following conditions are met:
+	//
+	// - Every one of the N data entries is unique.
+	// - Every one of the N data entries produces a unique value after
+	//   the siphash stage, ensuring no values are prematurely removed.
+	// - The quotient difference is maximized and produces the largest
+	//   possible number of one bits in unary coding for the set of unique
+	//   scripts.
+	//
+	// Given that the values are sorted prior being encoded with the
+	// Golomb/Rice coding, the largest possible quotient happens when the
+	// difference between two consecutive values is maximized.  And that
+	// happens when the last value has the highest possible value and the
+	// second-to-last has the least possible value.
+	//
+	// The highest possible value after the modulo reduction stage is
+	// N*M-1.  And the least possible value is zero.  In other words, the
+	// first N-1 siphashed entries are mapped, after modulo reduction, to
+	// the value 0 and the last entry is mapped to the value N*M-1.
+	//
+	// Thus the largest possible difference is N*M-1, with the max
+	// number of bits of the quotient readily determined by shifting right
+	// that amount by B.
+	n := uint64(N)
+	b := uint64(B)
+	largestDiff := n*M - 1
+	maxQuoBits := largestDiff >> b
+
+	// Finally, the maximum size of the filter is determined by assuming
+	// each one of the N entries takes one bit for the 0 in the quotient
+	// encoding and B bits for the remainder, one entry takes an additional
+	// maxQuoBits for the quotient encoding, N is encoded as a varint and
+	// any necessary padding is added.
+	nSerSize := uint64(wire.VarIntSerializeSize(n))
+	maxBits := n + n*b + maxQuoBits
+	maxBytes := (maxBits+7)/8 + nSerSize
+	return maxBytes
+}
diff --git a/gcs/go.mod b/gcs/go.mod
@@ -6,6 +6,7 @@ require (
 	github.com/dchest/siphash v1.2.3
 	github.com/decred/dcrd/blockchain/stake/v5 v5.0.0
 	github.com/decred/dcrd/chaincfg/chainhash v1.0.4
+	github.com/decred/dcrd/chaincfg/v3 v3.2.0
 	github.com/decred/dcrd/crypto/blake256 v1.0.1
 	github.com/decred/dcrd/txscript/v4 v4.1.0
 	github.com/decred/dcrd/wire v1.6.0
@@ -14,7 +15,6 @@ require (
 require (
 	github.com/agl/ed25519 v0.0.0-20170116200512-5312a6153412 // indirect
 	github.com/decred/base58 v1.0.5 // indirect
-	github.com/decred/dcrd/chaincfg/v3 v3.2.0 // indirect
 	github.com/decred/dcrd/crypto/ripemd160 v1.0.2 // indirect
 	github.com/decred/dcrd/database/v3 v3.0.1 // indirect
 	github.com/decred/dcrd/dcrec v1.0.1 // indirect

diff --git a/internal/blockchain/chainio.go b/internal/blockchain/chainio.go
@@ -851,6 +851,21 @@ func dbFetchGCSFilter(dbTx database.Tx, blockHash *chainhash.Hash) (*gcs.FilterV
 	return filter, nil
 }
 
+// dbFetchRawGCSFilter fetches the raw version 2 GCS filter for the passed
+// block, without decoding it from the db.
+//
+// WARNING: the returned slice is only valid for the duration of the database
+// transaction and MUST be copied to a new buffer if it is needed after the db
+// transaction ends.
+//
+// This function is meant for cases where the raw filter bytes will be used
+// without decoding into a gcs.FilterV2 value.  For a safer alternative, use
+// dbFetchGCSFilter.
+func dbFetchRawGCSFilter(dbTx database.Tx, blockHash *chainhash.Hash) []byte {
+	filterBucket := dbTx.Metadata().Bucket(gcsFilterBucketName)
+	return filterBucket.Get(blockHash[:])
+}
+
 // dbPutGCSFilter uses an existing database transaction to update the version 2
 // GCS filter for the given block hash using the provided filter.
 func dbPutGCSFilter(dbTx database.Tx, blockHash *chainhash.Hash, filter *gcs.FilterV2) error {

diff --git a/internal/blockchain/error.go b/internal/blockchain/error.go
@@ -589,6 +589,14 @@ const (
 	// ErrSerializeHeader indicates an attempt to serialize a block header failed.
 	ErrSerializeHeader = ErrorKind("ErrSerializeHeader")
 
+	// ErrNotAnAncestor indicates an attempt to fetch a chain of filters where
+	// the start hash is not an ancestor of the end hash.
+	ErrNotAnAncestor = ErrorKind("ErrNotAnAncestor")
+
+	// ErrRequestTooLarge indicates an attempt to request too much data
+	// in a batched request.
+	ErrRequestTooLarge = ErrorKind("ErrRequestTooLarge")
+
 	// ------------------------------------------
 	// Errors related to the UTXO backend.
 	// ------------------------------------------

diff --git a/internal/blockchain/error_test.go b/internal/blockchain/error_test.go
@@ -150,6 +150,8 @@ func TestErrorKindStringer(t *testing.T) {
 		{ErrNoTreasuryBalance, "ErrNoTreasuryBalance"},
 		{ErrInvalidateGenesisBlock, "ErrInvalidateGenesisBlock"},
 		{ErrSerializeHeader, "ErrSerializeHeader"},
+		{ErrNotAnAncestor, "ErrNotAnAncestor"},
+		{ErrRequestTooLarge, "ErrRequestTooLarge"},
 		{ErrUtxoBackend, "ErrUtxoBackend"},
 		{ErrUtxoBackendCorruption, "ErrUtxoBackendCorruption"},
 		{ErrUtxoBackendNotOpen, "ErrUtxoBackendNotOpen"},