Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

multi: Add batched cfilter fetching messages #3211

Merged
merged 4 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
289 changes: 289 additions & 0 deletions gcs/blockcf2/maxsize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
// Copyright (c) 2024 The Decred developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.

package blockcf2
matheusd marked this conversation as resolved.
Show resolved Hide resolved

import (
"encoding/binary"
"math/bits"
"math/rand"
"sort"
"testing"
"time"

"github.com/dchest/siphash"
"github.com/decred/dcrd/chaincfg/v3"
"github.com/decred/dcrd/gcs/v4"
"github.com/decred/dcrd/wire"
)

// TestMaxSize verifies the max size of blockcf2 filters for various
// parameters.
//
// This test is meant to nail down the values generated by various
// package-level constants to known good hardcoded values such that if any of
// the values change, this test breaks and any code that relies on assumptions
// about the max filter size having a particular value are reviewed.
func TestMaxSize(t *testing.T) {
// entrySizeForSize returns the size of scripts that ensures that every
// script added to a filter maximizes the passed maximum size.
//
// When adding scripts to build the filter, every input script should
// be unique in order to maximize the chances of unique values after
// the siphash and modulo reduction stages. When the total size
// available to build input scripts is less than or equal to 2^8, then
// a single byte is sufficient (because each byte will have a unique
// value), and so on when the max size is 2^16, 2^24 and 2^32.
//
// In practice, for the current network constants, the script size that
// will maximize all tests will be 3.
entrySizeForSize := func(size uint32) uint32 {
matheusd marked this conversation as resolved.
Show resolved Hide resolved
switch {
case size < 1<<8:
return 1
case size < 1<<16:
return 2
case size < 1<<24:
return 3
default:
return 4
}
}

// mainnetMaxBlockSize is the max size of mainnet blocks according to
// the most recent consensus rules.
mainnetBlockSizes := chaincfg.MainNetParams().MaximumBlockSizes
mainnetMaxBlockSize := uint32(mainnetBlockSizes[len(mainnetBlockSizes)-1])

const (
// minTxSize is the minimum transaction size with one input
// (and no outputs).
minTxSize = uint32(4 + 51 + 18)

// txOutSize is the size of an output, plus one byte for the
// pkscript length encoded as a varint.
txOutSize = uint32(10 + 1)

// p2shSize is the size of a P2SH script (the smallest standard
// script that may be added without limits to a transaction).
p2shSize = uint32(23)
)

tests := []struct {
name string
n uint32
want uint64
}{{
// This test asserts the size of a filter when the source
// data for the filter fills an entire P2P message.
name: "filter from data that fills an entire P2P msg",
n: wire.MaxMessagePayload / entrySizeForSize(wire.MaxMessagePayload),
want: 22541387,
}, {
// This test asserts the size of a filter when the source data
// for the filter fills as many bytes as the maximum block
// payload size for a P2P message.
name: "filter from data that fills the max block size",
n: wire.MaxBlockPayload / entrySizeForSize(wire.MaxBlockPayload),
want: 1174034,
}, {
// This test asserts the size of a filter when the source data
// for the filter fills as many bytes as the maximum consensus
// enforced block size for mainnet.
name: "filter from data that fills the max mainnet block size",
n: mainnetMaxBlockSize / entrySizeForSize(mainnetMaxBlockSize),
want: 352214,
}, {
// This test asserts the size of a filter when the source data
// for the filter is a single tx that has as many OP_RETURN
// outputs as necessary to fill a block.
name: "filter from tx filled with OP_RETURN outputs",
n: wire.MaxBlockPayload / (txOutSize + entrySizeForSize(wire.MaxBlockPayload)),
want: 251581,
}, {
// This test asserts the size of a filter when the source data
// is a set of transactions that have 4 OP_RETURN outputs, as
// enforced by the standardness policy checks of the mempool.
name: "filter from standard OP_RETURN tx",
n: wire.MaxBlockPayload / (minTxSize + (txOutSize+entrySizeForSize(wire.MaxBlockPayload))*4),
want: 27305,
}, {
// This test asserts the size of a filter when the source data
// is a single transaction with as many P2SH outputs as
// necessary to fill the largest block of any network.
name: "filter from P2SH outputs tx",
n: wire.MaxBlockPayload / (txOutSize + p2shSize),
want: 103593,
}, {
// This test asserts the size of a filter when the source data
// is a single transaction with as many P2SH outputs as
// necessary to fill the largest block on mainnet.
name: "filter from P2SH outputs tx on mainnet",
n: mainnetMaxBlockSize / (txOutSize + p2shSize),
want: 31080,
}}

for i := range tests {
tc := tests[i]
t.Run(tc.name, func(t *testing.T) {
got := gcs.MaxFilterV2Size(B, M, tc.n)
if tc.want != got {
t.Logf("N: %d", tc.n)
t.Fatalf("Unexpected max filter size: got %d, want %d",
got, tc.want)
}
})
}
}

func fastReduce(x, N uint64) uint64 {
hi, _ := bits.Mul64(x, N)
return hi
}

// TestWorstMaxSize generates the largest possible cfilter for a given random
// key.
func TestWorstMaxSize(t *testing.T) {
if testing.Short() {
t.Skip("Skipping due to -test.short")
}
matheusd marked this conversation as resolved.
Show resolved Hide resolved

const scriptSize = 3
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
nbScripts := wire.MaxBlockPayload / scriptSize

endian := binary.BigEndian
aux := make([]byte, 4)

var key [gcs.KeySize]byte
rnd.Read(key[:])
k0 := binary.LittleEndian.Uint64(key[0:8])
k1 := binary.LittleEndian.Uint64(key[8:16])

modulusNM := uint64(nbScripts * M)

// Generate every 3-byte script (2^24) and create a map of the reduced
// value to the script values that produce that reduced value (as a
// uint32).
maxNb := 1 << (8 * scriptSize)
seenSip := make(map[uint64]struct{}, maxNb)
seenReduced := make(map[uint64][]uint32, maxNb)
keys := make([]uint64, 0, maxNb)
for c := uint32(0); c < uint32(maxNb); c++ {
endian.PutUint32(aux, c)
v := siphash.Hash(k0, k1, aux[1:])
if _, ok := seenSip[v]; ok {
continue
}
seenSip[v] = struct{}{}
vv := fastReduce(v, modulusNM)
seenReduced[vv] = append(seenReduced[vv], c)
keys = append(keys, vv)
}

// Sort the reduced values in ascending order.
sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
for i := 0; i < 10; i++ {
t.Logf("smallest seen %d: %d (%v)", i, keys[i], seenReduced[keys[i]])
}
for i := len(keys) - 10; i < len(keys); i++ {
t.Logf("largest seen %d: %d (%v)", i, keys[i], seenReduced[keys[i]])
}

// Create the scripts. The first N-1 scripts will generate the smallest
// possible reduced values.
data := make([][]byte, nbScripts)
for i := 0; i < nbScripts-1; i++ {
values := seenReduced[keys[i]]
if len(values) > 1 {
// Report on different input entries that generate
// different siphash values that nevertheless reduce to
// the same final value.
sips := make([]uint64, len(values))
mods := make([]uint64, len(values))
for j := range values {
endian.PutUint32(aux, values[j])
sips[j] = siphash.Hash(k0, k1, aux[1:])
mods[j] = fastReduce(sips[j], modulusNM)
}
t.Logf("Collision on modulo reduction for script %d: "+
"values %v, siphashes %v, modulos %v", i, values,
sips, mods)
}
data[i] = make([]byte, scriptSize)
endian.PutUint32(aux, values[0])
copy(data[i], aux[1:])
seenReduced[keys[i]] = values[1:]
}

// The last script will generate the largest possible reduced value. The
// difference between the last and second-to-last scripts is the largest
// difference possible (when using this specific random key).
i := nbScripts - 1
data[i] = make([]byte, scriptSize)
endian.PutUint32(aux, seenReduced[keys[len(keys)-1]][0])
copy(data[i], aux[1:])

// Create filter and report results.
filter, err := gcs.NewFilterV2(B, M, key, data)
if err != nil {
t.Fatal(err)
}
sz := uint64(len(filter.Bytes()))
t.Logf("Key: %x", key)
t.Logf("Number of values: %d", len(seenReduced))
t.Logf("Scripts: %d", len(data))

// The maximum possible quotient happens when the first nbScripts-1
// scripts generate values from 0..nbScripts-2 and the last script
// generates the value modulusNM -1.
N := uint64(nbScripts)
maxQuotient := ((modulusNM - 1) - (N - 1)) >> 19
t.Logf("Max Quotient: %d", maxQuotient)

// The max possible size can be calculated by assuming every script
// will generate a value (i.e. no removed duplicates) and the last
// script will generate the max possible quotient.
maxPossible := gcs.MaxFilterV2Size(B, M, uint32(nbScripts))
t.Logf("Max Possible size: %d, actual size: %d", maxPossible, sz)
}

// TestRandomFilterSize generates filters with random data.
func TestRandomFilterSize(t *testing.T) {
// Use a unique random seed each test instance and log it if the tests
// fail.
seed := time.Now().UnixNano()
rng := rand.New(rand.NewSource(seed))
defer func(t *testing.T, seed int64) {
if t.Failed() {
t.Logf("random seed: %d", seed)
}
}(t, seed)
scriptSize := 3
nbScripts := wire.MaxBlockPayload / scriptSize
data := make([][]byte, nbScripts)
for i := range data {
data[i] = make([]byte, scriptSize)
}
var key [gcs.KeySize]byte

// Generate a random filter and key.
for i := range data {
rng.Read(data[i])
}
rng.Read(key[:])

// Check if it's larger than possible.
filter, err := gcs.NewFilterV2(B, M, key, data)
if err != nil {
t.Fatal(err)
}
sz := uint64(len(filter.Bytes()))
maxPossible := gcs.MaxFilterV2Size(B, M, uint32(nbScripts))
t.Logf("Max Possible size: %d, actual size: %d", maxPossible, sz)
if sz > maxPossible {
t.Fatalf("Found a random filter with max size %d > max possible size %d",
sz, maxPossible)
}
}
43 changes: 43 additions & 0 deletions gcs/gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,46 @@ func MakeHeaderForFilter(filter *FilterV1, prevHeader *chainhash.Hash) chainhash
// The final filter hash is the blake256 of the hash computed above.
return chainhash.Hash(blake256.Sum256(filterTip[:]))
}

// MaxFilterV2Size returns the maximum filter size possible for the given filter
// parameters.
func MaxFilterV2Size(B uint8, M uint64, N uint32) uint64 {
// The maximum (i.e. worst) filter size for V2 filters happens when the
// following conditions are met:
//
// - Every one of the N data entries is unique.
// - Every one of the N data entries produces a unique value after
// the siphash stage, ensuring no values are prematurely removed.
// - The quotient difference is maximized and produces the largest
// possible number of one bits in unary coding for the set of unique
// scripts.
//
// Given that the values are sorted prior being encoded with the
// Golomb/Rice coding, the largest possible quotient happens when the
// difference between two consecutive values is maximized. And that
// happens when the last value has the highest possible value and the
// second-to-last has the least possible value.
//
// The highest possible value after the modulo reduction stage is
// N*M-1. And the least possible value is zero. In other words, the
// first N-1 siphashed entries are mapped, after modulo reduction, to
// the value 0 and the last entry is mapped to the value N*M-1.
//
// Thus the largest possible difference is N*M-1, with the max
// number of bits of the quotient readily determined by shifting right
// that amount by B.
n := uint64(N)
b := uint64(B)
largestDiff := n*M - 1
maxQuoBits := largestDiff >> b

// Finally, the maximum size of the filter is determined by assuming
// each one of the N entries takes one bit for the 0 in the quotient
// encoding and B bits for the remainder, one entry takes an additional
// maxQuoBits for the quotient encoding, N is encoded as a varint and
// any necessary padding is added.
nSerSize := uint64(wire.VarIntSerializeSize(n))
maxBits := n + n*b + maxQuoBits
maxBytes := (maxBits+7)/8 + nSerSize
return maxBytes
}
2 changes: 1 addition & 1 deletion gcs/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/dchest/siphash v1.2.3
github.com/decred/dcrd/blockchain/stake/v5 v5.0.0
github.com/decred/dcrd/chaincfg/chainhash v1.0.4
github.com/decred/dcrd/chaincfg/v3 v3.2.0
github.com/decred/dcrd/crypto/blake256 v1.0.1
github.com/decred/dcrd/txscript/v4 v4.1.0
github.com/decred/dcrd/wire v1.6.0
Expand All @@ -14,7 +15,6 @@ require (
require (
github.com/agl/ed25519 v0.0.0-20170116200512-5312a6153412 // indirect
github.com/decred/base58 v1.0.5 // indirect
github.com/decred/dcrd/chaincfg/v3 v3.2.0 // indirect
github.com/decred/dcrd/crypto/ripemd160 v1.0.2 // indirect
github.com/decred/dcrd/database/v3 v3.0.1 // indirect
github.com/decred/dcrd/dcrec v1.0.1 // indirect
Expand Down
15 changes: 15 additions & 0 deletions internal/blockchain/chainio.go
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,21 @@ func dbFetchGCSFilter(dbTx database.Tx, blockHash *chainhash.Hash) (*gcs.FilterV
return filter, nil
}

// dbFetchRawGCSFilter fetches the raw version 2 GCS filter for the passed
// block, without decoding it from the db.
//
// WARNING: the returned slice is only valid for the duration of the database
// transaction and MUST be copied to a new buffer if it is needed after the db
// transaction ends.
//
// This function is meant for cases where the raw filter bytes will be used
// without decoding into a gcs.FilterV2 value. For a safer alternative, use
// dbFetchGCSFilter.
func dbFetchRawGCSFilter(dbTx database.Tx, blockHash *chainhash.Hash) []byte {
filterBucket := dbTx.Metadata().Bucket(gcsFilterBucketName)
return filterBucket.Get(blockHash[:])
}

// dbPutGCSFilter uses an existing database transaction to update the version 2
// GCS filter for the given block hash using the provided filter.
func dbPutGCSFilter(dbTx database.Tx, blockHash *chainhash.Hash, filter *gcs.FilterV2) error {
Expand Down
8 changes: 8 additions & 0 deletions internal/blockchain/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,14 @@ const (
// ErrSerializeHeader indicates an attempt to serialize a block header failed.
ErrSerializeHeader = ErrorKind("ErrSerializeHeader")

// ErrNotAnAncestor indicates an attempt to fetch a chain of filters where
// the start hash is not an ancestor of the end hash.
ErrNotAnAncestor = ErrorKind("ErrNotAnAncestor")

// ErrRequestTooLarge indicates an attempt to request too much data
// in a batched request.
ErrRequestTooLarge = ErrorKind("ErrRequestTooLarge")

// ------------------------------------------
// Errors related to the UTXO backend.
// ------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions internal/blockchain/error_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ func TestErrorKindStringer(t *testing.T) {
{ErrNoTreasuryBalance, "ErrNoTreasuryBalance"},
{ErrInvalidateGenesisBlock, "ErrInvalidateGenesisBlock"},
{ErrSerializeHeader, "ErrSerializeHeader"},
{ErrNotAnAncestor, "ErrNotAnAncestor"},
{ErrRequestTooLarge, "ErrRequestTooLarge"},
{ErrUtxoBackend, "ErrUtxoBackend"},
{ErrUtxoBackendCorruption, "ErrUtxoBackendCorruption"},
{ErrUtxoBackendNotOpen, "ErrUtxoBackendNotOpen"},
Expand Down