Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rpc: Optimize serialization and enhance metadata of dumptxoutset output #29612

Merged
merged 4 commits into from May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/kernel/chainparams.cpp
Expand Up @@ -542,3 +542,33 @@ std::unique_ptr<const CChainParams> CChainParams::TestNet()
{
return std::make_unique<const CTestNetParams>();
}

std::vector<int> CChainParams::GetAvailableSnapshotHeights() const
{
std::vector<int> heights;
heights.reserve(m_assumeutxo_data.size());

for (const auto& data : m_assumeutxo_data) {
heights.emplace_back(data.height);
}
return heights;
}

std::optional<ChainType> GetNetworkForMagic(MessageStartChars& message)
{
const auto mainnet_msg = CChainParams::Main()->MessageStart();
const auto testnet_msg = CChainParams::TestNet()->MessageStart();
const auto regtest_msg = CChainParams::RegTest({})->MessageStart();
const auto signet_msg = CChainParams::SigNet({})->MessageStart();

if (std::equal(message.begin(), message.end(), mainnet_msg.data())) {
return ChainType::MAIN;
} else if (std::equal(message.begin(), message.end(), testnet_msg.data())) {
return ChainType::TESTNET;
} else if (std::equal(message.begin(), message.end(), regtest_msg.data())) {
return ChainType::REGTEST;
} else if (std::equal(message.begin(), message.end(), signet_msg.data())) {
return ChainType::SIGNET;
}
return std::nullopt;
}
3 changes: 3 additions & 0 deletions src/kernel/chainparams.h
Expand Up @@ -93,6 +93,7 @@ class CChainParams
const Consensus::Params& GetConsensus() const { return consensus; }
const MessageStartChars& MessageStart() const { return pchMessageStart; }
uint16_t GetDefaultPort() const { return nDefaultPort; }
std::vector<int> GetAvailableSnapshotHeights() const;

const CBlock& GenesisBlock() const { return genesis; }
/** Default value for -checkmempool and -checkblockindex argument */
Expand Down Expand Up @@ -183,4 +184,6 @@ class CChainParams
ChainTxData chainTxData;
};

std::optional<ChainType> GetNetworkForMagic(MessageStartChars& pchMessageStart);

#endif // BITCOIN_KERNEL_CHAINPARAMS_H
56 changes: 55 additions & 1 deletion src/node/utxo_snapshot.h
Expand Up @@ -6,27 +6,37 @@
#ifndef BITCOIN_NODE_UTXO_SNAPSHOT_H
#define BITCOIN_NODE_UTXO_SNAPSHOT_H

#include <chainparams.h>
#include <kernel/chainparams.h>
#include <kernel/cs_main.h>
#include <serialize.h>
#include <sync.h>
#include <uint256.h>
#include <util/chaintype.h>
#include <util/fs.h>

#include <cstdint>
#include <optional>
#include <string_view>

// UTXO set snapshot magic bytes
static constexpr std::array<uint8_t, 5> SNAPSHOT_MAGIC_BYTES = {'u', 't', 'x', 'o', 0xff};

class Chainstate;

namespace node {
//! Metadata describing a serialized version of a UTXO set from which an
//! assumeutxo Chainstate can be constructed.
class SnapshotMetadata
{
const uint16_t m_version{1};
const std::set<uint16_t> m_supported_versions{1};
public:
//! The hash of the block that reflects the tip of the chain for the
//! UTXO set contained in this snapshot.
uint256 m_base_blockhash;
uint32_t m_base_blockheight;


//! The number of coins in the UTXO set contained in this snapshot. Used
//! during snapshot load to estimate progress of UTXO set reconstruction.
Expand All @@ -35,11 +45,55 @@ class SnapshotMetadata
SnapshotMetadata() { }
SnapshotMetadata(
const uint256& base_blockhash,
const int base_blockheight,
uint64_t coins_count) :
m_base_blockhash(base_blockhash),
m_base_blockheight(base_blockheight),
m_coins_count(coins_count) { }

SERIALIZE_METHODS(SnapshotMetadata, obj) { READWRITE(obj.m_base_blockhash, obj.m_coins_count); }
template <typename Stream>
inline void Serialize(Stream& s) const {
s << SNAPSHOT_MAGIC_BYTES;
s << m_version;
s << Params().MessageStart();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: It would be nice, if new code didn't use globals implicitly. What about storing a reference to the prams (or the magic) in a member field of this struct and using it here? An alternative would be to use params-serialization and embed the magic bytes during serialization.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I addressed this in #30167 using the member field. Using params-serialization wouldn't have made much of a difference given how we use this code right now, I think.

s << m_base_blockheight;
s << m_base_blockhash;
s << m_coins_count;
}

template <typename Stream>
inline void Unserialize(Stream& s) {
// Read the snapshot magic bytes
std::array<uint8_t, SNAPSHOT_MAGIC_BYTES.size()> snapshot_magic;
s >> snapshot_magic;
if (snapshot_magic != SNAPSHOT_MAGIC_BYTES) {
throw std::ios_base::failure("Invalid UTXO set snapshot magic bytes. Please check if this is indeed a snapshot file or if you are using an outdated snapshot format.");
}

// Read the version
uint16_t version;
s >> version;
if (m_supported_versions.find(version) == m_supported_versions.end()) {
throw std::ios_base::failure(strprintf("Version of snapshot %s does not match any of the supported versions.", version));
}

// Read the network magic (pchMessageStart)
MessageStartChars message;
s >> message;
if (!std::equal(message.begin(), message.end(), Params().MessageStart().data())) {
auto metadata_network = GetNetworkForMagic(message);
if (metadata_network) {
std::string network_string{ChainTypeToString(metadata_network.value())};
throw std::ios_base::failure(strprintf("The network of the snapshot (%s) does not match the network of this node (%s).", network_string, Params().GetChainTypeString()));
} else {
throw std::ios_base::failure("This snapshot has been created for an unrecognized network. This could be a custom signet, a new testnet or possibly caused by data corruption.");
}
}

s >> m_base_blockheight;
s >> m_base_blockhash;
s >> m_coins_count;
}
};

//! The file in the snapshot chainstate dir which stores the base blockhash. This is
Expand Down
56 changes: 49 additions & 7 deletions src/rpc/blockchain.cpp
Expand Up @@ -34,6 +34,7 @@
#include <rpc/server_util.h>
#include <rpc/util.h>
#include <script/descriptor.h>
#include <serialize.h>
#include <streams.h>
#include <sync.h>
#include <txdb.h>
Expand Down Expand Up @@ -2690,29 +2691,60 @@ UniValue CreateUTXOSnapshot(
tip->nHeight, tip->GetBlockHash().ToString(),
fs::PathToString(path), fs::PathToString(temppath)));

SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count};
SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count};

afile << metadata;

COutPoint key;
Txid last_hash;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: would still prefer last_txid over last_hash as it's more expressive (OTOH, in COutPoint, it's also called hash)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I must have overlooked that if you already mentioned it before when I changed the type. I will address it if I have to retouch.

Coin coin;
unsigned int iter{0};
size_t written_coins_count{0};
std::vector<std::pair<uint32_t, Coin>> coins;

// To reduce space the serialization format of the snapshot avoids
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Might it be useful to add a small diagram of the format to the dumptxoutset help output?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, will think about what that could look like if I have to retouch or in the follow-up.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I spent some time thinking about a diagram that would work but I wasn't very happy what I could come up with. I instead copied the expression used in the issue. It shows the format in a concise way that I find understandable: list(Txid, list((vout,Coin)). Let me know what you think.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also add the size of the coins list list(Txid, number_of_coins, list((vout,Coin)))?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add this if I have to retouch, but if not I can do this as a follow-up in one of my other assumeutxo PRs.

// duplication of tx hashes. The code takes advantage of the guarantee by
// leveldb that keys are lexicographically sorted.
// In the coins vector we collect all coins that belong to a certain tx hash
// (key.hash) and when we have them all (key.hash != last_hash) we write
// them to file using the below lambda function.
// See also https://github.com/bitcoin/bitcoin/issues/25675
auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
afile << last_hash;
WriteCompactSize(afile, coins.size());
for (const auto& [n, coin] : coins) {
fjahr marked this conversation as resolved.
Show resolved Hide resolved
WriteCompactSize(afile, n);
afile << coin;
++written_coins_count;
}
};

pcursor->GetKey(key);
last_hash = key.hash;
while (pcursor->Valid()) {
if (iter % 5000 == 0) node.rpc_interruption_point();
++iter;
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
afile << key;
afile << coin;
if (key.hash != last_hash) {
write_coins_to_file(afile, last_hash, coins, written_coins_count);
last_hash = key.hash;
coins.clear();
}
coins.emplace_back(key.n, coin);
fjahr marked this conversation as resolved.
Show resolved Hide resolved
}

pcursor->Next();
}

if (!coins.empty()) {
write_coins_to_file(afile, last_hash, coins, written_coins_count);
}

CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);

afile.fclose();

UniValue result(UniValue::VOBJ);
result.pushKV("coins_written", maybe_stats->coins_count);
result.pushKV("coins_written", written_coins_count);
result.pushKV("base_hash", tip->GetBlockHash().ToString());
result.pushKV("base_height", tip->nHeight);
result.pushKV("path", path.utf8string());
Expand Down Expand Up @@ -2772,12 +2804,22 @@ static RPCHelpMan loadtxoutset()
}

SnapshotMetadata metadata;
afile >> metadata;
try {
afile >> metadata;
} catch (const std::ios_base::failure& e) {
throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what()));
}

uint256 base_blockhash = metadata.m_base_blockhash;
int base_blockheight = metadata.m_base_blockheight;
if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) {
auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights();
std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); });
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, "
"assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString()));
"assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.",
base_blockhash.ToString(),
base_blockheight,
heights_formatted));
}
CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main,
return chainman.m_blockman.LookupBlockIndex(base_blockhash));
Expand Down
9 changes: 6 additions & 3 deletions src/test/validation_chainstatemanager_tests.cpp
Expand Up @@ -226,10 +226,13 @@ struct SnapshotTestSetup : TestChain100Setup {
// A UTXO is missing but count is correct
metadata.m_coins_count -= 1;

COutPoint outpoint;
Txid txid;
auto_infile >> txid;
// coins size
(void)ReadCompactSize(auto_infile);
// vout index
(void)ReadCompactSize(auto_infile);
Coin coin;

auto_infile >> outpoint;
auto_infile >> coin;
}));

Expand Down
111 changes: 62 additions & 49 deletions src/validation.cpp
Expand Up @@ -5660,69 +5660,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
return false;
}

COutPoint outpoint;
Coin coin;
const uint64_t coins_count = metadata.m_coins_count;
uint64_t coins_left = metadata.m_coins_count;

LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString());
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString());
int64_t coins_processed{0};

while (coins_left > 0) {
try {
coins_file >> outpoint;
coins_file >> coin;
} catch (const std::ios_base::failure&) {
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (coin.nHeight > base_height ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (!MoneyRange(coin.out.nValue)) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
coins_count - coins_left);
return false;
}
Txid txid;
coins_file >> txid;
size_t coins_per_txid{0};
coins_per_txid = ReadCompactSize(coins_file);

if (coins_per_txid > coins_left) {
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this is new code, but uncovered. It would be nice to have a test for this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a test for this in #30167

return false;
}

coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
for (size_t i = 0; i < coins_per_txid; i++) {
COutPoint outpoint;
Coin coin;
outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file));
outpoint.hash = txid;
coins_file >> coin;
if (coin.nHeight > base_height ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
coins_count - coins_left);
return false;
}
if (!MoneyRange(coin.out.nValue)) {
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
coins_count - coins_left);
return false;
}
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));

--coins_left;
++coins_processed;
--coins_left;
++coins_processed;

if (coins_processed % 1000000 == 0) {
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
coins_processed,
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
}
if (coins_processed % 1000000 == 0) {
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
coins_processed,
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
coins_cache.DynamicMemoryUsage() / (1000 * 1000));
}

// Batch write and flush (if we need to) every so often.
//
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
// means <5MB of memory imprecision.
if (coins_processed % 120000 == 0) {
if (m_interrupt) {
return false;
}
// Batch write and flush (if we need to) every so often.
//
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins
// means <5MB of memory imprecision.
if (coins_processed % 120000 == 0) {
if (m_interrupt) {
return false;
}

const auto snapshot_cache_state = WITH_LOCK(::cs_main,
return snapshot_chainstate.GetCoinsCacheSizeState());
const auto snapshot_cache_state = WITH_LOCK(::cs_main,
return snapshot_chainstate.GetCoinsCacheSizeState());

if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
// This is a hack - we don't know what the actual best block is, but that
// doesn't matter for the purposes of flushing the cache here. We'll set this
// to its correct value (`base_blockhash`) below after the coins are loaded.
coins_cache.SetBestBlock(GetRandHash());
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
// This is a hack - we don't know what the actual best block is, but that
// doesn't matter for the purposes of flushing the cache here. We'll set this
// to its correct value (`base_blockhash`) below after the coins are loaded.
coins_cache.SetBestBlock(GetRandHash());

// No need to acquire cs_main since this chainstate isn't being used yet.
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
// No need to acquire cs_main since this chainstate isn't being used yet.
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
}
}
}
} catch (const std::ios_base::failure&) {
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
coins_processed);
return false;
}
}

Expand All @@ -5735,7 +5747,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot(

bool out_of_coins{false};
try {
coins_file >> outpoint;
Txid txid;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think this should check that the stream is completely empty, no? I suspect that a trailing byte will be left undetected, so it may be better to deserialize a std::byte here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in #30167

coins_file >> txid;
} catch (const std::ios_base::failure&) {
// We expect an exception since we should be out of coins.
out_of_coins = true;
Expand Down