-
Notifications
You must be signed in to change notification settings - Fork 35.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
rpc: Optimize serialization and enhance metadata of dumptxoutset output #29612
Changes from all commits
de95953
c14ed7f
4d8e5ed
542e13b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,7 @@ | |
#include <rpc/server_util.h> | ||
#include <rpc/util.h> | ||
#include <script/descriptor.h> | ||
#include <serialize.h> | ||
#include <streams.h> | ||
#include <sync.h> | ||
#include <txdb.h> | ||
|
@@ -2690,29 +2691,60 @@ UniValue CreateUTXOSnapshot( | |
tip->nHeight, tip->GetBlockHash().ToString(), | ||
fs::PathToString(path), fs::PathToString(temppath))); | ||
|
||
SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count}; | ||
SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count}; | ||
|
||
afile << metadata; | ||
|
||
COutPoint key; | ||
Txid last_hash; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: would still prefer There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I must have overlooked that if you already mentioned it before when I changed the type. I will address it if I have to retouch. |
||
Coin coin; | ||
unsigned int iter{0}; | ||
size_t written_coins_count{0}; | ||
std::vector<std::pair<uint32_t, Coin>> coins; | ||
|
||
// To reduce space the serialization format of the snapshot avoids | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Might it be useful to add a small diagram of the format to the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, will think about what that could look like if I have to retouch or in the follow-up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I spent some time thinking about a diagram that would work but I wasn't very happy what I could come up with. I instead copied the expression used in the issue. It shows the format in a concise way that I find understandable: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe also add the size of the coins list There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will add this if I have to retouch, but if not I can do this as a follow-up in one of my other assumeutxo PRs. |
||
// duplication of tx hashes. The code takes advantage of the guarantee by | ||
// leveldb that keys are lexicographically sorted. | ||
// In the coins vector we collect all coins that belong to a certain tx hash | ||
// (key.hash) and when we have them all (key.hash != last_hash) we write | ||
// them to file using the below lambda function. | ||
// See also https://github.com/bitcoin/bitcoin/issues/25675 | ||
auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) { | ||
afile << last_hash; | ||
WriteCompactSize(afile, coins.size()); | ||
for (const auto& [n, coin] : coins) { | ||
fjahr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
WriteCompactSize(afile, n); | ||
afile << coin; | ||
++written_coins_count; | ||
} | ||
}; | ||
|
||
pcursor->GetKey(key); | ||
last_hash = key.hash; | ||
while (pcursor->Valid()) { | ||
if (iter % 5000 == 0) node.rpc_interruption_point(); | ||
++iter; | ||
if (pcursor->GetKey(key) && pcursor->GetValue(coin)) { | ||
afile << key; | ||
afile << coin; | ||
if (key.hash != last_hash) { | ||
write_coins_to_file(afile, last_hash, coins, written_coins_count); | ||
last_hash = key.hash; | ||
coins.clear(); | ||
} | ||
coins.emplace_back(key.n, coin); | ||
fjahr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
pcursor->Next(); | ||
} | ||
|
||
if (!coins.empty()) { | ||
write_coins_to_file(afile, last_hash, coins, written_coins_count); | ||
} | ||
|
||
CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count); | ||
|
||
afile.fclose(); | ||
|
||
UniValue result(UniValue::VOBJ); | ||
result.pushKV("coins_written", maybe_stats->coins_count); | ||
result.pushKV("coins_written", written_coins_count); | ||
result.pushKV("base_hash", tip->GetBlockHash().ToString()); | ||
result.pushKV("base_height", tip->nHeight); | ||
result.pushKV("path", path.utf8string()); | ||
|
@@ -2772,12 +2804,22 @@ static RPCHelpMan loadtxoutset() | |
} | ||
|
||
SnapshotMetadata metadata; | ||
afile >> metadata; | ||
try { | ||
afile >> metadata; | ||
} catch (const std::ios_base::failure& e) { | ||
throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what())); | ||
} | ||
|
||
uint256 base_blockhash = metadata.m_base_blockhash; | ||
int base_blockheight = metadata.m_base_blockheight; | ||
if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) { | ||
auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights(); | ||
std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); }); | ||
throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, " | ||
"assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString())); | ||
"assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.", | ||
base_blockhash.ToString(), | ||
base_blockheight, | ||
heights_formatted)); | ||
} | ||
CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main, | ||
return chainman.m_blockman.LookupBlockIndex(base_blockhash)); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5660,69 +5660,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot( | |
return false; | ||
} | ||
|
||
COutPoint outpoint; | ||
Coin coin; | ||
const uint64_t coins_count = metadata.m_coins_count; | ||
uint64_t coins_left = metadata.m_coins_count; | ||
|
||
LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString()); | ||
LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString()); | ||
int64_t coins_processed{0}; | ||
|
||
while (coins_left > 0) { | ||
try { | ||
coins_file >> outpoint; | ||
coins_file >> coin; | ||
} catch (const std::ios_base::failure&) { | ||
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
if (coin.nHeight > base_height || | ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash | ||
) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
if (!MoneyRange(coin.out.nValue)) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
Txid txid; | ||
coins_file >> txid; | ||
size_t coins_per_txid{0}; | ||
coins_per_txid = ReadCompactSize(coins_file); | ||
|
||
if (coins_per_txid > coins_left) { | ||
LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this is new code, but uncovered. It would be nice to have a test for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a test for this in #30167 |
||
return false; | ||
} | ||
|
||
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin)); | ||
for (size_t i = 0; i < coins_per_txid; i++) { | ||
COutPoint outpoint; | ||
Coin coin; | ||
outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file)); | ||
outpoint.hash = txid; | ||
coins_file >> coin; | ||
if (coin.nHeight > base_height || | ||
outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash | ||
) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
if (!MoneyRange(coin.out.nValue)) { | ||
LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n", | ||
coins_count - coins_left); | ||
return false; | ||
} | ||
coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin)); | ||
|
||
--coins_left; | ||
++coins_processed; | ||
--coins_left; | ||
++coins_processed; | ||
|
||
if (coins_processed % 1000000 == 0) { | ||
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n", | ||
coins_processed, | ||
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count), | ||
coins_cache.DynamicMemoryUsage() / (1000 * 1000)); | ||
} | ||
if (coins_processed % 1000000 == 0) { | ||
LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n", | ||
coins_processed, | ||
static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count), | ||
coins_cache.DynamicMemoryUsage() / (1000 * 1000)); | ||
} | ||
|
||
// Batch write and flush (if we need to) every so often. | ||
// | ||
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins | ||
// means <5MB of memory imprecision. | ||
if (coins_processed % 120000 == 0) { | ||
if (m_interrupt) { | ||
return false; | ||
} | ||
// Batch write and flush (if we need to) every so often. | ||
// | ||
// If our average Coin size is roughly 41 bytes, checking every 120,000 coins | ||
// means <5MB of memory imprecision. | ||
if (coins_processed % 120000 == 0) { | ||
if (m_interrupt) { | ||
return false; | ||
} | ||
|
||
const auto snapshot_cache_state = WITH_LOCK(::cs_main, | ||
return snapshot_chainstate.GetCoinsCacheSizeState()); | ||
const auto snapshot_cache_state = WITH_LOCK(::cs_main, | ||
return snapshot_chainstate.GetCoinsCacheSizeState()); | ||
|
||
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) { | ||
// This is a hack - we don't know what the actual best block is, but that | ||
// doesn't matter for the purposes of flushing the cache here. We'll set this | ||
// to its correct value (`base_blockhash`) below after the coins are loaded. | ||
coins_cache.SetBestBlock(GetRandHash()); | ||
if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) { | ||
// This is a hack - we don't know what the actual best block is, but that | ||
// doesn't matter for the purposes of flushing the cache here. We'll set this | ||
// to its correct value (`base_blockhash`) below after the coins are loaded. | ||
coins_cache.SetBestBlock(GetRandHash()); | ||
|
||
// No need to acquire cs_main since this chainstate isn't being used yet. | ||
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false); | ||
// No need to acquire cs_main since this chainstate isn't being used yet. | ||
FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false); | ||
} | ||
} | ||
} | ||
} catch (const std::ios_base::failure&) { | ||
LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n", | ||
coins_processed); | ||
return false; | ||
} | ||
} | ||
|
||
|
@@ -5735,7 +5747,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot( | |
|
||
bool out_of_coins{false}; | ||
try { | ||
coins_file >> outpoint; | ||
Txid txid; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I think this should check that the stream is completely empty, no? I suspect that a trailing byte will be left undetected, so it may be better to deserialize a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done in #30167 |
||
coins_file >> txid; | ||
} catch (const std::ios_base::failure&) { | ||
// We expect an exception since we should be out of coins. | ||
out_of_coins = true; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: It would be nice, if new code didn't use globals implicitly. What about storing a reference to the prams (or the magic) in a member field of this struct and using it here? An alternative would be to use params-serialization and embed the magic bytes during serialization.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I addressed this in #30167 using the member field. Using params-serialization wouldn't have made much of a difference given how we use this code right now, I think.