bitcoin · achow101 · May 23, 2024 · Mar 10, 2024 · Mar 10, 2024 · Mar 10, 2024
diff --git a/src/kernel/chainparams.cpp b/src/kernel/chainparams.cpp
@@ -542,3 +542,33 @@ std::unique_ptr<const CChainParams> CChainParams::TestNet()
 {
     return std::make_unique<const CTestNetParams>();
 }
+
+std::vector<int> CChainParams::GetAvailableSnapshotHeights() const
+{
+    std::vector<int> heights;
+    heights.reserve(m_assumeutxo_data.size());
+
+    for (const auto& data : m_assumeutxo_data) {
+        heights.emplace_back(data.height);
+    }
+    return heights;
+}
+
+std::optional<ChainType> GetNetworkForMagic(MessageStartChars& message)
+{
+    const auto mainnet_msg = CChainParams::Main()->MessageStart();
+    const auto testnet_msg = CChainParams::TestNet()->MessageStart();
+    const auto regtest_msg = CChainParams::RegTest({})->MessageStart();
+    const auto signet_msg = CChainParams::SigNet({})->MessageStart();
+
+    if (std::equal(message.begin(), message.end(), mainnet_msg.data())) {
+        return ChainType::MAIN;
+    } else if (std::equal(message.begin(), message.end(), testnet_msg.data())) {
+        return ChainType::TESTNET;
+    } else if (std::equal(message.begin(), message.end(), regtest_msg.data())) {
+        return ChainType::REGTEST;
+    } else if (std::equal(message.begin(), message.end(), signet_msg.data())) {
+        return ChainType::SIGNET;
+    }
+    return std::nullopt;
+}
diff --git a/src/kernel/chainparams.h b/src/kernel/chainparams.h
@@ -93,6 +93,7 @@ class CChainParams
     const Consensus::Params& GetConsensus() const { return consensus; }
     const MessageStartChars& MessageStart() const { return pchMessageStart; }
     uint16_t GetDefaultPort() const { return nDefaultPort; }
+    std::vector<int> GetAvailableSnapshotHeights() const;
 
     const CBlock& GenesisBlock() const { return genesis; }
     /** Default value for -checkmempool and -checkblockindex argument */
@@ -183,4 +184,6 @@ class CChainParams
     ChainTxData chainTxData;
 };
 
+std::optional<ChainType> GetNetworkForMagic(MessageStartChars& pchMessageStart);
+
 #endif // BITCOIN_KERNEL_CHAINPARAMS_H
diff --git a/src/node/utxo_snapshot.h b/src/node/utxo_snapshot.h
@@ -6,27 +6,37 @@
 #ifndef BITCOIN_NODE_UTXO_SNAPSHOT_H
 #define BITCOIN_NODE_UTXO_SNAPSHOT_H
 
+#include <chainparams.h>
+#include <kernel/chainparams.h>
 #include <kernel/cs_main.h>
 #include <serialize.h>
 #include <sync.h>
 #include <uint256.h>
+#include <util/chaintype.h>
 #include <util/fs.h>
 
 #include <cstdint>
 #include <optional>
 #include <string_view>
 
+// UTXO set snapshot magic bytes
+static constexpr std::array<uint8_t, 5> SNAPSHOT_MAGIC_BYTES = {'u', 't', 'x', 'o', 0xff};
+
 class Chainstate;
 
 namespace node {
 //! Metadata describing a serialized version of a UTXO set from which an
 //! assumeutxo Chainstate can be constructed.
 class SnapshotMetadata
 {
+    const uint16_t m_version{1};
+    const std::set<uint16_t> m_supported_versions{1};
 public:
     //! The hash of the block that reflects the tip of the chain for the
     //! UTXO set contained in this snapshot.
     uint256 m_base_blockhash;
+    uint32_t m_base_blockheight;
+
 
     //! The number of coins in the UTXO set contained in this snapshot. Used
     //! during snapshot load to estimate progress of UTXO set reconstruction.
@@ -35,11 +45,55 @@ class SnapshotMetadata
     SnapshotMetadata() { }
     SnapshotMetadata(
         const uint256& base_blockhash,
+        const int base_blockheight,
         uint64_t coins_count) :
             m_base_blockhash(base_blockhash),
+            m_base_blockheight(base_blockheight),
             m_coins_count(coins_count) { }
 
-    SERIALIZE_METHODS(SnapshotMetadata, obj) { READWRITE(obj.m_base_blockhash, obj.m_coins_count); }
+    template <typename Stream>
+    inline void Serialize(Stream& s) const {
+        s << SNAPSHOT_MAGIC_BYTES;
+        s << m_version;
+        s << Params().MessageStart();
+        s << m_base_blockheight;
+        s << m_base_blockhash;
+        s << m_coins_count;
+    }
+
+    template <typename Stream>
+    inline void Unserialize(Stream& s) {
+        // Read the snapshot magic bytes
+        std::array<uint8_t, SNAPSHOT_MAGIC_BYTES.size()> snapshot_magic;
+        s >> snapshot_magic;
+        if (snapshot_magic != SNAPSHOT_MAGIC_BYTES) {
+            throw std::ios_base::failure("Invalid UTXO set snapshot magic bytes. Please check if this is indeed a snapshot file or if you are using an outdated snapshot format.");
+        }
+
+        // Read the version
+        uint16_t version;
+        s >> version;
+        if (m_supported_versions.find(version) == m_supported_versions.end()) {
+            throw std::ios_base::failure(strprintf("Version of snapshot %s does not match any of the supported versions.", version));
+        }
+
+        // Read the network magic (pchMessageStart)
+        MessageStartChars message;
+        s >> message;
+        if (!std::equal(message.begin(), message.end(), Params().MessageStart().data())) {
+            auto metadata_network = GetNetworkForMagic(message);
+            if (metadata_network) {
+                std::string network_string{ChainTypeToString(metadata_network.value())};
+                throw std::ios_base::failure(strprintf("The network of the snapshot (%s) does not match the network of this node (%s).", network_string, Params().GetChainTypeString()));
+            } else {
+                throw std::ios_base::failure("This snapshot has been created for an unrecognized network. This could be a custom signet, a new testnet or possibly caused by data corruption.");
+            }
+        }
+
+        s >> m_base_blockheight;
+        s >> m_base_blockhash;
+        s >> m_coins_count;
+    }
 };
 
 //! The file in the snapshot chainstate dir which stores the base blockhash. This is

diff --git a/src/rpc/blockchain.cpp b/src/rpc/blockchain.cpp
@@ -34,6 +34,7 @@
 #include <rpc/server_util.h>
 #include <rpc/util.h>
 #include <script/descriptor.h>
+#include <serialize.h>
 #include <streams.h>
 #include <sync.h>
 #include <txdb.h>
@@ -2690,29 +2691,60 @@ UniValue CreateUTXOSnapshot(
         tip->nHeight, tip->GetBlockHash().ToString(),
         fs::PathToString(path), fs::PathToString(temppath)));
 
-    SnapshotMetadata metadata{tip->GetBlockHash(), maybe_stats->coins_count};
+    SnapshotMetadata metadata{tip->GetBlockHash(), tip->nHeight, maybe_stats->coins_count};
 
     afile << metadata;
 
     COutPoint key;
+    Txid last_hash;
     Coin coin;
     unsigned int iter{0};
+    size_t written_coins_count{0};
+    std::vector<std::pair<uint32_t, Coin>> coins;
+
+    // To reduce space the serialization format of the snapshot avoids
+    // duplication of tx hashes. The code takes advantage of the guarantee by
+    // leveldb that keys are lexicographically sorted.
+    // In the coins vector we collect all coins that belong to a certain tx hash
+    // (key.hash) and when we have them all (key.hash != last_hash) we write
+    // them to file using the below lambda function.
+    // See also https://github.com/bitcoin/bitcoin/issues/25675
+    auto write_coins_to_file = [&](AutoFile& afile, const Txid& last_hash, const std::vector<std::pair<uint32_t, Coin>>& coins, size_t& written_coins_count) {
+        afile << last_hash;
+        WriteCompactSize(afile, coins.size());
+        for (const auto& [n, coin] : coins) {
+            WriteCompactSize(afile, n);
+            afile << coin;
+            ++written_coins_count;
+        }
+    };
 
+    pcursor->GetKey(key);
+    last_hash = key.hash;
     while (pcursor->Valid()) {
         if (iter % 5000 == 0) node.rpc_interruption_point();
         ++iter;
         if (pcursor->GetKey(key) && pcursor->GetValue(coin)) {
-            afile << key;
-            afile << coin;
+            if (key.hash != last_hash) {
+                write_coins_to_file(afile, last_hash, coins, written_coins_count);
+                last_hash = key.hash;
+                coins.clear();
+            }
+            coins.emplace_back(key.n, coin);
         }
-
         pcursor->Next();
     }
 
+    if (!coins.empty()) {
+        write_coins_to_file(afile, last_hash, coins, written_coins_count);
+    }
+
+    CHECK_NONFATAL(written_coins_count == maybe_stats->coins_count);
+
     afile.fclose();
 
     UniValue result(UniValue::VOBJ);
-    result.pushKV("coins_written", maybe_stats->coins_count);
+    result.pushKV("coins_written", written_coins_count);
     result.pushKV("base_hash", tip->GetBlockHash().ToString());
     result.pushKV("base_height", tip->nHeight);
     result.pushKV("path", path.utf8string());
@@ -2772,12 +2804,22 @@ static RPCHelpMan loadtxoutset()
     }
 
     SnapshotMetadata metadata;
-    afile >> metadata;
+    try {
+        afile >> metadata;
+    } catch (const std::ios_base::failure& e) {
+        throw JSONRPCError(RPC_DESERIALIZATION_ERROR, strprintf("Unable to parse metadata: %s", e.what()));
+    }
 
     uint256 base_blockhash = metadata.m_base_blockhash;
+    int base_blockheight = metadata.m_base_blockheight;
     if (!chainman.GetParams().AssumeutxoForBlockhash(base_blockhash).has_value()) {
+        auto available_heights = chainman.GetParams().GetAvailableSnapshotHeights();
+        std::string heights_formatted = Join(available_heights, ", ", [&](const auto& i) { return ToString(i); });
         throw JSONRPCError(RPC_INTERNAL_ERROR, strprintf("Unable to load UTXO snapshot, "
-            "assumeutxo block hash in snapshot metadata not recognized (%s)", base_blockhash.ToString()));
+            "assumeutxo block hash in snapshot metadata not recognized (hash: %s, height: %s). The following snapshot heights are available: %s.",
+            base_blockhash.ToString(),
+            base_blockheight,
+            heights_formatted));
     }
     CBlockIndex* snapshot_start_block = WITH_LOCK(::cs_main,
             return chainman.m_blockman.LookupBlockIndex(base_blockhash));

diff --git a/src/test/validation_chainstatemanager_tests.cpp b/src/test/validation_chainstatemanager_tests.cpp
@@ -226,10 +226,13 @@ struct SnapshotTestSetup : TestChain100Setup {
                 // A UTXO is missing but count is correct
                 metadata.m_coins_count -= 1;
 
-                COutPoint outpoint;
+                Txid txid;
+                auto_infile >> txid;
+                // coins size
+                (void)ReadCompactSize(auto_infile);
+                // vout index
+                (void)ReadCompactSize(auto_infile);
                 Coin coin;
-
-                auto_infile >> outpoint;
                 auto_infile >> coin;
         }));
 

diff --git a/src/validation.cpp b/src/validation.cpp
@@ -5660,69 +5660,81 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
         return false;
     }
 
-    COutPoint outpoint;
-    Coin coin;
     const uint64_t coins_count = metadata.m_coins_count;
     uint64_t coins_left = metadata.m_coins_count;
 
-    LogPrintf("[snapshot] loading coins from snapshot %s\n", base_blockhash.ToString());
+    LogPrintf("[snapshot] loading %d coins from snapshot %s\n", coins_left, base_blockhash.ToString());
     int64_t coins_processed{0};
 
     while (coins_left > 0) {
         try {
-            coins_file >> outpoint;
-            coins_file >> coin;
-        } catch (const std::ios_base::failure&) {
-            LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
-                      coins_count - coins_left);
-            return false;
-        }
-        if (coin.nHeight > base_height ||
-            outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
-        ) {
-            LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
-                      coins_count - coins_left);
-            return false;
-        }
-        if (!MoneyRange(coin.out.nValue)) {
-            LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
-                      coins_count - coins_left);
-            return false;
-        }
+            Txid txid;
+            coins_file >> txid;
+            size_t coins_per_txid{0};
+            coins_per_txid = ReadCompactSize(coins_file);
+
+            if (coins_per_txid > coins_left) {
+                LogPrintf("[snapshot] mismatch in coins count in snapshot metadata and actual snapshot data\n");
+                return false;
+            }
 
-        coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
+            for (size_t i = 0; i < coins_per_txid; i++) {
+                COutPoint outpoint;
+                Coin coin;
+                outpoint.n = static_cast<uint32_t>(ReadCompactSize(coins_file));
+                outpoint.hash = txid;
+                coins_file >> coin;
+                if (coin.nHeight > base_height ||
+                    outpoint.n >= std::numeric_limits<decltype(outpoint.n)>::max() // Avoid integer wrap-around in coinstats.cpp:ApplyHash
+                ) {
+                    LogPrintf("[snapshot] bad snapshot data after deserializing %d coins\n",
+                              coins_count - coins_left);
+                    return false;
+                }
+                if (!MoneyRange(coin.out.nValue)) {
+                    LogPrintf("[snapshot] bad snapshot data after deserializing %d coins - bad tx out value\n",
+                              coins_count - coins_left);
+                    return false;
+                }
+                coins_cache.EmplaceCoinInternalDANGER(std::move(outpoint), std::move(coin));
 
-        --coins_left;
-        ++coins_processed;
+                --coins_left;
+                ++coins_processed;
 
-        if (coins_processed % 1000000 == 0) {
-            LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
-                coins_processed,
-                static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
-                coins_cache.DynamicMemoryUsage() / (1000 * 1000));
-        }
+                if (coins_processed % 1000000 == 0) {
+                    LogPrintf("[snapshot] %d coins loaded (%.2f%%, %.2f MB)\n",
+                        coins_processed,
+                        static_cast<float>(coins_processed) * 100 / static_cast<float>(coins_count),
+                        coins_cache.DynamicMemoryUsage() / (1000 * 1000));
+                }
 
-        // Batch write and flush (if we need to) every so often.
-        //
-        // If our average Coin size is roughly 41 bytes, checking every 120,000 coins
-        // means <5MB of memory imprecision.
-        if (coins_processed % 120000 == 0) {
-            if (m_interrupt) {
-                return false;
-            }
+                // Batch write and flush (if we need to) every so often.
+                //
+                // If our average Coin size is roughly 41 bytes, checking every 120,000 coins
+                // means <5MB of memory imprecision.
+                if (coins_processed % 120000 == 0) {
+                    if (m_interrupt) {
+                        return false;
+                    }
 
-            const auto snapshot_cache_state = WITH_LOCK(::cs_main,
-                return snapshot_chainstate.GetCoinsCacheSizeState());
+                    const auto snapshot_cache_state = WITH_LOCK(::cs_main,
+                        return snapshot_chainstate.GetCoinsCacheSizeState());
 
-            if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
-                // This is a hack - we don't know what the actual best block is, but that
-                // doesn't matter for the purposes of flushing the cache here. We'll set this
-                // to its correct value (`base_blockhash`) below after the coins are loaded.
-                coins_cache.SetBestBlock(GetRandHash());
+                    if (snapshot_cache_state >= CoinsCacheSizeState::CRITICAL) {
+                        // This is a hack - we don't know what the actual best block is, but that
+                        // doesn't matter for the purposes of flushing the cache here. We'll set this
+                        // to its correct value (`base_blockhash`) below after the coins are loaded.
+                        coins_cache.SetBestBlock(GetRandHash());
 
-                // No need to acquire cs_main since this chainstate isn't being used yet.
-                FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
+                        // No need to acquire cs_main since this chainstate isn't being used yet.
+                        FlushSnapshotToDisk(coins_cache, /*snapshot_loaded=*/false);
+                    }
+                }
             }
+        } catch (const std::ios_base::failure&) {
+            LogPrintf("[snapshot] bad snapshot format or truncated snapshot after deserializing %d coins\n",
+                      coins_processed);
+            return false;
         }
     }
 
@@ -5735,7 +5747,8 @@ bool ChainstateManager::PopulateAndValidateSnapshot(
 
     bool out_of_coins{false};
     try {
-        coins_file >> outpoint;
+        Txid txid;
+        coins_file >> txid;
     } catch (const std::ios_base::failure&) {
         // We expect an exception since we should be out of coins.
         out_of_coins = true;