Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

blockstorage: Separate reindexing from saving new blocks #29975

Merged
merged 6 commits into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/bench/readblock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ static FlatFilePos WriteBlockToDisk(ChainstateManager& chainman)
CBlock block;
stream >> TX_WITH_WITNESS(block);

return chainman.m_blockman.SaveBlockToDisk(block, 0, nullptr);
return chainman.m_blockman.SaveBlockToDisk(block, 0);
}

static void ReadBlockFromDiskTest(benchmark::Bench& bench)
Expand Down
167 changes: 86 additions & 81 deletions src/node/blockstorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ fs::path BlockManager::GetBlockPosFilename(const FlatFilePos& pos) const
return BlockFileSeq().FileName(pos);
}

bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown)
FlatFilePos BlockManager::FindNextBlockPos(unsigned int nAddSize, unsigned int nHeight, uint64_t nTime)
mzumsande marked this conversation as resolved.
Show resolved Hide resolved
{
LOCK(cs_LastBlockFile);

Expand All @@ -863,88 +863,101 @@ bool BlockManager::FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigne
}
const int last_blockfile = m_blockfile_cursors[chain_type]->file_num;

int nFile = fKnown ? pos.nFile : last_blockfile;
int nFile = last_blockfile;
ryanofsky marked this conversation as resolved.
Show resolved Hide resolved
if (static_cast<int>(m_blockfile_info.size()) <= nFile) {
m_blockfile_info.resize(nFile + 1);
}

bool finalize_undo = false;
if (!fKnown) {
unsigned int max_blockfile_size{MAX_BLOCKFILE_SIZE};
// Use smaller blockfiles in test-only -fastprune mode - but avoid
// the possibility of having a block not fit into the block file.
if (m_opts.fast_prune) {
max_blockfile_size = 0x10000; // 64kiB
if (nAddSize >= max_blockfile_size) {
// dynamically adjust the blockfile size to be larger than the added size
max_blockfile_size = nAddSize + 1;
}
unsigned int max_blockfile_size{MAX_BLOCKFILE_SIZE};
// Use smaller blockfiles in test-only -fastprune mode - but avoid
// the possibility of having a block not fit into the block file.
if (m_opts.fast_prune) {
max_blockfile_size = 0x10000; // 64kiB
if (nAddSize >= max_blockfile_size) {
// dynamically adjust the blockfile size to be larger than the added size
max_blockfile_size = nAddSize + 1;
}
assert(nAddSize < max_blockfile_size);

while (m_blockfile_info[nFile].nSize + nAddSize >= max_blockfile_size) {
// when the undo file is keeping up with the block file, we want to flush it explicitly
// when it is lagging behind (more blocks arrive than are being connected), we let the
// undo block write case handle it
finalize_undo = (static_cast<int>(m_blockfile_info[nFile].nHeightLast) ==
Assert(m_blockfile_cursors[chain_type])->undo_height);

// Try the next unclaimed blockfile number
nFile = this->MaxBlockfileNum() + 1;
// Set to increment MaxBlockfileNum() for next iteration
m_blockfile_cursors[chain_type] = BlockfileCursor{nFile};

if (static_cast<int>(m_blockfile_info.size()) <= nFile) {
m_blockfile_info.resize(nFile + 1);
}
}
assert(nAddSize < max_blockfile_size);

while (m_blockfile_info[nFile].nSize + nAddSize >= max_blockfile_size) {
// when the undo file is keeping up with the block file, we want to flush it explicitly
// when it is lagging behind (more blocks arrive than are being connected), we let the
// undo block write case handle it
finalize_undo = (static_cast<int>(m_blockfile_info[nFile].nHeightLast) ==
Assert(m_blockfile_cursors[chain_type])->undo_height);

// Try the next unclaimed blockfile number
nFile = this->MaxBlockfileNum() + 1;
// Set to increment MaxBlockfileNum() for next iteration
m_blockfile_cursors[chain_type] = BlockfileCursor{nFile};

if (static_cast<int>(m_blockfile_info.size()) <= nFile) {
m_blockfile_info.resize(nFile + 1);
}
pos.nFile = nFile;
pos.nPos = m_blockfile_info[nFile].nSize;
}
FlatFilePos pos;
pos.nFile = nFile;
pos.nPos = m_blockfile_info[nFile].nSize;

if (nFile != last_blockfile) {
if (!fKnown) {
LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s (onto %i) (height %i)\n",
last_blockfile, m_blockfile_info[last_blockfile].ToString(), nFile, nHeight);

// Do not propagate the return code. The flush concerns a previous block
// and undo file that has already been written to. If a flush fails
// here, and we crash, there is no expected additional block data
// inconsistency arising from the flush failure here. However, the undo
// data may be inconsistent after a crash if the flush is called during
// a reindex. A flush error might also leave some of the data files
// untrimmed.
if (!FlushBlockFile(last_blockfile, !fKnown, finalize_undo)) {
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning,
"Failed to flush previous block file %05i (finalize=%i, finalize_undo=%i) before opening new block file %05i\n",
last_blockfile, !fKnown, finalize_undo, nFile);
}
LogPrint(BCLog::BLOCKSTORAGE, "Leaving block file %i: %s (onto %i) (height %i)\n",
last_blockfile, m_blockfile_info[last_blockfile].ToString(), nFile, nHeight);

// Do not propagate the return code. The flush concerns a previous block
// and undo file that has already been written to. If a flush fails
// here, and we crash, there is no expected additional block data
// inconsistency arising from the flush failure here. However, the undo
// data may be inconsistent after a crash if the flush is called during
// a reindex. A flush error might also leave some of the data files
// untrimmed.
if (!FlushBlockFile(last_blockfile, /*fFinalize=*/true, finalize_undo)) {
LogPrintLevel(BCLog::BLOCKSTORAGE, BCLog::Level::Warning,
"Failed to flush previous block file %05i (finalize=1, finalize_undo=%i) before opening new block file %05i\n",
last_blockfile, finalize_undo, nFile);
}
// No undo data yet in the new file, so reset our undo-height tracking.
m_blockfile_cursors[chain_type] = BlockfileCursor{nFile};
}

m_blockfile_info[nFile].AddBlock(nHeight, nTime);
if (fKnown) {
m_blockfile_info[nFile].nSize = std::max(pos.nPos + nAddSize, m_blockfile_info[nFile].nSize);
} else {
m_blockfile_info[nFile].nSize += nAddSize;
m_blockfile_info[nFile].nSize += nAddSize;

bool out_of_space;
size_t bytes_allocated = BlockFileSeq().Allocate(pos, nAddSize, out_of_space);
if (out_of_space) {
m_opts.notifications.fatalError(_("Disk space is too low!"));
return {};
}
if (bytes_allocated != 0 && IsPruneMode()) {
m_check_for_pruning = true;
}

if (!fKnown) {
bool out_of_space;
size_t bytes_allocated = BlockFileSeq().Allocate(pos, nAddSize, out_of_space);
if (out_of_space) {
m_opts.notifications.fatalError(_("Disk space is too low!"));
return false;
}
if (bytes_allocated != 0 && IsPruneMode()) {
m_check_for_pruning = true;
}
m_dirty_fileinfo.insert(nFile);
return pos;
}

void BlockManager::UpdateBlockInfo(const CBlock& block, unsigned int nHeight, const FlatFilePos& pos)
{
LOCK(cs_LastBlockFile);

// Update the cursor so it points to the last file.
mzumsande marked this conversation as resolved.
Show resolved Hide resolved
const BlockfileType chain_type{BlockfileTypeForHeight(nHeight)};
auto& cursor{m_blockfile_cursors[chain_type]};
if (!cursor || cursor->file_num < pos.nFile) {
m_blockfile_cursors[chain_type] = BlockfileCursor{pos.nFile};
}

// Update the file information with the current block.
const unsigned int added_size = ::GetSerializeSize(TX_WITH_WITNESS(block));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In commit "blockstorage: split up FindBlockPos function" (064859b)

note: At this point in the PR, GetSerializeSize is already called by the SaveBlockToDisk function calling it, so calling it again here is a bit inefficient. But this is resolved by the next commit moving the the UpdateBlockInfo call

const int nFile = pos.nFile;
if (static_cast<int>(m_blockfile_info.size()) <= nFile) {
m_blockfile_info.resize(nFile + 1);
}
m_blockfile_info[nFile].AddBlock(nHeight, block.GetBlockTime());
m_blockfile_info[nFile].nSize = std::max(pos.nPos + added_size, m_blockfile_info[nFile].nSize);
m_dirty_fileinfo.insert(nFile);
return true;
}

bool BlockManager::FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize)
Expand Down Expand Up @@ -1014,7 +1027,7 @@ bool BlockManager::WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValid
// we want to flush the rev (undo) file once we've written the last block, which is indicated by the last height
// in the block file info as below; note that this does not catch the case where the undo writes are keeping up
// with the block writes (usually when a synced up node is getting newly mined blocks) -- this case is caught in
// the FindBlockPos function
// the FindNextBlockPos function
if (_pos.nFile < cursor.file_num && static_cast<uint32_t>(block.nHeight) == m_blockfile_info[_pos.nFile].nHeightLast) {
// Do not propagate the return code, a failed flush here should not
// be an indication for a failed write. If it were propagated here,
Expand Down Expand Up @@ -1130,28 +1143,20 @@ bool BlockManager::ReadRawBlockFromDisk(std::vector<uint8_t>& block, const FlatF
return true;
}

FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, const FlatFilePos* dbp)
FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight)
{
unsigned int nBlockSize = ::GetSerializeSize(TX_WITH_WITNESS(block));
FlatFilePos blockPos;
const auto position_known {dbp != nullptr};
if (position_known) {
blockPos = *dbp;
} else {
// when known, blockPos.nPos points at the offset of the block data in the blk file. that already accounts for
// the serialization header present in the file (the 4 magic message start bytes + the 4 length bytes = 8 bytes = BLOCK_SERIALIZATION_HEADER_SIZE).
// we add BLOCK_SERIALIZATION_HEADER_SIZE only for new blocks since they will have the serialization header added when written to disk.
nBlockSize += static_cast<unsigned int>(BLOCK_SERIALIZATION_HEADER_SIZE);
}
if (!FindBlockPos(blockPos, nBlockSize, nHeight, block.GetBlockTime(), position_known)) {
LogError("%s: FindBlockPos failed\n", __func__);
// Account for the 4 magic message start bytes + the 4 length bytes (8 bytes total,
// defined as BLOCK_SERIALIZATION_HEADER_SIZE)
nBlockSize += static_cast<unsigned int>(BLOCK_SERIALIZATION_HEADER_SIZE);
FlatFilePos blockPos{FindNextBlockPos(nBlockSize, nHeight, block.GetBlockTime())};
if (blockPos.IsNull()) {
LogError("%s: FindNextBlockPos failed\n", __func__);
return FlatFilePos();
}
if (!position_known) {
if (!WriteBlockToDisk(block, blockPos)) {
m_opts.notifications.fatalError(_("Failed to write block."));
return FlatFilePos();
}
if (!WriteBlockToDisk(block, blockPos)) {
m_opts.notifications.fatalError(_("Failed to write block."));
return FlatFilePos();
}
return blockPos;
}
Expand Down
39 changes: 35 additions & 4 deletions src/node/blockstorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,16 @@ class BlockManager
/** Return false if undo file flushing fails. */
[[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false);

[[nodiscard]] bool FindBlockPos(FlatFilePos& pos, unsigned int nAddSize, unsigned int nHeight, uint64_t nTime, bool fKnown);
/**
* Helper function performing various preparations before a block can be saved to disk:
* Returns the correct position for the block to be saved, which may be in the current or a new
* block file depending on nAddSize. May flush the previous blockfile to disk if full, updates
* blockfile info, and checks if there is enough disk space to save the block.
*
mzumsande marked this conversation as resolved.
Show resolved Hide resolved
* The nAddSize argument passed to this function should include not just the size of the serialized CBlock, but also the size of
* separator fields which are written before it by WriteBlockToDisk (BLOCK_SERIALIZATION_HEADER_SIZE).
*/
[[nodiscard]] FlatFilePos FindNextBlockPos(unsigned int nAddSize, unsigned int nHeight, uint64_t nTime);
ryanofsky marked this conversation as resolved.
Show resolved Hide resolved
[[nodiscard]] bool FlushChainstateBlockFile(int tip_height);
bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize);

Expand All @@ -164,6 +173,12 @@ class BlockManager

AutoFile OpenUndoFile(const FlatFilePos& pos, bool fReadOnly = false) const;

/**
* Write a block to disk. The pos argument passed to this function is modified by this call. Before this call, it should
* point to an unused file location where separator fields will be written, followed by the serialized CBlock data.
* After this call, it will point to the beginning of the serialized CBlock data, after the separator fields
* (BLOCK_SERIALIZATION_HEADER_SIZE)
*/
bool WriteBlockToDisk(const CBlock& block, FlatFilePos& pos) const;
bool UndoWriteToDisk(const CBlockUndo& blockundo, FlatFilePos& pos, const uint256& hashBlock) const;

Expand Down Expand Up @@ -206,7 +221,7 @@ class BlockManager
//! effectively.
//!
//! This data structure maintains separate blockfile number cursors for each
//! BlockfileType. The ASSUMED state is initialized, when necessary, in FindBlockPos().
//! BlockfileType. The ASSUMED state is initialized, when necessary, in FindNextBlockPos().
//!
//! The first element is the NORMAL cursor, second is ASSUMED.
std::array<std::optional<BlockfileCursor>, BlockfileType::NUM_TYPES>
Expand Down Expand Up @@ -312,8 +327,24 @@ class BlockManager
bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex& block)
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);

/** Store block on disk. If dbp is not nullptr, then it provides the known position of the block within a block file on disk. */
FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight, const FlatFilePos* dbp);
/** Store block on disk and update block file statistics.
*
* @param[in] block the block to be stored
* @param[in] nHeight the height of the block
*
* @returns in case of success, the position to which the block was written to
* in case of an error, an empty FlatFilePos
*/
FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight);

mzumsande marked this conversation as resolved.
Show resolved Hide resolved
/** Update blockfile info while processing a block during reindex. The block must be available on disk.
*
* @param[in] block the block being processed
* @param[in] nHeight the height of the block
* @param[in] pos the position of the serialized CBlock on disk. This is the position returned
* by WriteBlockToDisk pointing at the CBlock, not the separator fields before it
*/
void UpdateBlockInfo(const CBlock& block, unsigned int nHeight, const FlatFilePos& pos);

/** Whether running in -prune mode. */
[[nodiscard]] bool IsPruneMode() const { return m_prune_mode; }
Expand Down
24 changes: 10 additions & 14 deletions src/test/blockmanager_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,20 @@ BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
};
BlockManager blockman{*Assert(m_node.shutdown), blockman_opts};
// simulate adding a genesis block normally
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, nullptr).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// simulate what happens during reindex
// simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
// the block is found at offset 8 because there is an 8 byte serialization header
// consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE};
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, &pos).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
const FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE};
blockman.UpdateBlockInfo(params->GenesisBlock(), 0, pos);
// now simulate what happens after reindex for the first new block processed
// the actual block contents don't matter, just that it's a block.
// verify that the write position is at offset 0x12d.
// this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur
// 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
// add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
FlatFilePos actual{blockman.SaveBlockToDisk(params->GenesisBlock(), 1, nullptr)};
FlatFilePos actual{blockman.SaveBlockToDisk(params->GenesisBlock(), 1)};
BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(TX_WITH_WITNESS(params->GenesisBlock())) + BLOCK_SERIALIZATION_HEADER_SIZE);
}

Expand Down Expand Up @@ -156,12 +156,11 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file)
// Blockstore is empty
BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), 0);

// Write the first block; dbp=nullptr means this block doesn't already have a disk
// location, so allocate a free location and write it there.
FlatFilePos pos1{blockman.SaveBlockToDisk(block1, /*nHeight=*/1, /*dbp=*/nullptr)};
// Write the first block to a new location.
FlatFilePos pos1{blockman.SaveBlockToDisk(block1, /*nHeight=*/1)};

// Write second block
FlatFilePos pos2{blockman.SaveBlockToDisk(block2, /*nHeight=*/2, /*dbp=*/nullptr)};
FlatFilePos pos2{blockman.SaveBlockToDisk(block2, /*nHeight=*/2)};

// Two blocks in the file
BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + BLOCK_SERIALIZATION_HEADER_SIZE) * 2);
Expand All @@ -181,22 +180,19 @@ BOOST_AUTO_TEST_CASE(blockmanager_flush_block_file)
BOOST_CHECK_EQUAL(read_block.nVersion, 2);
}

// When FlatFilePos* dbp is given, SaveBlockToDisk() will not write or
// overwrite anything to the flat file block storage. It will, however,
// update the blockfile metadata. This is to facilitate reindexing
// when the user has the blocks on disk but the metadata is being rebuilt.
// During reindex, the flat file block storage will not be written to.
// UpdateBlockInfo will, however, update the blockfile metadata.
// Verify this behavior by attempting (and failing) to write block 3 data
// to block 2 location.
CBlockFileInfo* block_data = blockman.GetBlockFileInfo(0);
BOOST_CHECK_EQUAL(block_data->nBlocks, 2);
BOOST_CHECK(blockman.SaveBlockToDisk(block3, /*nHeight=*/3, /*dbp=*/&pos2) == pos2);
blockman.UpdateBlockInfo(block3, /*nHeight=*/3, /*pos=*/pos2);
// Metadata is updated...
BOOST_CHECK_EQUAL(block_data->nBlocks, 3);
// ...but there are still only two blocks in the file
BOOST_CHECK_EQUAL(blockman.CalculateCurrentUsage(), (TEST_BLOCK_SIZE + BLOCK_SERIALIZATION_HEADER_SIZE) * 2);

// Block 2 was not overwritten:
// SaveBlockToDisk() did not call WriteBlockToDisk() because `FlatFilePos* dbp` was non-null
blockman.ReadBlockFromDisk(read_block, pos2);
BOOST_CHECK_EQUAL(read_block.nVersion, 2);
}
Expand Down