Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add historical state regen #6033

Open
wants to merge 22 commits into
base: unstable
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
583bc18
feat: add historical state regen
wemeetagain Oct 11, 2023
8c1790b
chore: wire up metrics
wemeetagain Oct 11, 2023
93c1981
chore: make historical state regen module optional
wemeetagain Oct 13, 2023
06e58af
chore: persist pubkey cache across historical state regen runs
wemeetagain Oct 13, 2023
623b58c
chore: cleanup worker termination
wemeetagain Oct 13, 2023
deba946
Merge branch 'unstable' into cayman/historical-state-regen
wemeetagain Oct 13, 2023
b8d6113
chore: fix worker usage
wemeetagain Oct 23, 2023
0315e1a
fix: swap Level for ClassicLevel for multithreading
matthewkeil Nov 6, 2023
cdf3732
fix: getStateV2 state handling hack
matthewkeil Nov 6, 2023
a8f719d
Merge branch 'unstable' into cayman/historical-state-regen
wemeetagain Jan 22, 2024
6d9d256
chore: update classic-level
wemeetagain Jan 22, 2024
5a812e1
chore: fix build errors
wemeetagain Jan 22, 2024
dabd767
chore: add comments
wemeetagain Jan 22, 2024
51fc29a
chore: fix test worker path
wemeetagain Jan 22, 2024
21bb96b
chore: simplify function naming
wemeetagain Jan 22, 2024
dc45097
chore: optimize getSlotFromOffset
wemeetagain Jan 22, 2024
d32f103
chore: refactor to avoid needless deserialization
wemeetagain Jan 22, 2024
25b1c9f
fix: update metrics names
wemeetagain Jan 22, 2024
b6fd577
feat: add historical state regen dashboard
wemeetagain Jan 22, 2024
e8c9d27
fix: update vm dashboards with historical state worker
wemeetagain Jan 22, 2024
03a241c
chore: fix test data
wemeetagain Jan 22, 2024
6001521
feat: transfer state across worker boundary
wemeetagain Jan 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2,646 changes: 2,646 additions & 0 deletions dashboards/lodestar_historical_state_regen.json

Large diffs are not rendered by default.

318 changes: 270 additions & 48 deletions dashboards/lodestar_vm_host.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion packages/api/src/beacon/routes/debug.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ export type Api = {
format?: ResponseFormat
): Promise<
ApiClientResponse<{
[HttpStatusCode.OK]: Uint8Array | {data: allForks.BeaconState; executionOptimistic: ExecutionOptimistic};
wemeetagain marked this conversation as resolved.
Show resolved Hide resolved
[HttpStatusCode.OK]: Uint8Array | {data: allForks.BeaconState};
}>
>;

Expand Down
2 changes: 1 addition & 1 deletion packages/api/test/unit/beacon/testData/debug.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export const testData: GenericServerTestCases<Api> = {
},
getState: {
args: ["head", "json"],
res: {executionOptimistic: true, data: ssz.phase0.BeaconState.defaultValue()},
res: {data: ssz.phase0.BeaconState.defaultValue()},
},
getStateV2: {
args: ["head", "json"],
Expand Down
14 changes: 7 additions & 7 deletions packages/beacon-node/src/api/impl/beacon/state/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
filterStateValidatorsByStatus,
getStateValidatorIndex,
getValidatorStatus,
resolveStateId,
getStateResponse,
toValidatorResponse,
} from "./utils.js";

Expand All @@ -25,7 +25,7 @@ export function getBeaconStateApi({
async function getState(
stateId: routes.beacon.StateId
): Promise<{state: BeaconStateAllForks; executionOptimistic: boolean}> {
return resolveStateId(chain, stateId);
return getStateResponse(chain, stateId);
}

return {
Expand Down Expand Up @@ -77,7 +77,7 @@ export function getBeaconStateApi({
},

async getStateValidators(stateId, filters) {
const {state, executionOptimistic} = await resolveStateId(chain, stateId);
const {state, executionOptimistic} = await getStateResponse(chain, stateId);
const currentEpoch = getCurrentEpoch(state);
const {validators, balances} = state; // Get the validators sub tree once for all the loop
const {pubkey2index} = chain.getHeadState().epochCtx;
Expand Down Expand Up @@ -128,7 +128,7 @@ export function getBeaconStateApi({
},

async getStateValidator(stateId, validatorId) {
const {state, executionOptimistic} = await resolveStateId(chain, stateId);
const {state, executionOptimistic} = await getStateResponse(chain, stateId);
const {pubkey2index} = chain.getHeadState().epochCtx;

const resp = getStateValidatorIndex(validatorId, state, pubkey2index);
Expand All @@ -149,7 +149,7 @@ export function getBeaconStateApi({
},

async getStateValidatorBalances(stateId, indices) {
const {state, executionOptimistic} = await resolveStateId(chain, stateId);
const {state, executionOptimistic} = await getStateResponse(chain, stateId);

if (indices) {
const headState = chain.getHeadState();
Expand Down Expand Up @@ -186,7 +186,7 @@ export function getBeaconStateApi({
},

async getEpochCommittees(stateId, filters) {
const {state, executionOptimistic} = await resolveStateId(chain, stateId);
const {state, executionOptimistic} = await getStateResponse(chain, stateId);

const stateCached = state as CachedBeaconStateAltair;
if (stateCached.epochCtx === undefined) {
Expand Down Expand Up @@ -228,7 +228,7 @@ export function getBeaconStateApi({
*/
async getEpochSyncCommittees(stateId, epoch) {
// TODO: Should pick a state with the provided epoch too
const {state, executionOptimistic} = await resolveStateId(chain, stateId);
const {state, executionOptimistic} = await getStateResponse(chain, stateId);

// TODO: If possible compute the syncCommittees in advance of the fork and expose them here.
// So the validators can prepare and potentially attest the first block. Not critical tho, it's very unlikely
Expand Down
85 changes: 56 additions & 29 deletions packages/beacon-node/src/api/impl/beacon/state/utils.ts
Original file line number Diff line number Diff line change
@@ -1,56 +1,41 @@
import {fromHexString} from "@chainsafe/ssz";
import {ChainForkConfig} from "@lodestar/config";
import {routes} from "@lodestar/api";
import {FAR_FUTURE_EPOCH, GENESIS_SLOT} from "@lodestar/params";
import {BeaconStateAllForks, PubkeyIndexMap} from "@lodestar/state-transition";
import {BLSPubkey, phase0} from "@lodestar/types";
import {BLSPubkey, allForks, phase0} from "@lodestar/types";
import {Epoch, ValidatorIndex} from "@lodestar/types";
import {IBeaconChain, StateGetOpts} from "../../../../chain/index.js";
import {IBeaconChain} from "../../../../chain/index.js";
import {ApiError, ValidationError} from "../../errors.js";
import {isOptimisticBlock} from "../../../../util/forkChoice.js";
import {getSlotFromBeaconStateSerialized} from "../../../../util/sszBytes.js";

export async function resolveStateId(
chain: IBeaconChain,
stateId: routes.beacon.StateId,
opts?: StateGetOpts
): Promise<{state: BeaconStateAllForks; executionOptimistic: boolean}> {
const stateRes = await resolveStateIdOrNull(chain, stateId, opts);
if (!stateRes) {
throw new ApiError(404, `No state found for id '${stateId}'`);
}

return stateRes;
export function deserializeBeaconStateSerialized(config: ChainForkConfig, data: Uint8Array): allForks.BeaconState {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
export function deserializeBeaconStateSerialized(config: ChainForkConfig, data: Uint8Array): allForks.BeaconState {
export function deserializeBeaconState(config: ChainForkConfig, data: Uint8Array): allForks.BeaconState {

const slot = getSlotFromBeaconStateSerialized(data);
return config.getForkTypes(slot).BeaconState.deserialize(data);
}

async function resolveStateIdOrNull(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like this being renamed. good call!

chain: IBeaconChain,
stateId: routes.beacon.StateId,
opts?: StateGetOpts
): Promise<{state: BeaconStateAllForks; executionOptimistic: boolean} | null> {
export function resolveStateId(chain: IBeaconChain, stateId: routes.beacon.StateId): string | number {
if (stateId === "head") {
// TODO: This is not OK, head and headState must be fetched atomically
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two questions:

  1. is the issue about mutability in case there is a slot boundary while this resolves? Or is the issue that getHeadState can mutate state during its run?

  2. If there is risk, should we leave this comment here, maybe move to getStateResponse or perhaps convert it to a TODO to make sure this doesn't get papered over?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the issue before was that the calls were unlinked and also async.
eg:

const head = chain.forkChoice.getHead();
// what happens if the head changes before this next line
const headState = await chain.getHeadState();

Then they were unlinked but sync (no problem, stale comment).

const head = chain.forkChoice.getHead();
// no chance for the head to change before this next line
const headState = chain.getHeadState();

Now they are linked / looked up by state root

const head = chain.forkChoice.getHead();
// ...
// the head may change, but the head _at the time of request_ can still be fetched
const headState = chain.getStateByRoot(head.stateRoot);

const head = chain.forkChoice.getHead();
const headState = chain.getHeadState();
return {state: headState, executionOptimistic: isOptimisticBlock(head)};
return head.stateRoot;
}

if (stateId === "genesis") {
return chain.getStateBySlot(GENESIS_SLOT, opts);
return GENESIS_SLOT;
}

if (stateId === "finalized") {
const block = chain.forkChoice.getFinalizedBlock();
const state = await chain.getStateByStateRoot(block.stateRoot, opts);
return state && {state: state.state, executionOptimistic: isOptimisticBlock(block)};
return block.stateRoot;
}

if (stateId === "justified") {
const block = chain.forkChoice.getJustifiedBlock();
const state = await chain.getStateByStateRoot(block.stateRoot, opts);
return state && {state: state.state, executionOptimistic: isOptimisticBlock(block)};
return block.stateRoot;
}

if (typeof stateId === "string" && stateId.startsWith("0x")) {
return chain.getStateByStateRoot(stateId, opts);
return stateId as string;
}

// id must be slot
Expand All @@ -59,7 +44,49 @@ async function resolveStateIdOrNull(
throw new ValidationError(`Invalid block id '${stateId}'`, "blockId");
}

return chain.getStateBySlot(blockSlot, opts);
return blockSlot;
}

export async function getStateResponse(
chain: IBeaconChain,
stateId: routes.beacon.StateId
): Promise<{state: BeaconStateAllForks; executionOptimistic: boolean}> {
const rootOrSlot = resolveStateId(chain, stateId);

let state: {state: BeaconStateAllForks; executionOptimistic: boolean} | null = null;
if (typeof rootOrSlot === "string") {
state = await chain.getStateByStateRoot(rootOrSlot);
} else if (typeof rootOrSlot === "number") {
state = await chain.getStateBySlot(rootOrSlot);
}

if (state == null) {
throw new ApiError(404, `No state found for id '${stateId}'`);
}
return state;
}

export async function getStateResponseWithRegen(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be combined with getStateResponse. can pass a third arg allowRegen that passes to the getStateByStateRoot, getStateBySlot and gates getHistoricalStateBySlot

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good

chain: IBeaconChain,
stateId: routes.beacon.StateId
): Promise<{state: BeaconStateAllForks | Uint8Array; executionOptimistic: boolean}> {
const rootOrSlot = resolveStateId(chain, stateId);

let state: {state: BeaconStateAllForks | Uint8Array; executionOptimistic: boolean} | null = null;
if (typeof rootOrSlot === "string") {
state = await chain.getStateByStateRoot(rootOrSlot, {allowRegen: true});
} else if (typeof rootOrSlot === "number") {
if (rootOrSlot >= chain.forkChoice.getFinalizedBlock().slot) {
state = await chain.getStateBySlot(rootOrSlot, {allowRegen: true});
} else {
state = await chain.getHistoricalStateBySlot(rootOrSlot);
}
}

if (state == null) {
throw new ApiError(404, `No state found for id '${stateId}'`);
}
return state;
}

/**
Expand Down
37 changes: 27 additions & 10 deletions packages/beacon-node/src/api/impl/debug/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {routes, ServerApi, ResponseFormat} from "@lodestar/api";
import {resolveStateId} from "../beacon/state/utils.js";
import {deserializeBeaconStateSerialized, getStateResponseWithRegen} from "../beacon/state/utils.js";
import {ApiModules} from "../types.js";
import {isOptimisticBlock} from "../../../util/forkChoice.js";

Expand Down Expand Up @@ -37,24 +37,41 @@ export function getDebugApi({chain, config}: Pick<ApiModules, "chain" | "config"
},

async getState(stateId: string | number, format?: ResponseFormat) {
const {state} = await resolveStateId(chain, stateId, {allowRegen: true});
const {state} = await getStateResponseWithRegen(chain, stateId);
if (format === "ssz") {
// Casting to any otherwise Typescript doesn't like the multi-type return
// eslint-disable-next-line @typescript-eslint/no-unsafe-return, @typescript-eslint/no-explicit-any
return state.serialize() as any;
if (state instanceof Uint8Array) {
return state;
}
return state.serialize();
} else {
if (state instanceof Uint8Array) {
return {data: deserializeBeaconStateSerialized(config, state)};
}
return {data: state.toValue()};
}
},

async getStateV2(stateId: string | number, format?: ResponseFormat) {
const {state} = await resolveStateId(chain, stateId, {allowRegen: true});
const {state, executionOptimistic} = await getStateResponseWithRegen(chain, stateId);
if (format === "ssz") {
// Casting to any otherwise Typescript doesn't like the multi-type return
// eslint-disable-next-line @typescript-eslint/no-unsafe-return, @typescript-eslint/no-explicit-any
return state.serialize() as any;
if (state instanceof Uint8Array) {
return state;
}
return state.serialize();
} else {
return {data: state.toValue(), version: config.getForkName(state.slot)};
if (state instanceof Uint8Array) {
const data = deserializeBeaconStateSerialized(config, state);
return {
data,
version: config.getForkName(data.slot),
executionOptimistic,
};
}
return {
data: state.toValue(),
version: config.getForkName(state.slot),
executionOptimistic,
};
}
},
};
Expand Down
4 changes: 2 additions & 2 deletions packages/beacon-node/src/api/impl/proof/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {createProof, ProofType} from "@chainsafe/persistent-merkle-tree";
import {routes, ServerApi} from "@lodestar/api";
import {ApiModules} from "../types.js";
import {resolveStateId} from "../beacon/state/utils.js";
import {getStateResponse} from "../beacon/state/utils.js";
import {resolveBlockId} from "../beacon/blocks/utils.js";
import {ApiOptions} from "../../options.js";

Expand All @@ -20,7 +20,7 @@ export function getProofApi(
throw new Error("Requested proof is too large.");
}

const {state} = await resolveStateId(chain, stateId);
const {state} = await getStateResponse(chain, stateId);

// Commit any changes before computing the state root. In normal cases the state should have no changes here
state.commit();
Expand Down
25 changes: 21 additions & 4 deletions packages/beacon-node/src/chain/chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ import {BlockAttributes, produceBlockBody, produceCommonBlockBody} from "./produ
import {computeNewStateRoot} from "./produceBlock/computeNewStateRoot.js";
import {BlockInput} from "./blocks/types.js";
import {SeenAttestationDatas} from "./seenCache/seenAttestationData.js";
import {HistoricalStateRegen} from "./historicalState/index.js";
import {ShufflingCache} from "./shufflingCache.js";
import {StateContextCache} from "./stateCache/stateContextCache.js";
import {SeenGossipBlockInput} from "./seenCache/index.js";
Expand Down Expand Up @@ -108,6 +109,7 @@ export class BeaconChain implements IBeaconChain {
readonly regen: QueuedStateRegenerator;
readonly lightClientServer: LightClientServer;
readonly reprocessController: ReprocessController;
readonly historicalStateRegen?: HistoricalStateRegen;

// Ops pool
readonly attestationPool: AttestationPool;
Expand Down Expand Up @@ -165,6 +167,7 @@ export class BeaconChain implements IBeaconChain {
eth1,
executionEngine,
executionBuilder,
historicalStateRegen,
}: {
config: BeaconConfig;
db: IBeaconDb;
Expand All @@ -177,6 +180,7 @@ export class BeaconChain implements IBeaconChain {
eth1: IEth1ForBlockProduction;
executionEngine: IExecutionEngine;
executionBuilder?: IExecutionBuilder;
historicalStateRegen?: HistoricalStateRegen;
}
) {
this.opts = opts;
Expand All @@ -191,6 +195,7 @@ export class BeaconChain implements IBeaconChain {
this.eth1 = eth1;
this.executionEngine = executionEngine;
this.executionBuilder = executionBuilder;
this.historicalStateRegen = historicalStateRegen;
const signal = this.abortController.signal;
const emitter = new ChainEventEmitter();
// by default, verify signatures on both main threads and worker threads
Expand Down Expand Up @@ -401,12 +406,24 @@ export class BeaconChain implements IBeaconChain {
return state && {state, executionOptimistic: isOptimisticBlock(block)};
}
} else {
// request for finalized state
return null;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return null;
// fall back to caller to look in db or getHistoricalStateBySlot
return null;

}
}

async getHistoricalStateBySlot(slot: number): Promise<{state: Uint8Array; executionOptimistic: boolean} | null> {
const finalizedBlock = this.forkChoice.getFinalizedBlock();

// do not attempt regen, just check if state is already in DB
const state = await this.db.stateArchive.get(slot);
return state && {state, executionOptimistic: false};
if (slot >= finalizedBlock.slot) {
return null;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this null ever actually get reached? Perhaps is better to throw an error here or something to spark us to check for invalid logic if it ever comes up.

}

// request for finalized state using historical state regen
const stateSerialized = await this.historicalStateRegen?.getHistoricalState(slot);
if (!stateSerialized) {
return null;
}

return {state: stateSerialized, executionOptimistic: false};
}

async getStateByStateRoot(
Expand Down