Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Batch-optimization #3313

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4d05198
trie: create file for batch functions
ScottyPoi Mar 10, 2024
30c4c5f
trie: helper function to sort batch keys
ScottyPoi Mar 10, 2024
8668578
trie: create modified _put method for batch use
ScottyPoi Mar 10, 2024
3db7de7
trie: create modified _del method for batch
ScottyPoi Mar 10, 2024
f7e0ceb
trie: create intermediate method batchPut
ScottyPoi Mar 10, 2024
674f7e6
trie: implement optimized batch method
ScottyPoi Mar 10, 2024
6c43d64
trie: return updated node stack from _update
ScottyPoi Mar 10, 2024
252ee7b
trie: replace trie.batch with imported batch method
ScottyPoi Mar 10, 2024
e4123db
trie: test new batch functions
ScottyPoi Mar 10, 2024
6fd4602
trie: use hashed keys in batch sort (secure trie)
ScottyPoi Mar 13, 2024
b2c2624
trie: test batch on secure / pruned trie
ScottyPoi Mar 13, 2024
71535aa
trie: use _del instead of del in batch
ScottyPoi Mar 13, 2024
1a98e2c
trie: test additional trie opts
ScottyPoi Mar 13, 2024
f08bd10
remove .only from test
ScottyPoi Mar 13, 2024
2135b9a
trie: modify original trie.put for batch
ScottyPoi Mar 15, 2024
945ace5
trie: modify original trie.del for batch
ScottyPoi Mar 15, 2024
87d0465
trie: modify _batch to use direct trie methods
ScottyPoi Mar 15, 2024
f51ff04
trie: remove custom functions from batch.ts
ScottyPoi Mar 15, 2024
4fd15a2
trie: move _batch back to trie.batch
ScottyPoi Mar 15, 2024
c3dadd4
trie: move orderBatch function to util file / delete batch.ts
ScottyPoi Mar 15, 2024
e075991
trie: condense batch test file
ScottyPoi Mar 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
87 changes: 70 additions & 17 deletions packages/trie/src/trie.ts
Expand Up @@ -29,7 +29,7 @@
import { verifyRangeProof } from './proof/range.js'
import { ROOT_DB_KEY } from './types.js'
import { _walkTrie } from './util/asyncWalk.js'
import { bytesToNibbles, matchingNibbleLength } from './util/nibbles.js'
import { bytesToNibbles, matchingNibbleLength, orderBatch } from './util/nibbles.js'
import { TrieReadStream as ReadStream } from './util/readStream.js'
import { WalkController } from './util/walkController.js'

Expand Down Expand Up @@ -495,14 +495,15 @@
async put(
key: Uint8Array,
value: Uint8Array | null,
skipKeyTransform: boolean = false
): Promise<void> {
skipKeyTransform: boolean = false,
path?: { stack: TrieNode[]; remaining: number[] }
): Promise<TrieNode[]> {
this.DEBUG && this.debug(`Key: ${bytesToHex(key)}`, ['PUT'])
this.DEBUG && this.debug(`Value: ${value === null ? 'null' : bytesToHex(key)}`, ['PUT'])
if (this._opts.useRootPersistence && equalsBytes(key, ROOT_DB_KEY) === true) {
throw new Error(`Attempted to set '${bytesToUtf8(ROOT_DB_KEY)}' key but it is not allowed.`)
}

let returnStack: TrieNode[] = []
// If value is empty, delete
if (value === null || value.length === 0) {
return this.del(key)
Expand All @@ -512,10 +513,10 @@
const appliedKey = skipKeyTransform ? key : this.appliedKey(key)
if (equalsBytes(this.root(), this.EMPTY_TRIE_ROOT) === true) {
// If no root, initialize this trie
await this._createInitialNode(appliedKey, value)
returnStack = [await this._createInitialNode(appliedKey, value)]
} else {
// First try to find the given key or its nearest node
const { remaining, stack } = await this.findPath(appliedKey)
const { remaining, stack } = path ?? (await this.findPath(appliedKey))
let ops: BatchDBOp[] = []
if (this._opts.useNodePruning) {
const val = await this.get(key)
Expand All @@ -540,14 +541,15 @@
}
}
// then update
await this._updateNode(appliedKey, value, remaining, stack)
returnStack = await this._updateNode(appliedKey, value, remaining, stack)
if (this._opts.useNodePruning) {
// Only after updating the node we can delete the keyhashes
await this._db.batch(ops)
}
}
await this.persistRoot()
this._lock.release()
return returnStack
}

/**
Expand All @@ -556,11 +558,16 @@
* @param key
* @returns A Promise that resolves once value is deleted.
*/
async del(key: Uint8Array, skipKeyTransform: boolean = false): Promise<void> {
async del(
key: Uint8Array,
skipKeyTransform: boolean = false,
path?: { stack: TrieNode[]; remaining: number[] }
): Promise<TrieNode[]> {
this.DEBUG && this.debug(`Key: ${bytesToHex(key)}`, ['DEL'])
await this._lock.acquire()
const appliedKey = skipKeyTransform ? key : this.appliedKey(key)
const { node, stack } = await this.findPath(appliedKey)
const partialPath = path && { stack: path.stack }
const { node, stack } = await this.findPath(appliedKey, false, partialPath)

let ops: BatchDBOp[] = []
// Only delete if the `key` currently has any value
Expand Down Expand Up @@ -588,6 +595,7 @@
}
await this.persistRoot()
this._lock.release()
return stack
}

/**
Expand Down Expand Up @@ -770,7 +778,7 @@
* Creates the initial node from an empty tree.
* @private
*/
protected async _createInitialNode(key: Uint8Array, value: Uint8Array): Promise<void> {
protected async _createInitialNode(key: Uint8Array, value: Uint8Array): Promise<TrieNode> {
const newNode = new LeafNode(bytesToNibbles(key), value)

const encoded = newNode.serialize()
Expand All @@ -779,6 +787,7 @@
rootKey = this._opts.keyPrefix ? concatBytes(this._opts.keyPrefix, rootKey) : rootKey
await this._db.put(rootKey, encoded)
await this.persistRoot()
return newNode
}

/**
Expand Down Expand Up @@ -817,7 +826,7 @@
value: Uint8Array,
keyRemainder: Nibbles,
stack: TrieNode[]
): Promise<void> {
): Promise<TrieNode[]> {
const toSave: BatchDBOp[] = []
const lastNode = stack.pop()
if (!lastNode) {
Expand Down Expand Up @@ -908,7 +917,9 @@
}
}

const returnStack = [...stack]
await this.saveStack(key, stack, toSave)
return returnStack
}

/**
Expand Down Expand Up @@ -1137,17 +1148,59 @@
* @param ops
*/
async batch(ops: BatchDBOp[], skipKeyTransform?: boolean): Promise<void> {
for (const op of ops) {
const keyTransform =
skipKeyTransform === true
? undefined

Check warning on line 1153 in packages/trie/src/trie.ts

View check run for this annotation

Codecov / codecov/patch

packages/trie/src/trie.ts#L1153

Added line #L1153 was not covered by tests
: (msg: Uint8Array) => {
return this.appliedKey(msg)
}
const sortedOps = orderBatch(ops, keyTransform)
let stack: TrieNode[] = []
const stackPathCache: Map<string, TrieNode> = new Map()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to make both the ordering as well as the caching optional for the batch() method, and therefore provide extra configuration parameters to configure.

  1. Ordering: Reason for ordering is that the ordering in certain cases does have an effect on the outcome. So if there is first a put for a key and then a delete for the same key this is obviously different then having this the other way around. Same for two puts for the same key but different values. So the whole ordering needs to be deactivated by default (also for backwards compatibility) and the user would need to activate manually.

  2. Caching: Reason that caching would need to also get an option and also would needed to be deactivated by default is that one does not know how big a batch() operation will be (only the user knows (at best)). So this might "overload" the cache respectively there is the risk for out-of-memory situations.

I would suggest that we add a dedicated options dictionary with its own type in types.ts called BatchOpts for this, seems worth it, and then we add boolean flags sortOperations (or so) and activatePathCache (or so) for this.

My tendency actually would be to also include skipKeyTransform and then be a bit hacky and change the function signature to async batch(ops: BatchDBOp[], skipKeyTransformOrOptsDict?: boolean | BatchOpts) so that we can then do a switch respectively type check for the parameter and either assign skipKeyTransform directly or use the options dict.

Then we can use the method directly with trie.batch(ops, opts) (or trie.batch(ops, { sortOperations: true })) and do not always need to channel in the skipKeyTransform default.

(let me know if this has serious downsides though I am overlooking. I would think this should still be backwards compatible for people)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did some further research into the effects of this in the context of other modules, and I agree about tweaking the options for how batch works based on the situation.

There are definitely circumstances in which it would be faster to simply iterate through the batch like we currently do, instead of sorting and caching. I also wonder how the results would change if we used different sorting and trie walking algorithms. If there's something useful there, it could be another option in batch()

for (const op of sortedOps) {
const appliedKey = skipKeyTransform === true ? op.key : this.appliedKey(op.key)
const nibbles = bytesToNibbles(appliedKey)
stack = []
let remaining = nibbles
for (let i = 0; i < nibbles.length; i++) {
const p: string = JSON.stringify(nibbles.slice(0, i) as number[])
if (stackPathCache.has(p)) {
const node = stackPathCache.get(p)!
stack.push(node)
remaining = nibbles.slice(i)
}
}
const _path =
stack.length > 0
? {
stack,
remaining,
}
: undefined
if (op.type === 'put') {
if (op.value === null || op.value === undefined) {
throw new Error('Invalid batch db operation')
stack = await this.put(op.key, op.value, skipKeyTransform, _path)
const path: number[] = []
for (const node of stack) {
stackPathCache.set(JSON.stringify([...path]), node)
if (node instanceof BranchNode) {
path.push(nibbles.shift()!)
} else {
path.push(...nibbles.splice(0, node.keyLength()))
}
}
await this.put(op.key, op.value, skipKeyTransform)
} else if (op.type === 'del') {
await this.del(op.key, skipKeyTransform)
stack = await this.put(op.key, null, skipKeyTransform, _path)
const path: number[] = []
for (const node of stack) {
stackPathCache.set(JSON.stringify([...path]), node)
if (node instanceof BranchNode) {
path.push(nibbles.shift()!)
} else {
path.push(...nibbles.splice(0, node.keyLength()))
}
}
}
}
await this.persistRoot()
}

// This method verifies if all keys in the trie (except the root) are reachable
Expand Down
18 changes: 18 additions & 0 deletions packages/trie/src/util/nibbles.ts
@@ -1,6 +1,7 @@
import { toBytes } from '@ethereumjs/util'

import type { Nibbles } from '../types.js'
import type { BatchDBOp } from '@ethereumjs/util'

/**
* Converts a bytes to a nibble array.
Expand Down Expand Up @@ -91,3 +92,20 @@ export function doKeysMatch(keyA: Nibbles, keyB: Nibbles): boolean {
const length = matchingNibbleLength(keyA, keyB)
return length === keyA.length && length === keyB.length
}

export function orderBatch(
ops: BatchDBOp[],
keyTransform: (msg: Uint8Array) => Uint8Array = (msg: Uint8Array) => {
return msg
}
): BatchDBOp[] {
const keyNibbles: [number, number[]][] = ops.map((o, i) => {
const appliedKey = keyTransform(o.key)
const nibbles: number[] = bytesToNibbles(appliedKey)
return [i, nibbles]
})
keyNibbles.sort(([_, a], [__, b]) => {
return nibblesCompare(a, b)
})
return keyNibbles.map(([i, _]) => ops[i])
}