Skip to content

Commit

Permalink
Mas d31 nhskv16sst (#428)
Browse files Browse the repository at this point in the history
* Add performance/profiling test

Add test to perf_SUITE to do performance tests and also profile different activities in leveled.

This can then be used to highlight functions with unexpectedly high execution times, and prove the impact of changes.

Switch between riak_ctperf and riak_fullperf to change from the standard test (with profile option) to the full-scale performance test.

* Change shape of default perfTest

* Refactor SST

Compare and contrast profile for guess, before and after refactor:

pre

```
lists:map_1/2                                         313370     2.33    32379  [      0.10]

lists:foldl_1/3                                       956590     4.81    66992  [      0.07]

leveled_sst:'-expand_list_by_pointer/5-fun-0-'/4      925020     6.13    85318  [      0.09]

erlang:binary_to_term/1                                 3881     8.55   119012  [     30.67]

erlang:'++'/2                                         974322    11.55   160724  [      0.16]

lists:member/2                                       4000180    15.00   208697  [      0.05]

leveled_sst:find_pos/4                               4029220    21.01   292347  [      0.07]

leveled_sst:member_check/2                           4000000    21.17   294601  [      0.07]

--------------------------------------------------  --------  -------  -------  [----------]

Total:                                              16894665  100.00%  1391759  [      0.08]
```

post

```
lists:map_1/2                                         63800     0.79    6795  [      0.11]

erlang:term_to_binary/1                               15726     0.81    6950  [      0.44]

lists:keyfind/3                                      180967     0.92    7884  [      0.04]

erlang:spawn_link/3                                   15717     1.08    9327  [      0.59]

leveled_sst:'-read_slots/5-fun-1-'/8                  31270     1.15    9895  [      0.32]

gen:do_call/4                                          7881     1.31   11243  [      1.43]

leveled_penciller:find_nextkey/8                     180936     2.01   17293  [      0.10]

prim_file:pread_nif/3                                 15717     3.89   33437  [      2.13]

leveled_sst:find_pos/4                              4028940    17.85  153554  [      0.04]

erlang:binary_to_term/1                               15717    51.97  447048  [     28.44]

--------------------------------------------------  -------  -------  ------  [----------]

Total:                                              6704100  100.00%  860233  [      0.13]

```

* Update leveled_penciller.erl

* Mas d31 nhskv16sstpcl (#426)

Performance updates to leveled:

- Refactoring of pointer expansion when fetching from leveled_sst files to avoid expensive list concatenation.
- Refactoring of leveled_ebloom to make it more flexible, reduce code, and improve check time.
- Refactoring of querying within leveled_sst to reduce the number of blocks that need to be de-serialised per query.
- Refactoring of the leveled_penciller's query key comparator, to make use of maps and simplify the filtering.
- General speed-up of frequently called functions.
  • Loading branch information
martinsumner committed Jan 22, 2024
1 parent 49490c3 commit c294570
Show file tree
Hide file tree
Showing 12 changed files with 1,821 additions and 2,117 deletions.
2 changes: 1 addition & 1 deletion include/leveled.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
end_key :: tuple() | undefined,
owner :: pid()|list(),
filename :: string() | undefined,
bloom :: binary() | none | undefined}).
bloom = none :: leveled_ebloom:bloom() | none}).

-record(cdb_options,
{max_size :: pos_integer() | undefined,
Expand Down
113 changes: 85 additions & 28 deletions src/leveled_codec.erl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
strip_to_keyseqonly/1,
strip_to_indexdetails/1,
striphead_to_v1details/1,
is_active/3,
endkey_passed/2,
key_dominates/2,
maybe_reap_expiredkey/2,
Expand Down Expand Up @@ -48,7 +47,10 @@
to_lookup/1,
next_key/1,
return_proxy/4,
get_metadata/1]).
get_metadata/1,
maybe_accumulate/5,
accumulate_index/2,
count_tombs/2]).

-define(LMD_FORMAT, "~4..0w~2..0w~2..0w~2..0w~2..0w").
-define(NRT_IDX, "$aae.").
Expand Down Expand Up @@ -251,22 +253,79 @@ striphead_to_v1details(V) ->
get_metadata(LV) ->
element(4, LV).

-spec key_dominates(ledger_kv(), ledger_kv()) ->
left_hand_first|right_hand_first|left_hand_dominant|right_hand_dominant.
-spec maybe_accumulate(
    list(leveled_codec:ledger_kv()),
    term(),
    non_neg_integer(),
    {pos_integer(), {non_neg_integer(), non_neg_integer()|infinity}},
    leveled_penciller:pclacc_fun())
        -> {term(), non_neg_integer()}.
%% @doc
%% Walk a list of ledger key/value pairs, applying AccFun(Key, Value, Acc) to
%% each pair that passes the filter and bumping Count once per accepted pair.
%% Returns {FinalAcc, FinalCount}.
%%
%% The filter is {Now, {LowDate, HighDate}}.  A pair is accepted when its
%% status is {active, TS} with TS >= Now and, for five-element values whose
%% last element is not `undefined`, that last element lies within
%% [LowDate, HighDate].  Tombstones and anything else are skipped.  Needs to
%% handle v1 and v2 values: when folding over heads -> v2 values,
%% index-keys -> v1 values.
maybe_accumulate([], Acc, Count, _Filter, _AccFun) ->
    {Acc, Count};
maybe_accumulate([LedgerKV|Rest], Acc, Count, Filter, AccFun) ->
    case is_accumulable(LedgerKV, Filter) of
        true ->
            {K, V} = LedgerKV,
            maybe_accumulate(
                Rest, AccFun(K, V, Acc), Count + 1, Filter, AccFun);
        false ->
            maybe_accumulate(Rest, Acc, Count, Filter, AccFun)
    end.

-spec is_accumulable(term(), term()) -> boolean().
%% @doc
%% Decide whether a single ledger key/value pair passes the {Now, ModRange}
%% filter.  Anything not explicitly recognised (tombstones included) is
%% rejected.
is_accumulable(
        {_K, {_SQN, {active, TS}, _SH, _MD, undefined}}, {Now, _ModRange})
        when TS >= Now ->
    %% Five-element value with no modified date: only expiry is checked
    true;
is_accumulable(
        {_K, {_SQN, {active, TS}, _SH, _MD}}, {Now, _ModRange})
        when TS >= Now ->
    %% Four-element value: there is no modified date to check
    true;
is_accumulable(
        {_K, {_SQN, {active, TS}, _SH, _MD, LMD}}, {Now, {LowDate, HighDate}})
        when TS >= Now, LMD >= LowDate, LMD =< HighDate ->
    %% Five-element value: must be unexpired and within the date range
    true;
is_accumulable(_LedgerKV, _Filter) ->
    false.

-spec accumulate_index(
    {boolean(), undefined|leveled_runner:mp()}, leveled_runner:acc_fun())
        -> any().
%% @doc
%% Build the three-arity accumulator fun used when folding over index keys.
%% The first argument is {AddTerm, TermRegex}:
%% - with no regex, every index entry is passed to FoldKeysFun;
%% - with a regex, only entries whose index value matches (re:run/2) are
%%   passed on, the others leave the accumulator untouched;
%% - AddTerm selects whether FoldKeysFun receives {IdxValue, ObjKey} (true)
%%   or just ObjKey (false) as its second argument.
accumulate_index({false, undefined}, FoldKeysFun) ->
    fun({?IDX_TAG, Bucket, _IndexInfo, ObjKey}, _Value, Acc) ->
        FoldKeysFun(Bucket, ObjKey, Acc)
    end;
accumulate_index({true, undefined}, FoldKeysFun) ->
    fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) ->
        FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc)
    end;
accumulate_index({AddTerm, TermRegex}, FoldKeysFun) ->
    fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) ->
        Matched = re:run(IdxValue, TermRegex) =/= nomatch,
        case Matched of
            false ->
                Acc;
            true ->
                %% Shape of the key handed to the fold fun depends on
                %% whether the caller asked for the term to be returned
                FoldKey =
                    case AddTerm of
                        true ->
                            {IdxValue, ObjKey};
                        false ->
                            ObjKey
                    end,
                FoldKeysFun(Bucket, FoldKey, Acc)
        end
    end.

-spec key_dominates(ledger_kv(), ledger_kv()) -> boolean().
%% @doc
%% When comparing two keys in the ledger, need to find if one key comes before
%% the other or, if they match, which key is "better" and should be the winner.
%%
%% NOTE(review): the lines below appear to interleave two versions of this
%% function (presumably a rendered diff with the +/- markers lost - confirm
%% against the real file).  The clauses returning left_hand_first,
%% right_hand_first, left_hand_dominant and right_hand_dominant do not fit the
%% boolean() spec above; the final expression, which compares the results of
%% strip_to_seqonly/1 with >=, does.
key_dominates({LK, _LVAL}, {RK, _RVAL}) when LK < RK ->
left_hand_first;
key_dominates({LK, _LVAL}, {RK, _RVAL}) when RK < LK ->
right_hand_first;
key_dominates(LObj, RObj) ->
case strip_to_seqonly(LObj) >= strip_to_seqonly(RObj) of
true ->
left_hand_dominant;
false ->
right_hand_dominant
end.
%% Boolean form: true when the left-hand object's sequence number is greater
%% than or equal to the right-hand object's.
strip_to_seqonly(LObj) >= strip_to_seqonly(RObj).

-spec maybe_reap_expiredkey(ledger_kv(), {boolean(), integer()}) -> boolean().
%% @doc
Expand All @@ -286,20 +345,18 @@ maybe_reap(tomb, {true, _CurrTS}) ->
maybe_reap(_, _) ->
false.

-spec is_active(ledger_key(), ledger_value(), non_neg_integer()) -> boolean().
%% @doc
%% Report whether a key/value pair is still live at time Now.  The status is
%% read with strip_to_statusonly/1: tombstones are never active, a status of
%% {active, infinity} is always active, and any other {active, TS} is active
%% only while TS has not fallen behind Now.
is_active(Key, Value, Now) ->
    case strip_to_statusonly({Key, Value}) of
        tomb ->
            false;
        {active, infinity} ->
            true;
        {active, TS} ->
            TS >= Now
    end.
-spec count_tombs(
    list(ledger_kv()), non_neg_integer()|not_counted) ->
        non_neg_integer()|not_counted.
%% @doc
%% Add to Count the number of key/value pairs in the list whose value has
%% `tomb` as its second element.  When the incoming count is `not_counted`
%% the list is not inspected and `not_counted` is returned unchanged.
count_tombs(_KVList, not_counted) ->
    not_counted;
count_tombs([], TombCount) ->
    TombCount;
count_tombs([KV|Rest], TombCount) ->
    case KV of
        {_Key, Value} when element(2, Value) =:= tomb ->
            count_tombs(Rest, TombCount + 1);
        _NotTomb ->
            %% Includes entries whose value is not a tuple: the guard
            %% simply fails and the entry is not counted
            count_tombs(Rest, TombCount)
    end.


-spec from_ledgerkey(atom(), tuple()) -> false|tuple().
%% @doc
Expand Down

0 comments on commit c294570

Please sign in to comment.