Mas d31 nhskv16sst (#428)

* Add performance/profiling test

  Add test to perf_SUITE to do performance tests and also profile different activities in leveled.

  This can then be used to highlight functions with unexpectedly high execution times, and prove the impact of changes.

  Switch between riak_ctperf and riak_fullperf to change from standard test (with profile option) to full-scale performance test

* Change shape of default perfTest

* Refactor SST

  Compare and contrast profile for guess, before and after refactor:

  pre

  ```
  lists:map_1/2                                        313370    2.33   32379 [      0.10]
  lists:foldl_1/3                                      956590    4.81   66992 [      0.07]
  leveled_sst:'-expand_list_by_pointer/5-fun-0-'/4     925020    6.13   85318 [      0.09]
  erlang:binary_to_term/1                                3881    8.55  119012 [     30.67]
  erlang:'++'/2                                        974322   11.55  160724 [      0.16]
  lists:member/2                                      4000180   15.00  208697 [      0.05]
  leveled_sst:find_pos/4                              4029220   21.01  292347 [      0.07]
  leveled_sst:member_check/2                          4000000   21.17  294601 [      0.07]
  -------------------------------------------------- -------- ------- ------- [----------]
  Total:                                             16894665 100.00% 1391759 [      0.08]
  ```

  post

  ```
  lists:map_1/2                                        63800    0.79   6795 [      0.11]
  erlang:term_to_binary/1                              15726    0.81   6950 [      0.44]
  lists:keyfind/3                                     180967    0.92   7884 [      0.04]
  erlang:spawn_link/3                                  15717    1.08   9327 [      0.59]
  leveled_sst:'-read_slots/5-fun-1-'/8                 31270    1.15   9895 [      0.32]
  gen:do_call/4                                         7881    1.31  11243 [      1.43]
  leveled_penciller:find_nextkey/8                    180936    2.01  17293 [      0.10]
  prim_file:pread_nif/3                                15717    3.89  33437 [      2.13]
  leveled_sst:find_pos/4                             4028940   17.85 153554 [      0.04]
  erlang:binary_to_term/1                              15717   51.97 447048 [     28.44]
  -------------------------------------------------- ------- ------- ------ [----------]
  Total:                                             6704100 100.00% 860233 [      0.13]
  ```

* Update leveled_penciller.erl

* Mas d31 nhskv16sstpcl (#426)

  Performance updates to leveled:

  - Refactoring of pointer expansion when fetching from leveled_sst files to avoid expensive list concatenation.
  - Refactoring of leveled_ebloom to make more flexible, reduce code, and improve check time.
  - Refactoring of querying within leveled_sst to reduce the number of blocks that need to be de-serialised per query.
  - Refactoring of the leveled_penciller's query key comparator, to make use of maps and simplify the filtering.
  - General speed-up of frequently called functions.
martinsumner · Jan 22, 2024 · c294570 · c294570
1 parent 49490c3
commit c294570
Show file tree

Hide file tree

Showing 12 changed files with 1,821 additions and 2,117 deletions.
diff --git a/include/leveled.hrl b/include/leveled.hrl
@@ -84,7 +84,7 @@
                         end_key :: tuple() | undefined,
                         owner :: pid()|list(),
                         filename :: string() | undefined,
-                        bloom :: binary() | none | undefined}).
+                        bloom = none :: leveled_ebloom:bloom() | none}).
 
 -record(cdb_options,
                         {max_size :: pos_integer() | undefined,

diff --git a/src/leveled_codec.erl b/src/leveled_codec.erl
@@ -18,7 +18,6 @@
         strip_to_keyseqonly/1,
         strip_to_indexdetails/1,
         striphead_to_v1details/1,
-        is_active/3,
         endkey_passed/2,
         key_dominates/2,
         maybe_reap_expiredkey/2,
@@ -48,7 +47,10 @@
         to_lookup/1,
         next_key/1,
         return_proxy/4,
-        get_metadata/1]).         
+        get_metadata/1,
+        maybe_accumulate/5,
+        accumulate_index/2,
+        count_tombs/2]).         
 
 -define(LMD_FORMAT, "~4..0w~2..0w~2..0w~2..0w~2..0w").
 -define(NRT_IDX, "$aae.").
@@ -251,22 +253,79 @@ striphead_to_v1details(V) ->
 get_metadata(LV) ->
     element(4, LV).
 
--spec key_dominates(ledger_kv(), ledger_kv()) -> 
-    left_hand_first|right_hand_first|left_hand_dominant|right_hand_dominant.
+-spec maybe_accumulate(
+        list(leveled_codec:ledger_kv()),
+        term(),
+        non_neg_integer(),
+        {pos_integer(), {non_neg_integer(), non_neg_integer()|infinity}},
+        leveled_penciller:pclacc_fun())
+            -> {term(), non_neg_integer()}.
+%% @doc
+%% Make an accumulation decision based on the date range and also the expiry
+%% status of the ledger key and value  Needs to handle v1 and v2 values.  When
+%% folding over heads -> v2 values, index-keys -> v1 values.
+maybe_accumulate([], Acc, Count, _Filter, _Fun) ->
+    {Acc, Count};
+maybe_accumulate(
+        [{K, {_SQN, {active, TS}, _SH, _MD, undefined}=V}|T],
+        Acc, Count, {Now, _ModRange}=Filter, AccFun)
+        when TS >= Now ->
+    maybe_accumulate(T, AccFun(K, V, Acc), Count + 1, Filter, AccFun);
+maybe_accumulate(
+        [{K, {_SQN, {active, TS}, _SH, _MD}=V}|T],
+        Acc, Count, {Now, _ModRange}=Filter, AccFun)
+        when TS >= Now ->
+    maybe_accumulate(T, AccFun(K, V, Acc), Count + 1, Filter, AccFun);
+maybe_accumulate(
+        [{_K, {_SQN, tomb, _SH, _MD, _LMD}}|T],
+        Acc, Count, Filter, AccFun) ->
+    maybe_accumulate(T, Acc, Count, Filter, AccFun);
+maybe_accumulate(
+        [{_K, {_SQN, tomb, _SH, _MD}}|T],
+        Acc, Count, Filter, AccFun) ->
+    maybe_accumulate(T, Acc, Count, Filter, AccFun);
+maybe_accumulate(
+        [{K, {_SQN, {active, TS}, _SH, _MD, LMD}=V}|T],
+        Acc, Count, {Now, {LowDate, HighDate}}=Filter, AccFun)
+        when TS >= Now, LMD >= LowDate, LMD =< HighDate ->
+    maybe_accumulate(T, AccFun(K, V, Acc), Count + 1, Filter, AccFun);
+maybe_accumulate(
+        [_LV|T],
+        Acc, Count, Filter, AccFun) ->
+    maybe_accumulate(T, Acc, Count, Filter, AccFun).
+
+-spec accumulate_index(
+        {boolean(), undefined|leveled_runner:mp()}, leveled_runner:acc_fun())
+            -> any().
+accumulate_index({false, undefined}, FoldKeysFun) ->
+    fun({?IDX_TAG, Bucket, _IndexInfo, ObjKey}, _Value, Acc) ->
+        FoldKeysFun(Bucket, ObjKey, Acc)
+    end;
+accumulate_index({true, undefined}, FoldKeysFun) ->
+    fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) ->
+        FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc)
+    end;
+accumulate_index({AddTerm, TermRegex}, FoldKeysFun) ->
+    fun({?IDX_TAG, Bucket, {_IdxFld, IdxValue}, ObjKey}, _Value, Acc) ->
+        case re:run(IdxValue, TermRegex) of
+            nomatch ->
+                Acc;
+            _ ->
+                case AddTerm of
+                    true ->
+                        FoldKeysFun(Bucket, {IdxValue, ObjKey}, Acc);
+                    false ->
+                        FoldKeysFun(Bucket, ObjKey, Acc)
+                end
+        end
+    end.
+
+-spec key_dominates(ledger_kv(), ledger_kv()) -> boolean().
 %% @doc
 %% When comparing two keys in the ledger need to find if one key comes before 
 %% the other, or if the match, which key is "better" and should be the winner
-key_dominates({LK, _LVAL}, {RK, _RVAL}) when LK < RK ->
-    left_hand_first;
-key_dominates({LK, _LVAL}, {RK, _RVAL}) when RK < LK ->
-    right_hand_first;
 key_dominates(LObj, RObj) ->
-    case strip_to_seqonly(LObj) >= strip_to_seqonly(RObj) of
-        true ->
-            left_hand_dominant;
-        false ->
-            right_hand_dominant
-    end.
+    strip_to_seqonly(LObj) >= strip_to_seqonly(RObj).
 
 -spec maybe_reap_expiredkey(ledger_kv(), {boolean(), integer()}) -> boolean().
 %% @doc
@@ -286,20 +345,18 @@ maybe_reap(tomb, {true, _CurrTS}) ->
 maybe_reap(_, _) ->
     false.
 
--spec is_active(ledger_key(), ledger_value(), non_neg_integer()) -> boolean().
-%% @doc
-%% Is this an active KV pair or has the timestamp expired
-is_active(Key, Value, Now) ->
-    case strip_to_statusonly({Key, Value}) of
-        {active, infinity} ->
-            true;
-        tomb ->
-            false;
-        {active, TS} when TS >= Now ->
-            true;
-        {active, _TS} ->
-            false
-    end.
+-spec count_tombs(
+        list(ledger_kv()), non_neg_integer()|not_counted) ->
+            non_neg_integer()|not_counted.
+count_tombs(_List, not_counted) ->
+    not_counted;
+count_tombs([], Count) ->
+    Count;
+count_tombs([{_K, V}|T], Count) when element(2, V) == tomb ->
+    count_tombs(T, Count + 1);
+count_tombs([_KV|T], Count) ->
+    count_tombs(T, Count).
+
 
 -spec from_ledgerkey(atom(), tuple()) -> false|tuple().
 %% @doc