Skip to content

Commit

Permalink
Improve lazy not iterator perf (#1669)
Browse files Browse the repository at this point in the history
* Refactor `is_valid(id)` logic.

* Fix `CollectionFilteringTest` and `CollectionJoinTest`.

* Refactor not equals iterator logic.

* Fix `CollectionJoinTest.SortByReference`.

* Fix `filter_result_iterator_t::contains_atleast_one`.

* Refactor `filter_result_iterator_t::and_filter_iterators`.

* Refactor `filter_result_iterator_t::contains_atleast_one`.

* Update `approx_filter_ids_length` in not equals.
  • Loading branch information
happy-san committed Apr 18, 2024
1 parent 4a4e50b commit fca277c
Show file tree
Hide file tree
Showing 5 changed files with 547 additions and 489 deletions.
48 changes: 28 additions & 20 deletions include/filter_result_iterator.h
Expand Up @@ -254,11 +254,10 @@ class filter_result_iterator_t {
std::vector<std::vector<posting_list_t::iterator_t>> posting_list_iterators;
std::vector<posting_list_t*> expanded_plists;

/// Used in case of a not equals string filter.
/// The iterative logic to find not equals match is to return the ids that occur in between the equals match. This
/// might lead to returning some ids that are deleted. So we use this iterator to check and return only the ids that
/// exist in `index->seq_ids`.
id_list_t::iterator_t all_seq_ids_iter = id_list_t::iterator_t(nullptr, nullptr, nullptr, false);
bool is_not_equals_iterator = false;
uint32_t equals_iterator_id = 0;
bool is_equals_iterator_valid = true;
uint32_t last_valid_id = 0;

/// Used in case of a single boolean filter matching more than `bool_filter_ids_threshold` ids.
num_tree_t::iterator_t bool_iterator = num_tree_t::iterator_t(nullptr, NUM_COMPARATOR::EQUALS, 0);
Expand All @@ -279,10 +278,6 @@ class filter_result_iterator_t {
/// Advances all the token iterators that are at seq_id and finds the next intersection.
void advance_string_filter_token_iterators();

/// Finds the first match for a filter on string field. Only used in `init()` and `reset()`. Handles `!` in string
/// filter.
void get_string_filter_first_match(const bool& field_is_array);

/// Finds the next match for a filter on string field.
void get_string_filter_next_match(const bool& field_is_array);

Expand All @@ -295,6 +290,10 @@ class filter_result_iterator_t {
/// Updates `validity` of the iterator to `timed_out` if the timeout condition is met. Assumes `timeout_info` is not null.
inline bool is_timed_out();

/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
/// this operation.
void skip_to(uint32_t id);

public:
uint32_t seq_id = 0;
/// Collection name -> references
Expand Down Expand Up @@ -330,16 +329,18 @@ class filter_result_iterator_t {
/// Recursively computes the result of each node and stores the final result in the root node.
void compute_iterators();

/// Returns a tri-state:
/// 0: id is not valid
/// 1: id is valid
/// -1: end of iterator / timed out
/// Handles moving the individual iterators to id internally and checks if `id` matches the filter.
///
/// Handles moving the individual iterators internally.
[[nodiscard]] int is_valid(uint32_t id);
/// \return
/// 0 : id is not valid
/// 1 : id is valid
/// -1: end of iterator / timed out
[[nodiscard]] int is_valid(uint32_t id, const bool& override_timeout = false);

/// Advances the iterator to get the next value of doc and reference. The iterator may become invalid during this
/// operation.
///
/// Should only be called after calling `compute_iterators()` or in conjunction with `is_valid(id)` when it returns `1`.
void next();

/// Collects n doc ids while advancing the iterator. The ids present in excluded_result_ids are ignored. The
Expand All @@ -349,17 +350,16 @@ class filter_result_iterator_t {
uint32_t const* const excluded_result_ids, const size_t& excluded_result_ids_size,
filter_result_t*& result, const bool& override_timeout = false);

/// Advances the iterator until the doc value reaches or just overshoots id. The iterator may become invalid during
/// this operation.
void skip_to(uint32_t id, const bool& override_timeout = false);

/// Returns true if at least one id from the posting list object matches the filter.
bool contains_atleast_one(const void* obj);

/// Returns to the initial state of the iterator.
void reset(const bool& override_timeout = false);

/// Iterates and collects all the filter ids into filter_array.
/// Copies filter ids from `filter_result` into `filter_array`.
///
/// Should only be called after calling `compute_iterators()`.
///
/// \return size of the filter array
uint32_t to_filter_id_array(uint32_t*& filter_array);

Expand All @@ -383,4 +383,12 @@ class filter_result_iterator_t {
/// Read-only accessor: returns the `right_it` pointer held by this iterator, unchanged.
[[nodiscard]] filter_result_iterator_t* _get_right_it() const { return right_it; }

/// Read-only accessor: returns the current value of `equals_iterator_id`.
[[nodiscard]] uint32_t _get_equals_iterator_id() const { return equals_iterator_id; }

/// Read-only accessor: returns the current value of the `is_equals_iterator_valid` flag.
[[nodiscard]] bool _get_is_equals_iterator_valid() const { return is_equals_iterator_valid; }
};

0 comments on commit fca277c

Please sign in to comment.