Skip to content

Commit

Permalink
LibWeb: Use an ancestor filter to quickly reject many CSS selectors
Browse files Browse the repository at this point in the history
Given a selector like `.foo .bar #baz`, we know that elements with
the class names `foo` and `bar` must be present in the ancestor chain of
the candidate element, or the selector cannot match.

By keeping track of the current ancestor chain during style computation,
and which strings are used in its tag names, IDs, class names and
attribute names, we can do a quick check before evaluating the selector
itself, to see if all the required ancestors are present.

The way this works:

1. CSS::Selector now has a cache of up to 8 strings that must be present
   in the ancestor chain of a matching element. Note that we actually
   store string *hashes*, not the strings themselves.

2. When Document performs a recursive style update, we now push and pop
   elements to the ancestor chain stack as they are entered and exited.

3. When entering/exiting an ancestor, StyleComputer collects all the
   relevant string hashes from that ancestor element and updates a
   counting bloom filter.

4. Before evaluating a selector, we first check if any of the hashes
   required by the selector are definitely missing from the ancestor
   filter. If so, it cannot be a match, and we reject it immediately.

5. Otherwise, we carry on and evaluate the selector as usual.

I originally tried doing this with a HashMap, but we ended up losing
a huge chunk of the time saved to HashMap overhead instead. As it turns
out, a simple counting bloom filter is way better at handling this.
The cost is a flat 16KB per StyleComputer (2^14 one-byte counters), and
since it's a bloom filter, false positives are a thing (they simply fall
through to regular selector evaluation).

This is extremely efficient, and allows us to quickly reject the
majority of selectors on many huge websites.

Some example rejection rates:
- https://amazon.com: 77%
- https://github.com/SerenityOS/serenity: 61%
- https://nytimes.com: 57%
- https://store.steampowered.com: 55%
- https://en.wikipedia.org: 45%
- https://youtube.com: 32%
- https://shopify.com: 25%

This also yields a chunky 37% speedup on StyleBench. :^)
  • Loading branch information
awesomekling committed Mar 22, 2024
1 parent e232a84 commit afe6abf
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 5 deletions.
47 changes: 47 additions & 0 deletions Userland/Libraries/LibWeb/CSS/Selector.cpp
Expand Up @@ -23,6 +23,53 @@ Selector::Selector(Vector<CompoundSelector>&& compound_selectors)
}
}
}

collect_ancestor_hashes();
}

void Selector::collect_ancestor_hashes()
{
size_t next_hash_index = 0;
auto append_unique_hash = [&](u32 hash) -> bool {
if (next_hash_index >= m_ancestor_hashes.size())
return true;
for (size_t i = 0; i < next_hash_index; ++i) {
if (m_ancestor_hashes[i] == hash)
return false;
}
m_ancestor_hashes[next_hash_index++] = hash;
return false;
};

auto last_combinator = m_compound_selectors.last().combinator;
for (ssize_t compound_selector_index = static_cast<ssize_t>(m_compound_selectors.size()) - 2; compound_selector_index >= 0; --compound_selector_index) {
auto const& compound_selector = m_compound_selectors[compound_selector_index];
if (last_combinator == Combinator::Descendant) {
for (auto const& simple_selector : compound_selector.simple_selectors) {
switch (simple_selector.type) {
case SimpleSelector::Type::Id:
case SimpleSelector::Type::Class:
if (append_unique_hash(simple_selector.name().hash()))
return;
break;
case SimpleSelector::Type::TagName:
if (append_unique_hash(simple_selector.qualified_name().name.name.hash()))
return;
break;
case SimpleSelector::Type::Attribute:
if (append_unique_hash(simple_selector.attribute().qualified_name.name.name.hash()))
return;
break;
default:
break;
}
}
}
last_combinator = compound_selector.combinator;
}

for (size_t i = next_hash_index; i < m_ancestor_hashes.size(); ++i)
m_ancestor_hashes[i] = 0;
}

// https://www.w3.org/TR/selectors-4/#specificity-rules
Expand Down
6 changes: 6 additions & 0 deletions Userland/Libraries/LibWeb/CSS/Selector.h
Expand Up @@ -242,12 +242,18 @@ class Selector : public RefCounted<Selector> {
u32 specificity() const;
String serialize() const;

auto const& ancestor_hashes() const { return m_ancestor_hashes; }

private:
explicit Selector(Vector<CompoundSelector>&&);

Vector<CompoundSelector> m_compound_selectors;
mutable Optional<u32> m_specificity;
Optional<Selector::PseudoElement> m_pseudo_element;

void collect_ancestor_hashes();

Array<u32, 8> m_ancestor_hashes;
};

String serialize_a_group_of_selectors(Vector<NonnullRefPtr<Selector>> const& selectors);
Expand Down
47 changes: 47 additions & 0 deletions Userland/Libraries/LibWeb/CSS/StyleComputer.cpp
Expand Up @@ -61,6 +61,7 @@
#include <LibWeb/CSS/StyleValues/TransformationStyleValue.h>
#include <LibWeb/CSS/StyleValues/UnresolvedStyleValue.h>
#include <LibWeb/CSS/StyleValues/UnsetStyleValue.h>
#include <LibWeb/DOM/Attr.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/DOM/ShadowRoot.h>
Expand Down Expand Up @@ -302,6 +303,17 @@ StyleComputer::RuleCache const& StyleComputer::rule_cache_for_cascade_origin(Cas
return true;
}

// Returns true when at least one hash required by `selector` is definitely absent
// from the ancestor filter, in which case the selector is skipped without being
// evaluated. A zero entry terminates the selector's list of required hashes.
//
// NOTE(review): This presumably relies on the filter reflecting the ancestor chain of
//               the element currently being matched (push_ancestor()/pop_ancestor()).
//               Confirm no caller matches selectors outside such a traversal.
bool StyleComputer::should_reject_with_ancestor_filter(Selector const& selector) const
{
    auto const& required_hashes = selector.ancestor_hashes();

    for (size_t index = 0; index < required_hashes.size(); ++index) {
        u32 const required_hash = required_hashes[index];

        if (required_hash == 0)
            return false;

        if (m_ancestor_filter.may_contain(required_hash))
            continue;

        return true;
    }

    return false;
}

Vector<MatchingRule> StyleComputer::collect_matching_rules(DOM::Element const& element, CascadeOrigin cascade_origin, Optional<CSS::Selector::PseudoElement::Type> pseudo_element) const
{
auto const& root_node = element.root();
Expand Down Expand Up @@ -358,6 +370,10 @@ Vector<MatchingRule> StyleComputer::collect_matching_rules(DOM::Element const& e
continue;

auto const& selector = rule_to_run.rule->selectors()[rule_to_run.selector_index];

if (should_reject_with_ancestor_filter(*selector))
continue;

if (rule_to_run.can_use_fast_matches) {
if (!SelectorEngine::fast_matches(selector, *rule_to_run.sheet, element))
continue;
Expand Down Expand Up @@ -2585,4 +2601,35 @@ void StyleComputer::compute_math_depth(StyleProperties& style, DOM::Element cons
style.set_math_depth(inherited_math_depth());
}

// Invokes `callback` with the hash of every string on `element` that the ancestor
// filter tracks: its local name, its id (if any), each class name, and the local
// name of each attribute.
static void for_each_element_hash(DOM::Element const& element, auto callback)
{
    callback(element.local_name().hash());

    if (auto const& id = element.id(); id.has_value())
        callback(id.value().hash());

    for (auto const& class_name : element.class_names())
        callback(class_name.hash());

    element.for_each_attribute([&](auto& attribute) {
        auto const name_hash = attribute.local_name().hash();
        callback(name_hash);
    });
}

void StyleComputer::reset_ancestor_filter()
{
m_ancestor_filter.clear();
}

// Adds every tracked string hash of `element` to the ancestor filter.
void StyleComputer::push_ancestor(DOM::Element const& element)
{
    auto add_to_filter = [this](u32 hash) {
        m_ancestor_filter.increment(hash);
    };
    for_each_element_hash(element, add_to_filter);
}

// Removes every tracked string hash of `element` from the ancestor filter,
// undoing the effect of a matching push_ancestor() call.
void StyleComputer::pop_ancestor(DOM::Element const& element)
{
    auto remove_from_filter = [this](u32 hash) {
        m_ancestor_filter.decrement(hash);
    };
    for_each_element_hash(element, remove_from_filter);
}

}
59 changes: 58 additions & 1 deletion Userland/Libraries/LibWeb/CSS/StyleComputer.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2018-2024, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
Expand All @@ -20,6 +20,55 @@

namespace Web::CSS {

// A counting bloom filter with 2 hash functions.
//
// Every key maps to two buckets in a table of (1 << key_bits) counters of type
// CounterType. increment()/decrement() adjust both counters, and may_contain()
// reports true only when both are non-zero, so the answer is either
// "definitely absent" (false) or "possibly present" (true).
//
// NOTE: If a counter overflows, it's kept maxed-out until the whole filter is cleared.
// NOTE: The second hash function uses the bits above bit 15 of the key, so only
//       16 bits are available to it; key_bits values above 16 gain nothing from it.
template<typename CounterType, size_t key_bits>
class CountingBloomFilter {
public:
    // The bucket table is zero-initialized by its default member initializer, so a
    // freshly constructed filter is empty even if clear() has never been called.
    CountingBloomFilter() = default;

    // Resets every counter (including saturated ones) to zero.
    void clear() { __builtin_memset(m_buckets, 0, sizeof(m_buckets)); }

    // Registers one occurrence of `key`.
    void increment(u32 key)
    {
        increment_bucket(bucket1(key));
        increment_bucket(bucket2(key));
    }

    // Removes one occurrence of `key`. Should be paired with an earlier increment().
    void decrement(u32 key)
    {
        decrement_bucket(bucket1(key));
        decrement_bucket(bucket2(key));
    }

    // Returns false if `hash` is definitely not in the filter, true if it may be.
    [[nodiscard]] bool may_contain(u32 hash) const
    {
        return bucket1(hash) && bucket2(hash);
    }

private:
    static constexpr u32 bucket_count = 1 << key_bits;
    static constexpr u32 key_mask = bucket_count - 1;

    // Saturating increment: a counter that reached the maximum stays there.
    static void increment_bucket(CounterType& bucket)
    {
        if (bucket < NumericLimits<CounterType>::max())
            ++bucket;
    }

    // A saturated counter has lost track of its true count, so it is left alone.
    // An empty counter is left alone as well: decrementing it would wrap around to
    // the maximum value and make the bucket look permanently saturated.
    static void decrement_bucket(CounterType& bucket)
    {
        if (bucket > 0 && bucket < NumericLimits<CounterType>::max())
            --bucket;
    }

    [[nodiscard]] u32 hash1(u32 key) const { return key & key_mask; }
    [[nodiscard]] u32 hash2(u32 key) const { return (key >> 16) & key_mask; }

    [[nodiscard]] CounterType& bucket1(u32 key) { return m_buckets[hash1(key)]; }
    [[nodiscard]] CounterType& bucket2(u32 key) { return m_buckets[hash2(key)]; }
    [[nodiscard]] CounterType bucket1(u32 key) const { return m_buckets[hash1(key)]; }
    [[nodiscard]] CounterType bucket2(u32 key) const { return m_buckets[hash2(key)]; }

    CounterType m_buckets[bucket_count] {};
};

// https://www.w3.org/TR/css-cascade/#origin
enum class CascadeOrigin : u8 {
Author,
Expand Down Expand Up @@ -69,6 +118,10 @@ class StyleComputer {
DOM::Document& document() { return m_document; }
DOM::Document const& document() const { return m_document; }

void reset_ancestor_filter();
void push_ancestor(DOM::Element const&);
void pop_ancestor(DOM::Element const&);

NonnullRefPtr<StyleProperties> create_document_style() const;

NonnullRefPtr<StyleProperties> compute_style(DOM::Element&, Optional<CSS::Selector::PseudoElement::Type> = {}) const;
Expand Down Expand Up @@ -103,6 +156,8 @@ class StyleComputer {
class FontLoader;
struct MatchingFontCandidate;

[[nodiscard]] bool should_reject_with_ancestor_filter(Selector const&) const;

RefPtr<StyleProperties> compute_style_impl(DOM::Element&, Optional<CSS::Selector::PseudoElement::Type>, ComputeStyleMode) const;
void compute_cascaded_values(StyleProperties&, DOM::Element&, Optional<CSS::Selector::PseudoElement::Type>, bool& did_match_any_pseudo_element_rules, ComputeStyleMode) const;
static RefPtr<Gfx::FontCascadeList const> find_matching_font_weight_ascending(Vector<MatchingFontCandidate> const& candidates, int target_weight, float font_size_in_pt, bool inclusive);
Expand Down Expand Up @@ -167,6 +222,8 @@ class StyleComputer {
Length::FontMetrics m_root_element_font_metrics;

CSSPixelRect m_viewport_rect;

CountingBloomFilter<u8, 14> m_ancestor_filter;
};

}
17 changes: 13 additions & 4 deletions Userland/Libraries/LibWeb/DOM/Document.cpp
Expand Up @@ -1105,11 +1105,14 @@ void Document::update_layout()
m_needs_layout = false;
}

[[nodiscard]] static CSS::RequiredInvalidationAfterStyleChange update_style_recursively(Node& node)
[[nodiscard]] static CSS::RequiredInvalidationAfterStyleChange update_style_recursively(Node& node, CSS::StyleComputer& style_computer)
{
bool const needs_full_style_update = node.document().needs_full_style_update();
CSS::RequiredInvalidationAfterStyleChange invalidation;

if (node.is_element())
style_computer.push_ancestor(static_cast<Element const&>(node));

// NOTE: If the current node has `display:none`, we can disregard all invalidation
// caused by its children, as they will not be rendered anyway.
// We will still recompute style for the children, though.
Expand All @@ -1125,7 +1128,7 @@ void Document::update_layout()
if (node.is_element()) {
if (auto* shadow_root = static_cast<DOM::Element&>(node).shadow_root_internal()) {
if (needs_full_style_update || shadow_root->needs_style_update() || shadow_root->child_needs_style_update()) {
auto subtree_invalidation = update_style_recursively(*shadow_root);
auto subtree_invalidation = update_style_recursively(*shadow_root, style_computer);
if (!is_display_none)
invalidation |= subtree_invalidation;
}
Expand All @@ -1134,7 +1137,7 @@ void Document::update_layout()

node.for_each_child([&](auto& child) {
if (needs_full_style_update || child.needs_style_update() || child.child_needs_style_update()) {
auto subtree_invalidation = update_style_recursively(child);
auto subtree_invalidation = update_style_recursively(child, style_computer);
if (!is_display_none)
invalidation |= subtree_invalidation;
}
Expand All @@ -1143,6 +1146,10 @@ void Document::update_layout()
}

node.set_child_needs_style_update(false);

if (node.is_element())
style_computer.pop_ancestor(static_cast<Element const&>(node));

return invalidation;
}

Expand All @@ -1165,7 +1172,9 @@ void Document::update_style()

evaluate_media_rules();

auto invalidation = update_style_recursively(*this);
style_computer().reset_ancestor_filter();

auto invalidation = update_style_recursively(*this, style_computer());
if (invalidation.rebuild_layout_tree) {
invalidate_layout();
} else {
Expand Down
10 changes: 10 additions & 0 deletions Userland/Libraries/LibWeb/Layout/TreeBuilder.cpp
Expand Up @@ -290,6 +290,14 @@ i32 TreeBuilder::calculate_list_item_index(DOM::Node& dom_node)

void TreeBuilder::create_layout_tree(DOM::Node& dom_node, TreeBuilder::Context& context)
{
if (dom_node.is_element())
dom_node.document().style_computer().push_ancestor(static_cast<DOM::Element const&>(dom_node));

ScopeGuard pop_ancestor_guard = [&] {
if (dom_node.is_element())
dom_node.document().style_computer().pop_ancestor(static_cast<DOM::Element const&>(dom_node));
};

JS::GCPtr<Layout::Node> layout_node;
Optional<TemporaryChange<bool>> has_svg_root_change;

Expand Down Expand Up @@ -469,6 +477,8 @@ JS::GCPtr<Layout::Node> TreeBuilder::build(DOM::Node& dom_node)
{
VERIFY(dom_node.is_document());

dom_node.document().style_computer().reset_ancestor_filter();

Context context;
m_quote_nesting_level = 0;
create_layout_tree(dom_node, context);
Expand Down

0 comments on commit afe6abf

Please sign in to comment.