Skip to content

Commit

Permalink
ZeroTriePerfectHash: Increase bounds of p for newly found hard case (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed May 11, 2024
1 parent fbd61a3 commit 1ada2eb
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -62,6 +62,7 @@
- `zerotrie`
- Add `as_borrowed_slice` and `AsRef` impl (https://github.com/unicode-org/icu4x/pull/4381)
- Add `ZeroTrieSimpleAsciiCursor` for manual iteration (https://github.com/unicode-org/icu4x/pull/4383)
- Increase bound of `p` to solve more perfect hash functions; _might_ break serialized ZeroTriePerfectHash from previous versions (https://github.com/unicode-org/icu4x/pull/4888)
- `zerovec`
- Change `ZeroHashMap` to use `twox-hash` (https://github.com/unicode-org/icu4x/pull/4592)
- `writeable`
Expand Down
10 changes: 6 additions & 4 deletions utils/zerotrie/src/byte_phf/builder.rs
Expand Up @@ -26,7 +26,7 @@ pub fn find(bytes: &[u8]) -> Result<(u8, Vec<u8>), Error> {

let mut bqs = vec![0u8; N];
let mut seen = vec![false; N];
let mut max_allowable_p = P_FAST_MAX;
let max_allowable_p = P_FAST_MAX;
let mut max_allowable_q = Q_FAST_MAX;

'p_loop: loop {
Expand Down Expand Up @@ -67,13 +67,15 @@ pub fn find(bytes: &[u8]) -> Result<(u8, Vec<u8>), Error> {
num_max_q += 1;
bqs[i] = 0;
if i == 0 || num_max_q > MAX_L2_SEARCH_MISSES {
if p == max_allowable_p && max_allowable_p != P_REAL_MAX {
if p == max_allowable_p && max_allowable_q != Q_REAL_MAX {
// println!("Could not solve fast function: trying again: {bytes:?}");
max_allowable_p = P_REAL_MAX;
max_allowable_q = Q_REAL_MAX;
p = 0;
continue 'p_loop;
} else if p == P_REAL_MAX {
} else if p == max_allowable_p {
// If a fallback algorithm for `p` is added, relax this assertion
// and re-run the loop with a higher `max_allowable_p`.
debug_assert_eq!(max_allowable_p, P_REAL_MAX);
// println!("Could not solve PHF function");
return Err(Error::CouldNotSolvePerfectHash);
} else {
Expand Down
35 changes: 26 additions & 9 deletions utils/zerotrie/src/byte_phf/mod.rs
Expand Up @@ -59,14 +59,16 @@ mod cached_owned;
pub use cached_owned::PerfectByteHashMapCacheOwned;

/// The cutoff for the fast version of [`f1`].
const P_FAST_MAX: u8 = 11;
#[cfg(feature = "alloc")] // used in the builder code
const P_FAST_MAX: u8 = 95;

/// The cutoff for the fast version of [`f2`].
const Q_FAST_MAX: u8 = 95;

/// The maximum allowable value of `p`. This could be raised if found to be necessary.
/// Values exceeding P_FAST_MAX could use a different `p` algorithm by modifying [`f1`].
#[cfg(feature = "alloc")] // used in the builder code
const P_REAL_MAX: u8 = 15;
const P_REAL_MAX: u8 = P_FAST_MAX;

/// The maximum allowable value of `q`. This could be raised if found to be necessary.
#[cfg(feature = "alloc")] // used in the builder code
Expand Down Expand Up @@ -118,13 +120,10 @@ pub fn f1(byte: u8, p: u8, n: usize) -> usize {
if p == 0 {
byte as usize % n
} else {
let mut result = byte ^ p ^ byte.wrapping_shr(p as u32);
// In almost all cases, the PHF works with the above constant-time operation.
// However, to crack a few difficult cases, we fall back to the linear-time
// operation shown below.
for _ in P_FAST_MAX..p {
result = result ^ (result << 1) ^ (result >> 1);
}
// `p` always uses the below constant-time operation. If needed, we
// could add some other operation here with `p > P_FAST_MAX` to solve
// difficult cases if the need arises.
let result = byte ^ p ^ byte.wrapping_shr(p as u32);
result as usize % n
}
}
Expand Down Expand Up @@ -359,6 +358,24 @@ mod tests {
assert!(count_fastq >= count_slowq * 100);
}

#[test]
fn test_hard_cases() {
let keys = [
0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
126, 195, 196,
];

let computed = PerfectByteHashMap::try_new(&keys).unwrap();
let (p, qmax) = computed.p_qmax().unwrap();
assert_eq!(p, 69);
assert_eq!(qmax, 67);
}

#[test]
fn test_build_read_small() {
#[derive(Debug)]
Expand Down

0 comments on commit 1ada2eb

Please sign in to comment.