Skip to content

Commit

Permalink
perf: improve alignment::distance::levenshtein and alignment::distanc…
Browse files Browse the repository at this point in the history
…e::bounded_levenshtein on strings where distance is small (#522)

* added O(nk) algorithm for edit distance with simd

* small edits

* small edits

* format fixing

* update dependencies

* increase MSRV

---------

Co-authored-by: Johannes Köster <johannes.koester@tu-dortmund.de>
Co-authored-by: Johannes Koester <johannes.koester@uk-essen.de>
  • Loading branch information
3 people committed Jun 14, 2023
1 parent 15cb044 commit da7daea
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Expand Up @@ -86,7 +86,7 @@ jobs:
needs: Formatting
runs-on: ubuntu-latest
env:
MSRV_VERSION: 1.60.0
MSRV_VERSION: 1.62.0
steps:
- name: Checkout repository
uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -53,6 +53,7 @@ triple_accel = ">=0.3, <0.5"
thiserror = "1"
anyhow = "1"
rand = ">=0.7.3, < 0.9"
editdistancek = ">=1.0.1, <2"

[dependencies.vec_map]
version = "0.8"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -57,7 +57,7 @@ For extra credit, feel free to familiarize yourself with:

## Minimum supported Rust version

Currently the minimum supported Rust version is 1.60.0.
Currently the minimum supported Rust version is 1.62.0.

## License

Expand Down
38 changes: 11 additions & 27 deletions src/alignment/distance.rs
Expand Up @@ -6,8 +6,6 @@
//! Various subroutines for computing a distance between sequences. Features
//! both scalar and efficient vectorized distance functions with SIMD.

use std::cmp::min;

use crate::utils::TextSlice;

/// Compute the Hamming distance between two strings. Complexity: O(n).
Expand Down Expand Up @@ -59,30 +57,7 @@ pub fn hamming(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u64 {
/// ```
#[allow(unused_assignments)]
pub fn levenshtein(alpha: TextSlice<'_>, beta: TextSlice<'_>) -> u32 {
let mut columns = [vec![0u32; alpha.len() + 1], vec![0u32; alpha.len() + 1]];
let mut i_prev = 0;
let mut i_cur = 1;

for i in 0..columns[0].len() {
columns[0][i] = i as u32;
}

for (j, item) in beta.iter().enumerate() {
i_cur %= 2;
i_prev = 1 - i_cur;

columns[i_cur][0] = 1 + j as u32;
for i in 1..columns[0].len() {
columns[i_cur][i] = min(
columns[i_prev][i - 1] + if alpha[i - 1] == *item { 0 } else { 1 },
min(columns[i_cur][i - 1] + 1, columns[i_prev][i] + 1),
);
}

i_cur += 1;
}

columns[i_cur - 1][columns[0].len() - 1]
editdistancek::edit_distance(alpha, beta) as u32
}

pub mod simd {
Expand All @@ -106,6 +81,7 @@ pub mod simd {
//! smaller vectors.

use crate::utils::TextSlice;
use std::cmp::{max, min};

/// SIMD-accelerated Hamming distance between two strings. Complexity: O(n / w), for
/// SIMD vectors of length w (usually w = 16 or w = 32).
Expand Down Expand Up @@ -187,7 +163,15 @@ pub mod simd {
/// assert_eq!(ldist, None);
/// ```
pub fn bounded_levenshtein(alpha: TextSlice<'_>, beta: TextSlice<'_>, k: u32) -> Option<u32> {
triple_accel::levenshtein::levenshtein_simd_k(alpha, beta, k)
if let Some(x) = editdistancek::edit_distance_bounded(
alpha,
beta,
min(k as usize, max(alpha.len(), beta.len())),
) {
Some(x as u32)
} else {
None
}

Check warning on line 174 in src/alignment/distance.rs

View workflow job for this annotation

GitHub Actions / clippy

manual implementation of `Option::map`

warning: manual implementation of `Option::map`
   --> src/alignment/distance.rs:166:9
    |
166 | /         if let Some(x) = editdistancek::edit_distance_bounded(
167 | |             alpha,
168 | |             beta,
169 | |             min(k as usize, max(alpha.len(), beta.len())),
...   |
173 | |             None
174 | |         }
    | |_________^
    |
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#manual_map
    = note: `#[warn(clippy::manual_map)]` on by default
help: try this
    |
166 ~         editdistancek::edit_distance_bounded(
167 +             alpha,
168 +             beta,
169 +             min(k as usize, max(alpha.len(), beta.len())),
170 +         ).map(|x| x as u32)
    |
}
}

Expand Down

0 comments on commit da7daea

Please sign in to comment.