rust-bio · pmarks · Aug 23, 2021 · Aug 19, 2021 · Aug 20, 2021 · Aug 20, 2021
diff --git a/src/data_structures/bwt.rs b/src/data_structures/bwt.rs
@@ -73,7 +73,7 @@ pub fn invert_bwt(bwt: &BWTSlice) -> Vec<u8> {
 }
 
 /// An occurrence array implementation.
-#[derive(Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize)]
 pub struct Occ {
     occ: Vec<Vec<usize>>,
     k: u32,

diff --git a/src/data_structures/fmindex.rs b/src/data_structures/fmindex.rs
@@ -480,6 +480,21 @@ impl<DBWT: Borrow<BWT>, DLess: Borrow<Less>, DOcc: Borrow<Occ>> FMDIndex<DBWT, D
 
         self.backward_ext(&interval.swapped(), comp_a).swapped()
     }
+
+    /// Construct a new instance of the FMD index (see Heng Li (2012) Bioinformatics)
+    /// by wrapping an existing FM index, without checking the alphabet of its text.
+    ///
+    /// # Safety
+    ///
+    /// The caller must guarantee that the FM index was built on a BWT of a text over the
+    /// DNA alphabet with N (`alphabets::dna::n_alphabet()`) in which every text T is
+    /// followed by its reverse complement R, each terminated by the sentinel symbol `$`:
+    /// T$R$ for one text, or e.g. T1$R1$T2$R2$T3$R3$ for several. Nothing here verifies it.
+    pub unsafe fn from_fmindex_unchecked(
+        fmindex: FMIndex<DBWT, DLess, DOcc>,
+    ) -> FMDIndex<DBWT, DLess, DOcc> {
+        FMDIndex { fmindex }
+    }
 }
 
 #[cfg(test)]