From d17f823de1466c0fa2f21ae1dbdd1298a36744e6 Mon Sep 17 00:00:00 2001 From: Mitchell Robert Vollger Date: Tue, 11 Jan 2022 17:39:41 -0800 Subject: [PATCH] fix: added code to ignore commented lines in a bed file (#474) * Added code to ignore commented lines in a bed file and added a simple test function * Update CHANGELOG.md Co-authored-by: Michael Hall --- CHANGELOG.md | 3 +++ src/io/bed.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7ca6bcea..6d0d8cf98 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +# [Unreleased](https://github.com/rust-bio/rust-bio/compare/v0.39.0...HEAD) +- Fixed the bed reader to allow comment lines (@mrvollger). + # [0.35.0] - 2021-07-05 - Improved buffer control in Fasta and Fastq API (@natir). - Fixed an indexing bug in ArrayBackedIntervalTree (@wabain). diff --git a/src/io/bed.rs b/src/io/bed.rs index 08182eb73..f79c2ca65 100644 --- a/src/io/bed.rs +++ b/src/io/bed.rs @@ -54,6 +54,7 @@ impl Reader { inner: csv::ReaderBuilder::new() .delimiter(b'\t') .has_headers(false) + .comment(Some(b'#')) .from_reader(reader), } } @@ -391,6 +392,12 @@ mod tests { const BED_FILE: &[u8] = b"1\t5\t5000\tname1\tup 2\t3\t5005\tname2\tup +"; + const BED_FILE_COMMENT: &[u8] = b"\ +# this line should be ignored +1\t5\t5000\tname1\tup +# and this one as well +2\t3\t5005\tname2\tup "; const BED_FILE_COMPACT: &[u8] = b"1\t5\t5000\n2\t3\t5005\n"; @@ -413,6 +420,25 @@ mod tests { } } + #[test] + fn test_reader_with_comment() { + let chroms = ["1", "2"]; + let starts = [5, 3]; + let ends = [5000, 5005]; + let names = ["name1", "name2"]; + let scores = ["up", "up"]; + + let mut reader = Reader::new(BED_FILE_COMMENT); + for (i, r) in reader.records().enumerate() { + let record = r.expect("Error reading record"); + assert_eq!(record.chrom(), chroms[i]); + assert_eq!(record.start(), starts[i]); + assert_eq!(record.end(), ends[i]); + assert_eq!(record.name().expect("Error reading name"), names[i]); + assert_eq!(record.score().expect("Error reading score"), scores[i]); + } + } + #[test] fn test_reader_compact() { let chroms = ["1", "2"];