Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'CW-2063-samplesheet' into 'dev'
Add sample_sheet test. Closes CW-2063. See merge request epi2melabs/workflows/wf-bacterial-genomes!61.
- Loading branch information
Showing 6 changed files with 104 additions and 38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,43 +1,93 @@ | ||
"""Script to check that sample sheet is well-formatted.""" | ||
"""Check if a sample sheet is valid.""" | ||
import csv | ||
import sys | ||
|
||
import pandas as pd | ||
|
||
from .util import get_named_logger, wf_parser # noqa: ABS101 | ||
|
||
|
||
def main(args): | ||
"""Run entry point.""" | ||
logger = get_named_logger("check-sheet") | ||
"""Run the entry point.""" | ||
logger = get_named_logger("checkSheet") | ||
|
||
barcodes = [] | ||
aliases = [] | ||
sample_types = [] | ||
allowed_sample_types = [ | ||
"test_sample", "positive_control", "negative_control", "no_template_control" | ||
] | ||
|
||
try: | ||
logger.info(f"Reading {args.sample_sheet}.") | ||
samples = pd.read_csv(args.sample_sheet, sep=None) | ||
if 'alias' in samples.columns: | ||
if 'sample_id' in samples.columns: | ||
sys.stderr.write( | ||
"Warning: sample sheet contains both 'alias' and " | ||
'sample_id, using the former.') | ||
samples['sample_id'] = samples['alias'] | ||
if not set(['sample_id', 'barcode']).intersection(samples.columns): | ||
raise IOError() | ||
except Exception: | ||
raise IOError( | ||
"Could not parse sample sheet, it must contain two columns " | ||
"named 'barcode' and 'sample_id' or 'alias'.") | ||
# check duplicates | ||
dup_bc = samples['barcode'].duplicated() | ||
dup_sample = samples['sample_id'].duplicated() | ||
if any(dup_bc) or any(dup_sample): | ||
raise IOError( | ||
"Sample sheet contains duplicate values.") | ||
samples.to_csv(args.output, sep=",", index=False) | ||
logger.info(f"Written cleaned-up sheet to {args.output}.") | ||
with open(args.sample_sheet, "r") as f: | ||
csv_reader = csv.DictReader(f) | ||
n_row = 0 | ||
for row in csv_reader: | ||
n_row += 1 | ||
if n_row == 1: | ||
n_cols = len(row) | ||
else: | ||
# check we got the same number of fields | ||
if len(row) != n_cols: | ||
raise ValueError( | ||
f"Unexpected number of cells in row number {n_row}." | ||
) | ||
try: | ||
barcodes.append(row["barcode"]) | ||
except KeyError: | ||
sys.stdout.write("'barcode' column missing") | ||
sys.exit() | ||
try: | ||
aliases.append(row["alias"]) | ||
except KeyError: | ||
sys.stdout.write("'alias' column missing") | ||
sys.exit() | ||
try: | ||
sample_types.append(row["type"]) | ||
except KeyError: | ||
pass | ||
except Exception as e: | ||
sys.stdout.write(f"Parsing error: {e}") | ||
sys.exit() | ||
|
||
# check barcode and alias values are unique | ||
if len(barcodes) > len(set(barcodes)): | ||
sys.stdout.write("values in 'barcode' column not unique") | ||
sys.exit() | ||
if len(aliases) > len(set(aliases)): | ||
sys.stdout.write("values in 'alias' column not unique") | ||
sys.exit() | ||
|
||
if sample_types: | ||
# check if "type" column has unexpected values | ||
unexp_type_vals = set(sample_types) - set(allowed_sample_types) | ||
|
||
if unexp_type_vals: | ||
sys.stdout.write( | ||
f"found unexpected values in 'type' column: {unexp_type_vals}. " | ||
f"Allowed values are: {allowed_sample_types}" | ||
) | ||
sys.exit() | ||
|
||
if args.required_sample_types: | ||
for required_type in args.required_sample_types: | ||
if required_type not in allowed_sample_types: | ||
sys.stdout.write(f"Not an allowed sample type: {required_type}") | ||
sys.exit() | ||
if sample_types.count(required_type) < 1: | ||
sys.stdout.write( | ||
f"Sample sheet requires at least 1 of {required_type}") | ||
sys.exit() | ||
|
||
logger.info(f"Checked sample sheet {args.sample_sheet}.") | ||
|
||
|
||
def argparser(): | ||
"""Argument parser for entrypoint.""" | ||
parser = wf_parser("check-sample-sheet") | ||
parser.add_argument('sample_sheet') | ||
parser.add_argument('output') | ||
parser = wf_parser("check_sample_sheet") | ||
parser.add_argument("sample_sheet", help="Sample sheet to check") | ||
parser.add_argument( | ||
"--required_sample_types", | ||
help="List of required sample types. Each sample type provided must " | ||
"appear at least once in the sample sheet", | ||
nargs="*" | ||
) | ||
return parser |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
barcode,alias,type | ||
barcode01,sample1,test_sample | ||
barcode02,sample2,test_sample |