Skip to content

Commit

Permalink
Merge branch 'tag' into 'dev'
Browse files Browse the repository at this point in the history
tag and template update

See merge request epi2melabs/workflows/wf-bacterial-genomes!42
  • Loading branch information
sarahjeeeze committed Oct 21, 2022
2 parents 7c8e932 + 5d1dbf3 commit 3eb3981
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 61 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [unreleased]

## [v0.2.6]
### Changes
- Added quast for assembly stats
- Removed sanitize option
### Fixes
- Update syntax to fix reference error

## [v0.2.5]
### Changes
Expand Down
50 changes: 1 addition & 49 deletions lib/fastqingress.nf
Original file line number Diff line number Diff line change
Expand Up @@ -87,42 +87,6 @@ def find_fastq(pattern, maxdepth)
}


/**
 * Rework EPI2ME flattened directory structure into standard form.
 *
 * Files returned by `find_fastq` for `input_folder` are moved into
 * `staging`. A file whose name contains a match for `barcode\d+` is moved
 * into a subdirectory of `staging` named after the matched barcode; any
 * other file is moved directly into the top level of `staging`.
 *
 * @param input_folder Top-level input directory.
 * @param staging Top-level output directory; created if it does not exist.
 * @return The `staging` directory that was passed in.
 */
def sanitize_fastq(input_folder, staging)
{
    // TODO: this fails if input_folder is an S3 path
    log.info "Running sanitization."
    log.info " - Moving files: ${input_folder} -> ${staging}"
    staging.mkdirs()

    // All working variables are declared with `def` so that they stay local
    // to this function instead of being written to the script binding.
    // The pattern is loop-invariant, so it is compiled once up front.
    def barcode_pattern = ~/barcode\d+/
    def fastqs = find_fastq(input_folder.resolve("**"), 1)
    for (fastq in fastqs) {
        def fname = fastq.getFileName()
        def matcher = fname =~ barcode_pattern
        if (!matcher.find()) {
            // not barcoded - move to the top level of staging unchanged
            fastq.renameTo(staging.resolve(fname))
        } else {
            // first match of the pattern in the file name, e.g. "barcode01"
            def barcode = matcher[0]
            def bc_dir = file(staging.resolve(barcode))
            bc_dir.mkdirs()
            fastq.renameTo(staging.resolve("${barcode}/${fname}"))
        }
    }
    log.info " - Finished sanitization."
    return staging
}


/**
* Take an input directory return the barcode and non barcode
* sub directories contained within.
Expand Down Expand Up @@ -345,8 +309,6 @@ def create_metamap(Map arguments) {
* @param input Top level input file or folder to locate fastq data.
* @param sample string to name single sample data.
* @param sample_sheet Path to sample sheet CSV file.
* @param sanitize regularize inputs from EPI2ME platform.
* @param output output location, required if sanitize==true
* @param min_barcode Minimum barcode to accept.
* @param max_barcode Maximum (inclusive) barcode to accept.
*
Expand All @@ -357,15 +319,11 @@ def fastq_ingress(Map arguments)
def parser = new ArgumentParser(
args:["input"],
kwargs:[
"sample":null, "sample_sheet":null, "sanitize":false, "output":null,
"sample":null, "sample_sheet":null,
"min_barcode":0, "max_barcode":Integer.MAX_VALUE],
name:"fastq_ingress")
Map margs = parser.parse_args(arguments)

if (margs.sanitize && margs.output == null) {
throw new Exception("Argument 'output' required if 'sanitize' is true.")
}


log.info "Checking fastq input."
input = file(margs.input)
Expand All @@ -382,12 +340,6 @@ def fastq_ingress(Map arguments)

// Handle directory input
if (input.isDirectory()) {
// EPI2ME harness
if (margs.sanitize) {
staging = file(margs.output).resolve("staging")
input = sanitize_fastq(input, staging)
}

// Get barcoded and non barcoded subdirectories
(barcoded, non_barcoded) = get_subdirectories(input)

Expand Down
4 changes: 1 addition & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,7 @@ workflow {
samples = fastq_ingress([
"input":params.fastq,
"sample":params.sample,
"sample_sheet":params.sample_sheet,
"sanitize": params.sanitize_fastq,
"output":params.out_dir])
"sample_sheet":params.sample_sheet])

reference = params.reference
results = calling_pipeline(samples, reference)
Expand Down
3 changes: 1 addition & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@ params {
chunk_size = 1000000
run_prokka = true
prokka_opts = null
wfversion = "v0.2.5"
wfversion = "v0.2.6"
prokka_version = "1.14.5"
aws_image_prefix = null
aws_queue = null
report_name = "report"
sample = null
sample_sheet = null
sanitize_fastq = false
disable_ping = false
genome_size = 5000000

Expand Down
7 changes: 1 addition & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,6 @@
"default": 5000000,
"description": "Estimated genome size to be used in assembly"
},
"sanitize_fastq": {
"type": "boolean",
"description": "Use additional heuristics to identify barcodes from file paths",
"help_text": "Enabling this option will group together files into samples by the presence of strings of the form `barcodeXXX` present in filenames, rather than simply files grouped into directories (as output by MinKNOW and the Guppy basecaller)"
},
"chunk_size": {
"type": "integer",
"default": 1000000,
Expand Down Expand Up @@ -139,7 +134,7 @@
},
"wfversion": {
"type": "string",
"default": "v0.2.5",
"default": "v0.2.6",
"hidden": true
},
"help": {
Expand Down

0 comments on commit 3eb3981

Please sign in to comment.