Skip to content

Commit

Permalink
Merge branch 'CW-3247_1' into 'dev'
Browse files (browse the repository at this point in the history)
Fix defect in build_tables where the sample was not taken into account when calculating bases mapped

Closes CW-3247

See merge request epi2melabs/workflows/wf-cas9!52
  • Loading branch information
nrhorner committed Jan 4, 2024
2 parents 7a7ce3c + 72de349 commit 8bfe7d6
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 7 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v1.0.1]
### Fixed
- Overestimation of kbases mapped values.
- Target coverage plots showing only one sample.

## [v1.0.0]
### Added
- Memory and CPU requirements for each process.
Expand Down
7 changes: 4 additions & 3 deletions bin/workflow_glue/build_tables.py
Expand Up @@ -59,8 +59,8 @@ def main(args):
read_stats_df = pd.read_csv(args.aln_summary, sep='\t', index_col=False)

df_read_to_target = df_read_to_target.merge(
read_stats_df[['name', 'read_length']],
left_on='read_id', right_on='name')
read_stats_df[['sample_id', 'name', 'read_length']],
left_on=['sample_id', 'read_id'], right_on=['sample_id', 'name'])

df_target_summary = pd.read_csv(
args.target_summary, sep='\t', names=header, index_col=False)
Expand Down Expand Up @@ -93,7 +93,8 @@ def main(args):
# Deletions and insertions within the reads will mean the actual value may
# vary slightly
kbases = (
df_read_to_target[['target', 'align_len']]
df_read_to_target[
df_read_to_target.sample_id == id_][['target', 'align_len']]
.groupby(['target']).sum() / 1000
)
kbases.columns = ['kbases']
Expand Down
12 changes: 10 additions & 2 deletions lib/ingress.nf
Expand Up @@ -307,14 +307,18 @@ process bamstats {
input:
tuple val(meta), path("reads.bam")
output:
tuple val(meta), path("reads.bam"), path("bamstats_results")
tuple val(meta),
path("reads.bam"),
path("bamstats_results")
script:
def bamstats_threads = Math.max(1, task.cpus - 1)
"""
mkdir bamstats_results
bamstats reads.bam -s $meta.alias -u \
-f bamstats_results/bamstats.flagstat.tsv -t $bamstats_threads \
--histograms histograms \
| bgzip > bamstats_results/bamstats.readstats.tsv.gz
mv histograms/* bamstats_results/
# extract the run IDs from the per-read stats
csvtk cut -tf runid bamstats_results/bamstats.readstats.tsv.gz \
Expand Down Expand Up @@ -451,7 +455,9 @@ process fastcat {
tuple val(meta), path("input")
val extra_args
output:
tuple val(meta), path("seqs.fastq.gz"), path("fastcat_stats")
tuple val(meta),
path("seqs.fastq.gz"),
path("fastcat_stats")
script:
String out = "seqs.fastq.gz"
String fastcat_stats_outdir = "fastcat_stats"
Expand All @@ -461,10 +467,12 @@ process fastcat {
-s ${meta["alias"]} \
-r >(bgzip -c > $fastcat_stats_outdir/per-read-stats.tsv.gz) \
-f $fastcat_stats_outdir/per-file-stats.tsv \
--histograms histograms \
$extra_args \
input \
| bgzip > $out
mv histograms/* $fastcat_stats_outdir
# extract the run IDs from the per-read stats
csvtk cut -tf runid $fastcat_stats_outdir/per-read-stats.tsv.gz \
| csvtk del-header | sort | uniq > $fastcat_stats_outdir/run_ids
Expand Down
1 change: 1 addition & 0 deletions main.nf
Expand Up @@ -511,6 +511,7 @@ workflow pipeline {
align_reads.out.bed)

tar_cov_tsv = target_coverage.out.target_coverage
.map {meta, target_cov -> target_cov}
.collectFile(name: 'target_coverage', keepHeader: true)

tile_cov = background.out.tiles_coverage.collectFile(name: 'tile_cov', keepHeader: true)
Expand Down
4 changes: 2 additions & 2 deletions nextflow.config
Expand Up @@ -42,7 +42,7 @@ params {
]

container_sha = "shaa7b95018145dc9c753d6092309ac6be5166a491a"
common_sha = "sha91452ece4f647f62b32dac3a614635a6f0d7f8b5"
common_sha = "sha399b89c275a4d8eac477a415691cb93180661be6"
}
}

Expand All @@ -53,7 +53,7 @@ manifest {
description = 'Summarise the results of Cas9 enrichment sequencing.'
mainScript = 'main.nf'
nextflowVersion = '>=23.04.2'
version = 'v1.0.0'
version = 'v1.0.1'

}

Expand Down

0 comments on commit 8bfe7d6

Please sign in to comment.