-
Notifications
You must be signed in to change notification settings - Fork 22
/
main.wdl
172 lines (145 loc) · 7.13 KB
/
main.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
version 1.0
import "humanwgs_structs.wdl"
import "wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl" as BackendConfiguration
import "sample_analysis/sample_analysis.wdl" as SampleAnalysis
import "cohort_analysis/cohort_analysis.wdl" as CohortAnalysis
import "tertiary_analysis/tertiary_analysis.wdl" as TertiaryAnalysis
workflow humanwgs {
  # End-to-end human whole-genome-sequencing pipeline:
  #   1. per-sample alignment, small-variant/SV/repeat/CNV calling and phasing,
  #   2. optional joint (cohort-level) calling when the cohort has >1 sample,
  #   3. optional tertiary annotation/filtering (slivar + svpack).

  input {
    Cohort cohort
    ReferenceData reference
    SlivarData? slivar_data

    String deepvariant_version = "1.5.0"
    DeepVariantModel? deepvariant_model

    Int? pbsv_call_mem_gb
    Int? glnexus_mem_gb

    Boolean run_tertiary_analysis = false

    # Backend configuration
    String backend
    String? zones
    String? aws_spot_queue_arn
    String? aws_on_demand_queue_arn
    String? container_registry
    Boolean preemptible
  }

  # Resolve backend-specific runtime settings (zones, AWS queues, registry).
  call BackendConfiguration.backend_configuration {
    input:
      backend = backend,
      zones = zones,
      aws_spot_queue_arn = aws_spot_queue_arn,
      aws_on_demand_queue_arn = aws_on_demand_queue_arn,
      container_registry = container_registry
  }

  # Pick spot vs. on-demand runtime attributes once, for all downstream calls.
  RuntimeAttributes selected_runtime_attributes = if preemptible then backend_configuration.spot_runtime_attributes else backend_configuration.on_demand_runtime_attributes

  # Single-sample analysis for every member of the cohort.
  scatter (individual in cohort.samples) {
    call SampleAnalysis.sample_analysis {
      input:
        sample = individual,
        reference = reference,
        deepvariant_version = deepvariant_version,
        deepvariant_model = deepvariant_model,
        default_runtime_attributes = selected_runtime_attributes
    }
  }

  # Joint calling is only meaningful for cohorts with more than one sample.
  if (length(cohort.samples) > 1) {
    # Gather the sample IDs into an Array[String] via a scatter declaration.
    scatter (individual_for_id in cohort.samples) {
      String gathered_sample_id = individual_for_id.sample_id
    }

    call CohortAnalysis.cohort_analysis {
      input:
        cohort_id = cohort.cohort_id,
        sample_ids = gathered_sample_id,
        aligned_bams = flatten(sample_analysis.aligned_bams),
        svsigs = flatten(sample_analysis.svsigs),
        gvcfs = sample_analysis.small_variant_gvcf,
        reference = reference,
        pbsv_call_mem_gb = pbsv_call_mem_gb,
        glnexus_mem_gb = glnexus_mem_gb,
        default_runtime_attributes = selected_runtime_attributes
    }
  }

  # Tertiary analysis needs slivar data plus several optional reference
  # resources; it is skipped when any of them is missing.
  if (run_tertiary_analysis && defined(slivar_data) && defined(reference.gnomad_af) && defined(reference.hprc_af) && defined(reference.gff) && defined(reference.population_vcfs)) {
    # Prefer the joint cohort VCFs; for singleton cohorts fall back to the
    # (only) sample's phased VCFs.
    IndexData tertiary_small_variant_vcf = select_first([
      cohort_analysis.phased_joint_small_variant_vcf,
      sample_analysis.phased_small_variant_vcf[0]
    ])

    IndexData tertiary_sv_vcf = select_first([
      cohort_analysis.phased_joint_sv_vcf,
      sample_analysis.phased_sv_vcf[0]
    ])

    call TertiaryAnalysis.tertiary_analysis {
      input:
        cohort = cohort,
        small_variant_vcf = tertiary_small_variant_vcf,
        sv_vcf = tertiary_sv_vcf,
        reference = reference,
        slivar_data = select_first([slivar_data]),
        default_runtime_attributes = selected_runtime_attributes
    }
  }

  output {
    # sample_analysis: per-movie stats and alignments
    Array[Array[File]] bam_stats = sample_analysis.bam_stats
    Array[Array[File]] read_length_summary = sample_analysis.read_length_summary
    Array[Array[File]] read_quality_summary = sample_analysis.read_quality_summary

    # sample_analysis: per-sample small-variant calls
    Array[IndexData] small_variant_gvcfs = sample_analysis.small_variant_gvcf
    Array[File] small_variant_vcf_stats = sample_analysis.small_variant_vcf_stats
    Array[File] small_variant_roh_out = sample_analysis.small_variant_roh_out
    Array[File] small_variant_roh_bed = sample_analysis.small_variant_roh_bed

    # sample_analysis: phased variant calls and haplotagged alignments
    Array[IndexData] sample_phased_small_variant_vcfs = sample_analysis.phased_small_variant_vcf
    Array[IndexData] sample_phased_sv_vcfs = sample_analysis.phased_sv_vcf
    Array[File] sample_hiphase_stats = sample_analysis.hiphase_stats
    Array[File] sample_hiphase_blocks = sample_analysis.hiphase_blocks
    Array[File] sample_hiphase_haplotags = sample_analysis.hiphase_haplotags
    Array[IndexData] merged_haplotagged_bam = sample_analysis.merged_haplotagged_bam
    Array[File] haplotagged_bam_mosdepth_summary = sample_analysis.haplotagged_bam_mosdepth_summary
    Array[File] haplotagged_bam_mosdepth_region_bed = sample_analysis.haplotagged_bam_mosdepth_region_bed

    # sample_analysis: tandem-repeat genotyping (trgt)
    Array[IndexData] trgt_spanning_reads = sample_analysis.trgt_spanning_reads
    Array[IndexData] trgt_repeat_vcf = sample_analysis.trgt_repeat_vcf
    Array[File] trgt_dropouts = sample_analysis.trgt_dropouts

    # sample_analysis: CpG methylation pileups
    Array[Array[File]] cpg_pileup_beds = sample_analysis.cpg_pileup_beds
    Array[Array[File]] cpg_pileup_bigwigs = sample_analysis.cpg_pileup_bigwigs

    # sample_analysis: paraphase
    Array[File] paraphase_output_jsons = sample_analysis.paraphase_output_json
    Array[IndexData] paraphase_realigned_bams = sample_analysis.paraphase_realigned_bam
    Array[Array[File]] paraphase_vcfs = sample_analysis.paraphase_vcfs

    # sample_analysis: copy-number calling (hificnv)
    Array[IndexData] hificnv_vcfs = sample_analysis.hificnv_vcf
    Array[File] hificnv_copynum_bedgraphs = sample_analysis.hificnv_copynum_bedgraph
    Array[File] hificnv_depth_bws = sample_analysis.hificnv_depth_bw
    Array[File] hificnv_maf_bws = sample_analysis.hificnv_maf_bw

    # cohort_analysis (only set for multi-sample cohorts)
    IndexData? cohort_sv_vcf = cohort_analysis.phased_joint_sv_vcf
    IndexData? cohort_small_variant_vcf = cohort_analysis.phased_joint_small_variant_vcf
    File? cohort_hiphase_stats = cohort_analysis.hiphase_stats
    File? cohort_hiphase_blocks = cohort_analysis.hiphase_blocks

    # tertiary_analysis (only set when tertiary analysis ran)
    IndexData? filtered_small_variant_vcf = tertiary_analysis.filtered_small_variant_vcf
    IndexData? compound_het_small_variant_vcf = tertiary_analysis.compound_het_small_variant_vcf
    File? filtered_small_variant_tsv = tertiary_analysis.filtered_small_variant_tsv
    File? compound_het_small_variant_tsv = tertiary_analysis.compound_het_small_variant_tsv
    IndexData? filtered_svpack_vcf = tertiary_analysis.filtered_svpack_vcf
    File? filtered_svpack_tsv = tertiary_analysis.filtered_svpack_tsv
  }

  parameter_meta {
    cohort: {help: "Sample information for the cohort"}
    reference: {help: "Reference genome data"}
    slivar_data: {help: "Data files used for annotation with slivar (required if `run_tertiary_analysis` is set to `true`)"}
    deepvariant_version: {help: "Version of deepvariant to use"}
    deepvariant_model: {help: "Optional deepvariant model file to use"}
    pbsv_call_mem_gb: {help: "Optional amount of RAM in GB for pbsv_call; default 64 for cohorts N<=3, 96 for cohorts N>3"}
    glnexus_mem_gb: {help: "Optional amount of RAM in GB for glnexus; default 30"}
    run_tertiary_analysis: {help: "Run the optional tertiary analysis steps"}
    backend: {help: "Backend where the workflow will be executed ['GCP', 'Azure', 'AWS', 'HPC']"}
    zones: {help: "Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'"}
    aws_spot_queue_arn: {help: "Queue ARN for the spot batch queue; required if backend is set to 'AWS'"}
    aws_on_demand_queue_arn: {help: "Queue ARN for the on demand batch queue; required if backend is set to 'AWS'"}
    container_registry: {help: "Container registry where workflow images are hosted. If left blank, PacBio's public Quay.io registry will be used."}
    preemptible: {help: "Where possible, run tasks preemptibly"}
  }
}