-
Notifications
You must be signed in to change notification settings - Fork 4
/
nextflow_schema.json
199 lines (199 loc) · 14.7 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
"title": "epi2me-labs/wf-flu",
"description": "Influenza A&B typing and analysis from Nanopore data.",
"demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-flu/wf-flu-demo.tar.gz",
"url": "https://github.com/epi2me-labs/wf-flu",
"type": "object",
"definitions": {
"input": {
"title": "Input Options",
"type": "object",
"fa_icon": "fas fa-arrow-right",
"description": "Parameters for finding and handling input data for analysis.",
"properties": {
"fastq": {
"type": "string",
"format": "path",
"description": "FASTQ files to use in the analysis.",
"help_text": "This accepts one of three cases: (i) the path to a single FASTQ file; (ii) the path to a top-level directory containing FASTQ files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`."
},
"analyse_unclassified": {
"type": "boolean",
"default": false,
"description": "Analyse unclassified reads from input directory. By default the workflow will not process reads in the unclassified directory.",
"help_text": "If selected and if the input is a multiplex directory the workflow will also process the unclassified directory."
}
},
"required": [
"fastq"
]
},
"samples": {
"title": "Sample Options",
"type": "object",
"fa_icon": "fas fa-vials",
"description": "Parameters that relate to samples such as sample sheets and sample names.",
"properties": {
"sample_sheet": {
"type": "string",
"format": "path",
"description": "A CSV file used to map barcodes to sample aliases. The sample sheet can be provided when the input data is a directory containing sub-directories with FASTQ files.",
"help_text": "The sample sheet is a CSV file with, minimally, columns named `barcode` and `alias`. Extra columns are allowed. A `type` column is required for certain workflows and should have the following values; `test_sample`, `positive_control`, `negative_control`, `no_template_control`."
},
"sample": {
"type": "string",
"description": "A single sample name for non-multiplexed data. Permissible if passing a single .fastq(.gz) file or directory of .fastq(.gz) files."
}
}
},
"output": {
"title": "Output Options",
"type": "object",
"fa_icon": "fas fa-arrow-left",
"description": "Parameters for saving and naming workflow outputs.",
"properties": {
"out_dir": {
"type": "string",
"default": "output",
"format": "directory-path",
"description": "Directory for output of all workflow results."
}
}
},
"advanced_options": {
"title": "Advanced Options",
"type": "object",
"fa_icon": "far fa-question-circle",
"description": "Advanced options for configuring processes inside the workflow.",
"help_text": "These advanced options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow.",
"properties": {
"reference": {
"type": "string",
"format": "file-path",
"description": "Enter the full path to a custom reference genome you would like to use.",
"help_text": "The workflow defaults to the IRMA consensus reference. This option allows you to specify a path to an alterative reference."
},
"blastdb": {
"type": "string",
"format": "file-path",
"description": "blastdb file used for typing.",
"help_text": "The workflow provides the INSaFLU blastdb. If you would like to supply an alterative then provide the full path to the file here."
},
"min_coverage": {
"type": "integer",
"description": "Coverage threshold for masking bases in the consensus.",
"help_text": "Any bases that are covered below 20x are masked (i.e. represented by 'N') by default in the consensus, this threshold can be changed using this parameter.",
"min": 0,
"default": 20
},
"min_qscore": {
"type": "number",
"description": "Minimum read quality score for fastcat.",
"help_text": "Any reads which are below quality score of 9 are not used by default. This parameter allows you to customise that. For more information on quality scores please see this blog post: https://labs.epi2me.io/quality-scores",
"min": 0,
"default": 9
},
"downsample": {
"type": "integer",
"description": "Number of reads to downsample to in each direction, leave blank for no downsampling.",
"help_text": "By default the workflow will use all high quality reads for typing, however, in experiements where you have a lot of data you might want to restrict the amount of reads processed to speed up the time to answer. Specify the number of reads you would like to downsample to here.",
"min": 1
},
"align_threads": {
"type": "number",
"description": "Number of CPU threads to use per alignment task.",
"help_text": "The total CPU resource used by the workflow is constrained by the executor configuration.",
"min": 1,
"default": 4
}
}
},
"misc": {
"title": "Miscellaneous Options",
"type": "object",
"description": "Everything else.",
"default": "",
"properties": {
"disable_ping": {
"type": "boolean",
"default": false,
"description": "Enable to prevent sending a workflow ping."
},
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
"hidden": true
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
"fa_icon": "fas fa-question-circle",
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input"
},
{
"$ref": "#/definitions/samples"
},
{
"$ref": "#/definitions/output"
},
{
"$ref": "#/definitions/advanced_options"
},
{
"$ref": "#/definitions/misc"
}
],
"properties": {
"process_label": {
"type": "string",
"description": "The main process label for processes to use by default",
"hidden": true,
"default": "wfflu"
},
"aws_image_prefix": {
"type": "string",
"hidden": true
},
"aws_queue": {
"type": "string",
"hidden": true
},
"_reference": {
"type": "string",
"hidden": true
},
"_blastdb": {
"type": "string",
"hidden": true
},
"wfversion": {
"type": "string",
"default": "v0.0.5",
"hidden": true
},
"monochrome_logs": {
"type": "boolean"
},
"validate_params": {
"type": "boolean",
"default": true
},
"show_hidden_params": {
"type": "boolean"
}
},
"docs": {
"intro": "## Introduction\n\nInfluenza is a single-stranded RNA virus and contains a 13.5-14.5kb genome which is split into 8 segments encoding 10-14 proteins (dependent on strain).\n\nThe virus is classified using two proteins found on the outer surface of the viral capsid. You\u2019ve probably heard of H1N1 Influenza for example. The H represents hemagglutinin and the N is neuraminidase.\n\nThe Oxford Nanopore Technologies protocol listed [here](https://community.nanoporetech.com/docs/prepare/library_prep_protocols/ligation-sequencing-influenza-whole-genome) amplifies segments of the Influenza Type A and Type B genomes. Using this analysis workflow, users can determine the most likely strain of Influenza to which the sample being sequenced belongs.\n\n### Data Analysis\n\nWorkflow steps:\n1. Concatenate reads & filter out short reads < 200 bases long\n2. Align reads to reference (minimap2)\n3. Coverage calculations (samtools)\n4. Call variants with medaka\n5. Make a (coverage masked) consensus with bcftools\n6. Type with abricate\n\n#### Downsampling\n\nDownsampling is optional\n\nFor every segment in the reference genome:\n1. Get the length\n2. Work out +/- 10%\n3. Filter reads within that segment and within +/- 10% of segment length\n\n#### Typing\n\nTyping is carried out using [abricate](https://github.com/tseemann/abricate) using the insaflu database containing the following sequences:\n\n| Database|Gene|Accession|Details|\n|----|----|----|----|\n|insaflu|M1|MK576795|Type_A MK576795 A/England/7821/2019 2019/01/03 7 (MP)\n|insaflu|M1|AF100378|Type_B AF100378.1 Influenza B virus B/Yamagata/16/88 segment 7 M1 matrix protein (M) and BM2 protein (BM2) genes, complete cds\n|insaflu|HA|FJ966974|H1 FJ966974.1 Influenza A virus (A/California/07/2009(H1N1)) segment 4 hemagglutinin (HA) gene, complete cds\n|insaflu|HA|L11142|H2 L11142.1 Influenza A virus (A/Singapore/1/57 (H2N2)) hemagglutinin (HA) gene, complete cds\n|insaflu|HA|MK576794|H3 MK576794 A/England/7821/2019 2019/01/03 4 (HA)\n|insaflu|HA|AF285883|H4 AF285883.2 Influenza A virus (A/Swine/Ontario/01911-2/99 (H4N6)) segment 4 hemagglutinin (HA) gene, complete cds\n|insaflu|HA|EF541403|H5 EF541403.1 Influenza A virus (A/Viet Nam/1203/2004(H5N1)) segment 4 hemagglutinin (HA) gene, complete cds\n|insaflu|HA|AB295613|H15 AB295613.1 Influenza A virus (A/duck/Australia/341/83(H15N8)) HA gene for haemagglutinin, complete cds\n|insaflu|NA|GQ377078|N1 GQ377078.1 Influenza A virus (A/California/07/2009(H1N1)) segment 6 neuraminidase (NA) gene, complete cds\n|insaflu|NA|MK576796|N2 MK576796 A/England/7821/2019 2019/01/03 6 (NA)\n|insaflu|NA|AB295614|N8 AB295614.1 Influenza A virus (A/duck/Australia/341/83(H15N8)) NA gene for neuraminidase, complete cds\n|insaflu|HA|AY338459|H7 AY338459.1 Influenza A virus (A/Netherlands/219/2003(H7N7)) segment 4 hemagglutinin (HA) gene, complete cds\n|insaflu|HA|CY014659|H8 CY014659.1 Influenza A virus (A/turkey/Ontario/6118/1968(H8N4)) segment 4, complete sequence\n|insaflu|HA|CY014694|H13 CY014694.1 Influenza A virus (A/gull/Maryland/704/1977(H13N6)) segment 4, complete sequence\n|insaflu|HA|CY018765|Yamagata CY018765.1 Influenza B virus (B/Yamagata/16/1988) segment 4, complete sequence\n|insaflu|HA|CY103892|H17 CY103892.1 Influenza A virus (A/little yellow-shouldered bat/Guatemala/060/2010(H17N10)) hemagglutinin (HA) gene, complete cds\n|insaflu|NA|CY103894|N10 CY103894.1 Influenza A virus (A/little yellow-shouldered bat/Guatemala/060/2010(H17N10)) neuraminidase (NA) gene, complete cds\n|insaflu|NA|CY125730|N3v2 CY125730.1 Influenza A virus (A/Mexico/InDRE7218/2012(H7N3)) neuraminidase (NA) gene, complete cds\n|insaflu|HA|CY125945|H18 CY125945.1 Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) hemagglutinin (HA) gene, complete cds\n|insaflu|NA|CY125947|N11 CY125947.1 Influenza A virus (A/flat-faced bat/Peru/033/2010(H18N11)) neuraminidase-like protein (NA) gene, complete cds\n|insaflu|HA|CY130078|H12 CY130078.1 Influenza A virus (A/duck/Alberta/60/1976(H12N5)) hemagglutinin (HA) gene, complete cds\n|insaflu|HA|CY130094|H14 CY130094.1 Influenza A virus (A/mallard/Astrakhan/263/1982(H14N5)) hemagglutinin (HA) gene, complete cds\n|insaflu|NA|CY130096|N5 CY130096.1 Influenza A virus (A/mallard/Astrakhan/263/1982(H14N5)) neuraminidase (NA) gene, complete cds\n|insaflu|HA|DQ376624|H6 DQ376624.1 Influenza A virus (A/chicken/Taiwan/0705/99(H6N1)) hemagglutinin (HA) gene, complete cds\n|insaflu|HA|EU293864|H16 EU293864.1 Influenza A virus (A/black-headed gull/Turkmenistan/13/76(H16N3)) hemagglutinin (HA) gene, complete cds\n|insaflu|HA|FJ183474|H10 FJ183474.1 Influenza A virus (A/mallard/Bavaria/3/2006(H10N7)) segment 4 hemagglutinin (HA) gene, complete cds\n|insaflu|NA|FJ183475|N7 FJ183475.1 Influenza A virus (A/mallard/Bavaria/3/2006(H10N7)) segment 6 neuraminidase (NA) gene, complete cds\n|insaflu|NA|GQ907296|N3v1 GQ907296.1 Influenza A virus (A/black headed gull/Mongolia/1756/2006(H16N3)) segment 6 neuraminidase (NA) gene, complete cds\n|insaflu|HA|GU052203|H11 GU052203.1 Influenza A virus (A/duck/England/1/1956(H11N6)) segment 4 hemagglutinin (HA) gene, complete cds\n|insaflu|NA|KC853765|N9 KC853765.1 Influenza A virus (A/Hangzhou/1/2013(H7N9)) segment 6 neuraminidase (NA) gene, complete cds\n|insaflu|HA|KX879589|H9 KX879589.1 Influenza A virus (A/swine/Hong Kong/9/98(H9N2)) segment 4 hemagglutinin (HA) gene, partial cds\n|insaflu|HA|M58428|Victoria M58428.1 Influenza B/Victoria/2/87, hemagglutinin (seg 4), RNA\n|insaflu|NA|EU429793|N4 EU429793.1 Influenza A virus (A/turkey/Ontario/6118/1968(H8N4)) segment 6 neuraminidase (NA) mRNA, complete cds\n|insaflu|NA|EU429795|N6 EU429795.1 Influenza A virus (A/duck/England/1/1956(H11N6)) segment 6 neuraminidase (NA) mRNA, complete cds\n",
"links": "## Useful links\n\n* [nextflow](https://www.nextflow.io/)\n* [docker](https://www.docker.com/products/docker-desktop)\n* [singularity](https://docs.sylabs.io/guides/3.5/user-guide/introduction.html)\n* [insaflu](https://insaflu.insa.pt/)\n"
}
}