From 304a21bca232be765723a044a4005e541d806de4 Mon Sep 17 00:00:00 2001
From: Jonathan Golob
Date: Wed, 10 Apr 2024 13:36:12 -0400
Subject: [PATCH] Basic working main.nf

Slight modification to other modules to smooth out the process
---
 .github/workflows/test.yaml |  27 ++
 main.nf                     | 484 ++++++++----------
 modules/allele_catalog.nf   |   3 +-
 modules/composition.nf      |   7 +-
 modules/general.nf          |  54 ----
 5 files changed, 133 insertions(+), 442 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 19a4fb1..8cc8506 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -4,6 +4,33 @@
 on: [push, pull_request]
 
 jobs:
+  full:
+    runs-on: ubuntu-22.04
+    env:
+      NXF_ANSI_LOG: 0
+    steps:
+      - uses: actions/checkout@v1
+      - name: Install Nextflow
+        run: |
+          wget -qO- https://get.nextflow.io | bash
+          sudo mv nextflow /usr/local/bin/
+      - name: Free disk space
+        run: |
+          sudo swapoff -a
+          sudo rm -f /swapfile
+          sudo apt clean
+          docker rmi $(docker image ls -aq)
+          df -h
+      - name: Run preprocess
+        run: |
+          NXF_VER=23.10.0 nextflow run main.nf --manifest data/mock.manifest.csv --output output --savereads --host_index data/hg_chr_21_bwa_index.tar.gz -w work/ -with-docker ubuntu:22.04 -c nextflow.config.sample -profile testing
+      - name: Validate results
+        run: |
+          tree output/
+          [[ -s output/qc/manifest.qc.csv ]]
+          [[ -s output/qc/Mock__11.R1.fastq.gz ]]
+          [[ -s output/qc/Mock__13__R1.noadapt.nohuman.fq.gz ]]
+
   preproccessing:
     runs-on: ubuntu-22.04
     env:

diff --git a/main.nf b/main.nf
index a41197a..d63eb11 100755
--- a/main.nf
+++ b/main.nf
@@ -9,14 +9,6 @@
 The short reads are then aligned against the assembled peptides plus uniref100.
 We use the FAMLI algorithm to adjudicate these alignments.
 Annotations can follow.
-
-    I. (Optional) Geneshot preprocessing submodule:
-    Steps:
-    1) (if index is available): barcodecop to verify demultiplexing
-    2) cutadapt to remove adapters.
-    3) remove human reads via
-        3A) downloading the cached human genome index
-        3B) aligning against the human genome and extracting unpaired reads
 */
 
 // Using DSL-2
@@ -25,32 +17,31 @@ nextflow.enable.dsl=2
 // Default values for boolean flags
 // If these are not set by the user, then they will be set to the values below
 // This is useful for the if/then control syntax below
-params.nopreprocess = false
-params.savereads = false
+
 params.help = false
-params.output = './results'
-params.output_prefix = 'geneshot'
+params.output = './results/'
 params.manifest = null
 
+// Flow control
+params.nopreprocess = false
+params.nocomposition = false
+
 // Preprocessing options
-params.hg_index_url = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.fna.bwa_index.tar.gz'
-params.hg_index = false
-params.min_hg_align_score = 30
+params.host_index_url = 'https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.fna.bwa_index.tar.gz'
+params.host_index = false
+params.min_host_align_score = 30
+params.savereads = false
 
-// Assembly options
-params.gene_fasta = false
+// Assembly-based Allele catalog options
 params.phred_offset = 33 // spades
-params.min_identity = 90 // linclust and reference genome alignment
-params.min_coverage = 50 // linclust and reference genome alignment
 
-// Alignment options
-params.dmnd_min_identity = 80 // DIAMOND
-params.dmnd_min_coverage = 50 // DIAMOND
+// Quantification via Alignment options
+params.dmnd_min_identity = 90 // DIAMOND
+params.dmnd_min_coverage = 80 // DIAMOND
 params.dmnd_top_pct = 1 // DIAMOND
 params.dmnd_min_score = 20 // DIAMOND
 params.gencode = 11 // DIAMOND
 params.sd_mean_cutoff = 3.0 // FAMLI
-params.famli_batchsize = 10000000 // FAMLI
 
 // Annotation options
 params.noannot = false
@@ -70,8 +61,6 @@
 params.formula = false
 params.fdr_method = "fdr_bh"
 params.corncob_batches = 10
 
-// Compositional analysis options
-params.composition = false
 
 // Function which prints help message text
 def helpMessage() {
@@ -83,24 +72,31 @@ def helpMessage() {
     Required Arguments:
       --manifest              CSV file listing samples (see below)
+
+    Flow control options:
+      --nopreprocess          If specified, omit the preprocessing steps (removing adapters and host sequences).
+                              Assumes the manifest lists reads that are already QCed.
+      --nocomposition         If specified, skip the MetaPhlAn2 compositional analysis steps.
+
 
     Options:
      --output                 Folder to place analysis outputs (default ./results)
-     --output_prefix          Text used as a prefix for summary HDF5 output files (default: geneshot)
-     --nopreprocess           If specified, omit the preprocessing steps (removing adapters and human sequences)
-     --savereads              If specified, save the preprocessed reads to the output folder (inside qc/)
     -w                        Working directory. Defaults to `./work`
 
    For preprocessing:
-     --hg_index_url           URL for human genome index, defaults to current HG
-     --hg_index               Cached copy of the bwa indexed human genome, TGZ format
-     --min_hg_align_score     Minimum alignment score for human genome (default 30)
+     --host_index_url         URL for host genome index, defaults to the current human genome build
+     --host_index             Cached copy of the bwa-indexed host genome, TGZ format
+     --min_host_align_score   Minimum alignment score against the host genome (default 30)
+     --savereads              If provided, save the preprocessed reads to the qc/ subdirectory.
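For orientation, the host-removal step these options feed into follows the usual bwa-plus-samtools pattern. A minimal sketch of that pattern, under stated assumptions (the real commands live in modules/preprocess.nf, which this patch does not touch; file names are placeholders, and the mapping of --min_host_align_score onto bwa's -T threshold is an assumption):

```bash
# Hypothetical sketch of host-read removal; not copied from modules/preprocess.nf.
tar xzf host_index.tar.gz                  # unpack the cached --host_index tarball
bwa mem -T 30 host_genome.fna R1.fastq.gz R2.fastq.gz \
    | samtools fastq -f 12 \
        -1 sample__R1.noadapt.nohuman.fq.gz \
        -2 sample__R2.noadapt.nohuman.fq.gz \
        -                                  # -f 12 keeps pairs where neither mate aligned to the host
```

The `*.noadapt.nohuman.fq.gz` names mirror the files asserted by the new `full` CI job above.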
-   For Assembly:
-     --gene_fasta             (optional) Compressed FASTA with pre-generated catalog of microbial genes.
-                              If provided, then the entire de novo assembly process will be skipped entirely.
+   For Allele Catalog via Assembly:
     --phred_offset            Phred quality offset for SPAdes (default: 33)
-     --min_identity           Amino acid identity cutoff used to combine similar genes (default: 90)
-     --min_coverage           Length cutoff used to combine similar genes (default: 50) (linclust)
+
+   For Quantification via Alignment:
+     --dmnd_min_identity      Amino acid identity cutoff used to align short reads (default: 90) (DIAMOND)
+     --dmnd_min_coverage      Query coverage cutoff used to align short reads (default: 80) (DIAMOND)
+     --dmnd_top_pct           Keep top X% of alignments for each short read (default: 1) (DIAMOND)
+     --dmnd_min_score         Minimum score for short read alignment (default: 20) (DIAMOND)
+     --gencode                Genetic code used for conceptual translation (default: 11) (DIAMOND)
+     --sd_mean_cutoff         Ratio of standard deviation / mean depth of sequencing used to filter genes (default: 3.0) (FAMLI)
 
    For Annotation:
     --noannot                 If specified, disable annotation for taxonomy or function.
     --taxonomic_dmnd          Database used for taxonomic annotation (default: false)
     --ncbi_taxdump            Reference describing the NCBI Taxonomy
     --eggnog_dmnd             One of two databases used for functional annotation with eggNOG (default: false)
     --eggnog_db               One of two databases used for functional annotation with eggNOG (default: false)
                               (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog.db)
@@ -114,27 +110,6 @@
-   For Alignment:
-     --dmnd_min_identity      Amino acid identity cutoff used to align short reads (default: 90) (DIAMOND)
-     --dmnd_min_coverage      Query coverage cutoff used to align short reads (default: 50) (DIAMOND)
-     --dmnd_top_pct           Keep top X% of alignments for each short read (default: 1) (DIAMOND)
-     --dmnd_min_score         Minimum score for short read alignment (default: 20) (DIAMOND)
-     --gencode                Genetic code used for conceptual translation (default: 11) (DIAMOND)
-     --sd_mean_cutoff         Ratio of standard deviation / mean depth of sequencing used to filter genes (default: 3.0) (FAMLI)
-     --famli_batchsize        Number of alignments to deduplicate in batches (default: 10000000) (FAMLI)
-
-   For CAGs:
-     --distance_metric        Distance metric used to group genes by co-abundance (default: cosine)
-     --distance_threshold     Distance threshold used to group genes by co-abundance (default: 0.25)
-     --linkage_type           Linkage type used to group genes by co-abundance (default: average)
-
-   For Statistical Analysis:
-     --formula                Optional formula used to estimate associations with CAG relative abundance
-     --fdr_method             FDR method used to calculate q-values for associations (default: 'fdr_bh')
-     --corncob_batches        Number of parallel processes to use processing each formula
-
-   For Compositional Analysis:
-     --composition            When included, metaPhlAn2 will be run on all specimens
 
    Batchfile:
      The manifest is a CSV with a header indicating which samples correspond to which files.
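A minimal manifest consistent with the columns main.nf reads below (r.specimen, r.R1, r.R2) might look like the following; the specimen names are illustrative, and index-read columns (I1/I2) are presumably also accepted for the valid_paired_indexed channel, though this patch does not show that module:

```bash
# Write a two-specimen manifest; column names match those referenced in main.nf.
cat > manifest.csv <<'EOF'
specimen,R1,R2
Mock__11,data/Mock__11.R1.fastq.gz,data/Mock__11.R2.fastq.gz
Mock__13,data/Mock__13.R1.fastq.gz,data/Mock__13.R2.fastq.gz
EOF
```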
@@ -156,75 +131,57 @@ if (params.help || params.manifest == null){
 // Make sure that --output ends with trailing "/" characters
 if (!params.output.endsWith("/")){
-    output_folder = params.output.concat("/")
+    params.output_folder = params.output.concat("/")
 } else {
-    output_folder = params.output
+    params.output_folder = params.output
 }
 
 // Import the preprocess_wf module
 include { Preprocess_wf } from './modules/preprocess' params(
     manifest: params.manifest,
-    hg_index: params.hg_index,
-    hg_index_url: params.hg_index_url,
-    min_hg_align_score: params.min_hg_align_score,
-    savereads: params.savereads,
-    output: output_folder
-)
-include { CombineReads} from './modules/preprocess' params(
+    host_index: params.host_index,
+    host_index_url: params.host_index_url,
+    min_host_align_score: params.min_host_align_score,
     savereads: params.savereads,
-    output: output_folder
-)
-include { WriteManifest} from './modules/preprocess' params(
-    savereads: params.savereads,
-    output: output_folder
+    output: params.output_folder,
+
 )
 
-// Import some general tasks, such as CombineReads and writeManifest
 include { Read_manifest } from './modules/general'
-include { countReads } from './modules/general'
-include { countReadsSummary } from './modules/general' params(
-    output_folder: output_folder
-)
-include { collectAbundances } from './modules/general' params(
-    output_prefix: params.output_prefix,
-    formula: params.formula,
-    distance_metric: params.distance_metric,
-    distance_threshold: params.distance_threshold,
-    linkage_type: params.linkage_type,
-    sd_mean_cutoff: params.sd_mean_cutoff,
-    min_identity: params.min_identity,
-    min_coverage: params.min_coverage,
-    dmnd_min_identity: params.dmnd_min_identity,
-    dmnd_min_coverage: params.dmnd_min_coverage
-)
-include { addGeneAssembly } from './modules/general'
-include { readTaxonomy } from './modules/general'
-include { addEggnogResults } from './modules/general'
-include { addCorncobResults } from './modules/general' params(
-    fdr_method: params.fdr_method
+include { Metaphlan2_wf } from './modules/composition' params(
+    manifest: params.manifest,
+    output: params.output_folder,
 )
-include { addTaxResults } from './modules/general'
-include { repackHDF as repackFullHDF } from './modules/general' params(
-    output_folder: output_folder
+
+// Import the workflows used for assembly-based allele-catalog
+include { Allele_catalog } from './modules/allele_catalog' params(
+    output: params.output_folder,
+    phred_offset: params.phred_offset,
 )
-include { repackHDF as repackDetailedHDF } from './modules/general' params(
-    output_folder: output_folder
+
+// Import the workflow responsible for clustering alleles into 'genes'
+include { Allele_clustering } from './modules/allele_clustering' params(
+    output: params.output_folder,
 )
 
-// Import the workflows used for assembly
-include { Genecatalog_wf } from './modules/genecatalog' params(
-    output_folder: output_folder,
-    output_prefix: params.output_prefix,
-    phred_offset: params.phred_offset,
-    min_identity: params.min_identity,
-    min_coverage: params.min_coverage,
-    noannot: params.noannot,
-    eggnog_db: params.eggnog_db,
-    eggnog_dmnd: params.eggnog_dmnd,
-    taxonomic_dmnd: params.taxonomic_dmnd,
+include { Alignment_wf } from './modules/quantify' params (
+    output: params.output_folder,
+    dmnd_min_identity: params.dmnd_min_identity,
+    dmnd_min_coverage: params.dmnd_min_coverage,
+    dmnd_top_pct: params.dmnd_top_pct,
+    dmnd_min_score: params.dmnd_min_score,
     gencode: params.gencode,
+    sd_mean_cutoff: params.sd_mean_cutoff,
+
 )
+/*
+
+
+
+// ---
+
+/*
 // Import the workflows used for annotation
 include { Annotation_wf } from './modules/annotation' params(
     output_folder: output_folder,
@@ -238,90 +195,18 @@ include { Annotation_wf } from './modules/annotation' params(
     gencode: params.gencode,
 )
 
-// Import the workflows used for alignment-based analysis
-include { Alignment_wf } from './modules/quantify' params(
-    output_folder: output_folder,
-    dmnd_min_identity: params.dmnd_min_identity,
-    dmnd_min_coverage: params.dmnd_min_coverage,
-    dmnd_top_pct: params.dmnd_top_pct,
-    dmnd_min_score: params.dmnd_min_score,
-    gencode: params.gencode,
-    sd_mean_cutoff: params.sd_mean_cutoff,
-    famli_batchsize: params.famli_batchsize,
-    cag_batchsize: params.cag_batchsize
-)
-
-// And for CAG generation
-include { CAG_contig_oriented_wf } from './modules/make_cags' params(
-    output_folder: output_folder,
-    dmnd_min_identity: params.dmnd_min_identity,
-    dmnd_min_coverage: params.dmnd_min_coverage,
-    dmnd_top_pct: params.dmnd_top_pct,
-    dmnd_min_score: params.dmnd_min_score,
-    gencode: params.gencode,
-    distance_metric: params.distance_metric,
-    distance_threshold: params.distance_threshold,
-    linkage_type: params.linkage_type,
-    sd_mean_cutoff: params.sd_mean_cutoff,
-    famli_batchsize: params.famli_batchsize,
-    cag_batchsize: params.cag_batchsize
-)
-// Import the workflows used for statistical analysis
-include { validation_wf } from './modules/statistics' params(
-    output_folder: output_folder,
-    formula: params.formula,
-    corncob_batches: params.corncob_batches
-)
-include { corncob_wf } from './modules/statistics' params(
-    output_folder: output_folder,
-    formula: params.formula,
-    corncob_batches: params.corncob_batches
-)
-include { runBetta } from './modules/statistics'
-include { addBetta } from './modules/statistics' params(
-    fdr_method: params.fdr_method
-)
-include { breakaway } from './modules/statistics'
-include { collectBreakaway } from './modules/statistics' params(
-    output_folder: output_folder,
-    output_prefix: params.output_prefix
-)
+*/
 
-// Import the workflow used for composition analysis
-include { metaphlan2_fastq } from './modules/composition' params(
-    output_folder: output_folder
-)
-// include join_metaphlan2 from './modules/composition'
-include { addMetaPhlAn2Results } from './modules/general'
 
-// Process to publish specific output files
-include { publish as publishGeneAbundances } from './modules/general' params(
-    output_folder: "${output_folder}/abund/"
-)
 
 workflow {
     main:
 
-    // Phase 0: Validation of input data
-
-    // If the user specifies a `--formula`, the first step in the process
-    // will be to ensure that the formula is written correctly, and is
-    // compatible with the data provided in the manifest
-    if ( params.formula ) {
-        // Set up a channel with the strings of the formula(s) provided
-        formula_ch = Channel.of(
-            params.formula.split(",")
-        )
-        validation_wf(
-            file(params.manifest),
-            formula_ch
-        )
-        manifest_file = validation_wf.out
-    } else {
-        manifest_file = Channel.from(file(params.manifest))
-    }
-
+    // ##########################
+    // #     PREPROCESSING      #
+    // ##########################
     manifest_file = Channel.from(file(params.manifest))
     manifest_qced = Read_manifest(manifest_file)
 
     // Phase I: Preprocessing
@@ -333,224 +218,55 @@ workflow {
             manifest_qced.valid_paired
         )
 
-        combined_reads = Preprocess_wf
+        combined_reads_pe = Preprocess_wf.out
 
     } else {
-        // If the user specified --nopreprocess, then just
-        // read the manifest and combine by specimen
-        CombineReads(
-            manifest_qced.valid_paired.mix(manifest_qced.valid_paired_indexed)
+        // If the user specified --nopreprocess, then just read in the manifest assuming these are already QCed and normalized.
+        combined_reads_pe = manifest_qced.valid_paired.mix(manifest_qced.valid_paired_indexed)
             .map {
                 r -> [r.specimen, file(r.R1), file(r.R2)]
-            }.groupTuple()
-        )
-        combined_reads = CombineReads
-        // If the user specified --savereads, write out the manifest
-        if (params.savereads) {
-            writeManifest(
-                combined_reads.out
-            )
-        }
+            }
     }
 
-    // Count the reads for every sample individually (just take the first of the pair of reads)
-    countReads(
-        combined_reads.out.map {
-            r -> [r[0], r[1], r[2]]
-        }
-    )
-
-    // Make a summary of every sample and write it out to --output
-    countReadsSummary(
-        countReads.collect()
-    )
-
     // ##########################
     // # COMPOSITIONAL ANALYSIS #
     // ##########################
 
-    if (params.composition) {
-        metaphlan2_fastq(
-            combined_reads.out.map {
-                r -> [r[0], r[1], r[2]]
-            }
-        )
-    }
-
-    // ###################################
-    // # DE NOVO ASSEMBLY AND ANNOTATION #
-    // ###################################
-
-    // A gene catalog was provided, so skip de novo assembly
-    if ( params.gene_fasta ) {
-
-        // Point to the file provided
-        gene_fasta = file(params.gene_fasta)
-
-    } else {
-
-        // Run the assembly and annotation workflow (in modules/genecatalog.nf)
-        Genecatalog_wf(
-            combined_reads.out
+    if (!params.nocomposition) {
+        Metaphlan2_wf(
+            combined_reads_pe,
+            Channel.from([])
         )
-
-        gene_fasta = Genecatalog_wf.out.gene_fasta
     }
 
-    // Run the annotation steps on the gene catalog
-    Annotation_wf(
-        gene_fasta
-    )
-
-    // ############################
-    // # ALIGNMENT-BASED ANALYSIS #
-    // ############################
-
-    // Run the alignment-based analysis steps (in modules/alignment.nf)
-    Alignment_wf(
-        gene_fasta,
-        combined_reads.out,
-        'gene' // output prefix here, of gene level output
-    )
-    /*
-    // And the CAG generation steps
-    CAG_contig_oriented_wf(
-        Alignment_wf.out.gene_abundances_zarr_tar,
-        Alignment_wf.out.gene_lists
-    )
+    // #########################################
+    // #  ALLELE CATALOG FROM DE NOVO ASSEMBLY #
+    // #########################################
 
-    // ########################
-    // # STATISTICAL ANALYSIS #
-    // ########################
-
-    // Calculate the richness of each sample using the breakaway algorithm
-    breakaway(
-        Alignment_wf.out.famli_json_list.flatten()
-    )
-    collectBreakaway(
-        breakaway.out.toSortedList()
+    Allele_catalog(
+        combined_reads_pe,
     )
 
-    // Calculate the association of individual CAGs with user-provided features
-    if ( params.formula ) {
-        corncob_wf(
-            Alignment_wf.out.famli_json_list,
-            CAG_contig_oriented_wf.out.cag_csv,
-            file(params.manifest),
-            formula_ch
-        )
-        corncob_results = corncob_wf.out
-    } else {
-        corncob_results = Channel.empty()
-    }
+    // #########################################
+    // #    ALLELE CLUSTERING INTO "GENES"     #
+    // #########################################
 
-    // ###################
-    // #  GATHER RESULTS #
-    // ###################
-
-    // Start by gathering all of the results which are generated
-    // no matter what options were selected by the user
-    // NOTE: The code used here is imported from ./modules/general.nf
-
-    collectAbundances(
-        CAG_contig_oriented_wf.out.cag_csv,
-        CAG_contig_oriented_wf.out.cag_abund_feather,
-        countReadsSummary.out,
-        manifest_file,
-        Alignment_wf.out.specimen_gene_count_csv,
-        Alignment_wf.out.specimen_reads_aligned_csv,
-        Alignment_wf.out.gene_length_csv,
-        collectBreakaway.out,
-    )
-
-    // If we performed de novo assembly, add the gene assembly information
-    if ( params.gene_fasta ) {
-        resultsHDF = collectAbundances.out
-        detailedHDF = Alignment_wf.out.detailed_hdf
-    } else {
-        addGeneAssembly(
-            collectAbundances.out,
-            Alignment_wf.out.detailed_hdf,
-            Genecatalog_wf.out.allele_assembly_csv_list
-        )
-        resultsHDF = addGeneAssembly.out[0]
-        detailedHDF = addGeneAssembly.out[1]
-    }
-
-    // If we performed compositional analysis, add the results ot the HDF5
-    if (params.composition) {
-        addMetaPhlAn2Results(
-            resultsHDF,
-            metaphlan2_fastq.out.map {
-                r -> r[1]
-            }.toSortedList()
-        )
-
-        resultsHDF = addMetaPhlAn2Results.out
-    }
-
-    // If we performed functional analysis with eggNOG, add the results to the HDF5
-    if ( params.noannot == false ) {
-        if ( params.eggnog_db && params.eggnog_dmnd ) {
-            if ( !file(params.eggnog_db).isEmpty() && !file(params.eggnog_dmnd).isEmpty() ){
-                addEggnogResults(
-                    resultsHDF,
-                    Annotation_wf.out.eggnog_tsv
-                )
-
-                resultsHDF = addEggnogResults.out
-            }
-        }
-    }
-
-    // If we performed taxonomic analysis with DIAMOND, add the results to the HDF5
-    if ( params.noannot == false ) {
-        if ( params.taxonomic_dmnd ) {
-            if ( !file(params.taxonomic_dmnd).isEmpty() ){
-                readTaxonomy(
-                    file(params.ncbi_taxdump)
-                )
-
-                addTaxResults(
-                    resultsHDF,
-                    Annotation_wf.out.tax_tsv,
-                    readTaxonomy.out
-                )
-
-                resultsHDF = addTaxResults.out
-            }
-        }
-    }
-
-    // If we performed statistical analysis, add the results to the HDF5
-    if ( params.formula ) {
-        addCorncobResults(
-            resultsHDF,
-            corncob_wf.out
-        )
-
-        runBetta(
-            addCorncobResults.out[1].flatten()
-        )
-
-        addBetta(
-            addCorncobResults.out[0],
-            runBetta.out.toSortedList()
-        )
-
-        resultsHDF = addBetta.out[0]
-
-    }
-
-    // "Repack" the HDF5, which enhances space efficiency and adds GZIP compression
-    repackFullHDF(
-        resultsHDF
+    Allele_clustering(
+        Allele_catalog.out.alleles,
+        Allele_catalog.out.allele_info
    )
+
+    // ##################################
+    // # ALIGNMENT-BASED QUANTIFICATION #
+    // ##################################
 
-    // "Repack" and compress the detailed results HDF5 as well
-    repackDetailedHDF(
-        detailedHDF
+    Alignment_wf(
+        Allele_clustering.out.centroids_C100,
+        Allele_clustering.out.dmdb_C100,
+        combined_reads_pe,
    )
+    Alignment_wf.out.specimen_allele_quant
+    // */
 }

diff --git a/modules/allele_catalog.nf b/modules/allele_catalog.nf
index fdbe324..b82d7cf 100755
--- a/modules/allele_catalog.nf
+++ b/modules/allele_catalog.nf
@@ -4,7 +4,6 @@ nextflow.enable.dsl=2
 // Default parameters
 
 // Assembly options
-params.gene_fasta = false
 params.phred_offset = 33 // spades
 
 // Default values for boolean flags
@@ -22,7 +21,7 @@ if (!params.output.endsWith("/")){
 }
 
 // Containers
-container__assembler = "quay.io/biocontainers/megahit:1.2.9--h8b12597_0"
+container__assembler = "quay.io/biocontainers/megahit:1.2.9--h43eeafb_5"
 container__anndata = "golob/python-anndata:0.9.2"
 container__prodigal = 'quay.io/biocontainers/prodigal:2.6.3--h516909a_2'
 container__fastatools = "quay.io/fhcrc-microbiome/fastatools:0.7.1__bcw.0.3.2"

diff --git a/modules/composition.nf b/modules/composition.nf
index b316f06..bd00591 100644
--- a/modules/composition.nf
+++ b/modules/composition.nf
@@ -28,6 +28,9 @@ workflow Metaphlan2_wf {
         Join_metaphlan2.out
     )
 
+    emit:
+        anndata = MetaphlanLongToAnndata.out
+        long_format = Join_metaphlan2.out
 }
 
 
@@ -36,7 +39,7 @@ process Metaphlan2_paired {
     container "${container__metaphlan2}"
     label = 'multithread'
     errorStrategy 'ignore'
 
-    publishDir path: "${params.output_folder}MetaPhlAn2/by_specimen/", mode: "copy"
"${params.output_folder}MetaPhlAn2/by_specimen/", mode: "copy" + publishDir path: "${params.output}MetaPhlAn2/by_specimen/", mode: "copy" input: tuple val(specimen), path(R1), path(R2) @@ -60,7 +63,7 @@ process Metaphlan2_unpaired { label = 'multithread' errorStrategy 'ignore' - publishDir path: "${params.output_folder}MetaPhlAn2/by_specimen/", mode: "copy" + publishDir path: "${params.output}MetaPhlAn2/by_specimen/", mode: "copy" input: tuple val(specimen), path(R1) diff --git a/modules/general.nf b/modules/general.nf index 6f27d0a..422edbc 100644 --- a/modules/general.nf +++ b/modules/general.nf @@ -24,60 +24,6 @@ def Read_manifest(manifest_file){ -// Count the number of input reads for a single sample -process countReads { - tag "Count the number of reads per sample" - container "${container__fastatools}" - cpus 1 - memory "4 GB" - errorStrategy "finish" - - input: - tuple val(sample_name), file(R1), file(R2) - - output: - file "${sample_name}.countReads.csv" - -""" -set -e - -[[ -s ${R1} ]] -[[ -s ${R2} ]] - -n=\$(cat <(gunzip -c "${R1}") <(gunzip -c "${R2}") | awk 'NR % 4 == 1' | wc -l) -echo "${sample_name},\$n" > "${sample_name}.countReads.csv" -""" -} - - -// Make a single file which summarizes the number of reads across all samples -// This is only run after all of the samples are done processing through the -// 'total_counts' channel, which is transformed by the .collect() command into -// a single list containing all of the data from all samples. -process countReadsSummary { - tag "Summarize the number of reads per sample" - container "${container__fastatools}" - // The output from this process will be copied to the --output_folder specified by the user - publishDir "${params.output_folder}/qc/", mode: 'copy' - errorStrategy "finish" - - input: - // Because the input channel has been collected into a single list, this process will only be run once - file readcount_csv_list - - output: - file "readcounts.csv" - - -""" -set -e - -echo specimen,n_reads > readcounts.csv -cat ${readcount_csv_list} >> readcounts.csv -""" -} - - // Process which will concatenate a set of files process concatenateFiles { tag "Directly combine a group of files"