diff --git a/composition_only.nf b/composition_only.nf
deleted file mode 100644
index 05cb2f8..0000000
--- a/composition_only.nf
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env nextflow
-
-/*
-    Geneshot: A pipeline to robustly identify which alleles (née peptide coding sequences)
-    are present in a microbial community.
-
-    This is a workflow oriented around obtaining the composition of a community via WGS data.
-    It reuses components (primarily pre-processing) from the broader geneshot.
-*/
-
-// Using DSL-2
-nextflow.enable.dsl=2
-
-// Default values for boolean flags
-// If these are not set by the user, then they will be set to the values below
-// This is useful for the if/then control syntax below
-params.nopreprocess = false
-params.savereads = false
-params.help = false
-params.output = './results'
-params.manifest = false
-
-// Preprocessing options
-params.hg_index_url = 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.fna.bwa_index.tar.gz'
-params.hg_index = false
-params.min_hg_align_score = 30
-
-// Make sure that --output ends with a trailing "/" character
-if (!params.output.endsWith("/")){
-    output_folder = params.output.concat("/")
-} else {
-    output_folder = params.output
-}
-
-
-// Import the preprocess_wf module
-include { Read_manifest } from './modules/general'
-include { Preprocess_wf } from './modules/preprocess' params(
-    hg_index: params.hg_index,
-    hg_index_url: params.hg_index_url,
-    min_hg_align_score: params.min_hg_align_score,
-    output: output_folder,
-    savereads: params.savereads
-)
-// Import some general tasks
-include { CombineReads } from './modules/preprocess' params(
-    savereads: params.savereads,
-    output_folder: output_folder
-)
-
-
-// Import from the composition_wf module
-include { composition_wf } from './modules/composition' params(
-    output_folder: output_folder
-)
-
-// Function which prints the help message text
-def helpMessage() {
-    log.info"""
-    A workflow oriented around obtaining the composition of a community via WGS data.
-    Reuses components (primarily pre-processing) from the broader geneshot.
-
-    Usage:
-
-    nextflow run composition_only.nf
-
-    Required Arguments:
-      --manifest            CSV file listing samples (see below)
-
-    Options:
-      --output              Folder to place analysis outputs (default ./results)
-      --nopreprocess        If specified, omit the preprocessing steps (removing adapters and human sequences)
-      --savereads           If specified, save the preprocessed reads to the output folder (inside qc/)
-      -w                    Working directory. Defaults to `./work`
-
-    For preprocessing:
-      --hg_index_url        URL for the human genome index (defaults to the current human genome build)
-      --hg_index            Cached copy of the bwa-indexed human genome, TGZ format
-      --min_hg_align_score  Minimum alignment score for the human genome (default 30)
-
-    Manifest file:
-      The manifest is a CSV with a header indicating which samples correspond to which files.
-      The file must contain a column `specimen`. This can be repeated.
-      Data for preprocessing is only accepted as paired reads.
-      Reads are specified by the columns `R1` and `R2`.
-      If index reads are provided, the column titles should be `I1` and `I2`.
-      If you wish to provide already-processed data in FASTA format, please include it in `R1` alone,
-      with only *one* file specified per specimen.
-    """.stripIndent()
-}
-
-// Show the help message if the user specifies the --help flag at runtime
-if (params.help || !params.manifest){
-    // Invoke the function above which prints the help message
-    helpMessage()
-    // Exit out and do not run anything else
-    exit 0
-}
-
-
-workflow {
-    main:
-
-    // Phase 0: Validation of input data
-    manifest_file = Channel.from(file(params.manifest))
-    // Read_manifest splits out our manifest.
-    manifest_qced = Read_manifest(manifest_file)
-
-    // Phase I: Preprocessing
-    if (!params.nopreprocess) {
-        // Run the entire preprocessing workflow
-        Preprocess_wf(
-            manifest_qced.valid_paired_indexed,
-            manifest_qced.valid_paired
-        )
-        // Combine the reads by specimen name
-        CombineReads(Preprocess_wf.out.groupTuple())
-
-    } else {
-        // If the user specified --nopreprocess, then just
-        // read the manifest and combine by specimen
-        CombineReads(
-            manifest_qced.valid_paired.mix(manifest_qced.valid_paired_indexed)
-                .map {
-                    r -> [r.specimen, file(r.R1), file(r.R2)]
-                }.groupTuple()
-        )
-
-    }
-
-    // ###############
-    // # Composition #
-    // ###############
-    composition_wf(
-        CombineReads.out,
-        manifest_qced.valid_unpaired.map{ r ->
-            [r.specimen, file(r.R1)]
-        }
-    )
-
-}
diff --git a/gene_abund.nf b/gene_abund.nf
deleted file mode 100644
index 3867ed4..0000000
--- a/gene_abund.nf
+++ /dev/null
@@ -1,363 +0,0 @@
-#!/usr/bin/env nextflow
-
-/*
-    Geneshot: A pipeline to robustly identify which alleles (née peptide coding sequences)
-    are present in a microbial community.
-
-    This utility extracts the proportion of gene copies from each specimen which are
-    annotated with a given function by the eggNOG-mapper functional annotation tool.
-
-    To use this utility, provide a set of previously-generated geneshot results to query,
-    as well as a string which will be used to select all eggNOG annotations which contain it.
-*/
-
-// Using DSL-2
-nextflow.enable.dsl=2
-
-// Parameters
-params.results_hdf = false
-params.details_hdf = false
-params.genes_fasta = false
-params.output_folder = false
-params.output_prefix = false
-params.query = false
-params.help = false
-
-// Function which prints the help message text
-def helpMessage() {
-    log.info"""
-    This utility extracts the proportion of gene copies from each specimen which are
-    annotated with a given function by the eggNOG-mapper functional annotation tool.
-
-    To use this utility, provide a set of previously-generated geneshot results to query,
-    as well as a string which will be used to select all eggNOG annotations which contain it.
-
-    Usage:
-
-    nextflow run Golob-Minot/geneshot/gene_abund.nf
-
-    Required Arguments:
-      --results_hdf         Location of the results.hdf5 generated by geneshot
-      --details_hdf         Location of the details.hdf5 generated by geneshot
-      --genes_fasta         Location of the input 'genes.fasta.gz'
-      --output_folder       Location for output files
-      --output_prefix       Prefix for output files
-      --query               Query string used to subset eggNOG gene descriptions
-
-    """.stripIndent()
-}
-
-// Show the help message if the user specifies the --help flag at runtime,
-// or if any of the required arguments is missing
-if (params.help || params.results_hdf == false || params.details_hdf == false || params.genes_fasta == false || params.output_prefix == false || params.output_folder == false || params.query == false){
-    // Invoke the function above which prints the help message
-    helpMessage()
-    // Exit out and do not run anything else
-    exit 0
-}
-
-workflow {
-
-    // Make sure we can find the input files
-    if(file(params.results_hdf).isEmpty()){
-        log.info"""Cannot find input file ${params.results_hdf}""".stripIndent()
-        exit 0
-    }
-    if(file(params.details_hdf).isEmpty()){
-        log.info"""Cannot find input file ${params.details_hdf}""".stripIndent()
-        exit 0
-    }
-    if(file(params.genes_fasta).isEmpty()){
-        log.info"""Cannot find input file ${params.genes_fasta}""".stripIndent()
-        exit 0
-    }
-
-    // Get the table of genes which contain this annotation
-    extractAnnotations(
-        file(params.results_hdf)
-    )
-
-    // Get the sequences of the genes which match this query
-    extractFASTA(
-        extractAnnotations.out,
-        file(params.genes_fasta)
-    )
-
-    // Get the user-provided manifest
-    extractManifest(
-        file(params.results_hdf)
-    )
-
-    // Get the proportion of gene copies for these genes across all specimens
-    extractAbund(
-        extractAnnotations.out,
-        file(params.details_hdf),
-        extractManifest.out
-    )
-
-}
-
-process extractAnnotations {
-
-    container "quay.io/fhcrc-microbiome/integrate-metagenomic-assemblies:v0.5"
-    label "mem_medium"
-
-    input:
-    file results_hdf
-
-    output:
-    file "${params.output_prefix}.genes.csv"
-
-    publishDir "${params.output_folder}", mode: 'copy', overwrite: true
-
-"""#!/usr/bin/env python3
-
-import os
-import pandas as pd
-
-# Set the input path
-results_hdf = '${results_hdf}'
-
-# Make sure that the file is present in the working folder
-assert os.path.exists(results_hdf)
-
-# Set up a function to filter a DataFrame by the query string
-query_str = '${params.query}'
-def filter_df(df):
-    return df.loc[
-        df['eggNOG_desc'].fillna('').apply(lambda s: query_str in s)
-    ]
-
-# Read in the table in chunks and filter as we go
-df = pd.concat([
-    filter_df(chunk_df)
-    for chunk_df in pd.read_hdf(
-        results_hdf,
-        '/annot/gene/all',
-        iterator=True
-    )
-])
-print("Number of genes containing the query '%s': %d" % (query_str, df.shape[0]))
-
-# If there are any genes matching this string
-if df.shape[0] > 0:
-
-    # Write out the smaller table
-    df.to_csv("${params.output_prefix}.genes.csv", index=None)
-
-    print("Done")
-
-else:
-
-    print("NO GENES FOUND MATCHING THE QUERY: %s" % query_str)
-
-"""
-
-}
-
-process extractManifest {
-
-    container "quay.io/fhcrc-microbiome/integrate-metagenomic-assemblies:v0.5"
-    label "mem_medium"
-
-    input:
-    file results_hdf
-
-    output:
-    file "${params.output_prefix}.manifest.csv"
-
-    publishDir "${params.output_folder}", mode: 'copy', overwrite: true
-
-"""#!/usr/bin/env python3
-
-import os
-import pandas as pd
-
-# Set the input path
-results_hdf = '${results_hdf}'
-
-# Make sure that the file is present in the working folder
-assert os.path.exists(results_hdf)
-
-# Read the manifest
-manifest_df = pd.read_hdf(results_hdf, "/manifest")
-
-# Write out to a file
-manifest_df.to_csv("${params.output_prefix}.manifest.csv", index=None)
-
-"""
-
-}
-
-process extractFASTA {
-
-    container "quay.io/fhcrc-microbiome/integrate-metagenomic-assemblies:v0.5"
-    label "io_limited"
-
-    input:
-    file gene_csv
-    file gene_fasta_gz
-
-    output:
-    file "${params.output_prefix}.genes.fasta.gz"
-
-    publishDir "${params.output_folder}", mode: 'copy', overwrite: true
-
-"""#!/usr/bin/env python3
-
-from Bio.SeqIO.FastaIO import SimpleFastaParser
-import gzip
-import os
-import pandas as pd
-
-# Set the input paths
-# Make sure that the files are present in the working folder
-gene_csv = '${gene_csv}'
-assert os.path.exists(gene_csv)
-gene_fasta_gz = '${gene_fasta_gz}'
-assert os.path.exists(gene_fasta_gz)
-output_fasta_gz = '${params.output_prefix}.genes.fasta.gz'
-assert not os.path.exists(output_fasta_gz)
-
-# Read in the table
-df = pd.read_csv(gene_csv)
-print("Read in annotations for %d genes" % df.shape[0])
-
-# Get the list of genes
-gene_names = set(df['gene'].tolist())
-
-# Keep a counter of how many genes we've found
-n_found = 0
-
-# Open the input and output files
-with gzip.open(gene_fasta_gz, 'rt') as handle_in, gzip.open(output_fasta_gz, 'wt') as handle_out:
-
-    # Iterate over the inputs
-    for gene_name, gene_seq in SimpleFastaParser(handle_in):
-
-        # If this is one of the genes we are looking for
-        if gene_name in gene_names:
-
-            # Write it out
-            handle_out.write(">%s\\n%s\\n" % (gene_name, gene_seq))
-
-            # Increment the counter
-            n_found += 1
-
-# Report the number of genes which were found
-print("Wrote out %d gene sequences" % n_found)
-print("DONE")
-
-"""
-
-}
-
-process extractAbund {
-
-    container "quay.io/fhcrc-microbiome/integrate-metagenomic-assemblies:v0.5"
-    label "mem_medium"
-
-    input:
-    file gene_csv
-    file details_hdf
-    file manifest_csv
-
-    output:
-    file "${params.output_prefix}.*.csv.gz"
-
-    publishDir "${params.output_folder}", mode: 'copy', overwrite: true
-
-"""#!/usr/bin/env python3
-
-import os
-import pandas as pd
-
-# Set the input paths
-# Make sure that the files are present in the working folder
-gene_csv = '${gene_csv}'
-assert os.path.exists(gene_csv)
-details_hdf = '${details_hdf}'
-assert os.path.exists(details_hdf)
-manifest_csv = '${manifest_csv}'
-assert os.path.exists(manifest_csv)
-
-# Read in the table of gene annotations
-df = pd.read_csv(gene_csv)
-print("Read in annotations for %d genes" % df.shape[0])
-
-# Get the list of genes
-gene_names = set(df['gene'].tolist())
-
-# Read in the manifest
-manifest_df = pd.read_csv(manifest_csv)
-
-# Keep the complete set of gene abundances in long format
-output = []
-
-# Open a connection to the HDF store
-with pd.HDFStore(details_hdf, 'r') as store:
-
-    # Set up an object to save the proportion of gene copies in each specimen
-    for specimen_name in manifest_df['specimen'].unique():
-
-        print("Reading in abundances for specimen '%s'" % specimen_name)
-
-        # Read in the full table
-        specimen_df = pd.read_hdf(store, "/abund/gene/long/%s" % specimen_name)
-
-        # Get the total depth for all gene copies
-        tot = specimen_df['depth'].sum()
-
-        # Subset to the genes of interest, add the specimen, add the proportion
-        # of gene copies, and append to the output
-        output.append(
-            specimen_df.loc[
-                specimen_df['id'].isin(gene_names)
-            ].assign(
-                specimen = specimen_name,
-                prop = lambda d: d['depth'] / tot
-            )
-        )
-
-# Combine all of the output
-print("Combining all outputs")
-output = pd.concat(
-    output
-).reset_index(
-    drop=True
-)
-
-# Add the eggNOG annotation
-print("Adding eggNOG names")
-output = output.assign(
-    eggNOG_desc = output['id'].apply(
-        df.set_index('gene')['eggNOG_desc'].get
-    )
-)
-
-# Save the long output
-print("Saving long output")
-output.to_csv(
-    "${params.output_prefix}.long.csv.gz",
-    index=None,
-    compression='gzip'
-)
-
-# Save the wide output
-output.pivot_table(
-    index="specimen",
-    columns="eggNOG_desc",
-    values="prop",
-    aggfunc=sum
-).fillna(
-    0
-).reset_index(
-).to_csv(
-    "${params.output_prefix}.wide.csv.gz",
-    index=None,
-    compression='gzip'
-)
-
-print("DONE")
-
-"""
-
-}
\ No newline at end of file
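The core of extractAnnotations above is a chunked substring filter over the /annot/gene/all table. A minimal standalone sketch of that query (the file path and query string are hypothetical, and the table must have been stored in HDF5 "table" format for chunked reads to work):

    #!/usr/bin/env python3
    # Standalone sketch of the annotation query performed by extractAnnotations
    import pandas as pd

    results_hdf = "results.hdf5"        # hypothetical path to a geneshot results file
    query_str = "Glycosyltransferase"   # hypothetical query string

    # Stream the gene annotation table in chunks, keeping only the rows
    # whose eggNOG description contains the query substring
    df = pd.concat([
        chunk.loc[chunk["eggNOG_desc"].fillna("").str.contains(query_str, regex=False)]
        for chunk in pd.read_hdf(results_hdf, "/annot/gene/all", chunksize=100000)
    ])
    print("Number of genes containing the query '%s': %d" % (query_str, df.shape[0]))
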
diff --git a/run_annotations.nf b/run_annotations.nf
deleted file mode 100755
index 2f46059..0000000
--- a/run_annotations.nf
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env nextflow
-
-/*
-    Geneshot: A pipeline to robustly identify which alleles (née peptide coding sequences)
-    are present in a microbial community.
-
-    This utility performs taxonomic and/or functional annotations on an existing geneshot
-    results object.
-*/
-
-// Using DSL-2
-nextflow.enable.dsl=2
-
-// Parameters
-params.input_hdf = false
-params.gene_fasta = false
-params.output_hdf = false
-params.output_folder = false
-params.help = false
-params.min_coverage = 50
-params.min_identity = 90
-params.taxonomic_dmnd = false
-params.ncbi_taxdump = "ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz"
-params.eggnog_db = false
-params.eggnog_dmnd = false
-params.noannot = false
-
-// Function which prints the help message text
-def helpMessage() {
-    log.info"""
-    Add functional and/or taxonomic annotations to a geneshot output file.
-    This utility will create a new geneshot output file in which
-    the only difference is the annotation output results stored within.
-
-    Usage:
-
-    nextflow run Golob-Minot/geneshot/run_annotations.nf
-
-    Options:
-      --input_hdf           Location of the input HDF
-      --gene_fasta          Location of the input 'genes.fasta.gz'
-      --output_folder       Location for the output HDF
-      --output_hdf          Name of the output HDF to be placed in the output folder
-      --taxonomic_dmnd      Database used for taxonomic annotation (default: false)
-                            (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-01-15-geneshot/DB.refseq.tax.dmnd)
-      --ncbi_taxdump        Reference describing the NCBI Taxonomy
-                            (default: ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz)
-      --eggnog_dmnd         One of two databases used for functional annotation with eggNOG (default: false)
-                            (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog_proteins.dmnd)
-      --eggnog_db           One of two databases used for functional annotation with eggNOG (default: false)
-                            (Data available at s3://fh-ctr-public-reference-data/tool_specific_data/geneshot/2020-06-17-eggNOG-v5.0/eggnog.db)
-
-    """.stripIndent()
-}
-
-// Show the help message if the user specifies the --help flag at runtime,
-// or if any of the required arguments is missing
-if (params.help || params.input_hdf == false || params.gene_fasta == false || params.output_hdf == false || params.output_folder == false){
-    // Invoke the function above which prints the help message
-    helpMessage()
-    // Exit out and do not run anything else
-    exit 0
-}
-
-// Show the help message if the user does not specify any annotations
-if (params.taxonomic_dmnd == false && params.eggnog_dmnd == false && params.eggnog_db == false){
-    // Invoke the function above which prints the help message
-    helpMessage()
-    // Exit out and do not run anything else
-    exit 0
-}
-
-// Import the processes used to add annotation results to the output
-include { readTaxonomy } from './modules/general'
-include { addTaxResults } from './modules/general'
-
-include { addEggnogResults } from './modules/general'
-
-include { repackHDF } from './modules/general' params(
-    output_folder: params.output_folder
-)
-
-// Import the workflows used for annotation
-include { Annotation_wf } from './modules/annotation' params(
-    output_folder: params.output_folder,
-    eggnog_db: params.eggnog_db,
-    eggnog_dmnd: params.eggnog_dmnd,
-    taxonomic_dmnd: params.taxonomic_dmnd,
-    noannot: params.noannot
-)
-
-// Process to rename the input HDF file
-process renameHDF{
-    container "ubuntu:20.04"
-    label 'io_limited'
-
-    input:
-    path "INPUT.RESULTS.HDF"
-    val new_file_name
-
-    output:
-    path "${new_file_name}"
-
-"""#!/bin/bash
-
-mv INPUT.RESULTS.HDF ${new_file_name}
-"""
-}
-
-
-workflow {
-
-    // Make sure we can find the input files
-    if(file(params.input_hdf).isEmpty()){
-        log.info"""Cannot find input file ${params.input_hdf}""".stripIndent()
-        exit 0
-    }
-    if(file(params.gene_fasta).isEmpty()){
-        log.info"""Cannot find input file ${params.gene_fasta}""".stripIndent()
-        exit 0
-    }
-
-    // Run the annotation steps on the gene catalog
-    Annotation_wf(
-        file(params.gene_fasta)
-    )
-
-    // Rename the input HDF
-    renameHDF(
-        file(params.input_hdf),
-        params.output_hdf
-    )
-    outputHDF = renameHDF.out
-
-    // Add the eggNOG results
-    if (Annotation_wf.out.eggnog_tsv != false && params.eggnog_db != false && params.eggnog_dmnd != false) {
-        addEggnogResults(
-            outputHDF,
-            Annotation_wf.out.eggnog_tsv
-        )
-        outputHDF = addEggnogResults.out
-    }
-
-    // Add the taxonomic assignment results
-    if (Annotation_wf.out.tax_tsv != false && params.ncbi_taxdump != false && params.taxonomic_dmnd != false) {
-        readTaxonomy(
-            file(params.ncbi_taxdump)
-        )
-
-        addTaxResults(
-            outputHDF,
-            Annotation_wf.out.tax_tsv,
-            readTaxonomy.out
-        )
-
-        outputHDF = addTaxResults.out
-    }
-
-    // Repack the HDF
-    repackHDF(
-        outputHDF
-    )
-
-}
\ No newline at end of file
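As a usage illustration in the style of the help message above (all file paths are hypothetical; the database file names follow the S3 objects listed in the options), an invocation supplying both taxonomic and functional annotation might be:

    nextflow run Golob-Minot/geneshot/run_annotations.nf \
        --input_hdf results.hdf5 \
        --gene_fasta genes.fasta.gz \
        --output_folder ./annotated \
        --output_hdf results.annotated.hdf5 \
        --taxonomic_dmnd DB.refseq.tax.dmnd \
        --eggnog_dmnd eggnog_proteins.dmnd \
        --eggnog_db eggnog.db
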
diff --git a/run_corncob.nf b/run_corncob.nf
deleted file mode 100644
index fee6620..0000000
--- a/run_corncob.nf
+++ /dev/null
@@ -1,258 +0,0 @@
-#!/usr/bin/env nextflow
-
-/*
-    Geneshot: A pipeline to robustly identify which alleles (née peptide coding sequences)
-    are present in a microbial community.
-
-    This utility runs the corncob algorithm with a specified formula on an
-    existing geneshot output file object.
-*/
-
-// Using DSL-2
-nextflow.enable.dsl=2
-
-// Parameters
-params.input_hdf = false
-params.input_folder = false
-params.output_folder = false
-params.output_prefix = false
-params.formula = false
-params.fdr_method = "fdr_bh"
-params.corncob_batches = 10
-params.help = false
-
-// Docker containers
-container__pandas = "quay.io/fhcrc-microbiome/python-pandas:v1.0.3"
-
-// Function which prints the help message text
-def helpMessage() {
-    log.info"""
-    Run corncob with a specified formula on a geneshot output file.
-    This utility will create a new geneshot output file in which
-    the only difference is the corncob output results stored within.
-
-    Usage:
-
-    nextflow run Golob-Minot/geneshot/run_corncob.nf
-
-    Options:
-      --input_hdf           Geneshot results HDF5 file to be used for analysis
-      --input_folder        Folder containing geneshot output to be used for analysis (must contain the subfolder "abund/")
-      --output_folder       Folder to place the output file
-      --output_prefix       Text used as a prefix for summary HDF5 output files (the .corncob.hdf5 suffix will be attached)
-      -w                    Working directory. Defaults to `./work`
-
-    For statistical analysis:
-      --formula             Formula used to estimate associations with CAG relative abundance
-      --fdr_method          FDR method used to calculate q-values for associations (default: 'fdr_bh')
-      --corncob_batches     Number of parallel processes to use for each formula
-
-    """.stripIndent()
-}
-
-// Show the help message if the user specifies the --help flag at runtime,
-// or if any of the required arguments is missing
-if (params.help || params.input_hdf == false || params.input_folder == false || params.output_folder == false || params.output_prefix == false || params.formula == false){
-    // Invoke the function above which prints the help message
-    helpMessage()
-    // Exit out and do not run anything else
-    exit 0
-}
-
-// Import the process used to shard CAGs for corncob
-include { splitCorncob } from './modules/statistics' params(
-    corncob_batches: params.corncob_batches
-)
-
-// Import the process used for statistical analysis
-include { runCorncob } from './modules/statistics' params(
-    formula: params.formula
-)
-
-// Import the process used to join the results for multiple formulae
-include { joinCorncob } from './modules/statistics' params(
-    output_folder: params.output_folder
-)
-
-// Import the process used to aggregate readcounts across all samples
-// This process is only run if `abund/CAG.readcounts.csv.gz` cannot be found yet
-// After the process completes, a new `abund/CAG.readcounts.csv.gz` will be added to that input folder
-include { extractCounts } from './modules/statistics' params(
-    output_folder: params.input_folder
-)
-
-// Import the processes needed to run meta-analysis of corncob results by annotation
-include { runBetta } from './modules/statistics'
-include { addBetta } from './modules/statistics' params(
-    fdr_method: params.fdr_method
-)
-
-// Import the process used to add corncob results to the output
-include { repackHDF } from './modules/general' params(
-    output_folder: params.output_folder
-)
-include { addCorncobResults } from './modules/general' params(
-    fdr_method: params.fdr_method
-)
-
-// Process to update the formula listed in the summary table
-// Also extract the manifest to reduce the number of times
-// the file is opened, as well as the table listing the genes
-// which make up each CAG.
-process updateFormula{
-    container "${container__pandas}"
-    label 'mem_medium'
-    errorStrategy 'finish'
-
-    input:
-    path results_hdf
-
-    output:
-    path "*corncob.hdf5"
-    path "manifest.csv"
-    path "CAG.assignments.csv.gz"
-
-"""
-#!/usr/bin/env python3
-
-import pandas as pd
-import shutil
-import pickle
-pickle.HIGHEST_PROTOCOL = 4
-
-# Come up with a new name for the output file
-formula_name = "${params.formula}".replace(
-    " ", "_"
-).replace(
-    "+", "_"
-).replace(
-    ",", "_"
-).replace(
-    "*", "_"
-).replace(
-    ":", "_"
-).replace(
-    "__", "_"
-).replace(
-    "__", "_"
-)
-new_file_name = "${params.output_prefix}.%s.corncob.hdf5" % formula_name
-
-# Copy the input file to a new output file object
-print("Copying %s to %s" % ("${results_hdf}", new_file_name))
-shutil.copyfile("${results_hdf}", new_file_name)
-
-# Open a connection to the store
-with pd.HDFStore(new_file_name, "a") as store:
-
-    # Read in the existing summary table
-    df = pd.read_hdf(store, "/summary/experiment")
-
-    print("Existing summary:")
-    print(df)
-
-    # Add the formula
-    df = pd.concat([
-        df.query("variable != 'formula'"),
-        pd.DataFrame([dict(variable='formula', value='${params.formula}')])
-    ])
-
-    print("")
-    print("New summary:")
-    print(df)
-
-    # Write back to the store
-    df.to_hdf(store, "/summary/experiment")
-
-    # Extract the manifest
-    print("Extracting the manifest")
-    pd.read_hdf(store, "/manifest").to_csv("manifest.csv")
-
-    # Extract the table of genes making up each CAG
-    print("Extracting the gene~CAG table")
-    pd.read_hdf(
-        store,
-        "/annot/gene/all",
-        columns=["gene", "CAG"]
-    ).to_csv(
-        "CAG.assignments.csv.gz",
-        index=None
-    )
-
-print("Done")
"""
-
-}
-
-workflow {
-
-    // Make sure we can find the input file
-    if(file(params.input_hdf).isEmpty()){
-        log.info"""Cannot find input file ${params.input_hdf}""".stripIndent()
-        exit 0
-    }
-
-    // First update the formula listed in the geneshot results file,
-    // a process which will also extract the manifest CSV
-    updateFormula(
-        file(params.input_hdf)
-    )
-
-    // Check to see if `abund/CAG.readcounts.csv.gz` is present in the input folder
-    // For context, this file is only created by geneshot for runs which originally
-    // included a `--formula`.
-    // If this file cannot be found, then we will run a process to create that file
-    // and also to publish that file back to the input folder.
-    abund_folder = "${params.input_folder.replaceAll(/\/$/, "")}/abund"
-    if(file("${abund_folder}/CAG.readcounts.csv.gz").isEmpty()){
-        extractCounts(
-            Channel.fromPath(
-                "${abund_folder}/details/*json.gz"
-            ).toSortedList(),
-            updateFormula.out[2]
-        )
-        input_csv = extractCounts.out
-    } else {
-        input_csv = file("${abund_folder}/CAG.readcounts.csv.gz")
-    }
-
-    // Set up a channel with the strings of the formula(s) provided
-    formula_ch = Channel.of(
-        params.formula.split(",")
-    )
-
-    // Shard the readcounts table into batches
-    splitCorncob(
-        input_csv
-    )
-
-    // Now run corncob on the extracted manifest, as well as the gene counts table
-    runCorncob(
-        splitCorncob.out.flatten(),
-        updateFormula.out[1],
-        formula_ch
-    )
-
-    // Join the results across all shards and formulae
-    joinCorncob(
-        runCorncob.out.toSortedList()
-    )
-
-    // Add those results to the output file
-    addCorncobResults(
-        updateFormula.out[0],
-        joinCorncob.out
-    )
-
-    // Run the betta meta-analysis over the corncob results
-    runBetta(
-        addCorncobResults.out[1].flatten()
-    )
-
-    addBetta(
-        addCorncobResults.out[0],
-        runBetta.out.toSortedList()
-    )
-
-    // Repack the HDF
-    repackHDF(
-        addBetta.out[0]
-    )
-
-}
\ No newline at end of file
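As a usage illustration (paths, prefix, and formula are hypothetical), a typical invocation might be:

    nextflow run Golob-Minot/geneshot/run_corncob.nf \
        --input_hdf geneshot.results.hdf5 \
        --input_folder ./geneshot_output \
        --output_folder ./corncob_output \
        --output_prefix study1 \
        --formula "treatment+age"

Note that a comma-separated --formula value is split into multiple formulae (see formula_ch above), each of which is fit independently.
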
diff --git a/update_manifest.nf b/update_manifest.nf
deleted file mode 100644
index 8d60883..0000000
--- a/update_manifest.nf
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env nextflow
-
-/*
-    Geneshot: A pipeline to robustly identify which alleles (née peptide coding sequences)
-    are present in a microbial community.
-
-    This utility replaces the /manifest table in an existing geneshot results output file.
-*/
-
-// Using DSL-2
-nextflow.enable.dsl=2
-
-// Parameters
-params.input_hdf = false
-params.output_folder = false
-params.output_prefix = false
-params.manifest = false
-params.help = false
-
-// Docker containers
-container__pandas = "quay.io/fhcrc-microbiome/python-pandas:v1.0.3"
-
-// Function which prints the help message text
-def helpMessage() {
-    log.info"""
-    Replace the /manifest in an existing geneshot output file.
-
-    Usage:
-
-    nextflow run Golob-Minot/geneshot/update_manifest.nf
-
-    Options:
-      --manifest            Manifest (CSV) to be used in the updated geneshot output file
-      --input_hdf           Geneshot results HDF5 file to be used for analysis
-      --output_folder       Folder to place the output file
-      --output_prefix       Text used as a prefix for the output HDF5 file (the .results.hdf5 suffix will be attached)
-      -w                    Working directory. Defaults to `./work`
-
-    """.stripIndent()
-}
-
-// Show the help message if the user specifies the --help flag at runtime,
-// or if any of the required arguments is missing
-if (params.help || params.input_hdf == false || params.manifest == false || params.output_folder == false || params.output_prefix == false){
-    // Invoke the function above which prints the help message
-    helpMessage()
-    // Exit out and do not run anything else
-    exit 0
-}
-
-// Import the process used to repack the output HDF5 file
-include { repackHDF } from './modules/general' params(
-    output_folder: params.output_folder
-)
-
-// Process to replace the manifest stored in the results file
-process replaceManifest {
-    container "${container__pandas}"
-    label 'mem_medium'
-    errorStrategy 'finish'
-
-    input:
-    path "input.hdf5"
-    path manifest_csv
-
-    output:
-    path "${params.output_prefix}.results.hdf5"
-
-"""
-#!/usr/bin/env python3
-
-import pandas as pd
-import shutil
-import pickle
-pickle.HIGHEST_PROTOCOL = 4
-
-# Read in the new manifest
-new_manifest = pd.read_csv("${manifest_csv}", sep=",")
-
-# Make sure that the new manifest has the three required columns
-for k in ["specimen", "R1", "R2"]:
-    assert k in new_manifest.columns.values, "New manifest must contain a %s column" % k
-
-# Remove the read path columns, if present
-for k in ["R1", "R2", "I1", "I2"]:
-    if k in new_manifest.columns.values:
-        new_manifest = new_manifest.drop(columns=k)
-
-# Assign the new name for the output file
-new_file_name = "${params.output_prefix}.results.hdf5"
-
-# Copy the input file to a new output file object
-print("Copying input.hdf5 to %s" % new_file_name)
-shutil.copyfile("input.hdf5", new_file_name)
-
-# Open a connection to the store
-with pd.HDFStore(new_file_name, "a") as store:
-
-    # Read in the existing manifest
-    old_manifest = pd.read_hdf(store, "/manifest")
-
-    # Make sure that the new and old manifests have the same set of specimens
-    old_specimen_set = set(old_manifest["specimen"].tolist())
-    new_specimen_set = set(new_manifest["specimen"].tolist())
-
-    for s, msg in [
-        (old_specimen_set - new_specimen_set, "Specimens missing from old specimen list"),
-        (new_specimen_set - old_specimen_set, "Specimens missing from new specimen list"),
-    ]:
-        assert len(s) == 0, "%s: %s" % (msg, ", ".join(list(s)))
-
-    # Write the new manifest to the store
-    new_manifest.to_hdf(store, "/manifest")
-
-print("Done")
-"""
-
-}
-
-workflow {
-
-    // Update the manifest
-    replaceManifest(
-        file(params.input_hdf),
-        file(params.manifest)
-    )
-
-    // Repack the HDF; repackHDF publishes the repacked file
-    // to the output folder via its publishDir directive
-    repackHDF(
-        replaceManifest.out
-    )
-
-}
\ No newline at end of file
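
Before running update_manifest.nf, the replacement manifest can be validated against the checks enforced by replaceManifest above. A minimal sketch (file paths are hypothetical):

    #!/usr/bin/env python3
    # Sketch: pre-validate a replacement manifest for update_manifest.nf.
    # Mirrors the assertions in replaceManifest: the CSV must contain
    # specimen, R1, and R2 columns, and the same specimen set as the
    # manifest already stored in the results HDF5.
    import pandas as pd

    old = pd.read_hdf("results.hdf5", "/manifest")   # existing manifest
    new = pd.read_csv("new_manifest.csv")            # candidate replacement

    # The three required columns
    for col in ["specimen", "R1", "R2"]:
        assert col in new.columns, "New manifest must contain a %s column" % col

    # The specimen sets must match exactly in both directions
    assert set(old["specimen"]) == set(new["specimen"]), "Specimen sets differ"
    print("Manifest passes the checks enforced by replaceManifest")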