diff --git a/.gitignore b/.gitignore index 44a50e4..769e5d4 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.swp __pycache__ +args/ venv/* venv35/* temp*/* diff --git a/atram.py b/atram.py index a7fd115..164d132 100755 --- a/atram.py +++ b/atram.py @@ -43,103 +43,101 @@ def parse_command_line(): group = parser.add_argument_group('required arguments') - group.add_argument('-b', '--blast-db', '--sra', '--db', '--database', - required=True, metavar='DB', nargs='+', - help="""This needs to match the DB prefix you - entered for atram_preprocessor.py. You may repeat - this argument to run the --query sequence(s) - against multiple blast databases.""") - - group.add_argument('-q', '--query', '--target', '--probe', - required=False, nargs='+', - help="""The path to the fasta file with sequences of - interest. You may repeat this argument. If you do - then Each --query sequence file will be run - against every --blast-db.""") - - group.add_argument('-Q', '--query-split', '--target-split', - required=False, nargs='+', - help="""The path to the fasta file with multiple - sequences of interest. This will take every - sequence in the fasta file and treat it as if it - were its own --query argument. So every sequence in - --query-split will be run against every --blast-db. - """) - - group.add_argument('-o', '--output-prefix', required=True, - help="""This is the prefix of all of the output files. - So you can identify different blast output file - sets. You may include a directory as part of the - prefix. aTRAM will add suffixes to differentiate - output files.""") - - group.add_argument('-a', '--assembler', default='none', - choices=['abyss', 'trinity', 'velvet', 'spades', - 'none'], - help="""Which assembler to use. Choosing "none" (the - default) will do a single blast run and stop before - any assembly.""") - - group.add_argument('-i', '--iterations', type=int, default=5, metavar='N', - help="""The number of pipeline iterations. - The default is "5".""") - - group.add_argument('-p', '--protein', action='store_true', - help="""Are the query sequences protein? - aTRAM will guess if you skip this argument.""") - - group.add_argument('--fraction', type=float, default=1.0, - help="""Use only the specified fraction of the aTRAM - database. The default is 1.0.""") + group.add_argument( + '-b', '--blast-db', '--sra', '--db', '--database', + required=True, metavar='DB', nargs='+', + help="""This needs to match the DB prefix you entered for + atram_preprocessor.py. You may repeat this argument to run the + --query sequence(s) against multiple blast databases.""") + + group.add_argument( + '-q', '--query', '--target', '--probe', required=False, nargs='+', + help="""The path to the fasta file with sequences of interest. You may + repeat this argument. If you do then Each --query sequence file + will be run against every --blast-db.""") + + group.add_argument( + '-Q', '--query-split', '--target-split', required=False, nargs='+', + help="""The path to the fasta file with multiple sequences of interest. + This will take every sequence in the fasta file and treat it as if + it were its own --query argument. So every sequence in + --query-split will be run against every --blast-db.""") + + group.add_argument( + '-o', '--output-prefix', required=True, + help="""This is the prefix of all of the output files. So you can + identify different blast output file sets. You may include a + directory as part of the prefix. aTRAM will add suffixes to + differentiate output files.""") + + group.add_argument( + '-a', '--assembler', default='none', + choices=['abyss', 'trinity', 'velvet', 'spades', 'none'], + help="""Which assembler to use. Choosing "none" (the default) will do + a single blast run and stop before any assembly.""") + + group.add_argument( + '-i', '--iterations', type=int, default=5, metavar='N', + help="""The number of pipeline iterations. The default is "5".""") + + group.add_argument( + '-p', '--protein', action='store_true', + help="""Are the query sequences protein? aTRAM will guess if you skip + this argument.""") + + group.add_argument( + '--fraction', type=float, default=1.0, + help="""Use only the specified fraction of the aTRAM database. The + default is 1.0.""") cpus = min(10, os.cpu_count() - 4 if os.cpu_count() > 4 else 1) - group.add_argument('--cpus', '--processes', '--max-processes', - type=int, default=cpus, - help="""Number of CPU processors to use. This will also - be used for the assemblers when possible. We will - use {} out of {} CPUs.""".format( - cpus, os.cpu_count())) + group.add_argument( + '--cpus', '--processes', '--max-processes', type=int, default=cpus, + help="""Number of CPU processors to use. This will also be used for + the assemblers when possible. We will use {} out of {} CPUs. + """.format(cpus, os.cpu_count())) group.add_argument('--log-file', help="""Log file (full path)".""") - group.add_argument('--path', - help="""If the assembler or blast you want to use is not - in your $PATH then use this to prepend - directories to your path.""") + group.add_argument( + '--path', + help="""If the assembler or blast you want to use is not in your $PATH\ + then use this to prepend directories to your path.""") - group.add_argument('-t', '--temp-dir', metavar='DIR', - help="""Place temporary files in this directory. All - files will be deleted after aTRAM completes. The - directory must exist.""") + group.add_argument( + '-t', '--temp-dir', metavar='DIR', + help="""Place temporary files in this directory. All files will be + deleted after aTRAM completes. The directory must exist.""") - group.add_argument('--keep-temp-dir', action='store_true', - help="""This flag will keep the temporary files in the - --temp-dir around for debugging.""") + group.add_argument( + '--keep-temp-dir', action='store_true', + help="""This flag will keep the temporary files in the --temp-dir + around for debugging.""") - group.add_argument('-T', '--timeout', metavar='SECONDS', default=300, - type=int, - help="""How many seconds to wait for an assembler before - stopping the run. To wait forever set this to 0. - The default is "300" (5 minutes).""") + group.add_argument( + '-T', '--timeout', metavar='SECONDS', default=300, type=int, + help="""How many seconds to wait for an assembler before stopping the + run. To wait forever set this to 0. The default is "300" + (5 minutes).""") group = parser.add_argument_group( 'optional values for blast-filtering contigs') - group.add_argument('--no-filter', action='store_true', - help="""Do not filter the assembled contigs. This will: - set both the --bit-score and --contig-length - to 0""") - - group.add_argument('--bit-score', type=float, default=70.0, - metavar='SCORE', - help="""Remove contigs that have a value less than this. - The default is "70.0". This is turned off by the - --no-filter argument.""") - - group.add_argument('--contig-length', '--length', type=int, default=100, - help="""Remove blast hits that are shorter than this - length. The default is "100". This is turned - off by the --no-filter argument.""") + group.add_argument( + '--no-filter', action='store_true', + help="""Do not filter the assembled contigs. This will: set both the + --bit-score and --contig-length to 0""") + + group.add_argument( + '--bit-score', type=float, default=70.0, metavar='SCORE', + help="""Remove contigs that have a value less than this. The default + is "70.0". This is turned off by the --no-filter argument.""") + + group.add_argument( + '--contig-length', '--length', type=int, default=100, + help="""Remove blast hits that are shorter than this length. The + default is "100". This is turned off by the --no-filter argument. + """) blast.command_line_args(parser) assembly.command_line_args(parser) diff --git a/atram_preprocessor.py b/atram_preprocessor.py index 90be950..c088c15 100755 --- a/atram_preprocessor.py +++ b/atram_preprocessor.py @@ -42,6 +42,7 @@ def parse_command_line(): """ parser = argparse.ArgumentParser( + fromfile_prefix_chars='@', formatter_class=argparse.RawDescriptionHelpFormatter, description=textwrap.dedent(description)) @@ -83,7 +84,7 @@ def parse_command_line(): blast_db = join('.', 'atram_' + date.today().isoformat()) group.add_argument( - '-b', '--blast-db', '--output', '--db', default=blast_db, metavar='DB', + '-b', '--blast-db', '--db', default=blast_db, metavar='DB', help="""This is the prefix of all of the blast database files. So you can identify different blast database sets. You may include a directory as part of the prefix. The default is "{}". diff --git a/atram_stitcher.py b/atram_stitcher.py index 1a153e0..9dc378d 100755 --- a/atram_stitcher.py +++ b/atram_stitcher.py @@ -45,7 +45,7 @@ def parse_command_line(): parser.add_argument( '-O', '--overlap', type=int, default=10, - help="""Contigs must overlap by this many codons before it is + help="""Contigs must overlap by this many codons before it is considered a real overlap.""") parser.add_argument( @@ -64,9 +64,9 @@ def parse_command_line(): "atram_stitcher_.log".""") parser.add_argument( - '-i', '--iterations', type=int, default=2, metavar='N', - help="""The number of times to run the main stitcher loop. This - must be either 1 or 2, the default is 2.""") + '-i', '--iterations', type=int, default=2, metavar='N', + help="""The number of times to run the main stitcher loop. This + must be either 1 or 2, the default is 2.""") parser.add_argument( '-o', '--output-prefix', @@ -77,18 +77,18 @@ def parse_command_line(): parser.add_argument( '-f', '--file-filter', default='*.fasta', - help="""Use this to filter files in the assemblies directory. For - example '*filtered*.fasta' will select all fasta files in the - assemblies directory with the word filtered in them. The default - is to select all fasta files in the assemblies directory + help="""Use this to filter files in the assemblies directory. For + example '*filtered*.fasta' will select all fasta files in the + assemblies directory with the word filtered in them. The default + is to select all fasta files in the assemblies directory '*.fasta'.""") parser.add_argument( - '--reference-name', action='store_true', - help="""Add the reference name to the final assembled gene name? - if false the gene name in the reference file with just be the - if you select this then the assembled gene name - will be ..""") + '--reference-name', action='store_true', + help="""Add the reference name to the final assembled gene name? + if false the gene name in the reference file with just be the + if you select this then the assembled gene name + will be ..""") args = parser.parse_args()