Skip to content

Commit

Permalink
Allow argument files for atram_preprocessor.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
rafelafrance committed Jul 12, 2019
1 parent c04c778 commit c1bf387
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 99 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -3,6 +3,7 @@
*.swp
__pycache__

args/
venv/*
venv35/*
temp*/*
Expand Down
168 changes: 83 additions & 85 deletions atram.py
Expand Up @@ -43,103 +43,101 @@ def parse_command_line():

group = parser.add_argument_group('required arguments')

group.add_argument('-b', '--blast-db', '--sra', '--db', '--database',
required=True, metavar='DB', nargs='+',
help="""This needs to match the DB prefix you
entered for atram_preprocessor.py. You may repeat
this argument to run the --query sequence(s)
against multiple blast databases.""")

group.add_argument('-q', '--query', '--target', '--probe',
required=False, nargs='+',
help="""The path to the fasta file with sequences of
interest. You may repeat this argument. If you do
then Each --query sequence file will be run
against every --blast-db.""")

group.add_argument('-Q', '--query-split', '--target-split',
required=False, nargs='+',
help="""The path to the fasta file with multiple
sequences of interest. This will take every
sequence in the fasta file and treat it as if it
were its own --query argument. So every sequence in
--query-split will be run against every --blast-db.
""")

group.add_argument('-o', '--output-prefix', required=True,
help="""This is the prefix of all of the output files.
So you can identify different blast output file
sets. You may include a directory as part of the
prefix. aTRAM will add suffixes to differentiate
output files.""")

group.add_argument('-a', '--assembler', default='none',
choices=['abyss', 'trinity', 'velvet', 'spades',
'none'],
help="""Which assembler to use. Choosing "none" (the
default) will do a single blast run and stop before
any assembly.""")

group.add_argument('-i', '--iterations', type=int, default=5, metavar='N',
help="""The number of pipeline iterations.
The default is "5".""")

group.add_argument('-p', '--protein', action='store_true',
help="""Are the query sequences protein?
aTRAM will guess if you skip this argument.""")

group.add_argument('--fraction', type=float, default=1.0,
help="""Use only the specified fraction of the aTRAM
database. The default is 1.0.""")
group.add_argument(
'-b', '--blast-db', '--sra', '--db', '--database',
required=True, metavar='DB', nargs='+',
help="""This needs to match the DB prefix you entered for
atram_preprocessor.py. You may repeat this argument to run the
--query sequence(s) against multiple blast databases.""")

group.add_argument(
'-q', '--query', '--target', '--probe', required=False, nargs='+',
help="""The path to the fasta file with sequences of interest. You may
repeat this argument. If you do then Each --query sequence file
will be run against every --blast-db.""")

group.add_argument(
'-Q', '--query-split', '--target-split', required=False, nargs='+',
help="""The path to the fasta file with multiple sequences of interest.
This will take every sequence in the fasta file and treat it as if
it were its own --query argument. So every sequence in
--query-split will be run against every --blast-db.""")

group.add_argument(
'-o', '--output-prefix', required=True,
help="""This is the prefix of all of the output files. So you can
identify different blast output file sets. You may include a
directory as part of the prefix. aTRAM will add suffixes to
differentiate output files.""")

group.add_argument(
'-a', '--assembler', default='none',
choices=['abyss', 'trinity', 'velvet', 'spades', 'none'],
help="""Which assembler to use. Choosing "none" (the default) will do
a single blast run and stop before any assembly.""")

group.add_argument(
'-i', '--iterations', type=int, default=5, metavar='N',
help="""The number of pipeline iterations. The default is "5".""")

group.add_argument(
'-p', '--protein', action='store_true',
help="""Are the query sequences protein? aTRAM will guess if you skip
this argument.""")

group.add_argument(
'--fraction', type=float, default=1.0,
help="""Use only the specified fraction of the aTRAM database. The
default is 1.0.""")

cpus = min(10, os.cpu_count() - 4 if os.cpu_count() > 4 else 1)
group.add_argument('--cpus', '--processes', '--max-processes',
type=int, default=cpus,
help="""Number of CPU processors to use. This will also
be used for the assemblers when possible. We will
use {} out of {} CPUs.""".format(
cpus, os.cpu_count()))
group.add_argument(
'--cpus', '--processes', '--max-processes', type=int, default=cpus,
help="""Number of CPU processors to use. This will also be used for
the assemblers when possible. We will use {} out of {} CPUs.
""".format(cpus, os.cpu_count()))

group.add_argument('--log-file', help="""Log file (full path)".""")

group.add_argument('--path',
help="""If the assembler or blast you want to use is not
in your $PATH then use this to prepend
directories to your path.""")
group.add_argument(
'--path',
help="""If the assembler or blast you want to use is not in your $PATH\
then use this to prepend directories to your path.""")

group.add_argument('-t', '--temp-dir', metavar='DIR',
help="""Place temporary files in this directory. All
files will be deleted after aTRAM completes. The
directory must exist.""")
group.add_argument(
'-t', '--temp-dir', metavar='DIR',
help="""Place temporary files in this directory. All files will be
deleted after aTRAM completes. The directory must exist.""")

group.add_argument('--keep-temp-dir', action='store_true',
help="""This flag will keep the temporary files in the
--temp-dir around for debugging.""")
group.add_argument(
'--keep-temp-dir', action='store_true',
help="""This flag will keep the temporary files in the --temp-dir
around for debugging.""")

group.add_argument('-T', '--timeout', metavar='SECONDS', default=300,
type=int,
help="""How many seconds to wait for an assembler before
stopping the run. To wait forever set this to 0.
The default is "300" (5 minutes).""")
group.add_argument(
'-T', '--timeout', metavar='SECONDS', default=300, type=int,
help="""How many seconds to wait for an assembler before stopping the
run. To wait forever set this to 0. The default is "300"
(5 minutes).""")

group = parser.add_argument_group(
'optional values for blast-filtering contigs')

group.add_argument('--no-filter', action='store_true',
help="""Do not filter the assembled contigs. This will:
set both the --bit-score and --contig-length
to 0""")

group.add_argument('--bit-score', type=float, default=70.0,
metavar='SCORE',
help="""Remove contigs that have a value less than this.
The default is "70.0". This is turned off by the
--no-filter argument.""")

group.add_argument('--contig-length', '--length', type=int, default=100,
help="""Remove blast hits that are shorter than this
length. The default is "100". This is turned
off by the --no-filter argument.""")
group.add_argument(
'--no-filter', action='store_true',
help="""Do not filter the assembled contigs. This will: set both the
--bit-score and --contig-length to 0""")

group.add_argument(
'--bit-score', type=float, default=70.0, metavar='SCORE',
help="""Remove contigs that have a value less than this. The default
is "70.0". This is turned off by the --no-filter argument.""")

group.add_argument(
'--contig-length', '--length', type=int, default=100,
help="""Remove blast hits that are shorter than this length. The
default is "100". This is turned off by the --no-filter argument.
""")

blast.command_line_args(parser)
assembly.command_line_args(parser)
Expand Down
3 changes: 2 additions & 1 deletion atram_preprocessor.py
Expand Up @@ -42,6 +42,7 @@ def parse_command_line():
"""

parser = argparse.ArgumentParser(
fromfile_prefix_chars='@',
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent(description))

Expand Down Expand Up @@ -83,7 +84,7 @@ def parse_command_line():

blast_db = join('.', 'atram_' + date.today().isoformat())
group.add_argument(
'-b', '--blast-db', '--output', '--db', default=blast_db, metavar='DB',
'-b', '--blast-db', '--db', default=blast_db, metavar='DB',
help="""This is the prefix of all of the blast database files. So you
can identify different blast database sets. You may include a
directory as part of the prefix. The default is "{}".
Expand Down
26 changes: 13 additions & 13 deletions atram_stitcher.py
Expand Up @@ -45,7 +45,7 @@ def parse_command_line():

parser.add_argument(
'-O', '--overlap', type=int, default=10,
help="""Contigs must overlap by this many codons before it is
help="""Contigs must overlap by this many codons before it is
considered a real overlap.""")

parser.add_argument(
Expand All @@ -64,9 +64,9 @@ def parse_command_line():
"atram_stitcher_<date>.log".""")

parser.add_argument(
'-i', '--iterations', type=int, default=2, metavar='N',
help="""The number of times to run the main stitcher loop. This
must be either 1 or 2, the default is 2.""")
'-i', '--iterations', type=int, default=2, metavar='N',
help="""The number of times to run the main stitcher loop. This
must be either 1 or 2, the default is 2.""")

parser.add_argument(
'-o', '--output-prefix',
Expand All @@ -77,18 +77,18 @@ def parse_command_line():

parser.add_argument(
'-f', '--file-filter', default='*.fasta',
help="""Use this to filter files in the assemblies directory. For
example '*filtered*.fasta' will select all fasta files in the
assemblies directory with the word filtered in them. The default
is to select all fasta files in the assemblies directory
help="""Use this to filter files in the assemblies directory. For
example '*filtered*.fasta' will select all fasta files in the
assemblies directory with the word filtered in them. The default
is to select all fasta files in the assemblies directory
'*.fasta'.""")

parser.add_argument(
'--reference-name', action='store_true',
help="""Add the reference name to the final assembled gene name?
if false the gene name in the reference file with just be the
<taxon-name> if you select this then the assembled gene name
will be <reference-name>.<taxon-name>.""")
'--reference-name', action='store_true',
help="""Add the reference name to the final assembled gene name?
if false the gene name in the reference file with just be the
<taxon-name> if you select this then the assembled gene name
will be <reference-name>.<taxon-name>.""")

args = parser.parse_args()

Expand Down

0 comments on commit c1bf387

Please sign in to comment.