diff --git a/checkm/VERSION b/checkm/VERSION index fd9b658..bcaad1e 100755 --- a/checkm/VERSION +++ b/checkm/VERSION @@ -1,3 +1,9 @@ +1.1.9 +- fixed support for gzip input files + +1.1.8 +- updated setup.py to include long description and fixed PyPi URL + 1.1.7 - added Jie Zhu's (alienzj) PR to allow input files to be specified via a file - added Jeremy Jacobson's (jjacobson95) PR to fix prettytable import issue diff --git a/checkm/prodigal.py b/checkm/prodigal.py index 36ac847..41ee381 100755 --- a/checkm/prodigal.py +++ b/checkm/prodigal.py @@ -25,6 +25,7 @@ import subprocess import logging import shutil +import tempfile import numpy as np @@ -54,18 +55,20 @@ def run(self, query, bNucORFs=True): prodigal_input = query - # decompress gzip input files - if prodigal_input.endswith('.gz'): - tmp_dir = tempfile.mkdtemp() - prodigal_input = os.path.join(tmp_dir, os.path.basename(prodigal_input[0:-3]) + '.fna') - writeFasta(seqs, prodigal_input) - # gather statistics about query file seqs = readFasta(prodigal_input) totalBases = 0 for seqId, seq in seqs.items(): totalBases += len(seq) + # decompress gzip input files + tmp_dir = None + if prodigal_input.endswith('.gz'): + tmp_dir = tempfile.mkdtemp() + prodigal_input = os.path.join( + tmp_dir, os.path.basename(prodigal_input[0:-3])) + writeFasta(seqs, prodigal_input) + # call ORFs with different translation tables and select the one with the highest coding density tableCodingDensity = {} for translationTable in [4, 11]: @@ -119,10 +122,13 @@ def run(self, query, bNucORFs=True): if (tableCodingDensity[4] - tableCodingDensity[11] > 0.05) and tableCodingDensity[4] > 0.7: bestTranslationTable = 4 - shutil.copyfile(self.aaGeneFile + '.' + str(bestTranslationTable), self.aaGeneFile) - shutil.copyfile(self.gffFile + '.' + str(bestTranslationTable), self.gffFile) + shutil.copyfile(self.aaGeneFile + '.' + + str(bestTranslationTable), self.aaGeneFile) + shutil.copyfile(self.gffFile + '.' + + str(bestTranslationTable), self.gffFile) if bNucORFs: - shutil.copyfile(self.ntGeneFile + '.' + str(bestTranslationTable), self.ntGeneFile) + shutil.copyfile(self.ntGeneFile + '.' + + str(bestTranslationTable), self.ntGeneFile) # clean up redundant prodigal results for translationTable in [4, 11]: @@ -131,7 +137,7 @@ def run(self, query, bNucORFs=True): if bNucORFs: os.remove(self.ntGeneFile + '.' + str(translationTable)) - if prodigal_input.endswith('.gz'): + if tmp_dir: shutil.rmtree(tmp_dir) return bestTranslationTable @@ -153,7 +159,8 @@ def checkForProdigal(self): # Assume that a successful prodigal -h returns 0 and anything # else returns something non-zero try: - subprocess.call(['prodigal', '-h'], stdout=open(os.devnull, 'w'), stderr=subprocess.STDOUT) + subprocess.call( + ['prodigal', '-h'], stdout=open(os.devnull, 'w'), stderr=subprocess.STDOUT) except: self.logger.error("Make sure prodigal is on your system path.") sys.exit(1) @@ -205,7 +212,8 @@ def __parseGFF(self, filename): lineSplit = line.split(';') for token in lineSplit: if 'transl_table' in token: - self.translationTable = int(token[token.find('=') + 1:]) + self.translationTable = int( + token[token.find('=') + 1:]) if line[0] == '#' or line.strip() == '"': # work around for Prodigal having lines with just a diff --git a/checkm/util/seqUtils.py b/checkm/util/seqUtils.py index d83ffac..8d398fa 100755 --- a/checkm/util/seqUtils.py +++ b/checkm/util/seqUtils.py @@ -138,11 +138,13 @@ def checkNuclotideSeqs(seq_files): for seq_file in seq_files: if os.stat(seq_file).st_size == 0: continue - + if not isNucleotide(seq_file): logger = logging.getLogger('timestamp') - logger.warning('Expected all files to contain sequences in nucleotide space.') - logger.warning('File %s appears to contain amino acids sequences.' % seq_file) + logger.warning( + 'Expected all files to contain sequences in nucleotide space.') + logger.warning( + 'File %s appears to contain amino acids sequences.' % seq_file) return True @@ -164,11 +166,13 @@ def checkProteinSeqs(seq_files): for seq_file in seq_files: if os.stat(seq_file).st_size == 0: continue - + if isNucleotide(seq_file): logger = logging.getLogger('timestamp') - logger.warning('Expected all files to contain sequences in amino acid space.') - logger.warning('File %s appears to contain nucleotide sequences.' % seq_file) + logger.warning( + 'Expected all files to contain sequences in amino acid space.') + logger.warning( + 'File %s appears to contain nucleotide sequences.' % seq_file) return True @@ -182,7 +186,7 @@ def readFasta(fastaFile, trimHeader=True): openFile = open seqs = {} - for line in openFile(fastaFile): + for line in openFile(fastaFile, 'rt'): # skip blank lines if not line.strip(): continue @@ -198,7 +202,8 @@ def readFasta(fastaFile, trimHeader=True): for seqId, seq in seqs.items(): seqs[seqId] = ''.join(seq) - except: + except Exception as e: + print(e) logger = logging.getLogger('timestamp') logger.error("Failed to process sequence file: {}".format(fastaFile)) sys.exit(1) diff --git a/setup.py b/setup.py index 5520995..58c68f0 100755 --- a/setup.py +++ b/setup.py @@ -3,11 +3,18 @@ import os from setuptools import setup + def version(): setupDir = os.path.dirname(os.path.realpath(__file__)) versionFile = open(os.path.join(setupDir, 'checkm', 'VERSION')) return versionFile.readline().strip() + +def readme(): + with open('README.md') as f: + return f.read() + + setup( name='checkm-genome', version=version(), @@ -17,11 +24,14 @@ def version(): scripts=['bin/checkm'], package_data={'checkm': ['VERSION', 'DATA_CONFIG']}, include_package_data=True, - url='http://pypi.python.org/pypi/checkm/', + url='http://pypi.python.org/pypi/checkm-genome/', license='GPL3', description='Assess the quality of putative genome bins.', + long_description=readme(), + long_description_content_type='text/markdown', classifiers=[ 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Science/Research', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Natural Language :: English', 'Programming Language :: Python :: 3.6',