Skip to content

Commit 9639210

Browse files
committed
enable gapped blast hits for porins, rev guided by @alawsin
1 parent 7f8b209 commit 9639210

File tree

1 file changed

+21
-18
lines changed

1 file changed

+21
-18
lines changed

c-SSTAR

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22

3-
__version__ = '1.1'
3+
__version__ = '1.2'
44

55
import argparse
66
import logging
@@ -30,12 +30,16 @@ def syscall(syscmd):
3030
sys.exit('ERROR: failed syscall ' + syscmd)
3131

3232
def translateSeq(nuclSeq):
33+
'''use biopython to take in a nucleotide sequence (string) and return a protein sequence'''
3334
proteinSeq = Seq(nuclSeq, generic_dna).translate(cds=False, to_stop=False, stop_symbol='*')
3435
return proteinSeq
3536

3637
def internalSTOPcodon(candidate):
3738
'''counts number of internal stop codons; requires sequence from database to be in frame'''
38-
nucSeq = candidate[10]
39+
if '-' in candidate[10]:
40+
nucSeq = candidate[10].replace('-', '')
41+
else:
42+
nucSeq = candidate[10]
3943
if int(int(candidate[8])%3) == 1:
4044
frameStart = int(0)
4145
elif int(int(candidate[8])%3) == 0:
@@ -44,13 +48,12 @@ def internalSTOPcodon(candidate):
4448
frameStart = int(2)
4549
if frameStart > 0:
4650
nucSeq = nucSeq[frameStart:]
47-
frameEnd = int(int(candidate[9])%3)
48-
if frameEnd > 0:
49-
nucSeq = nucSeq[:-frameEnd]
5051
if len(nucSeq)%3 == 0:
5152
protein = translateSeq(nucSeq)
53+
elif len(nucSeq)%3 > 0:
54+
protein = translateSeq(nucSeq[:-(len(nucSeq)%3)])
5255
else:
53-
sys.exit('ERROR: incorrect nucleotide length (%s) after trimming' % len(nucSeq))
56+
sys.exit('ERROR: incorrect nucleotide length ({}) after trimming'.format(len(nucSeq)))
5457
numInternalSTOPcodons = len(re.findall(r'\*[ABCDEFGHIKLMNPQRSTVWYZ]', str(protein)))
5558
return (protein, str(numInternalSTOPcodons))
5659

@@ -74,22 +77,22 @@ def main(args=None):
7477
baseGenome = os.path.splitext(os.path.basename(genome))[0]
7578
if not os.path.exists(outdir):
7679
os.mkdir(outdir)
77-
logging.basicConfig(filename='%s/c-SSTAR_%s.log' % (outdir, baseGenome), format='%(asctime)s: %(levelname)s: %(message)s', datefmt='%d-%m-%Y %I:%M:%S %p', level=logging.INFO)
78-
logging.info('user: %s' % pwd.getpwuid(os.getuid()).pw_name)
79-
logging.info('release: %s' % os.uname()[3])
80-
logging.info('shell env: %s' % pwd.getpwuid(os.getuid()).pw_shell)
81-
logging.info('cwd: %s' % pwd.getpwuid(os.getuid()).pw_dir)
82-
logging.info('python version: %s' % sys.version)
80+
logging.basicConfig(filename='{}/c-SSTAR_{}.log'.format(outdir, baseGenome), format='%(asctime)s: %(levelname)s: %(message)s', datefmt='%d-%m-%Y %I:%M:%S %p', level=logging.INFO)
81+
logging.info('user: {}'.format(pwd.getpwuid(os.getuid()).pw_name))
82+
logging.info('release: {}'.format(os.uname()[3]))
83+
logging.info('shell env: {}'.format(pwd.getpwuid(os.getuid()).pw_shell))
84+
logging.info('cwd: {}'.format(pwd.getpwuid(os.getuid()).pw_dir))
85+
logging.info('python version: {}'.format(sys.version))
8386
logging.info(subprocess.check_output('command -v blastn', shell=True).rstrip())
8487
logging.info(subprocess.check_output('blastn -version | tail -n1', shell=True).rstrip())
8588
database = args.database
8689
similarity = args.similarity
87-
syscall('makeblastdb -in ' + genome + ' -out ' + os.path.join(outdir, baseGenome) + ' -dbtype nucl')
88-
syscall('blastn -task blastn -query {0} -db {out} -out {out}.blastn.tsv -evalue 1e-5 -max_target_seqs 1 -ungapped -perc_identity {1} -culling_limit 1 -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore qlen sseq"'.format(database, similarity, out=os.path.join(outdir, baseGenome)))
89-
syscall('rm ' + os.path.join(outdir, baseGenome) + '.nin')
90-
syscall('rm ' + os.path.join(outdir, baseGenome) + '.nsq')
91-
syscall('rm ' + os.path.join(outdir, baseGenome) + '.nhr')
92-
with open(os.path.join(outdir, baseGenome) + '.blastn.tsv') as infile:
90+
syscall('makeblastdb -in {} -out {} -dbtype nucl'.format(genome, os.path.join(outdir, baseGenome)))
91+
syscall('blastn -task blastn -query {0} -db {out} -out {out}.blastn.tsv -evalue 1e-5 -max_target_seqs 1 -perc_identity {1} -culling_limit 1 -outfmt "6 qseqid sseqid pident length mismatch gaps qstart qend sstart send evalue bitscore qlen sseq"'.format(database, similarity, out=os.path.join(outdir, baseGenome)))
92+
os.remove(os.path.join(outdir, baseGenome + '.nin'))
93+
os.remove(os.path.join(outdir, baseGenome + '.nsq'))
94+
os.remove(os.path.join(outdir, baseGenome + '.nhr'))
95+
with open(os.path.join(outdir, baseGenome + '.blastn.tsv')) as infile:
9396
best = ['-1','a','a','a',0,'-1','a',0,'a','a','a']
9497
currentClusterNr = '-1'
9598
topHits = []

0 commit comments

Comments
 (0)