Skip to content

Commit 1168ef3

Browse files
committed
Update scripts
1 parent 423a55d commit 1168ef3

File tree

3 files changed

+75
-13
lines changed

3 files changed

+75
-13
lines changed

sbx_cenote_taker.smk

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ VIRUS_FP = output_subdir(Cfg, "virus")
99
def get_extension_path() -> Path:
    """Return the resolved directory that contains this file."""
    this_file = Path(__file__)
    return this_file.parent.resolve()
1111

12+
1213
def cenote_output() -> Path:
    """Return the Cenote-Taker FASTA path under ``VIRUS_FP``.

    The ``{sample}`` placeholder is returned unexpanded in the file name.
    """
    return VIRUS_FP.joinpath("cenote_taker", "{sample}.fasta")
1415

@@ -30,8 +31,8 @@ rule cenote_taker:
3031
input:
3132
contigs=ASSEMBLY_FP / "megahit" / "{sample}_asm" / "final.contigs.fa",
3233
output:
33-
VIRUS_FP / "cenote_taker" / "{sample}" / "final.contigs.fasta",
34-
VIRUS_FP
34+
contigs=VIRUS_FP / "cenote_taker" / "{sample}" / "final.contigs.fasta",
35+
summary=VIRUS_FP
3536
/ "cenote_taker"
3637
/ "{sample}"
3738
/ "{sample}"
@@ -65,7 +66,8 @@ rule cenote_taker:
6566
echo "Contigs file exists and is not empty" >> {log}
6667
else
6768
echo "Contigs file is empty" >> {log}
68-
exit 1
69+
touch {output.contigs} {output.summary}
70+
exit 0
6971
fi
7072
7173
if [ ! -d {params.db_fp} ] || [ ! "$(ls -A {params.db_fp})" ]; then

scripts/filter_cenote_contigs.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,50 @@
11
import csv
2-
from sunbeamlib.parse import parse_fasta, write_fasta
2+
import os
3+
from typing import Generator, TextIO
34

45

5-
with open(snakemake.input.summary) as f_summary, open(
6-
snakemake.input.contigs
7-
) as f_contigs, open(snakemake.output[0], "w") as f_out:
6+
def parse_fasta(f: TextIO) -> Generator[tuple[str, str], None, None]:
    """Yield ``(header, sequence)`` pairs from a FASTA-formatted text stream.

    The header is the whole ``>`` line with surrounding whitespace stripped;
    the leading ``>`` is kept. The sequence is every following line up to the
    next header, each stripped and concatenated. Lines that appear before the
    first header are discarded.

    Args:
        f: Open text handle to read FASTA data from.

    Yields:
        One ``(header, sequence)`` tuple per record, in file order.
    """
    header_str = ""
    seq_parts: list[str] = []
    # Iterate the handle lazily instead of materializing every line with
    # readlines(), and join the pieces once per record instead of growing a
    # string with "+=".
    for raw_line in f:
        line = raw_line.strip()
        if line.startswith(">"):
            if header_str:
                yield header_str, "".join(seq_parts)
            header_str = line
            # A fresh buffer also drops any text seen before the first header.
            seq_parts = []
        else:
            seq_parts.append(line)
    # Flush the final record, if the stream contained any header at all.
    if header_str:
        yield header_str, "".join(seq_parts)
20+
21+
22+
def write_fasta(record: list[str], f: TextIO) -> None:
    """Write one record to ``f`` as two lines: ``record[0]`` then ``record[1]``.

    Nothing is prepended to the header, so ``record[0]`` must already carry
    its leading ``>``.
    """
    for index in (0, 1):
        f.write(f"{record[index]}\n")
25+
26+
27+
# Inputs, output and parameters come from the `snakemake` object, which is not
# defined in this file (presumably injected by Snakemake's `script:` directive).
summary = snakemake.input.summary  # type: ignore
contigs = snakemake.input.contigs  # type: ignore
output_fp = snakemake.output[0]  # type: ignore
include_phages = snakemake.params["include_phages"]  # type: ignore

# Empty output if empty contigs: there is nothing to filter, so create an
# empty output file and stop with a success status.
if os.path.getsize(contigs) == 0:
    with open(output_fp, "w"):
        pass
    # Raise SystemExit directly: the bare `exit` name is a convenience helper
    # installed by the `site` module and is not guaranteed to be defined.
    raise SystemExit(0)
37+
38+
with open(summary) as f_summary, open(contigs) as f_contigs, open(
39+
output_fp, "w"
40+
) as f_out:
841
dr = csv.DictReader(f_summary, delimiter="\t")
942
cd = {}
1043
phages = ["phage", "siphoviridae", "conjugative transposon"]
1144
for line in dr:
1245
if (
1346
all([x not in line["ORGANISM_NAME"].lower() for x in phages])
14-
or snakemake.params["include_phages"]
47+
or include_phages
1548
) and int(line["NUM_HALLMARKS"]) > 0:
1649
cd[line["ORIGINAL_NAME"]] = 1
1750

scripts/filter_virus_coverage.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,36 @@
11
import csv
2-
from sunbeamlib.parse import parse_fasta, write_fasta
2+
import os
3+
from typing import Generator, TextIO
34

5+
6+
def parse_fasta(f: TextIO) -> Generator[tuple[str, str], None, None]:
    """Yield ``(header, sequence)`` pairs from a FASTA-formatted text stream.

    The header is the whole ``>`` line with surrounding whitespace stripped;
    the leading ``>`` is kept. The sequence is every following line up to the
    next header, each stripped and concatenated. Lines that appear before the
    first header are discarded.

    Args:
        f: Open text handle to read FASTA data from.

    Yields:
        One ``(header, sequence)`` tuple per record, in file order.
    """
    header_str = ""
    seq_parts: list[str] = []
    # Iterate the handle lazily instead of materializing every line with
    # readlines(), and join the pieces once per record instead of growing a
    # string with "+=".
    for raw_line in f:
        line = raw_line.strip()
        if line.startswith(">"):
            if header_str:
                yield header_str, "".join(seq_parts)
            header_str = line
            # A fresh buffer also drops any text seen before the first header.
            seq_parts = []
        else:
            seq_parts.append(line)
    # Flush the final record, if the stream contained any header at all.
    if header_str:
        yield header_str, "".join(seq_parts)
20+
21+
22+
def write_fasta(record: list[str], f: TextIO) -> None:
    """Write one record to ``f`` as two lines: ``record[0]`` then ``record[1]``.

    Nothing is prepended to the header, so ``record[0]`` must already carry
    its leading ``>``.
    """
    for index in (0, 1):
        f.write(f"{record[index]}\n")
25+
26+
27+
idx = snakemake.input.idx # type: ignore
28+
fa = snakemake.input.fa # type: ignore
29+
output_fp = snakemake.output[0] # type: ignore
30+
log_fp = snakemake.log[0] # type: ignore
431
contigs = {}
532

6-
with open(snakemake.input.idx) as f_idx:
33+
with open(idx) as f_idx:
734
rd = csv.reader(f_idx, delimiter="\t", quotechar='"')
835
for row in rd:
936
if row[0] != "*":
@@ -12,11 +39,11 @@
1239
else:
1340
contigs[row[0]] = 0
1441

15-
with open(snakemake.log[0], "w") as f_log:
42+
# Dump the parsed per-contig table to the log file for debugging.
with open(log_fp, "w") as f_log:
    f_log.write(f"Contigs: {contigs}")

# Copy through only the FASTA records whose contig has a truthy entry in
# `contigs`.
with open(fa) as f_fa, open(output_fp, "w") as f_out:
    for header, seq in parse_fasta(f_fa):
        # The local parse_fasta keeps the leading ">" on the header, while
        # `contigs` is keyed by the first column of the index file
        # (presumably bare reference names -- confirm against the idx input).
        # Strip the ">" for the lookup only.
        record_id = header.split(" ")[0]
        contig_name = record_id[1:]
        if contigs[contig_name]:
            # write_fasta emits the header verbatim, so pass the id with its
            # ">" still attached to keep the output valid FASTA.
            write_fasta([record_id, seq], f_out)

0 commit comments

Comments
 (0)