diff --git a/README.md b/README.md
index 87c102c..102177a 100755
--- a/README.md
+++ b/README.md
@@ -1,52 +1,42 @@
-# sbx_virus_id
+# sbx_cenote_taker
-[](https://github.com/sunbeam-labs/sbx_virus_id/actions/workflows/tests.yml)
-[](https://hub.docker.com/repository/docker/sunbeamlabs/sbx_virus_id/)
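+[![Tests](https://github.com/sunbeam-labs/sbx_cenote_taker/actions/workflows/tests.yml/badge.svg)](https://github.com/sunbeam-labs/sbx_cenote_taker/actions/workflows/tests.yml)
+[![DockerHub](https://img.shields.io/docker/pulls/sunbeamlabs/sbx_cenote_taker)](https://hub.docker.com/repository/docker/sunbeamlabs/sbx_cenote_taker/)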
## Introduction
-sbx_virus_id is a [sunbeam](https://github.com/sunbeam-labs/sunbeam) extension for identifying viruses in samples. This pipeline uses [MEGAHIT](https://github.com/voutcn/megahit) or [SPAdes](https://github.com/ablab/spades) for assembly of contigs and [Cenote-Taker2](https://github.com/mtisza1/Cenote-Taker2) or [Virsorter2](https://github.com/jiarong/VirSorter2) for viral identification.
+sbx_cenote_taker is a [sunbeam](https://github.com/sunbeam-labs/sunbeam) extension for identifying viruses in samples with [Cenote-Taker3](https://github.com/mtisza1/Cenote-Taker3). This pipeline uses [MEGAHIT](https://github.com/voutcn/megahit) for assembly of contigs and then processes assemblies with Cenote-Taker3.
-N.B. If using Megahit for assembly, this extension requires also having sbx_assembly installed.
+N.B. This extension also requires sbx_assembly to be installed.
### Installation
```
-sunbeam extend https://github.com/sunbeam-labs/sbx_virus_id.git
+sunbeam extend https://github.com/sunbeam-labs/sbx_assembly.git
+sunbeam extend https://github.com/sunbeam-labs/sbx_cenote_taker.git
```
-# Installing blast dbs
+### Cenote-Taker database
-Install blast db:
+sbx_cenote_taker expects the Cenote-Taker3 reference database to be available locally. Download the database following the official instructions, for example:
```
-conda create -n blast
-conda activate blast
-conda install -c bioconda blast
-mkdir refseq_select_prot/
-cd refseq_select_prot/
-perl `which update_blastdb.pl` --decompress refseq_select_prot
+conda activate cenote-taker
+get_ct3_dbs -o /path/to/ct3_db --hmm T --hallmark_tax T --refseq_tax T --mmseqs_cdd T --domain_list T --hhCDD T --hhPFAM T --hhPDB T
```
-Install viral blast db:
+Set the `cenote_taker_db` entry (under `sbx_cenote_taker` in your Sunbeam configuration file) to the path of the resulting directory.
-```
-conda stuff from above ^^^
-mkdir viral_prot/ && cd viral_prot/
-wget https://ftp.ncbi.nlm.nih.gov/refseq/release/viral/viral.1.protein.faa.gz && gzip -d viral.1.protein.faa.gz
-makeblastdb -in viral.1.protein.faa -parse_seqids -title "viral" -dbtype prot
-```
+### Running
-## Running
-
-Run with sunbeam on the target `all_virus_id`,
+Run with sunbeam on the target `all_cenote_taker`:
```
-sunbeam run --profile /path/to/project/ all_virus_id
+sunbeam run --profile /path/to/project/ all_cenote_taker
```
### Options for config.yml
@@ -54,16 +44,5 @@ sunbeam run --profile /path/to/project/ all_virus_id
- blast_db: path to blast db (default: "") (NOTE: this should be the database file not just the directory it's in)
- blastx_threads: number of threads for running blastx (default: 4)
- bowtie2_build_threads: number of threads for running bowtie2-build (default: 4)
- - cenote_taker2_db: path to cenote-taker2 db (default: "") (NOTE: this should be a directory)
- - virsorter_db: path to virsorter2 db (default: "") (NOTE: this should be a directory)
+ - cenote_taker_db: path to cenote-taker3 db (default: "") (NOTE: this should be a directory)
- include_phages: Whether to include phages in the output (default: False)
- - use_spades: Whether to use SPAdes instead of MEGAHIT (default: False)
- - use_virsorter: Whether to use Virsorter2 instead of Cenote-Taker2 (default: False)
-
-## Legacy Installation
-
-```
-git clone https://github.com/sunbeam-labs/sbx_virus_id.git extensions/sbx_virus_id
-cd extensions/sbx_virus_id
-cat config.yml >> /path/to/sunbeam_config.yml
-```
diff --git a/VERSION b/VERSION
index 8a9ecc2..6c6aa7c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.0.1
\ No newline at end of file
+0.1.0
\ No newline at end of file
diff --git a/config.yml b/config.yml
index 193b1b9..4055dfd 100755
--- a/config.yml
+++ b/config.yml
@@ -1,10 +1,6 @@
-sbx_virus_id:
+sbx_cenote_taker:
blast_db: ''
blastx_threads: 4
bowtie2_build_threads: 4
cenote_taker_db: ''
- cenote_taker_extra_dbs: True # Download hhsuite databases (True, ~70GB) or only basic (False, ~3GB)
- virsorter_db: ''
include_phages: False
- use_spades: False # Default: Megahit
- use_virsorter: False # Default: Cenote-Taker2
\ No newline at end of file
diff --git a/envs/cenote_taker_env.Dockerfile b/envs/cenote_taker_env.Dockerfile
index 4154ef4..f6d050c 100644
--- a/envs/cenote_taker_env.Dockerfile
+++ b/envs/cenote_taker_env.Dockerfile
@@ -1,17 +1,17 @@
-FROM condaforge/mambaforge:latest
-
-# Setup
-WORKDIR /home/sbx_virus_id_env
-
-COPY envs/cenote_taker_env.yml ./
-
-# Install environment
-RUN conda env create --file cenote_taker_env.yml --name cenote_taker
-
-ENV PATH="/opt/conda/envs/cenote_taker/bin/:${PATH}"
-
-# "Activate" the environment
-SHELL ["conda", "run", "-n", "cenote_taker", "/bin/bash", "-c"]
-
-# Run
-CMD "bash"
\ No newline at end of file
+FROM condaforge/mambaforge:latest
+
+# Setup
+WORKDIR /home/sbx_cenote_taker_env
+
+COPY envs/cenote_taker_env.yml ./
+
+# Install environment
+RUN conda env create --file cenote_taker_env.yml --name cenote_taker
+
+ENV PATH="/opt/conda/envs/cenote_taker/bin/:${PATH}"
+
+# "Activate" the environment
+SHELL ["conda", "run", "-n", "cenote_taker", "/bin/bash", "-c"]
+
+# Run
+CMD "bash"
diff --git a/envs/sbx_cenote_taker.Dockerfile b/envs/sbx_cenote_taker.Dockerfile
new file mode 100644
index 0000000..82f90fc
--- /dev/null
+++ b/envs/sbx_cenote_taker.Dockerfile
@@ -0,0 +1,17 @@
+FROM condaforge/mambaforge:latest
+
+# Setup
+WORKDIR /home/sbx_cenote_taker_env
+
+COPY envs/sbx_cenote_taker.yml ./
+
+# Install environment
+RUN conda env create --file sbx_cenote_taker.yml --name sbx_cenote_taker
+
+ENV PATH="/opt/conda/envs/sbx_cenote_taker/bin/:${PATH}"
+
+# "Activate" the environment
+SHELL ["conda", "run", "-n", "sbx_cenote_taker", "/bin/bash", "-c"]
+
+# Run
+CMD "bash"
diff --git a/envs/sbx_virus_id.linux-64.pin.txt b/envs/sbx_cenote_taker.linux-64.pin.txt
similarity index 100%
rename from envs/sbx_virus_id.linux-64.pin.txt
rename to envs/sbx_cenote_taker.linux-64.pin.txt
diff --git a/envs/sbx_virus_id.yml b/envs/sbx_cenote_taker.yml
similarity index 71%
rename from envs/sbx_virus_id.yml
rename to envs/sbx_cenote_taker.yml
index 57050ac..bffc5c0 100755
--- a/envs/sbx_virus_id.yml
+++ b/envs/sbx_cenote_taker.yml
@@ -1,4 +1,4 @@
-name: sbx_virus_id
+name: sbx_cenote_taker
channels:
- conda-forge
- bioconda
@@ -7,4 +7,4 @@ dependencies:
- blast
- bowtie2
- samtools
- - python>=3.10
\ No newline at end of file
+ - python>=3.10
diff --git a/envs/sbx_virus_id.Dockerfile b/envs/sbx_virus_id.Dockerfile
deleted file mode 100644
index 06c4672..0000000
--- a/envs/sbx_virus_id.Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM condaforge/mambaforge:latest
-
-# Setup
-WORKDIR /home/sbx_virus_id_env
-
-COPY envs/sbx_virus_id.yml ./
-
-# Install environment
-RUN conda env create --file sbx_virus_id.yml --name sbx_virus_id
-
-ENV PATH="/opt/conda/envs/sbx_virus_id/bin/:${PATH}"
-
-# "Activate" the environment
-SHELL ["conda", "run", "-n", "sbx_virus_id", "/bin/bash", "-c"]
-
-# Run
-CMD "bash"
\ No newline at end of file
diff --git a/envs/spades_env.Dockerfile b/envs/spades_env.Dockerfile
deleted file mode 100644
index 44fb8f0..0000000
--- a/envs/spades_env.Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM condaforge/mambaforge:latest
-
-# Setup
-WORKDIR /home/sbx_virus_id_env
-
-COPY envs/spades_env.yml ./
-
-# Install environment
-RUN conda env create --file spades_env.yml --name spades
-
-ENV PATH="/opt/conda/envs/spades/bin/:${PATH}"
-
-# "Activate" the environment
-SHELL ["conda", "run", "-n", "spades", "/bin/bash", "-c"]
-
-# Run
-CMD "bash"
\ No newline at end of file
diff --git a/envs/spades_env.linux-64.pin.txt b/envs/spades_env.linux-64.pin.txt
deleted file mode 100644
index 79dc603..0000000
--- a/envs/spades_env.linux-64.pin.txt
+++ /dev/null
@@ -1,34 +0,0 @@
-# This file may be used to create an environment using:
-# $ conda create --name --file
-# platform: linux-64
-@EXPLICIT
-https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
-https://conda.anaconda.org/conda-forge/noarch/_sysroot_linux-64_curr_repodata_hack-3-h69a702a_13.conda#f6ce7955b53ae1ca83144adb3be9c600
-https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2
-https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3
-https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_2.conda#9172c297304f2a20134fc56c97fbe229
-https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a
-https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-3.10.0-h4a8ded7_13.tar.bz2#523bc836a954faf0cca94831971bb85a
-https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_2.conda#e2042154faafe61969556f28bade94b9
-https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
-https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.17-h4a8ded7_13.tar.bz2#57e5a5191ffe999b9f4dfdbcd0ddcba4
-https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_2.conda#c28003b0be0494f9a7664389146716ff
-https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54
-https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd
-https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
-https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
-https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
-https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad
-https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-8.0.1-hc9558a2_0.tar.bz2#67590caab043d6d7ffc371f9cced7848
-https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179
-https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.3-hd590300_0.conda#7bb88ce04c8deb9f7d763ae04a1da72f
-https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
-https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.2-h2797004_0.conda#4b441a1ee22397d5a27dc1126b849edd
-https://conda.anaconda.org/conda-forge/linux-64/openmp-8.0.1-0.tar.bz2#b35241079152e5cc891c99368395b2c6
-https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
-https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-h2797004_0.conda#513336054f884f95d9fd925748f41ef3
-https://conda.anaconda.org/conda-forge/linux-64/python-3.12.0-hab00c5b_0_cpython.conda#7f97faab5bebcc2580f4f299285323da
-https://conda.anaconda.org/conda-forge/noarch/setuptools-68.2.2-pyhd8ed1ab_0.conda#fc2166155db840c634a1291a5c35a709
-https://conda.anaconda.org/bioconda/linux-64/spades-3.15.5-h95f258a_1.tar.bz2#62ab35497479905dce6860525262104f
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f
-https://conda.anaconda.org/conda-forge/noarch/pip-23.3-pyhd8ed1ab_0.conda#a06f102f59c8e3bb8b3e46e71c384709
diff --git a/envs/spades_env.yml b/envs/spades_env.yml
deleted file mode 100755
index 2d7a018..0000000
--- a/envs/spades_env.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-channels:
- - bioconda
-dependencies:
- - spades
-name: spades
\ No newline at end of file
diff --git a/envs/virsorter_env.Dockerfile b/envs/virsorter_env.Dockerfile
deleted file mode 100644
index 8c08f56..0000000
--- a/envs/virsorter_env.Dockerfile
+++ /dev/null
@@ -1,17 +0,0 @@
-FROM condaforge/mambaforge:latest
-
-# Setup
-WORKDIR /home/sbx_virus_id_env
-
-COPY envs/virsorter_env.yml ./
-
-# Install environment
-RUN conda env create --file virsorter_env.yml --name virsorter
-
-ENV PATH="/opt/conda/envs/virsorter/bin/:${PATH}"
-
-# "Activate" the environment
-SHELL ["conda", "run", "-n", "virsorter", "/bin/bash", "-c"]
-
-# Run
-CMD "bash"
\ No newline at end of file
diff --git a/envs/virsorter_env.linux-64.pin.txt b/envs/virsorter_env.linux-64.pin.txt
deleted file mode 100644
index 6ad0f8c..0000000
--- a/envs/virsorter_env.linux-64.pin.txt
+++ /dev/null
@@ -1,145 +0,0 @@
-# This file may be used to create an environment using:
-# $ conda create --name --file
-# platform: linux-64
-@EXPLICIT
-https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
-https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2
-https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3
-https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_2.conda#9172c297304f2a20134fc56c97fbe229
-https://conda.anaconda.org/conda-forge/noarch/pybind11-abi-4-hd8ed1ab_3.tar.bz2#878f923dd6acc8aeb47a75da6c4098be
-https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.10-4_cp310.conda#26322ec5d7712c3ded99dd656142b8ce
-https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a
-https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_2.conda#e2042154faafe61969556f28bade94b9
-https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
-https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_2.conda#c28003b0be0494f9a7664389146716ff
-https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54
-https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.20.1-hd590300_0.conda#6642e4faa4804be3a0e7edfefbd16595
-https://conda.anaconda.org/conda-forge/linux-64/fmt-10.1.1-h00ab1b0_0.conda#5c875bdc09118cd3fc2edd39842e4c35
-https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37
-https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff
-https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
-https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-h516909a_1.tar.bz2#6f8720dff19e17ce5d48cfe7f3d2f0a3
-https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd
-https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_2.conda#78fdab09d9138851dde2b5fe2a11019e
-https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d
-https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
-https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
-https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad
-https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0
-https://conda.anaconda.org/conda-forge/linux-64/lzo-2.10-h516909a_1000.tar.bz2#bb14fcb13341b81d5eb386423b9d2bac
-https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179
-https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.3-hd590300_0.conda#7bb88ce04c8deb9f7d763ae04a1da72f
-https://conda.anaconda.org/conda-forge/linux-64/reproc-14.2.4.post0-hd590300_0.conda#9f067e96da541ba572d160704984208f
-https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
-https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae
-https://conda.anaconda.org/conda-forge/linux-64/yaml-cpp-0.8.0-h59595ed_0.conda#965eaacd7c18eb8361fd12bb9e7a57d7
-https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
-https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_2.conda#e75a75a6eaf6f318dae2631158c46575
-https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.52.0-h61bc06f_0.conda#613955a50485812985c059e7b269f42e
-https://conda.anaconda.org/conda-forge/linux-64/libsolv-0.7.25-hfc55251_0.conda#17e88b01ca0601d5fd55bb72a9e352d9
-https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.2-h2797004_0.conda#4b441a1ee22397d5a27dc1126b849edd
-https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe
-https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.5-h232c23b_1.conda#f3858448893839820d4bcfb14ad3ecdf
-https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b
-https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-4_hd590300_perl5.conda#3e785bff761095eb7f8676f4694bd1b1
-https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
-https://conda.anaconda.org/conda-forge/linux-64/reproc-cpp-14.2.4.post0-h59595ed_0.conda#33402b9a26cdc1a5a8eeecbe5ce6f486
-https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-h2797004_0.conda#513336054f884f95d9fd925748f41ef3
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589
-https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844
-https://conda.anaconda.org/conda-forge/linux-64/libarchive-3.7.2-h039dbb9_0.conda#611d6c83d1130ea60c916531adfb11db
-https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.24-pthreads_h413a1c8_0.conda#6e4ef6ca28655124dcde9bd500e44c32
-https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.43.2-h2c6b66d_0.conda#c37b95bcd6c6833dacfd5df0ae2f4303
-https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-19_linux64_openblas.conda#420f4e9be59d0dc9133a0f43f7bab3f3
-https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.4.0-hca28451_0.conda#1158ac1d2613b28685644931f11ee807
-https://conda.anaconda.org/conda-forge/linux-64/python-3.10.0-h543edf9_3_cpython.tar.bz2#67cdff58413ce9f034fb971188060313
-https://conda.anaconda.org/conda-forge/noarch/appdirs-1.4.4-pyh9f0ad1d_0.tar.bz2#5f095bc6454094e96f146491fd03633b
-https://conda.anaconda.org/conda-forge/noarch/attrs-23.1.0-pyh71513ae_1.conda#3edfead7cedd1ab4400a6c588f3e75f8
-https://conda.anaconda.org/conda-forge/noarch/boltons-23.0.0-pyhd8ed1ab_0.conda#033eb25fffd222aceeca6d58cd953680
-https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py310hc6cd4ac_1.conda#1f95722c94f00b69af69a066c7433714
-https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22
-https://conda.anaconda.org/conda-forge/linux-64/chardet-5.2.0-py310hff52083_1.conda#a677136a83b823803d2f92045f885be2
-https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.0-pyhd8ed1ab_0.conda#fef8ef5f0a54546b9efee39468229917
-https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda#f3ad426304898027fc619827ff428eca
-https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
-https://conda.anaconda.org/conda-forge/noarch/configargparse-1.7-pyhd8ed1ab_0.conda#0d07dc29b1c1cc973f76b74beb44915f
-https://conda.anaconda.org/conda-forge/linux-64/curl-8.4.0-hca28451_0.conda#2bcf7689cae931dd35d9a45626f49fce
-https://conda.anaconda.org/conda-forge/linux-64/datrie-0.8.2-py310h2372a71_7.conda#e0a66d90c57bdf40cf659a4b0357a795
-https://conda.anaconda.org/conda-forge/linux-64/docutils-0.20.1-py310hff52083_2.conda#ac157d9b464d15fac78b13fcabc0f845
-https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed
-https://conda.anaconda.org/conda-forge/linux-64/jsonpointer-2.4-py310hff52083_3.conda#08ec1463dbc5c806a32fc431874032ca
-https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-19_linux64_openblas.conda#d12374af44575413fbbd4a217d46ea33
-https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-19_linux64_openblas.conda#9f100edf65436e3eabc2a51fc00b2c37
-https://conda.anaconda.org/conda-forge/linux-64/libmamba-1.5.1-had39da4_2.conda#c915e6866cde479b486f1f1fc1ca325b
-https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py310h2372a71_1.conda#b74e07a054c479e45a83a83fc5be713c
-https://conda.anaconda.org/conda-forge/noarch/mdurl-0.1.0-pyhd8ed1ab_0.tar.bz2#f8dab71fdc13b1bf29a01248b156d268
-https://conda.anaconda.org/conda-forge/noarch/packaging-23.2-pyhd8ed1ab_0.conda#79002079284aa895f883c6b7f3f88fd6
-https://conda.anaconda.org/conda-forge/noarch/pkgutil-resolve-name-1.3.10-pyhd8ed1ab_1.conda#405678b942f2481cecdb3e010f4925d9
-https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d
-https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py310h2372a71_1.conda#cb25177acf28cc35cfa6c1ac1c679e22
-https://conda.anaconda.org/conda-forge/linux-64/pycosat-0.6.6-py310h2372a71_0.conda#0adaac9a86d59adae2bc86b3cdef2df1
-https://conda.anaconda.org/conda-forge/noarch/pycparser-2.21-pyhd8ed1ab_0.tar.bz2#076becd9e05608f8dc72757d5f3a91ff
-https://conda.anaconda.org/conda-forge/noarch/pygments-2.16.1-pyhd8ed1ab_0.conda#40e5cb18165466773619e5c963f00a7b
-https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb
-https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025
-https://conda.anaconda.org/conda-forge/noarch/python-fastjsonschema-2.18.1-pyhd8ed1ab_0.conda#305141cff54af2f90e089d868fffce28
-https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py310h2372a71_1.conda#bb010e368de4940771368bc3dc4c63e7
-https://conda.anaconda.org/conda-forge/noarch/ratelimiter-1.2.0-pyhd8ed1ab_1003.tar.bz2#432d4fa75ebc28bf4b337eeff0606cf4
-https://conda.anaconda.org/conda-forge/linux-64/rpds-py-0.10.6-py310hcb5633a_0.conda#43c12d8f7891a87378eb5339c49ef051
-https://conda.anaconda.org/conda-forge/linux-64/ruamel.yaml.clib-0.2.7-py310h2372a71_2.conda#7c9da9721ee545d57ad759f020172853
-https://conda.anaconda.org/conda-forge/noarch/setuptools-68.2.2-pyhd8ed1ab_0.conda#fc2166155db840c634a1291a5c35a709
-https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
-https://conda.anaconda.org/conda-forge/noarch/smmap-3.0.5-pyh44b312d_0.tar.bz2#3a8dc70789709aa315325d5df06fb7e4
-https://conda.anaconda.org/conda-forge/noarch/text-unidecode-1.3-pyhd8ed1ab_1.conda#ba8aba332d8868897ce44ad74015a7fe
-https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36
-https://conda.anaconda.org/conda-forge/noarch/toposort-1.10-pyhd8ed1ab_0.conda#aeef653e20028f19a3c2cc70e166b509
-https://conda.anaconda.org/conda-forge/noarch/traitlets-5.11.2-pyhd8ed1ab_0.conda#bd3f90f7551e1cffb1f402880eb2cef1
-https://conda.anaconda.org/conda-forge/noarch/types-python-dateutil-2.8.19.14-pyhd8ed1ab_0.conda#4df15c51a543e806d439490b862be1c6
-https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.8.0-pyha770c72_0.conda#5b1be40a26d10a06f6d4f1f9e19fa0c7
-https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f
-https://conda.anaconda.org/conda-forge/linux-64/wrapt-1.15.0-py310h2372a71_1.conda#43e5d746d736ae6c71060ed923179d6d
-https://conda.anaconda.org/conda-forge/noarch/zipp-3.17.0-pyhd8ed1ab_0.conda#2e4d6bc0b14e10f895fc6791a7d9b26a
-https://conda.anaconda.org/conda-forge/noarch/amply-0.1.6-pyhd8ed1ab_0.conda#a45a9cc65a438f465845ebff49c6fbf9
-https://conda.anaconda.org/conda-forge/noarch/binaryornot-0.4.4-py_1.tar.bz2#a556fa60840fcb9dd739d186bfd252f7
-https://conda.anaconda.org/conda-forge/linux-64/cffi-1.16.0-py310h2fee648_0.conda#45846a970e71ac98fd327da5d40a0a2c
-https://conda.anaconda.org/conda-forge/linux-64/git-2.42.0-pl5321h86e50cf_0.conda#96ad24c67e0056d171385859c43218a2
-https://conda.anaconda.org/conda-forge/noarch/gitdb-4.0.10-pyhd8ed1ab_0.conda#3706d2f3d7cb5dae600c833345a76132
-https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.1.0-pyhd8ed1ab_0.conda#48b0d98e0c0ec810d3ccc2a0926c8c0e
-https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37
-https://conda.anaconda.org/conda-forge/noarch/jsonpatch-1.33-pyhd8ed1ab_0.conda#bfdb7c5c6ad1077c82a69a8642c87aff
-https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-19_linux64_openblas.conda#685e99d3214f5ac9d1ec6b37983985a6
-https://conda.anaconda.org/conda-forge/linux-64/libmambapy-1.5.1-py310h39ff949_2.conda#d1fb9113b71380d7f3eca059ac6cd14b
-https://conda.anaconda.org/conda-forge/noarch/markdown-it-py-3.0.0-pyhd8ed1ab_0.conda#93a8e71256479c62074356ef6ebf501b
-https://conda.anaconda.org/conda-forge/noarch/pip-23.3-pyhd8ed1ab_0.conda#a06f102f59c8e3bb8b3e46e71c384709
-https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984
-https://conda.anaconda.org/conda-forge/noarch/python-slugify-8.0.1-pyhd8ed1ab_2.conda#519897ff446e0dc056e12402e6785cd5
-https://conda.anaconda.org/conda-forge/noarch/referencing-0.30.2-pyhd8ed1ab_0.conda#a33161b983172ba6ef69d5fc850650cd
-https://conda.anaconda.org/conda-forge/linux-64/ruamel.yaml-0.16.12-py310h5764c6d_3.tar.bz2#bbf52d5fb8daa88be7517abd50722942
-https://conda.anaconda.org/conda-forge/noarch/tqdm-4.66.1-pyhd8ed1ab_0.conda#03c97908b976498dcae97eb4e4f3149c
-https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.8.0-hd8ed1ab_0.conda#384462e63262a527bda564fa2d9126c0
-https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.6-pyhd8ed1ab_0.conda#d5f8944ff9ab24a292511c83dce33dea
-https://conda.anaconda.org/conda-forge/noarch/arrow-1.3.0-pyhd8ed1ab_0.conda#b77d8c2313158e6e461ca0efb1c2c508
-https://conda.anaconda.org/conda-forge/linux-64/coin-or-utils-2.11.9-hee58242_0.conda#0f5e6a4d88aac3fa5fcd2fa929862711
-https://conda.anaconda.org/conda-forge/linux-64/conda-package-handling-1.9.0-py310h5764c6d_1.tar.bz2#1b23ed7479259e9bb83bc4cf4b964e88
-https://conda.anaconda.org/conda-forge/linux-64/cryptography-41.0.4-py310h75e40e8_0.conda#ad06c4db71ba0b6d153c66de88a41fdc
-https://conda.anaconda.org/conda-forge/noarch/gitpython-3.1.37-pyhd8ed1ab_0.conda#8b94c329190fa6814f412adf2ab0f0a2
-https://conda.anaconda.org/conda-forge/noarch/jsonschema-specifications-2023.7.1-pyhd8ed1ab_0.conda#7c27ea1bdbe520bb830dcadd59f55cbf
-https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.11.0-pyhd8ed1ab_0.conda#8f567c0a74aa44cf732f15773b4083b0
-https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b
-https://conda.anaconda.org/conda-forge/noarch/rich-13.6.0-pyhd8ed1ab_0.conda#3ca4829f40710f581ca1d76bc907e99f
-https://conda.anaconda.org/conda-forge/linux-64/coin-or-osi-0.108.8-ha2443b9_0.conda#7e4adb609b8bf87746d6fab1062348c8
-https://conda.anaconda.org/conda-forge/noarch/cookiecutter-2.4.0-pyhca7485f_0.conda#d51520e0dc4e4d6ef149c41be36541e8
-https://conda.anaconda.org/conda-forge/noarch/jsonschema-4.19.1-pyhd8ed1ab_0.conda#78aff5d2af74e6537c1ca73017f01f4f
-https://conda.anaconda.org/conda-forge/linux-64/jupyter_core-5.4.0-py310hff52083_0.conda#28cdf08d2d44db099a95a176f01f7120
-https://conda.anaconda.org/conda-forge/noarch/pyopenssl-23.2.0-pyhd8ed1ab_1.conda#34f7d568bf59d18e3fef8c405cbece21
-https://conda.anaconda.org/conda-forge/linux-64/coin-or-clp-1.17.8-h1ee7a9c_0.conda#a2d4935dc3955aa906c17136039db06c
-https://conda.anaconda.org/conda-forge/linux-64/conda-23.7.4-py310hff52083_0.conda#27638d3af384a315d5f4ca8f056ac23b
-https://conda.anaconda.org/conda-forge/noarch/nbformat-5.9.2-pyhd8ed1ab_0.conda#61ba076de6530d9301a0053b02f093d2
-https://conda.anaconda.org/conda-forge/linux-64/coin-or-cgl-0.60.7-h516709c_0.conda#9170a2b48868d5a340d7076979a775f8
-https://conda.anaconda.org/conda-forge/linux-64/mamba-1.5.1-py310h51d5547_2.conda#974872f0c75edb645d51c619ea155807
-https://conda.anaconda.org/conda-forge/linux-64/coin-or-cbc-2.10.10-h9002f0b_0.conda#f004ee86906bc133df1775e4b3be00ce
-https://conda.anaconda.org/conda-forge/noarch/coincbc-2.10.10-0_metapackage.conda#f1170e44b26962b3e8adade2dd0f3902
-https://conda.anaconda.org/conda-forge/linux-64/pulp-2.7.0-py310hff52083_1.conda#8382eec14f1c1429401549d23694b998
-https://conda.anaconda.org/bioconda/noarch/snakemake-minimal-5.26.0-py_0.tar.bz2#6ddc923aada5bbc448c9b3f14e05ee6a
-https://conda.anaconda.org/bioconda/noarch/virsorter-2.2.4-pyhdfd78af_1.tar.bz2#b2ae5573e2d42548a165dbaa64d52890
diff --git a/envs/virsorter_env.yml b/envs/virsorter_env.yml
deleted file mode 100755
index 0c06f18..0000000
--- a/envs/virsorter_env.yml
+++ /dev/null
@@ -1,6 +0,0 @@
-channels:
- - conda-forge
- - bioconda
-dependencies:
- - virsorter =2
-name: virsorter
\ No newline at end of file
diff --git a/sbx_cenote_taker.smk b/sbx_cenote_taker.smk
new file mode 100755
index 0000000..fa63ecd
--- /dev/null
+++ b/sbx_cenote_taker.smk
@@ -0,0 +1,305 @@
+try:
+ SBX_CENOTE_TAKER_VERSION = get_ext_version("sbx_cenote_taker")  # Interpolated into the container image tags of the rules below
+except (NameError, ValueError):
+ # For backwards compatibility with older versions of Sunbeam
+ SBX_CENOTE_TAKER_VERSION = "0.0.0"
+VIRUS_FP = output_subdir(Cfg, "virus")  # Base path that every rule output in this file is built from
+
+
+def get_extension_path() -> Path:  # Resolved parent directory of __file__; rules use it to locate scripts/ helpers
+ return Path(__file__).parent.resolve()
+
+
+def cenote_output() -> Path:  # Path pattern (with the {sample} wildcard still in it) of the filtered per-sample FASTA
+ return VIRUS_FP / "cenote_taker" / "{sample}.fasta"
+
+
+rule all_cenote_taker:  # Target rule: per-sample gene coverage and blastx tables plus the combined alignment summary
+ input:
+ expand(
+ VIRUS_FP / "alignments" / "{sample}.gene_coverage.tsv",
+ sample=Samples.keys(),
+ ),
+ expand(
+ VIRUS_FP / "blastx" / "{sample}.btf",
+ sample=Samples.keys(),
+ ),
+ VIRUS_FP / "summary" / "all_align_summary.txt",
+
+
+rule cenote_taker:  # Run cenotetaker3 on each sample's MEGAHIT contigs
+ input:
+ contigs=ASSEMBLY_FP / "megahit" / "{sample}_asm" / "final.contigs.fa",
+ output:
+ contigs=VIRUS_FP / "cenote_taker" / "{sample}" / "final.contigs.fasta",
+ summary=VIRUS_FP
+ / "cenote_taker"
+ / "{sample}"
+ / "{sample}"
+ / "{sample}_CONTIG_SUMMARY.tsv",
+ benchmark:
+ BENCHMARK_FP / "cenote_taker_{sample}.tsv"
+ log:
+ LOG_FP / "cenote_taker_{sample}.log",
+ params:
+ out_dir=str(VIRUS_FP / "cenote_taker"),  # The shell command cd's here before running the tool
+ sample="{sample}",  # Validated in the shell, then passed to cenotetaker3 as -r
+ db_fp=Cfg["sbx_cenote_taker"]["cenote_taker_db"],  # Checked for existence, then handed to the tool as CENOTE_DBS
+ resources:
+ mem_mb=24000,
+ runtime=720,
+ conda:
+ "envs/cenote_taker_env.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-cenote-taker"
+ shell:
+ """
+ SAMPLE={params.sample}
+ if [[ ${{#SAMPLE}} -lt 18 ]] && [[ {params.sample} =~ ^[a-zA-Z0-9_]+$ ]]; then
+ echo "Sample name format is valid" >> {log}
+ else
+ echo "Cenote-Taker requires a sample name that is less than 18 characters and contains only alphanumeric characters and underscores" >> {log}
+ exit 1
+ fi
+
+ if [ -s {input.contigs} ]; then
+ echo "Contigs file exists and is not empty" >> {log}
+ else
+ echo "Contigs file is empty" >> {log}
+ touch {output.contigs} {output.summary}
+ exit 0
+ fi
+
+ if [ ! -d {params.db_fp} ] || [ ! "$(ls -A {params.db_fp})" ]; then
+ echo "Cenote-Taker database path {params.db_fp} is missing or empty" >> {log}
+ exit 1
+ fi
+
+ cd {params.out_dir}
+ CENOTE_DBS={params.db_fp} cenotetaker3 --contigs {input.contigs} -r {params.sample} -p T >> {log} 2>&1
+ """
+
+
+rule filter_cenote_contigs:  # Produce the per-sample filtered FASTA by running scripts/filter_cenote_contigs.py
+ input:
+ contigs=VIRUS_FP / "cenote_taker" / "{sample}" / "final.contigs.fasta",
+ summary=VIRUS_FP
+ / "cenote_taker"
+ / "{sample}"
+ / "{sample}"
+ / "{sample}_CONTIG_SUMMARY.tsv",
+ output:
+ VIRUS_FP / "cenote_taker" / "{sample}.fasta",
+ params:
+ include_phages=Cfg["sbx_cenote_taker"]["include_phages"],  # Read by the script as snakemake.params["include_phages"]
+ script:
+ "scripts/filter_cenote_contigs.py"
+
+
+rule build_virus_index:  # Build a bowtie2 index whose prefix is the filtered FASTA path itself
+ input:
+ cenote_output(),
+ output:
+ str(cenote_output()) + ".1.bt2", # Don't use f-string, broken with python 3.12
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ threads: Cfg["sbx_cenote_taker"]["bowtie2_build_threads"]
+ shell:
+ "bowtie2-build --threads {threads} -f {input} {input}"
+
+
+rule align_virus_reads:  # Align the paired reads under QC_FP/decontam to the per-sample index with bowtie2
+ input:
+ r1=QC_FP / "decontam" / "{sample}_1.fastq.gz",
+ r2=QC_FP / "decontam" / "{sample}_2.fastq.gz",
+ index=str(cenote_output()) + ".1.bt2", # Don't use f-string, broken with python 3.12
+ output:
+ temp(VIRUS_FP / "alignments" / "{sample}.sam"),
+ params:
+ index=str(cenote_output()),  # Index prefix given to -x; input.index itself is not referenced in the command
+ threads: 6
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ shell:
+ "bowtie2 -q --local -t --very-sensitive-local --threads {threads} --no-mixed --no-discordant -x {params.index} -1 {input.r1} -2 {input.r2} -S {output}"
+
+
+rule process_virus_alignment:  # Convert the SAM to BAM, then sort and index it with samtools
+ input:
+ VIRUS_FP / "alignments" / "{sample}.sam",
+ output:
+ bam=temp(VIRUS_FP / "alignments" / "{sample}.bam"),
+ sorted=temp(VIRUS_FP / "alignments" / "{sample}.sorted.bam"),
+ bai=temp(VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai"),
+ params:
+ target=str(cenote_output()),  # Reference FASTA passed to samtools view via -T
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ shell:
+ """
+ samtools view -bT {params.target} {input} > {output.bam}
+ samtools sort -o {output.sorted} {output.bam}
+ samtools index {output.sorted} {output.bai}
+ """
+
+
+rule calculate_mapping_stats:  # Write samtools idxstats output for the sorted BAM
+ input:
+ bam=VIRUS_FP / "alignments" / "{sample}.sorted.bam",
+ idx=VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai",  # Not referenced in the command; declared so the index is built first
+ output:
+ VIRUS_FP / "alignments" / "{sample}.sorted.idxstats.tsv",
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ shell:
+ """
+ samtools idxstats {input.bam} > {output}
+ """
+
+
+rule virus_mpileup:  # Run samtools mpileup on the sorted BAM with the filtered contigs as reference (-f)
+ input:
+ bam=VIRUS_FP / "alignments" / "{sample}.sorted.bam",
+ idx=VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai",  # Not referenced in the command; declared so the index is built first
+ contigs=cenote_output(),
+ output:
+ VIRUS_FP / "alignments" / "{sample}.mpileup",
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ shell:
+ """
+ samtools mpileup -f {input.contigs} {input.bam} > {output}
+ """
+
+
+rule filter_virus_coverage:  # Run scripts/filter_virus_coverage.py on the filtered FASTA and its idxstats table
+ input:
+ fa=cenote_output(),
+ idx=VIRUS_FP / "alignments" / "{sample}.sorted.idxstats.tsv",
+ output:
+ VIRUS_FP / "final_{sample}_contigs.fasta",
+ log:
+ LOG_FP / "filter_virus_coverage_{sample}.log",  # The script writes its contig dictionary to this file
+ script:
+ "scripts/filter_virus_coverage.py"
+
+
+rule virus_blastx:  # An empty query FASTA short-circuits to an empty output file instead of running blastx
+ """Run blastx on untranslated genes against a target db and write to blast tabular format."""
+ input:
+ VIRUS_FP / "final_{sample}_contigs.fasta",
+ output:
+ VIRUS_FP / "blastx" / "{sample}.btf",
+ benchmark:
+ BENCHMARK_FP / "run_virus_blastx_{sample}.tsv"
+ log:
+ LOG_FP / "run_virus_blastx_{sample}.log",
+ params:
+ blast_db=Cfg["sbx_cenote_taker"]["blast_db"],  # Split in the shell: dirname becomes BLASTDB, basename becomes the -db name
+ threads: Cfg["sbx_cenote_taker"]["blastx_threads"]
+ resources:
+ mem_mb=24000,
+ runtime=720,
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ shell:
+ """
+ if [ -s {input} ]; then
+ export BLASTDB=$(dirname {params.blast_db})
+ blastx \
+ -query {input} \
+ -db $(basename {params.blast_db}) \
+ -outfmt "7 qacc sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle" \
+ -num_threads {threads} \
+ -evalue 0.05 \
+ -max_target_seqs 100 \
+ -out {output} \
+ 2>&1 | tee {log}
+ else
+ echo "Caught empty query" >> {log}
+ touch {output}
+ fi
+ """
+
+
+rule calculate_coverage:  # Pipe genomeCoverageBed output for the sorted BAM through scripts/coverage_counter.pl
+ input:
+ bam=VIRUS_FP / "alignments" / "{sample}.sorted.bam",
+ idx=VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai",  # Not referenced in the command; declared so the index is built first
+ output:
+ VIRUS_FP / "alignments" / "{sample}.genomecoverage.txt",
+ params:
+ ext_fp=str(get_extension_path()),  # Directory used to locate scripts/coverage_counter.pl
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ shell:
+ """
+ samtools view -b {input.bam} | genomeCoverageBed -ibam stdin | grep -v 'genome'| perl {params.ext_fp}/scripts/coverage_counter.pl > {output}
+ """
+
+
+rule combine_coverage_stats:  # Run scripts/combine_coverage_stats.R on the coverage and idxstats files of one sample
+ input:
+ cov=VIRUS_FP / "alignments" / "{sample}.genomecoverage.txt",
+ stats=VIRUS_FP / "alignments" / "{sample}.sorted.idxstats.tsv",
+ output:
+ VIRUS_FP / "alignments" / "{sample}.align.summary.txt",
+ benchmark:
+ BENCHMARK_FP / "combine_coverage_stats_{sample}.tsv"
+ log:
+ LOG_FP / "combine_coverage_stats_{sample}.log",
+ params:
+ ext_fp=str(get_extension_path()),  # Directory used to locate scripts/combine_coverage_stats.R
+ conda:
+ "envs/r_env.yml"
+ container:
+ "docker://r-base:latest"
+ shell:
+ """
+ Rscript {params.ext_fp}/scripts/combine_coverage_stats.R {input.cov} {input.stats} {output} 2>&1 | tee {log}
+ """
+
+
+rule virus_coverage_per_gene:  # Run scripts/virus_coverage_per_gene.py on the mpileup and blastx table of one sample
+ input:
+ mpileup=VIRUS_FP / "alignments" / "{sample}.mpileup",
+ btf=VIRUS_FP / "blastx" / "{sample}.btf",
+ output:
+ tsv=VIRUS_FP / "alignments" / "{sample}.gene_coverage.tsv",
+ params:
+ contigs=str(cenote_output()),  # str() as in the other rules' params, so the sample wildcard is substituted into the path
+ conda:
+ "envs/sbx_cenote_taker.yml"
+ container:
+ f"docker://sunbeamlabs/sbx_cenote_taker:{SBX_CENOTE_TAKER_VERSION}-sbx-cenote-taker"
+ script:
+ "scripts/virus_coverage_per_gene.py"
+
+
+rule all_summary:  # Concatenate every sample's alignment summary beneath a single header line
+ input:
+ expand(
+ VIRUS_FP / "alignments" / "{sample}.align.summary.txt",
+ sample=Samples.keys(),
+ ),
+ output:
+ VIRUS_FP / "summary" / "all_align_summary.txt",
+ shell:
+ """
+ echo -e "Sample\tAlignTarget\tFractionCoverage\tTargetLength\tMappedReads" > {output}
+ cat {input} >> {output}
+ """
diff --git a/sbx_virus_id.smk b/sbx_virus_id.smk
deleted file mode 100755
index 0d51461..0000000
--- a/sbx_virus_id.smk
+++ /dev/null
@@ -1,461 +0,0 @@
-# -*- mode: Snakemake -*-
-#
-# Rules for running Cenote-Taker2 and other tools in the viral id pipeline
-
-VIRUS_FP = Cfg["all"]["output_fp"] / "virus"
-
-
-try:
- BENCHMARK_FP
-except NameError:
- BENCHMARK_FP = Cfg["all"]["output_fp"] / "benchmarks"
-try:
- LOG_FP
-except NameError:
- LOG_FP = Cfg["all"]["output_fp"] / "logs"
-
-
-def get_virus_ext_path() -> Path:
- ext_path = Path(sunbeam_dir) / "extensions" / "sbx_virus_id"
- if ext_path.exists():
- return ext_path
- raise Error(
- "Filepath for virus_id not found, are you sure it's installed under extensions/sbx_virus_id?"
- )
-
-
-SBX_VIRUS_ID_VERSION = open(get_virus_ext_path() / "VERSION").read().strip()
-
-
-def virus_sorter_input() -> Path:
- if Cfg["sbx_virus_id"]["use_spades"]:
- return ASSEMBLY_FP / "virus_id_spades" / "{sample}" / "scaffolds.fasta"
- else:
- return ASSEMBLY_FP / "megahit" / "{sample}_asm" / "final.contigs.fa"
-
-
-def virus_sorter_output() -> Path:
- if Cfg["sbx_virus_id"]["use_virsorter"]:
- return VIRUS_FP / "virsorter" / "{sample}.fasta"
- else:
- return VIRUS_FP / "cenote_taker" / "{sample}.fasta"
-
-
-rule all_virus_id:
- input:
- expand(
- VIRUS_FP / "alignments" / "{sample}.gene_coverage.tsv",
- sample=Samples.keys(),
- ),
- expand(
- VIRUS_FP / "blastx" / "{sample}.btf",
- sample=Samples.keys(),
- ),
- VIRUS_FP / "summary" / "all_align_summary.txt",
-
-
-rule virus_id_spades_paired:
- input:
- r1=QC_FP / "decontam" / "{sample}_1.fastq.gz",
- r2=QC_FP / "decontam" / "{sample}_2.fastq.gz",
- output:
- ASSEMBLY_FP / "virus_id_spades" / "{sample}" / "scaffolds.fasta",
- benchmark:
- BENCHMARK_FP / "virus_id_spades_paired_{sample}.tsv"
- log:
- LOG_FP / "virus_id_spades_paired_{sample}.log",
- params:
- out_fp=str(ASSEMBLY_FP / "virus_id_spades" / "{sample}"),
- threads: 4
- conda:
- "envs/spades_env.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-spades"
- resources:
- mem_mb=20000,
- runtime=720,
- shell:
- """
- spades.py -1 {input.r1} -2 {input.r2} -t {threads} -o {params.out_fp} 2>&1 | tee {log}
- """
-
-
-rule install_cenote_taker:
- output:
- VIRUS_FP / "cenote_taker" / ".installed",
- benchmark:
- BENCHMARK_FP / "install_cenote_taker.tsv"
- log:
- LOG_FP / "install_cenote_taker.log",
- params:
- db_fp=Cfg["sbx_virus_id"]["cenote_taker_db"],
- extra_dbs=Cfg["sbx_virus_id"]["cenote_taker_extra_dbs"],
- resources:
- runtime=2400,
- conda:
- "envs/cenote_taker_env.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-cenote-taker"
- shell:
- """
- conda env config vars set CENOTE_DBS={params.db_fp}
-
- if [ -d {params.db_fp} ] && [ "$(ls -A {params.db_fp})" ]; then
- echo "Cenote-Taker database already installed" >> {log}
- touch {output}
- exit 0
- fi
-
- if [[ {params.extra_dbs} == "True" ]]; then
- echo "Installing Cenote-Taker database with hhsuite" >> {log}
- get_ct3_dbs -o {params.db_fp} --hmm T --hallmark_tax T --refseq_tax T --mmseqs_cdd T --domain_list T --hhCDD T --hhPFAM T --hhPDB T >> {log} 2>&1
- else
- echo "Installing Cenote-Taker database without hhsuite" >> {log}
- get_ct3_dbs -o {params.db_fp} --hmm T --hallmark_tax T --refseq_tax T --mmseqs_cdd T --domain_list T >> {log} 2>&1
- fi
-
- touch {output}
- """
-
-
-rule cenote_taker:
- input:
- contigs=virus_sorter_input(),
- install=VIRUS_FP / "cenote_taker" / ".installed",
- output:
- VIRUS_FP / "cenote_taker" / "{sample}" / "final.contigs.fasta",
- VIRUS_FP
- / "cenote_taker"
- / "{sample}"
- / "{sample}"
- / "{sample}_CONTIG_SUMMARY.tsv",
- benchmark:
- BENCHMARK_FP / "cenote_taker_{sample}.tsv"
- log:
- LOG_FP / "cenote_taker_{sample}.log",
- params:
- run_script=str(get_virus_ext_path() / "Cenote-Taker2" / "run_cenote-taker2.py"),
- out_dir=str(VIRUS_FP / "cenote_taker"),
- sample="{sample}",
- db_fp=Cfg["sbx_virus_id"]["cenote_taker_db"],
- resources:
- mem_mb=24000,
- runtime=720,
- conda:
- "envs/cenote_taker_env.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-cenote-taker"
- shell:
- """
- SAMPLE={params.sample}
- if [[ ${{#SAMPLE}} -lt 18 ]] && [[ {params.sample} =~ ^[a-zA-Z0-9_]+$ ]]; then
- echo "Sample name format is valid" >> {log}
- else
- echo "Cenote-Taker requires a sample name that is less than 18 characters and contains only alphanumeric characters and underscores" >> {log}
- exit 1
- fi
-
- if [ -s {input.contigs} ]; then
- echo "Contigs file exists and is not empty" >> {log}
- else
- echo "Contigs file is empty" >> {log}
- exit 1
- fi
-
- cd {params.out_dir}
- cenotetaker3 --contigs {input.contigs} -r {params.sample} -p T >> {log} 2>&1
- """
-
-
-rule install_virsorter:
- output:
- VIRUS_FP / "virsorter" / ".installed",
- benchmark:
- BENCHMARK_FP / "install_virsorter.tsv"
- log:
- LOG_FP / "install_virsorter.log",
- params:
- db_fp=Cfg["sbx_virus_id"]["virsorter_db"],
- resources:
- runtime=2400,
- threads: 4
- conda:
- "envs/virsorter_env.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-virsorter"
- shell:
- """
- # First check if directory exists and has files
- if [ -d {params.db_fp} ] && [ "$(ls -A {params.db_fp})" ]; then
- echo "VirSorter database already installed"
- touch {output}
- exit 0
- fi
-
- echo "Installing VirSorter database"
- virsorter setup -d {params.db_fp} -j 4
- touch {output}
- """
-
-
-rule virsorter:
- input:
- contigs=virus_sorter_input(),
- install=VIRUS_FP / "virsorter" / ".installed",
- output:
- combined_viral=VIRUS_FP / "virsorter" / "{sample}" / "final-viral-combined.fa",
- scores=VIRUS_FP / "virsorter" / "{sample}" / "final-viral-score.tsv",
- boundaries=VIRUS_FP / "virsorter" / "{sample}" / "final-viral-boundary.tsv",
- benchmark:
- BENCHMARK_FP / "virsorter_{sample}.tsv"
- log:
- LOG_FP / "virsorter_{sample}.log",
- params:
- out_dir=str(VIRUS_FP / "virsorter" / "{sample}"),
- db_fp=Cfg["sbx_virus_id"]["virsorter_db"],
- resources:
- mem_mb=24000,
- runtime=720,
- threads: 4
- conda:
- "envs/virsorter_env.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-virsorter"
- shell:
- """
- virsorter run -w {params.out_dir} -i {input.contigs} --min-length 1000 -j {threads} --db-dir {params.db_fp} all
- """
-
-
-rule filter_cenote_contigs:
- input:
- contigs=VIRUS_FP / "cenote_taker" / "{sample}" / "final.contigs.fasta",
- summary=VIRUS_FP
- / "cenote_taker"
- / "{sample}"
- / "{sample}"
- / "{sample}_CONTIG_SUMMARY.tsv",
- output:
- VIRUS_FP / "cenote_taker" / "{sample}.fasta",
- params:
- include_phages=Cfg["sbx_virus_id"]["include_phages"],
- script:
- "scripts/filter_cenote_contigs.py"
-
-
-rule filter_virsorter_contigs:
- input:
- contigs=VIRUS_FP / "virsorter" / "{sample}" / "final-viral-combined.fa",
- output:
- VIRUS_FP / "virsorter" / "{sample}.fasta",
- script:
- "scripts/filter_virsorter_contigs.py"
-
-
-rule build_virus_index:
- input:
- virus_sorter_output(),
- output:
- str(virus_sorter_output()) + ".1.bt2", # Don't use f-string, broken with python 3.12
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- threads: Cfg["sbx_virus_id"]["bowtie2_build_threads"]
- shell:
- "bowtie2-build --threads {threads} -f {input} {input}"
-
-
-rule align_virus_reads:
- input:
- r1=QC_FP / "decontam" / "{sample}_1.fastq.gz",
- r2=QC_FP / "decontam" / "{sample}_2.fastq.gz",
- index=str(virus_sorter_output()) + ".1.bt2", # Don't use f-string, broken with python 3.12
- output:
- temp(VIRUS_FP / "alignments" / "{sample}.sam"),
- params:
- index=str(virus_sorter_output()),
- threads: 6
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- shell:
- "bowtie2 -q --local -t --very-sensitive-local --threads {threads} --no-mixed --no-discordant -x {params.index} -1 {input.r1} -2 {input.r2} -S {output}"
-
-
-rule process_virus_alignment:
- input:
- VIRUS_FP / "alignments" / "{sample}.sam",
- output:
- bam=temp(VIRUS_FP / "alignments" / "{sample}.bam"),
- sorted=temp(VIRUS_FP / "alignments" / "{sample}.sorted.bam"),
- bai=temp(VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai"),
- params:
- target=str(virus_sorter_output()),
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- shell:
- """
- samtools view -bT {params.target} {input} > {output.bam}
- samtools sort -o {output.sorted} {output.bam}
- samtools index {output.sorted} {output.bai}
- """
-
-
-rule calculate_mapping_stats:
- input:
- bam=VIRUS_FP / "alignments" / "{sample}.sorted.bam",
- idx=VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai",
- output:
- VIRUS_FP / "alignments" / "{sample}.sorted.idxstats.tsv",
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- shell:
- """
- samtools idxstats {input.bam} > {output}
- """
-
-
-rule virus_mpileup:
- input:
- bam=VIRUS_FP / "alignments" / "{sample}.sorted.bam",
- idx=VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai",
- contigs=virus_sorter_output(),
- output:
- VIRUS_FP / "alignments" / "{sample}.mpileup",
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- shell:
- """
- samtools mpileup -f {input.contigs} {input.bam} > {output}
- """
-
-
-rule filter_virus_coverage:
- input:
- fa=virus_sorter_output(),
- idx=VIRUS_FP / "alignments" / "{sample}.sorted.idxstats.tsv",
- output:
- VIRUS_FP / "final_{sample}_contigs.fasta",
- log:
- LOG_FP / "filter_virus_coverage_{sample}.log",
- script:
- "scripts/filter_virus_coverage.py"
-
-
-rule virus_blastx:
- """Run blastx on untranslated genes against a target db and write to blast tabular format."""
- input:
- VIRUS_FP / "final_{sample}_contigs.fasta",
- output:
- VIRUS_FP / "blastx" / "{sample}.btf",
- benchmark:
- BENCHMARK_FP / "run_virus_blastx_{sample}.tsv"
- log:
- LOG_FP / "run_virus_blastx_{sample}.log",
- params:
- blast_db=Cfg["sbx_virus_id"]["blast_db"],
- threads: Cfg["sbx_virus_id"]["blastx_threads"]
- resources:
- mem_mb=24000,
- runtime=720,
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- shell:
- """
- if [ -s {input} ]; then
- export BLASTDB=$(dirname {params.blast_db})
- blastx \
- -query {input} \
- -db $(basename {params.blast_db}) \
- -outfmt "7 qacc sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle" \
- -num_threads {threads} \
- -evalue 0.05 \
- -max_target_seqs 100 \
- -out {output} \
- 2>&1 | tee {log}
- else
- echo "Caught empty query" >> {log}
- touch {output}
- fi
- """
-
-
-rule calculate_coverage:
- input:
- bam=VIRUS_FP / "alignments" / "{sample}.sorted.bam",
- idx=VIRUS_FP / "alignments" / "{sample}.sorted.bam.bai",
- output:
- VIRUS_FP / "alignments" / "{sample}.genomecoverage.txt",
- params:
- ext_fp=str(get_virus_ext_path()),
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- shell:
- """
- samtools view -b {input.bam} | genomeCoverageBed -ibam stdin | grep -v 'genome'| perl {params.ext_fp}/scripts/coverage_counter.pl > {output}
- """
-
-
-rule combine_coverage_stats:
- input:
- cov=VIRUS_FP / "alignments" / "{sample}.genomecoverage.txt",
- stats=VIRUS_FP / "alignments" / "{sample}.sorted.idxstats.tsv",
- output:
- VIRUS_FP / "alignments" / "{sample}.align.summary.txt",
- benchmark:
- BENCHMARK_FP / "combine_coverage_stats_{sample}.tsv"
- log:
- LOG_FP / "combine_coverage_stats_{sample}.log",
- params:
- ext_fp=str(get_virus_ext_path()),
- conda:
- "envs/r_env.yml"
- container:
- "docker://r-base:latest"
- shell:
- """
- Rscript {params.ext_fp}/scripts/combine_coverage_stats.R {input.cov} {input.stats} {output} 2>&1 | tee {log}
- """
-
-
-rule virus_coverage_per_gene:
- input:
- mpileup=VIRUS_FP / "alignments" / "{sample}.mpileup",
- btf=VIRUS_FP / "blastx" / "{sample}.btf",
- output:
- tsv=VIRUS_FP / "alignments" / "{sample}.gene_coverage.tsv",
- params:
- contigs=virus_sorter_output(),
- conda:
- "envs/sbx_virus_id.yml"
- container:
- f"docker://sunbeamlabs/sbx_virus_id:{SBX_VIRUS_ID_VERSION}-sbx-virus-id"
- script:
- "scripts/virus_coverage_per_gene.py"
-
-
-rule all_summary:
- input:
- expand(
- VIRUS_FP / "alignments" / "{sample}.align.summary.txt",
- sample=Samples.keys(),
- ),
- output:
- VIRUS_FP / "summary" / "all_align_summary.txt",
- shell:
- """
- echo -e "Sample\tAlignTarget\tFractionCoverage\tTargetLength\tMappedReads" > {output}
- cat {input} >> {output}
- """
diff --git a/scripts/filter_cenote_contigs.py b/scripts/filter_cenote_contigs.py
index 04d93fc..6e23c00 100755
--- a/scripts/filter_cenote_contigs.py
+++ b/scripts/filter_cenote_contigs.py
@@ -1,17 +1,50 @@
import csv
-from sunbeamlib.parse import parse_fasta, write_fasta
+import os
+from typing import Generator, TextIO
-with open(snakemake.input.summary) as f_summary, open(
- snakemake.input.contigs
-) as f_contigs, open(snakemake.output[0], "w") as f_out:
+def parse_fasta(f: TextIO) -> Generator[tuple[str, str], None, None]:  # Yield (header, sequence) pairs; a record's sequence lines are concatenated
+ header_str = ""
+ seq_str = ""
+ for line in f.readlines():
+ line = line.strip()
+ if line.startswith(">"):
+ if header_str:
+ yield header_str, seq_str
+ header_str = line  # Header is kept verbatim, leading ">" included. NOTE(review): confirm callers compare names with the ">" in place
+ seq_str = ""
+ else:
+ seq_str += line
+ if header_str:
+ yield header_str, seq_str
+
+
+def write_fasta(record: list[str], f: TextIO) -> None:  # Write record[0] and record[1] on separate lines; no ">" is added here
+ f.write(f"{record[0]}\n")
+ f.write(f"{record[1]}\n")
+
+
+summary = snakemake.input.summary # type: ignore
+contigs = snakemake.input.contigs # type: ignore
+output_fp = snakemake.output[0] # type: ignore
+include_phages = snakemake.params["include_phages"] # type: ignore
+
+# Empty output if empty contigs
+if os.path.getsize(contigs) == 0:
+ with open(output_fp, "w") as f_out:
+ pass
+ raise SystemExit(0)  # exit() is a site-module helper for interactive use; SystemExit is always available
+
+with open(summary) as f_summary, open(contigs) as f_contigs, open(
+ output_fp, "w"
+) as f_out:
dr = csv.DictReader(f_summary, delimiter="\t")
cd = {}
phages = ["phage", "siphoviridae", "conjugative transposon"]
for line in dr:
if (
all([x not in line["ORGANISM_NAME"].lower() for x in phages])
- or snakemake.params["include_phages"]
+ or include_phages
) and int(line["NUM_HALLMARKS"]) > 0:
cd[line["ORIGINAL_NAME"]] = 1
diff --git a/scripts/filter_virsorter_contigs.py b/scripts/filter_virsorter_contigs.py
deleted file mode 100755
index 5bb2f17..0000000
--- a/scripts/filter_virsorter_contigs.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from sunbeamlib.parse import parse_fasta, write_fasta
-
-# This does nothing but I'm leaving it in case we want to add custom filtering here later
-with open(snakemake.input.contigs) as f_contigs, open(
- snakemake.output[0], "w"
-) as f_out:
- for header_str, seq_str in parse_fasta(f_contigs):
- write_fasta((header_str, seq_str), f_out)
diff --git a/scripts/filter_virus_coverage.py b/scripts/filter_virus_coverage.py
index 8509ee2..dda0cdb 100755
--- a/scripts/filter_virus_coverage.py
+++ b/scripts/filter_virus_coverage.py
@@ -1,9 +1,36 @@
import csv
-from sunbeamlib.parse import parse_fasta, write_fasta
+import os
+from typing import Generator, TextIO
+
+def parse_fasta(f: TextIO) -> Generator[tuple[str, str], None, None]:  # Yield (name, sequence) per record; name carries no leading ">"
+ header_str = ""
+ seq_str = ""
+ for line in f:  # Iterate lazily instead of loading every line with readlines()
+ line = line.strip()
+ if line.startswith(">"):
+ if header_str:
+ yield header_str, seq_str
+ header_str = line[1:]  # Drop ">" so names match the idxstats keys used for the coverage lookup
+ seq_str = ""
+ else:
+ seq_str += line
+ if header_str:
+ yield header_str, seq_str
+
+
+def write_fasta(record: list[str], f: TextIO) -> None:  # Write one (name, sequence) record as two lines
+ f.write(f">{record[0]}\n")  # Re-add the ">" that parse_fasta strips so the output is valid FASTA
+ f.write(f"{record[1]}\n")
+
+
+idx = snakemake.input.idx # type: ignore
+fa = snakemake.input.fa # type: ignore
+output_fp = snakemake.output[0] # type: ignore
+log_fp = snakemake.log[0] # type: ignore
contigs = {}
-with open(snakemake.input.idx) as f_idx:
+with open(idx) as f_idx:
rd = csv.reader(f_idx, delimiter="\t", quotechar='"')
for row in rd:
if row[0] != "*":
@@ -12,11 +39,11 @@
else:
contigs[row[0]] = 0
-with open(snakemake.log[0], "w") as f_log:
+with open(log_fp, "w") as f_log:
f_log.write(f"Contigs: {contigs}")
-with open(snakemake.input.fa) as f_fa, open(snakemake.output[0], "w") as f_out:
+with open(fa) as f_fa, open(output_fp, "w") as f_out:
for header, seq in parse_fasta(f_fa):
contig_name = header.split(" ")[0]
if contigs[contig_name]:
- write_fasta((contig_name, seq), f_out)
+ write_fasta([contig_name, seq], f_out)