File tree Expand file tree Collapse file tree 3 files changed +48
-0
lines changed Expand file tree Collapse file tree 3 files changed +48
-0
lines changed Original file line number Diff line number Diff line change @@ -67,6 +67,8 @@ namespace kat {
6767
6868 static shared_ptr<vector<path>> globFiles (const string& input);
6969 static shared_ptr<vector<path>> globFiles (const vector<path>& input);
70+
71+ static string determineSequenceFileType (const path& file);
7072
7173 private:
7274 static int globerr (const char *path, int eerrno);
Original file line number Diff line number Diff line change 2020#endif
2121
2222#include < iostream>
23+ #include < fstream>
2324#include < glob.h>
25+ using std::fstream;
2426using std::stringstream;
2527
2628#include < boost/filesystem.hpp>
@@ -259,3 +261,37 @@ shared_ptr<vector<path>> kat::InputHandler::globFiles(const vector<path>& input)
259261
260262 return globbed;
261263}
264+
265+ string kat::InputHandler::determineSequenceFileType (const path& filename) {
266+
267+ string ext = filename.extension ().string ();
268+
269+ // Check extension first
270+ if (boost::iequals (ext, " .fastq" ) || boost::iequals (ext, " .fq" )) {
271+ return " fastq" ;
272+ }
273+ else if ( boost::iequals (ext, " .fasta" ) ||
274+ boost::iequals (ext, " .fa" ) ||
275+ boost::iequals (ext, " .fna" ) ||
276+ boost::iequals (ext, " .fas" ) ||
277+ boost::iequals (ext, " .scafSeq" )) {
278+ return " fasta" ;
279+ }
280+ else {
281+ // Now check first character of the file
282+ char ch;
283+ fstream fin (filename.string (), fstream::in);
284+ fin >> ch;
285+ fin.close ();
286+
287+ if (ch == ' >' ) {
288+ return " fasta" ;
289+ }
290+ else if (ch == ' @' ) {
291+ return " fastq" ;
292+ }
293+ }
294+
295+ // If we've got this far then it's not obviously a sequence file we recognise.
296+ BOOST_THROW_EXCEPTION (InputFileException () << InputFileErrorInfo (" Unknown file type" ));
297+ }
Original file line number Diff line number Diff line change @@ -28,7 +28,9 @@ template<typename DtnType>
2828inline double as_seconds (DtnType dtn) { return duration_cast<duration<double >>(dtn).count (); }
2929
3030#include < kat/jellyfish_helper.hpp>
31+ #include < kat/input_handler.hpp>
3132using kat::JellyfishHelper;
33+ using kat::InputHandler;
3234using kat::HashLoader;
3335
3436namespace kat {
@@ -209,4 +211,12 @@ TEST(jellyfish, unknownexttest) {
209211 EXPECT_EQ ( res, true );
210212}
211213
214+ TEST (jellyfish, determineexttest) {
215+ path unknownpath = path (DATADIR " /unknown.dat" );
216+
217+ string ft = InputHandler::determineSequenceFileType (unknownpath);
218+
219+ EXPECT_EQ ( ft, " fasta" );
220+ }
221+
212222}
You can’t perform that action at this time.
0 commit comments