Skip to content

Commit e18eb34

Browse files
authored
fix: ci slurm check (#388)
This fixes broken third-party workflows: Bioconda was unable to build (see, e.g., https://github.com/bioconda/bioconda-recipes/actions/runs/20041946577/job/57478140127?pr=61020) because importing this plugin immediately raised an error when no SLURM installation was present. This had repercussions in other repositories, too. Some parts of the test suite have also been refactored.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **Bug Fixes**
  * Detects absence of SLURM tools and disables the default status command to avoid misconfiguration when SLURM is not available.
  * Provides a clear user-facing message when SLURM is not detected and no status command can be used.
* **Tests**
  * Added focused parsing and time-conversion tests that run without SLURM present.
  * Removed older redundant tests and reorganized the test suite for clarity.

<sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent fb09d7f commit e18eb34

File tree

3 files changed

+160
-146
lines changed

3 files changed

+160
-146
lines changed

snakemake_executor_plugin_slurm/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from pathlib import Path
1111
import re
1212
import shlex
13+
import shutil
1314
import subprocess
1415
import time
1516
from dataclasses import dataclass, field
@@ -59,6 +60,10 @@ def _get_status_command_default():
5960
squeue_available = is_query_tool_available("squeue")
6061
# squeue is assumed to always be available on SLURM clusters
6162

63+
is_slurm_available = shutil.which("sinfo") is not None
64+
if not is_slurm_available:
65+
return None
66+
6267
if not squeue_available and not sacct_available:
6368
raise WorkflowError(
6469
"Neither 'sacct' nor 'squeue' commands are available on this "
@@ -74,6 +79,15 @@ def _get_status_command_default():
7479
def _get_status_command_help():
7580
"""Get help text with computed default."""
7681
default_cmd = _get_status_command_default()
82+
83+
# if SLURM is not available (should not occur, only
84+
# in 3rd party CI tests)
85+
if default_cmd is None:
86+
return (
87+
"Command to query job status. Options: 'sacct', 'squeue'. "
88+
"SLURM not detected on this system, so no status command can be used."
89+
)
90+
7791
sacct_available = is_query_tool_available("sacct")
7892
squeue_recommended = should_recommend_squeue_status_command()
7993

tests/test_parsing.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""Tests for parsing functions that don't require SLURM to be installed."""
2+
from io import StringIO
3+
import pandas as pd
4+
from snakemake_executor_plugin_slurm.efficiency_report import (
5+
parse_sacct_data,
6+
time_to_seconds,
7+
)
8+
9+
10+
def test_parse_sacct_data():
11+
test_data = [
12+
"10294159|b10191d0-6985-4c3a-8ccb-"
13+
"aa7d23ebffc7|rule_bam_bwa_mem_mosdepth_"
14+
"simulate_reads|00:01:31|00:24.041|1|1||32000M",
15+
"10294159.batch|batch||00:01:31|00:03.292|1|1|71180K|",
16+
"10294159.0|python3.12||00:01:10|00:20.749|1|1|183612K|",
17+
"10294160|b10191d0-6985-4c3a-8ccb-"
18+
"aa7d23ebffc7|rule_bam_bwa_mem_mosdepth_"
19+
"simulate_reads|00:01:30|00:24.055|1|1||32000M",
20+
"10294160.batch|batch||00:01:30|00:03.186|1|1|71192K|",
21+
"10294160.0|python3.12||00:01:10|00:20.868|1|1|184352K|",
22+
]
23+
df = parse_sacct_data(
24+
lines=test_data, e_threshold=0.0, run_uuid="test", logger=None
25+
)
26+
output = StringIO()
27+
df.to_csv(output, index=False)
28+
print(output.getvalue())
29+
# this should only be two rows once collapsed
30+
assert len(df) == 2
31+
# check that RuleName is properly inherited from main jobs
32+
assert all(df["RuleName"] == "rule_bam_bwa_mem_mosdepth_simulate_reads")
33+
# check that RequestedMem_MB is properly inherited
34+
assert all(df["RequestedMem_MB"] == 32000.0)
35+
# check that MaxRSS_MB is properly calculated from job steps
36+
assert df.iloc[0]["MaxRSS_MB"] > 0 # Should have actual memory usage from job step
37+
38+
39+
class TestTimeToSeconds:
40+
"""Test the time_to_seconds function with SLURM sacct time formats."""
41+
42+
def test_elapsed_format_with_days(self):
43+
"""
44+
Test Elapsed format: [D-]HH:MM:SS or
45+
[DD-]HH:MM:SS (no fractional seconds).
46+
"""
47+
# Single digit days
48+
assert time_to_seconds("1-00:00:00") == 86400 # 1 day
49+
assert (
50+
time_to_seconds("1-12:30:45") == 86400 + 12 * 3600 + 30 * 60 + 45
51+
) # 131445
52+
assert time_to_seconds("9-23:59:59") == 9 * 86400 + 23 * 3600 + 59 * 60 + 59
53+
54+
# Double digit days
55+
assert (
56+
time_to_seconds("10-01:02:03") == 10 * 86400 + 1 * 3600 + 2 * 60 + 3
57+
) # 867723
58+
59+
def test_elapsed_format_hours_minutes_seconds(self):
60+
"""Test Elapsed format: HH:MM:SS (no fractional seconds)."""
61+
assert time_to_seconds("00:00:00") == 0
62+
assert time_to_seconds("01:00:00") == 3600 # 1 hour
63+
assert time_to_seconds("23:59:59") == 23 * 3600 + 59 * 60 + 59 # 86399
64+
assert time_to_seconds("12:30:45") == 12 * 3600 + 30 * 60 + 45 # 45045
65+
66+
def test_totalcpu_format_with_days(self):
67+
"""
68+
Test TotalCPU format: [D-][HH:]MM:SS or [DD-][HH:]MM:SS
69+
(with fractional seconds).
70+
"""
71+
# With days and hours
72+
assert time_to_seconds("1-12:30:45.5") == 86400 + 12 * 3600 + 30 * 60 + 45.5
73+
assert (
74+
time_to_seconds("10-01:02:03.123")
75+
== 10 * 86400 + 1 * 3600 + 2 * 60 + 3.123
76+
)
77+
78+
# With days, no hours (MM:SS format)
79+
assert time_to_seconds("1-30:45") == 86400 + 30 * 60 + 45
80+
assert time_to_seconds("1-30:45.5") == 86400 + 30 * 60 + 45.5
81+
82+
def test_totalcpu_format_minutes_seconds(self):
83+
"""Test TotalCPU format: MM:SS with fractional seconds."""
84+
assert time_to_seconds("00:00") == 0
85+
assert time_to_seconds("01:00") == 60 # 1 minute
86+
assert time_to_seconds("59:59") == 59 * 60 + 59 # 3599
87+
assert time_to_seconds("30:45") == 30 * 60 + 45 # 1845
88+
assert time_to_seconds("30:45.5") == 30 * 60 + 45.5 # 1845.5
89+
90+
def test_totalcpu_format_seconds_only(self):
91+
"""Test TotalCPU format: SS or SS.sss (seconds only with fractional)."""
92+
assert time_to_seconds("0") == 0
93+
assert time_to_seconds("1") == 1
94+
assert time_to_seconds("30") == 30
95+
assert time_to_seconds("59") == 59
96+
97+
# Fractional seconds
98+
assert time_to_seconds("30.5") == 30.5
99+
assert time_to_seconds("0.5") == 0.5
100+
101+
def test_real_world_sacct_examples(self):
102+
"""Test with realistic sacct time values from actual output."""
103+
# From your test data
104+
assert time_to_seconds("00:01:31") == 91 # 1 minute 31 seconds
105+
assert time_to_seconds("00:24.041") == 24.041 # 24.041 seconds
106+
assert time_to_seconds("00:03.292") == 3.292 # 3.292 seconds
107+
assert time_to_seconds("00:20.749") == 20.749 # 20.749 seconds
108+
109+
# Longer running jobs
110+
assert time_to_seconds("02:15:30") == 2 * 3600 + 15 * 60 + 30 # 2h 15m 30s
111+
assert time_to_seconds("1-12:00:00") == 86400 + 12 * 3600 # 1 day 12 hours
112+
assert time_to_seconds("7-00:00:00") == 7 * 86400 # 1 week
113+
114+
def test_empty_and_invalid_inputs(self):
115+
"""Test empty, None, and invalid inputs."""
116+
assert time_to_seconds("") == 0
117+
assert time_to_seconds(" ") == 0
118+
assert time_to_seconds(None) == 0
119+
assert time_to_seconds(pd.NA) == 0
120+
assert time_to_seconds("invalid") == 0
121+
assert time_to_seconds("1:2:3:4") == 0 # Too many colons
122+
assert time_to_seconds("abc:def") == 0
123+
assert time_to_seconds("-1:00:00") == 0 # Negative values
124+
125+
def test_whitespace_handling(self):
126+
"""Test that whitespace is properly handled."""
127+
assert time_to_seconds(" 30 ") == 30
128+
assert time_to_seconds(" 1-02:30:45 ") == 86400 + 2 * 3600 + 30 * 60 + 45
129+
assert time_to_seconds("\t12:30:45\n") == 12 * 3600 + 30 * 60 + 45
130+
131+
def test_pandas_na_values(self):
132+
"""Test pandas NA and NaN values."""
133+
assert time_to_seconds(pd.NA) == 0
134+
assert (
135+
time_to_seconds(pd.NaType()) == 0 if hasattr(pd, "NaType") else True
136+
) # Skip if not available
137+
138+
def test_edge_case_values(self):
139+
"""Test edge case values that might appear in SLURM output."""
140+
# Zero padding variations (should work with datetime parsing)
141+
assert time_to_seconds("01:02:03") == 1 * 3600 + 2 * 60 + 3
142+
assert time_to_seconds("1:2:3") == 1 * 3600 + 2 * 60 + 3
143+
144+
# Single digit values
145+
assert time_to_seconds("5") == 5
146+
assert time_to_seconds("1:5") == 1 * 60 + 5

tests/tests.py

Lines changed: 0 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,11 @@
77
from unittest.mock import MagicMock, patch
88
import pytest
99
from snakemake_executor_plugin_slurm import ExecutorSettings
10-
from snakemake_executor_plugin_slurm.efficiency_report import (
11-
parse_sacct_data,
12-
time_to_seconds,
13-
)
1410
from snakemake_executor_plugin_slurm.utils import set_gres_string
1511
from snakemake_executor_plugin_slurm.submit_string import get_submit_command
1612

1713
from snakemake_executor_plugin_slurm.validation import validate_slurm_extra
1814
from snakemake_interface_common.exceptions import WorkflowError
19-
import pandas as pd
2015

2116

2217
class TestWorkflows(snakemake.common.tests.TestWorkflowsLocalStorageBase):
@@ -32,147 +27,6 @@ def get_executor_settings(self) -> Optional[ExecutorSettingsBase]:
3227
)
3328

3429

35-
def test_parse_sacct_data():
36-
from io import StringIO
37-
38-
test_data = [
39-
"10294159|b10191d0-6985-4c3a-8ccb-"
40-
"aa7d23ebffc7|rule_bam_bwa_mem_mosdepth_"
41-
"simulate_reads|00:01:31|00:24.041|1|1||32000M",
42-
"10294159.batch|batch||00:01:31|00:03.292|1|1|71180K|",
43-
"10294159.0|python3.12||00:01:10|00:20.749|1|1|183612K|",
44-
"10294160|b10191d0-6985-4c3a-8ccb-"
45-
"aa7d23ebffc7|rule_bam_bwa_mem_mosdepth_"
46-
"simulate_reads|00:01:30|00:24.055|1|1||32000M",
47-
"10294160.batch|batch||00:01:30|00:03.186|1|1|71192K|",
48-
"10294160.0|python3.12||00:01:10|00:20.868|1|1|184352K|",
49-
]
50-
df = parse_sacct_data(
51-
lines=test_data, e_threshold=0.0, run_uuid="test", logger=None
52-
)
53-
output = StringIO()
54-
df.to_csv(output, index=False)
55-
print(output.getvalue())
56-
# this should only be two rows once collapsed
57-
assert len(df) == 2
58-
# check that RuleName is properly inherited from main jobs
59-
assert all(df["RuleName"] == "rule_bam_bwa_mem_mosdepth_simulate_reads")
60-
# check that RequestedMem_MB is properly inherited
61-
assert all(df["RequestedMem_MB"] == 32000.0)
62-
# check that MaxRSS_MB is properly calculated from job steps
63-
assert df.iloc[0]["MaxRSS_MB"] > 0 # Should have actual memory usage from job step
64-
65-
66-
class TestTimeToSeconds:
67-
"""Test the time_to_seconds function with SLURM sacct time formats."""
68-
69-
def test_elapsed_format_with_days(self):
70-
"""
71-
Test Elapsed format: [D-]HH:MM:SS or
72-
[DD-]HH:MM:SS (no fractional seconds).
73-
"""
74-
# Single digit days
75-
assert time_to_seconds("1-00:00:00") == 86400 # 1 day
76-
assert (
77-
time_to_seconds("1-12:30:45") == 86400 + 12 * 3600 + 30 * 60 + 45
78-
) # 131445
79-
assert time_to_seconds("9-23:59:59") == 9 * 86400 + 23 * 3600 + 59 * 60 + 59
80-
81-
# Double digit days
82-
assert (
83-
time_to_seconds("10-01:02:03") == 10 * 86400 + 1 * 3600 + 2 * 60 + 3
84-
) # 867723
85-
86-
def test_elapsed_format_hours_minutes_seconds(self):
87-
"""Test Elapsed format: HH:MM:SS (no fractional seconds)."""
88-
assert time_to_seconds("00:00:00") == 0
89-
assert time_to_seconds("01:00:00") == 3600 # 1 hour
90-
assert time_to_seconds("23:59:59") == 23 * 3600 + 59 * 60 + 59 # 86399
91-
assert time_to_seconds("12:30:45") == 12 * 3600 + 30 * 60 + 45 # 45045
92-
93-
def test_totalcpu_format_with_days(self):
94-
"""
95-
Test TotalCPU format: [D-][HH:]MM:SS or [DD-][HH:]MM:SS
96-
(with fractional seconds).
97-
"""
98-
# With days and hours
99-
assert time_to_seconds("1-12:30:45.5") == 86400 + 12 * 3600 + 30 * 60 + 45.5
100-
assert (
101-
time_to_seconds("10-01:02:03.123")
102-
== 10 * 86400 + 1 * 3600 + 2 * 60 + 3.123
103-
)
104-
105-
# With days, no hours (MM:SS format)
106-
assert time_to_seconds("1-30:45") == 86400 + 30 * 60 + 45
107-
assert time_to_seconds("1-30:45.5") == 86400 + 30 * 60 + 45.5
108-
109-
def test_totalcpu_format_minutes_seconds(self):
110-
"""Test TotalCPU format: MM:SS with fractional seconds."""
111-
assert time_to_seconds("00:00") == 0
112-
assert time_to_seconds("01:00") == 60 # 1 minute
113-
assert time_to_seconds("59:59") == 59 * 60 + 59 # 3599
114-
assert time_to_seconds("30:45") == 30 * 60 + 45 # 1845
115-
assert time_to_seconds("30:45.5") == 30 * 60 + 45.5 # 1845.5
116-
117-
def test_totalcpu_format_seconds_only(self):
118-
"""Test TotalCPU format: SS or SS.sss (seconds only with fractional)."""
119-
assert time_to_seconds("0") == 0
120-
assert time_to_seconds("1") == 1
121-
assert time_to_seconds("30") == 30
122-
assert time_to_seconds("59") == 59
123-
124-
# Fractional seconds
125-
assert time_to_seconds("30.5") == 30.5
126-
assert time_to_seconds("0.5") == 0.5
127-
128-
def test_real_world_sacct_examples(self):
129-
"""Test with realistic sacct time values from actual output."""
130-
# From your test data
131-
assert time_to_seconds("00:01:31") == 91 # 1 minute 31 seconds
132-
assert time_to_seconds("00:24.041") == 24.041 # 24.041 seconds
133-
assert time_to_seconds("00:03.292") == 3.292 # 3.292 seconds
134-
assert time_to_seconds("00:20.749") == 20.749 # 20.749 seconds
135-
136-
# Longer running jobs
137-
assert time_to_seconds("02:15:30") == 2 * 3600 + 15 * 60 + 30 # 2h 15m 30s
138-
assert time_to_seconds("1-12:00:00") == 86400 + 12 * 3600 # 1 day 12 hours
139-
assert time_to_seconds("7-00:00:00") == 7 * 86400 # 1 week
140-
141-
def test_empty_and_invalid_inputs(self):
142-
"""Test empty, None, and invalid inputs."""
143-
assert time_to_seconds("") == 0
144-
assert time_to_seconds(" ") == 0
145-
assert time_to_seconds(None) == 0
146-
assert time_to_seconds(pd.NA) == 0
147-
assert time_to_seconds("invalid") == 0
148-
assert time_to_seconds("1:2:3:4") == 0 # Too many colons
149-
assert time_to_seconds("abc:def") == 0
150-
assert time_to_seconds("-1:00:00") == 0 # Negative values
151-
152-
def test_whitespace_handling(self):
153-
"""Test that whitespace is properly handled."""
154-
assert time_to_seconds(" 30 ") == 30
155-
assert time_to_seconds(" 1-02:30:45 ") == 86400 + 2 * 3600 + 30 * 60 + 45
156-
assert time_to_seconds("\t12:30:45\n") == 12 * 3600 + 30 * 60 + 45
157-
158-
def test_pandas_na_values(self):
159-
"""Test pandas NA and NaN values."""
160-
assert time_to_seconds(pd.NA) == 0
161-
assert (
162-
time_to_seconds(pd.NaType()) == 0 if hasattr(pd, "NaType") else True
163-
) # Skip if not available
164-
165-
def test_edge_case_values(self):
166-
"""Test edge case values that might appear in SLURM output."""
167-
# Zero padding variations (should work with datetime parsing)
168-
assert time_to_seconds("01:02:03") == 1 * 3600 + 2 * 60 + 3
169-
assert time_to_seconds("1:2:3") == 1 * 3600 + 2 * 60 + 3
170-
171-
# Single digit values
172-
assert time_to_seconds("5") == 5
173-
assert time_to_seconds("1:5") == 1 * 60 + 5
174-
175-
17630
class TestEfficiencyReport(snakemake.common.tests.TestWorkflowsLocalStorageBase):
17731
__test__ = True
17832

0 commit comments

Comments
 (0)