From c9f814232518c59d0d09d33cdabede448fa8e687 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Tue, 17 Mar 2026 19:37:52 +0100 Subject: [PATCH 1/9] cleanup nixl mounts --- src/cloudai/workloads/common/nixl.py | 36 +++++++++++++++++++ .../nixl_bench/slurm_command_gen_strategy.py | 3 ++ .../slurm_command_gen_strategy.py | 3 ++ .../test_command_gen_strategy_slurm.py | 19 ++++++++++ .../nixl_kvbench/test_command_gen_slurm.py | 24 +++++++++++-- 5 files changed, 83 insertions(+), 2 deletions(-) diff --git a/src/cloudai/workloads/common/nixl.py b/src/cloudai/workloads/common/nixl.py index fc35d6dab..be977e421 100644 --- a/src/cloudai/workloads/common/nixl.py +++ b/src/cloudai/workloads/common/nixl.py @@ -17,6 +17,7 @@ import logging import re +import shlex from functools import cache from pathlib import Path from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast @@ -231,6 +232,41 @@ def _unique_file_name(self, file_name: str, used_filenames: set[str]) -> str: used_filenames.add(candidate) return candidate + def gen_cleanup_srun_command(self) -> list[str]: + cleanup_cmds = self._container_cleanup_commands() + if not cleanup_cmds: + return [] + + return [ + *self.gen_srun_prefix(with_num_nodes=False), + "--overlap", + "--nodelist=$SLURM_JOB_MASTER_NODE", + "--ntasks-per-node=1", + "--ntasks=1", + "-N1", + "bash", + "-c", + f'"{"; ".join(cleanup_cmds)}"', + ] + + def _container_cleanup_commands(self) -> list[str]: + cleanup_cmds: list[str] = [] + + filepath_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("filepath")) + if filepath_raw: + filepath = Path(filepath_raw) + if filepath == Path("/"): + logging.warning("Skipping filepath cleanup for '/': refusing to delete container root contents.") + else: + cleanup_cmds.append(f"rm -rf {shlex.quote(str(filepath))}") + + device_list_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("device_list")) + if device_list_raw: + for device_path in get_files_from_device_list(device_list_raw): + cleanup_cmds.append(f"rm -rf {shlex.quote(str(device_path))}") + + return cleanup_cmds + @property def final_env_vars(self) -> dict[str, str | list[str]]: env_vars = super().final_env_vars diff --git a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py index 7d0995e6f..19f4f4abf 100644 --- a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py @@ -36,6 +36,7 @@ def _gen_srun_command(self) -> str: nixl_commands = self.gen_nixlbench_srun_commands( self.gen_nixlbench_command(), str(self.tdef.cmd_args_dict.get("backend", "unset")) ) + cleanup_command = self.gen_cleanup_srun_command() self._current_image_url = None commands: list[str] = [ @@ -46,6 +47,8 @@ def _gen_srun_command(self) -> str: " ".join(nixl_commands[-1]), " ".join(self.gen_kill_and_wait_cmd("etcd_pid")), ] + if cleanup_command: + commands.insert(-1, " ".join(cleanup_command)) return "\n".join(commands) def gen_nixlbench_command(self) -> list[str]: diff --git a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py index f695c76f0..86d30e47d 100644 --- a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py @@ -39,6 +39,7 @@ def _gen_srun_command(self) -> str: kvbench_commands = self.gen_nixlbench_srun_commands( self.gen_kvbench_command(), str(self.tdef.cmd_args.backend or "unset") ) + cleanup_command = self.gen_cleanup_srun_command() self._current_image_url = None self.create_env_vars_file() @@ -51,6 +52,8 @@ def _gen_srun_command(self) -> str: " ".join(kvbench_commands[-1]), " ".join(self.gen_kill_and_wait_cmd("etcd_pid")), ] + if cleanup_command: + final_cmd.insert(-1, " ".join(cleanup_command)) return "\n".join(final_cmd) def gen_kvbench_command(self) -> list[str]: diff --git a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py index f63802e6e..57a6ee50b 100644 --- a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py +++ b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py @@ -105,6 +105,25 @@ def test_container_mounts(self, nixl_bench_tr: TestRun, slurm_system: SlurmSyste assert (nixl_bench_tr.output_path / "device_list_mounts" / local_device_filename).is_file() assert (nixl_bench_tr.output_path / "device_list_mounts" / local_device_filename).stat().st_size == 1024 + def test_cleanup_srun_command(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): + nixl_bench_tr.test.cmd_args = NIXLBenchCmdArgs.model_validate( + { + "docker_image_url": "docker.io/library/ubuntu:22.04", + "path_to_benchmark": "/nixlbench", + "backend": "GUSLI", + "device_list": "11:K:/dev/nvme0n1,12:F:/p1/store0.bin,13:F:/p2/store0.bin", + "filepath": "/data", + } + ) + strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) + strategy._current_image_url = str(cast(NIXLBenchTestDefinition, nixl_bench_tr.test).docker_image.installed_path) + + cleanup_cmd = " ".join(strategy.gen_cleanup_srun_command()) + + assert "rm -rf /data" in cleanup_cmd + assert "rm -rf /p1/store0.bin" in cleanup_cmd + assert "rm -rf /p2/store0.bin" in cleanup_cmd + @pytest.mark.parametrize( ("override", "expected_error_match", "expected_total_buffer_size"), ( diff --git a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py index e9c595828..354cb19fd 100644 --- a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py +++ b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py @@ -38,8 +38,10 @@ def kvbench() -> NIXLKVBenchTestDefinition: @pytest.fixture -def kvbench_tr(kvbench: NIXLKVBenchTestDefinition) -> TestRun: - return TestRun(name="nixl-bench", num_nodes=2, nodes=[], test=kvbench) +def kvbench_tr(kvbench: NIXLKVBenchTestDefinition, tmp_path) -> TestRun: + output_path = tmp_path / "nixl-kvbench" + output_path.mkdir(parents=True, exist_ok=True) + return TestRun(name="nixl-bench", num_nodes=2, nodes=[], test=kvbench, output_path=output_path) def test_gen_kvbench_ucx(kvbench_tr: TestRun, slurm_system: SlurmSystem): @@ -124,3 +126,21 @@ def test_get_etcd_srun_command_with_etcd_image(kvbench_tr: TestRun, slurm_system cmd = " ".join(strategy.gen_etcd_srun_command(tdef.cmd_args.etcd_path)) assert tdef.etcd_image is not None assert f"--container-image={tdef.etcd_image.installed_path}" in cmd + + +def test_kvbench_cleanup_srun_command_uses_container_paths(kvbench_tr: TestRun, slurm_system: SlurmSystem): + kvbench_tr.test.cmd_args = NIXLKVBenchCmdArgs.model_validate( + { + "docker_image_url": "docker://image/url", + "backend": "GUSLI", + "filepath": "/data", + "device_list": "11:F:/store0.bin", + } + ) + strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) + strategy._current_image_url = str(cast(NIXLKVBenchTestDefinition, kvbench_tr.test).docker_image.installed_path) + + cmd = " ".join(strategy.gen_cleanup_srun_command()) + + assert "rm -rf /data" in cmd + assert "rm -rf /store0.bin" in cmd From 2a501dc79a1f9643d8f8a46e0b06750cb8379e66 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 19 Mar 2026 17:20:55 +0100 Subject: [PATCH 2/9] using host os filepaths for cleanup --- src/cloudai/workloads/common/nixl.py | 35 ++++++------------- .../nixl_bench/slurm_command_gen_strategy.py | 2 +- .../slurm_command_gen_strategy.py | 2 +- .../test_command_gen_strategy_slurm.py | 33 +++++++++++++---- .../nixl_kvbench/test_command_gen_slurm.py | 32 ++++++++++++++--- 5 files changed, 65 insertions(+), 39 deletions(-) diff --git a/src/cloudai/workloads/common/nixl.py b/src/cloudai/workloads/common/nixl.py index be977e421..4a81c470f 100644 --- a/src/cloudai/workloads/common/nixl.py +++ b/src/cloudai/workloads/common/nixl.py @@ -232,40 +232,25 @@ def _unique_file_name(self, file_name: str, used_filenames: set[str]) -> str: used_filenames.add(candidate) return candidate - def gen_cleanup_srun_command(self) -> list[str]: - cleanup_cmds = self._container_cleanup_commands() - if not cleanup_cmds: + def gen_cleanup_command(self) -> list[str]: + cleanup_targets = self._cleanup_targets() + if not cleanup_targets: return [] - return [ - *self.gen_srun_prefix(with_num_nodes=False), - "--overlap", - "--nodelist=$SLURM_JOB_MASTER_NODE", - "--ntasks-per-node=1", - "--ntasks=1", - "-N1", - "bash", - "-c", - f'"{"; ".join(cleanup_cmds)}"', - ] + return ["rm", "-rf", *(shlex.quote(path) for path in cleanup_targets)] - def _container_cleanup_commands(self) -> list[str]: - cleanup_cmds: list[str] = [] + def _cleanup_targets(self) -> list[str]: + cleanup_targets: list[str] = [] filepath_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("filepath")) if filepath_raw: - filepath = Path(filepath_raw) - if filepath == Path("/"): - logging.warning("Skipping filepath cleanup for '/': refusing to delete container root contents.") - else: - cleanup_cmds.append(f"rm -rf {shlex.quote(str(filepath))}") + cleanup_targets.append(str((self.test_run.output_path / "filepath_mount").resolve())) device_list_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("device_list")) - if device_list_raw: - for device_path in get_files_from_device_list(device_list_raw): - cleanup_cmds.append(f"rm -rf {shlex.quote(str(device_path))}") + if device_list_raw and get_files_from_device_list(device_list_raw): + cleanup_targets.append(str((self.test_run.output_path / "device_list_mounts").resolve())) - return cleanup_cmds + return cleanup_targets @property def final_env_vars(self) -> dict[str, str | list[str]]: diff --git a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py index 19f4f4abf..8cb0f14bc 100644 --- a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py @@ -36,7 +36,7 @@ def _gen_srun_command(self) -> str: nixl_commands = self.gen_nixlbench_srun_commands( self.gen_nixlbench_command(), str(self.tdef.cmd_args_dict.get("backend", "unset")) ) - cleanup_command = self.gen_cleanup_srun_command() + cleanup_command = self.gen_cleanup_command() self._current_image_url = None commands: list[str] = [ diff --git a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py index 86d30e47d..73be5b9c7 100644 --- a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py @@ -39,7 +39,7 @@ def _gen_srun_command(self) -> str: kvbench_commands = self.gen_nixlbench_srun_commands( self.gen_kvbench_command(), str(self.tdef.cmd_args.backend or "unset") ) - cleanup_command = self.gen_cleanup_srun_command() + cleanup_command = self.gen_cleanup_command() self._current_image_url = None self.create_env_vars_file() diff --git a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py index 57a6ee50b..5497b382a 100644 --- a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py +++ b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py @@ -105,7 +105,7 @@ def test_container_mounts(self, nixl_bench_tr: TestRun, slurm_system: SlurmSyste assert (nixl_bench_tr.output_path / "device_list_mounts" / local_device_filename).is_file() assert (nixl_bench_tr.output_path / "device_list_mounts" / local_device_filename).stat().st_size == 1024 - def test_cleanup_srun_command(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): + def test_cleanup_command_uses_host_paths(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): nixl_bench_tr.test.cmd_args = NIXLBenchCmdArgs.model_validate( { "docker_image_url": "docker.io/library/ubuntu:22.04", @@ -116,13 +116,32 @@ def test_cleanup_srun_command(self, nixl_bench_tr: TestRun, slurm_system: SlurmS } ) strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) - strategy._current_image_url = str(cast(NIXLBenchTestDefinition, nixl_bench_tr.test).docker_image.installed_path) - cleanup_cmd = " ".join(strategy.gen_cleanup_srun_command()) + cleanup_cmd = " ".join(strategy.gen_cleanup_command()) + filepath_dir = nixl_bench_tr.output_path / "filepath_mount" + device_list_dir = nixl_bench_tr.output_path / "device_list_mounts" + assert cleanup_cmd == f"rm -rf {filepath_dir} {device_list_dir}" - assert "rm -rf /data" in cleanup_cmd - assert "rm -rf /p1/store0.bin" in cleanup_cmd - assert "rm -rf /p2/store0.bin" in cleanup_cmd + def test_gen_cleanup_command_empty_without_storage_args(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): + strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) + assert strategy.gen_cleanup_command() == [] + + def test_gen_srun_command_includes_host_cleanup(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): + nixl_bench_tr.test.cmd_args = NIXLBenchCmdArgs.model_validate( + { + "docker_image_url": "docker.io/library/ubuntu:22.04", + "path_to_benchmark": "/nixlbench", + "backend": "GUSLI", + "device_list": "11:F:/store0.bin", + "filepath": "/data", + } + ) + strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) + + cleanup_cmd = " ".join(strategy.gen_cleanup_command()) + cmd = strategy._gen_srun_command() + + assert cleanup_cmd in cmd @pytest.mark.parametrize( ("override", "expected_error_match", "expected_total_buffer_size"), @@ -240,7 +259,7 @@ def test_gen_nixl_srun_command( assert "--nodelist=$SLURM_JOB_MASTER_NODE" in cmd -def test_gen_srun_command(nixl_bench_tr: TestRun, slurm_system: SlurmSystem): +def test_gen_wait_for_etcd_command(nixl_bench_tr: TestRun, slurm_system: SlurmSystem): strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) cmd = strategy.gen_wait_for_etcd_command() assert cmd == [ diff --git a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py index 354cb19fd..6c3b744dd 100644 --- a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py +++ b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py @@ -128,7 +128,7 @@ def test_get_etcd_srun_command_with_etcd_image(kvbench_tr: TestRun, slurm_system assert f"--container-image={tdef.etcd_image.installed_path}" in cmd -def test_kvbench_cleanup_srun_command_uses_container_paths(kvbench_tr: TestRun, slurm_system: SlurmSystem): +def test_kvbench_cleanup_command_uses_host_paths(kvbench_tr: TestRun, slurm_system: SlurmSystem): kvbench_tr.test.cmd_args = NIXLKVBenchCmdArgs.model_validate( { "docker_image_url": "docker://image/url", @@ -138,9 +138,31 @@ def test_kvbench_cleanup_srun_command_uses_container_paths(kvbench_tr: TestRun, } ) strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) - strategy._current_image_url = str(cast(NIXLKVBenchTestDefinition, kvbench_tr.test).docker_image.installed_path) - cmd = " ".join(strategy.gen_cleanup_srun_command()) + cmd = " ".join(strategy.gen_cleanup_command()) + filepath_dir = kvbench_tr.output_path / "filepath_mount" + device_list_dir = kvbench_tr.output_path / "device_list_mounts" + assert cmd == f"rm -rf {filepath_dir} {device_list_dir}" - assert "rm -rf /data" in cmd - assert "rm -rf /store0.bin" in cmd + +def test_kvbench_gen_cleanup_command_empty_without_storage_args(kvbench_tr: TestRun, slurm_system: SlurmSystem): + strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) + + assert strategy.gen_cleanup_command() == [] + + +def test_kvbench_gen_srun_command_includes_host_cleanup(kvbench_tr: TestRun, slurm_system: SlurmSystem): + kvbench_tr.test.cmd_args = NIXLKVBenchCmdArgs.model_validate( + { + "docker_image_url": "docker://image/url", + "backend": "GUSLI", + "filepath": "/data", + "device_list": "11:F:/store0.bin", + } + ) + strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) + + cleanup_cmd = " ".join(strategy.gen_cleanup_command()) + cmd = strategy._gen_srun_command() + + assert cleanup_cmd in cmd From 271cc885594e58d0577ecf7d6e4ba6fb8c8cc327 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Thu, 19 Mar 2026 18:48:32 +0100 Subject: [PATCH 3/9] change commands order --- src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py | 2 +- .../workloads/nixl_kvbench/slurm_command_gen_strategy.py | 2 +- tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py | 1 + tests/workloads/nixl_kvbench/test_command_gen_slurm.py | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py index 8cb0f14bc..218023d12 100644 --- a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py @@ -48,7 +48,7 @@ def _gen_srun_command(self) -> str: " ".join(self.gen_kill_and_wait_cmd("etcd_pid")), ] if cleanup_command: - commands.insert(-1, " ".join(cleanup_command)) + commands.append(" ".join(cleanup_command)) return "\n".join(commands) def gen_nixlbench_command(self) -> list[str]: diff --git a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py index 73be5b9c7..9285b0fe6 100644 --- a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py @@ -53,7 +53,7 @@ def _gen_srun_command(self) -> str: " ".join(self.gen_kill_and_wait_cmd("etcd_pid")), ] if cleanup_command: - final_cmd.insert(-1, " ".join(cleanup_command)) + final_cmd.append(" ".join(cleanup_command)) return "\n".join(final_cmd) def gen_kvbench_command(self) -> list[str]: diff --git a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py index 5497b382a..624a31132 100644 --- a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py +++ b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py @@ -142,6 +142,7 @@ def test_gen_srun_command_includes_host_cleanup(self, nixl_bench_tr: TestRun, sl cmd = strategy._gen_srun_command() assert cleanup_cmd in cmd + assert cmd.rfind("kill -TERM $etcd_pid") < cmd.rfind(cleanup_cmd) @pytest.mark.parametrize( ("override", "expected_error_match", "expected_total_buffer_size"), diff --git a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py index 6c3b744dd..dd537f7ba 100644 --- a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py +++ b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py @@ -166,3 +166,4 @@ def test_kvbench_gen_srun_command_includes_host_cleanup(kvbench_tr: TestRun, slu cmd = strategy._gen_srun_command() assert cleanup_cmd in cmd + assert cmd.rfind("kill -TERM $etcd_pid") < cmd.rfind(cleanup_cmd) From 386edb2a372e4bbb378e34638e0b84c34216bec2 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 20 Mar 2026 11:15:22 +0100 Subject: [PATCH 4/9] using python for clenaup --- src/cloudai/_core/command_gen_strategy.py | 4 +++ .../systems/slurm/single_sbatch_runner.py | 5 +++- src/cloudai/systems/slurm/slurm_runner.py | 8 ++++++ src/cloudai/workloads/common/nixl.py | 20 +++++++------- .../nixl_bench/slurm_command_gen_strategy.py | 3 --- .../slurm_command_gen_strategy.py | 3 --- tests/test_get_job_id.py | 19 +++++++++++-- tests/test_single_sbatch_runner.py | 27 ++++++++++++++++++- .../test_command_gen_strategy_slurm.py | 26 +++++++++--------- .../nixl_kvbench/test_command_gen_slurm.py | 26 +++++++++--------- 10 files changed, 95 insertions(+), 46 deletions(-) diff --git a/src/cloudai/_core/command_gen_strategy.py b/src/cloudai/_core/command_gen_strategy.py index 5238bb675..0268fbca5 100644 --- a/src/cloudai/_core/command_gen_strategy.py +++ b/src/cloudai/_core/command_gen_strategy.py @@ -49,6 +49,10 @@ def store_test_run(self) -> None: """ pass + def cleanup_job_artifacts(self) -> None: + """Best-effort cleanup hook run after the job has fully completed.""" + return + @property def final_env_vars(self) -> dict[str, str | list[str]]: if not self._final_env_vars: diff --git a/src/cloudai/systems/slurm/single_sbatch_runner.py b/src/cloudai/systems/slurm/single_sbatch_runner.py index 2ea28d554..31865b433 100644 --- a/src/cloudai/systems/slurm/single_sbatch_runner.py +++ b/src/cloudai/systems/slurm/single_sbatch_runner.py @@ -22,7 +22,7 @@ from typing import Generator, Optional, cast from cloudai.configurator.cloudai_gym import CloudAIGymEnv -from cloudai.core import JobIdRetrievalError, System, TestRun, TestScenario +from cloudai.core import BaseJob, JobIdRetrievalError, System, TestRun, TestScenario from cloudai.util import CommandShell, format_time_limit, parse_time_limit from .slurm_command_gen_strategy import SlurmCommandGenStrategy @@ -214,6 +214,9 @@ def handle_dse(self): reward = gym.compute_reward(observation) gym.write_trajectory(idx, combination, reward, observation) + def completed_test_runs(self, job: BaseJob) -> list[TestRun]: + return list(self.all_trs) + def _submit_test(self, tr: TestRun) -> SlurmJob: with open(self.scenario_root / "cloudai_sbatch_script.sh", "w") as f: f.write(self.gen_sbatch_content()) diff --git a/src/cloudai/systems/slurm/slurm_runner.py b/src/cloudai/systems/slurm/slurm_runner.py index 50a70082d..fd8f0902e 100644 --- a/src/cloudai/systems/slurm/slurm_runner.py +++ b/src/cloudai/systems/slurm/slurm_runner.py @@ -77,10 +77,18 @@ def on_job_submit(self, tr: TestRun) -> None: cmd_gen = self.get_cmd_gen_strategy(self.system, tr) cmd_gen.store_test_run() + def completed_test_runs(self, job: BaseJob) -> list[TestRun]: + return [cast(SlurmJob, job).test_run] + def on_job_completion(self, job: BaseJob) -> None: logging.debug(f"Job completion callback for job {job.id}") self.system.complete_job(cast(SlurmJob, job)) self.store_job_metadata(cast(SlurmJob, job)) + for tr in self.completed_test_runs(job): + try: + self.get_cmd_gen_strategy(self.system, tr).cleanup_job_artifacts() + except Exception: + logging.warning(f"Cleanup failed for test run at {tr.output_path}", exc_info=True) def _mock_job_metadata(self) -> SlurmStepMetadata: return SlurmStepMetadata( diff --git a/src/cloudai/workloads/common/nixl.py b/src/cloudai/workloads/common/nixl.py index 4a81c470f..950f91707 100644 --- a/src/cloudai/workloads/common/nixl.py +++ b/src/cloudai/workloads/common/nixl.py @@ -17,7 +17,7 @@ import logging import re -import shlex +import shutil from functools import cache from pathlib import Path from typing import TYPE_CHECKING, Any, Final, Generic, TypeVar, cast @@ -232,23 +232,21 @@ def _unique_file_name(self, file_name: str, used_filenames: set[str]) -> str: used_filenames.add(candidate) return candidate - def gen_cleanup_command(self) -> list[str]: - cleanup_targets = self._cleanup_targets() - if not cleanup_targets: - return [] - - return ["rm", "-rf", *(shlex.quote(path) for path in cleanup_targets)] + def cleanup_job_artifacts(self) -> None: + for cleanup_target in self._cleanup_targets(): + if cleanup_target.exists(): + shutil.rmtree(cleanup_target) - def _cleanup_targets(self) -> list[str]: - cleanup_targets: list[str] = [] + def _cleanup_targets(self) -> list[Path]: + cleanup_targets: list[Path] = [] filepath_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("filepath")) if filepath_raw: - cleanup_targets.append(str((self.test_run.output_path / "filepath_mount").resolve())) + cleanup_targets.append((self.test_run.output_path / "filepath_mount").resolve()) device_list_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("device_list")) if device_list_raw and get_files_from_device_list(device_list_raw): - cleanup_targets.append(str((self.test_run.output_path / "device_list_mounts").resolve())) + cleanup_targets.append((self.test_run.output_path / "device_list_mounts").resolve()) return cleanup_targets diff --git a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py index 218023d12..7d0995e6f 100644 --- a/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_bench/slurm_command_gen_strategy.py @@ -36,7 +36,6 @@ def _gen_srun_command(self) -> str: nixl_commands = self.gen_nixlbench_srun_commands( self.gen_nixlbench_command(), str(self.tdef.cmd_args_dict.get("backend", "unset")) ) - cleanup_command = self.gen_cleanup_command() self._current_image_url = None commands: list[str] = [ @@ -47,8 +46,6 @@ def _gen_srun_command(self) -> str: " ".join(nixl_commands[-1]), " ".join(self.gen_kill_and_wait_cmd("etcd_pid")), ] - if cleanup_command: - commands.append(" ".join(cleanup_command)) return "\n".join(commands) def gen_nixlbench_command(self) -> list[str]: diff --git a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py index 9285b0fe6..f695c76f0 100644 --- a/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py +++ b/src/cloudai/workloads/nixl_kvbench/slurm_command_gen_strategy.py @@ -39,7 +39,6 @@ def _gen_srun_command(self) -> str: kvbench_commands = self.gen_nixlbench_srun_commands( self.gen_kvbench_command(), str(self.tdef.cmd_args.backend or "unset") ) - cleanup_command = self.gen_cleanup_command() self._current_image_url = None self.create_env_vars_file() @@ -52,8 +51,6 @@ def _gen_srun_command(self) -> str: " ".join(kvbench_commands[-1]), " ".join(self.gen_kill_and_wait_cmd("etcd_pid")), ] - if cleanup_command: - final_cmd.append(" ".join(cleanup_command)) return "\n".join(final_cmd) def gen_kvbench_command(self) -> list[str]: diff --git a/tests/test_get_job_id.py b/tests/test_get_job_id.py index 260593dea..ecdf6ced3 100644 --- a/tests/test_get_job_id.py +++ b/tests/test_get_job_id.py @@ -16,14 +16,14 @@ import subprocess from pathlib import Path -from unittest.mock import Mock +from unittest.mock import Mock, patch import pytest from cloudai.core import JobIdRetrievalError, TestRun, TestScenario from cloudai.systems.lsf.lsf_runner import LSFRunner from cloudai.systems.lsf.lsf_system import LSFSystem -from cloudai.systems.slurm import SlurmRunner, SlurmSystem +from cloudai.systems.slurm import SlurmJob, SlurmRunner, SlurmSystem from cloudai.util import CommandShell from cloudai.workloads.sleep.sleep import SleepCmdArgs, SleepTestDefinition @@ -88,6 +88,21 @@ def test_slurm_get_job_id(slurm_runner: SlurmRunner, stdout: str, stderr: str, e assert res == expected_job_id +def test_slurm_runner_on_job_completion_calls_cleanup(slurm_runner: SlurmRunner): + tr = slurm_runner.test_scenario.test_runs[0] + job = SlurmJob(tr, id=1) + slurm_runner.store_job_metadata = Mock() + cleanup = Mock() + slurm_runner.get_cmd_gen_strategy = Mock(return_value=Mock(cleanup_job_artifacts=cleanup)) + + with patch.object(SlurmSystem, "complete_job") as complete_job: + slurm_runner.on_job_completion(job) + + complete_job.assert_called_once_with(job) + slurm_runner.store_job_metadata.assert_called_once_with(job) + cleanup.assert_called_once() + + @pytest.mark.parametrize( "stdout, stderr, expected_job_id", [ diff --git a/tests/test_single_sbatch_runner.py b/tests/test_single_sbatch_runner.py index 72ad93f79..91d3cdf27 100644 --- a/tests/test_single_sbatch_runner.py +++ b/tests/test_single_sbatch_runner.py @@ -16,8 +16,9 @@ import copy import re +from pathlib import Path from typing import Generator, Optional, cast -from unittest.mock import Mock +from unittest.mock import Mock, patch import pandas as pd import pytest @@ -506,6 +507,30 @@ def test_store_job_metadata(nccl_tr: TestRun, slurm_system: SlurmSystem) -> None assert sjm == SlurmJobMetadata.model_validate(toml.loads(toml.dumps(sjm.model_dump()))) +def test_on_job_completion_cleans_all_effective_test_runs( + dse_tr: TestRun, nccl_tr: TestRun, slurm_system: SlurmSystem +) -> None: + tc = TestScenario(name="tc", test_runs=[dse_tr, nccl_tr]) + runner = SingleSbatchRunner(mode="run", system=slurm_system, test_scenario=tc, output_path=slurm_system.output_path) + runner.mode = "dry-run" + runner.store_job_metadata = Mock() + + cleanup_calls: list[Path] = [] + + def _cmd_gen(_, tr: TestRun): + return Mock(cleanup_job_artifacts=Mock(side_effect=lambda: cleanup_calls.append(tr.output_path))) + + runner.get_cmd_gen_strategy = Mock(side_effect=_cmd_gen) + + expected_paths = [tr.output_path for tr in runner.all_trs] + job = SlurmJob(nccl_tr, id=1) + + with patch.object(SlurmSystem, "complete_job"): + runner.on_job_completion(job) + + assert cleanup_calls == expected_paths + + def test_pre_test(nccl_tr: TestRun, sleep_tr: TestRun, slurm_system: SlurmSystem) -> None: nccl_tr.pre_test = TestScenario(name="pre_test", test_runs=[sleep_tr]) tc = TestScenario(name="tc", test_runs=[nccl_tr]) diff --git a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py index 624a31132..ce2e26a83 100644 --- a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py +++ b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py @@ -105,7 +105,7 @@ def test_container_mounts(self, nixl_bench_tr: TestRun, slurm_system: SlurmSyste assert (nixl_bench_tr.output_path / "device_list_mounts" / local_device_filename).is_file() assert (nixl_bench_tr.output_path / "device_list_mounts" / local_device_filename).stat().st_size == 1024 - def test_cleanup_command_uses_host_paths(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): + def test_cleanup_job_artifacts(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): nixl_bench_tr.test.cmd_args = NIXLBenchCmdArgs.model_validate( { "docker_image_url": "docker.io/library/ubuntu:22.04", @@ -116,17 +116,22 @@ def test_cleanup_command_uses_host_paths(self, nixl_bench_tr: TestRun, slurm_sys } ) strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) - - cleanup_cmd = " ".join(strategy.gen_cleanup_command()) filepath_dir = nixl_bench_tr.output_path / "filepath_mount" device_list_dir = nixl_bench_tr.output_path / "device_list_mounts" - assert cleanup_cmd == f"rm -rf {filepath_dir} {device_list_dir}" + other_file = nixl_bench_tr.output_path / "keep.txt" + filepath_dir.mkdir(parents=True, exist_ok=True) + device_list_dir.mkdir(parents=True, exist_ok=True) + (filepath_dir / "a.txt").write_text("x") + (device_list_dir / "b.txt").write_text("x") + other_file.write_text("keep") - def test_gen_cleanup_command_empty_without_storage_args(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): - strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) - assert strategy.gen_cleanup_command() == [] + strategy.cleanup_job_artifacts() - def test_gen_srun_command_includes_host_cleanup(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): + assert not filepath_dir.exists() + assert not device_list_dir.exists() + assert other_file.exists() + + def test_gen_srun_command_excludes_cleanup(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): nixl_bench_tr.test.cmd_args = NIXLBenchCmdArgs.model_validate( { "docker_image_url": "docker.io/library/ubuntu:22.04", @@ -137,12 +142,9 @@ def test_gen_srun_command_includes_host_cleanup(self, nixl_bench_tr: TestRun, sl } ) strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) - - cleanup_cmd = " ".join(strategy.gen_cleanup_command()) cmd = strategy._gen_srun_command() - assert cleanup_cmd in cmd - assert cmd.rfind("kill -TERM $etcd_pid") < cmd.rfind(cleanup_cmd) + assert "rm -rf " not in cmd @pytest.mark.parametrize( ("override", "expected_error_match", "expected_total_buffer_size"), diff --git a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py index dd537f7ba..e09e3f78c 100644 --- a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py +++ b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py @@ -128,7 +128,7 @@ def test_get_etcd_srun_command_with_etcd_image(kvbench_tr: TestRun, slurm_system assert f"--container-image={tdef.etcd_image.installed_path}" in cmd -def test_kvbench_cleanup_command_uses_host_paths(kvbench_tr: TestRun, slurm_system: SlurmSystem): +def test_kvbench_cleanup_job_artifacts(kvbench_tr: TestRun, slurm_system: SlurmSystem): kvbench_tr.test.cmd_args = NIXLKVBenchCmdArgs.model_validate( { "docker_image_url": "docker://image/url", @@ -138,20 +138,23 @@ def test_kvbench_cleanup_command_uses_host_paths(kvbench_tr: TestRun, slurm_syst } ) strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) - - cmd = " ".join(strategy.gen_cleanup_command()) filepath_dir = kvbench_tr.output_path / "filepath_mount" device_list_dir = kvbench_tr.output_path / "device_list_mounts" - assert cmd == f"rm -rf {filepath_dir} {device_list_dir}" - + other_file = kvbench_tr.output_path / "keep.txt" + filepath_dir.mkdir(parents=True, exist_ok=True) + device_list_dir.mkdir(parents=True, exist_ok=True) + (filepath_dir / "a.txt").write_text("x") + (device_list_dir / "b.txt").write_text("x") + other_file.write_text("keep") -def test_kvbench_gen_cleanup_command_empty_without_storage_args(kvbench_tr: TestRun, slurm_system: SlurmSystem): - strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) + strategy.cleanup_job_artifacts() - assert strategy.gen_cleanup_command() == [] + assert not filepath_dir.exists() + assert not device_list_dir.exists() + assert other_file.exists() -def test_kvbench_gen_srun_command_includes_host_cleanup(kvbench_tr: TestRun, slurm_system: SlurmSystem): +def test_kvbench_gen_srun_command_excludes_cleanup(kvbench_tr: TestRun, slurm_system: SlurmSystem): kvbench_tr.test.cmd_args = NIXLKVBenchCmdArgs.model_validate( { "docker_image_url": "docker://image/url", @@ -161,9 +164,6 @@ def test_kvbench_gen_srun_command_includes_host_cleanup(kvbench_tr: TestRun, slu } ) strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) - - cleanup_cmd = " ".join(strategy.gen_cleanup_command()) cmd = strategy._gen_srun_command() - assert cleanup_cmd in cmd - assert cmd.rfind("kill -TERM $etcd_pid") < cmd.rfind(cleanup_cmd) + assert "rm -rf " not in cmd From 409854622df79cfc2c5c6cd8139515fc3b3de963 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 20 Mar 2026 15:00:26 +0100 Subject: [PATCH 5/9] remove redundant tests --- .../nixl_bench/test_command_gen_strategy_slurm.py | 15 --------------- .../nixl_kvbench/test_command_gen_slurm.py | 15 --------------- 2 files changed, 30 deletions(-) diff --git a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py index ce2e26a83..e984855c0 100644 --- a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py +++ b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py @@ -131,21 +131,6 @@ def test_cleanup_job_artifacts(self, nixl_bench_tr: TestRun, slurm_system: Slurm assert not device_list_dir.exists() assert other_file.exists() - def test_gen_srun_command_excludes_cleanup(self, nixl_bench_tr: TestRun, slurm_system: SlurmSystem): - nixl_bench_tr.test.cmd_args = NIXLBenchCmdArgs.model_validate( - { - "docker_image_url": "docker.io/library/ubuntu:22.04", - "path_to_benchmark": "/nixlbench", - "backend": "GUSLI", - "device_list": "11:F:/store0.bin", - "filepath": "/data", - } - ) - strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) - cmd = strategy._gen_srun_command() - - assert "rm -rf " not in cmd - @pytest.mark.parametrize( ("override", "expected_error_match", "expected_total_buffer_size"), ( diff --git a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py index e09e3f78c..fecf1d371 100644 --- a/tests/workloads/nixl_kvbench/test_command_gen_slurm.py +++ b/tests/workloads/nixl_kvbench/test_command_gen_slurm.py @@ -152,18 +152,3 @@ def test_kvbench_cleanup_job_artifacts(kvbench_tr: TestRun, slurm_system: SlurmS assert not filepath_dir.exists() assert not device_list_dir.exists() assert other_file.exists() - - -def test_kvbench_gen_srun_command_excludes_cleanup(kvbench_tr: TestRun, slurm_system: SlurmSystem): - kvbench_tr.test.cmd_args = NIXLKVBenchCmdArgs.model_validate( - { - "docker_image_url": "docker://image/url", - "backend": "GUSLI", - "filepath": "/data", - "device_list": "11:F:/store0.bin", - } - ) - strategy = NIXLKVBenchSlurmCommandGenStrategy(slurm_system, kvbench_tr) - cmd = strategy._gen_srun_command() - - assert "rm -rf " not in cmd From 87514ce6bd5b46e9505ed41d3b50cf9906732663 Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 20 Mar 2026 15:01:42 +0100 Subject: [PATCH 6/9] revert redundant line change --- tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py index e984855c0..814b20e7b 100644 --- a/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py +++ b/tests/workloads/nixl_bench/test_command_gen_strategy_slurm.py @@ -247,7 +247,7 @@ def test_gen_nixl_srun_command( assert "--nodelist=$SLURM_JOB_MASTER_NODE" in cmd -def test_gen_wait_for_etcd_command(nixl_bench_tr: TestRun, slurm_system: SlurmSystem): +def test_gen_srun_command(nixl_bench_tr: TestRun, slurm_system: SlurmSystem): strategy = NIXLBenchSlurmCommandGenStrategy(slurm_system, nixl_bench_tr) cmd = strategy.gen_wait_for_etcd_command() assert cmd == [ From 6f2338a6389e8df5d6f447e89e97df8623669ccc Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 20 Mar 2026 18:57:26 +0100 Subject: [PATCH 7/9] update copyright --- src/cloudai/_core/command_gen_strategy.py | 2 +- src/cloudai/systems/slurm/slurm_runner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cloudai/_core/command_gen_strategy.py b/src/cloudai/_core/command_gen_strategy.py index 0268fbca5..56a17f0ee 100644 --- a/src/cloudai/_core/command_gen_strategy.py +++ b/src/cloudai/_core/command_gen_strategy.py @@ -1,5 +1,5 @@ # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/src/cloudai/systems/slurm/slurm_runner.py b/src/cloudai/systems/slurm/slurm_runner.py index fd8f0902e..dae0cdb29 100644 --- a/src/cloudai/systems/slurm/slurm_runner.py +++ b/src/cloudai/systems/slurm/slurm_runner.py @@ -1,5 +1,5 @@ # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES -# Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); From 1a88b1cae56445198c9d437c1c230870b529557f Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 20 Mar 2026 19:10:15 +0100 Subject: [PATCH 8/9] Update src/cloudai/workloads/common/nixl.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- src/cloudai/workloads/common/nixl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cloudai/workloads/common/nixl.py b/src/cloudai/workloads/common/nixl.py index 950f91707..89ee72ba9 100644 --- a/src/cloudai/workloads/common/nixl.py +++ b/src/cloudai/workloads/common/nixl.py @@ -236,6 +236,7 @@ def cleanup_job_artifacts(self) -> None: for cleanup_target in self._cleanup_targets(): if cleanup_target.exists(): shutil.rmtree(cleanup_target) + logging.debug(f"Cleaned up job artifact: {cleanup_target}") def _cleanup_targets(self) -> list[Path]: cleanup_targets: list[Path] = [] From a2d607048dfd2c4d0305892901d76bbf9637b47e Mon Sep 17 00:00:00 2001 From: Ivan Podkidyshev Date: Fri, 20 Mar 2026 19:26:32 +0100 Subject: [PATCH 9/9] safer nixl artifacts deletion --- src/cloudai/workloads/common/nixl.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cloudai/workloads/common/nixl.py b/src/cloudai/workloads/common/nixl.py index 89ee72ba9..430a63951 100644 --- a/src/cloudai/workloads/common/nixl.py +++ b/src/cloudai/workloads/common/nixl.py @@ -234,7 +234,7 @@ def _unique_file_name(self, file_name: str, used_filenames: set[str]) -> str: def cleanup_job_artifacts(self) -> None: for cleanup_target in self._cleanup_targets(): - if cleanup_target.exists(): + if cleanup_target.is_dir(): shutil.rmtree(cleanup_target) logging.debug(f"Cleaned up job artifact: {cleanup_target}") @@ -243,11 +243,11 @@ def _cleanup_targets(self) -> list[Path]: filepath_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("filepath")) if filepath_raw: - cleanup_targets.append((self.test_run.output_path / "filepath_mount").resolve()) + cleanup_targets.append(self.test_run.output_path / "filepath_mount") device_list_raw: str | None = cast(str | None, self.test_run.test.cmd_args_dict.get("device_list")) if device_list_raw and get_files_from_device_list(device_list_raw): - cleanup_targets.append((self.test_run.output_path / "device_list_mounts").resolve()) + cleanup_targets.append(self.test_run.output_path / "device_list_mounts") return cleanup_targets