Skip to content

Commit 55c21c8

Browse files
authored
[ROCm][CI] Fix "Cannot re-initialize CUDA in forked subprocess" in test_pynccl.py (#29119)
Signed-off-by: Micah Williamson <[email protected]>
1 parent 3999442 commit 55c21c8

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

requirements/rocm-test.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,5 +40,8 @@ mteb[bm25s]>=1.38.11, <2
4040
# Required for eval tests
4141
lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d
4242

43+
# Required for multiprocessed tests that use spawn method
44+
multiprocess==0.70.16
45+
4346
# Plugins test
4447
terratorch @ git+https://github.com/IBM/terratorch.git@07184fcf91a1324f831ff521dd238d97fe350e3e

tests/distributed/test_pynccl.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4-
import multiprocessing
54
import os
65

6+
import multiprocess as mp
77
import numpy as np
88
import pytest
99
import torch
@@ -20,10 +20,12 @@
2020
)
2121
from vllm.utils.system_utils import update_environment_variables
2222

23+
mp.set_start_method("spawn", force=True)
24+
2325

2426
def distributed_run(fn, world_size):
2527
number_of_processes = world_size
26-
processes: list[multiprocessing.Process] = []
28+
processes: list[mp.Process] = []
2729
for i in range(number_of_processes):
2830
env: dict[str, str] = {}
2931
env["RANK"] = str(i)
@@ -32,7 +34,7 @@ def distributed_run(fn, world_size):
3234
env["LOCAL_WORLD_SIZE"] = str(number_of_processes)
3335
env["MASTER_ADDR"] = "localhost"
3436
env["MASTER_PORT"] = "12345"
35-
p = multiprocessing.Process(target=fn, args=(env,))
37+
p = mp.Process(target=fn, args=(env,))
3638
processes.append(p)
3739
p.start()
3840

0 commit comments

Comments
 (0)