@@ -31,6 +31,7 @@ class ParallelSetup(NamedTuple):
3131 tp_size : int
3232 pp_size : int
3333 dcp_size : int
34+ pcp_size : int
3435 cp_kv_cache_interleave_size : int
3536 eager_mode : bool
3637 chunked_prefill : bool
@@ -55,6 +56,7 @@ def detailed(
5556 tp_base : int = 4 ,
5657 pp_base : int = 1 ,
5758 dcp_base : int = 1 ,
59+ pcp_base : int = 1 ,
5860 cp_kv_cache_interleave_size : int = 1 ,
5961 multi_node_only : bool = False ,
6062 runner : RunnerOption = "auto" ,
@@ -70,7 +72,8 @@ def detailed(
7072 ParallelSetup (
7173 tp_size = tp_base ,
7274 pp_size = pp_multiplier * pp_base ,
73- dcp_size = int (dcp_multiplier * tp_base ),
75+ dcp_size = max (1 , int (dcp_multiplier * tp_base )),
76+ pcp_size = pcp_base ,
7477 cp_kv_cache_interleave_size = cp_kv_cache_interleave_size ,
7578 eager_mode = eager_mode_val ,
7679 chunked_prefill = chunked_prefill_val ,
@@ -116,6 +119,7 @@ def _compare_cp_with_tp(
116119 tp_size ,
117120 pp_size ,
118121 dcp_size ,
122+ pcp_size ,
119123 cp_kv_cache_interleave_size ,
120124 eager_mode ,
121125 chunked_prefill ,
@@ -196,7 +200,9 @@ def _compare_cp_with_tp(
196200 str (pp_size ),
197201 "--decode-context-parallel-size" ,
198202 str (dcp_size ),
199- "--dcp-kv-cache-interleave-size" ,
203+ "--prefill-context-parallel-size" ,
204+ str (pcp_size ),
205+ "--cp-kv-cache-interleave-size" ,
200206 str (cp_kv_cache_interleave_size ),
201207 "--distributed-executor-backend" ,
202208 distributed_backend ,
@@ -228,6 +234,8 @@ def _compare_cp_with_tp(
228234 CPTestSettings .detailed (),
229235 CPTestSettings .detailed (tp_base = 2 ),
230236 CPTestSettings .detailed (tp_base = 2 , cp_kv_cache_interleave_size = 64 ),
237+ CPTestSettings .detailed (tp_base = 1 , pcp_base = 2 ),
238+ CPTestSettings .detailed (tp_base = 1 , pcp_base = 2 , cp_kv_cache_interleave_size = 64 ),
231239 ],
232240 "bigcode/gpt_bigcode-santacoder" : [
233241 CPTestSettings .detailed (),
0 commit comments