File tree Expand file tree Collapse file tree 2 files changed +3
-25
lines changed
experiments/auto_parallel Expand file tree Collapse file tree 2 files changed +3
-25
lines changed Original file line number Diff line number Diff line change @@ -340,11 +340,6 @@ def build_optimizers_with_moe_load_balancing(
340340 ft_manager = ft_manager ,
341341 )
342342
343- def should_manual_allreduce (tokens_per_expert_by_layer ):
344- return not isinstance (
345- tokens_per_expert_by_layer , torch .distributed .tensor .DTensor
346- )
347-
348343 def _should_register_moe_balancing_hook (model_parts : list [nn .Module ]) -> bool :
349344 for model_part in model_parts :
350345 for transformer_block in model_part .layers .values ():
@@ -385,7 +380,9 @@ def _update_expert_bias(
385380 tokens_per_expert_by_layer = torch .vstack (tokens_per_expert_list )
386381
387382 if dp_cp_mesh is not None :
388- if should_manual_allreduce (tokens_per_expert_by_layer ):
383+ if isinstance (tokens_per_expert_by_layer , torch .distributed .tensor .DTensor ):
384+ tokens_per_expert_by_layer = tokens_per_expert_by_layer .full_tensor ()
385+ else :
389386 # Perform single all-reduce to get global statistics across all processes
390387 pg = dp_cp_mesh .get_group ()
391388 torch .distributed .all_reduce (
Original file line number Diff line number Diff line change 1414
1515@dataclass
1616class Experimental :
17- custom_import : str = ""
18- """
19- This option enables the importation of external modules.
20- Currently, it only supports dotted import modules (e.g., some_package.model_x).
21- It is the user's responsibility to ensure that the specified path can be
22- successfully imported. One method to achieve this, you can place your module
23- inside the ``torchtitan/torchtitan`` folder and execute ``pip install -e .`` to
24- make it available for import.
25- """
26-
27- custom_args_module : str = ""
28- """
29- DEPRECATED (moved to Job.custom_config_module). Will be removed soon.
30-
31- This option allows users to extend TorchTitan's existing JobConfig by extending
32- a user defined JobConfig dataclass. Similar to ``--experimental.custom_import``, the user
33- needs to ensure that the path can be imported.
34- """
35-
3617 # "aten" (default), "inductor", "none"
3718 comms_bucket_reorder_strategy : str = "aten"
3819
You can’t perform that action at this time.
0 commit comments