[Serve] Enable GPU sampler for Metal (#3349)

akaashrp · web-flow · commit 9fa51dbb7d59 · 2025-09-25T23:34:48.000-04:00
Add "metal" to the list of target kinds accepted by
AttachGPUSamplingFunc.transform_module so that GPU sampling functions
are attached to models compiled for Metal.
Update Sampler::SupportGPUSampler to report Metal devices as supported,
so that the GPU sampling functions are used on Metal at runtime.
diff --git a/cpp/serve/sampler/sampler.h b/cpp/serve/sampler/sampler.h
@@ -144,7 +144,8 @@ class Sampler : public ObjectRef {
   /*! \brief Check if the given device supports GPU sampling. */
   static bool SupportGPUSampler(Device device) {
     return device.device_type == DLDeviceType::kDLCUDA ||
-           device.device_type == DLDeviceType::kDLVulkan;
+           device.device_type == DLDeviceType::kDLVulkan ||
+           device.device_type == DLDeviceType::kDLMetal;
   }
 
   TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(Sampler, ObjectRef, SamplerObj);
diff --git a/python/mlc_llm/compiler_pass/attach_sampler.py b/python/mlc_llm/compiler_pass/attach_sampler.py
@@ -28,8 +28,8 @@ def __init__(self, target: tvm.target.Target, variable_bounds: Dict[str, int]):
 
     def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassContext) -> IRModule:
         """Entrypoint"""
-        if str(self.target.kind) not in ["cuda", "vulkan"]:
-            # Only enable GPU sampling for CUDA.
+        if str(self.target.kind) not in ["cuda", "vulkan", "metal"]:
+            # Only enable GPU sampling for CUDA, Vulkan, and Metal.
             return mod
 
         bb = relax.BlockBuilder(mod)

Original file line number	Diff line number	Diff line change
`@@ -144,7 +144,8 @@ class Sampler : public ObjectRef {`
`144`	`144`	`/*! \brief Check if the given device supports GPU sampling. */`
`145`	`145`	`static bool SupportGPUSampler(Device device) {`
`146`	`146`	`return device.device_type == DLDeviceType::kDLCUDA ||`
`147`		`- device.device_type == DLDeviceType::kDLVulkan;`
	`147`	`+ device.device_type == DLDeviceType::kDLVulkan ||`
	`148`	`+ device.device_type == DLDeviceType::kDLMetal;`
`148`	`149`	`}`
`149`	`150`
`150`	`151`	`TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(Sampler, ObjectRef, SamplerObj);`