Merge pull request #88 from windsonsea/metaxy

Nimbus318 · web-flow · commit e7779ebffa66 · 2025-07-23T11:24:09.000+08:00
Update version-v2.5.1-sidebars.json
diff --git a/i18n/zh/docusaurus-plugin-content-docs/version-v2.5.1.json b/i18n/zh/docusaurus-plugin-content-docs/version-v2.5.1.json
@@ -0,0 +1,70 @@
+{
+  "version.label": {
+    "message": "v2.5.1",
+    "description": "The label for version v2.5.1"
+  },
+  "sidebar.docs.category.Core Concepts": {
+    "message": "核心概念",
+    "description": "The label for category Core Concepts in sidebar docs"
+  },
+  "sidebar.docs.category.Get Started": {
+    "message": "开始使用",
+    "description": "The label for category Get Started in sidebar docs"
+  },
+  "sidebar.docs.category.Installation": {
+    "message": "安装",
+    "description": "The label for category Installation in sidebar docs"
+  },
+  "sidebar.docs.category.User Guide": {
+    "message": "用户指南",
+    "description": "The label for category User Guide in sidebar docs"
+  },
+  "sidebar.docs.category.Monitoring": {
+    "message": "监控",
+    "description": "The label for category Monitoring in sidebar docs"
+  },
+  "sidebar.docs.category.Share NVIDIA GPU devices": {
+    "message": "共享 NVIDIA GPU 设备",
+    "description": "The label for category Share NVIDIA GPU devices in sidebar docs"
+  },
+  "sidebar.docs.category.Examples": {
+    "message": "示例",
+    "description": "The label for category Examples in sidebar docs"
+  },
+  "sidebar.docs.category.Share Cambricon MLU devices": {
+    "message": "共享寒武纪 MLU 设备",
+    "description": "The label for category Share Cambricon MLU devices in sidebar docs"
+  },
+  "sidebar.docs.category.Contributor Guide": {
+    "message": "贡献者指南",
+    "description": "The label for category Contributor Guide in sidebar docs"
+  },
+  "sidebar.docs.category.Developer Guide": {
+    "message": "开发者指南",
+    "description": "The label for category Developer Guide in sidebar docs"
+  },
+  "sidebar.docs.category.Key Features": {
+    "message": "核心功能",
+    "description": "The label for category Key Features in sidebar docs"
+  },
+  "sidebar.docs.category.Share Hygon DCU devices": {
+    "message": "共享海光 DCU 设备",
+    "description": "The label for category Share Hygon DCU devices in sidebar docs"
+  },
+  "sidebar.docs.category.Share Mthreads GPU devices": {
+    "message": "共享摩尔线程 GPU 设备",
+    "description": "The label for category Share Mthreads GPU devices in sidebar docs"
+  },
+  "sidebar.docs.category.Optimize Metax GPU scheduling": {
+    "message": "优化沐曦 GPU 调度",
+    "description": "The label for category Optimize Metax GPU scheduling in sidebar docs"
+  },
+  "sidebar.docs.category.Volcano vgpu support": {
+    "message": "Volcano vGPU",
+    "description": "The label for category Volcano vgpu support in sidebar docs"
+  },
+  "sidebar.docs.category.Share Ascend devices": {
+    "message": "共享昇腾设备",
+    "description": "The label for category Share Ascend devices in sidebar docs"
+  }
+}
diff --git a/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-sharing.md b/versioned_docs/version-v2.5.1/userguide/Metax-device/enable-metax-gpu-sharing.md
@@ -0,0 +1,48 @@
+---
+title: Enable Metax GPU sharing
+---
+
+**HAMi now supports metax.com/gpu by implementing most of the same device-sharing features as for NVIDIA GPUs.** These features include the following:
+
+- **GPU Sharing**: Tasks can request a fraction of a GPU rather than the entire GPU card, allowing multiple tasks to share the same GPU.
+
+- **Device Memory Control**: Tasks can be allocated a specific amount of GPU memory, with strict enforcement to ensure usage does not exceed the assigned limit.
+
+- **Compute Core Limiting**: Tasks can be allocated a specific percentage of GPU compute cores (e.g., `60` means the container can use 60% of the GPU’s compute cores).
+
+## Prerequisites
+
+* Metax Driver >= 2.31.0
+* Metax GPU Operator >= 0.10.1
+* Kubernetes >= 1.23
+
+## Enabling GPU-sharing Support
+
+* Deploy Metax GPU Operator on Metax nodes (please consult your device provider to acquire its package and documentation)
+
+* Deploy HAMi according to README.md
+
+## Running Metax jobs
+
+Metax GPUs can now be requested by a container
+using the `metax-tech.com/sgpu` resource type:
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: gpu-pod1
+spec:
+  containers:
+    - name: ubuntu-container
+      image: cr.metax-tech.com/public-ai-release/c500/colossalai:2.24.0.5-py38-ubuntu20.04-amd64 
+      imagePullPolicy: IfNotPresent
+      command: ["sleep","infinity"]
+      resources:
+        limits:
+          metax-tech.com/sgpu: 1 # requesting 1 GPU 
+          metax-tech.com/vcore: 60 # each GPU uses 60% of total compute cores
+          metax-tech.com/vmemory: 4 # each GPU requires 4 GiB device memory
+```
+
+> **NOTICE:** *You can find more examples in the examples/sgpu folder.*
diff --git a/versioned_sidebars/version-v2.5.1-sidebars.json b/versioned_sidebars/version-v2.5.1-sidebars.json
@@ -132,6 +132,7 @@
           "label": "Optimize Metax GPU scheduling",
           "items": [
             "userguide/Metax-device/enable-metax-gpu-schedule",
+            "userguide/Metax-device/enable-metax-gpu-sharing",
             "userguide/Metax-device/specify-binpack-task",
             "userguide/Metax-device/specify-spread-task",
             {

Original file line number	Diff line number	Diff line change
`@@ -132,6 +132,7 @@`
`132`	`132`	`"label": "Optimize Metax GPU scheduling",`
`133`	`133`	`"items": [`
`134`	`134`	`"userguide/Metax-device/enable-metax-gpu-schedule",`
	`135`	`+ "userguide/Metax-device/enable-metax-gpu-sharing",`
`135`	`136`	`"userguide/Metax-device/specify-binpack-task",`
`136`	`137`	`"userguide/Metax-device/specify-spread-task",`
`137`	`138`	`{`