Skip to content

Commit 388c8f5

Browse files
sl-gnsibianlxz-gradient
authored
feat(backend): add recommended vram and update version to 0.1.0 (#218)
Co-authored-by: sibianl <[email protected]> Co-authored-by: Xiaodong <[email protected]>
1 parent 55c2c2e commit 388c8f5

22 files changed

+223
-93
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
55

66
[project]
77
name = "parallax"
8-
version = "0.0.1"
8+
version = "0.1.0"
99
description = "Decentralised pipeline-parallel LLM serving with Sglang + MLX-LM + Lattica"
1010
readme = "README.md"
1111
requires-python = ">=3.11,<3.14"

src/backend/server/scheduler_manage.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ def get_peer_id(self):
9191
return None
9292
return self.lattica.peer_id()
9393

94+
def need_more_nodes(self):
95+
return self.scheduler.need_more_nodes() if self.scheduler else False
96+
9497
def get_cluster_status(self):
9598
return {
9699
"type": "cluster_status",
@@ -102,6 +105,7 @@ def get_cluster_status(self):
102105
self.get_peer_id(), self.is_local_network
103106
),
104107
"node_list": self.get_node_list(),
108+
"need_more_nodes": self.need_more_nodes(),
105109
},
106110
}
107111

src/backend/server/static_config.py

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
import concurrent.futures
12
import json
2-
import logging
3+
import math
34
from pathlib import Path
45

6+
from parallax_utils.logging_config import get_logger
57
from scheduling.model_info import ModelInfo
68

9+
logger = get_logger(__name__)
10+
711
# Supported model list - key: model name, value: MLX model name (same as key if no MLX variant)
812
MODELS = {
913
"Qwen/Qwen3-0.6B": "Qwen/Qwen3-0.6B",
@@ -57,7 +61,6 @@
5761
"zai-org/GLM-4.6": "mlx-community/GLM-4.6-4bit",
5862
}
5963

60-
logger = logging.getLogger(__name__)
6164
NODE_JOIN_COMMAND_LOCAL_NETWORK = """parallax join"""
6265

6366
NODE_JOIN_COMMAND_PUBLIC_NETWORK = """parallax join -s {scheduler_addr} """
@@ -80,9 +83,6 @@ def _load_config_only(name: str) -> dict:
8083

8184
config = _load_config_only(model_name)
8285

83-
# get quant method
84-
# logger.info(f"Loading model config from {model_name}")
85-
8686
quant_method = config.get("quant_method", None)
8787
quantization_config = config.get("quantization_config", None)
8888
if quant_method is None and quantization_config is not None:
@@ -92,8 +92,13 @@ def _load_config_only(name: str) -> dict:
9292
param_bytes_per_element = 2
9393
elif quant_method == "fp8":
9494
param_bytes_per_element = 1
95-
elif quant_method in ("mxfp4", "int4", "awq", "gptq"):
95+
elif quant_method in ("mxfp4", "int4", "awq", "gptq", "compressed-tensors"):
9696
param_bytes_per_element = 0.5
97+
else:
98+
param_bytes_per_element = 1
99+
logger.warning(
100+
f"model_name:{model_name} quant_method {quant_method} not supported in get_model_info method"
101+
)
97102

98103
mlx_param_bytes_per_element = param_bytes_per_element
99104
mlx_model_name = MODELS.get(model_name, model_name)
@@ -135,8 +140,42 @@ def _load_config_only(name: str) -> dict:
135140
return model_info
136141

137142

143+
def get_model_info_list():
144+
model_name_list = list(MODELS.keys())
145+
with concurrent.futures.ThreadPoolExecutor() as executor:
146+
model_info_list = list(executor.map(get_model_info, model_name_list))
147+
return model_info_list
148+
149+
150+
model_info_list_cache = get_model_info_list()
151+
152+
138153
def get_model_list():
139-
return list(MODELS.keys())
154+
model_info_list = model_info_list_cache
155+
156+
def build_single_model(model_info):
157+
return {
158+
"name": model_info.model_name,
159+
"vram_gb": math.ceil(estimate_vram_gb_required(model_info)),
160+
}
161+
162+
results = [build_single_model(model_info) for model_info in model_info_list]
163+
return results
164+
165+
166+
def estimate_vram_gb_required(model_info):
167+
if model_info is None:
168+
return 0
169+
return (
170+
(
171+
model_info.embedding_io_bytes
172+
+ model_info.num_layers * model_info.decoder_layer_io_bytes(roofline=False)
173+
)
174+
* 1.0
175+
/ 1024
176+
/ 1024
177+
/ 1024
178+
)
140179

141180

142181
def get_node_join_command(scheduler_addr, is_local_network):

src/frontend/dist/assets/App-BwG-l8Xs.js renamed to src/frontend/dist/assets/App-Ba9WPx9O.js

Lines changed: 29 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/frontend/dist/assets/chat-DSga-3Xw.js renamed to src/frontend/dist/assets/chat-D4M-59Zb.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
import{c as t,j as e,C as o}from"./App-Ba9WPx9O.js";t.createRoot(document.getElementById("root")).render(e.jsx(o,{}));

src/frontend/dist/assets/chat-fdxrhkT3.js

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/frontend/dist/assets/join-BWhers2Y.js

Lines changed: 0 additions & 6 deletions
This file was deleted.

src/frontend/dist/assets/join-BzV0zEA8.js

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/frontend/dist/assets/main-C0U2HpN8.js

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments (0)