From da051fd0a15475a37da24a9cb06905d0f293ccd6 Mon Sep 17 00:00:00 2001 From: wxd Date: Sat, 26 Jul 2025 16:54:00 +0800 Subject: [PATCH 01/11] chore: refactor the build script to split the package built --- Makefile | 51 +++++++++++++++++++++++++++++++ README.md | 15 ++++++--- dockerfiles/build_113.Dockerfile | 35 +++++++++++++++++++++ scripts/build_scripts/build.sh | 6 +++- scripts/build_scripts/common.go | 3 ++ scripts/build_scripts/cuda.go | 52 ++++++++++++++++++++++++++++---- scripts/build_scripts/main.go | 7 ++++- scripts/build_scripts/pos.go | 5 +++ scripts/utils/dependencies.sh | 1 - scripts/utils/get_root_dir.sh | 7 ++++- 10 files changed, 168 insertions(+), 14 deletions(-) create mode 100644 Makefile create mode 100644 dockerfiles/build_113.Dockerfile diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..c8a8d32e --- /dev/null +++ b/Makefile @@ -0,0 +1,51 @@ +SRC_DIR := $(shell pwd) + +IMAGE_NAME := phos-base-113 +DOCKERFILE := $(SRC_DIR)/dockerfiles/build_113.Dockerfile + +BUILD_ARGS ?= -i -3 -u -p=false +CLIENT_RUN_CMD ?= "python" + +.PHONY: build clean exec + +build-image: + docker build \ + --build-arg proxy=http://ipads:ipads123@127.0.0.1:11235 \ + --progress=plain -f $(DOCKERFILE) -t $(IMAGE_NAME) . 
+ +build: + docker run --rm --gpus all \ + -v $(SRC_DIR):/root \ + --privileged --network=host --ipc=host \ + $(IMAGE_NAME) \ + bash -c "cd /root/scripts/build_scripts/ && bash build.sh $(BUILD_ARGS)" + +server-run: + docker run --rm --gpus all -it \ + -v $(SRC_DIR):/root \ + --privileged --network=host --ipc=host \ + $(IMAGE_NAME) \ + bash -c "CUDA_VISIBLE_DEVICES=2 pos_cli --start --target daemon" + +client-run: + docker run --rm --gpus all \ + -v $(SRC_DIR):/root \ + --privileged --network=host --ipc=host \ + $(IMAGE_NAME) \ + bash -c "cd /root && export LD_LIBRARY_PATH=/root/lib:$LD_LIBRARY_PATH && export LIBRARY_PATH=/root/lib:$LIBRARY_PATH && LD_PRELOAD=/root/lib/libxpuclient.so RUST_LOG=error $(CLIENT_RUN_CMD)" + +clean: + docker run --rm --gpus all \ + -v $(SRC_DIR):/root \ + --privileged --network=host --ipc=host \ + $(IMAGE_NAME) \ + bash -c "cd /root/scripts/build_scripts/ && bash build.sh -c -3" + +exec: + docker run --rm --gpus all -it \ + -v $(SRC_DIR):/root \ + --privileged --network=host --ipc=host \ + $(IMAGE_NAME) \ + bash + + diff --git a/README.md b/README.md index 6c07c581..e5623747 100755 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ git clone --recursive https://github.com/SJTU-IPADS/PhoenixOS.git ``` -2. **[Start Container]** +2. **(Optional) [Start Container]** PhOS can be built and installed on official vendor image. > NOTE: PhOS require libc6 >= 2.29 for compiling CRIU from source. @@ -103,10 +103,8 @@ please download these assets by simply running following commands: ```bash - # inside container - # download assets - cd /root/scripts/build_scripts + cd path/to/phos/scripts/build_scripts bash download_assets.sh ``` @@ -168,6 +166,15 @@ bash build.sh -i -3 -u ``` + Optionally, we can use pre-built image to simplify building, e.g., + on CUDA 12.3, please simply use the following: + ```bash + ## build a container named phos-base. 
Can be skipped if it is on the hub + make build-image + + + ``` + For customizing build options, please refers to and modify avaiable options under `scripts/build_scripts/build_config.yaml`. If you encounter any build issues, you're able to see building logs under `build_log`. Please open a new issue if things are stuck :-| diff --git a/dockerfiles/build_113.Dockerfile b/dockerfiles/build_113.Dockerfile new file mode 100644 index 00000000..e27348fa --- /dev/null +++ b/dockerfiles/build_113.Dockerfile @@ -0,0 +1,35 @@ +FROM phoenixos/pytorch:11.3-ubuntu20.04 as base + +ARG DEBIAN_FRONTEND=noninteractive +ARG proxy + +RUN apt update +RUN apt-get install -y libibverbs-dev libboost-all-dev net-tools \ + git-lfs pkg-config python3-pip libelf-dev libssl-dev libgl1-mesa-dev \ + libvdpau-dev iputils-ping wget gdb vim nsight-compute-2023.1.1 + +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository -y ppa:ubuntu-toolchain-r/test && \ + apt-get update + +RUN apt-get install -y g++-9 +RUN apt-get install -y g++-13 + +RUN pip3 install meson -i https://mirrors.aliyun.com/pypi/simple/ + +RUN ln -s /opt/nvidia/nsight-compute/2023.1.1/target/linux-desktop-glibc_2_11_3-x64/ncu /usr/local/bin/ncu + +# Copy build scripts from the project root +COPY scripts/ /scripts +COPY third_party/go1.23.2.linux-amd64.tar.gz /third_party/go1.23.2.linux-amd64.tar.gz + +# Make scripts executable and run download_assets.sh +RUN chmod +x /scripts/build_scripts/*.sh +RUN cd /scripts/build_scripts && bash build.sh -p -b=false -3=true + +ENV PATH="/root/.cargo/bin:${PATH}" +ENV PATH="/root/bin:${PATH}" +ENV LD_LIBRARY_PATH="/root/lib:${LD_LIBRARY_PATH}" + +WORKDIR /root diff --git a/scripts/build_scripts/build.sh b/scripts/build_scripts/build.sh index d661ed10..4750d805 100755 --- a/scripts/build_scripts/build.sh +++ b/scripts/build_scripts/build.sh @@ -23,15 +23,19 @@ source $script_dir/../common.sh # ================== program starts here ================== + 
check_and_install_go if [ $? -ne 0 ]; then error "failed to install golang" fi cd $script_dir + +go env -w GOPROXY=https://goproxy.cn,direct go build -o pos_build + if [ $? -ne 0 ]; then - error "faile to build PhOS's build system" + error "failed to build PhOS's build system" fi if [ ! -e $script_dir/pos_build ]; then error "no building binary was built" diff --git a/scripts/build_scripts/common.go b/scripts/build_scripts/common.go index e3b3c91a..332d74d2 100755 --- a/scripts/build_scripts/common.go +++ b/scripts/build_scripts/common.go @@ -63,6 +63,7 @@ type CmdOptions struct { WithUnitTest bool Target string PrintHelp bool + DoPackage bool DoBuild bool DoInstall bool DoClean bool @@ -75,6 +76,7 @@ func (cmdOpt *CmdOptions) print(logger *log.Logger) { - WithThirdParty: %v - Target: %v - PrintHelp: %v + - DoPackage: %v - DoClean: %v - DoInstall: %v - WithUnitTest: %v @@ -83,6 +85,7 @@ func (cmdOpt *CmdOptions) print(logger *log.Logger) { cmdOpt.WithThirdParty, cmdOpt.Target, cmdOpt.PrintHelp, + cmdOpt.DoPackage, cmdOpt.DoClean, cmdOpt.DoInstall, cmdOpt.WithUnitTest, diff --git a/scripts/build_scripts/cuda.go b/scripts/build_scripts/cuda.go index 8d354768..81ad27d4 100755 --- a/scripts/build_scripts/cuda.go +++ b/scripts/build_scripts/cuda.go @@ -18,7 +18,7 @@ package main import ( "fmt" - "os" + // "os" "github.com/PhoenixOS-IPADS/PhOS/scripts/utils" "github.com/charmbracelet/log" @@ -130,7 +130,7 @@ func CRIB_PhOS_CUDA_KernelPatcher(cmdOpt CmdOptions, buildConf BuildConfigs, log } func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logger) { - if cmdOpt.DoBuild { + if cmdOpt.DoPackage { // ==================== Prepare ==================== logger.Infof("pre-build check...") utils.CheckAndInstallPackage("git", "git", nil, nil, logger) @@ -153,7 +153,7 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge _, err := utils.BashScriptGetOutput(` #!/bin/bash set -e - pip3 install meson + pip3 install meson -i 
https://mirrors.aliyun.com/pypi/simple/ `, false, logger, ) return err @@ -164,13 +164,14 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge _, err := utils.BashScriptGetOutput(` #!/bin/bash set -e - pip3 install ninja + pip3 install ninja -i https://mirrors.aliyun.com/pypi/simple/ `, false, logger, ) return err } utils.CheckAndInstallPackage("ninja", "", install_ninja, nil, logger) + /* install_cargo := func() error { _, err := utils.BashScriptGetOutput(` #!/bin/bash @@ -194,9 +195,48 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge ) os.Exit(0) return nil + } + utils.CheckAndInstallPackage("cargo", "", install_cargo, post_install_cargo, logger) + */ + + install_cargo := func() error { + _, err := utils.BashScriptGetOutput(` + #!/bin/bash + set -e + + # Install rustup (Rust toolchain installer) + export RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup + export RUSTUP_UPDATE_ROOT=https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup + + /usr/bin/curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + + # Source cargo environment (for scripts, not needed for Dockerfile ENV) + source $HOME/.cargo/env + + # Install nightly and set as default + $HOME/.cargo/bin/rustup install nightly + $HOME/.cargo/bin/rustup default nightly + `, + false, logger, + ) + return err + } + utils.CheckAndInstallPackage("cargo", "", install_cargo, nil, logger) + + // XD: fixme: currently only tested on A800 machines with cuda 12.3 + install_nccl := func() error { + _, err := utils.BashScriptGetOutput(` + #!/bin/bash + set -e + apt-get install -y libnccl2 libnccl-dev --allow-change-held-packages + `, + false, logger, + ) + return err } - utils.CheckAndInstallPackage("cargo", "", install_cargo, post_install_cargo, logger) - } + utils.CheckAndInstallPackage("nccl", "", install_nccl, nil, logger) + } + // ==================== CRIB Dependencies ==================== if cmdOpt.WithThirdParty { diff 
--git a/scripts/build_scripts/main.go b/scripts/build_scripts/main.go index 8dca6784..a9d12036 100755 --- a/scripts/build_scripts/main.go +++ b/scripts/build_scripts/main.go @@ -46,6 +46,8 @@ func main() { var __PrintHelp *bool = flag.Bool("h", false, "Print help message") var __WithThirdParty *bool = flag.Bool("3", false, "Build/clean with 3rd parties") + var __DoPackage *bool = flag.Bool("p", true, "Build/clean with pre-built packages") + var __DoBuild *bool = flag.Bool("b", true, "Build/clean PhOS.") var __DoInstall *bool = flag.Bool("i", false, "Do installation") var __DoClean *bool = flag.Bool("c", false, "Do cleanning") var __WithUnitTest *bool = flag.Bool("u", false, "Do unit-testing after build") @@ -58,6 +60,8 @@ func main() { cmdOpt := CmdOptions{ PrintHelp: *__PrintHelp, WithThirdParty: *__WithThirdParty, + DoBuild: *__DoBuild, + DoPackage: *__DoPackage, DoInstall: *__DoInstall, DoClean: *__DoClean, WithUnitTest: *__WithUnitTest, @@ -91,10 +95,11 @@ func main() { // make sure we won't build/install when clean if cmdOpt.DoClean { + cmdOpt.DoPackage = false cmdOpt.DoBuild = false cmdOpt.DoInstall = false } else { - cmdOpt.DoBuild = true + // cmdOpt.DoBuild = true } CRIB_PhOS(cmdOpt, buildConf, logger) diff --git a/scripts/build_scripts/pos.go b/scripts/build_scripts/pos.go index a79895b8..e5ce5c9c 100755 --- a/scripts/build_scripts/pos.go +++ b/scripts/build_scripts/pos.go @@ -32,6 +32,11 @@ func CRIB_PhOS_Remoting(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.L utils.CheckAndInstallMultiPackagesViaOsPkgManager([]string{ "libnccl2=2.26.5-1+cuda12.9", "libnccl-dev=2.26.5-1+cuda12.9", }, logger) + //if !utils.CheckPackageViaOsPkgManager("libnccl-dev", logger) { + // utils.UnInstallPackageViaOsPkgManager("libnccl2", logger) + //} + //utils.CheckAndInstallPackageViaOsPkgManager("libnccl2=2.26.5-1+cuda12.9", logger) + //utils.CheckAndInstallPackageViaOsPkgManager("libnccl-dev=2.26.5-1+cuda12.9", logger) 
utils.CheckAndInstallPackageViaOsPkgManager("clang", logger) utils.CheckAndInstallPackageViaOsPkgManager("cmake", logger) } diff --git a/scripts/utils/dependencies.sh b/scripts/utils/dependencies.sh index 7d072230..18829100 100755 --- a/scripts/utils/dependencies.sh +++ b/scripts/utils/dependencies.sh @@ -59,7 +59,6 @@ util_install_common () { fi } - check_and_install_go() { if [[ ! -x "$(command -v go)" ]]; then warn "no go installed, installing from assets..." diff --git a/scripts/utils/get_root_dir.sh b/scripts/utils/get_root_dir.sh index e01c7ca6..5d8bf6c7 100755 --- a/scripts/utils/get_root_dir.sh +++ b/scripts/utils/get_root_dir.sh @@ -14,4 +14,9 @@ #!/bin/bash -git rev-parse --show-toplevel +if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then + repo_root=$(git rev-parse --show-toplevel) + echo "$repo_root" +else + echo "Not in a git repo." +fi \ No newline at end of file From b22f805ab136927ac1317ed42fc73a54669fd195 Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 14:49:54 +0800 Subject: [PATCH 02/11] chore: minor uncomment nccl --- scripts/build_scripts/cuda.go | 2 +- scripts/build_scripts/pos.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/build_scripts/cuda.go b/scripts/build_scripts/cuda.go index 81ad27d4..7945a3e0 100755 --- a/scripts/build_scripts/cuda.go +++ b/scripts/build_scripts/cuda.go @@ -223,7 +223,7 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge } utils.CheckAndInstallPackage("cargo", "", install_cargo, nil, logger) - // XD: fixme: currently only tested on A800 machines with cuda 12.3 + // XD: fixme: currently only tested on A800 machines with cuda 11.3 install_nccl := func() error { _, err := utils.BashScriptGetOutput(` #!/bin/bash diff --git a/scripts/build_scripts/pos.go b/scripts/build_scripts/pos.go index e5ce5c9c..638576b6 100755 --- a/scripts/build_scripts/pos.go +++ b/scripts/build_scripts/pos.go @@ -29,9 +29,9 @@ const ( func 
CRIB_PhOS_Remoting(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logger) { if cmdOpt.DoBuild { // TODO(zhuobin): we need to install NCCL version according to CUDA version - utils.CheckAndInstallMultiPackagesViaOsPkgManager([]string{ - "libnccl2=2.26.5-1+cuda12.9", "libnccl-dev=2.26.5-1+cuda12.9", - }, logger) + //utils.CheckAndInstallMultiPackagesViaOsPkgManager([]string{ + // "libnccl2=2.26.5-1+cuda12.9", "libnccl-dev=2.26.5-1+cuda12.9", + //}, logger) //if !utils.CheckPackageViaOsPkgManager("libnccl-dev", logger) { // utils.UnInstallPackageViaOsPkgManager("libnccl2", logger) //} From c483ee95aa55e239a3fd26c0c335ff132577e57b Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 18:40:21 +0800 Subject: [PATCH 03/11] chore: move cargo install from script to the docker file --- dockerfiles/build_113.Dockerfile | 10 +++++++++- scripts/build_scripts/cuda.go | 32 +++----------------------------- 2 files changed, 12 insertions(+), 30 deletions(-) diff --git a/dockerfiles/build_113.Dockerfile b/dockerfiles/build_113.Dockerfile index e27348fa..b2293cd4 100644 --- a/dockerfiles/build_113.Dockerfile +++ b/dockerfiles/build_113.Dockerfile @@ -20,6 +20,8 @@ RUN pip3 install meson -i https://mirrors.aliyun.com/pypi/simple/ RUN ln -s /opt/nvidia/nsight-compute/2023.1.1/target/linux-desktop-glibc_2_11_3-x64/ncu /usr/local/bin/ncu +RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ + # Copy build scripts from the project root COPY scripts/ /scripts COPY third_party/go1.23.2.linux-amd64.tar.gz /third_party/go1.23.2.linux-amd64.tar.gz @@ -28,8 +30,14 @@ COPY third_party/go1.23.2.linux-amd64.tar.gz /third_party/go1.23.2.linux-amd64. 
RUN chmod +x /scripts/build_scripts/*.sh RUN cd /scripts/build_scripts && bash build.sh -p -b=false -3=true -ENV PATH="/root/.cargo/bin:${PATH}" ENV PATH="/root/bin:${PATH}" +ENV PATH="/opt/rust/.cargo/bin:${PATH}" ENV LD_LIBRARY_PATH="/root/lib:${LD_LIBRARY_PATH}" +ENV CARGO_HOME=/opt/rust/.cargo +ENV RUSTUP_HOME=/opt/rust/.rustup + +RUN . /opt/rust/.cargo/env +RUN export RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup && rustup default nightly WORKDIR /root + diff --git a/scripts/build_scripts/cuda.go b/scripts/build_scripts/cuda.go index 7945a3e0..a78b0ee8 100755 --- a/scripts/build_scripts/cuda.go +++ b/scripts/build_scripts/cuda.go @@ -153,7 +153,7 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge _, err := utils.BashScriptGetOutput(` #!/bin/bash set -e - pip3 install meson -i https://mirrors.aliyun.com/pypi/simple/ + pip3 install meson `, false, logger, ) return err @@ -164,14 +164,13 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge _, err := utils.BashScriptGetOutput(` #!/bin/bash set -e - pip3 install ninja -i https://mirrors.aliyun.com/pypi/simple/ + pip3 install ninja `, false, logger, ) return err } utils.CheckAndInstallPackage("ninja", "", install_ninja, nil, logger) - /* install_cargo := func() error { _, err := utils.BashScriptGetOutput(` #!/bin/bash @@ -196,32 +195,7 @@ func CRIB_PhOS_CUDA(cmdOpt CmdOptions, buildConf BuildConfigs, logger *log.Logge os.Exit(0) return nil } - utils.CheckAndInstallPackage("cargo", "", install_cargo, post_install_cargo, logger) - */ - - install_cargo := func() error { - _, err := utils.BashScriptGetOutput(` - #!/bin/bash - set -e - - # Install rustup (Rust toolchain installer) - export RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup - export RUSTUP_UPDATE_ROOT=https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup - - /usr/bin/curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - - # Source cargo 
environment (for scripts, not needed for Dockerfile ENV) - source $HOME/.cargo/env - - # Install nightly and set as default - $HOME/.cargo/bin/rustup install nightly - $HOME/.cargo/bin/rustup default nightly - `, - false, logger, - ) - return err - } - utils.CheckAndInstallPackage("cargo", "", install_cargo, nil, logger) + utils.CheckAndInstallPackage("cargo", "", install_cargo, post_install_cargo, logger) // XD: fixme: currently only tested on A800 machines with cuda 11.3 install_nccl := func() error { From c6c0d51557a7de68c1ac3a4f3e9f1e18730e0eed Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 19:55:35 +0800 Subject: [PATCH 04/11] [doc]: minor seperate quick start doc --- README.md | 131 ++---------------- dockerfiles/build_113.Dockerfile | 23 ++- .../docs/getting_started/build_from_source.md | 129 +++++++++++++++++ 3 files changed, 156 insertions(+), 127 deletions(-) create mode 100644 docs/docs/getting_started/build_from_source.md diff --git a/README.md b/README.md index e5623747..76ed10cb 100755 --- a/README.md +++ b/README.md @@ -63,126 +63,9 @@
-## I. Build and Install PhOS - -### 💡 Option 1: Build and Install From Source - -1. **[Clone Repository]** - First of all, clone this repository **recursively**: - - ```bash - git clone --recursive https://github.com/SJTU-IPADS/PhoenixOS.git - ``` - -2. **(Optional) [Start Container]** - PhOS can be built and installed on official vendor image. - - > NOTE: PhOS require libc6 >= 2.29 for compiling CRIU from source. - - For example, for running PhOS for CUDA 11.3, - one can build on official CUDA images - (e.g., [`nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04`](https://hub.docker.com/layers/nvidia/cuda/11.3.1-cudnn8-devel-ubuntu20.04/images/sha256-459c130c94363099b02706b9b25d9fe5822ea233203ce9fbf8dfd276a55e7e95)): - - - ```bash - # enter repository - cd PhoenixOS/scripts/docker - - # start and enter container with id 1 - bash run_torch_cu113.sh -s 1 - - # enter / close container (no need to execute here, just listed) - bash run_torch_cu113.sh -e 1 # enter container - bash run_torch_cu113.sh -c 1 # close container - ``` - - Note that it's important to execute docker container with root privilege, as CRIU needs the permission to C/R kernel-space memory pages. - -3. **[Downloading Necesssary Assets]** - PhOS relies on some assets to build and test, - please download these assets by simply running following commands: - - ```bash - # download assets - cd path/to/phos/scripts/build_scripts - bash download_assets.sh - ``` - -4. **[Build]** - Building PhOS is simple! - - PhOS provides a convinient build system, which covers compiling, linking and installing all PhOS components: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ComponentDescription
phos-autogenAutogen Engine for generating most of Parser and Worker code for specific hardware platform, based on lightwight notation.
phosdPhOS Daemon, which continuously run at the background, taking over the control of all GPU devices on the node.
libphos.soPhOS Hijacker, which hijacks all GPU API calls on the client-side and forward to PhOS Daemon.
libpccl.soPhOS Checkpoint Communication Library (PCCL), which provide highly-optimized device-to-device state migration. Note that this library is not included in current release.
unit-testingUnit Tests for PhOS, which is based on GoogleTest.
phos-cliCommand Line Interface (CLI) for interacting with PhOS.
phos-remotingRemoting Framework, which provide highly-optimized GPU API remoting performance. See more details at SJTU-IPADS/PhoenixOS-Remoting.
- - To build and install all above components and other dependencies, simply run the build script in the container would works: - - ```bash - # inside container - cd /root/scripts/build_scripts - - # clear old build cache - # -c: clear previous build - # -3: the clean process involves all third-parties - bash build.sh -c -3 - - # start building - # -3: the build process involves all third-parties - # -i: install after successful building - # -u: build PhOS with unit test enable - bash build.sh -i -3 -u - ``` - - Optionally, we can use pre-built image to simplify building, e.g., - on CUDA 12.3, please simply use the following: - ```bash - ## build a container named phos-base. Can be skipped if it is on the hub - make build-image - - - ``` - - For customizing build options, please refers to and modify avaiable options under `scripts/build_scripts/build_config.yaml`. - - If you encounter any build issues, you're able to see building logs under `build_log`. Please open a new issue if things are stuck :-| - -### 💡 Option 2: Install From Pre-built Binaries - - Will soon be updated, stay tuned :) - +## I. Quick start + Currently, we don't have pre-built binaries. + Please check [build from Source](docs/docs/getting_started/build_from_source.md) for how to build and run from source!
@@ -201,9 +84,17 @@ Once successfully installed PhOS, you can now try run your program with PhOS sup 1. Start the PhOS daemon (`phosd`), which takes over all GPU reousces on the node: ```bash + ## If built in an interactive container (or host) pos_cli --start --target daemon ``` + or + + ```bash + ## If built with our container + make server-run + ``` + 2. To run your program with PhOS support, one need to put a `yaml` configure file under the directory which your program would regard as `$PWD`. This file contains all necessary informations for PhOS to hijack your program. An example file looks like: diff --git a/dockerfiles/build_113.Dockerfile b/dockerfiles/build_113.Dockerfile index b2293cd4..072e5a35 100644 --- a/dockerfiles/build_113.Dockerfile +++ b/dockerfiles/build_113.Dockerfile @@ -6,7 +6,7 @@ ARG proxy RUN apt update RUN apt-get install -y libibverbs-dev libboost-all-dev net-tools \ git-lfs pkg-config python3-pip libelf-dev libssl-dev libgl1-mesa-dev \ - libvdpau-dev iputils-ping wget gdb vim nsight-compute-2023.1.1 + libvdpau-dev iputils-ping wget gdb vim nsight-compute-2023.1.1 curl RUN apt-get update && \ apt-get install -y software-properties-common && \ @@ -26,18 +26,27 @@ RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ COPY scripts/ /scripts COPY third_party/go1.23.2.linux-amd64.tar.gz /third_party/go1.23.2.linux-amd64.tar.gz + +ENV RUSTUP_UPDATE_ROOT=https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup +RUN RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup +RUN mkdir -p /opt/rust + +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path +RUN . 
/opt/rust/.cargo/env +RUN rustup install nightly +RUN rustup default nightly + +ENV CARGO_HOME=/opt/rust/.cargo +ENV RUSTUP_HOME=/opt/rust/.rustup +ENV PATH="/opt/rust/.cargo/bin:${PATH}" + + # Make scripts executable and run download_assets.sh RUN chmod +x /scripts/build_scripts/*.sh RUN cd /scripts/build_scripts && bash build.sh -p -b=false -3=true ENV PATH="/root/bin:${PATH}" -ENV PATH="/opt/rust/.cargo/bin:${PATH}" ENV LD_LIBRARY_PATH="/root/lib:${LD_LIBRARY_PATH}" -ENV CARGO_HOME=/opt/rust/.cargo -ENV RUSTUP_HOME=/opt/rust/.rustup - -RUN . /opt/rust/.cargo/env -RUN export RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup && rustup default nightly WORKDIR /root diff --git a/docs/docs/getting_started/build_from_source.md b/docs/docs/getting_started/build_from_source.md new file mode 100644 index 00000000..492d8ecc --- /dev/null +++ b/docs/docs/getting_started/build_from_source.md @@ -0,0 +1,129 @@ +# Quick start + +This guide will help you build and run PhOS from source. +PhOS provides two options, and you can choose **either one** to build PhOS. + +## Overview of the build + + PhOS provides a convinient build system, which covers compiling, linking and installing all PhOS components: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ComponentDescription
phos-autogenAutogen Engine for generating most of Parser and Worker code for specific hardware platform, based on lightweight notation.
phosdPhOS Daemon, which continuously runs in the background, taking over the control of all GPU devices on the node.
libphos.soPhOS Hijacker, which hijacks all GPU API calls on the client side and forwards them to the PhOS Daemon.
libpccl.soPhOS Checkpoint Communication Library (PCCL), which provides highly-optimized device-to-device state migration. Note that this library is not included in the current release.
unit-testingUnit Tests for PhOS, which is based on GoogleTest.
phos-cliCommand Line Interface (CLI) for interacting with PhOS.
phos-remotingRemoting Framework, which provide highly-optimized GPU API remoting performance. See more details at SJTU-IPADS/PhoenixOS-Remoting.
+ + +1. **[Clone Repository]** + First of all, clone this repository **recursively**: + + ```bash + git clone --recursive https://github.com/SJTU-IPADS/PhoenixOS.git + ``` + +2. **[Downloading Necesssary (third-party) Assets]** + PhOS relies on some assets to build and test, + please download these assets by simply running following commands: + + ```bash + # download assets + cd path/to/phos/scripts/build_scripts + bash download_assets.sh + ``` + +3. **(Optional#1) [Build with our image]** + First, build our pre-released image (if not found phos-base-113 on the hub): + (This option only works for cuda 11.3 for now) + + ```bash + make build-image + ``` + + Second, use the image to build PhOS all the time: + + ```bash + make build BUILD_ARGS="-i -3 -p=false" + ``` + + Use the following to check possible built options: + + ```bash + make build BUILD_ARGS="-help" + ``` + +3. **(Optional#2) [Start an interactive container]** + PhOS can be built and installed on official vendor image (or host) + if you don't want to use our pre-built image. + + > NOTE: PhOS has some minimal requirements, e.g., it requires libc6 >= 2.29 for compiling CRIU from source. Thus, we strongly recommend you to use our base image as an interactive building environment. 
+ + For example, for running PhOS for CUDA 11.3, + one can build on official CUDA images + (e.g., [`nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04`](https://hub.docker.com/layers/nvidia/cuda/11.3.1-cudnn8-devel-ubuntu20.04/images/sha256-459c130c94363099b02706b9b25d9fe5822ea233203ce9fbf8dfd276a55e7e95)): + + + ```bash + # enter repository + cd PhoenixOS/scripts/docker + + # start and enter container with id 1 + bash run_torch_cu113.sh -s 1 + + # enter / close container (no need to execute here, just listed) + bash run_torch_cu113.sh -e 1 # enter container + bash run_torch_cu113.sh -c 1 # close container + ``` + + > Note that it's important to execute docker container with root privilege, as CRIU needs the permission to C/R kernel-space memory pages. + + To build and install all above components and other dependencies, simply run the build script in the container would works: + + ```bash + # inside container + cd /root/scripts/build_scripts + + # clear old build cache + # -c: clear previous build + # -3: the clean process involves all third-parties + bash build.sh -c -3 + + # start building + # -3: the build process involves all third-parties + # -i: install after successful building + # -u: build PhOS with unit test enable + bash build.sh -i -3 -u + ``` + +4. **Build configuration and trouble shooting** + For customizing build options, please refers to and modify avaiable options under `scripts/build_scripts/build_config.yaml`. + + If you encounter any build issues, you're able to see building logs under `build_log`. Please open a new issue if things are stuck :-| The logs typically are quite self-explained. 
\ No newline at end of file From 0167309236a48fd91317ae8401c9336803e5fbff Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 19:56:46 +0800 Subject: [PATCH 05/11] [doc] chore: minor fixes --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 76ed10cb..9bd748ae 100755 --- a/README.md +++ b/README.md @@ -64,8 +64,9 @@
## I. Quick start - Currently, we don't have pre-built binaries. - Please check [build from Source](docs/docs/getting_started/build_from_source.md) for how to build and run from source! + +Currently, we don't have pre-built binaries. +Please check [build from Source](docs/docs/getting_started/build_from_source.md) for how to build and run from source!
From 15c94728c0610d49896c5c8b581bf56b9cf63ab5 Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 19:57:32 +0800 Subject: [PATCH 06/11] [doc] chore: minor fixes --- docs/docs/getting_started/build_from_source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/getting_started/build_from_source.md b/docs/docs/getting_started/build_from_source.md index 492d8ecc..7308d3fc 100644 --- a/docs/docs/getting_started/build_from_source.md +++ b/docs/docs/getting_started/build_from_source.md @@ -5,7 +5,7 @@ PhOS provides two options, and you can choose **either one** to build PhOS. ## Overview of the build - PhOS provides a convinient build system, which covers compiling, linking and installing all PhOS components: +PhOS provides a convinient build system, which covers compiling, linking and installing all PhOS components: From eb48d88acfc7c3ecbbc6a41f499afc7e2660760f Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 19:58:04 +0800 Subject: [PATCH 07/11] [doc] chore: minor fix doc table --- docs/docs/getting_started/build_from_source.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/getting_started/build_from_source.md b/docs/docs/getting_started/build_from_source.md index 7308d3fc..242377e7 100644 --- a/docs/docs/getting_started/build_from_source.md +++ b/docs/docs/getting_started/build_from_source.md @@ -7,7 +7,7 @@ PhOS provides two options, and you can choose **either one** to build PhOS. PhOS provides a convinient build system, which covers compiling, linking and installing all PhOS components: -
+
@@ -40,7 +40,7 @@ PhOS provides a convinient build system, which covers compiling, linking and ins -
Component Descriptionphos-remoting Remoting Framework, which provide highly-optimized GPU API remoting performance. See more details at SJTU-IPADS/PhoenixOS-Remoting.
+ 1. **[Clone Repository]** From 53fcabe601257d0daea65af5ae0f6178403906b0 Mon Sep 17 00:00:00 2001 From: wxd Date: Sun, 27 Jul 2025 20:04:11 +0800 Subject: [PATCH 08/11] [doc] chore: minor readme style --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 9bd748ae..4cc434ee 100755 --- a/README.md +++ b/README.md @@ -63,13 +63,11 @@
-## I. Quick start +## I. Quick build Currently, we don't have pre-built binaries. Please check [build from Source](docs/docs/getting_started/build_from_source.md) for how to build and run from source! -
- ## II. Usage Once successfully installed PhOS, you can now try run your program with PhOS support! From 47a4fadb4319d715bb0e5311a505a6ca951a0b25 Mon Sep 17 00:00:00 2001 From: wxd Date: Mon, 28 Jul 2025 14:43:16 +0800 Subject: [PATCH 09/11] chore: fix bugs in build image --- dockerfiles/build_113.Dockerfile | 11 ++++++----- scripts/build_scripts/cuda.go | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/dockerfiles/build_113.Dockerfile b/dockerfiles/build_113.Dockerfile index 072e5a35..03c3e4ba 100644 --- a/dockerfiles/build_113.Dockerfile +++ b/dockerfiles/build_113.Dockerfile @@ -28,18 +28,19 @@ COPY third_party/go1.23.2.linux-amd64.tar.gz /third_party/go1.23.2.linux-amd64. ENV RUSTUP_UPDATE_ROOT=https://mirrors.tuna.tsinghua.edu.cn/rustup/rustup -RUN RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup +ENV RUSTUP_DIST_SERVER=https://mirrors.tuna.tsinghua.edu.cn/rustup RUN mkdir -p /opt/rust +ENV CARGO_HOME=/opt/rust/.cargo +ENV RUSTUP_HOME=/opt/rust/.rustup + RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path +ENV PATH="/opt/rust/.cargo/bin:${PATH}" RUN . 
/opt/rust/.cargo/env + RUN rustup install nightly RUN rustup default nightly -ENV CARGO_HOME=/opt/rust/.cargo -ENV RUSTUP_HOME=/opt/rust/.rustup -ENV PATH="/opt/rust/.cargo/bin:${PATH}" - # Make scripts executable and run download_assets.sh RUN chmod +x /scripts/build_scripts/*.sh diff --git a/scripts/build_scripts/cuda.go b/scripts/build_scripts/cuda.go index a78b0ee8..f058c0a3 100755 --- a/scripts/build_scripts/cuda.go +++ b/scripts/build_scripts/cuda.go @@ -18,7 +18,7 @@ package main import ( "fmt" - // "os" + "os" "github.com/PhoenixOS-IPADS/PhOS/scripts/utils" "github.com/charmbracelet/log" From 4d9b805e19da68c25742840ab28f84eaab7b700e Mon Sep 17 00:00:00 2001 From: Zhuobin Huang Date: Tue, 29 Jul 2025 13:36:19 +0800 Subject: [PATCH 10/11] Update docs/docs/getting_started/build_from_source.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/docs/getting_started/build_from_source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/getting_started/build_from_source.md b/docs/docs/getting_started/build_from_source.md index 242377e7..3a5b8fb2 100644 --- a/docs/docs/getting_started/build_from_source.md +++ b/docs/docs/getting_started/build_from_source.md @@ -5,7 +5,7 @@ PhOS provides two options, and you can choose **either one** to build PhOS. 
## Overview of the build -PhOS provides a convinient build system, which covers compiling, linking and installing all PhOS components: +PhOS provides a convenient build system, which covers compiling, linking and installing all PhOS components: From 3f15b2184428e1466c56b76e3b510a1e70e3fdf9 Mon Sep 17 00:00:00 2001 From: Zhuobin Huang Date: Tue, 29 Jul 2025 13:36:36 +0800 Subject: [PATCH 11/11] Update docs/docs/getting_started/build_from_source.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/docs/getting_started/build_from_source.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/getting_started/build_from_source.md b/docs/docs/getting_started/build_from_source.md index 3a5b8fb2..c614b382 100644 --- a/docs/docs/getting_started/build_from_source.md +++ b/docs/docs/getting_started/build_from_source.md @@ -50,7 +50,7 @@ PhOS provides a convenient build system, which covers compiling, linking and ins git clone --recursive https://github.com/SJTU-IPADS/PhoenixOS.git ``` -2. **[Downloading Necesssary (third-party) Assets]** +2. **[Downloading Necessary (third-party) Assets]** PhOS relies on some assets to build and test, please download these assets by simply running following commands: