[Misc] Add In-Container restart capability through supervisord for openai server #28502

HappyAmazonian · HappyAmazonian · commit 3d0bec84fe19 · 2025-11-14T22:11:58.000Z
Signed-off-by: Shen Teng <sheteng@amazon.com>
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -497,5 +497,5 @@ ENTRYPOINT ["./sagemaker-entrypoint.sh"]
 
 FROM vllm-openai-base AS vllm-openai
 
-ENTRYPOINT ["vllm", "serve"]
+ENTRYPOINT ["standard-supervisor", "vllm", "serve"]
 #################### OPENAI API SERVER ####################
diff --git a/requirements/common.txt b/requirements/common.txt
@@ -49,4 +49,4 @@ cbor2 # Required for cross-language serialization of hashable objects
 setproctitle # Used to set process names for better debugging and monitoring
 openai-harmony >= 0.0.3  # Required for gpt-oss
 anthropic == 0.71.0
-model-hosting-container-standards < 1.0.0
+model-hosting-container-standards >= 0.1.7, < 1.0.0 # Required for SageMaker integration
diff --git a/tests/entrypoints/sagemaker/conftest.py b/tests/entrypoints/sagemaker/conftest.py
@@ -46,7 +46,10 @@ def basic_server_with_lora(smollm2_lora_files):
         "64",
     ]
 
-    envs = {"VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True"}
+    envs = {
+        "VLLM_ALLOW_RUNTIME_LORA_UPDATING": "True",
+        "SAGEMAKER_ENABLE_STATEFUL_SESSIONS": "True",
+    }
     with RemoteOpenAIServer(MODEL_NAME_SMOLLM, args, env_dict=envs) as remote_server:
         yield remote_server