113 commits
5a64522
Add first scratch of Dockerfile building dependencies
PawelPeczek-Roboflow Nov 12, 2025
0f1ede0
Force running TRT build in bash
PawelPeczek-Roboflow Nov 12, 2025
53f4601
Add python setup toolkit
PawelPeczek-Roboflow Nov 12, 2025
77ec314
Add python setup toolkit
PawelPeczek-Roboflow Nov 12, 2025
6d671e4
Add python setup toolkit
PawelPeczek-Roboflow Nov 12, 2025
95962e4
Best we have so far :)
PawelPeczek-Roboflow Nov 13, 2025
5ee0e00
Add dockerfile to the point we can compile torch
PawelPeczek-Roboflow Nov 13, 2025
49abb65
Provide TRT8 runtime support
PawelPeczek-Roboflow Nov 13, 2025
0404931
Tweak opencv build
PawelPeczek-Roboflow Nov 13, 2025
1a0346f
Tweak opencv build
PawelPeczek-Roboflow Nov 13, 2025
f13b18e
Tweak cmake path
PawelPeczek-Roboflow Nov 13, 2025
80e949b
Add skip tests
PawelPeczek-Roboflow Nov 13, 2025
882eedb
Add a flag to disable tests
PawelPeczek-Roboflow Nov 14, 2025
7dbb75d
Add onnxruntime install
PawelPeczek-Roboflow Nov 14, 2025
ce2c468
Fix torch build
PawelPeczek-Roboflow Nov 14, 2025
4cb657e
Link cmake
PawelPeczek-Roboflow Nov 14, 2025
c004fcf
Fix torch build
PawelPeczek-Roboflow Nov 14, 2025
0a76dd2
Fix torch build
PawelPeczek-Roboflow Nov 14, 2025
f4f5943
Fix torch build
PawelPeczek-Roboflow Nov 14, 2025
b15e887
Fix torch build
PawelPeczek-Roboflow Nov 14, 2025
95c7cfe
Tmp disable torch build
PawelPeczek-Roboflow Nov 14, 2025
72f8911
Reduce RAM usage
PawelPeczek-Roboflow Nov 14, 2025
6b849f3
Try to fix linking for pytorch
PawelPeczek-Roboflow Nov 14, 2025
f1556c1
Add system libs to compile Python properly
PawelPeczek-Roboflow Nov 14, 2025
a5c4674
Add changes for cleaner build
PawelPeczek-Roboflow Nov 14, 2025
fbef63c
Try TRT 8.6
PawelPeczek-Roboflow Nov 14, 2025
e25816c
Try TRT 8.6
PawelPeczek-Roboflow Nov 14, 2025
170958c
Try TRT 10.3
PawelPeczek-Roboflow Nov 14, 2025
6867d1a
Fix wheel location
PawelPeczek-Roboflow Nov 14, 2025
55b1f17
Try TRT 10.3
PawelPeczek-Roboflow Nov 14, 2025
d2a9945
Less workers to compile trt
PawelPeczek-Roboflow Nov 14, 2025
94eee98
Bring back correct build
PawelPeczek-Roboflow Nov 18, 2025
1b18918
Add inference-exp patch for onnxruntime versions which do not suppo…
PawelPeczek-Roboflow Nov 18, 2025
1d2aa46
Add changes to use TRT 10.x and multi-stage build with distilled outp…
PawelPeczek-Roboflow Nov 20, 2025
5e2a8a3
Fix
PawelPeczek-Roboflow Nov 20, 2025
d93a6d7
Fix
PawelPeczek-Roboflow Nov 20, 2025
a6502ba
Fix
PawelPeczek-Roboflow Nov 20, 2025
7a3943b
Try to build onnxruntime 1.21.1
PawelPeczek-Roboflow Nov 20, 2025
845004d
Fix eigen md5 hash
PawelPeczek-Roboflow Nov 20, 2025
5214536
Fix eigen md5 hash
PawelPeczek-Roboflow Nov 20, 2025
182b411
Fix eigen md5 hash
PawelPeczek-Roboflow Nov 20, 2025
6437ad0
Fix eigen md5 hash
PawelPeczek-Roboflow Nov 20, 2025
8fd3a47
Downgrade onnxruntime
PawelPeczek-Roboflow Nov 20, 2025
620a704
Fix eigen md5 hash
PawelPeczek-Roboflow Nov 20, 2025
c12c87a
Downgrade onnxruntime
PawelPeczek-Roboflow Nov 20, 2025
757f686
Downgrade onnxruntime
PawelPeczek-Roboflow Nov 20, 2025
7345f29
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
1bf7a59
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
8192dc7
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
f518824
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
4c85504
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
b78ae13
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
a1c7e66
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
6990156
Attempt install gcc-11
PawelPeczek-Roboflow Nov 20, 2025
3cad0f3
Try higher version of onnxruntime
PawelPeczek-Roboflow Nov 20, 2025
d7c59f3
Bring back 1.18.x
PawelPeczek-Roboflow Nov 20, 2025
56b44a7
Bring back 1.21.x but with float8 disabled
PawelPeczek-Roboflow Nov 20, 2025
cc17973
Bring back 1.18.x but with float8 disabled
PawelPeczek-Roboflow Nov 20, 2025
c863603
Explicitly use cu 11.4
PawelPeczek-Roboflow Nov 21, 2025
c026736
Attempt to upgrade to cu 11.8
PawelPeczek-Roboflow Nov 21, 2025
921c43f
Stop build after onnx installed to verify artefacts
PawelPeczek-Roboflow Nov 21, 2025
866176d
Bring back 1.21.x
PawelPeczek-Roboflow Nov 21, 2025
67c3bb1
Bring back 1.21.x
PawelPeczek-Roboflow Nov 21, 2025
5431cc3
Reduce memory usage
PawelPeczek-Roboflow Nov 21, 2025
8f885ba
Stop build after onnx installed to verify artefacts
PawelPeczek-Roboflow Nov 21, 2025
5a87996
Reduce workers to prevent OOM in onnxruntime compilation
PawelPeczek-Roboflow Nov 21, 2025
96b5326
Try to fix onnxruntime install
PawelPeczek-Roboflow Nov 21, 2025
16e5543
Make remaining parts of build run
PawelPeczek-Roboflow Nov 21, 2025
649fdb5
Fix Dockerfile bug
PawelPeczek-Roboflow Nov 21, 2025
d1c00c6
Fix Dockerfile bug
PawelPeczek-Roboflow Nov 21, 2025
3205e44
Fix Dockerfile bug
PawelPeczek-Roboflow Nov 21, 2025
fbcf009
Add changes in the build to make OpenCV with CUDA work and torchvisio…
PawelPeczek-Roboflow Nov 24, 2025
07de24d
Fix bug in numpy install
PawelPeczek-Roboflow Nov 24, 2025
dcd7a99
Fix /tmp in jetpack 6.2 container
PawelPeczek-Roboflow Nov 24, 2025
fd9f428
Fix installation of opencv
PawelPeczek-Roboflow Nov 25, 2025
adc85c2
Tmp change build
PawelPeczek-Roboflow Nov 25, 2025
b6c18e9
Hopefully fix issues with torchvision and OpenCV
PawelPeczek-Roboflow Nov 25, 2025
1c118c7
Bring back main versions of inference_exp core modules, tests and dev…
PawelPeczek-Roboflow Nov 25, 2025
3b6e6a6
Bring back onnx util extension to provide compatibility with older on…
PawelPeczek-Roboflow Nov 25, 2025
0d0c89c
Merge branch 'main' into feature/inference-exp-jp-5
PawelPeczek-Roboflow Nov 25, 2025
411e98e
Remove TRT 10.x and 8.x extensions added when it was assumed support …
PawelPeczek-Roboflow Nov 25, 2025
6b96612
Merge branch 'main' into feature/inference-exp-jp-5
PawelPeczek-Roboflow Nov 26, 2025
8c89b2e
Bump versions
PawelPeczek-Roboflow Nov 26, 2025
5850388
Add experimental build of inference
PawelPeczek-Roboflow Nov 26, 2025
6eefecd
Add adjustments for inference-exp running in old inference
PawelPeczek-Roboflow Nov 26, 2025
78840aa
Make changes to allow loading untrusted models on jetpack builds by de…
PawelPeczek-Roboflow Nov 26, 2025
db776c0
Add flag to use inference-exp models in old inference
PawelPeczek-Roboflow Nov 26, 2025
2e273b7
Add inference benchmark command
PawelPeczek-Roboflow Nov 26, 2025
080e18e
Use np.ascontiguousarray(...)
PawelPeczek-Roboflow Nov 26, 2025
374d76e
Use missing api keys
PawelPeczek-Roboflow Nov 26, 2025
630d8b1
Disable inference-exp by default
PawelPeczek-Roboflow Nov 26, 2025
6011e32
Add ability to publish JP 6.2 experimental build
PawelPeczek-Roboflow Nov 26, 2025
d535db6
Merge branch 'main' into feature/inference-exp-jp-5
PawelPeczek-Roboflow Nov 26, 2025
9f4ff07
Remove RC tag from inference
PawelPeczek-Roboflow Nov 26, 2025
f7da663
Fix typo in GHA step output name
PawelPeczek-Roboflow Nov 26, 2025
d68a366
Avoid removing torch bins in JP 6.2 build
PawelPeczek-Roboflow Nov 26, 2025
abd1152
Tune pre-compiled torch version, such that for JP6.2 build those do n…
PawelPeczek-Roboflow Nov 26, 2025
ae92c62
Merge branch 'main' into feature/inference-exp-jp-5
PawelPeczek-Roboflow Nov 26, 2025
ec6ff4c
Try to build JP 6.2 with new cmake
PawelPeczek-Roboflow Nov 26, 2025
d053894
Try to build JP 6.2 with new cmake
PawelPeczek-Roboflow Nov 26, 2025
fdd8e11
Try to build JP 6.2 with new cmake
PawelPeczek-Roboflow Nov 26, 2025
fb27d71
Make sure that tensorrt python bindings are present in the final buil…
PawelPeczek-Roboflow Dec 1, 2025
a55b29f
Make sure that tensorrt python bindings are present in the final buil…
PawelPeczek-Roboflow Dec 1, 2025
b6b69d9
Upgrade boto3 to remove compatibility issue
PawelPeczek-Roboflow Dec 1, 2025
0146ca4
Bring back old boto
PawelPeczek-Roboflow Dec 1, 2025
096031a
Try again with boto
PawelPeczek-Roboflow Dec 1, 2025
433b165
Try to figure out proper boto libs combinations
PawelPeczek-Roboflow Dec 1, 2025
3a11f5a
Add botocore to requirements
PawelPeczek-Roboflow Dec 1, 2025
5678962
Force boto re-install on target image
PawelPeczek-Roboflow Dec 1, 2025
a83d40e
Force re-install TensorRT to 10.7
PawelPeczek-Roboflow Dec 1, 2025
fa61af6
Force re-install TensorRT to 10.7
PawelPeczek-Roboflow Dec 1, 2025
3e5f827
Remove typo in libnvinfer
PawelPeczek-Roboflow Dec 1, 2025
ad67ccc
Install full tensorrt
PawelPeczek-Roboflow Dec 1, 2025
16 changes: 15 additions & 1 deletion .github/workflows/docker.jetson.6.2.0.yml
@@ -12,9 +12,14 @@ on:
type: boolean
description: "Do you want to push image after build?"
default: false
custom_tag:
type: string
description: "Custom tag to use for the image (overrides VERSION)"
default: ""

env:
VERSION: "0.0.0" # Default version, will be overwritten
BASE_IMAGE: "roboflow/roboflow-inference-server-jetson-6.2.0"

jobs:
docker:
@@ -35,13 +40,22 @@ jobs:
uses: actions/checkout@v4
- name: Read version from file
run: echo "VERSION=$(DISABLE_VERSION_CHECK=true python ./inference/core/version.py)" >> $GITHUB_ENV
- name: Determine Image Tags
id: tags
uses: ./.github/actions/determine-tags
with:
custom_tag: ${{ github.event.inputs.custom_tag }}
version: ${{ env.VERSION }}
base_image: ${{ env.BASE_IMAGE }}
force_push: ${{ github.event.inputs.force_push }}
token: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Depot CLI
uses: depot/setup-action@v1
- name: Build and Push
uses: depot/build-push-action@v1
with:
push: ${{ github.event_name == 'release' || (github.event.inputs.force_push == 'true')}}
project: grl7ffzxd7
tags: roboflow/roboflow-inference-server-jetson-6.2.0:latest,roboflow/roboflow-inference-server-jetson-6.2.0:${{ env.VERSION}}
tags: ${{ steps.tags.outputs.image_tags }}
platforms: linux/arm64
file: ./docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0
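The `determine-tags` composite action itself is not part of this diff; a minimal sketch of the tag-selection logic it presumably implements — a non-empty `custom_tag` overrides the version-derived tags. The function name and sample version string are assumptions:

```python
# Hypothetical sketch of the determine-tags logic (names are assumptions,
# not the action's actual implementation).
def determine_tags(base_image: str, version: str, custom_tag: str = "") -> str:
    # A custom tag, when provided, replaces both the latest and version tags.
    if custom_tag:
        return f"{base_image}:{custom_tag}"
    return f"{base_image}:latest,{base_image}:{version}"

base = "roboflow/roboflow-inference-server-jetson-6.2.0"
print(determine_tags(base, "0.9.0"))
print(determine_tags(base, "0.9.0", custom_tag="nightly"))
```

This mirrors the change in the workflow above, where the hardcoded `latest` + `${{ env.VERSION }}` pair is replaced by `${{ steps.tags.outputs.image_tags }}`.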
32 changes: 25 additions & 7 deletions docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0
@@ -3,9 +3,9 @@
FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 AS builder

ARG DEBIAN_FRONTEND=noninteractive
ARG CMAKE_VERSION=3.31.10
ARG PYTORCH_VERSION=2.8.0
ARG TORCHVISION_VERSION=0.23.0
ARG CMAKE_VERSION=4.2.0
ARG PYTORCH_VERSION=2.6.0
ARG TORCHVISION_VERSION=0.21.0
ARG OPENCV_VERSION=4.10.0
ARG ONNXRUNTIME_VERSION=1.20.0
ENV LANG=en_US.UTF-8
@@ -94,6 +94,7 @@ RUN git clone --recursive --branch v${PYTORCH_VERSION} https://github.com/pytorc
export PYTORCH_BUILD_VERSION=${PYTORCH_VERSION} PYTORCH_BUILD_NUMBER=1 && \
export CMAKE_BUILD_TYPE=Release BUILD_SHARED_LIBS=ON USE_PRIORITIZED_TEXT_FOR_LD=1 && \
export MAX_JOBS=12 && \
export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \
python3 setup.py bdist_wheel && \
python3 -m pip install dist/torch-*.whl

@@ -113,6 +114,15 @@ ENV CUDA_HOME=/usr/local/cuda \
PATH=/usr/local/cuda/bin:$PATH \
LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

# Install Tensorrt
RUN apt remove -y 'libnvinfer*' 'libnvonnxparsers*' 'libnvparsers*' 'libnvinfer-plugin*' 'python3-libnvinfer*' 'tensorrt*'
WORKDIR /build/tensorrt-10.x
RUN wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/local_repo/nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6_1.0-1_arm64.deb && \
dpkg -i nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6_1.0-1_arm64.deb && \
cp /var/nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6/nv-tensorrt-local-tegra-C50F04B9-keyring.gpg /usr/share/keyrings/ && \
apt-get update && \
apt-get install -y tensorrt

# Build onnxruntime-gpu from source with TensorRT support
WORKDIR /build/onnxruntime
RUN git clone --recursive --branch v${ONNXRUNTIME_VERSION} https://github.com/microsoft/onnxruntime.git && \
@@ -178,6 +188,7 @@ RUN uv pip install --system --break-system-packages --index-strategy unsafe-best
-r requirements.sdk.http.txt \
-r requirements.easyocr.txt \
-r requirements.jetson.txt \
"pycuda>=2025.0.0,<2026.0.0" \
"setuptools<=75.5.0" \
packaging \
&& rm -rf ~/.cache/uv
Expand Down Expand Up @@ -205,7 +216,6 @@ RUN ln -sf /usr/bin/python3 /usr/bin/python && \
RUN cd /usr/local/lib/python3.10/dist-packages && \
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \
rm -rf debugpy* jupyterlab* jupyter_* notebook* ipython* ipykernel* || true && \
rm -rf torch/bin torch/include || true && \
rm -rf onnx/backend/test onnx/test || true && \
rm -rf scipy/*/tests pandas/tests || true && \
rm -rf */examples */benchmarks */docs || true && \
@@ -285,6 +295,12 @@ RUN ldconfig

# Copy Python packages
COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt /usr/local/lib/python3.10/dist-packages/tensorrt
COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt-10.3.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt-10.3.0.dist-info
COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_dispatch /usr/local/lib/python3.10/dist-packages/tensorrt_dispatch
COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_dispatch-10.3.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt_dispatch-10.3.0.dist-info
COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_lean /usr/local/lib/python3.10/dist-packages/tensorrt_lean
COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_lean-10.3.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt_lean-10.3.0.dist-info
COPY --from=builder /usr/local/bin/inference /usr/local/bin/inference

ENV PYTHONPATH=/usr/local/lib/python3.10/dist-packages:$PYTHONPATH
@@ -295,6 +311,8 @@ COPY inference_cli inference_cli
COPY inference_sdk inference_sdk
COPY docker/config/gpu_http.py gpu_http.py

RUN python -m pip uninstall -y boto3 botocore && python -m pip install "boto3>=1.40.0,<=1.41.5" "botocore>=1.40.0,<=1.41.5"

# Environment variables
ENV VERSION_CHECK_MODE=once \
CORE_MODEL_SAM2_ENABLED=True \
@@ -306,14 +324,14 @@ ENV VERSION_CHECK_MODE=once \
ORT_TENSORRT_ENGINE_CACHE_PATH=/tmp/ort_cache \
ORT_TENSORRT_MAX_WORKSPACE_SIZE=4294967296 \
ORT_TENSORRT_BUILDER_OPTIMIZATION_LEVEL=5 \
ONNXRUNTIME_EXECUTION_PROVIDERS=[TensorrtExecutionProvider] \
REQUIRED_ONNX_PROVIDERS=TensorrtExecutionProvider \
OPENBLAS_CORETYPE=ARMV8 \
LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \
WORKFLOWS_STEP_EXECUTION_MODE=local \
WORKFLOWS_MAX_CONCURRENT_STEPS=4 \
API_LOGGING_ENABLED=True \
DISABLE_WORKFLOW_ENDPOINTS=false
DISABLE_WORKFLOW_ENDPOINTS=false \
ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS=True \
USE_INFERENCE_EXP_MODELS=False

LABEL org.opencontainers.image.description="Inference Server - Jetson 6.2.0 (PyTorch from source, numpy 2.x)"

63 changes: 63 additions & 0 deletions docker/dockerfiles/jp51.cu114.inference-experimental.dockerfile
@@ -0,0 +1,63 @@
FROM roboflow/l4t-ml:r35.2.1-py3.12-cu118-trt-10-v0.0.1

COPY requirements/requirements.clip.txt \
requirements/requirements.http.txt \
requirements/requirements.doctr.txt \
requirements/requirements.groundingdino.txt \
requirements/requirements.sdk.http.txt \
requirements/requirements.yolo_world.txt \
requirements/_requirements.txt \
requirements/requirements.easyocr.txt \
requirements/requirements.gpu.txt \
./

RUN python -m pip install \
-r _requirements.txt \
-r requirements.clip.txt \
-r requirements.http.txt \
-r requirements.doctr.txt \
-r requirements.groundingdino.txt \
-r requirements.sdk.http.txt \
-r requirements.yolo_world.txt \
-r requirements.easyocr.txt \
-r requirements.gpu.txt \
"pycuda>=2025.0.0,<2026.0.0"


WORKDIR /app/
COPY inference inference
COPY inference_cli inference_cli
COPY inference_sdk inference_sdk
COPY docker/config/gpu_http.py gpu_http.py
COPY .release .release
COPY requirements requirements
COPY Makefile Makefile

RUN make create_inference_cli_whl PYTHON=python3.12
RUN python -m pip install dist/inference_cli*.whl

ENV VERSION_CHECK_MODE=continuous \
PROJECT=roboflow-platform \
ORT_TENSORRT_FP16_ENABLE=1 \
ORT_TENSORRT_ENGINE_CACHE_ENABLE=1 \
CORE_MODEL_SAM_ENABLED=False \
PROJECT=roboflow-platform \
NUM_WORKERS=1 \
HOST=0.0.0.0 \
PORT=9001 \
OPENBLAS_CORETYPE=ARMV8 \
LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \
WORKFLOWS_STEP_EXECUTION_MODE=local \
WORKFLOWS_MAX_CONCURRENT_STEPS=2 \
API_LOGGING_ENABLED=True \
CORE_MODEL_TROCR_ENABLED=false \
RUNS_ON_JETSON=True \
ENABLE_PROMETHEUS=True \
ENABLE_STREAM_API=True \
STREAM_API_PRELOADED_PROCESSES=2 \
PYTHONPATH=/app:$PYTHONPATH
ENV CORE_MODEL_SAM3_ENABLED=False \
ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS=True \
USE_INFERENCE_EXP_MODELS=False

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
3 changes: 3 additions & 0 deletions inference/core/env.py
@@ -208,6 +208,9 @@

# Enable experimental RFDETR backend (inference_exp) rollout, default is True
USE_INFERENCE_EXP_MODELS = str2bool(os.getenv("USE_INFERENCE_EXP_MODELS", "False"))
ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS = str2bool(
os.getenv("ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS", "False")
)

# ID of host device, default is None
DEVICE_ID = os.getenv("DEVICE_ID", None)
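The new flag follows the same `str2bool(os.getenv(...))` pattern as the surrounding settings, defaulting to disabled. A self-contained sketch of how it is parsed, using a hypothetical re-implementation of the project's `str2bool` helper:

```python
import os

def str2bool(value: str) -> bool:
    # Hypothetical stand-in for inference's str2bool helper: treat the
    # usual truthy strings as True, everything else as False.
    return value.strip().lower() in {"true", "1", "yes", "y"}

# Defaults to False, so untrusted inference-exp packages stay blocked
# unless an image (like the Jetson builds above) opts in via the env var.
ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS = str2bool(
    os.getenv("ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS", "False")
)
print(ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS)
```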
7 changes: 5 additions & 2 deletions inference/core/models/exp_adapter.py
@@ -15,7 +15,7 @@
ObjectDetectionInferenceResponse,
ObjectDetectionPrediction,
)
from inference.core.env import API_KEY
from inference.core.env import ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS, API_KEY
from inference.core.logger import logger
from inference.core.models.base import Model
from inference.core.utils.image_utils import load_image_rgb
@@ -37,7 +37,10 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
from inference_exp import AutoModel # type: ignore

self._exp_model: ObjectDetectionModel = AutoModel.from_pretrained(
model_id_or_path=model_id, api_key=self.api_key
model_id_or_path=model_id,
api_key=self.api_key,
allow_untrusted_packages=ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS,
allow_direct_local_storage_loading=False,
)
# if hasattr(self._exp_model, "optimize_for_inference"):
# self._exp_model.optimize_for_inference()
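The adapter now threads `ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS` into `AutoModel.from_pretrained` via `allow_untrusted_packages`. A minimal, self-contained illustration of that gating pattern — the allow-list and function are made-up stand-ins, not `inference_exp` internals:

```python
# Hypothetical illustration of gating untrusted model packages behind a
# flag at model-construction time (names are stand-ins for the sketch).
def load_model(model_id: str, allow_untrusted_packages: bool) -> dict:
    trusted = {"rfdetr-base/1"}  # made-up allow-list for illustration
    if not allow_untrusted_packages and model_id not in trusted:
        raise PermissionError(f"untrusted model package: {model_id}")
    return {"model_id": model_id}

print(load_model("rfdetr-base/1", allow_untrusted_packages=False)["model_id"])
```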
105 changes: 105 additions & 0 deletions inference_cli/benchmark.py
@@ -7,6 +7,7 @@
from inference_cli.lib.benchmark.dataset import PREDEFINED_DATASETS
from inference_cli.lib.benchmark_adapter import (
run_infer_api_speed_benchmark,
run_inference_experimental_benchmark,
run_python_package_speed_benchmark,
run_workflow_api_speed_benchmark,
)
@@ -269,5 +270,109 @@ def python_package_speed(
raise typer.Exit(code=1)


@benchmark_app.command(
help="This command provides a benchmark of inference-exp package. Currently, support for this feature "
"is experimental."
)
def inference_experimental_speed(
model_id: Annotated[
str,
typer.Option(
"--model_id",
"-m",
help="Model ID in format project/version.",
),
],
dataset_reference: Annotated[
str,
typer.Option(
"--dataset_reference",
"-d",
help=f"Name of predefined dataset (one of {list(PREDEFINED_DATASETS.keys())}) or path to directory with images",
),
] = "coco",
warm_up_inferences: Annotated[
int,
typer.Option("--warm_up_inferences", "-wi", help="Number of warm-up requests"),
] = 10,
benchmark_inferences: Annotated[
int,
typer.Option(
"--benchmark_requests", "-bi", help="Number of benchmark requests"
),
] = 1000,
batch_size: Annotated[
int,
typer.Option("--batch_size", "-bs", help="Batch size of single request"),
] = 1,
api_key: Annotated[
Optional[str],
typer.Option(
"--api-key",
"-a",
help="Roboflow API key for your workspace. If not given - env variable `ROBOFLOW_API_KEY` will be used",
),
] = None,
model_configuration: Annotated[
Optional[str],
typer.Option(
"--model_config", "-mc", help="Location of yaml file with model config"
),
] = None,
output_location: Annotated[
Optional[str],
typer.Option(
"--output_location",
"-o",
help="Location where to save the result (path to file or directory)",
),
] = None,
model_package_id: Annotated[
Optional[str],
typer.Option(
"--model_package_id",
"-o",
help="Selected model package ID (leave blank to run auto-negotiation)",
),
] = None,
turn_images_to_tensors: Annotated[
bool,
typer.Option(
"--images-as-tensors/--no-images-as-tensors",
help="Boolean flag to decide if input images are to be loaded as tensors on the device that model "
"is running, or should be left as np.arrays.",
),
] = True,
allow_untrusted_packages: Annotated[
bool,
typer.Option(
"--allow-untrusted-packages/--no-allow-untrusted-packages",
help="Boolean flag to decide if untrusted packages (for example the ones registered by clients) are "
"allowed to be loaded.",
),
] = True,
):
try:
run_inference_experimental_benchmark(
model_id=model_id,
dataset_reference=dataset_reference,
warm_up_inferences=warm_up_inferences,
benchmark_inferences=benchmark_inferences,
batch_size=batch_size,
api_key=api_key,
model_configuration=model_configuration,
output_location=output_location,
model_package_id=model_package_id,
turn_images_to_tensors=turn_images_to_tensors,
allow_untrusted_packages=allow_untrusted_packages,
)
except KeyboardInterrupt:
print("Benchmark interrupted.")
return
except Exception as error:
typer.echo(f"Command failed. Cause: {error}")
raise typer.Exit(code=1)


if __name__ == "__main__":
benchmark_app()
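`run_inference_experimental_benchmark` itself is defined elsewhere in the PR; a minimal sketch of the warm-up/measure timing loop such a helper typically implements, with a stub standing in for an `inference-exp` model call (all names here are assumptions):

```python
import time
from statistics import mean

def run_speed_benchmark(infer, warm_up_inferences=10, benchmark_inferences=100):
    # Warm-up phase: results discarded so one-time costs (cache fills,
    # TensorRT engine builds) do not skew the measured latencies.
    for _ in range(warm_up_inferences):
        infer()
    latencies = []
    for _ in range(benchmark_inferences):
        start = time.perf_counter()
        infer()
        latencies.append(time.perf_counter() - start)
    return {
        "avg_latency_s": mean(latencies),
        "throughput_rps": len(latencies) / sum(latencies),
    }

# Stub workload in place of a real model forward pass.
result = run_speed_benchmark(
    lambda: sum(range(1000)), warm_up_inferences=2, benchmark_inferences=20
)
print(sorted(result))
```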