diff --git a/.github/workflows/docker.jetson.6.2.0.yml b/.github/workflows/docker.jetson.6.2.0.yml index 6ed5b1d899..317c70aa0e 100644 --- a/.github/workflows/docker.jetson.6.2.0.yml +++ b/.github/workflows/docker.jetson.6.2.0.yml @@ -12,9 +12,14 @@ on: type: boolean description: "Do you want to push image after build?" default: false + custom_tag: + type: string + description: "Custom tag to use for the image (overrides VERSION)" + default: "" env: VERSION: "0.0.0" # Default version, will be overwritten + BASE_IMAGE: "roboflow/roboflow-inference-server-jetson-6.2.0" jobs: docker: @@ -35,6 +40,15 @@ jobs: uses: actions/checkout@v4 - name: Read version from file run: echo "VERSION=$(DISABLE_VERSION_CHECK=true python ./inference/core/version.py)" >> $GITHUB_ENV + - name: Determine Image Tags + id: tags + uses: ./.github/actions/determine-tags + with: + custom_tag: ${{ github.event.inputs.custom_tag }} + version: ${{ env.VERSION }} + base_image: ${{ env.BASE_IMAGE }} + force_push: ${{ github.event.inputs.force_push }} + token: ${{ secrets.GITHUB_TOKEN }} - name: Set up Depot CLI uses: depot/setup-action@v1 - name: Build and Push @@ -42,6 +56,6 @@ jobs: with: push: ${{ github.event_name == 'release' || (github.event.inputs.force_push == 'true')}} project: grl7ffzxd7 - tags: roboflow/roboflow-inference-server-jetson-6.2.0:latest,roboflow/roboflow-inference-server-jetson-6.2.0:${{ env.VERSION}} + tags: ${{ steps.tags.outputs.image_tags }} platforms: linux/arm64 file: ./docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 diff --git a/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 b/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 index 11e6b8b63b..108461ab60 100644 --- a/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 +++ b/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 @@ -3,9 +3,9 @@ FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 AS builder ARG DEBIAN_FRONTEND=noninteractive -ARG CMAKE_VERSION=3.31.10 -ARG PYTORCH_VERSION=2.8.0 -ARG TORCHVISION_VERSION=0.23.0 +ARG CMAKE_VERSION=4.2.0 +ARG PYTORCH_VERSION=2.6.0 +ARG TORCHVISION_VERSION=0.21.0 ARG OPENCV_VERSION=4.10.0 ARG ONNXRUNTIME_VERSION=1.20.0 ENV LANG=en_US.UTF-8 @@ -94,6 +94,7 @@ RUN git clone --recursive --branch v${PYTORCH_VERSION} https://github.com/pytorc export PYTORCH_BUILD_VERSION=${PYTORCH_VERSION} PYTORCH_BUILD_NUMBER=1 && \ export CMAKE_BUILD_TYPE=Release BUILD_SHARED_LIBS=ON USE_PRIORITIZED_TEXT_FOR_LD=1 && \ export MAX_JOBS=12 && \ + export CMAKE_POLICY_VERSION_MINIMUM=3.5 && \ python3 setup.py bdist_wheel && \ python3 -m pip install dist/torch-*.whl @@ -113,6 +114,15 @@ ENV CUDA_HOME=/usr/local/cuda \ PATH=/usr/local/cuda/bin:$PATH \ LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH +# Install Tensorrt +RUN apt remove -y 'libnvinfer*' 'libnvonnxparsers*' 'libnvparsers*' 'libnvinfer-plugin*' 'python3-libnvinfer*' 'tensorrt*' +WORKDIR /build/tensorrt-10.x +RUN wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/local_repo/nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6_1.0-1_arm64.deb && \ + dpkg -i nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6_1.0-1_arm64.deb && \ + cp /var/nv-tensorrt-local-tegra-repo-ubuntu2204-10.7.0-cuda-12.6/nv-tensorrt-local-tegra-C50F04B9-keyring.gpg /usr/share/keyrings/ && \ + apt-get update && \ + apt-get install -y tensorrt + # Build onnxruntime-gpu from source with TensorRT support WORKDIR /build/onnxruntime RUN git clone --recursive --branch v${ONNXRUNTIME_VERSION} https://github.com/microsoft/onnxruntime.git && \ @@ -178,6 +188,7 @@ RUN uv pip 
install --system --break-system-packages --index-strategy unsafe-best -r requirements.sdk.http.txt \ -r requirements.easyocr.txt \ -r requirements.jetson.txt \ + "pycuda>=2025.0.0,<2026.0.0" \ "setuptools<=75.5.0" \ packaging \ && rm -rf ~/.cache/uv @@ -205,7 +216,6 @@ RUN ln -sf /usr/bin/python3 /usr/bin/python && \ RUN cd /usr/local/lib/python3.10/dist-packages && \ find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true && \ rm -rf debugpy* jupyterlab* jupyter_* notebook* ipython* ipykernel* || true && \ - rm -rf torch/bin torch/include || true && \ rm -rf onnx/backend/test onnx/test || true && \ rm -rf scipy/*/tests pandas/tests || true && \ rm -rf */examples */benchmarks */docs || true && \ @@ -285,6 +295,12 @@ RUN ldconfig # Copy Python packages COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages +COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt /usr/local/lib/python3.10/dist-packages/tensorrt +COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt-10.7.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt-10.7.0.dist-info +COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_dispatch /usr/local/lib/python3.10/dist-packages/tensorrt_dispatch +COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_dispatch-10.7.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt_dispatch-10.7.0.dist-info +COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_lean /usr/local/lib/python3.10/dist-packages/tensorrt_lean +COPY --from=builder /usr/lib/python3.10/dist-packages/tensorrt_lean-10.7.0.dist-info /usr/local/lib/python3.10/dist-packages/tensorrt_lean-10.7.0.dist-info COPY --from=builder /usr/local/bin/inference /usr/local/bin/inference ENV PYTHONPATH=/usr/local/lib/python3.10/dist-packages:$PYTHONPATH @@ -295,6 +311,8 @@ COPY inference_cli inference_cli COPY inference_sdk inference_sdk COPY docker/config/gpu_http.py gpu_http.py +RUN python -m pip uninstall -y boto3 botocore && python -m pip install "boto3>=1.40.0,<=1.41.5" "botocore>=1.40.0,<=1.41.5" + # Environment variables ENV VERSION_CHECK_MODE=once \ CORE_MODEL_SAM2_ENABLED=True \ @@ -306,14 +324,14 @@ ENV VERSION_CHECK_MODE=once \ ORT_TENSORRT_ENGINE_CACHE_PATH=/tmp/ort_cache \ ORT_TENSORRT_MAX_WORKSPACE_SIZE=4294967296 \ ORT_TENSORRT_BUILDER_OPTIMIZATION_LEVEL=5 \ - ONNXRUNTIME_EXECUTION_PROVIDERS=[TensorrtExecutionProvider] \ - REQUIRED_ONNX_PROVIDERS=TensorrtExecutionProvider \ OPENBLAS_CORETYPE=ARMV8 \ LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \ WORKFLOWS_STEP_EXECUTION_MODE=local \ WORKFLOWS_MAX_CONCURRENT_STEPS=4 \ API_LOGGING_ENABLED=True \ - DISABLE_WORKFLOW_ENDPOINTS=false + DISABLE_WORKFLOW_ENDPOINTS=false \ + ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS=True \ + USE_INFERENCE_EXP_MODELS=False LABEL org.opencontainers.image.description="Inference Server - Jetson 6.2.0 (PyTorch from source, numpy 2.x)" diff --git a/docker/dockerfiles/jp51.cu114.inference-experimental.dockerfile b/docker/dockerfiles/jp51.cu114.inference-experimental.dockerfile new file mode 100644 index 0000000000..c44ec31aee --- /dev/null +++ b/docker/dockerfiles/jp51.cu114.inference-experimental.dockerfile @@ -0,0 +1,63 @@ +FROM roboflow/l4t-ml:r35.2.1-py3.12-cu118-trt-10-v0.0.1 + +COPY requirements/requirements.clip.txt \ + requirements/requirements.http.txt \ + requirements/requirements.doctr.txt \ + requirements/requirements.groundingdino.txt \ + requirements/requirements.sdk.http.txt \ + requirements/requirements.yolo_world.txt \ + 
requirements/_requirements.txt \ + requirements/requirements.easyocr.txt \ + requirements/requirements.gpu.txt \ + ./ + +RUN python -m pip install \ + -r _requirements.txt \ + -r requirements.clip.txt \ + -r requirements.http.txt \ + -r requirements.doctr.txt \ + -r requirements.groundingdino.txt \ + -r requirements.sdk.http.txt \ + -r requirements.yolo_world.txt \ + -r requirements.easyocr.txt \ + -r requirements.gpu.txt \ + "pycuda>=2025.0.0,<2026.0.0" + + +WORKDIR /app/ +COPY inference inference +COPY inference_cli inference_cli +COPY inference_sdk inference_sdk +COPY docker/config/gpu_http.py gpu_http.py +COPY .release .release +COPY requirements requirements +COPY Makefile Makefile + +RUN make create_inference_cli_whl PYTHON=python3.12 +RUN python -m pip install dist/inference_cli*.whl + +ENV VERSION_CHECK_MODE=continuous \ + PROJECT=roboflow-platform \ + ORT_TENSORRT_FP16_ENABLE=1 \ + ORT_TENSORRT_ENGINE_CACHE_ENABLE=1 \ + CORE_MODEL_SAM_ENABLED=False \ + PROJECT=roboflow-platform \ + NUM_WORKERS=1 \ + HOST=0.0.0.0 \ + PORT=9001 \ + OPENBLAS_CORETYPE=ARMV8 \ + LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libgomp.so.1 \ + WORKFLOWS_STEP_EXECUTION_MODE=local \ + WORKFLOWS_MAX_CONCURRENT_STEPS=2 \ + API_LOGGING_ENABLED=True \ + CORE_MODEL_TROCR_ENABLED=false \ + RUNS_ON_JETSON=True \ + ENABLE_PROMETHEUS=True \ + ENABLE_STREAM_API=True \ + STREAM_API_PRELOADED_PROCESSES=2 \ + PYTHONPATH=/app:$PYTHONPATH +ENV CORE_MODEL_SAM3_ENABLED=False \ + ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS=True \ + USE_INFERENCE_EXP_MODELS=False + +ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT \ No newline at end of file diff --git a/inference/core/env.py b/inference/core/env.py index d655c37899..c2dba109a4 100644 --- a/inference/core/env.py +++ b/inference/core/env.py @@ -208,6 +208,9 @@ # Enable experimental RFDETR backend (inference_exp) rollout, default is True USE_INFERENCE_EXP_MODELS = str2bool(os.getenv("USE_INFERENCE_EXP_MODELS", "False")) +ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS = str2bool( + os.getenv("ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS", "False") +) # ID of host device, default is None DEVICE_ID = os.getenv("DEVICE_ID", None) diff --git a/inference/core/models/exp_adapter.py b/inference/core/models/exp_adapter.py index 111e8120e6..71f707cbc9 100644 --- a/inference/core/models/exp_adapter.py +++ b/inference/core/models/exp_adapter.py @@ -15,7 +15,7 @@ ObjectDetectionInferenceResponse, ObjectDetectionPrediction, ) -from inference.core.env import API_KEY +from inference.core.env import ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS, API_KEY from inference.core.logger import logger from inference.core.models.base import Model from inference.core.utils.image_utils import load_image_rgb @@ -37,7 +37,10 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs): from inference_exp import AutoModel # type: ignore self._exp_model: ObjectDetectionModel = AutoModel.from_pretrained( - model_id_or_path=model_id, api_key=self.api_key + model_id_or_path=model_id, + api_key=self.api_key, + allow_untrusted_packages=ALLOW_INFERENCE_EXP_UNTRUSTED_MODELS, + allow_direct_local_storage_loading=False, ) # if hasattr(self._exp_model, "optimize_for_inference"): # self._exp_model.optimize_for_inference() diff --git a/inference_cli/benchmark.py b/inference_cli/benchmark.py index 6ad4ff8426..f36799bd97 100644 --- a/inference_cli/benchmark.py +++ b/inference_cli/benchmark.py @@ -7,6 +7,7 @@ from inference_cli.lib.benchmark.dataset import PREDEFINED_DATASETS from inference_cli.lib.benchmark_adapter import ( 
run_infer_api_speed_benchmark, + run_inference_experimental_benchmark, + run_python_package_speed_benchmark, + run_workflow_api_speed_benchmark, ) @@ -269,5 +270,109 @@ def python_package_speed( raise typer.Exit(code=1) + +@benchmark_app.command( + help="This command provides a benchmark of the inference-exp package. Currently, support for this feature " + "is experimental." +) +def inference_experimental_speed( + model_id: Annotated[ + str, + typer.Option( + "--model_id", + "-m", + help="Model ID in format project/version.", + ), + ], + dataset_reference: Annotated[ + str, + typer.Option( + "--dataset_reference", + "-d", + help=f"Name of predefined dataset (one of {list(PREDEFINED_DATASETS.keys())}) or path to directory with images", + ), + ] = "coco", + warm_up_inferences: Annotated[ + int, + typer.Option("--warm_up_inferences", "-wi", help="Number of warm-up requests"), + ] = 10, + benchmark_inferences: Annotated[ + int, + typer.Option( + "--benchmark_requests", "-bi", help="Number of benchmark requests" + ), + ] = 1000, + batch_size: Annotated[ + int, + typer.Option("--batch_size", "-bs", help="Batch size of single request"), + ] = 1, + api_key: Annotated[ + Optional[str], + typer.Option( + "--api-key", + "-a", + help="Roboflow API key for your workspace. If not given - env variable `ROBOFLOW_API_KEY` will be used", + ), + ] = None, + model_configuration: Annotated[ + Optional[str], + typer.Option( + "--model_config", "-mc", help="Location of yaml file with model config" + ), + ] = None, + output_location: Annotated[ + Optional[str], + typer.Option( + "--output_location", + "-o", + help="Location where to save the result (path to file or directory)", + ), + ] = None, + model_package_id: Annotated[ + Optional[str], + typer.Option( + "--model_package_id", + "-mp", + help="Selected model package ID (leave blank to run auto-negotiation)", + ), + ] = None, + turn_images_to_tensors: Annotated[ + bool, + typer.Option( + "--images-as-tensors/--no-images-as-tensors", + help="Boolean flag to decide if input images are to be loaded as tensors on the device that the model " + "is running on, or left as np.arrays.", + ), + ] = True, + allow_untrusted_packages: Annotated[ + bool, + typer.Option( + "--allow-untrusted-packages/--no-allow-untrusted-packages", + help="Boolean flag to decide if untrusted packages (for example the ones registered by clients) are " + "allowed to be loaded.", + ), + ] = True, +): + try: + run_inference_experimental_benchmark( + model_id=model_id, + dataset_reference=dataset_reference, + warm_up_inferences=warm_up_inferences, + benchmark_inferences=benchmark_inferences, + batch_size=batch_size, + api_key=api_key, + model_configuration=model_configuration, + output_location=output_location, + model_package_id=model_package_id, + turn_images_to_tensors=turn_images_to_tensors, + allow_untrusted_packages=allow_untrusted_packages, + ) + except KeyboardInterrupt: + print("Benchmark interrupted.") + return + except Exception as error: + typer.echo(f"Command failed.
Cause: {error}") + raise typer.Exit(code=1) + + if __name__ == "__main__": benchmark_app() diff --git a/inference_cli/lib/benchmark/inference_experimental_speed.py b/inference_cli/lib/benchmark/inference_experimental_speed.py new file mode 100644 index 0000000000..2131af22b7 --- /dev/null +++ b/inference_cli/lib/benchmark/inference_experimental_speed.py @@ -0,0 +1,104 @@ +import random +import time +from typing import Any, Dict, List, Optional, Union + +import numpy as np +import torch +from inference_exp import AutoModel +from inference_exp.configuration import DEFAULT_DEVICE +from inference_exp.models.auto_loaders.core import AnyModel +from supervision.utils.file import read_yaml_file +from tqdm import tqdm + +from inference_cli.lib.benchmark.results_gathering import ResultsCollector + + +def run_inference_experimental_benchmark( + model_id: str, + images: List[np.ndarray], + results_collector: ResultsCollector, + warm_up_inferences: int = 10, + benchmark_inferences: int = 1000, + batch_size: int = 1, + api_key: Optional[str] = None, + model_configuration: Optional[str] = None, + model_package_id: Optional[str] = None, + turn_images_to_tensors: bool = True, + allow_untrusted_packages: bool = True, +) -> None: + inference_configuration = {} + if model_configuration is not None: + inference_configuration = read_yaml_file(file_path=model_configuration) + print( + f"Inference will be executed with the following parameters: {inference_configuration}" + ) + AutoModel.describe_model(model_id=model_id, api_key=api_key) + if model_package_id: + AutoModel.describe_model_package( + model_id=model_id, package_id=model_package_id, api_key=api_key + ) + if turn_images_to_tensors: + images = [ + torch.from_numpy(np.ascontiguousarray(image[:, :, ::-1])) + .permute(2, 0, 1) + .to(DEFAULT_DEVICE) + for image in images + ] + model = AutoModel.from_pretrained( + model_id, + api_key=api_key, + model_package_id=model_package_id, + allow_untrusted_packages=allow_untrusted_packages, + device=DEFAULT_DEVICE, + ) + run_model_warm_up( + model=model, + inference_configuration=inference_configuration, + image=images[0], + warm_up_inferences=warm_up_inferences, + ) + run_benchmark( + model=model, + inference_configuration=inference_configuration, + images=images, + results_collector=results_collector, + benchmark_inferences=benchmark_inferences, + batch_size=batch_size, + ) + + +def run_model_warm_up( + model: AnyModel, + inference_configuration: Dict[str, Any], + image: Union[np.ndarray, torch.Tensor], + warm_up_inferences: int, +) -> None: + for _ in tqdm( + range(warm_up_inferences), desc="Warming up model...", total=warm_up_inferences + ): + _ = model(image, **inference_configuration) + + +def run_benchmark( + model: AnyModel, + inference_configuration: Dict[str, Any], + images: List[Union[np.ndarray, torch.Tensor]], + results_collector: ResultsCollector, + benchmark_inferences: int, + batch_size: int, +) -> None: + while len(images) < batch_size: + images = images + images + results_collector.start_benchmark() + try: + for _ in range(benchmark_inferences): + random.shuffle(images) + payload = images[:batch_size] + start = time.time() + _ = model(payload, **inference_configuration) + duration = time.time() - start + results_collector.register_inference_duration( + batch_size=batch_size, duration=duration + ) + finally: + results_collector.stop_benchmark() diff --git a/inference_cli/lib/benchmark_adapter.py b/inference_cli/lib/benchmark_adapter.py index 2be2bbc77d..aaf4138f15 100644 --- 
a/inference_cli/lib/benchmark_adapter.py +++ b/inference_cli/lib/benchmark_adapter.py @@ -17,6 +17,7 @@ ) from inference_cli.lib.utils import ( dump_json, + ensure_inference_experimental_is_installed, ensure_inference_is_installed, initialise_client, ) @@ -229,6 +230,72 @@ def run_python_package_speed_benchmark( ) +def run_inference_experimental_benchmark( + model_id: str, + dataset_reference: str, + warm_up_inferences: int = 10, + benchmark_inferences: int = 1000, + batch_size: int = 1, + api_key: Optional[str] = None, + model_configuration: Optional[str] = None, + output_location: Optional[str] = None, + model_package_id: Optional[str] = None, + turn_images_to_tensors: bool = True, + allow_untrusted_packages: bool = True, +) -> None: + ensure_inference_experimental_is_installed() + + # importing here not to affect other entrypoints by missing `inference` core library + from inference_cli.lib.benchmark.inference_experimental_speed import ( + run_inference_experimental_benchmark, + ) + + dataset_images = load_dataset_images( + dataset_reference=dataset_reference, + ) + image_sizes = {i.shape[:2] for i in dataset_images} + print(f"Detected images dimensions: {image_sizes}") + results_collector = ResultsCollector() + statistics_display_thread = Thread( + target=display_benchmark_statistics, args=(results_collector,) + ) + statistics_display_thread.start() + run_inference_experimental_benchmark( + model_id=model_id, + images=dataset_images, + results_collector=results_collector, + warm_up_inferences=warm_up_inferences, + benchmark_inferences=benchmark_inferences, + batch_size=batch_size, + api_key=api_key, + model_configuration=model_configuration, + model_package_id=model_package_id, + turn_images_to_tensors=turn_images_to_tensors, + allow_untrusted_packages=allow_untrusted_packages, + ) + benchmark_results = results_collector.get_statistics() + statistics_display_thread.join() + if benchmark_results.avg_remote_execution_time is not None: + print( + f"Average execution time: {benchmark_results.avg_remote_execution_time:.3f}s (across {benchmark_results.inferences_made} inferences)" + ) + if output_location is None: + return None + benchmark_parameters = { + "datetime": datetime.now().isoformat(), + "model_id": model_id, + "dataset_reference": dataset_reference, + "benchmark_inferences": benchmark_inferences, + "batch_size": batch_size, + "model_configuration": model_configuration, + } + dump_benchmark_results( + output_location=output_location, + benchmark_parameters=benchmark_parameters, + benchmark_results=benchmark_results, + ) + + def dump_benchmark_results( output_location: str, benchmark_parameters: dict, diff --git a/inference_cli/lib/roboflow_cloud/data_staging/api_operations.py b/inference_cli/lib/roboflow_cloud/data_staging/api_operations.py index 9931c22888..8405dcb1d0 100644 --- a/inference_cli/lib/roboflow_cloud/data_staging/api_operations.py +++ b/inference_cli/lib/roboflow_cloud/data_staging/api_operations.py @@ -1620,17 +1620,17 @@ def _parse_bucket_path(bucket_path: str) -> Tuple[str, Optional[str]]: "s3://bucket/path/" -> ("s3://bucket/path/", None) "gs://bucket/" -> ("gs://bucket/", None) """ - has_glob = any(char in bucket_path for char in ['*', '?', '[', ']']) + has_glob = any(char in bucket_path for char in ["*", "?", "[", "]"]) if not has_glob: return bucket_path, None - parts = bucket_path.split('/') + parts = bucket_path.split("/") for i in range(len(parts) - 1, -1, -1): - if any(char in parts[i] for char in ['*', '?', '[', ']']): + if any(char in parts[i] for char in 
["*", "?", "[", "]"]): continue - base_path = '/'.join(parts[:i+1]) + '/' - glob_pattern = '/'.join(parts[i+1:]) + base_path = "/".join(parts[: i + 1]) + "/" + glob_pattern = "/".join(parts[i + 1 :]) return base_path, glob_pattern return bucket_path, None @@ -1682,10 +1682,14 @@ def _get_fs_kwargs(protocol: Optional[str] = None) -> dict: # Support both adlfs convention and Azure CLI standard naming if protocol in (None, "az", "abfs", "azure"): # Account name: try adlfs convention first, fall back to Azure CLI standard - azure_account = os.getenv("AZURE_STORAGE_ACCOUNT_NAME") or os.getenv("AZURE_STORAGE_ACCOUNT") + azure_account = os.getenv("AZURE_STORAGE_ACCOUNT_NAME") or os.getenv( + "AZURE_STORAGE_ACCOUNT" + ) # Account key: try adlfs convention first, fall back to Azure CLI standard - azure_key = os.getenv("AZURE_STORAGE_ACCOUNT_KEY") or os.getenv("AZURE_STORAGE_KEY") + azure_key = os.getenv("AZURE_STORAGE_ACCOUNT_KEY") or os.getenv( + "AZURE_STORAGE_KEY" + ) # SAS token: same name in both conventions azure_sas_token = os.getenv("AZURE_STORAGE_SAS_TOKEN") @@ -1721,26 +1725,26 @@ def _match_glob_pattern(path: str, pattern: str) -> bool: import re # Convert glob pattern to regex - pattern_parts = pattern.split('/') + pattern_parts = pattern.split("/") regex_parts = [] i = 0 while i < len(pattern_parts): part = pattern_parts[i] - if part == '**': + if part == "**": # ** matches zero or more path segments # If it's at the start and followed by more parts, it's optional if i == 0 and i + 1 < len(pattern_parts): # Make preceding path optional: either nothing or anything/ - regex_parts.append('(?:.*/)?') + regex_parts.append("(?:.*/)?") else: # Match any path segments - regex_parts.append('.*') + regex_parts.append(".*") i += 1 - elif '*' in part: + elif "*" in part: # * matches any characters except / - part_regex = re.escape(part).replace(r'\*', '[^/]*') + part_regex = re.escape(part).replace(r"\*", "[^/]*") regex_parts.append(part_regex) i += 1 else: @@ -1749,13 +1753,13 @@ def _match_glob_pattern(path: str, pattern: str) -> bool: i += 1 # Join with / but handle ** specially (already includes separator in regex) - regex_pattern = '' + regex_pattern = "" for j, part in enumerate(regex_parts): - if j > 0 and not regex_parts[j-1].endswith(')?'): - regex_pattern += '/' + if j > 0 and not regex_parts[j - 1].endswith(")?"): + regex_pattern += "/" regex_pattern += part - regex_pattern += '$' + regex_pattern += "$" return re.match(regex_pattern, path) is not None @@ -1786,7 +1790,7 @@ def _list_and_filter_files_streaming( Exception: Other fsspec errors """ protocol = base_path.split("://")[0] - base_without_protocol = base_path.split("://", 1)[1].rstrip('/') + base_without_protocol = base_path.split("://", 1)[1].rstrip("/") # Validate bucket/path exists before walking # This catches silent failures where fs.walk() would return empty @@ -1831,14 +1835,20 @@ def _list_and_filter_files_streaming( if root_path == base_without_protocol: relative_path = fname else: - relative_path = f"{root_path.removeprefix(base_without_protocol + '/')}/{fname}" + relative_path = ( + f"{root_path.removeprefix(base_without_protocol + '/')}/{fname}" + ) # Check glob pattern if specified if glob_pattern and not _match_glob_pattern(relative_path, glob_pattern): continue # Yield full path with protocol - full_path = f"{root}/{fname}" if root.startswith(f"{protocol}://") else f"{protocol}://{root}/{fname}" + full_path = ( + f"{root}/{fname}" + if root.startswith(f"{protocol}://") + else f"{protocol}://{root}/{fname}" + ) 
yield full_path @@ -1866,11 +1876,12 @@ def _generate_presigned_urls_parallel( Returns: List of dicts with 'name' and 'url' keys """ - from queue import Queue - from threading import Thread - from rich.progress import Progress, BarColumn, SpinnerColumn, TextColumn import multiprocessing import traceback + from queue import Queue + from threading import Thread + + from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn protocol = base_path.split("://")[0] file_queue = Queue(maxsize=0) # Unlimited queue for continuous listing @@ -1890,7 +1901,11 @@ def generate_url(file_path: str) -> dict: # Special handling for Azure with SAS token # When authenticated with SAS token, fs.sign() fails because it needs account_key # Instead, use the existing SAS token (ignoring expiration_seconds parameter) - if protocol in ("az", "abfs", "azure") and hasattr(fs, 'sas_token') and fs.sas_token: + if ( + protocol in ("az", "abfs", "azure") + and hasattr(fs, "sas_token") + and fs.sas_token + ): # Use adlfs built-in utilities to construct URL with existing SAS token path_without_protocol = file_path.split("://", 1)[1] container, blob, _ = fs.split_path(path_without_protocol) @@ -1922,10 +1937,10 @@ def producer(generator, queue, progress, task_id, exception_queue): except Exception as e: # Capture any errors from fs.walk() or file discovery error_info = { - 'error': e, - 'traceback': traceback.format_exc(), - 'context': 'File discovery (fs.walk)', - 'base_path': base_path + "error": e, + "traceback": traceback.format_exc(), + "context": "File discovery (fs.walk)", + "base_path": base_path, } exception_queue.put(error_info) finally: @@ -1952,10 +1967,10 @@ def consumer(queue, progress, task_id, exception_queue): except Exception as e: # Capture errors from fs.sign() or URL generation error_info = { - 'error': e, - 'traceback': traceback.format_exc(), - 'context': 'Presigned URL generation (fs.sign)', - 'file_path': file_path + "error": e, + "traceback": traceback.format_exc(), + "context": "Presigned URL generation (fs.sign)", + "file_path": file_path, } exception_queue.put(error_info) finally: @@ -1972,7 +1987,7 @@ def consumer(queue, progress, task_id, exception_queue): # Start producer thread producer_thread = Thread( target=producer, - args=(file_paths_generator, file_queue, progress, task, exception_queue) + args=(file_paths_generator, file_queue, progress, task, exception_queue), ) producer_thread.start() @@ -1981,7 +1996,9 @@ def consumer(queue, progress, task_id, exception_queue): consumer_threads = [] for _ in range(num_workers): - t = Thread(target=consumer, args=(file_queue, progress, task, exception_queue)) + t = Thread( + target=consumer, args=(file_queue, progress, task, exception_queue) + ) t.start() consumer_threads.append(t) @@ -2009,9 +2026,9 @@ def consumer(queue, progress, task_id, exception_queue): ) # Add context-specific details - if 'base_path' in first_error: + if "base_path" in first_error: error_msg += f"Base path: {first_error['base_path']}\n" - if 'file_path' in first_error: + if "file_path" in first_error: error_msg += f"File: {first_error['file_path']}\n" # If multiple errors, mention it @@ -2019,7 +2036,7 @@ def consumer(queue, progress, task_id, exception_queue): error_msg += f"\n(Plus {len(errors) - 1} additional error(s))" # Re-raise the original exception with enhanced context - raise type(first_error['error'])(error_msg) from first_error['error'] + raise type(first_error["error"])(error_msg) from first_error["error"] return results @@ -2076,8 +2093,12 @@ def 
create_images_batch_from_cloud_storage( ) if len(references) > MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST: - num_chunks = (len(references) + MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST - 1) // MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST - print(f"Files will be split into {num_chunks} chunks of up to {MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST} files each") + num_chunks = ( + len(references) + MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST - 1 + ) // MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST + print( + f"Files will be split into {num_chunks} chunks of up to {MAX_IMAGE_REFERENCES_IN_INGEST_REQUEST} files each" + ) workspace = get_workspace(api_key=api_key) @@ -2174,7 +2195,9 @@ def create_videos_batch_from_cloud_storage( print(f"Found {len(references)} video files") if len(references) > SUGGESTED_MAX_VIDEOS_IN_BATCH: - print(f"Warning: Found {len(references)} videos. Suggested max is {SUGGESTED_MAX_VIDEOS_IN_BATCH} videos per batch.") + print( + f"Warning: Found {len(references)} videos. Suggested max is {SUGGESTED_MAX_VIDEOS_IN_BATCH} videos per batch." + ) workspace = get_workspace(api_key=api_key) diff --git a/inference_cli/lib/roboflow_cloud/data_staging/core.py b/inference_cli/lib/roboflow_cloud/data_staging/core.py index c53ec6c2be..7ffe1fcdca 100644 --- a/inference_cli/lib/roboflow_cloud/data_staging/core.py +++ b/inference_cli/lib/roboflow_cloud/data_staging/core.py @@ -175,7 +175,7 @@ def create_batch_of_images( "--bucket-path", "-bp", help="Cloud storage path with optional glob pattern (e.g., 's3://bucket/path/**/*.jpg', 'gs://bucket/images/'). " - "Required for cloud-storage source. Supports S3, GCS, and Azure.", + "Required for cloud-storage source. Supports S3, GCS, and Azure.", ), ] = None, ingest_id: Annotated[ @@ -339,7 +339,7 @@ def create_batch_of_videos( "--bucket-path", "-bp", help="Cloud storage path with optional glob pattern (e.g., 's3://bucket/path/**/*.mp4', 'gs://bucket/videos/'). " - "Required for cloud-storage source. Supports S3, GCS, and Azure.", + "Required for cloud-storage source. Supports S3, GCS, and Azure.", ), ] = None, ingest_id: Annotated[ diff --git a/inference_cli/lib/utils.py b/inference_cli/lib/utils.py index 63c573cd44..394775bfbe 100644 --- a/inference_cli/lib/utils.py +++ b/inference_cli/lib/utils.py @@ -97,6 +97,15 @@ def ensure_inference_is_installed() -> None: ) from inner_error +def ensure_inference_experimental_is_installed() -> None: + try: + import inference_exp + except Exception as error: + raise InferencePackageMissingError( + "You need to install `inference-exp` package to use this feature. 
Run `pip install inference-exp`" + ) from error + + def read_json(path: str) -> dict: with open(path) as f: return json.load(f) diff --git a/inference_experimental/dockerfiles/jp51.cu114.core.dockerfile b/inference_experimental/dockerfiles/jp51.cu114.core.dockerfile new file mode 100644 index 0000000000..2389eaa854 --- /dev/null +++ b/inference_experimental/dockerfiles/jp51.cu114.core.dockerfile @@ -0,0 +1,207 @@ +FROM nvcr.io/nvidia/l4t-ml:r35.2.1-py3 AS builder + +# install Python 3.12 +RUN apt-get update -y && apt-get install -y \ + libssl-dev \ + git \ + unzip \ + libbz2-dev \ + libssl-dev \ + libsqlite3-dev \ + zlib1g-dev \ + liblzma-dev + +RUN mkdir -p /build/python-3.12 +WORKDIR /build/python-3.12 +RUN wget https://www.python.org/ftp/python/3.12.12/Python-3.12.12.tgz && tar -xzf Python-3.12.12.tgz +WORKDIR /build/python-3.12/Python-3.12.12 +RUN ./configure --enable-optimizations +RUN make -j$(nproc) && make altinstall + +RUN update-alternatives --install /usr/bin/python python /usr/local/bin/python3.12 1 +RUN update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.12 1 + +# Get rid of tensorrt-8.X +RUN apt remove -y 'libnvinfer*' 'libnvonnxparsers*' 'libnvparsers*' 'libnvinfer-plugin*' 'python3-libnvinfer*' 'tensorrt*' 'uff-converter*' 'graphsurgeon*' + +# Create out dir where all wheels will be stored +RUN mkdir -p /build/out/wheels + +# Install tensorrt-10.x +RUN mkdir -p /build/tensorrt-10.x +WORKDIR /build/tensorrt-10.x +RUN wget https://storage.googleapis.com/roboflow-tests-assets/TensorRT/TensorRT-10.8.0.43.l4t.aarch64-gnu.cuda-11.4.tar.gz +RUN tar xzf TensorRT-10.8.0.43.l4t.aarch64-gnu.cuda-11.4.tar.gz +WORKDIR /build/tensorrt-10.x/TensorRT-10.8.0.43/targets/aarch64-linux-gnu +RUN mkdir -p /usr/src/tensorrt/bin +RUN cp bin/trtexec /usr/src/tensorrt/bin/trtexec +RUN cp include/* /usr/include/aarch64-linux-gnu/ +RUN mkdir -p /usr/lib/aarch64-linux-gnu/stubs +RUN cp -r lib/stubs/* /usr/lib/aarch64-linux-gnu/stubs/ +RUN cp lib/libnvinfer.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer.so.10.8.0 && \ + cp lib/libnvinfer_builder_resource.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_builder_resource.so.10.8.0 && \ + cp lib/libnvinfer_static.a /usr/lib/aarch64-linux-gnu/libnvinfer_static.a && \ + cp lib/libnvinfer_dispatch.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_dispatch.so.10.8.0 && \ + cp lib/libnvinfer_dispatch_static.a /usr/lib/aarch64-linux-gnu/libnvinfer_dispatch_static.a && \ + cp lib/libnvinfer_lean.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_lean.so.10.8.0 && \ + cp lib/libnvinfer_lean_static.a /usr/lib/aarch64-linux-gnu/libnvinfer_lean_static.a && \ + cp lib/libnvinfer_plugin.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.10.8.0 && \ + cp lib/libnvinfer_plugin_static.a /usr/lib/aarch64-linux-gnu/libnvinfer_plugin_static.a && \ + cp lib/libnvinfer_vc_plugin.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_vc_plugin.so.10.8.0 && \ + cp lib/libnvinfer_vc_plugin_static.a /usr/lib/aarch64-linux-gnu/libnvinfer_vc_plugin_static.a && \ + cp lib/libnvonnxparser.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.10.8.0 && \ + cp lib/libnvonnxparser_static.a /usr/lib/aarch64-linux-gnu/libnvonnxparser_static.a && \ + cp lib/libonnx_proto.a /usr/lib/aarch64-linux-gnu/libonnx_proto.a + +RUN ln -s /usr/lib/aarch64-linux-gnu/libnvinfer.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer.so.10 && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer.so && \ + ln -s 
/usr/lib/aarch64-linux-gnu/libnvinfer_dispatch.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_dispatch.so.10 && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_dispatch.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_dispatch.so && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_lean.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_lean.so.10 && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_lean.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_lean.so && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.10 && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_vc_plugin.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_vc_plugin.so.10 && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_vc_plugin.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvinfer_vc_plugin.so && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.10 && \ + ln -s /usr/lib/aarch64-linux-gnu/libnvonnxparser.so.10.8.0 /usr/lib/aarch64-linux-gnu/libnvonnxparser.so + +WORKDIR /build/tensorrt-10.x/TensorRT-10.8.0.43/python +RUN cp -r * /build/out/wheels +RUN python3.12 -m pip install /build/out/wheels/tensorrt-10.8.0.43-cp312-none-linux_aarch64.whl + +# Install newer Cmake for builds +RUN mkdir -p /build/cmake +WORKDIR /build/cmake +RUN wget https://github.com/Kitware/CMake/releases/download/v4.1.2/cmake-4.1.2-linux-aarch64.sh +RUN mkdir build && chmod ugo+x cmake-4.1.2-linux-aarch64.sh && bash cmake-4.1.2-linux-aarch64.sh --skip-license --prefix=./build + +# Install gcc-11 +WORKDIR /build/gcc/ +RUN wget https://ftp.gnu.org/gnu/gcc/gcc-11.1.0/gcc-11.1.0.tar.gz +RUN tar xzf gcc-11.1.0.tar.gz +WORKDIR /build/gcc/gcc-11.1.0 +RUN ./contrib/download_prerequisites +WORKDIR /build/gcc/ +RUN mkdir objdir +WORKDIR /build/gcc/objdir +RUN $PWD/../gcc-11.1.0/configure --prefix=$HOME/GCC-11 --enable-languages=c,c++ +RUN make -j$(nproc) +RUN make install +RUN export PATH=/root/GCC-11/bin:$PATH +RUN export LD_LIBRARY_PATH=/root/GCC-11/lib64/:$LD_LIBRARY_PATH +RUN ldconfig + +# upgrade to CUDA 11.8 +WORKDIR /build/cuda-118 +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/arm64/cuda-ubuntu2004.pin -O /etc/apt/preferences.d/cuda-repository-pin-600 && \ + wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-tegra-repo-ubuntu2004-11-8-local_11.8.0-1_arm64.deb && \ + dpkg -i cuda-tegra-repo-*.deb && \ + rm cuda-tegra-repo-*.deb + +RUN cp /var/cuda-tegra-repo-*/cuda-tegra-*-keyring.gpg /usr/share/keyrings/ + +RUN mkdir /var/cuda-compat && \ + cd /var/cuda-compat && \ + ar x ../cuda-tegra-repo-*/cuda-compat-*.deb && \ + tar xvf data.tar.xz -C / && \ + rm -rf /var/cuda-compat + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + cuda-toolkit-* \ + && rm -rf /var/lib/apt/lists/* \ + && apt-get clean + +# Install ONNX-runtime GPU +RUN mkdir -p /build/onnxruntime +WORKDIR /build/onnxruntime +RUN git clone https://github.com/microsoft/onnxruntime.git +WORKDIR /build/onnxruntime/onnxruntime +RUN git checkout v1.21.1 +# Hash aligned with the source code that had this problem fixed on main branch - we need to stick to this version and patch, as our env is cuda 11 and the patched version do only support cuda 12 +RUN sed -i 
's|eigen;https://gitlab.com/libeigen/eigen/-/archive/1d8b82b0740839c0de7f1242a3585e3390ff5f33/eigen-1d8b82b0740839c0de7f1242a3585e3390ff5f33.zip;5ea4d05e62d7f954a46b3213f9b2535bdd866803|eigen;https://github.com/eigen-mirror/eigen/archive/1d8b82b0740839c0de7f1242a3585e3390ff5f33/eigen-1d8b82b0740839c0de7f1242a3585e3390ff5f33.zip;05b19b49e6fbb91246be711d801160528c135e34|' cmake/deps.txt +RUN python3.12 -m pip install packaging setuptools "numpy==2.3.5" +RUN LD_LIBRARY_PATH=/root/GCC-11/lib64/:$LD_LIBRARY_PATH CC=/root/GCC-11/bin/gcc CXX=/root/GCC-11/bin/g++ PATH=/build/cmake/build/bin:$PATH CMAKE_POLICY_VERSION_MINIMUM=3.5 ./build.sh --update --config Release --build --build_wheel --use_cuda --cuda_version=11.8 --cuda_home /usr/local/cuda-11.8 --cudnn_home /usr/lib/aarch64-linux-gnu --use_tensorrt --tensorrt_home /usr/lib/aarch64-linux-gnu --allow_running_as_root --parallel 4 --disable_types float8 --skip_tests --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF +RUN python3.12 -m pip install ./build/Linux/Release/dist/onnxruntime_gpu-1.21.1-cp312-cp312-linux_aarch64.whl +RUN cp ./build/Linux/Release/dist/onnxruntime_gpu-1.21.1-cp312-cp312-linux_aarch64.whl /build/out/wheels/onnxruntime_gpu-1.21.1-cp312-cp312-linux_aarch64.whl + +# Install PyTorch +RUN mkdir -p /build/torch +WORKDIR /build/torch +RUN git clone https://github.com/pytorch/pytorch.git +WORKDIR /build/torch/pytorch +RUN git checkout v2.4.1 +RUN git submodule sync && git submodule update --init --recursive +RUN PATH=/build/cmake/build/bin:$PATH python3.12 -m pip install setuptools wheel astunparse numpy ninja pyyaml cmake "typing-extensions>=4.10.0" requests +ARG MAX_TORCH_COMPILATION_JOBS=4 +RUN PATH=/build/cmake/build/bin:$PATH PYTORCH_BUILD_VERSION=2.4.1 PYTORCH_BUILD_NUMBER=1 MAX_JOBS=${MAX_TORCH_COMPILATION_JOBS} FORCE_CUDA=1 CUDA_HOME=/usr/local/cuda-11.8 CUDACXX=/usr/local/cuda-11.8/bin/nvcc TORCH_CUDA_ARCH_LIST="8.7" USE_NCCL=0 USE_DISTRIBUTED=0 USE_MKLDNN=0 BUILD_TEST=0 CMAKE_POLICY_VERSION_MINIMUM=3.5 python3.12 setup.py bdist_wheel +RUN python3.12 -m pip install dist/torch-*.whl +RUN cp dist/torch-*.whl /build/out/wheels/ + +# Install Torchvision +RUN mkdir -p /build/torchvision +WORKDIR /build/torchvision +RUN git clone https://github.com/pytorch/vision.git +WORKDIR /build/torchvision/vision +RUN git checkout v0.19.1 +RUN git submodule sync && git submodule update --init --recursive +RUN CC=/root/GCC-11/bin/gcc CXX=/root/GCC-11/bin/g++ FORCE_CUDA=1 PATH=/build/cmake/build/bin:$PATH BUILD_VERSION=0.19.1 TORCH_CUDA_ARCH_LIST="8.7" CUDA_HOME=/usr/local/cuda-11.8 CMAKE_POLICY_VERSION_MINIMUM=3.5 python3.12 setup.py bdist_wheel +RUN python3.12 -m pip install dist/torchvision-*.whl +RUN cp dist/torchvision-*.whl /build/out/wheels/ + +FROM nvcr.io/nvidia/l4t-ml:r35.2.1-py3 AS target + +RUN apt-get update -y && apt-get install -y \ + libssl-dev \ + git \ + unzip \ + libbz2-dev \ + libssl-dev \ + libsqlite3-dev \ + zlib1g-dev \ + liblzma-dev + +RUN apt remove -y 'libnvinfer*' 'libnvonnxparsers*' 'libnvparsers*' 'libnvinfer-plugin*' 'python3-libnvinfer*' 'tensorrt*' 'uff-converter*' 'graphsurgeon*' + + +COPY --from=builder /root/GCC-11 /opt/gcc-11 +COPY --from=builder /build/out/wheels /compiled_python_packages +COPY --from=builder /usr/include /usr/include +COPY --from=builder /usr/lib /usr/lib +COPY --from=builder /usr/share /usr/share +COPY --from=builder /usr/src /usr/src +COPY --from=builder /usr/local/bin /usr/local/bin +COPY --from=builder /usr/local/include /usr/local/include +COPY --from=builder /usr/local/lib 
/usr/local/lib +COPY --from=builder /usr/local/share /usr/local/share +COPY --from=builder /usr/local/cuda-11.8 /usr/local/cuda-11.8 +RUN rm /etc/alternatives/cuda /etc/alternatives/cuda-11 +RUN ln -s /usr/local/cuda-11.8 /etc/alternatives/cuda +RUN ln -s /usr/local/cuda-11.8 /etc/alternatives/cuda-11 +RUN rm -rf /usr/local/cuda-11.4 +ENV LD_LIBRARY_PATH="/opt/gcc-11/lib64:$LD_LIBRARY_PATH" + + +RUN update-alternatives --install /usr/bin/python python /usr/local/bin/python3.12 1 +RUN update-alternatives --install /usr/bin/python3 python3 /usr/local/bin/python3.12 1 + +# Install OpenCV +RUN mkdir -p /build/opencv +WORKDIR /build/opencv +RUN curl -L https://github.com/opencv/opencv/archive/4.12.0.zip -o opencv-4.12.0.zip +RUN curl -L https://github.com/opencv/opencv_contrib/archive/4.12.0.zip -o opencv_contrib-4.12.0.zip +RUN unzip opencv-4.12.0.zip +RUN unzip opencv_contrib-4.12.0.zip +WORKDIR /build/opencv/opencv-4.12.0 +RUN mkdir release +WORKDIR /build/opencv/opencv-4.12.0/release +RUN cmake -D WITH_CUDA=ON -D WITH_CUDNN=ON -D CUDA_ARCH_BIN="8.7" -D CUDA_ARCH_PTX="" -D OPENCV_GENERATE_PKGCONFIG=ON -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib-4.12.0/modules -D WITH_GSTREAMER=ON -D WITH_LIBV4L=ON -D BUILD_opencv_python3=ON -D BUILD_TESTS=OFF -D BUILD_PERF_TESTS=OFF -D BUILD_EXAMPLES=OFF -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D PYTHON3_INCLUDE_DIR=/usr/local/include/python3.12 -D OPENCV_PYTHON3_INSTALL_PATH=/usr/local/lib/python3.12/site-packages -D PYTHON3_EXECUTABLE=/usr/local/bin/python3.12 -D PYTHON_VERSION=312 -DBUILD_SHARED_LIBS=OFF -DWITH_OPENCLAMDFFT=OFF -DWITH_OPENCLAMDBLAS=OFF -DWITH_VA_INTEL=OFF .. +RUN make -j$(nproc) +RUN make install +RUN python3.12 -m pip wheel ./python_loader --wheel-dir /build/out/wheels --verbose +RUN python3.12 -m pip install /build/out/wheels/opencv-4.12.0-py3-none-any.whl + +WORKDIR / + +ENTRYPOINT ["bash"] diff --git a/inference_experimental/dockerfiles/jp61.cu126.base.dockerfile b/inference_experimental/dockerfiles/jp61.cu126.base.dockerfile index 6126d85175..ff92b0cf48 100644 --- a/inference_experimental/dockerfiles/jp61.cu126.base.dockerfile +++ b/inference_experimental/dockerfiles/jp61.cu126.base.dockerfile @@ -3,6 +3,7 @@ FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 ARG DEBIAN_FRONTEND=noninteractive ENV LANG=en_US.UTF-8 +RUN chmod 1777 /tmp RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ lshw \ diff --git a/inference_experimental/inference_exp/models/common/onnx.py b/inference_experimental/inference_exp/models/common/onnx.py index c17203971c..1d40f0681a 100644 --- a/inference_experimental/inference_exp/models/common/onnx.py +++ b/inference_experimental/inference_exp/models/common/onnx.py @@ -281,8 +281,14 @@ def run_session_via_iobinding( if pre_allocated_output is not None: result.append(pre_allocated_output) continue - dlpack_tensor = bound_output._ortvalue.to_dlpack() - out_tensor = torch.utils.dlpack.from_dlpack(dlpack_tensor) + # This is added for the sake of compatibility with older builds of onnxruntime + # which do not support zero-copy OrtValue -> torch.Tensor thanks to dlpack + if not hasattr(bound_output._ortvalue, "to_dlpack"): + # slower but needed :( + out_tensor = torch.from_numpy(bound_output._ortvalue.numpy()).to(device) + else: + dlpack_tensor = bound_output._ortvalue.to_dlpack() + out_tensor = torch.utils.dlpack.from_dlpack(dlpack_tensor) result.append(out_tensor) return result diff --git a/inference_experimental/pyproject.toml b/inference_experimental/pyproject.toml index
851392cf0c..66dfc142fd 100644 --- a/inference_experimental/pyproject.toml +++ b/inference_experimental/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "inference-exp" -version = "0.16.3" +version = "0.16.4" description = "Experimental vresion of inference package which is supposed to evolve into inference 1.0" readme = "README.md" requires-python = ">=3.9,<3.13" diff --git a/inference_experimental/uv.lock b/inference_experimental/uv.lock index 2e971a919d..0b20099192 100644 --- a/inference_experimental/uv.lock +++ b/inference_experimental/uv.lock @@ -1991,7 +1991,7 @@ wheels = [ [[package]] name = "inference-exp" -version = "0.16.3" +version = "0.16.4" source = { virtual = "." } dependencies = [ { name = "accelerate" }, diff --git a/requirements/_requirements.txt b/requirements/_requirements.txt index 535e9c390d..52ed81eb51 100644 --- a/requirements/_requirements.txt +++ b/requirements/_requirements.txt @@ -26,7 +26,8 @@ pydantic-settings<2.8 openai>=1.12.0,<2.0.0 structlog>=24.1.0,<25.0.0 zxing-cpp~=2.2.0 -boto3<=1.35.60 +boto3>=1.40.0,<=1.41.5 +botocore>=1.40.0,<=1.41.5 typing_extensions>=4.8.0,<=4.12.2 pydot~=2.0.0 shapely>=2.0.4,<2.1.0