3 changes: 3 additions & 0 deletions .gitignore
@@ -1 +1,4 @@
**/__pycache__/**
rf100-vl.egg-info/**
rf100-vl/
rf100vl.egg-info/
78 changes: 78 additions & 0 deletions ODINW_BENCHMARK_NOTES.md
@@ -0,0 +1,78 @@
# ODinW Benchmark Notes

## Dataset Structure Quirks

When benchmarking models on ODinW datasets, note the following special cases:

### Directory Structure Special Cases

1. **PascalVOC**
- Uses `valid` folder instead of `test` folder for evaluation
   - The benchmark script should automatically use `split="valid"` when the dataset is PascalVOC

2. **pistols**
- Uses `export` folder as the root directory (not standard train/test/valid)
   - Contains the test split in `/root/odinw/pistols/export/`

### ODinW-13 Datasets

The 13 datasets used in the original GLIP ODinW benchmark:
1. AerialMaritimeDrone (large variant)
2. Aquarium
3. CottontailRabbits
4. EgoHands (generic variant)
5. NorthAmericaMushrooms
6. Packages
7. PascalVOC ⚠️ uses `valid` split
8. Raccoon
9. ShellfishOpenImages
10. VehiclesOpenImages
11. pistols ⚠️ uses `export` directory
12. pothole
13. thermalDogsAndPeople
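
These names and their quirks can be captured in a small configuration table so the benchmark loop stays free of per-dataset branching. The sketch below is illustrative only; the variant subfolder names (`large`, `generic`) and the override keys are assumptions to verify against the local `/root/odinw/` layout.

```python
# Illustrative ODinW-13 configuration; directory/variant names are assumptions.
ODINW_13 = {
    "AerialMaritimeDrone": {"variant": "large"},
    "Aquarium": {},
    "CottontailRabbits": {},
    "EgoHands": {"variant": "generic"},
    "NorthAmericaMushrooms": {},
    "Packages": {},
    "PascalVOC": {"split": "valid"},   # evaluate on 'valid', not 'test'
    "Raccoon": {},
    "ShellfishOpenImages": {},
    "VehiclesOpenImages": {},
    "pistols": {"root": "export"},     # splits live under 'export'
    "pothole": {},
    "thermalDogsAndPeople": {},
}
```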

### ODinW-35 Datasets

The full 35-dataset benchmark includes:
- All 13 datasets from ODinW-13
- 22 additional datasets with various configurations
- Some datasets have multiple variants (e.g., AerialMaritimeDrone has both `large` and `tiled`)
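
If the full 35-dataset layout needs to be enumerated programmatically, one possible approach, assuming each dataset directory either holds the split folders directly or one level of variant subdirectories, is sketched below (the helper name and the layout assumption are hypothetical):

```python
import os

SPLIT_DIRS = {"train", "test", "valid", "export"}

def list_odinw_entries(odinw_root="/root/odinw"):
    """Yield (dataset, variant_or_None, path) tuples; variant layout is an assumption."""
    for dataset in sorted(os.listdir(odinw_root)):
        ds_path = os.path.join(odinw_root, dataset)
        if not os.path.isdir(ds_path):
            continue
        if SPLIT_DIRS & set(os.listdir(ds_path)):
            yield dataset, None, ds_path  # splits sit directly under the dataset
        else:
            # e.g. AerialMaritimeDrone/large and AerialMaritimeDrone/tiled
            for variant in sorted(os.listdir(ds_path)):
                var_path = os.path.join(ds_path, variant)
                if os.path.isdir(var_path):
                    yield dataset, variant, var_path
```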

## Implementation Notes

### Handling Special Cases in Code

```python
import os

def find_dataset_root(dataset_path, split="test"):
    """Return the directory holding the requested split, handling special cases."""
    dataset_name = os.path.basename(dataset_path)

    # pistols: annotations live under the 'export' directory
    if dataset_name == "pistols":
        export_dir = os.path.join(dataset_path, "export")
        if os.path.exists(export_dir):
            return export_dir

    # PascalVOC: uses 'valid' instead of 'test'
    if dataset_name == "PascalVOC" and split == "test":
        split = "valid"

    return os.path.join(dataset_path, split)
```
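
With this sketch, `find_dataset_root("/root/odinw/pistols")` resolves to `/root/odinw/pistols/export`, and `find_dataset_root("/root/odinw/PascalVOC", split="test")` falls through to `/root/odinw/PascalVOC/valid`; all other datasets return their regular split directory.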

### Zero-Shot Evaluation

For zero-shot evaluation on ODinW:
- Use class names directly (no "a photo of a" prefix)
- Score threshold: 0.01
- Model: OWLv2-Large recommended
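
A minimal sketch of this setup, assuming the Hugging Face `transformers` OWLv2 checkpoint `google/owlv2-large-patch14-ensemble` (the actual benchmark script may load the model differently; the image path and class list are placeholders):

```python
# Minimal zero-shot sketch with Hugging Face transformers OWLv2; paths are placeholders.
import torch
from PIL import Image
from transformers import Owlv2Processor, Owlv2ForObjectDetection

processor = Owlv2Processor.from_pretrained("google/owlv2-large-patch14-ensemble")
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-large-patch14-ensemble")

image = Image.open("example.jpg")   # hypothetical test image
class_names = ["raccoon"]           # class names used as-is, no "a photo of a" prefix

inputs = processor(text=[class_names], images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Keep detections above the 0.01 threshold noted above.
target_sizes = torch.tensor([image.size[::-1]])  # (height, width)
results = processor.post_process_object_detection(
    outputs, threshold=0.01, target_sizes=target_sizes
)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(class_names[int(label)], float(score), box.tolist())
```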

## Directory Locations

- **ODinW data**: `/root/odinw/`
- **Predictions**: `/root/predictions/odinw13_owlv2_zeroshot/`
- **Benchmark script**: `/root/benchmark_odinw_owlv2.py`
- **Multi-GPU launcher**: `/root/run_odinw13_8gpus.py`

## References

- [GLIP repository (ODinW benchmark)](https://github.com/microsoft/GLIP)
- [ODinW Challenge](https://eval.ai/web/challenges/challenge-page/1839/overview)
199 changes: 199 additions & 0 deletions benchmark_dinox.py
@@ -0,0 +1,199 @@
import os
import json
import time
from PIL import Image
from dds_cloudapi_sdk import Config, Client
from dds_cloudapi_sdk.image_resizer import image_to_base64
from dds_cloudapi_sdk.tasks.v2_task import V2Task
from tqdm import tqdm
import argparse


def load_coco_annotations(annotation_path):
    """Load COCO format annotations and extract categories."""
    with open(annotation_path, 'r') as f:
        coco_data = json.load(f)

    # Extract category names
    categories = {cat['id']: cat['name'] for cat in coco_data['categories']}
    category_names = [cat['name'] for cat in coco_data['categories']]

    # Extract images
    images = {img['id']: img for img in coco_data['images']}

    return coco_data, categories, category_names, images


def run_dinox_inference(client, image_path, text_prompt, threshold=0.25, iou_threshold=0.8):
    """Run DINO-X inference on a single image via API."""
    try:
        # Convert image to base64
        image = image_to_base64(image_path)
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return []

    # Prepare API request
    api_path = "/v2/task/dinox/detection"
    api_body = {
        "model": "DINO-X-1.0",
        "image": image,
        "prompt": {
            "type": "text",
            "text": text_prompt
        },
        "targets": ["bbox"],
        "bbox_threshold": threshold,
        "iou_threshold": iou_threshold
    }

    task = V2Task(api_path=api_path, api_body=api_body)

    try:
        client.run_task(task)
        result = task.result
        objects = result.get("objects", [])
    except Exception as e:
        print(f"Error running inference on {image_path}: {e}")
        return []

    # Convert results to COCO format
    predictions = []
    for obj in objects:
        bbox = obj["bbox"]  # Already in COCO format [x, y, width, height]
        score = obj["score"]
        category_name = obj["category"].lower().strip()

        predictions.append({
            "bbox": bbox,
            "score": score,
            "category_name": category_name
        })

    return predictions


def benchmark_dataset(dataset_path, output_dir, api_token, threshold=0.25, iou_threshold=0.8, rate_limit_delay=0.5):
    """Benchmark DINO-X on a single dataset."""
    dataset_name = os.path.basename(dataset_path)
    test_dir = os.path.join(dataset_path, "test")
    annotation_path = os.path.join(test_dir, "_annotations.coco.json")

    if not os.path.exists(annotation_path):
        print(f"Skipping {dataset_name}: No annotations found at {annotation_path}")
        return

    print(f"\nProcessing dataset: {dataset_name}")

    # Load annotations
    coco_data, categories, category_names, images = load_coco_annotations(annotation_path)

    # Create name to id mapping
    category_name_to_id = {name.lower().strip(): cat_id for cat_id, name in categories.items()}

    # Prepare text prompt in DINO-X format (period-separated)
    text_prompt = " . ".join(category_names)
    print(f"Categories: {category_names}")
    print(f"Text prompt: {text_prompt}")

    # Initialize DINO-X client
    config = Config(api_token)
    client = Client(config)

    # Run inference on all images
    all_predictions = []

    for img_id, img_info in tqdm(images.items(), desc=f"Processing {dataset_name}"):
        image_path = os.path.join(test_dir, img_info['file_name'])

        if not os.path.exists(image_path):
            print(f"Warning: Image not found: {image_path}")
            continue

        predictions = run_dinox_inference(
            client, image_path, text_prompt, threshold, iou_threshold
        )

        # Map category names to IDs and add image_id
        for pred in predictions:
            category_name = pred["category_name"]
            if category_name in category_name_to_id:
                pred['category_id'] = category_name_to_id[category_name]
                pred['image_id'] = img_id
                # Remove the category_name field as it's not needed in COCO format
                del pred['category_name']
                all_predictions.append(pred)
            else:
                print(f"Warning: Category '{category_name}' not found in dataset categories")

        # Rate limiting to avoid API throttling
        time.sleep(rate_limit_delay)

    # Save predictions in COCO format
    output_dataset_dir = os.path.join(output_dir, dataset_name)
    os.makedirs(output_dataset_dir, exist_ok=True)
    output_path = os.path.join(output_dataset_dir, "predictions.json")

    with open(output_path, 'w') as f:
        json.dump(all_predictions, f, indent=2)

    print(f"Saved {len(all_predictions)} predictions to {output_path}")


def main():
    parser = argparse.ArgumentParser(description="Benchmark DINO-X on RF100-VL datasets")
    parser.add_argument("--rf100_dir", type=str, default="rf100-vl/rf100-vl",
                        help="Path to RF100-VL datasets directory")
    parser.add_argument("--output_dir", type=str, default="../predictions/dinox",
                        help="Directory to save predictions")
    parser.add_argument("--api_token", type=str, required=True,
                        help="DINO-X API token")
    parser.add_argument("--threshold", type=float, default=0.25,
                        help="Detection confidence threshold")
    parser.add_argument("--iou_threshold", type=float, default=0.8,
                        help="IOU threshold for NMS")
    parser.add_argument("--rate_limit_delay", type=float, default=0.5,
                        help="Delay between API calls (seconds)")
    parser.add_argument("--datasets", type=str, nargs='+', default=None,
                        help="Specific datasets to benchmark (default: all)")

    args = parser.parse_args()

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Get list of datasets
    if args.datasets:
        datasets = args.datasets
    else:
        datasets = [d for d in os.listdir(args.rf100_dir)
                    if os.path.isdir(os.path.join(args.rf100_dir, d))]

    print(f"Found {len(datasets)} datasets to process")

    # Process each dataset
    for dataset_name in datasets:
        dataset_path = os.path.join(args.rf100_dir, dataset_name)

        if not os.path.isdir(dataset_path):
            print(f"Skipping {dataset_name}: Not a directory")
            continue

        try:
            benchmark_dataset(
                dataset_path=dataset_path,
                output_dir=args.output_dir,
                api_token=args.api_token,
                threshold=args.threshold,
                iou_threshold=args.iou_threshold,
                rate_limit_delay=args.rate_limit_delay
            )
        except Exception as e:
            print(f"Error processing {dataset_name}: {e}")
            import traceback
            traceback.print_exc()
            continue


if __name__ == "__main__":
    main()
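
The saved `predictions.json` files are standard COCO detection results (`image_id`, `category_id`, `bbox`, `score`), so they can be scored against the ground-truth annotations with `pycocotools`. A minimal sketch, with illustrative paths:

```python
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# Illustrative paths; adjust to the dataset and output directory actually used.
coco_gt = COCO("rf100-vl/rf100-vl/<dataset>/test/_annotations.coco.json")
coco_dt = coco_gt.loadRes("../predictions/dinox/<dataset>/predictions.json")

evaluator = COCOeval(coco_gt, coco_dt, iouType="bbox")
evaluator.evaluate()
evaluator.accumulate()
evaluator.summarize()  # prints AP/AR, including mAP@[.50:.95]
```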