Skip to content

Commit 0815882

Browse files
committed
feat: add gallery examples registry mapping examples to datasets
Adds cross-ecosystem registry cataloging ~470 examples from Vega, Vega-Lite, and Altair galleries, tracking which datasets each example uses. New files: - _data/gallery_examples.toml: Configuration (URLs, Altair name mappings) - scripts/generate_gallery_examples.py: Generator (2,289 lines, fully typed) - gallery_examples.json: Generated output (~470 examples) When joined with datapackage.json, enables: - Dataset-first learning (find all examples using specific dataset) - Curation analytics (dataset coverage matrices, gap analysis) - High-quality training data for visualization AI/ML systems Examples are curated by the Vega community to demonstrate essential visualization techniques and design patterns. Implementation details: - Handles different spec formats per framework (Vega, Vega-Lite, Altair) - Normalizes all references to canonical datapackage.json names - Altair deduplication: Uses method-based syntax (preferred as of Altair 5) when examples exist in both syntax directories (116 cases) - Temporary name mappings for Altair API (3 mappings, will be removed after Altair PR #3859 lands) - Comprehensive type safety with TypedDict, Protocols, semantic type aliases - Protocol-based validation infrastructure for extensibility Runtime: ~15 seconds to collect all examples Quality: All checks pass (taplo, ruff, pyright, npm build)
1 parent 552ecc5 commit 0815882

File tree

3 files changed

+8759
-0
lines changed

3 files changed

+8759
-0
lines changed

_data/gallery_examples.toml

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Configuration for gallery_examples collection script
2+
# This file externalizes URLs, mappings, and settings to make maintenance easier.
3+
4+
# ============================================================================
5+
# Altair Dataset Name Mappings
6+
# ============================================================================
7+
#
8+
# Manual mappings for Altair Python API names that don't match datapackage.json
9+
# canonical names. These mappings are needed because:
10+
#
11+
# 1. Altair uses camelCase API names (e.g., data.londonBoroughs.url)
12+
# 2. The datapackage.json uses snake_case canonical names (e.g., london_boroughs)
13+
# 3. Some Altair API names don't include file extensions
14+
#
15+
# TEMPORARY: This section will eventually be removed once:
16+
# - The dataset name mapper includes extension-less variants (see ALTAIR_APPROACH_COMPARISON_RESULTS.md)
17+
# - OR Altair completes its migration to match vega-datasets naming conventions
18+
#
19+
# Format: altair_api_name = "canonical_datapackage_name"
20+
21+
[altair.name_mapping]
22+
londonBoroughs = "london_boroughs"
23+
londonCentroids = "london_centroids"
24+
londonTubeLines = "london_tube_lines"
25+
# Add more mappings here as they are discovered
26+
# Example for icon datasets:
27+
# "7zip" = "icon_7zip"
28+
# "github" = "icon_github"
29+
30+
# ============================================================================
31+
# Data Source URLs
32+
# ============================================================================
33+
#
34+
# URLs for fetching gallery metadata and the dataset catalog, plus the Altair example directories to scan.
35+
# All URLs point to each repository's main branch, so fetched content tracks the latest committed state rather than a tagged release.
36+
37+
[sources]
38+
# Vega-datasets canonical dataset catalog
39+
datapackage_url = "https://raw.githubusercontent.com/vega/vega-datasets/main/datapackage.json"
40+
41+
# Vega-Lite gallery examples metadata
42+
vega_lite_examples_url = "https://raw.githubusercontent.com/vega/vega-lite/main/site/_data/examples.json"
43+
44+
# Vega gallery examples metadata
45+
vega_examples_url = "https://raw.githubusercontent.com/vega/vega/main/docs/_data/examples.json"
46+
47+
# Altair example directories
48+
# The script fetches Python files from both syntax styles
49+
altair_examples_dirs = [
50+
"tests/examples_methods_syntax",
51+
"tests/examples_arguments_syntax",
52+
]
53+
54+
# ============================================================================
55+
# Output Configuration
56+
# ============================================================================
57+
#
58+
# Default output settings for the generated JSON file.
59+
60+
[output]
61+
# Default output file path (relative to repository root)
62+
# Can be overridden with --output CLI argument
63+
default_output_path = "gallery_examples.json"
64+
65+
# Dry run mode (doesn't write output file)
66+
# Can be overridden with --dry-run CLI flag
67+
dry_run = false
68+
69+
# ============================================================================
70+
# Network Settings
71+
# ============================================================================
72+
#
73+
# HTTP request configuration for fetching remote resources.
74+
75+
[network]
76+
# Timeout in seconds for HTTP requests
77+
# Used for fetching metadata files and individual example specifications
78+
timeout = 30
79+
80+
# Maximum number of retries for failed requests (future use)
81+
# Currently not implemented, but reserved for potential retry logic
82+
max_retries = 3

0 commit comments

Comments
 (0)