Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 159 additions & 0 deletions crates/prof/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,157 @@ impl MetricDb {
}
}

/// Generate a Mermaid XY chart for GPU memory usage over modules.
///
/// Scans every label group that has a `module` label and collects one sample per
/// entry that carries all of `gpu_mem.timestamp_ms`, `gpu_mem.tracked_bytes`,
/// `gpu_mem.reserved_bytes` and `gpu_mem.device_bytes`. `gpu_mem.delta_bytes` is
/// optional and counts as `0.0` when absent. Every value is divided by `1e9`
/// before being reported in the "GB" columns.
///
/// The returned markdown consists of:
/// 1. a fenced `mermaid` `xychart-beta` block with three line series
///    (tracked, reserved, device) ordered by timestamp,
/// 2. a legend and the overall maximum of each series,
/// 3. a per-module table (average delta, max delta, max tracked and the
///    context label at which max tracked occurred), sorted by max tracked
///    in descending order.
///
/// Returns `None` when no entry provides a complete sample.
///
/// Ordering is fully deterministic: all float comparisons use
/// [`f64::total_cmp`] and ties are broken on the remaining fields, so the
/// output does not depend on hash-map iteration order and sorting cannot
/// observe an inconsistent comparator if a metric value is NaN.
pub fn generate_gpu_memory_chart(&self) -> Option<String> {
    // Divisor used to turn the raw metric values into the "GB" figures shown.
    const BYTES_PER_GB: f64 = 1e9;

    // (timestamp, tracked_gb, reserved_gb, device_gb)
    let mut data: Vec<(f64, f64, f64, f64)> = Vec::new();
    // module -> [(delta_gb, tracked_gb, context_label)]
    let mut module_stats: HashMap<String, Vec<(f64, f64, String)>> = HashMap::new();

    for (label_keys, metrics_dict) in &self.dict_by_label_types {
        // Only label groups that carry a `module` label contribute to the chart.
        let module_idx = match label_keys.iter().position(|k| k == "module") {
            Some(idx) => idx,
            None => continue,
        };

        for (label_values, metrics) in metrics_dict {
            let get = |name: &str| metrics.iter().find(|m| m.name == name).map(|m| m.value);
            let ts = get("gpu_mem.timestamp_ms");
            let delta = get("gpu_mem.delta_bytes");
            let tracked = get("gpu_mem.tracked_bytes");
            let reserved = get("gpu_mem.reserved_bytes");
            let device = get("gpu_mem.device_bytes");

            // A sample is only usable when all three series and the timestamp exist.
            if let (Some(ts), Some(tracked), Some(reserved), Some(device)) =
                (ts, tracked, reserved, device)
            {
                let tracked_gb = tracked / BYTES_PER_GB;
                let reserved_gb = reserved / BYTES_PER_GB;
                let device_gb = device / BYTES_PER_GB;
                data.push((ts, tracked_gb, reserved_gb, device_gb));

                let module_name = label_values.get(module_idx).cloned().unwrap_or_default();
                // Every label except `module`, joined with '.', identifies where
                // inside the module the sample was taken.
                let context_label: String = label_keys
                    .iter()
                    .zip(label_values.iter())
                    .filter(|(k, _)| *k != "module")
                    .map(|(_, v)| v.as_str())
                    .collect::<Vec<_>>()
                    .join(".");

                let delta_gb = delta.map(|d| d / BYTES_PER_GB).unwrap_or(0.0);
                module_stats.entry(module_name).or_default().push((
                    delta_gb,
                    tracked_gb,
                    context_label,
                ));
            }
        }
    }

    if data.is_empty() {
        return None;
    }

    // Chronological order. `total_cmp` is a total order even for NaN, and the
    // remaining fields break timestamp ties so the plotted series do not depend
    // on the iteration order of the underlying maps.
    data.sort_by(|a, b| {
        a.0.total_cmp(&b.0)
            .then_with(|| a.1.total_cmp(&b.1))
            .then_with(|| a.2.total_cmp(&b.2))
            .then_with(|| a.3.total_cmp(&b.3))
    });

    let max_tracked = data.iter().map(|(_, t, _, _)| *t).fold(0.0_f64, f64::max);
    let max_reserved = data.iter().map(|(_, _, r, _)| *r).fold(0.0_f64, f64::max);
    let max_device = data.iter().map(|(_, _, _, d)| *d).fold(0.0_f64, f64::max);
    let chart_max = max_tracked.max(max_reserved).max(max_device);

    let mut chart = String::new();
    chart.push_str("```mermaid\n");
    // The front-matter is YAML, so nesting has to be expressed through
    // indentation: `xAxis` belongs under `config.xyChart`, and the palette
    // under `config.themeVariables.xyChart`.
    chart.push_str(concat!(
        "---\n",
        "config:\n",
        "  xyChart:\n",
        "    xAxis:\n",
        "      showLabel: false\n",
        "  themeVariables:\n",
        "    xyChart:\n",
        "      plotColorPalette: \"#2563eb, #16a34a, #dc2626\"\n",
        "---\n",
    ));
    chart.push_str("xychart-beta\n");
    chart.push_str(" title \"GPU Memory Usage\"\n");
    // Leave 10% head-room above the largest plotted value.
    chart.push_str(&format!(
        " y-axis \"Memory (GB)\" 0 --> {:.1}\n",
        chart_max * 1.1
    ));

    // Renders one ` line [...]` statement from the selected tuple field.
    let series_line = |pick: fn(&(f64, f64, f64, f64)) -> f64| -> String {
        let points = data
            .iter()
            .map(|sample| format!("{:.2}", pick(sample)))
            .collect::<Vec<_>>()
            .join(", ");
        format!(" line [{}]\n", points)
    };
    // Series order must match `plotColorPalette`: blue, green, red.
    chart.push_str(&series_line(|sample| sample.1)); // Tracked memory line (blue)
    chart.push_str(&series_line(|sample| sample.2)); // Reserved memory line (green)
    chart.push_str(&series_line(|sample| sample.3)); // Device memory line (red)
    chart.push_str("```\n");

    chart.push_str("\n> **Legend:** ");
    chart.push_str("🔵 Tracked | ");
    chart.push_str("🟢 Reserved | ");
    chart.push_str("🔴 Device\n");
    chart.push_str(&format!("\n**Tracked Max: {:.2} GB**\n", max_tracked));
    chart.push_str(&format!("**Reserved Max: {:.2} GB**\n", max_reserved));
    chart.push_str(&format!("**Device Max: {:.2} GB**\n", max_device));

    // Per-module stats table
    chart.push_str(
        "\n| Module | Avg Delta (GB) | Max Delta (GB) | Max Tracked (GB) | Max Tracked At |\n",
    );
    chart.push_str("| --- | ---: | ---: | ---: | --- |\n");

    let mut module_rows: Vec<_> = module_stats
        .iter()
        .map(|(module, entries)| {
            // `entries` is never empty: a module key is only created together
            // with its first sample, so the division below is well-defined.
            let avg_delta =
                entries.iter().map(|(d, _, _)| *d).sum::<f64>() / entries.len() as f64;
            let max_delta = entries
                .iter()
                .map(|(d, _, _)| *d)
                .fold(f64::NEG_INFINITY, f64::max);
            // Highest tracked value; among equal values the lexicographically
            // smallest context label wins, so the reported location is stable.
            let (max_tracked, max_at) = entries
                .iter()
                .max_by(|a, b| a.1.total_cmp(&b.1).then_with(|| b.2.cmp(&a.2)))
                .map(|(_, t, label)| (*t, label.as_str()))
                .unwrap_or((0.0, ""));
            (module, avg_delta, max_delta, max_tracked, max_at)
        })
        .collect();
    // Largest consumers first; module name breaks ties because the rows come
    // out of a `HashMap` in arbitrary order.
    module_rows.sort_by(|a, b| {
        b.3.total_cmp(&a.3)
            .then_with(|| a.0.as_str().cmp(b.0.as_str()))
    });

    for (module, avg_delta, max_delta, max_tracked, max_at) in module_rows {
        chart.push_str(&format!(
            "| {} | {:.3} | {:.3} | {:.2} | {} |\n",
            module, avg_delta, max_delta, max_tracked, max_at
        ));
    }

    Some(chart)
}

pub fn generate_markdown_tables(&self) -> String {
let mut markdown_output = String::new();
// Get sorted keys to iterate in consistent order
Expand All @@ -148,6 +299,14 @@ impl MetricDb {
.collect();
metric_names.sort_by(|a, b| b.cmp(a));

// Filter out gpu_mem metrics - these are summarized in the GPU memory chart
metric_names.retain(|n| !n.starts_with("gpu_mem."));

// Skip tables that have no metrics left after filtering
if metric_names.is_empty() {
continue;
}

// Create table header
let header = format!(
"| {} | {} |",
Expand Down
7 changes: 7 additions & 0 deletions crates/prof/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@ fn main() -> Result<()> {

let mut markdown_output = String::from_utf8(writer)?;

// Add GPU memory chart if available
if let Some(chart) = db.generate_gpu_memory_chart() {
markdown_output.push_str("\n## GPU Memory Usage\n\n");
markdown_output.push_str(&chart);
markdown_output.push('\n');
}

// TODO: calculate diffs for detailed metrics
// Add detailed metrics in a collapsible section
markdown_output.push_str("\n<details>\n<summary>Detailed Metrics</summary>\n\n");
Expand Down
7 changes: 6 additions & 1 deletion crates/vm/src/system/cuda/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use openvm_cuda_common::{
use openvm_stark_backend::{
p3_field::FieldAlgebra, p3_util::log2_ceil_usize, prover::types::AirProvingContext, Chip,
};
use tracing::instrument;

use super::{
access_adapters::AccessAdapterInventoryGPU,
Expand Down Expand Up @@ -85,8 +86,9 @@ impl MemoryInventoryGPU {
self.persistent.is_some()
}

#[instrument(name = "set_initial_memory", skip_all)]
pub fn set_initial_memory(&mut self, initial_memory: &AddressMap) {
let _mem = MemTracker::start("set initial memory");
let mem = MemTracker::start("set initial memory");
let persistent = self
.persistent
.as_mut()
Expand Down Expand Up @@ -126,8 +128,10 @@ impl MemoryInventoryGPU {
.collect();
}
}
mem.emit_metrics();
}

#[instrument(name = "generate_proving_ctxs", skip_all)]
pub fn generate_proving_ctxs(
&mut self,
access_adapter_arena: DenseRecordArena,
Expand Down Expand Up @@ -229,6 +233,7 @@ impl MemoryInventoryGPU {
self.access_adapters
.generate_air_proving_ctxs(access_adapter_arena),
);
mem.emit_metrics();
ret
}
}
Expand Down
2 changes: 1 addition & 1 deletion extensions/rv32im/circuit/cuda/src/mulh.cu
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ extern "C" int _mulh_tracegen(
assert(height >= d_records.len());
assert(width == sizeof(MulHCols<uint8_t>));

auto [grid, block] = kernel_launch_params(height);
auto [grid, block] = kernel_launch_params(height, 512);

mulh_tracegen<<<grid, block>>>(
d_trace,
Expand Down
Loading