Skip to content

Commit 3c986f8

Browse files
committed
first pass
1 parent 7bff945 commit 3c986f8

File tree

3 files changed

+268
-5
lines changed

3 files changed

+268
-5
lines changed

src/native/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
fn main() {
2+
pyo3_build_config::use_pyo3_cfgs();
23
//NOTE(@dmehala): PyO3 doesn't link to `libpython` on MacOS.
34
// This sets the correct linker arguments for the platform.
45
// Source: <https://pyo3.rs/main/building-and-distribution.html#macos>

src/native/crashtracker.rs

Lines changed: 191 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
11
use anyhow;
2+
#[cfg(unix)]
3+
use std::cmp;
24
use std::collections::HashMap;
35
use std::ffi::{c_char, c_int, c_void};
46
use std::ptr;
7+
#[cfg(unix)]
8+
use std::slice;
9+
#[cfg(unix)]
10+
use std::sync::atomic::AtomicBool;
511
use std::sync::atomic::{AtomicU8, Ordering};
612
use std::sync::Once;
713
use std::time::Duration;
814

915
use libdd_common::Endpoint;
1016
use libdd_crashtracker::{
11-
register_runtime_stacktrace_string_callback, CrashtrackerConfiguration,
12-
CrashtrackerReceiverConfig, Metadata, StacktraceCollection,
17+
register_runtime_frame_callback, register_runtime_stacktrace_string_callback,
18+
CrashtrackerConfiguration, CrashtrackerReceiverConfig, Metadata, RuntimeStackFrame,
19+
StacktraceCollection,
1320
};
1421
use pyo3::prelude::*;
1522

@@ -29,8 +36,11 @@ static DUMP_TRACEBACK_INIT: std::sync::Once = std::sync::Once::new();
2936
extern "C" {
3037
fn pipe(pipefd: *mut [c_int; 2]) -> c_int;
3138
fn read(fd: c_int, buf: *mut c_void, count: usize) -> isize;
39+
fn write(fd: c_int, buf: *const c_void, count: usize) -> isize;
3240
fn close(fd: c_int) -> c_int;
3341
fn fcntl(fd: c_int, cmd: c_int, arg: c_int) -> c_int;
42+
#[cfg(unix)]
43+
fn PyThreadState_Next(prev: *mut pyo3_ffi::PyThreadState) -> *mut pyo3_ffi::PyThreadState;
3444
}
3545

3646
pub trait RustWrapper {
@@ -273,8 +283,19 @@ pub fn crashtracker_init<'py>(
273283
unsafe {
274284
init_dump_traceback_fn();
275285
}
276-
if let Err(e) = register_runtime_stacktrace_string_callback(native_runtime_stack_callback) {
277-
eprintln!("Failed to register runtime callback: {}", e);
286+
let dump_fn_available = unsafe { get_cached_dump_traceback_fn().is_some() };
287+
if dump_fn_available {
288+
if let Err(e) =
289+
register_runtime_stacktrace_string_callback(
290+
native_runtime_stack_string_callback,
291+
)
292+
{
293+
eprintln!("Failed to register runtime stacktrace callback: {}", e);
294+
}
295+
} else if let Err(e) =
296+
register_runtime_frame_callback(native_runtime_stack_frame_callback)
297+
{
298+
eprintln!("Failed to register runtime frame callback: {}", e);
278299
}
279300
}
280301
match libdd_crashtracker::init(config, receiver_config, metadata) {
@@ -333,6 +354,140 @@ pub fn crashtracker_receiver() -> anyhow::Result<()> {
333354

334355
const MAX_TRACEBACK_SIZE: usize = 8 * 1024; // 8KB
335356

357+
#[cfg(unix)]
358+
const FRAME_FUNCTION_CAP: usize = 256;
359+
#[cfg(unix)]
360+
const FRAME_FILE_CAP: usize = 512;
361+
#[cfg(unix)]
362+
const FRAME_TYPE_CAP: usize = 256;
363+
364+
// AIDEV-NOTE: Python runtime frames are captured on-demand inside the crashtracking callback;
365+
// this guard prevents concurrent re-entry.
366+
#[cfg(unix)]
367+
static FRAME_COLLECTION_GUARD: AtomicBool = AtomicBool::new(false);
368+
369+
#[cfg(unix)]
370+
unsafe fn capture_frames_via_python(emit_frame: unsafe extern "C" fn(&RuntimeStackFrame)) {
371+
let mut emitted = false;
372+
373+
let current = pyo3_ffi::PyThreadState_Get();
374+
375+
if !current.is_null() {
376+
let _ = collect_and_emit_frames_for_thread(current, emit_frame);
377+
}
378+
}
379+
380+
#[cfg(unix)]
381+
unsafe fn collect_and_emit_frames_for_thread(
382+
tstate: *mut pyo3_ffi::PyThreadState,
383+
emit_frame: unsafe extern "C" fn(&RuntimeStackFrame),
384+
) -> bool {
385+
if tstate.is_null() {
386+
return false;
387+
}
388+
389+
let mut emitted = false;
390+
let mut frame = thread_top_frame(tstate);
391+
392+
while !frame.is_null() {
393+
if emit_python_frame(frame, emit_frame) {
394+
emitted = true;
395+
}
396+
frame = advance_frame(frame);
397+
}
398+
399+
emitted
400+
}
401+
402+
unsafe fn thread_top_frame(tstate: *mut pyo3_ffi::PyThreadState) -> *mut pyo3_ffi::PyFrameObject {
403+
if tstate.is_null() {
404+
ptr::null_mut()
405+
} else {
406+
let frame = pyo3_ffi::PyThreadState_GetFrame(tstate);
407+
#[cfg(not(Py_3_11))]
408+
{
409+
if !frame.is_null() {
410+
pyo3_ffi::Py_XINCREF(frame as *mut pyo3_ffi::PyObject);
411+
}
412+
}
413+
frame
414+
}
415+
}
416+
417+
unsafe fn advance_frame(frame: *mut pyo3_ffi::PyFrameObject) -> *mut pyo3_ffi::PyFrameObject {
418+
if frame.is_null() {
419+
return ptr::null_mut();
420+
}
421+
let back = pyo3_ffi::PyFrame_GetBack(frame);
422+
pyo3_ffi::Py_DecRef(frame as *mut pyo3_ffi::PyObject);
423+
back
424+
}
425+
426+
#[cfg(unix)]
427+
unsafe fn emit_python_frame(
428+
frame: *mut pyo3_ffi::PyFrameObject,
429+
emit_frame: unsafe extern "C" fn(&RuntimeStackFrame),
430+
) -> bool {
431+
if frame.is_null() {
432+
return false;
433+
}
434+
435+
let mut file = get_code_attr_utf8(frame, b"co_filename\0");
436+
if file.len() > FRAME_FILE_CAP {
437+
file.truncate(FRAME_FILE_CAP);
438+
}
439+
440+
let mut function = get_code_attr_utf8(frame, b"co_name\0");
441+
if function.len() > FRAME_FUNCTION_CAP {
442+
function.truncate(FRAME_FUNCTION_CAP);
443+
}
444+
let line_number = pyo3_ffi::PyFrame_GetLineNumber(frame);
445+
446+
let runtime_frame = RuntimeStackFrame {
447+
line: if line_number < 0 {
448+
0
449+
} else {
450+
line_number as u32
451+
},
452+
column: 0,
453+
function: function.as_slice(),
454+
file: file.as_slice(),
455+
type_name: &[],
456+
};
457+
458+
emit_frame(&runtime_frame);
459+
true
460+
}
461+
462+
#[cfg(unix)]
463+
unsafe fn get_code_attr_utf8(frame: *mut pyo3_ffi::PyFrameObject, attr: &[u8]) -> Vec<u8> {
464+
let code_obj = pyo3_ffi::PyFrame_GetCode(frame) as *mut pyo3_ffi::PyObject;
465+
if code_obj.is_null() {
466+
return Vec::new();
467+
}
468+
let attr_obj = pyo3_ffi::PyObject_GetAttrString(code_obj, attr.as_ptr() as *const c_char);
469+
pyo3_ffi::Py_DecRef(code_obj);
470+
if attr_obj.is_null() {
471+
return Vec::new();
472+
}
473+
let data = py_unicode_to_vec(attr_obj);
474+
pyo3_ffi::Py_DecRef(attr_obj);
475+
data
476+
}
477+
478+
#[cfg(unix)]
479+
unsafe fn py_unicode_to_vec(obj: *mut pyo3_ffi::PyObject) -> Vec<u8> {
480+
if obj.is_null() {
481+
return Vec::new();
482+
}
483+
let mut size: pyo3_ffi::Py_ssize_t = 0;
484+
let data = pyo3_ffi::PyUnicode_AsUTF8AndSize(obj, &mut size);
485+
if data.is_null() || size <= 0 {
486+
return Vec::new();
487+
}
488+
slice::from_raw_parts(data as *const u8, size as usize).to_vec()
489+
}
490+
336491
// Attempt to resolve _Py_DumpTracebackThreads at runtime
337492
// Try to link once during registration
338493
unsafe fn init_dump_traceback_fn() {
@@ -438,8 +593,39 @@ unsafe fn dump_python_traceback_as_string(
438593
emit_stacktrace_string("<traceback_read_failed>\0".as_ptr() as *const c_char);
439594
}
440595

441-
unsafe extern "C" fn native_runtime_stack_callback(
596+
unsafe fn dump_python_traceback_as_frames(emit_frame: unsafe extern "C" fn(&RuntimeStackFrame)) {
597+
#[cfg(unix)]
598+
{
599+
if emit_frame as usize == 0 {
600+
return;
601+
}
602+
603+
if FRAME_COLLECTION_GUARD
604+
.compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst)
605+
.is_err()
606+
{
607+
return;
608+
}
609+
610+
capture_frames_via_python(emit_frame);
611+
612+
FRAME_COLLECTION_GUARD.store(false, Ordering::SeqCst);
613+
}
614+
615+
#[cfg(not(unix))]
616+
{
617+
let _ = emit_frame;
618+
}
619+
}
620+
621+
unsafe extern "C" fn native_runtime_stack_string_callback(
442622
emit_stacktrace_string: unsafe extern "C" fn(*const c_char),
443623
) {
444624
dump_python_traceback_as_string(emit_stacktrace_string);
445625
}
626+
627+
/// C-ABI entry point registered for frame-by-frame runtime stack traces.
///
/// Forwards the emitter it receives straight to
/// `dump_python_traceback_as_frames`.
unsafe extern "C" fn native_runtime_stack_frame_callback(
    emit_frame: unsafe extern "C" fn(&RuntimeStackFrame),
) {
    dump_python_traceback_as_frames(emit_frame)
}

test_echion_crashtracker.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Test script to verify Echion-based stack walking in crashtracker.
4+
5+
This script:
6+
1. Enables crashtracking with runtime stacks
7+
2. Disables _Py_DumpTracebackThreads to force Echion fallback
8+
3. Triggers a crash to test stack capture
9+
"""
10+
11+
import os
12+
import sys
13+
import signal
14+
import time
15+
import ctypes
16+
17+
def nested_function_3():
    """Innermost call in the chain: announce itself, then crash the process."""
    print("In nested_function_3 - about to crash!")
    # Reading a C string from address 0 is expected to trigger a segmentation fault.
    null_address = 0
    ctypes.string_at(null_address)
22+
23+
def nested_function_2():
    """Intermediate call in the chain; hands over to nested_function_3."""
    print("In nested_function_2")
    nested_function_3()
27+
28+
def nested_function_1():
    """Outermost call in the chain; hands over to nested_function_2."""
    print("In nested_function_1")
    nested_function_2()
32+
33+
def main():
    """Start the crashtracker, then crash through a chain of nested calls.

    Returns 1 when ddtrace cannot be imported, when the crashtracker fails to
    start, or when the expected crash unexpectedly does not happen.
    """
    print("Setting up crashtracker test...")

    # Turn on crashtracking and runtime stack emission before ddtrace is imported.
    os.environ.update(
        {
            'DD_CRASHTRACKING_ENABLED': 'true',
            'DD_CRASHTRACKING_EMIT_RUNTIME_STACKS': 'true',
        }
    )

    try:
        import ddtrace
        from ddtrace.internal.core import crashtracking

        started = crashtracking.start()
        if not started:
            print("Failed to start crashtracker")
            return 1
        print("Crashtracker started successfully")

        # Brief pause so the crashtracker can finish initializing.
        time.sleep(0.5)

        print("\nNow triggering a crash with nested function calls...")
        print("Expected stack trace should show:")
        expected_frames = (
            " - main()",
            " - nested_function_1()",
            " - nested_function_2()",
            " - nested_function_3()",
        )
        for entry in expected_frames:
            print(entry)
        print()

        # Enter the call chain that ends in the crash.
        nested_function_1()

    except ImportError as e:
        print(f"Failed to import ddtrace: {e}")
        print("Make sure dd-trace-py is built and installed")
        return 1

    # Only reached when the crash did not happen.
    print("ERROR: Should have crashed by now!")
    return 1
74+
75+
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)

0 commit comments

Comments
 (0)