@@ -18,6 +18,9 @@ from functools import wraps
1818from sys import byteorder
1919import sys
2020cimport cython
21+ from cython.operator cimport dereference as deref
22+ from cpython.object cimport PyObject_Hash
23+ from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_GET_SIZE
2124from cpython.version cimport PY_VERSION_HEX
2225from libc.stdint cimport int64_t
2326
@@ -31,7 +34,13 @@ from warnings import warn
3134from weakref import WeakSet
3235
3336from line_profiler._diagnostics import (
34- WRAP_TRACE, SET_FRAME_LOCAL_TRACE, USE_LEGACY_TRACE)
37+ WRAP_TRACE, SET_FRAME_LOCAL_TRACE, USE_LEGACY_TRACE
38+ )
39+
40+ from ._map_helpers cimport (
41+ last_erase_if_present, line_ensure_entry, LastTime, LastTimeMap,
42+ LineTime, LineTimeMap
43+ )
3544
3645
3746NOP_VALUE: int = opcode.opmap[' NOP' ]
@@ -69,6 +78,7 @@ cdef extern from "Python_wrapper.h":
6978 ctypedef struct PyObject
7079 ctypedef struct PyCodeObject
7180 ctypedef struct PyFrameObject
81+ ctypedef Py_ssize_t Py_hash_t
7282 ctypedef long long PY_LONG_LONG
7383 ctypedef int (* Py_tracefunc)(
7484 object self , PyFrameObject * py_frame, int what, PyObject * arg)
@@ -93,6 +103,12 @@ cdef extern from "Python_wrapper.h":
93103 cdef int PyFrame_GetLineNumber(PyFrameObject * frame)
94104 cdef void Py_XDECREF(PyObject * o)
95105
106+ cdef unsigned long PyThread_get_thread_ident()
107+
108+ ctypedef PyCodeObject * PyCodeObjectPtr
109+ # ctypedef unordered_map[int64, LastTime] LastTimeMap
110+ # ctypedef unordered_map[int64, LineTime] LineTimeMap
111+
96112cdef extern from " c_trace_callbacks.c" : # Legacy tracing
97113 ctypedef unsigned long long Py_uintptr_t
98114
@@ -114,18 +130,18 @@ cdef extern from "timers.c":
114130 PY_LONG_LONG hpTimer()
115131 double hpTimerUnit()
116132
117- cdef struct LineTime:
118- int64 code
119- int lineno
120- PY_LONG_LONG total_time
121- long nhits
133+ # cdef struct LineTime:
134+ # int64 code
135+ # int lineno
136+ # PY_LONG_LONG total_time
137+ # long nhits
122138
123- cdef struct LastTime:
124- int f_lineno
125- PY_LONG_LONG time
139+ # cdef struct LastTime:
140+ # int f_lineno
141+ # PY_LONG_LONG time
126142
127143
128- cdef inline int64 compute_line_hash(uint64 block_hash, uint64 linenum):
144+ cdef inline int64 compute_line_hash(uint64 block_hash, uint64 linenum) noexcept :
129145 """
130146 Compute the hash used to store each line timing in an unordered_map.
131147 This is fairly simple, and could use some improvement since linenum
@@ -298,7 +314,7 @@ cpdef _copy_local_sysmon_events(old_code, new_code):
298314 return new_code
299315
300316
301- cpdef int _patch_events(int events, int before, int after):
317+ cpdef int _patch_events(int events, int before, int after) noexcept :
302318 """
303319 Patch ``events`` based on the differences between ``before`` and
304320 ``after``.
@@ -434,7 +450,7 @@ cdef class _SysMonitoringState:
434450 mon.register_callback(self .tool_id, * wrapped_callbacks.popitem())
435451
436452 cdef void call_callback(self , int event_id, object code,
437- object loc_args, object other_args):
453+ object loc_args, object other_args) noexcept :
438454 """
439455 Call the appropriate stored callback. Also take care of the
440456 restoration of :py:mod:`sys.monitoring` callbacks, tool-ID lock,
@@ -550,7 +566,7 @@ sys.monitoring.html#monitoring-event-RERAISE
550566 """
551567 cdef TraceCallback * legacy_callback
552568 cdef _SysMonitoringState mon_state
553- cdef public object active_instances # type: set[LineProfiler]
569+ cdef public set active_instances # type: set[LineProfiler]
554570 cdef int _wrap_trace
555571 cdef int _set_frame_local_trace
556572 cdef int recursion_guard
@@ -732,7 +748,7 @@ sys.monitoring.html#monitoring-event-RERAISE
732748 sys.monitoring.events.RERAISE, code, instruction_offset, exception)
733749
734750 cdef void _handle_exit_event(
735- self , int event_id, object code, int offset, object obj):
751+ self , int event_id, object code, int offset, object obj) noexcept :
736752 """
737753 Base for the frame-exit-event (e.g. via returning or yielding)
738754 callbacks passed to :py:func:`sys.monitoring.register_callback`.
@@ -746,7 +762,7 @@ sys.monitoring.html#monitoring-event-RERAISE
746762
747763 cdef void _base_callback(
748764 self , int is_line_event, int event_id, object code, int lineno,
749- object loc_args, object other_args):
765+ object loc_args, object other_args) noexcept :
750766 """
751767 Base for the various callbacks passed to
752768 :py:func:`sys.monitoring.register_callback`.
@@ -1030,9 +1046,9 @@ cdef class LineProfiler:
10301046 .. _"legacy" trace system: https://github.com/python/cpython/blob/\
103110473.13/Python/legacy_tracing.c
10321048 """
1033- cdef unordered_map[int64, unordered_map[int64, LineTime] ] _c_code_map
1049+ cdef unordered_map[int64, LineTimeMap ] _c_code_map
10341050 # Mapping between thread-id and map of LastTime
1035- cdef unordered_map[int64, unordered_map[int64, LastTime] ] _c_last_time
1051+ cdef unordered_map[int64, LastTimeMap ] _c_last_time
10361052 cdef public list functions
10371053 cdef public dict code_hash_map, dupes_map
10381054 cdef public double timer_unit
@@ -1239,7 +1255,7 @@ datamodel.html#user-defined-functions
12391255
12401256 property _manager :
12411257 def __get__ (self ):
1242- thread_id = threading.get_ident ()
1258+ thread_id = PyThread_get_thread_ident ()
12431259 try :
12441260 return self ._managers[thread_id]
12451261 except KeyError :
@@ -1303,7 +1319,7 @@ datamodel.html#user-defined-functions
13031319 If no profiling data is available on the current thread.
13041320 """
13051321 try :
1306- return (< dict > self ._c_last_time)[threading.get_ident ()]
1322+ return (< dict > self ._c_last_time)[PyThread_get_thread_ident ()]
13071323 except KeyError as e:
13081324 # We haven't actually profiled anything yet
13091325 raise (KeyError (' No profiling data on the current thread '
@@ -1349,7 +1365,7 @@ datamodel.html#user-defined-functions
13491365 return py_last_time
13501366
13511367 cpdef disable(self ):
1352- self ._c_last_time[threading.get_ident ()].clear()
1368+ self ._c_last_time[PyThread_get_thread_ident ()].clear()
13531369 self ._manager._handle_disable_event(self )
13541370
13551371 def get_stats (self ):
@@ -1387,56 +1403,79 @@ datamodel.html#user-defined-functions
13871403
13881404@ cython.boundscheck (False )
13891405@ cython.wraparound (False )
1390- cdef inline inner_trace_callback(
1391- int is_line_event, object instances, object code, int lineno):
1406+ cdef inline void inner_trace_callback(
1407+ int is_line_event, set instances, object code, int lineno):
13921408 """
13931409 The basic building block for the trace callbacks.
13941410 """
1395- cdef object prof_
1396- cdef object bytecode = code.co_code
1411+ cdef LineProfiler prof_
13971412 cdef LineProfiler prof
13981413 cdef LastTime old
13991414 cdef int key
1400- cdef PY_LONG_LONG time
1401- cdef int has_time = 0
1415+ cdef PY_LONG_LONG time = 0
1416+ cdef bint has_time = False
1417+ cdef bint has_last
14021418 cdef int64 code_hash
1403- cdef int64 block_hash
1404- cdef unordered_map[int64, LineTime] line_entries
1405-
1406- if any (bytecode):
1407- block_hash = hash (bytecode)
1408- else : # Cython functions have empty/zero bytecodes
1419+ cdef object py_bytes_obj = code.co_code
1420+ cdef char * data = PyBytes_AS_STRING(py_bytes_obj)
1421+ cdef Py_ssize_t size = PyBytes_GET_SIZE(py_bytes_obj)
1422+ cdef unsigned long ident
1423+ cdef Py_hash_t block_hash
1424+ cdef LineTime* entry
1425+ cdef LineTimeMap* line_entries
1426+ cdef LastTimeMap* last_map
1427+
1428+ # Scan the raw bytecode for any non-zero byte (C-level stand-in for any(bytecode)).
1429+ # A non-zero byte means we are profiling regular Python code; Cython functions have empty/zero bytecode.
1430+ for i in range (size):
1431+ if data[i]:
1432+ # hash() is a Python-level call, so this function CANNOT be marked nogil
1433+ block_hash = hash (py_bytes_obj)
1434+ break
1435+ else :
1436+ # for-else: no non-zero byte was found, so hash the code object itself (Cython functions)
14091437 block_hash = hash (code)
1438+
14101439 code_hash = compute_line_hash(block_hash, lineno)
14111440
14121441 for prof_ in instances:
1442+ # NOTE: the separate loop variable plus the cast below is deliberate; reportedly,
1443+ # iterating directly with "for prof in instances:" is far slower (reason unknown)
14131444 prof = < LineProfiler> prof_
14141445 if not prof._c_code_map.count(code_hash):
14151446 continue
14161447 if not has_time:
14171448 time = hpTimer()
1418- has_time = 1
1419- ident = threading.get_ident()
1420- if prof._c_last_time[ident].count(block_hash):
1421- old = prof._c_last_time[ident][block_hash]
1422- line_entries = prof._c_code_map[code_hash]
1423- key = old.f_lineno
1424- if not line_entries.count(key):
1425- prof._c_code_map[code_hash][key] = LineTime(
1426- code_hash, key, 0 , 0 )
1427- prof._c_code_map[code_hash][key].nhits += 1
1428- prof._c_code_map[code_hash][key].total_time += time - old.time
1449+ has_time = True
1450+ ident = PyThread_get_thread_ident()
1451+ last_map = & (prof._c_last_time[ident])
1452+ # deref(ptr) is Cython's spelling of the C++ unary * operator (i.e. *ptr).
1453+ # Without it, Cython would treat last_map[block_hash] as pointer indexing
1454+ # instead of a lookup in the pointed-to map (which is what we want)
1455+ if deref(last_map).count(block_hash):
1456+ old = deref(last_map)[block_hash]
1457+ line_entries = & (prof._c_code_map[code_hash])
1458+ # Get a pointer to the entry for the previous line; line_ensure_entry() presumably creates it if missing (see _map_helpers - TODO confirm)
1459+ entry = line_ensure_entry(line_entries, old.f_lineno, code_hash)
1460+ # Note: do not wrap `entry` in `deref()` here; doing so causes
1461+ # the new values to be assigned to a temp var.
1462+ # Dot-accessing the pointer directly makes Cython
1463+ # correctly write `ptr->attr = (ptr->attr + incr)`
1464+ entry.nhits += 1
1465+ entry.total_time += time - old.time
1466+ has_last = True
1467+ else :
1468+ has_last = False
14291469 if is_line_event:
14301470 # Get the time again. This way, we don't record much time
14311471 # wasted in this function.
1432- prof._c_last_time[ident] [block_hash] = LastTime(lineno, hpTimer())
1433- elif prof._c_last_time[ident] .count(block_hash):
1472+ deref(last_map) [block_hash] = LastTime(lineno, hpTimer())
1473+ elif deref(last_map) .count(block_hash):
14341474 # We are returning from a function, not executing a line.
14351475 # Delete the last_time record. It may have already been
14361476 # deleted if we are profiling a generator that is being
14371477 # pumped past its end.
1438- prof._c_last_time[ident].erase(
1439- prof._c_last_time[ident].find(block_hash))
1478+ last_erase_if_present(last_map, block_hash)
14401479
14411480
14421481cdef extern int legacy_trace_callback(
0 commit comments