Commit 46e967d

Refactor GhostTaskHelper and improve performance benchmarking
- Simplified GhostTaskHelper to static class design with per-loop task storage
- Removed complex WeakKeyDictionary-based instance management
- Streamlined eager task factory implementation with fewer parameters
- Fixed benchmark eager_start parameter detection using runtime testing
- Added 2-sigma outlier filtering for more accurate performance measurements
- Corrected min/max statistical aggregation in benchmark results
- Updated documentation to remove misleading claims about eager_start parameter
1 parent 91b39f2 commit 46e967d
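
The per-loop task storage mentioned in the commit message boils down to a plain dict keyed by the event loop, filled in lazily on first use. The sketch below only illustrates that caching pattern; the names (_ghost_tasks, get_ghost_task) are made up for the example, and the real helper is the GhostTaskHelper class visible in the diff further down.

import asyncio
from typing import Any, Callable, Coroutine

# Illustrative per-loop cache (hypothetical names, not the committed code):
# a trivial "ghost" task is created lazily per event loop and then reused
# as the stand-in current task.
_ghost_tasks: dict[asyncio.AbstractEventLoop, asyncio.Task[Any]] = {}


async def _ghost_coro() -> None:
    # Trivial coroutine; the resulting task exists mainly so that
    # asyncio.current_task() has something to return during an eager start.
    pass


def get_ghost_task(
    loop: asyncio.AbstractEventLoop,
    create: Callable[[Coroutine[Any, Any, Any]], asyncio.Task[Any]],
) -> asyncio.Task[Any]:
    try:
        return _ghost_tasks[loop]  # fast path: already cached for this loop
    except KeyError:
        _ghost_tasks[loop] = create(_ghost_coro())
        return _ghost_tasks[loop]

Trading the WeakKeyDictionary for a plain dict removes indirection on every lookup; the cost is that entries are never collected automatically, which the new in-code comment considers acceptable since there are unlikely to be many event loops in practice.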

File tree

3 files changed: +214 -84 lines changed


src/asynkit/coroutine.py

Lines changed: 42 additions & 68 deletions

@@ -6,7 +6,6 @@
 import inspect
 import sys
 import types
-import weakref
 from asyncio import Future, Task
 from collections.abc import (
     AsyncGenerator,
@@ -731,14 +730,11 @@ async def sync_coro():
 
     Notes:
     - This is a different mechanism from Python 3.12's native eager execution
-      feature. Python 3.12 provides `eager_start=True` parameter for
-      `asyncio.create_task()` and `asyncio.eager_task_factory()`. Our
+      feature. Python 3.12 provides `asyncio.eager_task_factory`. Our
       implementation works on all Python versions but may not always create
       a real Task - synchronous coroutines get a TaskLikeFuture instead.
     - All kwargs from asyncio.create_task() are properly forwarded to the
      inner factory when delegation occurs.
-    - This is experimental functionality that modifies global task creation
-      behavior for the entire event loop.
    - If you want to preserve an existing task factory, explicitly pass it
      as inner_factory rather than relying on automatic detection.
 
@@ -770,23 +766,7 @@ def real_task_factory(coro: Coroutine[Any, Any, Any]) -> asyncio.Task[Any]:
             else:
                 return asyncio.Task(coro, loop=loop, **kwargs)
 
-        ghost_task_getter = get_ghost_task_getter(loop)
-
-        return coro_eager_task_helper(
-            loop, coro, name, context, ghost_task_getter, real_task_factory
-        )
-
-    # cache GhostTaskHelper instances per event loop
-    helpers: weakref.WeakKeyDictionary[asyncio.AbstractEventLoop, GhostTaskHelper] = (
-        weakref.WeakKeyDictionary()
-    )
-
-    def get_ghost_task_getter(
-        loop: asyncio.AbstractEventLoop,
-    ) -> Callable[[], asyncio.Task[Any]]:
-        if loop not in helpers:
-            helpers[loop] = GhostTaskHelper(lambda coro: asyncio.Task(coro, loop=loop))
-        return helpers[loop].get
+        return coro_eager_task_helper(loop, coro, name, context, real_task_factory)
 
     return factory
 
@@ -865,7 +845,6 @@ def real_task_factory(coro_arg: Coroutine[Any, Any, T]) -> asyncio.Task[T]:
         coro,
         name,
         context,
-        default_ghost_task_getter,
         real_task_factory,
     )
 
@@ -889,36 +868,38 @@ class GhostTaskHelper:
     temporary task contexts if creating eager tasks in a non-task context.
     """
 
-    cleanup: set[asyncio.Task[Any]] = set()
-
-    def __init__(
-        self, raw_create: Callable[[Coroutine[Any, Any, Any]], asyncio.Task[Any]]
-    ) -> None:
-        self.raw_create = raw_create
-        self.ghost_task: asyncio.Task[Any] | None = None
+    # this could be a WeakKeyDictionary but we want maximum performance here
+    # and there are unlikely to be many event loops in practice.
+    tasks: dict[asyncio.AbstractEventLoop, asyncio.Task[Any]] = {}
 
-    def get(self) -> asyncio.Task[Any]:
-        if self.ghost_task is None:
-
-            async def ghost_coro() -> None:
-                return None
+    @classmethod
+    def get(
+        cls,
+        loop: asyncio.AbstractEventLoop,
+        raw_create: Callable[[Coroutine[Any, Any, Any]], asyncio.Task[Any]],
+    ) -> asyncio.Task[Any]:
+        """
+        Get the GhostTaskHelper instance for the given event loop.
+        """
+        try:
+            return cls.tasks[loop]
+        except KeyError:
+            cls.tasks[loop] = raw_create(cls.task_coro())
+            return cls.tasks[loop]
 
-            self.ghost_task = self.raw_create(ghost_coro())
-            GhostTaskHelper.cleanup.add(self.ghost_task)
-            self.ghost_task.add_done_callback(GhostTaskHelper.cleanup.discard)
-        return self.ghost_task
+    @classmethod
+    async def task_coro(cls) -> None:
+        pass
 
 
-# default instance for the create_task helper
-default_ghost_task_getter = GhostTaskHelper(_create_task).get
+_get_ghost_task = GhostTaskHelper.get  # for easier access
 
 
 def coro_eager_task_helper(
     loop: asyncio.AbstractEventLoop,
     coro: Coroutine[Any, Any, T],
     name: str | None,
     context: Context | None,
-    get_fake_task: Callable[[], asyncio.Task[Any]],
     real_task_factory: Callable[[Coroutine[Any, Any, T]], asyncio.Task[T]],
 ) -> asyncio.Task[T] | TaskLikeFuture[T]:
     """
@@ -936,38 +917,32 @@ def coro_eager_task_helper(
     """
     # In Python < 3.11, context parameter doesn't exist for create_task()
     # so we ignore any provided context and let CoroStart manage its own
-
     if sys.version_info < (3, 11):
         context = None
 
-    # start the coroutine in the task context
-    def start() -> CoroStart[T]:
-        if context is not None:
-            # Enter the context only for the initial start, then use None for CoroStart
-            # This way the continuation won't try to re-enter the context
-            def start_in_context() -> CoroStart[T]:
-                return CoroStart(coro, context=None)
-
-            cs = context.run(start_in_context)
-        else:
-            # No explicit context - use copy_context() as before
-            cs = CoroStart(coro, context=copy_context())
-        return cs
-
-    current_task = asyncio.current_task(loop)
-    # if there is no current task, then we need a fake task to run it in
-    # this is so that asyncio.get_current_task() returns a valid task during
-    # eager start. This is not the same task as will be created later. This
-    # is purely to satisfy get_current_task() calls during eager start, such
-    # as for anyio that wants to detect the current async framework.
-
     cs: CoroStart[T]
+    current_task = asyncio.current_task(loop)
     if current_task is not None:
-        cs = start()
+        if context is None:
+            cs = CoroStart(coro, context=copy_context())
+        else:
+            # Enter the context only for the initial start, then use None for CoroStart
+            # This way the continuation won't try to re-enter the context
+            cs = context.run(lambda: CoroStart(coro, context=None))
    else:
-        old = swap_current_task(loop, get_fake_task())
+        # if there is no current task, then we need a fake task to run it in
+        # this is so that asyncio.get_current_task() returns a valid task during
+        # eager start. This is not the same task as will be created later. This
+        # is purely to satisfy get_current_task() calls during eager start, such
+        # as for anyio that wants to detect the current async framework.
+        old = swap_current_task(loop, _get_ghost_task(loop, real_task_factory))
         try:
-            cs = start()
+            if context is None:
+                cs = CoroStart(coro, context=copy_context())
+            else:
+                # Enter the context only for the initial start, then use None for CoroStart
+                # This way the continuation won't try to re-enter the context
+                cs = context.run(lambda: CoroStart(coro, context=None))
         finally:
             swap_current_task(loop, old)
 
@@ -976,7 +951,6 @@ def start_in_context() -> CoroStart[T]:
     if not cs.done():
         return real_task_factory(cs.as_coroutine())
     else:
-        # Return a TaskLikeFuture wrapping the result
         return TaskLikeFuture(cs, name=name, context=context)
 

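For comparison, the native mechanism that the updated docstring points to is enabled like this on Python 3.12 and later (plain stdlib asyncio, independent of asynkit's helper):

import asyncio


async def main() -> None:
    # Python 3.12+: install the stdlib eager task factory on the running loop.
    # Tasks created afterwards run synchronously up to their first suspension
    # point, inside the create_task() call itself.
    asyncio.get_running_loop().set_task_factory(asyncio.eager_task_factory)

    async def work() -> str:
        return "done"  # completes without ever suspending

    task = asyncio.create_task(work())
    print(task.done())  # True: the coroutine already finished eagerly
    print(await task)


asyncio.run(main())

The asynkit factory in the diff above differs in one respect the docstring calls out: a coroutine that never suspends yields a TaskLikeFuture rather than a real Task.
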
tests/misc/eager_task_factory_benchmark.py

Lines changed: 78 additions & 16 deletions

@@ -10,7 +10,6 @@
 
 import asyncio
 import contextlib
-import inspect
 import statistics
 import sys
 import time
@@ -19,6 +18,31 @@
 
 import asynkit
 
+
+def filter_outliers(
+    data: list[float], sigma_threshold: float = 4.0
+) -> tuple[list[float], int]:
+    """
+    Filter outliers from data using standard deviation method.
+
+    Returns:
+        tuple: (filtered_data, num_outliers_removed)
+    """
+    if len(data) <= 2:
+        return data, 0
+
+    mean = statistics.mean(data)
+    std_dev = statistics.stdev(data)
+
+    if std_dev == 0:
+        return data, 0
+
+    filtered = [x for x in data if abs(x - mean) <= sigma_threshold * std_dev]
+    num_removed = len(data) - len(filtered)
+
+    return filtered, num_removed
+
+
 # Test parameters
 NUM_TASKS = 100
 NUM_SLEEPS_PER_TASK = 100
@@ -161,13 +185,20 @@ async def run_tests(self) -> dict[str, Any]:
             adjustment_factor = 1000 if self.is_non_eager else 1
             adjusted_latencies = [lat / adjustment_factor for lat in latencies]
 
-            # Calculate statistics for this run
+            # Filter outliers before calculating statistics
+            filtered_latencies, num_outliers = filter_outliers(
+                adjusted_latencies, sigma_threshold=2.0
+            )
+
+            # Calculate statistics for this run (using filtered data)
             run_latency_stats = {
-                "mean": statistics.mean(adjusted_latencies) * 1_000_000,
-                "median": statistics.median(adjusted_latencies) * 1_000_000,
-                "min": min(adjusted_latencies) * 1_000_000,
-                "max": max(adjusted_latencies) * 1_000_000,
-                "std_dev": statistics.stdev(adjusted_latencies) * 1_000_000,
+                "mean": statistics.mean(filtered_latencies) * 1_000_000,
+                "median": statistics.median(filtered_latencies) * 1_000_000,
+                "min": min(filtered_latencies) * 1_000_000,
+                "max": max(filtered_latencies) * 1_000_000,
+                "std_dev": statistics.stdev(filtered_latencies) * 1_000_000,
+                "outliers_removed": num_outliers,
+                "total_samples": len(adjusted_latencies),
             }
 
             # Measure throughput for this run
@@ -176,9 +207,12 @@ async def run_tests(self) -> dict[str, Any]:
             if is_warmup:
                 print(" Warmup run completed (discarded)")
             else:
+                outlier_info = ""
+                if run_latency_stats["outliers_removed"] > 0:
+                    outlier_info = f" ({run_latency_stats['outliers_removed']} outliers filtered)"
                 print(
                     f"latency {run_latency_stats['mean']:.2f}μs, "
-                    f"throughput {throughput:.0f} ops/s"
+                    f"throughput {throughput:.0f} ops/s{outlier_info}"
                 )
             all_latency_results.append(run_latency_stats)
             all_throughput_results.append(throughput)
@@ -200,8 +234,8 @@ async def run_tests(self) -> dict[str, Any]:
             "median_std": statistics.stdev(median_latencies)
             if len(median_latencies) > 1
             else 0,
-            "min": statistics.mean(min_latencies),
-            "max": statistics.mean(max_latencies),
+            "min": min(min_latencies),
+            "max": max(max_latencies),
             "std_dev": statistics.mean(std_dev_latencies),
             "runs": len(all_latency_results),
         }
@@ -214,6 +248,17 @@ async def run_tests(self) -> dict[str, Any]:
             else 0
         )
 
+        # Calculate total outliers filtered
+        total_outliers = sum(
+            result.get("outliers_removed", 0) for result in all_latency_results
+        )
+        total_samples = sum(
+            result.get("total_samples", 0) for result in all_latency_results
+        )
+        outlier_percentage = (
+            (total_outliers / total_samples * 100) if total_samples > 0 else 0
+        )
+
         # Display final results
         print(
             f"\nFinal Results (averaged over {final_latency_stats['runs']} runs):"
@@ -233,6 +278,10 @@ async def run_tests(self) -> dict[str, Any]:
         print(
             f" Throughput: {final_throughput:.0f} ± {throughput_std:.0f} operations/second"
         )
+        if total_outliers > 0:
+            print(
+                f" Outliers filtered: {total_outliers}/{total_samples} ({outlier_percentage:.1f}%) using 2σ threshold"
+            )
 
         return {
             "factory_name": self.factory_name,
@@ -309,15 +358,28 @@ async def measure_throughput(self) -> float:
 
 
 async def compare_eager_start_parameter():
-    """Test Python 3.12's per-task eager_start parameter if available."""
+    """Test Python 3.14's per-task eager_start parameter if available."""
+
+    # Check if eager_start parameter is available by testing it
+    python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+
+    # Test if eager_start actually works (it may be handled via **kwargs)
+    try:
+
+        async def test_coro():
+            return "test"
+
+        # Try creating a task with eager_start parameter
+        task = asyncio.create_task(test_coro(), eager_start=True)
+        await task
+        eager_start_available = True
+    except TypeError:
+        eager_start_available = False
 
-    # Check if eager_start parameter is available
-    sig = inspect.signature(asyncio.create_task)
-    if "eager_start" not in sig.parameters:
-        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+    if not eager_start_available:
         print(f"\n=== Python {python_version} eager_start Parameter ===")
         print(" eager_start parameter not available in this Python version")
-        print(" (eager_start was added in Python 3.12.0a7+)")
+        print(" (eager_start was added to asyncio.create_task() in Python 3.14)")
         return
 
     python_version = f"{sys.version_info.major}.{sys.version_info.minor}"

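To make the new 2-sigma filtering concrete, here is a small standalone illustration of the same approach on synthetic latency samples; it is a condensed variant of the filter_outliers helper above, using the 2σ threshold the benchmark actually passes, and not the committed code itself.

import statistics


def filter_outliers(data: list[float], sigma_threshold: float = 2.0) -> tuple[list[float], int]:
    """Drop samples more than sigma_threshold standard deviations from the mean."""
    if len(data) <= 2:
        return data, 0
    mean = statistics.mean(data)
    std_dev = statistics.stdev(data)
    if std_dev == 0:
        return data, 0
    kept = [x for x in data if abs(x - mean) <= sigma_threshold * std_dev]
    return kept, len(data) - len(kept)


# Nine latencies around 100μs plus one 900μs scheduling hiccup (values in seconds).
samples = [100e-6, 101e-6, 99e-6, 102e-6, 98e-6, 100e-6, 101e-6, 99e-6, 100e-6, 900e-6]
kept, removed = filter_outliers(samples, sigma_threshold=2.0)
print(
    f"removed {removed} outlier(s): "
    f"mean {statistics.mean(samples) * 1e6:.1f}μs -> {statistics.mean(kept) * 1e6:.1f}μs"
)

With the hiccup included, the per-run mean nearly doubles (180μs vs 100μs); filtering at 2σ recovers the typical latency, which is why the benchmark now also reports how many samples were discarded.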