format

balthazur · balthazur · commit 98d25ed33dc7 · 2025-11-19T08:56:42.000+01:00
diff --git a/inference/core/interfaces/webrtc_worker/entities.py b/inference/core/interfaces/webrtc_worker/entities.py
@@ -39,7 +39,9 @@ class WebRTCWorkerRequest(BaseModel):
     data_output: Optional[List[str]] = Field(default=None)
     declared_fps: Optional[float] = None
     rtsp_url: Optional[str] = None
-    use_data_channel_frames: bool = False  # When True, expect frames via data channel instead of media track
+    use_data_channel_frames: bool = (
+        False  # When True, expect frames via data channel instead of media track
+    )
     processing_timeout: Optional[int] = WEBRTC_MODAL_FUNCTION_TIME_LIMIT
     processing_session_started: Optional[datetime.datetime] = None
     requested_plan: Optional[str] = "webrtc-gpu-small"
diff --git a/inference/core/interfaces/webrtc_worker/webrtc.py b/inference/core/interfaces/webrtc_worker/webrtc.py
@@ -70,64 +70,65 @@ def create_chunked_binary_message(
     frame_id: int, chunk_index: int, total_chunks: int, payload: bytes
 ) -> bytes:
     """Create a binary message with standard 12-byte header.
-    
+
     Format: [frame_id: 4][chunk_index: 4][total_chunks: 4][payload: N]
     All integers are uint32 little-endian.
     """
-    header = struct.pack('<III', frame_id, chunk_index, total_chunks)
+    header = struct.pack("<III", frame_id, chunk_index, total_chunks)
     return header + payload
 
 
 def parse_chunked_binary_message(message: bytes) -> Tuple[int, int, int, bytes]:
     """Parse a binary message with standard 12-byte header.
-    
+
     Returns: (frame_id, chunk_index, total_chunks, payload)
     """
     if len(message) < 12:
         raise ValueError(f"Message too short: {len(message)} bytes (expected >= 12)")
-    
-    frame_id, chunk_index, total_chunks = struct.unpack('<III', message[0:12])
+
+    frame_id, chunk_index, total_chunks = struct.unpack("<III", message[0:12])
     payload = message[12:]
     return frame_id, chunk_index, total_chunks, payload
 
 
 class ChunkReassembler:
     """Helper to reassemble chunked binary messages."""
-    
+
     def __init__(self):
-        self._chunks: Dict[int, Dict[int, bytes]] = {}  # {frame_id: {chunk_index: data}}
+        self._chunks: Dict[int, Dict[int, bytes]] = (
+            {}
+        )  # {frame_id: {chunk_index: data}}
         self._total: Dict[int, int] = {}  # {frame_id: total_chunks}
-    
+
     def add_chunk(
         self, frame_id: int, chunk_index: int, total_chunks: int, chunk_data: bytes
     ) -> Optional[bytes]:
         """Add a chunk and return complete payload if all chunks received.
-        
+
         Returns:
             Complete reassembled payload bytes if all chunks received, None otherwise.
         """
         # Initialize buffers for new frame
         if frame_id not in self._chunks:
             self._chunks[frame_id] = {}
             self._total[frame_id] = total_chunks
-        
+
         # Store chunk
         self._chunks[frame_id][chunk_index] = chunk_data
-        
+
         # Check if all chunks received
         if len(self._chunks[frame_id]) >= total_chunks:
             # Reassemble in order
-            complete_payload = b''.join(
-                self._chunks[frame_id][i] 
-                for i in range(total_chunks)
+            complete_payload = b"".join(
+                self._chunks[frame_id][i] for i in range(total_chunks)
             )
-            
+
             # Clean up
             del self._chunks[frame_id]
             del self._total[frame_id]
-            
+
             return complete_payload
-        
+
         return None
 
 
@@ -138,7 +139,7 @@ def send_chunked_data(
     chunk_size: int = CHUNK_SIZE,
 ) -> None:
     """Send payload via data channel, automatically chunking if needed.
-    
+
     Args:
         data_channel: RTCDataChannel to send on
         frame_id: Frame identifier
@@ -148,19 +149,21 @@ def send_chunked_data(
     if data_channel.readyState != "open":
         logger.warning(f"Cannot send response for frame {frame_id}, channel not open")
         return
-    
-    total_chunks = (len(payload_bytes) + chunk_size - 1) // chunk_size  # Ceiling division
-    
+
+    total_chunks = (
+        len(payload_bytes) + chunk_size - 1
+    ) // chunk_size  # Ceiling division
+
     if frame_id % 100 == 1:
         logger.info(
             f"Sending response for frame {frame_id}: {total_chunks} chunk(s), {len(payload_bytes)} bytes"
         )
-    
+
     for chunk_index in range(total_chunks):
         start = chunk_index * chunk_size
         end = min(start + chunk_size, len(payload_bytes))
         chunk_data = payload_bytes[start:end]
-        
+
         message = create_chunked_binary_message(
             frame_id, chunk_index, total_chunks, chunk_data
         )
@@ -209,7 +212,9 @@ def __init__(
         self._stop_processing = False
         self.use_data_channel_frames = use_data_channel_frames
         self._data_frame_queue: "asyncio.Queue[Optional[VideoFrame]]" = asyncio.Queue()
-        self._chunk_reassembler = ChunkReassembler()  # For reassembling inbound frame chunks
+        self._chunk_reassembler = (
+            ChunkReassembler()
+        )  # For reassembling inbound frame chunks
 
         self.has_video_track = has_video_track
         self.stream_output = stream_output
@@ -294,7 +299,7 @@ async def _send_data_output(
 
         if self._data_mode == DataOutputMode.NONE:
             # Even empty responses use binary protocol
-            json_bytes = json.dumps(webrtc_output.model_dump()).encode('utf-8')
+            json_bytes = json.dumps(webrtc_output.model_dump()).encode("utf-8")
             send_chunked_data(self.data_channel, self._received_frames, json_bytes)
             return
 
@@ -331,46 +336,48 @@ async def _send_data_output(
             webrtc_output.serialized_output_data = serialized_outputs
 
         # Send using binary chunked protocol
-        json_bytes = json.dumps(webrtc_output.model_dump(mode="json")).encode('utf-8')
+        json_bytes = json.dumps(webrtc_output.model_dump(mode="json")).encode("utf-8")
         send_chunked_data(self.data_channel, self._received_frames, json_bytes)
 
     async def _handle_data_channel_frame(self, message: bytes) -> None:
         """Handle incoming binary frame chunk from upstream_frames data channel.
-        
+
         Uses standard binary protocol with 12-byte header + JPEG chunk payload.
         """
         try:
             # Parse message
-            frame_id, chunk_index, total_chunks, jpeg_chunk = parse_chunked_binary_message(message)
-            
+            frame_id, chunk_index, total_chunks, jpeg_chunk = (
+                parse_chunked_binary_message(message)
+            )
+
             # Add chunk and check if complete
             jpeg_bytes = self._chunk_reassembler.add_chunk(
                 frame_id, chunk_index, total_chunks, jpeg_chunk
             )
-            
+
             if jpeg_bytes is None:
                 # Still waiting for more chunks
                 return
-            
+
             # All chunks received - decode and queue frame
             if frame_id % 100 == 1:
                 logger.info(
                     f"Received frame {frame_id}: {total_chunks} chunk(s), {len(jpeg_bytes)} bytes JPEG"
                 )
-            
+
             nparr = np.frombuffer(jpeg_bytes, np.uint8)
             np_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
-            
+
             if np_image is None:
                 logger.error(f"Failed to decode JPEG for frame {frame_id}")
                 return
-            
+
             video_frame = VideoFrame.from_ndarray(np_image, format="bgr24")
             await self._data_frame_queue.put((frame_id, video_frame))
-            
+
             if frame_id % 100 == 1:
                 logger.info(f"Queued frame {frame_id}")
-            
+
         except Exception as e:
             logger.error(f"Error handling frame chunk: {e}", exc_info=True)
 
@@ -384,7 +391,9 @@ async def process_frames_data_only(self):
             av_logging.set_libav_level(av_logging.ERROR)
             self._av_logging_set = True
 
-        logger.info(f"Starting data-only frame processing (use_data_channel_frames={self.use_data_channel_frames})")
+        logger.info(
+            f"Starting data-only frame processing (use_data_channel_frames={self.use_data_channel_frames})"
+        )
 
         try:
             while not self._stop_processing:
@@ -404,7 +413,7 @@ async def process_frames_data_only(self):
                     # Get frame from media track (existing behavior)
                     if not self.track or self.track.readyState == "ended":
                         break
-                    
+
                     # Drain queue if using PlayerStreamTrack (RTSP)
                     if isinstance(self.track, PlayerStreamTrack):
                         while self.track._queue.qsize() > 30:
@@ -843,16 +852,18 @@ def on_datachannel(channel: RTCDataChannel):
 
         # Handle upstream frames channel (client sending frames to server)
         if channel.label == "upstream_frames":
-            logger.info("Upstream frames channel established, starting data-only processing")
-            
+            logger.info(
+                "Upstream frames channel established, starting data-only processing"
+            )
+
             @channel.on("message")
             def on_frame_message(message):
                 asyncio.create_task(video_processor._handle_data_channel_frame(message))
-            
+
             # Start processing immediately since we won't get a media track
             if webrtc_request.use_data_channel_frames and not should_send_video:
                 asyncio.create_task(video_processor.process_frames_data_only())
-            
+
             return
 
         # Handle inference control channel (bidirectional communication)