fix(api,robot-server): let CSVParameter dialect sniffer see entire CSV file (#20245)

ddcc4 · web-flow · commit 7c675c134d81 · 2025-12-02T13:10:01.000-05:00
# Overview

A customer reported that a parameterized protocol works with a short CSV file but fails with their longer CSV file: https://opentrons.slack.com/archives/C389UCULX/p1763376654727119

The root cause seems to be that in `parse_as_csv()`, we were truncating the CSV file to 1024 bytes before passing it to the `csv.Sniffer`. If that chops up a line at an inopportune place, it causes the sniffer to fail with `Could not determine delimiter`. We should just pass the whole CSV file to the sniffer.

From our meeting this morning, we said we would fix this in `edge` rather than trying to get it into RS 8.8.0.

## Test Plan and Hands-on Testing

Added a test case with a long CSV file derived from the customer report.

## Risk assessment

Low. I guess there's a small risk that there could be some horrifying junk in the CSV file after the first 1024 bytes, that the sniffer previously wouldn't see, that would now be visible to the sniffer. But I think overall, it's more correct to let the sniffer see the whole file, rather than arbitrarily cutting it off at 1024 bytes.
diff --git a/api/src/opentrons/protocols/parameters/csv_parameter_interface.py b/api/src/opentrons/protocols/parameters/csv_parameter_interface.py
@@ -70,7 +70,7 @@ def parse_as_csv(
         rows: List[List[str]] = []
         if detect_dialect:
             try:
-                dialect = csv.Sniffer().sniff(self.contents[:1024])
+                dialect = csv.Sniffer().sniff(self.contents)
                 reader = csv.reader(self.contents.split("\n"), dialect, **kwargs)
             except (UnicodeDecodeError, csv.Error):
                 raise ParameterValueError(
diff --git a/api/tests/opentrons/protocols/parameters/test_csv_parameter_interface.py b/api/tests/opentrons/protocols/parameters/test_csv_parameter_interface.py
@@ -45,6 +45,23 @@ def csv_file_different_delimiter() -> bytes:
     return b"x:y:z\na,:1,:2\nb,:3,:4\nc,:5,:6"
 
 
+@pytest.fixture()
+def csv_file_long() -> bytes:
+    """A long CSV file from a customer that caused the sniffer to fail when it only looked at the first 1024 bytes."""
+    return b"""
+Source Labware,Source Slot,Source Well,Source Height,Dest Labware,Dest Slot,Dest Well,Volume
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+opentrons_10_tuberack_falcon_4x50ml_6x15ml_conical,0,A1,0,opentrons_24_aluminumblock_nest_0.5ml_screwcap,0,A1,100
+""".strip()
+
+
 @pytest.fixture
 def csv_file_basic_trailing_empty() -> Tuple[bytes, List[List[str]]]:
     """A basic CSV file with quotes around strings and a trailing newline."""
@@ -102,6 +119,19 @@ def test_csv_parameter(
     assert subject.parse_as_csv()[0] == ["x", "y", "z"]
 
 
+def test_csv_parameter_long_file(
+    decoy: Decoy, api_version: APIVersion, csv_file_long: bytes
+) -> None:
+    """It should detect the CSV dialect for files of unlimited length."""
+    # The previous implementation of parse_as_csv() passed only the first 1024 bytes of
+    # the CSV file to the dialect sniffer, chopping up a line at an unfortunate position
+    # and causing the sniffer to fail with "Could not determine delimiter".
+    subject = CSVParameter(csv_file_long, api_version)
+    parsed_rows = subject.parse_as_csv()
+    assert len(parsed_rows) == 10
+    assert len(parsed_rows[0]) == 8
+
+
 @pytest.mark.parametrize(
     "csv_file",
     [