@@ -2762,8 +2762,32 @@ def test_pipeline_ort_model(self, test_name: str, model_arch: str, use_cache: bo
27622762 self .assertIsInstance (outputs [0 ]["generated_text" ], str )
27632763 self .assertTrue (len (outputs [0 ]["generated_text" ]) > len (text ))
27642764
2765+ if model_arch == "llama" :
2766+ with tempfile .TemporaryDirectory () as tmpdir :
2767+ pipe .save_pretrained (tmpdir )
2768+ model_kwargs = {"use_cache" : use_cache , "use_io_binding" : use_io_binding }
2769+ pipe = pipeline (
2770+ "text-generation" ,
2771+ model = tmpdir ,
2772+ model_kwargs = model_kwargs ,
2773+ accelerator = "ort" ,
2774+ )
2775+ outputs_local_model = pipe (text )
2776+ self .assertEqual (outputs [0 ]["generated_text" ], outputs_local_model [0 ]["generated_text" ])
2777+
27652778 gc .collect ()
27662779
def test_load_pipeline(self):
    """Smoke test: build an ORT-accelerated text-generation pipeline directly
    from a Hub model id (ONNX weights on the ``onnx`` revision) and check that
    it produces a string continuation for a simple prompt.
    """
    prompt = "this is an example input"
    ort_pipe = pipeline(
        "text-generation",
        model="optimum-internal-testing/tiny-random-llama",
        revision="onnx",
        accelerator="ort",
    )
    generated = ort_pipe(prompt)
    # The pipeline returns a list of dicts; the generated text must be a str.
    self.assertIsInstance(generated[0]["generated_text"], str)
27672791 @pytest .mark .run_in_series
27682792 def test_pipeline_model_is_none (self ):
27692793 pipe = pipeline ("text-generation" )
@@ -4152,8 +4176,30 @@ def test_pipeline_text_generation(self, test_name: str, model_arch: str, use_cac
41524176 self .assertEqual (pipe .device , onnx_model .device )
41534177 self .assertIsInstance (outputs [0 ]["translation_text" ], str )
41544178
4179+ if model_arch == "t5" :
4180+ with tempfile .TemporaryDirectory () as tmpdir :
4181+ pipe .save_pretrained (tmpdir )
4182+ model_kwargs = {"use_cache" : use_cache }
4183+ pipe = pipeline (
4184+ "translation_en_to_de" ,
4185+ model = tmpdir ,
4186+ model_kwargs = model_kwargs ,
4187+ accelerator = "ort" ,
4188+ )
4189+ outputs_local_model = pipe (text )
4190+ self .assertEqual (outputs [0 ]["translation_text" ], outputs_local_model [0 ]["translation_text" ])
4191+
41554192 gc .collect ()
41564193
def test_load_pipeline(self):
    """Smoke test: build an ORT-accelerated text2text-generation pipeline from
    a Hub model id that ships ONNX weights and check that it returns a string
    output for a simple prompt.
    """
    prompt = "this is an example input"
    ort_pipe = pipeline(
        "text2text-generation",
        model="echarlaix/t5-small-onnx",
        accelerator="ort",
    )
    generated = ort_pipe(prompt)
    # The pipeline returns a list of dicts; the generated text must be a str.
    self.assertIsInstance(generated[0]["generated_text"], str)
41574203 @pytest .mark .run_in_series
41584204 def test_pipeline_model_is_none (self ):
41594205 # Text2text generation
0 commit comments