add additional test scripts

example · example · commit a186a02a3d2a · 2025-07-14T18:11:27.000-04:00
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,5 @@ wheels/
 jobs.db
 
 logs/
+
+.DS_Store
diff --git a/cli_transcribe.py b/cli_transcribe.py
@@ -0,0 +1,44 @@
+import argparse
+import json
+import os
+import whisper
+
+def transcribe_file(input_file, output_file):
+    """Transcribe ``input_file`` with Whisper and save the result as JSON.
+
+    Loads the "base" model, runs it on the audio, and writes an object with
+    the keys "transcription", "language" and "segments" (each segment keeps
+    only its "text", "start" and "end") to ``output_file``.
+    """
+    model = whisper.load_model("base")
+    result = model.transcribe(input_file, verbose=False)
+
+    # Keep only the per-segment fields that go into the output file.
+    segments = [
+        {key: segment[key] for key in ("text", "start", "end")}
+        for segment in result["segments"]
+    ]
+    transcription_data = {
+        "transcription": result["text"],
+        "language": result["language"],
+        "segments": segments,
+    }
+
+    # Serialise with a 4-space indent.
+    with open(output_file, "w") as f:
+        json.dump(transcription_data, f, indent=4)
+
+    print(f"Transcription saved to {output_file}")
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Transcribe an audio file and save the result as JSON.")
+    parser.add_argument("input_file", help="Path to the input audio file.")
+    parser.add_argument("output_file", help="Path to the output JSON file.")
+    args = parser.parse_args()
+
+    if not os.path.exists(args.input_file):
+        # SystemExit with a string prints it to stderr and exits with status 1,
+        # so the message reaches callers that capture stderr.
+        raise SystemExit(f"Error: Input file {args.input_file} does not exist.")
+
+    transcribe_file(args.input_file, args.output_file)
diff --git a/test.py b/test.py
@@ -0,0 +1,39 @@
+import requests
+import json
+import time
+
+audio_path = ".data/test.mp3"
+url = "http://localhost:8002/start"
+
+# Upload the audio file to start a transcription job; the timeouts keep the
+# script from hanging forever if the server is unreachable or stalls.
+with open(audio_path, "rb") as f:
+    response = requests.post(url, files={"audio": f}, timeout=60)
+
+if response.status_code != 200:
+    print("Failed to start transcription:", response.text)
+    raise SystemExit(1)
+
+job_id = response.json()["job_id"]
+print("Job started:", job_id)
+
+# Poll the status endpoint every 2 seconds until the job completes or fails.
+status_url = f"http://localhost:8002/status/{job_id}"
+
+while True:
+    status_response = requests.get(status_url, timeout=10)
+    if status_response.status_code != 200:
+        print("Failed to get status:", status_response.text)
+        raise SystemExit(1)
+    status_data = status_response.json()
+    if status_data["status"] == "completed":
+        print("Transcription completed.")
+        with open(".data/example.json", "w") as out:
+            json.dump(status_data["result"], out, indent=2)
+        print("Transcript saved to .data/example.json")
+        break
+    if status_data["status"] == "failed":
+        print("Transcription failed:", status_data.get("error"))
+        raise SystemExit(1)  # a failed job must not exit with status 0
+    print("Status:", status_data["status"])
+    time.sleep(2)
diff --git a/test_cli_transcribe.py b/test_cli_transcribe.py
@@ -0,0 +1,39 @@
+import os
+import json
+import subprocess
+import unittest
+
+class TestCliTranscribe(unittest.TestCase):
+    """End-to-end check that cli_transcribe.py writes the expected JSON keys."""
+    def setUp(self):
+        self.input_file = ".data/test-cli.mp3"
+        self.output_file = "test_output.json"
+
+    def tearDown(self):
+        if os.path.exists(self.output_file):
+            os.remove(self.output_file)
+
+    def test_transcription_output(self):
+        import sys  # local import: only needed to locate the running interpreter
+        # A bare "python" may be missing from PATH or be another installation.
+        result = subprocess.run(
+            [sys.executable, "cli_transcribe.py", self.input_file, self.output_file],
+            capture_output=True,
+            text=True
+        )
+
+        # The CLI may report its error on either stream; show whichever is set.
+        self.assertEqual(result.returncode, 0, f"CLI failed with error: {result.stderr or result.stdout}")
+
+        self.assertTrue(os.path.exists(self.output_file), "Output file was not created.")
+
+        with open(self.output_file, "r") as f:
+            data = json.load(f)
+
+        self.assertIn("transcription", data, "Missing 'transcription' key in output JSON.")
+        self.assertIn("language", data, "Missing 'language' key in output JSON.")
+        self.assertIn("segments", data, "Missing 'segments' key in output JSON.")
+        self.assertIsInstance(data["segments"], list, "'segments' should be a list.")
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed directly as a script

-Original file line number
+Diff line change
 jobs.db
 logs/
++
 +.DS_Store