Skip to content

Commit a186a02

Browse files
author
example
committed
add additional test scripts
1 parent b38638a commit a186a02

File tree

4 files changed

+124
-0
lines changed

4 files changed

+124
-0
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,3 +13,5 @@ wheels/
1313
jobs.db
1414

1515
logs/
16+
17+
.DS_Store

cli_transcribe.py

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,44 @@
1+
import argparse
2+
import json
3+
import os
4+
import whisper
5+
6+
def transcribe_file(input_file, output_file):
    """Transcribe an audio file with Whisper and save the result as JSON.

    Loads the Whisper "base" model, transcribes ``input_file`` and writes a
    JSON document to ``output_file`` with three keys: ``transcription`` (the
    result's ``text``), ``language`` (the result's ``language``) and
    ``segments`` (a list of objects carrying each segment's ``text``,
    ``start`` and ``end``).

    Args:
        input_file: Path to the audio file to transcribe.
        output_file: Path of the JSON file to write; an existing file is
            overwritten.
    """
    # Load the Whisper model
    model = whisper.load_model("base")

    # Transcribe the audio file
    result = model.transcribe(input_file, verbose=False)

    # Keep only the fields consumers need from each segment.
    transcription_data = {
        "transcription": result["text"],
        "language": result["language"],
        "segments": [
            {
                "text": segment["text"],
                "start": segment["start"],
                "end": segment["end"],
            }
            for segment in result["segments"]
        ],
    }

    # Write UTF-8 explicitly and keep non-ASCII characters literal so that
    # transcripts in non-English languages stay human-readable instead of
    # being turned into \uXXXX escapes; the explicit encoding also avoids
    # depending on the platform's locale default.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(transcription_data, f, indent=4, ensure_ascii=False)

    print(f"Transcription saved to {output_file}")
32+
33+
if __name__ == "__main__":
    # Command-line entry point: transcribe one audio file into a JSON file.
    parser = argparse.ArgumentParser(description="Transcribe an audio file and save the result as JSON.")
    parser.add_argument("input_file", help="Path to the input audio file.")
    parser.add_argument("output_file", help="Path to the output JSON file.")

    args = parser.parse_args()

    # Fail fast before the model is loaded. Raising SystemExit with a string
    # prints the message to stderr and exits with status 1, and does not rely
    # on the interactive-only exit() helper injected by the site module.
    if not os.path.exists(args.input_file):
        raise SystemExit(f"Error: Input file {args.input_file} does not exist.")

    transcribe_file(args.input_file, args.output_file)

test.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
import requests
2+
import json
3+
import time
4+
5+
# Manual smoke test for the transcription service: upload an audio file,
# poll the job until it finishes, and save the resulting transcript.
BASE_URL = "http://localhost:8002"
# Seconds to wait for the server on each HTTP call; without a timeout
# requests blocks indefinitely if the service stops responding.
REQUEST_TIMEOUT = 60
# Seconds to sleep between two status polls.
POLL_INTERVAL = 2

audio_path = ".data/test.mp3"
url = f"{BASE_URL}/start"

with open(audio_path, "rb") as f:
    files = {"audio": f}
    response = requests.post(url, files=files, timeout=REQUEST_TIMEOUT)

if response.status_code != 200:
    print("Failed to start transcription:", response.text)
    raise SystemExit(1)

job_id = response.json()["job_id"]
print("Job started:", job_id)

# Poll for status
status_url = f"{BASE_URL}/status/{job_id}"

while True:
    status_response = requests.get(status_url, timeout=REQUEST_TIMEOUT)
    if status_response.status_code != 200:
        print("Failed to get status:", status_response.text)
        raise SystemExit(1)

    status_data = status_response.json()
    status = status_data["status"]

    if status == "completed":
        print("Transcription completed.")
        with open(".data/example.json", "w", encoding="utf-8") as out:
            json.dump(status_data["result"], out, indent=2)
        print("Transcript saved to .data/example.json")
        break

    if status == "failed":
        print("Transcription failed:", status_data.get("error"))
        # A failed job must not look like a successful run to the caller.
        raise SystemExit(1)

    # Any other status means the job is still in progress.
    print("Status:", status)
    time.sleep(POLL_INTERVAL)

test_cli_transcribe.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
import os
2+
import json
3+
import subprocess
4+
import unittest
5+
6+
class TestCliTranscribe(unittest.TestCase):
    """End-to-end check of the ``cli_transcribe.py`` command-line script.

    Runs the script in a subprocess against a sample audio file and verifies
    that it exits successfully and writes a JSON file with the expected
    top-level keys.
    """

    def setUp(self):
        """Define the sample input path and the output path used by the test."""
        self.input_file = ".data/test-cli.mp3"
        self.output_file = "test_output.json"

    def tearDown(self):
        """Remove the output file so repeated runs start from a clean state."""
        if os.path.exists(self.output_file):
            os.remove(self.output_file)

    def test_transcription_output(self):
        """The CLI exits with 0 and produces JSON with the documented keys."""
        # Imported locally so this block does not depend on the module's
        # top-level import list.
        import sys

        # Run the CLI script with the interpreter that is running the tests;
        # a bare "python" may be missing from PATH or point at a different
        # environment without the script's dependencies.
        result = subprocess.run(
            [sys.executable, "cli_transcribe.py", self.input_file, self.output_file],
            capture_output=True,
            text=True,
        )

        # Check if the script ran successfully; fall back to stdout when
        # stderr is empty so the failure message is never blank.
        self.assertEqual(
            result.returncode,
            0,
            f"CLI failed with error: {result.stderr or result.stdout}",
        )

        # Check if the output file was created
        self.assertTrue(os.path.exists(self.output_file), "Output file was not created.")

        # Validate the JSON structure of the output file
        with open(self.output_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        self.assertIn("transcription", data, "Missing 'transcription' key in output JSON.")
        self.assertIn("language", data, "Missing 'language' key in output JSON.")
        self.assertIn("segments", data, "Missing 'segments' key in output JSON.")
        self.assertIsInstance(data["segments"], list, "'segments' should be a list.")


if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)