Skip to content

Commit ae4b555

Browse files
committed
[docs] Add Google-style docstrings for dspy/adapters/types/audio.py Audio class #9086
1 parent 02b148e commit ae4b555

File tree

1 file changed

+128
-21
lines changed

1 file changed

+128
-21
lines changed

dspy/adapters/types/audio.py

Lines changed: 128 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,54 @@ def _normalize_audio_format(audio_format: str) -> str:
2323

2424

2525
class Audio(Type):
26+
"""A type for representing audio data in DSPy.
27+
28+
The Audio class provides a standardized way to handle audio inputs for language models
29+
that support audio processing. Audio data is stored as base64-encoded strings along
30+
with format metadata.
31+
32+
Attributes:
33+
data: Base64-encoded audio data.
34+
audio_format: The audio format (e.g., "wav", "mp3", "flac").
35+
36+
Example:
37+
Basic usage with a local file:
38+
39+
```python
40+
import dspy
41+
42+
dspy.configure(lm=dspy.LM("openai/gpt-4o-audio-preview"))
43+
44+
class TranscribeAudio(dspy.Signature):
45+
audio: dspy.Audio = dspy.InputField()
46+
transcription: str = dspy.OutputField()
47+
48+
# Create Audio from a local file
49+
audio = dspy.Audio.from_file("speech.wav")
50+
51+
predict = dspy.Predict(TranscribeAudio)
52+
result = predict(audio=audio)
53+
```
54+
55+
Example:
56+
Creating Audio from different sources:
57+
58+
```python
59+
import dspy
60+
61+
# From a URL
62+
audio = dspy.Audio.from_url("https://example.com/audio.mp3")
63+
64+
# From a local file path (the source kind is auto-detected from the string)
65+
audio = dspy.Audio("path/to/audio.wav")
66+
67+
# From a numpy array (requires soundfile)
68+
import numpy as np
69+
samples = np.random.randn(16000) # 1 second of audio at 16kHz
70+
audio = dspy.Audio.from_array(samples, sampling_rate=16000)
71+
```
72+
"""
73+
2674
data: str
2775
audio_format: str
2876

@@ -32,18 +80,20 @@ class Audio(Type):
3280
)
3381

3482
def format(self) -> list[dict[str, Any]]:
83+
"""Format the audio data for consumption by language models.
84+
85+
Returns:
86+
A single-element list holding one "input_audio" content block whose
87+
"input_audio" payload carries the base64 "data" and the audio "format".
88+
89+
Raises:
90+
ValueError: If the audio data cannot be formatted.
91+
"""
3592
try:
3693
data = self.data
3794
except Exception as e:
3895
raise ValueError(f"Failed to format audio for DSPy: {e}")
39-
return [{
40-
"type": "input_audio",
41-
"input_audio": {
42-
"data": data,
43-
"format": self.audio_format
44-
}
45-
}]
46-
96+
return [{"type": "input_audio", "input_audio": {"data": data, "format": self.audio_format}}]
4797

4898
@pydantic.model_validator(mode="before")
4999
@classmethod
@@ -57,8 +107,20 @@ def validate_input(cls, values: Any) -> Any:
57107

58108
@classmethod
59109
def from_url(cls, url: str) -> "Audio":
60-
"""
61-
Download an audio file from URL and encode it as base64.
110+
"""Create an Audio instance by downloading from a URL.
111+
112+
Downloads the audio file from the specified URL, determines the format
113+
from the Content-Type header, and encodes the content as base64.
114+
115+
Args:
116+
url: The URL of the audio file to download.
117+
118+
Returns:
119+
An Audio instance containing the base64-encoded audio data.
120+
121+
Raises:
122+
ValueError: If the Content-Type is not an audio MIME type.
123+
requests.HTTPError: If the HTTP request fails.
62124
"""
63125
response = requests.get(url)
64126
response.raise_for_status()
@@ -74,8 +136,19 @@ def from_url(cls, url: str) -> "Audio":
74136

75137
@classmethod
76138
def from_file(cls, file_path: str) -> "Audio":
77-
"""
78-
Read local audio file and encode it as base64.
139+
"""Create an Audio instance from a local file.
140+
141+
Reads the audio file from disk, determines the format from the file
142+
extension, and encodes the content as base64.
143+
144+
Args:
145+
file_path: The path to the local audio file.
146+
147+
Returns:
148+
An Audio instance containing the base64-encoded audio data.
149+
150+
Raises:
151+
ValueError: If the file does not exist or has an unsupported MIME type.
79152
"""
80153
if not os.path.isfile(file_path):
81154
raise ValueError(f"File not found: {file_path}")
@@ -95,11 +168,26 @@ def from_file(cls, file_path: str) -> "Audio":
95168
return cls(data=encoded_data, audio_format=audio_format)
96169

97170
@classmethod
98-
def from_array(
99-
cls, array: Any, sampling_rate: int, format: str = "wav"
100-
) -> "Audio":
101-
"""
102-
Process numpy-like array and encode it as base64. Uses sampling rate and audio format for encoding.
171+
def from_array(cls, array: Any, sampling_rate: int, format: str = "wav") -> "Audio":
172+
"""Create an Audio instance from a numpy array.
173+
174+
Converts a numpy-like array of audio samples into an Audio instance
175+
by encoding it with the specified format and sampling rate.
176+
177+
Args:
178+
array: A numpy-like array containing audio samples.
179+
sampling_rate: The sampling rate in Hz (e.g., 16000 for 16kHz).
180+
format: The output audio format. Defaults to "wav".
181+
182+
Returns:
183+
An Audio instance containing the base64-encoded audio data.
184+
185+
Raises:
186+
ImportError: If the soundfile library is not installed.
187+
188+
Note:
189+
This method requires the ``soundfile`` library to be installed.
190+
Install it with ``pip install soundfile``.
103191
"""
104192
if not SF_AVAILABLE:
105193
raise ImportError("soundfile is required to process audio arrays.")
@@ -122,11 +210,30 @@ def __repr__(self) -> str:
122210
length = len(self.data)
123211
return f"Audio(data=<AUDIO_BASE_64_ENCODED({length})>, audio_format='{self.audio_format}')"
124212

213+
125214
def encode_audio(audio: Union[str, bytes, dict, "Audio", Any], sampling_rate: int = 16000, format: str = "wav") -> dict:
126-
"""
127-
Encode audio to a dict with 'data' and 'audio_format'.
128-
129-
Accepts: local file path, URL, data URI, dict, Audio instance, numpy array, or bytes (with known format).
215+
"""Encode audio from various sources into a standardized dictionary format.
216+
217+
This function accepts multiple input types and normalizes them into a dictionary
218+
containing base64-encoded audio data and format information.
219+
220+
Args:
221+
audio: The audio input. Supported types include:
222+
223+
- ``str``: Local file path, HTTP(S) URL, or data URI
224+
- ``bytes``: Raw audio bytes
225+
- ``dict``: Dictionary with "data" and "audio_format" keys
226+
- ``Audio``: An existing Audio instance
227+
- ``numpy.ndarray``: Audio samples as a numpy array (requires soundfile)
228+
229+
sampling_rate: The sampling rate in Hz for numpy array inputs. Defaults to 16000.
230+
format: The audio format for numpy array or bytes inputs. Defaults to "wav".
231+
232+
Returns:
233+
A dictionary with "data" (base64-encoded string) and "audio_format" keys.
234+
235+
Raises:
236+
ValueError: If the input type is unsupported or the data URI is malformed.
130237
"""
131238
if isinstance(audio, dict) and "data" in audio and "audio_format" in audio:
132239
return audio

0 commit comments

Comments
 (0)