Skip to content

Commit db86310

Browse files
committed
correction prompt updated
1 parent 6434d79 commit db86310

File tree

1 file changed

+26
-6
lines changed

1 file changed

+26
-6
lines changed

app/services/correction.py

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,9 @@ def process(self, transcript: Transcript, **kwargs):
1919
logger.info(f"Correcting transcript with {self.provider}...")
2020
keywords = kwargs.get('keywords', [])
2121

22-
# Build the prompt
23-
prompt = self._build_prompt(transcript.outputs['raw'], keywords)
22+
metadata = transcript.source.to_json()
23+
24+
prompt = self._build_prompt(transcript.outputs['raw'], keywords, metadata)
2425

2526
# Call the LLM
2627
response = self.client.chat.completions.create(
@@ -33,10 +34,29 @@ def process(self, transcript: Transcript, **kwargs):
3334
transcript.outputs['corrected_text'] = corrected_text
3435
logger.info("Correction complete.")
3536

def _build_prompt(self, text, keywords, metadata):
    """Assemble the LLM prompt used to correct an ASR-generated transcript.

    The prompt consists of a fixed instruction header, an optional block of
    contextual metadata (title, speakers, tags), an optional bulleted list of
    keywords to prioritize, and finally the transcript itself wrapped in
    explicit start/end markers.

    Args:
        text: Raw transcript text; surrounding whitespace is stripped.
        keywords: Keywords whose spelling/casing must be preserved. May be
            a list, a single string, or empty/None (section is omitted).
        metadata: Mapping that may contain 'title', 'speakers' and 'tags'.
            None is treated as an empty mapping.

    Returns:
        The complete prompt as a single string.
    """

    def _as_items(value):
        # Normalize "a list of strings" inputs. A bare string is a single
        # item: passing it straight to str.join would split it into
        # characters ("Alice" -> "A, l, i, c, e").
        if not value:
            return []
        if isinstance(value, str) or not hasattr(value, "__iter__"):
            return [str(value)]
        # str.join raises TypeError on non-string items, so coerce them;
        # None entries carry no information and are dropped.
        return [str(item) for item in value if item is not None]

    # Callers may have no metadata at all; avoid AttributeError on None.
    metadata = metadata or {}

    parts = [
        "You are a domain expert in Bitcoin and blockchain technologies.\n\n"
        "The following transcript was generated using an automatic speech recognition (ASR) system. "
        "Your task is to correct it based on the contextual metadata provided.\n\n"
        "--- Contextual Metadata ---\n"
    ]

    if metadata.get('title'):
        parts.append(f"Title: {metadata['title']}\n")

    for label, key in (("Speakers", 'speakers'), ("Tags", 'tags')):
        items = _as_items(metadata.get(key))
        if items:
            parts.append(f"{label}: {', '.join(items)}\n")

    parts.append("Please use this metadata to improve the accuracy of your corrections.\n")

    keyword_items = _as_items(keywords)
    if keyword_items:
        parts.append(
            "\nAdditionally, prioritize the following keywords. Ensure they are spelled, cased, and formatted correctly "
            "whenever they appear in the transcript:\n- "
        )
        parts.append("\n- ".join(keyword_items))

    # Explicit delimiters keep the transcript separable from the instructions.
    parts.append(f"\n\n--- Transcript Start ---\n\n{text.strip()}\n\n--- Transcript End ---")
    return "".join(parts)

0 commit comments

Comments
 (0)