@@ -19,8 +19,9 @@ def process(self, transcript: Transcript, **kwargs):
1919 logger .info (f"Correcting transcript with { self .provider } ..." )
2020 keywords = kwargs .get ('keywords' , [])
2121
22- # Build the prompt
23- prompt = self ._build_prompt (transcript .outputs ['raw' ], keywords )
22+ metadata = transcript .source .to_json ()
23+
24+ prompt = self ._build_prompt (transcript .outputs ['raw' ], keywords , metadata )
2425
2526 # Call the LLM
2627 response = self .client .chat .completions .create (
@@ -33,10 +34,29 @@ def process(self, transcript: Transcript, **kwargs):
3334 transcript .outputs ['corrected_text' ] = corrected_text
3435 logger .info ("Correction complete." )
3536
def _build_prompt(self, text, keywords, metadata=None):
    """Build the LLM prompt used to correct an ASR-generated transcript.

    The prompt consists of a fixed instruction header, a contextual
    metadata section (title / speakers / tags, each emitted only when
    present and truthy), an optional keyword list, and finally the
    transcript text wrapped in explicit start/end markers.

    Args:
        text: Raw transcript text; surrounding whitespace is stripped.
        keywords: Iterable of keywords the model should spell, case and
            format correctly. May be empty or None.
        metadata: Mapping that may contain 'title', 'speakers' and 'tags'.
            Defaults to None (treated as an empty mapping) so callers that
            still use the two-argument form keep working.

    Returns:
        The complete prompt as a single string.
    """

    def _as_csv(value):
        # A bare string must not be iterated character by character, and
        # non-string items (e.g. numbers) must not break str.join.
        if isinstance(value, str):
            return value
        return ", ".join(str(item) for item in value)

    metadata = metadata or {}

    parts = [
        "You are a domain expert in Bitcoin and blockchain technologies.\n\n"
        "The following transcript was generated using an automatic speech recognition (ASR) system. "
        "Your task is to correct it based on the contextual metadata provided.\n\n"
        "--- Contextual Metadata ---\n"
    ]

    if metadata.get('title'):
        parts.append(f"Title: {metadata['title']}\n")
    if metadata.get('speakers'):
        parts.append(f"Speakers: {_as_csv(metadata['speakers'])}\n")
    if metadata.get('tags'):
        parts.append(f"Tags: {_as_csv(metadata['tags'])}\n")

    parts.append("Please use this metadata to improve the accuracy of your corrections.\n")

    if keywords:
        parts.append(
            "\nAdditionally, prioritize the following keywords. Ensure they are spelled, cased, and formatted correctly "
            "whenever they appear in the transcript:\n- "
        )
        parts.append("\n- ".join(str(keyword) for keyword in keywords))

    # Explicit start/end markers delimit the transcript from the instructions.
    parts.append(f"\n\n--- Transcript Start ---\n\n{text.strip()}\n\n--- Transcript End ---")
    return "".join(parts)
0 commit comments