Correction prompt updated to include transcript metadata (title, speakers, tags) and explicit transcript delimiters

staru09 · staru09 · commit db8631096b5e · 2025-08-04T22:54:59.000+05:30
diff --git a/app/services/correction.py b/app/services/correction.py
@@ -19,8 +19,9 @@ def process(self, transcript: Transcript, **kwargs):
         logger.info(f"Correcting transcript with {self.provider}...")
         keywords = kwargs.get('keywords', [])
         
-        # Build the prompt
-        prompt = self._build_prompt(transcript.outputs['raw'], keywords)
+        metadata = transcript.source.to_json()
+
+        prompt = self._build_prompt(transcript.outputs['raw'], keywords, metadata)
 
         # Call the LLM
         response = self.client.chat.completions.create(
@@ -33,10 +34,29 @@ def process(self, transcript: Transcript, **kwargs):
         transcript.outputs['corrected_text'] = corrected_text
         logger.info("Correction complete.")
 
-    def _build_prompt(self, text, keywords):
-        prompt = "Please correct the following transcript for punctuation, grammar, and spelling. Do not change the content or the speaker labels."
+    def _build_prompt(self, text, keywords, metadata):
+        prompt = (
+            "You are a domain expert in Bitcoin and blockchain technologies.\n\n"
+            "The following transcript was generated using an automatic speech recognition (ASR) system. "
+            "Your task is to correct it based on the contextual metadata provided.\n\n"
+            "--- Contextual Metadata ---\n"
+        )
+
+        if metadata.get('title'):
+            prompt += f"Title: {metadata['title']}\n"
+        if metadata.get('speakers'):
+            prompt += f"Speakers: {', '.join(metadata['speakers'])}\n"
+        if metadata.get('tags'):
+            prompt += f"Tags: {', '.join(metadata['tags'])}\n"
+        
+        prompt += "Please use this metadata to improve the accuracy of your corrections.\n"
+
         if keywords:
-            prompt += "\n\nPlease pay special attention to the following keywords and ensure they are spelled correctly:\n- "
+            prompt += (
+                "\nAdditionally, prioritize the following keywords. Ensure they are spelled, cased, and formatted correctly "
+                "whenever they appear in the transcript:\n- "
+            )
             prompt += "\n- ".join(keywords)
-        prompt += f"\n\n---\n\n{text}"
+
+        prompt += f"\n\n--- Transcript Start ---\n\n{text.strip()}\n\n--- Transcript End ---"
         return prompt