Commit 72b5527

Merge pull request #341 from n3d1117/feature/rate-limit-retry
Auto retry while rate limited using tenacity
2 parents: fdd76ee + 5fd70f4

File tree: 4 files changed (+81, -66)

bot/main.py
bot/openai_helper.py
bot/telegram_bot.py
requirements.txt

bot/main.py (1 addition, 0 deletions)
```diff
@@ -16,6 +16,7 @@ def main():
         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
         level=logging.INFO
     )
+    logging.getLogger("httpx").setLevel(logging.WARNING)
 
     # Check if the required environment variables are set
     required_values = ['TELEGRAM_BOT_TOKEN', 'OPENAI_API_KEY']
```
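
python-telegram-bot 20.3 (bumped in this PR's requirements.txt) does its HTTP work through httpx, which in recent versions logs every outgoing request at INFO level; raising that logger's threshold keeps the retry traffic from flooding the bot's log. A minimal sketch of the effect, with illustrative log messages:

```python
import logging

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
# Child loggers inherit the root INFO level unless overridden, so this one
# line silences httpx's per-request messages without touching anything else.
logging.getLogger("httpx").setLevel(logging.WARNING)

logging.info("still visible")                         # root logger stays at INFO
logging.getLogger("httpx").info("no longer emitted")  # below the WARNING threshold
```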

bot/openai_helper.py (9 additions, 1 deletion)
```diff
@@ -12,6 +12,8 @@
 from datetime import date
 from calendar import monthrange
 
+from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type
+
 # Models can be found here: https://platform.openai.com/docs/models/overview
 GPT_3_MODELS = ("gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613")
 GPT_3_16K_MODELS = ("gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k-0613")
@@ -145,6 +147,12 @@ async def get_chat_response_stream(self, chat_id: int, query: str):
 
         yield answer, tokens_used
 
+    @retry(
+        reraise=True,
+        retry=retry_if_exception_type(openai.error.RateLimitError),
+        wait=wait_fixed(20),
+        stop=stop_after_attempt(3)
+    )
     async def __common_get_chat_response(self, chat_id: int, query: str, stream=False):
         """
         Request a response from the GPT model.
@@ -190,7 +198,7 @@ async def __common_get_chat_response(self, chat_id: int, query: str, stream=False):
             )
 
         except openai.error.RateLimitError as e:
-            raise Exception(f"⚠️ _{localized_text('openai_rate_limit', bot_language)}._ ⚠️\n{str(e)}") from e
+            raise e
 
         except openai.error.InvalidRequestError as e:
             raise Exception(f"⚠️ _{localized_text('openai_invalid', bot_language)}._ ⚠️\n{str(e)}") from e
```

bot/telegram_bot.py (68 additions, 64 deletions)
```diff
@@ -388,84 +388,88 @@ async def prompt(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
         total_tokens = 0
 
         if self.config['stream']:
-            await update.effective_message.reply_chat_action(
-                action=constants.ChatAction.TYPING,
-                message_thread_id=get_thread_id(update)
-            )
+            async def _reply():
+                nonlocal total_tokens
+                await update.effective_message.reply_chat_action(
+                    action=constants.ChatAction.TYPING,
+                    message_thread_id=get_thread_id(update)
+                )
 
-            stream_response = self.openai.get_chat_response_stream(chat_id=chat_id, query=prompt)
-            i = 0
-            prev = ''
-            sent_message = None
-            backoff = 0
-            stream_chunk = 0
-
-            async for content, tokens in stream_response:
-                if len(content.strip()) == 0:
-                    continue
-
-                stream_chunks = split_into_chunks(content)
-                if len(stream_chunks) > 1:
-                    content = stream_chunks[-1]
-                    if stream_chunk != len(stream_chunks) - 1:
-                        stream_chunk += 1
-                        try:
-                            await edit_message_with_retry(context, chat_id, str(sent_message.message_id),
-                                                          stream_chunks[-2])
-                        except:
-                            pass
+                stream_response = self.openai.get_chat_response_stream(chat_id=chat_id, query=prompt)
+                i = 0
+                prev = ''
+                sent_message = None
+                backoff = 0
+                stream_chunk = 0
+
+                async for content, tokens in stream_response:
+                    if len(content.strip()) == 0:
+                        continue
+
+                    stream_chunks = split_into_chunks(content)
+                    if len(stream_chunks) > 1:
+                        content = stream_chunks[-1]
+                        if stream_chunk != len(stream_chunks) - 1:
+                            stream_chunk += 1
+                            try:
+                                await edit_message_with_retry(context, chat_id, str(sent_message.message_id),
+                                                              stream_chunks[-2])
+                            except:
+                                pass
+                            try:
+                                sent_message = await update.effective_message.reply_text(
+                                    message_thread_id=get_thread_id(update),
+                                    text=content if len(content) > 0 else "..."
+                                )
+                            except:
+                                pass
+                            continue
+
+                    cutoff = get_stream_cutoff_values(update, content)
+                    cutoff += backoff
+
+                    if i == 0:
                         try:
+                            if sent_message is not None:
+                                await context.bot.delete_message(chat_id=sent_message.chat_id,
+                                                                 message_id=sent_message.message_id)
                             sent_message = await update.effective_message.reply_text(
                                 message_thread_id=get_thread_id(update),
-                                text=content if len(content) > 0 else "..."
+                                reply_to_message_id=get_reply_to_message_id(self.config, update),
+                                text=content
                             )
                         except:
-                            pass
-                        continue
-
-                cutoff = get_stream_cutoff_values(update, content)
-                cutoff += backoff
+                            continue
 
-                if i == 0:
-                    try:
-                        if sent_message is not None:
-                            await context.bot.delete_message(chat_id=sent_message.chat_id,
-                                                             message_id=sent_message.message_id)
-                        sent_message = await update.effective_message.reply_text(
-                            message_thread_id=get_thread_id(update),
-                            reply_to_message_id=get_reply_to_message_id(self.config, update),
-                            text=content
-                        )
-                    except:
-                        continue
+                    elif abs(len(content) - len(prev)) > cutoff or tokens != 'not_finished':
+                        prev = content
 
-                elif abs(len(content) - len(prev)) > cutoff or tokens != 'not_finished':
-                    prev = content
+                        try:
+                            use_markdown = tokens != 'not_finished'
+                            await edit_message_with_retry(context, chat_id, str(sent_message.message_id),
+                                                          text=content, markdown=use_markdown)
 
-                    try:
-                        use_markdown = tokens != 'not_finished'
-                        await edit_message_with_retry(context, chat_id, str(sent_message.message_id),
-                                                      text=content, markdown=use_markdown)
+                        except RetryAfter as e:
+                            backoff += 5
+                            await asyncio.sleep(e.retry_after)
+                            continue
 
-                    except RetryAfter as e:
-                        backoff += 5
-                        await asyncio.sleep(e.retry_after)
-                        continue
+                        except TimedOut:
+                            backoff += 5
+                            await asyncio.sleep(0.5)
+                            continue
 
-                    except TimedOut:
-                        backoff += 5
-                        await asyncio.sleep(0.5)
-                        continue
+                        except Exception:
+                            backoff += 5
+                            continue
 
-                    except Exception:
-                        backoff += 5
-                        continue
+                        await asyncio.sleep(0.01)
 
-                await asyncio.sleep(0.01)
+                    i += 1
+                    if tokens != 'not_finished':
+                        total_tokens = int(tokens)
 
-                i += 1
-                if tokens != 'not_finished':
-                    total_tokens = int(tokens)
+            await wrap_with_indicator(update, context, _reply, constants.ChatAction.TYPING)
 
         else:
             async def _reply():
```
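
The streaming branch used to run its loop inline, so nothing refreshed the typing indicator once the initial chat action expired; with the tenacity change, a rate-limited request can now stall for up to 40 seconds mid-stream. The whole loop therefore moves into a local `_reply()` coroutine (with `nonlocal total_tokens` so the nested function can still update the enclosing counter) and is driven through `wrap_with_indicator`, exactly as the non-streaming branch below already does. That helper's implementation is not part of this diff; a plausible sketch of its shape, under that assumption:

```python
import asyncio


async def wrap_with_indicator(update, context, coroutine, chat_action):
    # Assumed shape of the repo's helper (not shown in this diff): run the
    # wrapped coroutine as a task and keep re-sending the chat action while
    # it runs, because Telegram clears a typing indicator after ~5 seconds.
    task = asyncio.create_task(coroutine())
    while not task.done():
        await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=chat_action)
        try:
            await asyncio.wait_for(asyncio.shield(task), 4.5)
        except asyncio.TimeoutError:
            pass
    await task  # re-raise anything the coroutine itself raised
```

Passing `_reply` itself, rather than an already-running coroutine, lets the helper own the task's lifetime, so the indicator keeps firing even while tenacity is waiting out a rate limit inside the stream.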

requirements.txt (3 additions, 1 deletion)
```diff
@@ -2,4 +2,6 @@ python-dotenv~=1.0.0
 pydub~=0.25.1
 tiktoken==0.4.0
 openai==0.27.8
-python-telegram-bot==20.2
+python-telegram-bot==20.3
+requests~=2.31.0
+tenacity==8.2.2
```
