Skip to content

Commit e6a81e8

Browse files
authored
Merge pull request #80 from LRWeber/timeout-fix
GitHub Read Timeout Handling
2 parents: 536a72c + f898c68; commit: e6a81e8

File tree

1 file changed

+29
-19
lines changed

1 file changed

+29
-19
lines changed

scraper/github/queryManager.py

Lines changed: 29 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -262,20 +262,33 @@ def queryGitHub(
262262
_vPrint(
263263
(verbosity >= 0), "Sending %s query..." % ("REST" if rest else "GraphQL")
264264
)
265-
response = self._submitQuery(
266-
gitquery,
267-
gitvars=gitvars,
268-
verbose=(verbosity > 0),
269-
rest=rest,
270-
headers=headers,
271-
)
265+
try:
266+
response = self._submitQuery(
267+
gitquery,
268+
gitvars=gitvars,
269+
verbose=(verbosity > 0),
270+
rest=rest,
271+
headers=headers,
272+
)
273+
except requests.exceptions.ReadTimeout: # Handles intermittent response delays
274+
_vPrint((verbosity >= 0), "Read timed out.")
275+
_vPrint((verbosity >= 0), "Repeating query...")
276+
return self.queryGitHub(
277+
gitquery,
278+
gitvars=gitvars,
279+
verbosity=verbosity,
280+
paginate=paginate,
281+
cursorVar=cursorVar,
282+
keysToList=keysToList,
283+
rest=rest,
284+
requestCount=requestCount,
285+
pageNum=(pageNum - 1), # retry same page
286+
headers=headers,
287+
)
272288
_vPrint((verbosity >= 0), "Checking response...")
273289
_vPrint((verbosity >= 0), "HTTP STATUS %s" % (response["statusTxt"]))
274290
statusNum = response["statusNum"]
275291

276-
# Decrement page count before error checks to properly reflect any repeated queries
277-
pageNum -= 1
278-
279292
# Make sure the query limit didn't run out
280293
try:
281294
apiStatus = {
@@ -297,7 +310,7 @@ def queryGitHub(
297310
keysToList=keysToList,
298311
rest=rest,
299312
requestCount=(requestCount - 1), # not counted against retries
300-
pageNum=pageNum,
313+
pageNum=(pageNum - 1), # retry same page
301314
headers=headers,
302315
)
303316
except KeyError: # Handles error responses without X-RateLimit data
@@ -339,8 +352,8 @@ def queryGitHub(
339352
cursorVar=cursorVar,
340353
keysToList=keysToList,
341354
rest=rest,
342-
requestCount=(requestCount),
343-
pageNum=pageNum,
355+
requestCount=requestCount,
356+
pageNum=(pageNum - 1), # retry same page
344357
headers=headers,
345358
)
346359
# Check for accepted but not yet processed, usually due to un-cached data
@@ -369,7 +382,7 @@ def queryGitHub(
369382
keysToList=keysToList,
370383
rest=rest,
371384
requestCount=requestCount,
372-
pageNum=pageNum,
385+
pageNum=(pageNum - 1), # retry same page
373386
headers=headers,
374387
)
375388
# Check for server error responses
@@ -398,7 +411,7 @@ def queryGitHub(
398411
keysToList=keysToList,
399412
rest=rest,
400413
requestCount=requestCount,
401-
pageNum=pageNum,
414+
pageNum=(pageNum - 1), # retry same page
402415
headers=headers,
403416
)
404417
# Check for other error responses
@@ -443,17 +456,14 @@ def queryGitHub(
443456
keysToList=keysToList,
444457
rest=rest,
445458
requestCount=requestCount,
446-
pageNum=pageNum,
459+
pageNum=(pageNum - 1), # retry same page
447460
headers=headers,
448461
)
449462

450463
raise RuntimeError(
451464
"GraphQL API error.\n%s" % (json.dumps(outObj["errors"]))
452465
)
453466

454-
# Re-increment page count before the next page query
455-
pageNum += 1
456-
457467
# Pagination
458468
if paginate:
459469
if rest and response["linkDict"]:

0 commit comments

Comments
 (0)