|
14 | 14 | import threading |
15 | 15 |
|
# IDs of the containers whose log-follower thread is currently running.
active_containers = []
# NOTE(review): not referenced in the visible part of the file; presumably
# maps containers to their follower threads -- confirm against main().
thread = {}
# Local calendar date (YYYY-MM-DD) captured when the module is loaded.
current_date = time.strftime('%Y-%m-%d', time.localtime())
# Held by follower threads while they remove themselves from active_containers.
tlock = threading.Lock()
# Substrings (matched case-insensitively) that mark a log line as a crash.
crash_keywords = ['Traceback (most recent call last)']
22 | 21 |
|
23 | 22 |
|
def message_bot(msg):
    '''Send msg to the chat bot through the ./kci-slackbot.py helper script.

    The message is handed over as a single argv entry instead of being
    spliced into a shell command line, so quotes, backticks or `$(...)`
    inside msg (for example from a container name) are delivered literally
    and are never interpreted by a shell.

    Delivery is best effort: a missing or failing helper is logged and the
    function still returns None, it never raises to the caller.
    '''
    import subprocess  # local import keeps this block self-contained

    try:
        result = subprocess.run(
            ['./kci-slackbot.py', '--message', str(msg)],
            check=False,
        )
    except (OSError, ValueError) as err:
        # os.system() never raised when the helper was missing or the
        # message was odd; keep notification failures non-fatal.
        logging.error(f'Failed to run kci-slackbot.py: {err}')
        return
    if result.returncode != 0:
        logging.error(f'kci-slackbot.py exited with code {result.returncode}')
26 | 25 |
|
27 | 26 |
|
# Start (time.time() value) of the current throttle window; 0 = not started.
THROTTLE_WIN_START = 0
# Number of is_msg_throttle() calls counted in the current window.
THROTTLE_WIN_COUNT = 0
# Window length in seconds (600 s = 10 minutes).
THROTTLE_WIN_SIZE = 600
# Calls allowed per window before further messages are suppressed.
THROTTLE_WIN_COUNT_MAX = 5
# Protects the two mutable counters above; is_msg_throttle() is called from
# the per-container logger threads.
_THROTTLE_LOCK = threading.Lock()


def is_msg_throttle():
    '''Count one message and tell whether it should be suppressed.

    Calls are counted per window of THROTTLE_WIN_SIZE seconds. The first
    THROTTLE_WIN_COUNT_MAX calls of a window return False; when the counter
    reaches exactly THROTTLE_WIN_COUNT_MAX a one-off 'Message throttled'
    notice is logged and sent through message_bot(). Every further call
    returns True until the window expires and the counter is reset.

    Returns:
        True if the caller should drop its message, False otherwise.
    '''
    global THROTTLE_WIN_START, THROTTLE_WIN_COUNT
    # Read the clock once so every comparison below uses the same instant.
    now = time.time()
    with _THROTTLE_LOCK:
        if THROTTLE_WIN_START == 0:
            # First call ever: open the initial window.
            THROTTLE_WIN_START = now
        elif now - THROTTLE_WIN_START > THROTTLE_WIN_SIZE:
            # Window expired: open a new one and start counting again.
            THROTTLE_WIN_START = now
            THROTTLE_WIN_COUNT = 0
        THROTTLE_WIN_COUNT += 1
        count = THROTTLE_WIN_COUNT
    # Notify outside the lock so a slow bot call cannot stall other threads.
    if count == THROTTLE_WIN_COUNT_MAX:
        logging.error(f'Message throttled, count: {count}')
        message_bot(f'Message throttled, count: {count}')
    return count > THROTTLE_WIN_COUNT_MAX
| 50 | + |
| 51 | + |
def container_logger_thread(container, logpath):
    '''Follow a container's log, append it to logpath and report crashes.

    The container id is listed in active_containers while the thread runs
    and is removed again on exit, including when the stream or the log file
    raises, so a failed follower does not leave a stale entry behind.

    Args:
        container: object exposing .id, .name and .logs(stream=True), where
            logs() yields bytes chunks (presumably a Docker SDK container
            -- confirm against the caller).
        logpath: path of the file the log text is appended to.
    '''
    active_containers.append(container.id)
    try:
        # Write UTF-8 explicitly so the output does not depend on the locale.
        with open(logpath, 'a', encoding='utf-8') as logfile:
            for raw in container.logs(stream=True):
                # Decode once; invalid bytes become U+FFFD instead of
                # killing the thread with UnicodeDecodeError.
                text = raw.decode('utf-8', errors='replace')
                logfile.write(text)
                # detect crash keywords, lowercase both
                lowered = text.lower()
                if not any(keyword.lower() in lowered for keyword in crash_keywords):
                    continue
                if is_msg_throttle():
                    logging.error(f'Crash detected in container: {container.name} id: {container.id}, but throttled')
                    continue
                logging.error(f'Crash detected in container: {container.name} id: {container.id}')
                message_bot(f'Crash detected in container: {container.name} id: {container.id}')
    finally:
        with tlock:
            active_containers.remove(container.id)
41 | 68 |
|
42 | 69 |
|
43 | 70 | def get_containers_by_pattern(client, pattern, exclude=None): |
@@ -105,5 +132,6 @@ def main(): |
105 | 132 | current_date = time.strftime('%Y-%m-%d', time.localtime()) |
106 | 133 | time.sleep(1) |
107 | 134 |
|
| 135 | + |
# Script entry point: start the monitor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments