Skip to content

Commit eb47002

Browse files
committed
feat(kci-dockerwatch): Add message throttling
When an error keeps repeating, the bot can become too noisy, so add message throttling. Signed-off-by: Denys Fedoryshchenko <[email protected]>
1 parent bd52477 commit eb47002

File tree

1 file changed

+34
-6
lines changed

1 file changed

+34
-6
lines changed

tools/kci-dockerwatch.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,30 +14,57 @@
1414
import threading
1515

1616
active_containers = []
17-
#finished_containers = []
1817
thread = {}
1918
current_date = time.strftime('%Y-%m-%d', time.localtime())
2019
tlock = threading.Lock()
21-
crash_keywords=['Traceback (most recent call last)']
20+
crash_keywords = ['Traceback (most recent call last)']
2221

2322

2423
def message_bot(msg):
    '''
    Send msg to the notification bot through the ./kci-slackbot.py script.

    The message is handed over as a single argv element instead of being
    pasted into a shell command line, so quotes, backticks or "$" inside
    msg (for example coming from a container name) can neither break the
    command nor be interpreted by a shell.

    Delivery is best-effort: a failure to launch the script is logged and
    not raised, and the script's exit status is ignored.
    '''
    # Imported locally so this function does not depend on a change to the
    # file's top-level import block.
    import subprocess

    try:
        subprocess.run(['./kci-slackbot.py', '--message', str(msg)], check=False)
    except OSError as exc:
        # Script missing or not executable; keep the caller running.
        logging.error(f'Failed to run kci-slackbot.py: {exc}')
2625

2726

27+
# --- Bot message throttling state, used only by is_msg_throttle() ---
# time.time() timestamp at which the current window started; 0 = no window yet.
THROTTLE_WIN_START = 0
# Number of messages counted inside the current window.
THROTTLE_WIN_COUNT = 0
# Length of one throttling window in seconds (600 s = 10 minutes).
THROTTLE_WIN_SIZE = 600
# Number of messages let through per window before throttling starts.
THROTTLE_WIN_COUNT_MAX = 5
# Protects the two counters above; is_msg_throttle() is called from the
# container logger threads.
_THROTTLE_LOCK = threading.Lock()


def is_msg_throttle():
    '''
    Count one message and report whether it has to be throttled.

    Messages are counted in windows of THROTTLE_WIN_SIZE seconds. The first
    THROTTLE_WIN_COUNT_MAX messages of a window are allowed (False is
    returned); every further message in the same window is throttled (True
    is returned). When the counter reaches exactly THROTTLE_WIN_COUNT_MAX a
    one-time "Message throttled" notice is logged and sent to the bot; that
    message itself is still allowed.
    '''
    global THROTTLE_WIN_START, THROTTLE_WIN_COUNT
    now = time.time()
    with _THROTTLE_LOCK:
        # Open the first window, or start a new one once the old one expired.
        if THROTTLE_WIN_START == 0:
            THROTTLE_WIN_START = now
        if now - THROTTLE_WIN_START > THROTTLE_WIN_SIZE:
            THROTTLE_WIN_START = now
            THROTTLE_WIN_COUNT = 0
        THROTTLE_WIN_COUNT += 1
        count = THROTTLE_WIN_COUNT
    # The notice is sent outside the lock so a slow bot call does not block
    # other threads that only need to bump the counter.
    if count == THROTTLE_WIN_COUNT_MAX:
        logging.error(f'Message throttled, count: {count}')
        message_bot(f'Message throttled, count: {count}')
    return count > THROTTLE_WIN_COUNT_MAX
50+
51+
2852
def container_logger_thread(container, logpath):
    '''
    Container logger thread.

    Streams the log output of container, appends every line to the file at
    logpath and checks each line (case-insensitively) against
    crash_keywords. A match is always logged through logging.error; it is
    also sent to the bot unless is_msg_throttle() says it must be
    suppressed.

    The container id is kept in active_containers while the stream is being
    read and removed again once the stream ends.

    container: object with .id, .name and .logs(stream=True) yielding bytes
               (presumably a docker SDK container - verify against caller).
    logpath:   path of the log file, opened in append mode.
    '''
    active_containers.append(container.id)
    with open(logpath, 'a') as logfile:
        for line in container.logs(stream=True):
            # Decode once per line. Container output is arbitrary bytes, so
            # invalid UTF-8 is escaped (\xNN) instead of raising and killing
            # this thread with the id still listed in active_containers.
            text = line.decode('utf-8', errors='backslashreplace')
            logfile.write(text)
            # detect crash keywords, lowercase both
            lowered = text.lower()
            if not any(keyword.lower() in lowered for keyword in crash_keywords):
                continue
            if is_msg_throttle():
                logging.error(f'Crash detected in container: {container.name} id: {container.id}, but throttled')
            else:
                logging.error(f'Crash detected in container: {container.name} id: {container.id}')
                message_bot(f'Crash detected in container: {container.name} id: {container.id}')
    with tlock:
        active_containers.remove(container.id)
40-
#finished_containers.append(container.id)
4168

4269

4370
def get_containers_by_pattern(client, pattern, exclude=None):
@@ -105,5 +132,6 @@ def main():
105132
current_date = time.strftime('%Y-%m-%d', time.localtime())
106133
time.sleep(1)
107134

135+
108136
if __name__ == '__main__':
109137
main()

0 commit comments

Comments
 (0)