From ad3e0877148fc6585d0a1b36116fa383334e8ae3 Mon Sep 17 00:00:00 2001 From: Roy Hvaara Date: Sat, 14 Sep 2024 19:41:40 +0200 Subject: [PATCH 1/4] Increase frequency of progress checker --- bigcodebench/evaluate.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py index 61e2a43f..7b926e24 100644 --- a/bigcodebench/evaluate.py +++ b/bigcodebench/evaluate.py @@ -204,14 +204,26 @@ def evaluate(flags): assert len(completion_id) == len(problems), "Missing problems in samples" def stucking_checker(): + unchanged_duration = 0 + last_size = len(remainings) + while remainings: - last_size = len(remainings) - time.sleep(240) - if last_size != len(remainings) or len(remainings) == 0: - continue - # Potential stucking - warn("No samples had finished testing in the last 240s") - warn(f"{len(remainings)} samples to be tested: {remainings}") + time.sleep(1) + current_size = len(remainings) + + if current_size != last_size or current_size == 0: + # Reset the unchanged duration if something has changed + unchanged_duration = 0 + last_size = current_size + else: + # Increment the duration if nothing has changed + unchanged_duration += 1 + + if unchanged_duration >= 240: + # Output warnings after 240 seconds of no change + warn("No samples have finished testing in the last 240s") + warn(f"{len(remainings)} samples to be tested: {remainings}") + unchanged_duration = 0 # Reset after warning threading.Thread(target=stucking_checker).start() From bf58e0f124365f5f916f8fb5fa931a15cbcb6868 Mon Sep 17 00:00:00 2001 From: Roy Hvaara Date: Sat, 14 Sep 2024 20:07:53 +0200 Subject: [PATCH 2/4] Wait on futures in progress checker --- bigcodebench/evaluate.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py index 7b926e24..0451d2dd 100644 --- a/bigcodebench/evaluate.py +++ b/bigcodebench/evaluate.py @@ -6,7 +6,7 @@ import threading import time from collections import Counter, defaultdict -from concurrent.futures import ProcessPoolExecutor, as_completed +from concurrent.futures import ProcessPoolExecutor, as_completed, wait, ALL_COMPLETED from datetime import datetime from typing import Any, Dict, List, Tuple from warnings import warn @@ -204,26 +204,14 @@ def evaluate(flags): assert len(completion_id) == len(problems), "Missing problems in samples" def stucking_checker(): - unchanged_duration = 0 - last_size = len(remainings) - - while remainings: - time.sleep(1) - current_size = len(remainings) - - if current_size != last_size or current_size == 0: - # Reset the unchanged duration if something has changed - unchanged_duration = 0 - last_size = current_size - else: - # Increment the duration if nothing has changed - unchanged_duration += 1 - - if unchanged_duration >= 240: + not_done = [True] + while len(not_done) > 0: + done, not_done = wait(futures, timeout=240, return_when=ALL_COMPLETED) + + if len(done) == 0: # Output warnings after 240 seconds of no change warn("No samples have finished testing in the last 240s") warn(f"{len(remainings)} samples to be tested: {remainings}") - unchanged_duration = 0 # Reset after warning threading.Thread(target=stucking_checker).start() From 6c01136cf7a54aae2b4489996e683a4e5a35607a Mon Sep 17 00:00:00 2001 From: Roy Hvaara Date: Sat, 14 Sep 2024 20:09:45 +0200 Subject: [PATCH 3/4] Remove superfluous comment --- bigcodebench/evaluate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py index 0451d2dd..187cfc2b 100644 --- a/bigcodebench/evaluate.py +++ b/bigcodebench/evaluate.py @@ -209,7 +209,6 @@ def stucking_checker(): done, not_done = wait(futures, timeout=240, return_when=ALL_COMPLETED) if len(done) == 0: - # Output warnings after 240 seconds of no change warn("No samples have finished testing in the last 240s") warn(f"{len(remainings)} samples to be tested: {remainings}") From afc1f87a0685d0b21b834ac0d0769dc7c3147a7b Mon Sep 17 00:00:00 2001 From: Roy Hvaara Date: Sat, 14 Sep 2024 23:45:55 +0200 Subject: [PATCH 4/4] Reset timer on progress --- bigcodebench/evaluate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py index 187cfc2b..41c7cc07 100644 --- a/bigcodebench/evaluate.py +++ b/bigcodebench/evaluate.py @@ -6,7 +6,7 @@ import threading import time from collections import Counter, defaultdict -from concurrent.futures import ProcessPoolExecutor, as_completed, wait, ALL_COMPLETED +from concurrent.futures import ProcessPoolExecutor, as_completed, wait, FIRST_COMPLETED from datetime import datetime from typing import Any, Dict, List, Tuple from warnings import warn @@ -204,9 +204,9 @@ def evaluate(flags): assert len(completion_id) == len(problems), "Missing problems in samples" def stucking_checker(): - not_done = [True] + not_done = futures while len(not_done) > 0: - done, not_done = wait(futures, timeout=240, return_when=ALL_COMPLETED) + done, not_done = wait(not_done, timeout=240, return_when=FIRST_COMPLETED) if len(done) == 0: warn("No samples have finished testing in the last 240s")