From 48d41a306600f68a7583fd34e947a9c119b04960 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 15 Jul 2021 10:43:35 -0500 Subject: [PATCH 1/5] Cache: reduce sleep time --- pyopencl/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyopencl/cache.py b/pyopencl/cache.py index a09ec9a0a..4778f205e 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -89,7 +89,7 @@ def __init__(self, cleanup_m, cache_dir): pass from time import sleep - sleep(1) + sleep(0.05) attempts += 1 From aa708a25e467dae2575e8af937b3c6e378117ca9 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 15 Jul 2021 10:45:21 -0500 Subject: [PATCH 2/5] Update cache.py --- pyopencl/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 4778f205e..40473eaaa 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -98,8 +98,8 @@ def __init__(self, cleanup_m, cache_dir): warn("could not obtain cache lock--delete '%s' if necessary" % self.lock_file) - if attempts > 3 * 60: - raise RuntimeError("waited more than three minutes " + if attempts > 60 / 0.05: + raise RuntimeError("waited more than one minute " "on the lock file '%s'" "--something is wrong" % self.lock_file) From 8ab383a969b4f58a6cc3900f1126ed3165414765 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 21 Jul 2021 11:28:59 -0500 Subject: [PATCH 3/5] print warnings fix --- pyopencl/cache.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 40473eaaa..90dec26ee 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -88,17 +88,18 @@ def __init__(self, cleanup_m, cache_dir): except OSError: pass + wait_time_seconds = 0.05 from time import sleep - sleep(0.05) + sleep(wait_time_seconds) attempts += 1 - if attempts > 10: + if attempts % (10/wait_time_seconds) == 0: from warnings import warn warn("could not obtain cache lock--delete '%s' if necessary" % self.lock_file) - 
if attempts > 60 / 0.05: + if attempts > 60 / wait_time_seconds: raise RuntimeError("waited more than one minute " "on the lock file '%s'" "--something is wrong" % self.lock_file) From 4d13b82a0c387901783ccf8c2b61eacaaddec7be Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Fri, 10 Dec 2021 12:16:04 -0600 Subject: [PATCH 4/5] restructure --- pyopencl/cache.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pyopencl/cache.py b/pyopencl/cache.py index 90dec26ee..2c22aedf2 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -89,17 +89,24 @@ def __init__(self, cleanup_m, cache_dir): pass wait_time_seconds = 0.05 + + # Warn every 10 seconds if not able to acquire lock + warn_attempts = int(10/wait_time_seconds) + + # Exit after 60 seconds if not able to acquire lock + exit_attempts = int(60/wait_time_seconds) + from time import sleep sleep(wait_time_seconds) attempts += 1 - if attempts % (10/wait_time_seconds) == 0: + if attempts % warn_attempts == 0: from warnings import warn warn("could not obtain cache lock--delete '%s' if necessary" % self.lock_file) - if attempts > 60 / wait_time_seconds: + if attempts > exit_attempts: raise RuntimeError("waited more than one minute " "on the lock file '%s'" "--something is wrong" % self.lock_file) From 387011ed34ce7f85e74bffd10dd7a01103f88871 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 13 Dec 2021 13:04:47 +0100 Subject: [PATCH 5/5] add comment about our choice for the timeout --- pyopencl/cache.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyopencl/cache.py b/pyopencl/cache.py index e3c0fe8d1..582a79081 100644 --- a/pyopencl/cache.py +++ b/pyopencl/cache.py @@ -88,6 +88,12 @@ def __init__(self, cleanup_m, cache_dir): except OSError: pass + # This value was chosen based on the py-filelock package: + # https://github.com/tox-dev/py-filelock/blob/a6c8fabc4192fa7a4ae19b1875ee842ec5eb4f61/src/filelock/_api.py#L113 + # When running pyopencl in an application with 
multiple ranks + # that share a cache_dir, longer polling intervals can lead to + # application stalls even with low numbers of ranks. + # cf. https://github.com/inducer/pyopencl/pull/504 wait_time_seconds = 0.05 # Warn every 10 seconds if not able to acquire lock