From 5342489f79668ae71d08d0bbcd7f0baff18f006c Mon Sep 17 00:00:00 2001 From: Markus Alexander Kuppe <tlaplus.net@lemmster.de> Date: Wed, 13 May 2015 14:59:28 +0200 Subject: [PATCH] [Bugfix] Remove deadlock that happens when the main thread tries to block all worker threads to do its periodic work (liveness checking and checkpointing) while the worker threads finish concurrently. This makes the main thread wait indefinitely. The chance for this bug has been drastically increased by the fix in commit ea6532a2672216e8d9d9dce8297dcc8d614dbbad (check liveness independent of checkpointing timer). --- tlatools/src/tlc2/tool/queue/StateQueue.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tlatools/src/tlc2/tool/queue/StateQueue.java b/tlatools/src/tlc2/tool/queue/StateQueue.java index b55f1eaee..d968a67b0 100644 --- a/tlatools/src/tlc2/tool/queue/StateQueue.java +++ b/tlatools/src/tlc2/tool/queue/StateQueue.java @@ -179,7 +179,19 @@ public abstract class StateQueue implements IStateQueue { */ public synchronized void finishAll() { this.finish = true; + // Notify all other worker threads. this.notifyAll(); + // Need to wake main thread that waits (mu.wait()) to suspend access to + // the squeue (see suspendAll). The main thread might attempt to do its + // periodic work (tlc2.tool.ModelChecker.doPeriodicWork()) the moment + // all worker threads finish. Since suspendAll assumes the main thread + // is woken up (potentially) multiple times from sleeping indefinitely + // in the while loop before it finally returns after locking the + // StateQueue, we have to live up to this assumption. + synchronized (this.mu) { + // Technically notify() would do. + this.mu.notify(); + } } /* (non-Javadoc) @@ -194,11 +206,13 @@ public abstract class StateQueue implements IStateQueue { this.stop = true; needWait = needsWaiting(); } + // Wait for all worker threads to stop. while (needWait) { synchronized (this.mu) { try { // waiting here assumes that subsequently a worker - // is going to wake us up by calling isAvail() + // is going to wake us up by calling isAvail() or + // this.mu.notify*() this.mu.wait(); } catch (Exception e) { MP.printError(EC.GENERAL, "waiting for a worker to wake up", e); // LL changed call 7 April 2012 -- GitLab