Detect early deadlock in Hot Standby when Startup is already waiting. First

author Simon Riggs <simon@2ndquadrant.com>

Sun, 31 Jan 2010 19:01:11 +0000 (19:01 +0000)

committer Simon Riggs <simon@2ndquadrant.com>

Sun, 31 Jan 2010 19:01:11 +0000 (19:01 +0000)
author Simon Riggs <simon@2ndquadrant.com>
Sun, 31 Jan 2010 19:01:11 +0000 (19:01 +0000)
committer Simon Riggs <simon@2ndquadrant.com>
Sun, 31 Jan 2010 19:01:11 +0000 (19:01 +0000)
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c

index 656e5633b481db27200d8ae9a69c1556be50c49c..4eccf3b64d200f68a6083892f38a409ec9492e8d 100644 (file)
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -22,6 +22,7 @@
  #include "access/xlog.h"
  #include "miscadmin.h"
  #include "pgstat.h"
+#include "storage/bufmgr.h"
  #include "storage/lmgr.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
@@ -384,7 +385,7 @@ ResolveRecoveryConflictWithBufferPin(void)
         TimestampDifference(GetLatestXLogTime(), now,
                             &standby_delay_secs, &standby_delay_usecs);
  
-       if (standby_delay_secs >= (long) MaxStandbyDelay)
+       if (standby_delay_secs >= MaxStandbyDelay)
             SendRecoveryConflictWithBufferPin();
         else
         {
@@ -445,6 +446,39 @@ SendRecoveryConflictWithBufferPin(void)
     CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false);
  }
  
+/*
+ * In Hot Standby perform early deadlock detection.  We abort the lock
+ * wait if we are about to sleep while holding the buffer pin that Startup
+ * process is waiting for. The deadlock occurs because we can only be
+ * waiting behind an AccessExclusiveLock, which can only clear when a
+ * transaction completion record is replayed, which can only occur when
+ * Startup process is not waiting. So if Startup process is waiting we
+ * will never clear that lock, so if we wait we cause deadlock. If we
+ * are the Startup process then no need to check for deadlocks.
+ */
+void
+CheckRecoveryConflictDeadlock(LWLockId partitionLock)
+{
+   Assert(!InRecovery);    /* caller checks !InRecovery before calling us */
+
+   if (!HoldingBufferPinThatDelaysRecovery())
+       return;
+
+   LWLockRelease(partitionLock);   /* drop the partition lock before raising the error */
+
+   /*
+    * Error message should match ProcessInterrupts() but we avoid calling
+    * that because we aren't handling an interrupt at this point. Note
+    * that we only cancel the current transaction here, so if we are in a
+    * subtransaction and the pin is held by a parent, then the Startup
+    * process will continue to wait even though we have avoided deadlock.
+    */
+   ereport(ERROR,
+           (errcode(ERRCODE_QUERY_CANCELED),
+            errmsg("canceling statement due to conflict with recovery"),
+            errdetail("User transaction caused buffer deadlock with recovery.")));
+}
+
  /*
   * -----------------------------------------------------
   * Locking in Recovery Mode
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c

index cc45f7f59991cbdfd73c43ebd4a2b2587ebab314..6738e8d1656aca476229b0a7d8150409baf79d9f 100644 (file)
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -814,6 +814,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
             return LOCKACQUIRE_NOT_AVAIL;
         }
  
+       /*
+        * In Hot Standby perform early deadlock detection in normal backends.
+        * If a deadlock is found we release the partition lock but do not return.
+        */
+       if (RecoveryInProgress() && !InRecovery)
+           CheckRecoveryConflictDeadlock(partitionLock);
+
         /*
          * Set bitmask of locks this process already holds on this object.
          */
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h

index e1b6d991050236236c698399600961ba5f9c3019..79f1e43bbf816988b9dfb4726a76528f05c99c0a 100644 (file)
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -31,6 +31,7 @@ extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
  extern void ResolveRecoveryConflictWithBufferPin(void);
  extern void SendRecoveryConflictWithBufferPin(void);
+extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock);
  
  /*
   * Standby Rmgr (RM_STANDBY_ID)
author	Simon Riggs <simon@2ndquadrant.com>
	Sun, 31 Jan 2010 19:01:11 +0000 (19:01 +0000)
committer	Simon Riggs <simon@2ndquadrant.com>
	Sun, 31 Jan 2010 19:01:11 +0000 (19:01 +0000)
src/backend/storage/ipc/standby.c		patch \| blob \| blame \| history
src/backend/storage/lmgr/lock.c		patch \| blob \| blame \| history
src/include/storage/standby.h		patch \| blob \| blame \| history