Add GUC log_lock_waits to log long wait times.
authorBruce Momjian <bruce@momjian.us>
Sat, 3 Mar 2007 18:46:40 +0000 (18:46 +0000)
committerBruce Momjian <bruce@momjian.us>
Sat, 3 Mar 2007 18:46:40 +0000 (18:46 +0000)
Simon Riggs

doc/src/sgml/config.sgml
src/backend/storage/lmgr/deadlock.c
src/backend/storage/lmgr/proc.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/storage/lock.h
src/include/storage/proc.h

index 7a379287b57139152d3b65677b17a5f4432f7952..eb070f0e90f174b674614f3af1cb1dca572f52b1 100644 (file)
@@ -2946,6 +2946,21 @@ SELECT * FROM parent WHERE key = 2400;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-lock-waits" xreflabel="log_lock_waits">
+      <term><varname>log_lock_waits</varname> (<type>boolean</type>)</term>
+      <indexterm>
+       <primary><varname>log_lock_waits</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Controls whether a log message is produced when a statement waits
+        longer than <xref linkend="guc-deadlock-timeout"> to acquire a
+        lock.  This is useful in determining if lock waits are causing
+        poor performance.  The default is <literal>off</>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-log-temp-files" xreflabel="log_temp_files">
       <term><varname>log_temp_files</varname> (<type>integer</type>)</term>
       <indexterm>
@@ -3980,17 +3995,18 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         This is the amount of time, in milliseconds, to wait on a lock
         before checking to see if there is a deadlock condition. The
         check for deadlock is relatively slow, so the server doesn't run
-        it every time it waits for a lock. We (optimistically?) assume
+        it every time it waits for a lock. We optimistically assume
         that deadlocks are not common in production applications and
         just wait on the lock for a while before starting the check for a
         deadlock. Increasing this value reduces the amount of time
         wasted in needless deadlock checks, but slows down reporting of
         real deadlock errors. The default is one second (<literal>1s</>),
         which is probably about the smallest value you would want in
-        practice. On a heavily loaded server you might want to raise it.
+        practice.  Set <xref linkend="guc-log-lock-waits"> to log deadlock
+        checks.  On a heavily loaded server you might want to raise it.
         Ideally the setting should exceed your typical transaction time,
-        so as to improve the odds that a lock will be released before
-        the waiter decides to check for deadlock.
+        so as to improve the odds that a lock will be released before the
+        waiter decides to check for deadlock.
        </para>
       </listitem>
      </varlistentry>
index 2b7d65918d0db206a836fd2a2b8ae113d790a604..31bd135bf8c6800bb68bbc4b20e7971515cc9a7d 100644 (file)
@@ -187,13 +187,14 @@ InitDeadLockChecking(void)
  * deadlock.  If resolution is impossible, return TRUE --- the caller
  * is then expected to abort the given proc's transaction.
  *
- * Caller must already have locked all partitions of the lock tables.
+ * Caller must already have locked all partitions of the lock tables,
+ * so standard error logging/reporting code is handled by caller.
  *
  * On failure, deadlock details are recorded in deadlockDetails[] for
  * subsequent printing by DeadLockReport().  That activity is separate
  * because we don't want to do it while holding all those LWLocks.
  */
-bool
+DeadlockState
 DeadLockCheck(PGPROC *proc)
 {
        int                     i,
@@ -204,6 +205,11 @@ DeadLockCheck(PGPROC *proc)
        nPossibleConstraints = 0;
        nWaitOrders = 0;
 
+#ifdef LOCK_DEBUG
+       if (Debug_deadlocks)
+               DumpAllLocks();
+#endif
+
        /* Search for deadlocks and possible fixes */
        if (DeadLockCheckRecurse(proc))
        {
@@ -217,7 +223,7 @@ DeadLockCheck(PGPROC *proc)
                if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
                        elog(FATAL, "deadlock seems to have disappeared");
 
-               return true;                    /* cannot find a non-deadlocked state */
+               return DS_HARD_DEADLOCK;        /* cannot find a non-deadlocked state */
        }
 
        /* Apply any needed rearrangements of wait queues */
@@ -249,7 +255,11 @@ DeadLockCheck(PGPROC *proc)
                /* See if any waiters for the lock can be woken up now */
                ProcLockWakeup(GetLocksMethodTable(lock), lock);
        }
-       return false;
+
+       if (nWaitOrders > 0)
+               return DS_SOFT_DEADLOCK;
+       else
+               return DS_DEADLOCK_NOT_FOUND;
 }
 
 /*
@@ -896,7 +906,7 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *lock)
 }
 
 /*
- * Report a detected deadlock, with available details.
+ * Report a detected DS_HARD_DEADLOCK, with available details.
  */
 void
 DeadLockReport(void)
index 48c3f82fc9cb72ae6c932cc686d1538fa1291bff..c16fa551b4c8e3a1522c4503aaefb7ea59e823c0 100644 (file)
@@ -48,6 +48,7 @@
 /* GUC variables */
 int                    DeadlockTimeout = 1000;
 int                    StatementTimeout = 0;
+bool           log_lock_waits = false;
 
 /* Pointer to this process's PGPROC struct, if any */
 PGPROC    *MyProc = NULL;
@@ -979,6 +980,7 @@ static void
 CheckDeadLock(void)
 {
        int                     i;
+       DeadlockState deadlock_state = DS_DEADLOCK_NOT_FOUND;
 
        /*
         * Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1004,60 +1006,77 @@ CheckDeadLock(void)
         * This is quicker than checking our semaphore's state, since no kernel
         * call is needed, and it is safe because we hold the lock partition lock.
         */
-       if (MyProc->links.prev == INVALID_OFFSET ||
-               MyProc->links.next == INVALID_OFFSET)
-               goto check_done;
-
-#ifdef LOCK_DEBUG
-       if (Debug_deadlocks)
-               DumpAllLocks();
-#endif
-
-       if (!DeadLockCheck(MyProc))
+       if (MyProc->links.prev != INVALID_OFFSET &&
+               MyProc->links.next != INVALID_OFFSET)
+               deadlock_state = DeadLockCheck(MyProc);
+       
+       if (deadlock_state == DS_HARD_DEADLOCK)
        {
-               /* No deadlock, so keep waiting */
-               goto check_done;
-       }
-
-       /*
-        * Oops.  We have a deadlock.
-        *
-        * Get this process out of wait state.  (Note: we could do this more
-        * efficiently by relying on lockAwaited, but use this coding to preserve
-        * the flexibility to kill some other transaction than the one detecting
-        * the deadlock.)
-        *
-        * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
-        * ProcSleep will report an error after we return from the signal handler.
-        */
-       Assert(MyProc->waitLock != NULL);
-       RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
+               /*
+                * Oops.  We have a deadlock.
+                *
+                * Get this process out of wait state.  (Note: we could do this more
+                * efficiently by relying on lockAwaited, but use this coding to preserve
+                * the flexibility to kill some other transaction than the one detecting
+                * the deadlock.)
+                *
+                * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
+                * ProcSleep will report an error after we return from the signal handler.
+                */
+               Assert(MyProc->waitLock != NULL);
+               RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
 
-       /*
-        * Unlock my semaphore so that the interrupted ProcSleep() call can
-        * finish.
-        */
-       PGSemaphoreUnlock(&MyProc->sem);
+               /*
+                * Unlock my semaphore so that the interrupted ProcSleep() call can
+                * finish.
+                */
+               PGSemaphoreUnlock(&MyProc->sem);
 
-       /*
-        * We're done here.  Transaction abort caused by the error that ProcSleep
-        * will raise will cause any other locks we hold to be released, thus
-        * allowing other processes to wake up; we don't need to do that here.
-        * NOTE: an exception is that releasing locks we hold doesn't consider the
-        * possibility of waiters that were blocked behind us on the lock we just
-        * failed to get, and might now be wakable because we're not in front of
-        * them anymore.  However, RemoveFromWaitQueue took care of waking up any
-        * such processes.
-        */
+               /*
+                * We're done here.  Transaction abort caused by the error that ProcSleep
+                * will raise will cause any other locks we hold to be released, thus
+                * allowing other processes to wake up; we don't need to do that here.
+                * NOTE: an exception is that releasing locks we hold doesn't consider the
+                * possibility of waiters that were blocked behind us on the lock we just
+                * failed to get, and might now be wakable because we're not in front of
+                * them anymore.  However, RemoveFromWaitQueue took care of waking up any
+                * such processes.
+                */
+       }
 
        /*
         * Release locks acquired at head of routine.  Order is not critical, so
         * do it back-to-front to avoid waking another CheckDeadLock instance
         * before it can get all the locks.
         */
-check_done:
        for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
                LWLockRelease(FirstLockMgrLock + i);
+
+       /*
+        * Issue any log messages requested.
+        *
+        * Deadlock ERROR messages are issued as part of transaction abort, so 
+        * these messages should not raise error states intentionally.
+        */
+       if (log_lock_waits)
+       {
+               switch (deadlock_state)
+               {
+                       case DS_SOFT_DEADLOCK:
+                               ereport(LOG,
+                                       (errmsg("deadlock avoided by rearranging lock order")));
+                               break;
+                       case DS_DEADLOCK_NOT_FOUND:
+                               ereport(LOG,
+                                       (errmsg("statement waiting for lock for at least %d ms",
+                                                               DeadlockTimeout)));
+                               break;
+                       case DS_HARD_DEADLOCK:
+                               break;  /* ERROR message handled during abort */
+                       default:
+                               break;
+               }
+       }
 }
 
 
index 99b84827599f24efa76582931a3c7d994b272527..cd6b590230c2298e47ab84c486a49a6a47c0610e 100644 (file)
@@ -811,6 +811,14 @@ static struct config_bool ConfigureNamesBool[] =
        },
 #endif
 
+       {
+               {"log_lock_waits", PGC_SIGHUP, LOGGING_WHAT,
+                       gettext_noop("Logs long lock wait events."),
+                       NULL
+               },
+               &log_lock_waits,
+               false, NULL, NULL
+       },
        {
                {"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
                        gettext_noop("Logs the host name in the connection logs."),
index 77a86f0020b3b5551cb7c0a5aa61c46c23ffbb70..7ee8d417c00944b687e96280151e8e303719a05d 100644 (file)
                                        # e.g. '<%u%%%d> '
 #log_statement = 'none'                        # none, ddl, mod, all
 #log_hostname = off
-
+#log_lock_waits = off                  # Log lock waits longer than deadlock_timeout
 #log_temp_files = -1                   # Log temporary files equal or larger
                                        # than the specified number of kilobytes.
                                        # -1 disables;  0 logs all temp files
index 77d19ac51d80ebe238b8cba0a3fe24f6edfc3860..1ce815cf1a40218192e95d87703dd82341424356 100644 (file)
@@ -394,6 +394,13 @@ typedef enum
        LOCKACQUIRE_ALREADY_HELD        /* incremented count for lock already held */
 } LockAcquireResult;
 
+/* Deadlock states identified by DeadlockCheck() */
+typedef enum
+{
+       DS_DEADLOCK_NOT_FOUND,          /* no deadlock found within database server */
+       DS_SOFT_DEADLOCK,                       /* deadlock, but lock queues rearrangeable */
+       DS_HARD_DEADLOCK                        /* deadlock, no way out but ERROR */
+} DeadlockState;
 
 /*
  * The lockmgr's shared hash tables are partitioned to reduce contention.
@@ -442,7 +449,7 @@ extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
 extern void lock_twophase_postabort(TransactionId xid, uint16 info,
                                                void *recdata, uint32 len);
 
-extern bool DeadLockCheck(PGPROC *proc);
+extern DeadlockState DeadLockCheck(PGPROC *proc);
 extern void DeadLockReport(void);
 extern void RememberSimpleDeadLock(PGPROC *proc1,
                                           LOCKMODE lockmode,
index f50bbad726ace68114b88567837afe5d58a3dab4..d6c92daf925032608c99a5bd07e2a239653920f9 100644 (file)
@@ -127,6 +127,7 @@ typedef struct PROC_HDR
 /* configurable options */
 extern int     DeadlockTimeout;
 extern int     StatementTimeout;
+extern bool    log_lock_waits;
 
 extern volatile bool cancel_from_timeout;