diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index f2ccbe1..b3e5f76 100644 *** a/src/backend/storage/lmgr/lwlock.c --- b/src/backend/storage/lmgr/lwlock.c *************** *** 29,34 **** --- 29,88 ---- #include "storage/ipc.h" #include "storage/proc.h" #include "storage/spin.h" + #include "portability/instr_time.h" + #include "utils/ps_status.h" + + #include + + /******************* LWLock statistics controls **************************/ + + /* displays counters (number of lock acquires, etc) + * on process exit */ + #define LWLOCK_STATS + + #ifdef LWLOCK_STATS + + /* also displays cumulative timings of time spent + * waiting and holding locks + * (depends on LWLOCK_STATS) + * Caution : measuring time has significant overhead ! + */ + #define LWLOCK_TIMING_STATS + + /* If defined, the "lock wait" time doesn't include spinlock wait time. + * This has lower overhead, since if the lock is acquired without wait, + * we record a wait time of 0 without actually measuring it. But we won't + * know if the spinlock itself delays. + */ + #define LWLOCK_TIMING_STARTS_AFTER_SPINLOCK + + /* displays only locks which were waited on more than + * this amount of seconds (set to 0 to display all) + * (depends on LWLOCK_TIMING_STATS) + */ + #define LWLOCKS_STATS_TIME_DISPLAY_THRESHOLD 0.1 + + #ifdef LWLOCK_TIMING_STATS + /* allows the selection of a speficic lock (or locks) to time + * so no time is wasted timing other locks */ + static inline bool lwlock_stats_time_lock( LWLockId id ) + { + return id == WALInsertLock || id == WALWriteLock; + } + /* allows the selection of a speficic lock (or locks) for which + * we'll need a histogram of lock wait/held times */ + static inline bool lwlock_stats_time_histo_lock( LWLockId id ) + { + return id == WALInsertLock; + } + #endif + + static void lock_stats_setup( void ); + static void print_lwlock_stats(int code, Datum arg); + + #endif /* LWLOCK_STATS */ + + /*************************************************************************/ /* We use the ShmemLock spinlock to protect LWLockAssign */ *************** static LWLockId held_lwlocks[MAX_SIMUL_L *** 88,100 **** static int lock_addin_request = 0; static bool lock_addin_request_allowed = true; - #ifdef LWLOCK_STATS - static int counts_for_pid = 0; - static int *sh_acquire_counts; - static int *ex_acquire_counts; - static int *block_counts; - #endif - #ifdef LOCK_DEBUG bool Trace_lwlocks = false; --- 142,147 ---- *************** LOG_LWDEBUG(const char *where, LWLockId *** 119,148 **** #define LOG_LWDEBUG(a,b,c) #endif /* LOCK_DEBUG */ ! #ifdef LWLOCK_STATS ! static void ! print_lwlock_stats(int code, Datum arg) { ! int i; ! int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); ! int numLocks = LWLockCounter[1]; ! ! /* Grab an LWLock to keep different backends from mixing reports */ ! LWLockAcquire(0, LW_EXCLUSIVE); ! ! for (i = 0; i < numLocks; i++) ! { ! if (sh_acquire_counts[i] || ex_acquire_counts[i] || block_counts[i]) ! fprintf(stderr, "PID %d lwlock %d: shacq %u exacq %u blk %u\n", ! MyProcPid, i, sh_acquire_counts[i], ex_acquire_counts[i], ! block_counts[i]); ! } ! ! LWLockRelease(0); ! } ! #endif /* LWLOCK_STATS */ /* * Compute number of LWLocks to allocate. --- 166,206 ---- #define LOG_LWDEBUG(a,b,c) #endif /* LOCK_DEBUG */ ! /* struct for statistics info */ ! #ifdef LWLOCK_STATS ! typedef struct { ! /* Process-local Counters for the number of times the lock has been ... */ ! int count_acquired_share; /* acquired in SHARE mode */ ! int count_waited_share; /* and it needed to wait */ ! ! int count_acquired_exclusive; /* acquired in EXCLUSIVEmode */ ! int count_waited_exclusive; /* and it needed to wait */ ! ! #ifdef LWLOCK_TIMING_STATS ! /* Process local sum of time during which ... */ ! instr_time time_waited_share; /* we waited to get a SHARE lock */ ! instr_time time_held_share; /* we held the SHARE lock */ ! instr_time time_waited_exclusive; /* we waited to get an EXCLUSIVE lock */ ! instr_time time_held_exclusive; /* we held the EXCLUSIVE lock */ ! ! /* histogram of lock wait/hold times is an array of ints, with array[n] ! * being a counter for how many occurences of wait time approx. 2^n nanoseconds. ! */ ! int32 *time_waited_exclusive_histogram; ! int32 *time_held_exclusive_histogram; ! ! instr_time ta, th; /* temp variables */ ! #endif ! } LWLockStatsData; + static int counts_for_pid = 0; + static LWLockStatsData *lwlock_stats; + #ifdef LWLOCK_TIMING_STATS + static instr_time lwlock_stats_begin_time; + #endif /* LWLOCK_TIMING_STATS */ + #endif /* LWLOCK_STATS */ /* * Compute number of LWLocks to allocate. *************** LWLockAssign(void) *** 300,306 **** return result; } - /* * LWLockAcquire - acquire a lightweight lock in the specified mode * --- 358,363 ---- *************** LWLockAcquire(LWLockId lockid, LWLockMod *** 315,341 **** PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; ! PRINT_LWDEBUG("LWLockAcquire", lockid, lock); #ifdef LWLOCK_STATS /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) { ! int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); ! int numLocks = LWLockCounter[1]; ! ! sh_acquire_counts = calloc(numLocks, sizeof(int)); ! ex_acquire_counts = calloc(numLocks, sizeof(int)); ! block_counts = calloc(numLocks, sizeof(int)); ! counts_for_pid = MyProcPid; ! on_shmem_exit(print_lwlock_stats, 0); } ! /* Count lock acquisition attempts */ ! if (mode == LW_EXCLUSIVE) ! ex_acquire_counts[lockid]++; ! else ! sh_acquire_counts[lockid]++; #endif /* LWLOCK_STATS */ /* --- 372,399 ---- PGPROC *proc = MyProc; bool retry = false; int extraWaits = 0; ! #ifdef LWLOCK_TIMING_STATS ! bool acq_time_recorded = false; ! #endif ! PRINT_LWDEBUG("LWLockAcquire", lockid, lock); #ifdef LWLOCK_STATS /* Set up local count state first time through in a given process */ if (counts_for_pid != MyProcPid) + lock_stats_setup(); + + /* Count lock acquisition attempts */ + if (mode == LW_EXCLUSIVE) lwlock_stats[lockid].count_acquired_exclusive++; + else lwlock_stats[lockid].count_acquired_share++; + #if defined(LWLOCK_TIMING_STATS) && !defined(LWLOCK_TIMING_STARTS_AFTER_SPINLOCK) + /* record start of wait time */ + if( lwlock_stats_time_lock( lockid )) { ! INSTR_TIME_SET_CURRENT( lwlock_stats[lockid].ta ); ! acq_time_recorded = true; } ! #endif #endif /* LWLOCK_STATS */ /* *************** LWLockAcquire(LWLockId lockid, LWLockMod *** 355,361 **** * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); ! /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. --- 413,419 ---- * manipulations of data structures in shared memory. */ HOLD_INTERRUPTS(); ! /* * Loop here to try to acquire lock after each time we are signaled by * LWLockRelease. *************** LWLockAcquire(LWLockId lockid, LWLockMod *** 445,451 **** LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); #ifdef LWLOCK_STATS ! block_counts[lockid]++; #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); --- 503,519 ---- LOG_LWDEBUG("LWLockAcquire", lockid, "waiting"); #ifdef LWLOCK_STATS ! /* count lock waits */ ! if (mode == LW_EXCLUSIVE) lwlock_stats[lockid].count_waited_exclusive++; ! else lwlock_stats[lockid].count_waited_share++; ! #if defined(LWLOCK_TIMING_STATS) && defined(LWLOCK_TIMING_STARTS_AFTER_SPINLOCK) ! /* record start of wait time */ ! if( lwlock_stats_time_lock( lockid )) ! { ! INSTR_TIME_SET_CURRENT( lwlock_stats[lockid].ta ); ! acq_time_recorded = true; ! } ! #endif #endif TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode); *************** LWLockAcquire(LWLockId lockid, LWLockMod *** 480,485 **** --- 548,582 ---- */ while (extraWaits-- > 0) PGSemaphoreUnlock(&proc->sem); + + #ifdef LWLOCK_TIMING_STATS + /* record end of wait time and start of hold time */ + if( lwlock_stats_time_lock( lockid )) + { + INSTR_TIME_SET_CURRENT( lwlock_stats[lockid].th ); + if( acq_time_recorded ) + { + if (mode == LW_EXCLUSIVE) + { + instr_time dt = lwlock_stats[lockid].th; + INSTR_TIME_SUBTRACT( dt, lwlock_stats[lockid].ta ); + INSTR_TIME_ADD( lwlock_stats[lockid].time_waited_exclusive, dt ); + if( lwlock_stats_time_histo_lock( lockid )) + { + double t = INSTR_TIME_GET_DOUBLE( dt ); + /* increment a counter for a histocram bin, we use log2 ( time in ns ) */ + int p = Max( 0, Min( 31, log2f( t*1e9 ))); + if( !lwlock_stats[lockid].time_waited_exclusive_histogram ) + lwlock_stats[lockid].time_waited_exclusive_histogram = calloc( 32, sizeof(int32) ); + lwlock_stats[lockid].time_waited_exclusive_histogram[p]++; + } + } + else + INSTR_TIME_ACCUM_DIFF( lwlock_stats[lockid].time_waited_share, + lwlock_stats[lockid].th, lwlock_stats[lockid].ta ); + } + } + #endif } /* *************** LWLockConditionalAcquire(LWLockId lockid *** 548,553 **** --- 645,665 ---- /* Add lock to list of locks held by this backend */ held_lwlocks[num_held_lwlocks++] = lockid; TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode); + + #ifdef LWLOCK_STATS + /* Set up local count state first time through in a given process */ + if (counts_for_pid != MyProcPid) + lock_stats_setup(); + + /* Count lock acquisition attempts */ + if (mode == LW_EXCLUSIVE) lwlock_stats[lockid].count_acquired_exclusive++; + else lwlock_stats[lockid].count_acquired_share++; + #ifdef LWLOCK_TIMING_STATS + /* record start of hold time */ + if( lwlock_stats_time_lock( lockid )) + INSTR_TIME_SET_CURRENT( lwlock_stats[lockid].th ); + #endif + #endif /* LWLOCK_STATS */ } return !mustwait; *************** LWLockRelease(LWLockId lockid) *** 563,568 **** --- 675,683 ---- PGPROC *head; PGPROC *proc; int i; + #ifdef LWLOCK_TIMING_STATS + bool was_exclusive = false; + #endif PRINT_LWDEBUG("LWLockRelease", lockid, lock); *************** LWLockRelease(LWLockId lockid) *** 586,592 **** --- 701,712 ---- /* Release my hold on lock */ if (lock->exclusive > 0) + { lock->exclusive--; + #ifdef LWLOCK_TIMING_STATS + was_exclusive = true; + #endif + } else { Assert(lock->shared > 0); *************** LWLockRelease(LWLockId lockid) *** 651,656 **** --- 771,802 ---- * Now okay to allow cancel/die interrupts. */ RESUME_INTERRUPTS(); + + #ifdef LWLOCK_TIMING_STATS + /* record end of hold time */ + if( lwlock_stats_time_lock( lockid )) + { + INSTR_TIME_SET_CURRENT( lwlock_stats[lockid].ta ); + + if (was_exclusive) + { + instr_time dt = lwlock_stats[lockid].ta; + INSTR_TIME_SUBTRACT( dt, lwlock_stats[lockid].th ); + INSTR_TIME_ADD( lwlock_stats[lockid].time_held_exclusive, dt ); + if( lwlock_stats_time_histo_lock( lockid )) + { + double t = INSTR_TIME_GET_DOUBLE( dt ); + int p = Max( 0, Min( 31, log2f( t*1e9 ))); + if( !lwlock_stats[lockid].time_held_exclusive_histogram ) + lwlock_stats[lockid].time_held_exclusive_histogram = calloc( 32, sizeof(int32) ); + lwlock_stats[lockid].time_held_exclusive_histogram[p]++; + } + } + else + INSTR_TIME_ACCUM_DIFF( lwlock_stats[lockid].time_held_share, + lwlock_stats[lockid].ta, lwlock_stats[lockid].th ); + } + #endif } *************** LWLockHeldByMe(LWLockId lockid) *** 693,695 **** --- 839,1057 ---- } return false; } + + #ifdef LWLOCK_STATS + /* LWLocks stats functions */ + + /* returns name of a lock from its Id */ + static const char* lwlock_stats_lock_name( LWLockId id ) + { + static const char * names[] = { + "BufFreelist", + "ShmemIndex", + "OidGen", + "XidGen", + "ProcArray", + "SInvalRead", + "SInvalWrite", + "WALInsert", + "WALWrite", + "ControlFile", + "Checkpoint", + "CLogControl", + "SubtransControl", + "MultiXactGen", + "MultiXactOffsetControl", + "MultiXactMemberControl", + "RelCacheInit", + "BgWriterComm", + "TwoPhaseState", + "TablespaceCreate", + "BtreeVacuum", + "AddinShmemInit", + "Autovacuum", + "AutovacuumSchedule", + "SyncScan" }; + + if( id >= 0 && id < (sizeof( names )/sizeof( names[0] ))) + return names[id]; + else + return ""; + } + + #ifdef LWLOCK_TIMING_STATS + /* prints histogram of time values */ + static void lwlock_stats_print_histo( int lockid, const char* legend, int count_nowait, int* histo ) + { + /* tabular display (col,line) */ + int col,line,bin; + double total_time = 0; + + for( bin=1; bin<31; bin++ ) + total_time += (double)(1<<(bin-1)) * histo[bin-1]; + + printf( "\nHistogram for lock %d (%s) : %s\n", lockid, lwlock_stats_lock_name( lockid ), legend); + for( line=0; line<8; line++ ) + { + for( col=0; col<4; col++ ) + { + /* bin is the cell to display, #0 is no-wait locks */ + bin = line+col*8; + + if( !bin ) + printf( "| no wait %9d ", count_nowait ); + else + { + double bin_time = (1U<<(bin-1)); /* time waited (approx) in nanoseconds */ + double time = bin_time * histo[bin-1]; + static const char* units[] = { "ns", "us", "ms", " s" }; /* unit display */ + int unit; + + for( unit=0; unit<3; unit++ ) /* get right unit */ + { + if( bin_time < 1000 ) break; + bin_time *= .001; + } + + printf( "|%11.00f %s x %9d ", + bin_time, units[unit], histo[bin-1] ); + + if( histo[bin-1] ) + printf( "-> %5.02f%% ", 100.1 * time / total_time ); + else + printf( " " ); + } + } + printf( "\n" ); + } + } + #endif + + /* prints formatted statistics */ + static void + print_lwlock_stats(int code, Datum arg) + { + int id, lines=0, display; + int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); + int numLocks = LWLockCounter[1]; + bool something_to_display = false; + + #ifdef LWLOCK_TIMING_STATS + /* compute process life time to display % of time spent in locks */ + instr_time end_time; + double total_percents; + INSTR_TIME_SET_CURRENT( end_time ); + INSTR_TIME_SUBTRACT( end_time, lwlock_stats_begin_time ); + total_percents = 100.0 / INSTR_TIME_GET_DOUBLE( end_time ); + #endif + + /* Grab an LWLock to keep different backends from mixing reports */ + LWLockAcquire(0, LW_EXCLUSIVE); + + + for( display=0; display<=1; display++ ) + { + /* first time through the loop, check if we have something to display */ + if( display ) + { + const char* title; + int title_len; + title = get_ps_display( &title_len ); + + fprintf( stderr, "\n-------- Lock stats for PID %d : %*s\n", MyProcPid, title_len, title ); + } + + for (id = 0; id < numLocks; id++) + { + LWLockStatsData *p = &lwlock_stats[id]; + #ifdef LWLOCK_TIMING_STATS + double time_waited_share = INSTR_TIME_GET_DOUBLE(p->time_waited_share ); + double time_held_share = INSTR_TIME_GET_DOUBLE(p->time_held_share ); + double time_waited_exclusive = INSTR_TIME_GET_DOUBLE(p->time_waited_exclusive); + double time_held_exclusive = INSTR_TIME_GET_DOUBLE(p->time_held_exclusive ); + #endif + + if( !( p->count_waited_share || p->count_waited_exclusive )) + continue; + + #ifdef LWLOCK_TIMING_STATS + if( time_waited_share < LWLOCKS_STATS_TIME_DISPLAY_THRESHOLD + && time_waited_exclusive < LWLOCKS_STATS_TIME_DISPLAY_THRESHOLD ) + continue; + #endif + + something_to_display = true; + + if( !display ) + continue; + + #ifdef LWLOCK_TIMING_STATS + if( !(lines++ & 15) ) + fprintf( stderr, " PID Lock ShAcq ShWait ShWaitT ShHeldT ExAcq " + "ExWait ExWaitT ExHeldT Name\n" ); + + fprintf( stderr, + "%7d %7d %10d %10d %11.02f %11.02f %10d %10d %11.02f (%6.02f %%) %11.02f (%6.02f %%) %s\n", + MyProcPid, id, + p->count_acquired_share, p->count_waited_share, time_waited_share, time_held_share, + p->count_acquired_exclusive, p->count_waited_exclusive, + time_waited_exclusive, time_waited_exclusive * total_percents, + time_held_exclusive, time_held_exclusive * total_percents, + lwlock_stats_lock_name( id )); + #else + if( !(lines++ & 15) ) + fprintf( stderr, " PID Lock ShAcq ShWait ExAcq ExWait Name\n" ); + + fprintf( stderr, + "%7d %7d %10d %10d %10d %10d %s\n", + MyProcPid, id, + p->count_acquired_share, p->count_waited_share, + p->count_acquired_exclusive, p->count_waited_exclusive, + lwlock_stats_lock_name( id )); + #endif + } + + if( !something_to_display ) + break; + } + + #ifdef LWLOCK_TIMING_STATS + /* display histogram of lock wait times */ + if( something_to_display ) + for (id = 0; id < numLocks; id++) + { + LWLockStatsData *p = &lwlock_stats[id]; + double time_waited_exclusive = INSTR_TIME_GET_DOUBLE(p->time_waited_exclusive); + double time_held_exclusive = INSTR_TIME_GET_DOUBLE(p->time_held_exclusive ); + + if( p->time_waited_exclusive_histogram && time_waited_exclusive > LWLOCKS_STATS_TIME_DISPLAY_THRESHOLD ) + lwlock_stats_print_histo( id, "Exclusive WAIT Time", + p->count_acquired_exclusive - p->count_waited_exclusive, + p->time_waited_exclusive_histogram ); + + if( p->time_held_exclusive_histogram && time_held_exclusive > LWLOCKS_STATS_TIME_DISPLAY_THRESHOLD) + lwlock_stats_print_histo( id, "Exclusive HOLD Time", + 0, + p->time_held_exclusive_histogram ); + } + #endif + + LWLockRelease(0); + } + + /* setup backend-local stats information */ + static void lock_stats_setup( void ) + { + int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); + int numLocks = LWLockCounter[1]; + + lwlock_stats = calloc(numLocks, sizeof(LWLockStatsData)); + counts_for_pid = MyProcPid; + + #ifdef LWLOCK_TIMING_STATS + INSTR_TIME_SET_CURRENT( lwlock_stats_begin_time ); + #endif + + on_shmem_exit(print_lwlock_stats, 0); + } + #endif /* LWLOCK_STATS */