diff --git configure configure
index f891914..5dcebdb 100755
--- configure
+++ configure
@@ -654,6 +654,7 @@ LIBOBJS
 UUID_LIBS
 LDAP_LIBS_BE
 LDAP_LIBS_FE
+with_wait_event_detail
 PTHREAD_CFLAGS
 PTHREAD_LIBS
 PTHREAD_CC
@@ -864,6 +865,7 @@ with_libxslt
 with_system_tzdata
 with_zlib
 with_gnu_ld
+with_wait_event_detail
 enable_largefile
 enable_float4_byval
 enable_float8_byval
@@ -1566,6 +1568,8 @@ Optional Packages:
                           use system time zone data in DIR
   --without-zlib          do not use Zlib
   --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
+  --with-wait-event-detail
+                          build with Wait Event Detail support
 
 Some influential environment variables:
   CC          C compiler command
@@ -10553,6 +10557,41 @@ fi # fi
 
 
 
+#
+# Wait Event Detail Information
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with Wait Event Detail support" >&5
+$as_echo_n "checking whether to build with Wait Event Detail support... " >&6; }
+
+
+
+# Check whether --with-wait-event-detail was given.
+if test "${with_wait_event_detail+set}" = set; then :
+  withval=$with_wait_event_detail;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_WAIT_EVENT_DETAIL 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-wait-event-detail option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_wait_event_detail=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_wait_event_detail" >&5
+$as_echo "$with_wait_event_detail" >&6; }
+
+
 
 ##
 ## Libraries
diff --git configure.in configure.in
index 5712419..861ad6d 100644
--- configure.in
+++ configure.in
@@ -1086,6 +1086,15 @@ PTHREAD_LIBS=
 AC_SUBST(PTHREAD_CFLAGS)
 AC_SUBST(PTHREAD_LIBS)
 
+#
+# Wait Event Detail Information
+#
+AC_MSG_CHECKING([whether to build with Wait Event Detail support])
+PGAC_ARG_BOOL(with, wait-event-detail, no, [build with Wait Event Detail support],
+              [AC_DEFINE([USE_WAIT_EVENT_DETAIL], 1, [Define to build with Wait Event Detail support. (--with-wait-event-detail)])])
+AC_MSG_RESULT([$with_wait_event_detail])
+AC_SUBST(with_wait_event_detail)
+
 
 ##
 ## Libraries
diff --git src/backend/postmaster/pgstat.c src/backend/postmaster/pgstat.c
index bbe7361..e9ed305 100644
--- src/backend/postmaster/pgstat.c
+++ src/backend/postmaster/pgstat.c
@@ -337,6 +337,10 @@ static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le
 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
 
+#ifdef USE_WAIT_EVENT_DETAIL
+static int pgstat_get_wait_event_array_index(uint32 wait_event_info);
+#endif
+
 /* ------------------------------------------------------------
  * Public functions called from postmaster follow
  * ------------------------------------------------------------
@@ -2930,6 +2934,14 @@ pgstat_bestart(void)
 	beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
 	beentry->st_progress_command_target = InvalidOid;
 
+#ifdef USE_WAIT_EVENT_DETAIL
+	/* additional statistics for wait events */
+	beentry->st_wait_event_start_timestamp = 0;
+	MemSet(beentry->st_wait_event_total_elapsed, 0, sizeof(beentry->st_wait_event_total_elapsed));
+	MemSet(beentry->st_wait_event_max_elapsed, 0, sizeof(beentry->st_wait_event_max_elapsed));
+	MemSet(beentry->st_wait_event_counting, 0, sizeof(beentry->st_wait_event_counting));
+#endif
+
 	/*
 	 * we don't zero st_progress_param here to save cycles; nobody should
 	 * examine it until st_progress_command has been set to something other
@@ -6380,3 +6392,254 @@ pgstat_clip_activity(const char *raw_activity)
 
 	return activity;
 }
+
+#ifdef USE_WAIT_EVENT_DETAIL
+/*
+ * Only called from pgstat_report_wait_start inline function
+ * (see pgstat_report_wait_start()'s documentation)
+ *
+ * Remembers when the current wait began.  Nothing is recorded unless this
+ * backend has a status entry, wait_event_info is non-zero and the backend
+ * is in STATE_RUNNING.
+ */
+void
+pgstat_report_wait_event_detail_start(uint32 wait_event_info)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	TimestampTz current_timestamp;
+
+	if (!beentry || !wait_event_info || beentry->st_state != STATE_RUNNING)
+		return;
+
+	current_timestamp = GetCurrentTimestamp();
+	pgstat_increment_changecount_before(beentry);
+	beentry->st_wait_event_start_timestamp = current_timestamp;
+	pgstat_increment_changecount_after(beentry);
+}
+
+/*
+ * Only called from pgstat_report_wait_end inline function
+ * (see pgstat_report_wait_end()'s documentation)
+ *
+ * Adds the time spent in the wait that is ending to this backend's per-event
+ * totals.  The wait is ignored when no start time was recorded for it
+ * (pgstat_report_wait_event_detail_start() skips waits that begin outside
+ * STATE_RUNNING), because the elapsed time would otherwise be measured from
+ * timestamp zero.  Events that have no slot in the arrays are not counted.
+ */
+void
+pgstat_report_wait_event_detail_end(uint32 wait_event_info)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	TimestampTz start_timestamp;
+	TimestampTz current_timestamp;
+	uint64		elapsed;
+	int			arrayIndex;
+
+	if (!beentry || !wait_event_info || beentry->st_state != STATE_RUNNING)
+		return;
+
+	start_timestamp = beentry->st_wait_event_start_timestamp;
+	if (start_timestamp == 0)
+		return;
+
+	current_timestamp = GetCurrentTimestamp();
+	arrayIndex = pgstat_get_wait_event_array_index(wait_event_info);
+
+	/* The clock can be stepped backwards; never record a negative time */
+	elapsed = (current_timestamp > start_timestamp) ?
+		(uint64) (current_timestamp - start_timestamp) : 0;
+
+	pgstat_increment_changecount_before(beentry);
+	beentry->st_wait_event_start_timestamp = 0;
+	if (arrayIndex >= 0)
+	{
+		beentry->st_wait_event_total_elapsed[arrayIndex] += elapsed;
+		beentry->st_wait_event_max_elapsed[arrayIndex] = Max(beentry->st_wait_event_max_elapsed[arrayIndex], elapsed);
+		beentry->st_wait_event_counting[arrayIndex]++;
+	}
+	pgstat_increment_changecount_after(beentry);
+}
+
+/*
+ * Convert a wait_event_info number to
+ * PgBackendStatus's wait_event additional information arrays index
+ * (see PgBackendStatus.st_wait_event_*'s documentation)
+ *
+ * Returns -1 when the event has no slot in the arrays: the wait class is not
+ * one of those listed below, or the event number is not smaller than the
+ * NUM_WAIT_* size reserved for its class (for example an LWLock tranche id
+ * at or above LWTRANCHE_FIRST_USER_DEFINED).  Without the range check such
+ * an event would be accounted to a neighbouring class or written past the
+ * end of the arrays.
+ */
+static int
+pgstat_get_wait_event_array_index(uint32 wait_event_info)
+{
+	uint32		classId;
+	uint16		eventId;
+	int			classBase;
+	int			classSize;
+
+	classId = wait_event_info & 0xFF000000;
+	eventId = wait_event_info & 0x0000FFFF;
+
+	switch (classId)
+	{
+		case PG_WAIT_LWLOCK:
+			classBase = 0;
+			classSize = NUM_WAIT_LWLOCK;
+			break;
+		case PG_WAIT_LOCK:
+			classBase = NUM_WAIT_LWLOCK;
+			classSize = NUM_WAIT_LOCK;
+			break;
+		case PG_WAIT_BUFFER_PIN:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK;
+			classSize = NUM_WAIT_BUFFER_PIN;
+			break;
+		case PG_WAIT_ACTIVITY:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN;
+			classSize = NUM_WAIT_ACTIVITY;
+			break;
+		case PG_WAIT_CLIENT:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+				NUM_WAIT_ACTIVITY;
+			classSize = NUM_WAIT_CLIENT;
+			break;
+		case PG_WAIT_EXTENSION:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+				NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT;
+			classSize = NUM_WAIT_EXTENSION;
+			break;
+		case PG_WAIT_IPC:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+				NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+				NUM_WAIT_EXTENSION;
+			classSize = NUM_WAIT_IPC;
+			break;
+		case PG_WAIT_TIMEOUT:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+				NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+				NUM_WAIT_EXTENSION + NUM_WAIT_IPC;
+			classSize = NUM_WAIT_TIMEOUT;
+			break;
+		case PG_WAIT_IO:
+			classBase = NUM_WAIT_LWLOCK +
+				NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+				NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+				NUM_WAIT_EXTENSION + NUM_WAIT_IPC +
+				NUM_WAIT_TIMEOUT;
+			classSize = NUM_WAIT_IO;
+			break;
+		default:
+			return -1;
+	}
+
+	/* eventId must fit in the slots reserved for its class */
+	if (eventId >= classSize)
+		return -1;
+
+	return classBase + eventId;
+}
+
+/*
+ * Convert a PgBackendStatus's wait_event additional information arrays index to
+ * wait_event_info number
+ * (see PgBackendStatus.st_wait_event_*'s documentation)
+ *
+ * The class is found by comparing the index against the cumulative
+ * NUM_WAIT_* sizes, from the last class (IO) down to the first (LWLock).
+ * No range check is made; callers pass an index below NUM_WAIT_EVENT.
+ */
+uint32
+pgstat_get_wait_event_info(int wait_event_array_index)
+{
+	uint32		wait_event_info = 0;
+
+	if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+		NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+		NUM_WAIT_EXTENSION + NUM_WAIT_IPC +
+		NUM_WAIT_TIMEOUT)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+			NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+			NUM_WAIT_EXTENSION + NUM_WAIT_IPC +
+			NUM_WAIT_TIMEOUT);
+		wait_event_info = (PG_WAIT_IO | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+		NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+		NUM_WAIT_EXTENSION + NUM_WAIT_IPC)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+			NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+			NUM_WAIT_EXTENSION + NUM_WAIT_IPC);
+		wait_event_info = (PG_WAIT_TIMEOUT | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+		NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+		NUM_WAIT_EXTENSION)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+			NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT +
+			NUM_WAIT_EXTENSION);
+		wait_event_info = (PG_WAIT_IPC | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+		NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+			NUM_WAIT_ACTIVITY + NUM_WAIT_CLIENT);
+		wait_event_info = (PG_WAIT_EXTENSION | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+		NUM_WAIT_ACTIVITY)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN +
+			NUM_WAIT_ACTIVITY);
+		wait_event_info = (PG_WAIT_CLIENT | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK + NUM_WAIT_BUFFER_PIN);
+		wait_event_info = (PG_WAIT_ACTIVITY | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK +
+		NUM_WAIT_LOCK)
+	{
+		wait_event_array_index -= (NUM_WAIT_LWLOCK +
+			NUM_WAIT_LOCK);
+		wait_event_info = (PG_WAIT_BUFFER_PIN | wait_event_array_index);
+	}
+	else if (wait_event_array_index >= NUM_WAIT_LWLOCK)
+	{
+		wait_event_array_index -= NUM_WAIT_LWLOCK;
+		wait_event_info = (PG_WAIT_LOCK | wait_event_array_index);
+	}
+	else
+	{
+		wait_event_info = (PG_WAIT_LWLOCK | wait_event_array_index);
+	}
+
+	return wait_event_info;
+}
+#endif
diff --git src/backend/utils/adt/pgstatfuncs.c src/backend/utils/adt/pgstatfuncs.c
index e95e347..9d8e71f 100644
--- src/backend/utils/adt/pgstatfuncs.c
+++ src/backend/utils/adt/pgstatfuncs.c
@@ -868,6 +868,152 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 	return (Datum) 0;
 }
 
+/*
+ * Returns wait event additional statistics of PG backends.
+ */
+Datum
+pg_stat_get_wait_events(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_WAIT_EVENT_COLS	6
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not " \
+						"allowed in this context")));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	tupstore = tuplestore_begin_heap(true, false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+
+	MemoryContextSwitchTo(oldcontext);
+#ifdef USE_WAIT_EVENT_DETAIL
+	{
+		int			num_backends = pgstat_fetch_stat_numbackends();
+		int			curr_backend;
+		int			pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0);
+
+		/* 1-based index */
+		for (curr_backend = 1; curr_backend <= num_backends; curr_backend++)
+		{
+			int			i;
+
+			/* for each row */
+			Datum		values[PG_STAT_GET_WAIT_EVENT_COLS];
+			bool		nulls[PG_STAT_GET_WAIT_EVENT_COLS];
+			LocalPgBackendStatus *local_beentry;
+			PgBackendStatus *beentry;
+
+			MemSet(values, 0, sizeof(values));
+			MemSet(nulls, 0, sizeof(nulls));
+			/* Get the next one in the list */
+			local_beentry = pgstat_fetch_stat_local_beentry(curr_backend);
+			if (!local_beentry)
+			{
+				/* Ignore missing entries if looking for specific PID */
+				if (pid != -1)
+					continue;
+
+				for (i = 0; i < lengthof(nulls); i++)
+					nulls[i] = true;
+
+				nulls[1] = false;
+				values[1] = CStringGetTextDatum("");
+
+				tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+				continue;
+			}
+
+			beentry = &local_beentry->backendStatus;
+
+			/* If looking for specific PID, ignore all the others */
+			if (pid != -1 && beentry->st_procpid != pid)
+				continue;
+
+			/* Values only available to role member or pg_read_all_stats */
+			if (has_privs_of_role(GetUserId(), beentry->st_userid) ||
+				is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS))
+			{
+				int			i;
+				uint32		wait_event_info;
+
+				for (i = 0; i < NUM_WAIT_EVENT; i++)
+				{
+					values[0] = Int32GetDatum(beentry->st_procpid);
+
+					wait_event_info = pgstat_get_wait_event_info(i);
+
+					values[1] = CStringGetTextDatum(pgstat_get_wait_event_type(wait_event_info));
+					values[2] = CStringGetTextDatum(pgstat_get_wait_event(wait_event_info));
+					values[3] = UInt64GetDatum(beentry->st_wait_event_total_elapsed[i]);
+					values[4] = UInt64GetDatum(beentry->st_wait_event_max_elapsed[i]);
+					values[5] = UInt32GetDatum(beentry->st_wait_event_counting[i]);
+
+					tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+				}
+			}
+			else
+			{
+				values[0] = Int32GetDatum(beentry->st_procpid);
+
+				/* No permissions to view data about this session */
+				values[1] = CStringGetTextDatum("");
+				nulls[2] = true;
+				nulls[3] = true;
+				nulls[4] = true;
+				nulls[5] = true;
+
+				tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+			}
+
+			/* If only a single backend was requested, and we found it, break. */
+			if (pid != -1)
+				break;
+		}
+	}
+#else							/* USE_WAIT_EVENT_DETAIL */
+	{
+		/* for each row */
+		Datum		values[PG_STAT_GET_WAIT_EVENT_COLS];
+		bool		nulls[PG_STAT_GET_WAIT_EVENT_COLS];
+		int			i;
+
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+		for (i = 0; i < lengthof(nulls); i++)
+			nulls[i] = true;
+
+		nulls[1] = false;
+		values[1] = CStringGetTextDatum("");
+
+		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+	}
+#endif							/* USE_WAIT_EVENT_DETAIL */
+
+	/* clean up and return the tuplestore */
+	tuplestore_donestoring(tupstore);
+
+	return (Datum) 0;
+}
+
 
 Datum
 pg_backend_pid(PG_FUNCTION_ARGS)
diff --git src/include/catalog/pg_proc.dat src/include/catalog/pg_proc.dat
index a146510..d6997ee 100644
--- src/include/catalog/pg_proc.dat
+++ src/include/catalog/pg_proc.dat
@@ -5511,6 +5511,16 @@
   prorettype => 'void', proargtypes => 'oid',
   prosrc => 'pg_stat_reset_single_function_counters' },
 
+{ oid => '3423',
+  descr => 'statistics: information about currently active backends wait events additional statistics',
+  proname => 'pg_stat_get_wait_events', prorows => '100', proisstrict => 'f',
+  proretset => 't', provolatile => 's', proparallel => 'r',
+  prorettype => 'record', proargtypes => 'int4',
+  proallargtypes => '{int4,int4,text,text,int8,int8,int4}',
+  proargmodes => '{i,o,o,o,o,o,o}',
+  proargnames => '{pid,pid,wait_event_type,wait_event,total_elapsed,max_elapsed,counting}',
+  prosrc => 'pg_stat_get_wait_events' },
+
 { oid => '3163', descr => 'current trigger depth',
   proname => 'pg_trigger_depth', provolatile => 's', proparallel => 'r',
   prorettype => 'int4', proargtypes => '', prosrc => 'pg_trigger_depth' },
diff --git src/include/pg_config.h.in src/include/pg_config.h.in
index f9fb92f..895e49c 100644
--- src/include/pg_config.h.in
+++ src/include/pg_config.h.in
@@ -941,6 +941,10 @@
 /* Define to select unnamed POSIX semaphores. */
 #undef USE_UNNAMED_POSIX_SEMAPHORES
 
+/* Define to build with Wait Event Detail support. (--with-wait-event-detail)
+   */
+#undef USE_WAIT_EVENT_DETAIL
+
 /* Define to use native Windows API for random number generation */
 #undef USE_WIN32_RANDOM
 
diff --git src/include/pgstat.h src/include/pgstat.h
index d59c24a..dbd08b1 100644
--- src/include/pgstat.h
+++ src/include/pgstat.h
@@ -938,6 +938,33 @@ typedef enum ProgressCommandType
 
 #define PGSTAT_NUM_PROGRESS_PARAM	10
 
+#ifdef USE_WAIT_EVENT_DETAIL
+/* ----------
+ * Number of array slots reserved for each of the 9 wait classes, derived
+ * from the last event listed for the class; NUM_WAIT_EVENT is their sum.
+ * ----------
+ */
+#define NUM_WAIT_LWLOCK		(LWTRANCHE_FIRST_USER_DEFINED)
+#define NUM_WAIT_LOCK		(LOCKTAG_LAST_TYPE + 1)
+#define NUM_WAIT_BUFFER_PIN	1
+#define NUM_WAIT_ACTIVITY	((WAIT_EVENT_WAL_WRITER_MAIN & 0x0000FFFF) + 1)
+#define NUM_WAIT_CLIENT		((WAIT_EVENT_WAL_SENDER_WRITE_DATA & 0x0000FFFF) + 1)
+#define NUM_WAIT_EXTENSION	1
+#define NUM_WAIT_IPC		((WAIT_EVENT_SYNC_REP & 0x0000FFFF) + 1)
+#define NUM_WAIT_TIMEOUT	((WAIT_EVENT_RECOVERY_APPLY_DELAY & 0x0000FFFF) + 1)
+#define NUM_WAIT_IO			((WAIT_EVENT_WAL_WRITE & 0x0000FFFF) + 1)
+
+#define NUM_WAIT_EVENT		(NUM_WAIT_LWLOCK \
+							 + NUM_WAIT_LOCK \
+							 + NUM_WAIT_BUFFER_PIN \
+							 + NUM_WAIT_ACTIVITY \
+							 + NUM_WAIT_CLIENT \
+							 + NUM_WAIT_EXTENSION \
+							 + NUM_WAIT_IPC \
+							 + NUM_WAIT_TIMEOUT \
+							 + NUM_WAIT_IO)
+#endif
+
 /* ----------
  * Shared-memory data structures
  * ----------
@@ -1041,6 +1068,18 @@ typedef struct PgBackendStatus
 	ProgressCommandType st_progress_command;
 	Oid			st_progress_command_target;
 	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
+
+#ifdef USE_WAIT_EVENT_DETAIL
+
+	/*
+	 * Per-wait-event statistics of this backend: start time of the wait in
+	 * progress (0 if none), then total/max elapsed time and count per event.
+	 */
+	TimestampTz st_wait_event_start_timestamp;
+	uint64		st_wait_event_total_elapsed[NUM_WAIT_EVENT];
+	uint64		st_wait_event_max_elapsed[NUM_WAIT_EVENT];
+	uint32		st_wait_event_counting[NUM_WAIT_EVENT];
+#endif
 } PgBackendStatus;
 
 /*
@@ -1218,6 +1257,13 @@ extern void pgstat_initstats(Relation rel);
 
 extern char *pgstat_clip_activity(const char *raw_activity);
 
+#ifdef USE_WAIT_EVENT_DETAIL
+extern void pgstat_report_wait_event_detail_start(uint32 wait_event_info);
+extern void pgstat_report_wait_event_detail_end(uint32 wait_event_info);
+extern uint32 pgstat_get_wait_event_info(int wait_event_array_index);
+#endif
+
+
 /* ----------
  * pgstat_report_wait_start() -
  *
@@ -1246,6 +1292,10 @@ pgstat_report_wait_start(uint32 wait_event_info)
 	 * four-bytes, updates are atomic.
 	 */
 	proc->wait_event_info = wait_event_info;
+
+#ifdef USE_WAIT_EVENT_DETAIL
+	pgstat_report_wait_event_detail_start(wait_event_info);
+#endif
 }
 
 /* ----------
@@ -1265,6 +1315,10 @@ pgstat_report_wait_end(void)
 	if (!pgstat_track_activities || !proc)
 		return;
 
+#ifdef USE_WAIT_EVENT_DETAIL
+	pgstat_report_wait_event_detail_end(proc->wait_event_info);
+#endif
+
 	/*
 	 * Since this is a four-byte field which is always read and written as
 	 * four-bytes, updates are atomic.