diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 40111990c5..9ae4caeac3 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -167,19 +167,30 @@ top: * but we need to be consistent */ /* - * Check if the page is still the rightmost leaf page, has enough - * free space to accommodate the new tuple, no split is in progress - * and the scankey is greater than or equal to the first key on the - * page. + * Check if the page is still the rightmost valid leaf page, has + * enough free space to accommodate the new tuple and the scankey + * is strictly greater than the first key on the page. + * + * NB: We could take the fastpath even when the target block + * doesn't have enough free space (but it's the right-most block) + * since _bt_insertonpg() is capable of working with a NULL stack + * and that's the only additional thing the slow path sets up. But + * we don't optimise for that case because while spliting and + * inserting into the parent without the stack is relatively slow + * operation. */ if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) && - !P_INCOMPLETE_SPLIT(lpageop) && - !P_IGNORE(lpageop) && - (PageGetFreeSpace(page) > itemsz) && - PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) && - _bt_compare(rel, natts, itup_scankey, page, - P_FIRSTDATAKEY(lpageop)) > 0) + !P_IGNORE(lpageop) && + (PageGetFreeSpace(page) > itemsz) && + PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) && + _bt_compare(rel, natts, itup_scankey, page, + P_FIRSTDATAKEY(lpageop)) > 0) { + /* + * The right-most block should never have incomplete split. But + * be paranoid and check for it anyways. + */ + Assert(!P_INCOMPLETE_SPLIT(lpageop)); fastpath = true; } else @@ -901,6 +912,7 @@ _bt_insertonpg(Relation rel, BTMetaPageData *metad = NULL; OffsetNumber itup_off; BlockNumber itup_blkno; + BlockNumber cachedBlock = InvalidBlockNumber; itup_off = newitemoff; itup_blkno = BufferGetBlockNumber(buf); @@ -975,10 +987,12 @@ _bt_insertonpg(Relation rel, /* * Cache the block information if we just inserted into the - * rightmost leaf page of the index. + * rightmost leaf page of the index and it's not the root page. + * For very small index where root is also the leaf, there is + * no point trying for any optimisation. */ - if (P_RIGHTMOST(lpageop)) - RelationSetTargetBlock(rel, BufferGetBlockNumber(buf)); + if (P_RIGHTMOST(lpageop) && !P_ISROOT(lpageop)) + cachedBlock = BufferGetBlockNumber(buf); } else { @@ -1038,6 +1052,23 @@ _bt_insertonpg(Relation rel, if (BufferIsValid(cbuf)) _bt_relbuf(rel, cbuf); _bt_relbuf(rel, buf); + + /* + * If we decided to cache the insertion target block, then set it now. + * But before that, check for the height of the tree and don't go for + * the optimisation for small indexes. We defer that check to this + * point to ensure that we don't call _bt_getrootheight while holding + * lock on any other block. + * + * We do this after dropping locks on all buffers. So the information + * about whether the insertion block is still the rightmost block or + * not may have changed in between. But we will deal with that during + * next insert operation. No special care is required while setting it. + */ +#define BTREE_FASTPATH_MIN_LEVEL 2 + if (BlockNumberIsValid(cachedBlock) && + _bt_getrootheight(rel) >= BTREE_FASTPATH_MIN_LEVEL) + RelationSetTargetBlock(rel, cachedBlock); } }