Optimize kernel readahead using buffer access strategy
Hi,
I created a patch that improves disk reads and the use of OS file caches. It can
optimize the kernel readahead parameter using the buffer access strategy and
posix_fadvise() in various disk-read situations.
In a typical OS, the readahead parameter is decided dynamically according to the
disk-read pattern. If disk reads continue for a long time, the readahead parameter
becomes large. However, because this is based on an empirical, heuristic algorithm,
in some cases it causes wasted disk reads and throws out useful OS file caches.
That is very bad for disk-read performance.
My proposed method controls the OS readahead parameter by using the buffer access
strategy in PostgreSQL together with the posix_fadvise() system call, which can
control the OS readahead parameter. That said, this is a general method in databases.
To show the effect of this patch, I obtained pgbench results for both a database
that fits in memory and a database that is larger than memory, using the
postgresql.conf settings that we always use. It seems to improve performance. I also
think that this feature is going to be necessary for business intelligence,
which will be realized in PostgreSQL version 10. I seriously believe Simon's
presentation at PostgreSQL Conference Europe 2013! It was very exciting!!!
However, PostgreSQL has many kinds of disk-read methods, which are selected by the
planner. I think that we need to discuss situations other than pgbench, as well as
cold-cache situations. I think that optimizing the kernel readahead
parameter while taking the PostgreSQL planner into account seems to be quite difficult, so
I am seriously recruiting a co-author for this patch :-)
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
Attachments:
optimize_kernel-readahead_using_buffer-access-strategy_v1.patchtext/x-diff; name=optimize_kernel-readahead_using_buffer-access-strategy_v1.patchDownload
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 0b31f55..e4b411f 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -9117,7 +9117,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
/* If we got a cancel signal during the copy of the data, quit */
CHECK_FOR_INTERRUPTS();
- smgrread(src, forkNum, blkno, buf);
+ smgrread(src, forkNum, blkno, buf, BAS_BULKREAD);
if (!PageIsVerified(page, blkno))
ereport(ERROR,
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f848391..488cdf1 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -41,6 +41,7 @@
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
+#include "storage/buf.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
@@ -451,7 +452,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
- smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
+ smgrread(smgr, forkNum, blockNum, (char *) bufBlock, strategy);
if (track_io_timing)
{
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index de4d902..8cda2f9 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -73,8 +73,10 @@
#include "catalog/pg_tablespace.h"
#include "common/relpath.h"
#include "pgstat.h"
+#include "storage/buf.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+#include "storage/bufmgr.h"
#include "utils/guc.h"
#include "utils/resowner_private.h"
@@ -383,6 +385,21 @@ pg_flush_data(int fd, off_t offset, off_t amount)
return 0;
}
+/*
+ * pg_fadvise --- advise OS that the cache will need or not
+ *
+ * Not all platforms have posix_fadvise. If it does not support posix_fadvise,
+ * we do nothing about here.
+ */
+int
+pg_fadvise(int fd, off_t offset, off_t amount, int advise)
+{
+#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) && defined(POSIX_FADV_RANDOM) && defined(POSIX_FADV_SEQUENTIAL)
+ return posix_fadvise(fd, offset, amount, advise);
+#else
+ return 0;
+#endif
+}
/*
* fsync_fname -- fsync a file or directory, handling errors properly
@@ -1142,6 +1159,33 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
}
/*
+ * Controling OS file cache using posix_fadvise()
+ */
+int
+FileCacheAdvise(File file, off_t offset, off_t amount, int advise)
+{
+ return pg_fadvise(VfdCache[file].fd, offset, amount, advise);
+}
+
+/*
+ * Select OS readahead strategy using buffer hint. If we select POSIX_FADV_SEQUENTIAL,
+ * readahead parameter becomes the maximum and can read more faster. On the other hand,
+ * if we select POSIX_FADV_RANDOM, readahead wasn't executed at all and file cache
+ * replace algorithm will be more smart. Because it can calculate correct number of accesses
+ * which are hot data.
+ */
+int
+BufferHintIOAdvise(File file, off_t offset, off_t amount, char *strategy)
+{
+ if(strategy != NULL)
+ /* use maximum readahead setting in kernel, we can read more faster */
+ return FileCacheAdvise(file, offset, amount, POSIX_FADV_SEQUENTIAL);
+ else
+ /* don't use readahead in kernel, so we can more effectively use OS file cache */
+ return FileCacheAdvise(file, offset, amount, POSIX_FADV_RANDOM);
+}
+
+/*
* close a file when done with it
*/
void
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index e629181..e8ff0b0 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -653,7 +653,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
*/
void
mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
- char *buffer)
+ char *buffer, char *strategy)
{
off_t seekpos;
int nbytes;
@@ -677,6 +677,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
errmsg("could not seek to block %u in file \"%s\": %m",
blocknum, FilePathName(v->mdfd_vfd))));
+ BufferHintIOAdvise(v->mdfd_vfd, buffer, BLCKSZ, strategy);
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index f7f1437..7a38aec 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -50,7 +50,7 @@ typedef struct f_smgr
void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum, char *buffer);
+ BlockNumber blocknum, char *buffer, char *strategy);
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
@@ -588,9 +588,9 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
*/
void
smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
- char *buffer)
+ char *buffer, char *strategy)
{
- (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
+ (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer, strategy);
}
/*
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 6dc031e..ca9a16a 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -44,6 +44,8 @@ typedef enum
/* in globals.c ... this duplicates miscadmin.h */
extern PGDLLIMPORT int NBuffers;
+
+
/* in bufmgr.c */
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 2a60229..3922c0a 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -68,6 +68,7 @@ extern int max_safe_fds;
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
+extern int FileCacheAdvise(File file, off_t offset, off_t amount, int advise);
extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
@@ -75,6 +76,7 @@ extern int FileSync(File file);
extern off_t FileSeek(File file, off_t offset, int whence);
extern int FileTruncate(File file, off_t offset);
extern char *FilePathName(File file);
+extern int BufferHintIOAdvise(File file, off_t offset, off_t amount, char *strategy);
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
extern FILE *AllocateFile(const char *name, const char *mode);
@@ -113,6 +115,7 @@ extern int pg_fsync_no_writethrough(int fd);
extern int pg_fsync_writethrough(int fd);
extern int pg_fdatasync(int fd);
extern int pg_flush_data(int fd, off_t offset, off_t amount);
+extern int pg_fadvise(int fd, off_t offset, off_t amount, int advise);
extern void fsync_fname(char *fname, bool isdir);
/* Filename components for OpenTemporaryFile */
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 98b6f13..0c4f14e 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -92,7 +92,7 @@ extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
- BlockNumber blocknum, char *buffer);
+ BlockNumber blocknum, char *buffer, char *strategy);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
@@ -118,7 +118,7 @@ extern void mdextend(SMgrRelation reln, ForkNumber forknum,
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
- char *buffer);
+ char *buffer, char *strategy);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
On Thu, Nov 14, 2013 at 9:09 AM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
I create a patch that is improvement of disk-read and OS file caches. It can
optimize kernel readahead parameter using buffer access strategy and
posix_fadvise() in various disk-read situations.
In general OS, readahead parameter was dynamically decided by disk-read
situations. If long time disk-read was happened, readahead parameter becomes big.
However it is based on experienced or heuristic algorithm, it causes waste
disk-read and throws out useful OS file caches in some case. It is bad for
disk-read performance a lot.
It would be relevant to know which kernel did you use for those tests.
@@ -677,6 +677,7 @@ mdread(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum,
errmsg("could not seek to block %u in file \"%s\": %m",
blocknum, FilePathName(v->mdfd_vfd))));
+ BufferHintIOAdvise(v->mdfd_vfd, buffer, BLCKSZ, strategy);
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
A while back, I tried to use posix_fadvise to prefetch index pages. I
ended up finding out that interleaving posix_fadvise with I/O like
that severely hinders (ie: completely disables) the kernel's read-ahead
algorithm.
How exactly did you set up those benchmarks? pg_bench defaults?
pg_bench does not exercise heavy sequential access patterns, or long
index scans. It performs many single-page index lookups per
transaction and that's it. You may want to try your patch with more
real workloads, and maybe you'll confirm what I found out last time I
messed with posix_fadvise. If my experience is still relevant, those
patterns will have suffered a severe performance penalty with this
patch, because it will disable kernel read-ahead on sequential index
access. It may still work for sequential heap scans, because the
access strategy will tell the kernel to do read-ahead, but many other
access methods will suffer.
Try OLAP-style queries.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Nov 14, 2013 at 9:09 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
Hi,
I create a patch that is improvement of disk-read and OS file caches. It can
optimize kernel readahead parameter using buffer access strategy and
posix_fadvice() in various disk-read situations.
When I compiled the HEAD code with this patch on MacOS, I got the following
error and warnings.
gcc -O0 -Wall -Wmissing-prototypes -Wpointer-arith
-Wdeclaration-after-statement -Wendif-labels
-Wmissing-format-attribute -Wformat-security -fno-strict-aliasing
-fwrapv -g -I../../../../src/include -c -o fd.o fd.c
fd.c: In function 'BufferHintIOAdvise':
fd.c:1182: error: 'POSIX_FADV_SEQUENTIAL' undeclared (first use in
this function)
fd.c:1182: error: (Each undeclared identifier is reported only once
fd.c:1182: error: for each function it appears in.)
fd.c:1185: error: 'POSIX_FADV_RANDOM' undeclared (first use in this function)
make[4]: *** [fd.o] Error 1
make[3]: *** [file-recursive] Error 2
make[2]: *** [storage-recursive] Error 2
make[1]: *** [install-backend-recurse] Error 2
make: *** [install-src-recurse] Error 2
tablecmds.c:9120: warning: passing argument 5 of 'smgrread' makes
pointer from integer without a cast
bufmgr.c:455: warning: passing argument 5 of 'smgrread' from
incompatible pointer type
Regards,
--
Fujii Masao
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hi Claudio,
(2013/11/14 22:53), Claudio Freire wrote:
On Thu, Nov 14, 2013 at 9:09 AM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
I create a patch that is improvement of disk-read and OS file caches. It can
optimize kernel readahead parameter using buffer access strategy and
posix_fadvise() in various disk-read situations.
In general OS, readahead parameter was dynamically decided by disk-read
situations. If long time disk-read was happened, readahead parameter becomes big.
However it is based on experienced or heuristic algorithm, it causes waste
disk-read and throws out useful OS file caches in some case. It is bad for
disk-read performance a lot.
It would be relevant to know which kernel did you use for those tests.
I used CentOS 6.4, whose kernel version is 2.6.32-358.23.2.el6.x86_64, in this test.
A while back, I tried to use posix_fadvise to prefetch index pages.
I searched for your past work. Are you talking about this ML thread? Or is there another,
more recent discussion? I see your patch is interesting, but it wasn't submitted to a CF
and the discussion stopped.
/messages/by-id/CAGTBQpZzf70n0PYJ=VQLd+jb3wJGo=2TXmY+SkJD6G_vjC5QNg@mail.gmail.com
I ended up finding out that interleaving posix_fadvise with I/O like
that severly hinders (ie: completely disables) the kernel's read-ahead
algorithm.
With your patch, readahead becomes maximum when an index range scan is selected for a SQL
statement. Is that right? I think that your patch assumes that pages are ordered by index data.
This assumption is partially wrong. If your assumption were true, we would not need the
CLUSTER command. In actuality, running the CLUSTER command gives better performance than not running it.
How exactly did you set up those benchmarks? pg_bench defaults?
My detail test setting is under following,
* Server info
CPU: Intel(R) Xeon(R) CPU E5645 @ 2.40GHz (2U/12C)
RAM: 6GB
-> I reduced it intentionally with an OS parameter, because tests with large memory
take a long time.
HDD: SEAGATE Model: ST2000NM0001 @ 7200rpm * 1
RAID: none.
* postgresql.conf(summarized)
shared_buffers = 600MB (10% of RAM = 6GB)
work_mem = 1MB
maintenance_work_mem = 64MB
wal_level = archive
fsync = on
archive_mode = on
checkpoint_segments = 300
checkpoint_timeout = 15min
checkpoint_completion_target = 0.7
* pgbench settings
pgbench -j 4 -c 32 -T 600 pgbench
pg_bench does not exercise heavy sequential access patterns, or long
index scans. It performs many single-page index lookups per
transaction and that's it.
Yes, your argument is right. And it is also a fact that performance becomes
better in these situations.
You may want to try your patch with more
real workloads, and maybe you'll confirm what I found out last time I
messed with posix_fadvise. If my experience is still relevant, those
patterns will have suffered a severe performance penalty with this
patch, because it will disable kernel read-ahead on sequential index
access. It may still work for sequential heap scans, because the
access strategy will tell the kernel to do read-ahead, but many other
access methods will suffer.
The decisive difference from your patch is that my patch uses the buffer hint control
architecture, so it can control readahead more smartly in some cases.
However, my patch is still a work in progress and needs more improvement. I am going to add
a way to control readahead with a GUC, so that users can freely select the readahead
parameter in their transactions.
Try OLAP-style queries.
I have DBT-3(TPC-H) benchmark tools. If you don't like TPC-H, could you tell me
good OLAP benchmark tools?
Regards,
--
Mitsumasa KONDO
NTT Open Source Software
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Nov 14, 2013 at 6:18 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
I will fix it. Could you tell me your Mac OS version and gcc version? I have
only mac book air with Maverick OS(10.9).
I have an idea that Mac OSX doesn't have posix_fadvise at all. Didn't
you use the relevant macros so that the code at least builds on those
platforms?
--
Peter Geoghegan
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Import Notes
Reply to msg id not found: 52858486.5000205@lab.ntt.co.jp
(2013/11/15 2:03), Fujii Masao wrote:
On Thu, Nov 14, 2013 at 9:09 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:Hi,
I create a patch that is improvement of disk-read and OS file caches. It can
optimize kernel readahead parameter using buffer access strategy and
posix_fadvice() in various disk-read situations.When I compiled the HEAD code with this patch on MacOS, I got the following
error and warnings.gcc -O0 -Wall -Wmissing-prototypes -Wpointer-arith
-Wdeclaration-after-statement -Wendif-labels
-Wmissing-format-attribute -Wformat-security -fno-strict-aliasing
-fwrapv -g -I../../../../src/include -c -o fd.o fd.c
fd.c: In function 'BufferHintIOAdvise':
fd.c:1182: error: 'POSIX_FADV_SEQUENTIAL' undeclared (first use in
this function)
fd.c:1182: error: (Each undeclared identifier is reported only once
fd.c:1182: error: for each function it appears in.)
fd.c:1185: error: 'POSIX_FADV_RANDOM' undeclared (first use in this function)
make[4]: *** [fd.o] Error 1
make[3]: *** [file-recursive] Error 2
make[2]: *** [storage-recursive] Error 2
make[1]: *** [install-backend-recurse] Error 2
make: *** [install-src-recurse] Error 2tablecmds.c:9120: warning: passing argument 5 of 'smgrread' makes
pointer from integer without a cast
bufmgr.c:455: warning: passing argument 5 of 'smgrread' from
incompatible pointer type
Thanks you for your report!
I will fix it. Could you tell me your Mac OS version and gcc version? I have only
mac book air with Maverick OS(10.9).
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
(2013/11/15 11:17), Peter Geoghegan wrote:
On Thu, Nov 14, 2013 at 6:18 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:I will fix it. Could you tell me your Mac OS version and gcc version? I have
only mac book air with Maverick OS(10.9).I have an idea that Mac OSX doesn't have posix_fadvise at all. Didn't
you use the relevant macros so that the code at least builds on those
platforms?
Thank you for your nice advice, too.
I try to fix macro program.
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Thu, Nov 14, 2013 at 11:13 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
Hi Claudio,
(2013/11/14 22:53), Claudio Freire wrote:
On Thu, Nov 14, 2013 at 9:09 AM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:I create a patch that is improvement of disk-read and OS file caches. It
can
optimize kernel readahead parameter using buffer access strategy and
posix_fadvice() in various disk-read situations.In general OS, readahead parameter was dynamically decided by disk-read
situations. If long time disk-read was happened, readahead parameter
becomes big.
However it is based on experienced or heuristic algorithm, it causes
waste
disk-read and throws out useful OS file caches in some case. It is bad
for
disk-read performance a lot.It would be relevant to know which kernel did you use for those tests.
I use CentOS 6.4 which kernel version is 2.6.32-358.23.2.el6.x86_64 in this
test.
That's close to the kernel version I was using, so you should see the
same effect.
A while back, I tried to use posix_fadvise to prefetch index pages.
I search your past work. Do you talk about this ML-thread? Or is there
another latest discussion? I see your patch is interesting, but it wasn't
submitted to CF and stopping discussions.
/messages/by-id/CAGTBQpZzf70n0PYJ=VQLd+jb3wJGo=2TXmY+SkJD6G_vjC5QNg@mail.gmail.com
Yes, I didn't, exactly because of that bad interaction with the
kernel. It needs either more smarts to only do fadvise on known-random
patterns (what you did mostly), or an accompanying kernel patch (which
I was working on, but ran out of test machines).
I ended up finding out that interleaving posix_fadvise with I/O like
that severly hinders (ie: completely disables) the kernel's read-ahead
algorithm.Your patch becomes maximum readahead, when a sql is selected index range
scan. Is it right?
Ehm... sorta.
I think that your patch assumes that pages are ordered by
index-data.
No. It just knows which pages will be needed, and fadvises them. No
guessing involved, except the guess that the scan will not be aborted.
There's a heuristic to stop limited scans from attempting to fadvise,
and that's that prefetch strategy is applied only from the Nth+ page
walk.
It improves index-only scans the most, but I also attempted to handle
heap prefetches. That's where the kernel started conspiring against
me, because I used many naturally-clustered indexes, and THERE
performance was adversely affected because of that kernel bug.
You may want to try your patch with more
real workloads, and maybe you'll confirm what I found out last time I
messed with posix_fadvise. If my experience is still relevant, those
patterns will have suffered a severe performance penalty with this
patch, because it will disable kernel read-ahead on sequential index
access. It may still work for sequential heap scans, because the
access strategy will tell the kernel to do read-ahead, but many other
access methods will suffer.The decisive difference with your patch is that my patch uses buffer hint
control architecture, so it can control readahaed smarter in some cases.
Indeed, but it's not enough. See my above comment about naturally
clustered indexes. The planner expects that, and plans accordingly. It
will notice correlation between a PK and physical location, and will
treat an index scan over PK to be almost sequential. With your patch,
that assumption will be broken I believe.
However, my patch is on the way and needed to more improvement. I am going
to add method of controlling readahead by GUC, for user can freely select
readahed parameter in their transactions.
Rather, I'd try to avoid fadvising consecutive or almost-consecutive
blocks. Detecting that is hard at the block level, but maybe you can
tie that detection into the planner, and specify a sequential strategy
when the planner expects index-heap correlation?
Try OLAP-style queries.
I have DBT-3(TPC-H) benchmark tools. If you don't like TPC-H, could you tell
me good OLAP benchmark tools?
I don't really know. Skimming the specs, I'm not sure if those queries
generate large index range queries. You could try, maybe with
autoexplain?
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On 11/14/13, 7:09 AM, KONDO Mitsumasa wrote:
I create a patch that is improvement of disk-read and OS file caches. It can
optimize kernel readahead parameter using buffer access strategy and
posix_fadvice() in various disk-read situations.
Various compiler warnings:
tablecmds.c: In function ‘copy_relation_data’:
tablecmds.c:9120:3: warning: passing argument 5 of ‘smgrread’ makes pointer from integer without a cast [enabled by default]
In file included from tablecmds.c:79:0:
../../../src/include/storage/smgr.h:94:13: note: expected ‘char *’ but argument is of type ‘int’
bufmgr.c: In function ‘ReadBuffer_common’:
bufmgr.c:455:4: warning: passing argument 5 of ‘smgrread’ from incompatible pointer type [enabled by default]
In file included from ../../../../src/include/storage/buf_internals.h:22:0,
from bufmgr.c:45:
../../../../src/include/storage/smgr.h:94:13: note: expected ‘char *’ but argument is of type ‘BufferAccessStrategy’
md.c: In function ‘mdread’:
md.c:680:2: warning: passing argument 2 of ‘BufferHintIOAdvise’ makes integer from pointer without a cast [enabled by default]
In file included from md.c:34:0:
../../../../src/include/storage/fd.h:79:12: note: expected ‘off_t’ but argument is of type ‘char *’
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
(2013/11/15 13:48), Claudio Freire wrote:
On Thu, Nov 14, 2013 at 11:13 PM, KONDO Mitsumasa
I use CentOS 6.4 which kernel version is 2.6.32-358.23.2.el6.x86_64 in this
test.That's close to the kernel version I was using, so you should see the
same effect.
OK. You proposed a patch that maximizes readahead; I think it seems to benefit
performance, and that part of your argument is really true.
Your patch becomes maximum readahead, when a sql is selected index range
scan. Is it right?Ehm... sorta.
I think that your patch assumes that pages are ordered by
index-data.No. It just knows which pages will be needed, and fadvises them. No
guessing involved, except the guess that the scan will not be aborted.
There's a heuristic to stop limited scans from attempting to fadvise,
and that's that prefetch strategy is applied only from the Nth+ page
walk.
We may be able to completely optimize kernel readahead in PostgreSQL in the future;
however, it is very difficult and would take a long time to achieve that completely
from the beginning. So I propose a GUC switch that users can use in their transactions. (I
will create this patch in this CF.) If someone wants to turn off readahead in order to use
the file cache more efficiently in his transactions, he can run "SET readahead = off".
PostgreSQL is open source, and I think that it will become clear in which cases this is
effective as many people use it.
It improves index-only scans the most, but I also attempted to handle
heap prefetches. That's where the kernel started conspiring against
me, because I used many naturally-clustered indexes, and THERE
performance was adversely affected because of that kernel bug.
I am also creating a Gaussian-distributed pgbench now and will submit it to this CF. It can
partially clarify in which situations this is effective.
You may want to try your patch with more
real workloads, and maybe you'll confirm what I found out last time I
messed with posix_fadvise. If my experience is still relevant, those
patterns will have suffered a severe performance penalty with this
patch, because it will disable kernel read-ahead on sequential index
access. It may still work for sequential heap scans, because the
access strategy will tell the kernel to do read-ahead, but many other
access methods will suffer.The decisive difference with your patch is that my patch uses buffer hint
control architecture, so it can control readahaed smarter in some cases.Indeed, but it's not enough. See my above comment about naturally
clustered indexes. The planner expects that, and plans accordingly. It
will notice correlation between a PK and physical location, and will
treat an index scan over PK to be almost sequential. With your patch,
that assumption will be broken I believe.
~
However, my patch is on the way and needed to more improvement. I am going
to add method of controlling readahead by GUC, for user can freely select
readahed parameter in their transactions.Rather, I'd try to avoid fadvising consecutive or almost-consecutive
blocks. Detecting that is hard at the block level, but maybe you can
tie that detection into the planner, and specify a sequential strategy
when the planner expects index-heap correlation?
I think we had better develop these patches step by step, one patch at a time,
because it is difficult to fully achieve readahead optimization from
the beginning in one patch. We need a framework in these patches first.
Try OLAP-style queries.
I have DBT-3(TPC-H) benchmark tools. If you don't like TPC-H, could you tell
me good OLAP benchmark tools?I don't really know. Skimming the specs, I'm not sure if those queries
generate large index range queries. You could try, maybe with
autoexplain?
OK, I do. And, I will use simple large index range queries with explain command.
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
On Sun, Nov 17, 2013 at 11:02 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
However, my patch is on the way and needed to more improvement. I am
going
to add method of controlling readahead by GUC, for user can freely select
readahed parameter in their transactions.Rather, I'd try to avoid fadvising consecutive or almost-consecutive
blocks. Detecting that is hard at the block level, but maybe you can
tie that detection into the planner, and specify a sequential strategy
when the planner expects index-heap correlation?I think we had better to develop these patches in step by step each patches,
because it is difficult that readahead optimizetion is completely come true
from a beginning of one patch. We need flame-work in these patches, first.
Well, problem is, that without those smarts, I don't think this patch
can be enabled by default. It will considerably hurt common use cases
for postgres.
But I guess we'll have a better idea about that when we see how much
of a performance impact it makes when you run those tests, so no need
to guess in the dark.
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
(2013/11/18 11:25), Claudio Freire wrote:
On Sun, Nov 17, 2013 at 11:02 PM, KONDO Mitsumasa
<kondo.mitsumasa@lab.ntt.co.jp> wrote:
However, my patch is still in progress and needs more improvement. I am going
to add a method of controlling readahead by GUC, so that users can freely select
the readahead parameter in their transactions.
Rather, I'd try to avoid fadvising consecutive or almost-consecutive
blocks. Detecting that is hard at the block level, but maybe you can
tie that detection into the planner, and specify a sequential strategy
when the planner expects index-heap correlation?
I think we had better develop these patches step by step,
because it is difficult to fully achieve readahead optimization
in a single initial patch. We need a framework in these patches first.
Well, problem is, that without those smarts, I don't think this patch
can be enabled by default. It will considerably hurt common use cases
for postgres.
Yes. I agree with you that the optimization should be disabled by default
(using normal readahead as before). The next version of my patch will work this way.
But I guess we'll have a better idea about that when we see how much
of a performance impact it makes when you run those tests, so no need
to guess in the dark.
Yes, sure.
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
Hi,
I revised this patch and re-ran the performance test; it works correctly on Linux
with no compile warnings. I added a GUC, enable_kernel_readahead, in the new
version. When this GUC is on (default), reads use POSIX_FADV_NORMAL, which is the
general readahead of the OS. When it is off, reads use POSIX_FADV_RANDOM or
POSIX_FADV_SEQUENTIAL, chosen by the buffer hint in Postgres, so the readahead
parameter is optimized by Postgres. Users can change this parameter in their
transactions anywhere and at any time.
* Test server
Server: HP Proliant DL360 G7
CPU: Xeon E5640 2.66GHz (1P/4C)
Memory: 18GB(PC3-10600R-9)
Disk: 146GB(15k)*4 RAID1+0
RAID controller: P410i/256MB
OS: RHEL 6.4(x86_64)
FS: Ext4
* Test setting
I use "pgbench -c 8 -j 4 -T 2400 -S -P 10 -a"
I also use my accurate patch in this test, so I executed the following
commands before each benchmark.
1. cluster all database
2. truncate pgbench_history
3. checkpoint
4. sync
5. checkpoint
* postgresql.conf
shared_buffers = 2048MB
maintenance_work_mem = 64MB
wal_level = minimal
checkpoint_segments = 300
checkpoint_timeout = 15min
checkpoint_completion_target = 0.7
* Performance test result
** In memory database size
s=1000 | 1 | 2 | 3 | avg
---------------------------------------------
readahead=on | 39836 | 40229 | 40055 | 40040
readahead=off | 31259 | 29656 | 30693 | 30536
ratio | 78% | 74% | 77% | 76%
** Over memory database size
s=2000 | 1 | 2 | 3 | avg
---------------------------------------------
readahead=on | 1288 | 1370 | 1367 | 1341
readahead=off | 1683 | 1688 | 1395 | 1589
ratio | 131% | 123% | 102% | 118%
s=3000 | 1 | 2 | 3 | avg
---------------------------------------------
readahead=on | 965 | 862 | 993 | 940
readahead=off | 1113 | 1098 | 935 | 1049
ratio | 115% | 127% | 94% | 112%
Performance looks good except at scale factor=1000. When the readahead parameter
is off, disk I/O is kept to the necessary minimum, so it is faster than
"readahead=on". "readahead=on" performs useless disk I/O. For example, which is
faster over many transactions: 8KB random reads or 12KB random reads from disk?
It is self-evident that the former is faster.
At scale factor 1000, the buffers become hot more slowly than with
"readahead=on", so performance appears lower. But that is an essential aspect of
measuring performance, and you can confirm it in the attached benchmark graphs.
We can use this parameter when the buffer is relatively hot. If you want to see
graphs of the other trials, I will send them.
I will also add support for MacOS and write documentation for this patch this week.
#MacOS is in my house.
Regards,
--
Mitsumasa KONDO
NTT Open Source Software Center
Attachments:
optimizing_kernel-readahead_using_buffer-access-strategy_v3.patchtext/x-diff; name=optimizing_kernel-readahead_using_buffer-access-strategy_v3.patchDownload
*** a/configure
--- b/configure
***************
*** 19937,19943 **** LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
! for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
--- 19937,19943 ----
! for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fadvise pstat readlink setproctitle setsid shm_open sigprocmask symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
do
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 9119,9125 **** copy_relation_data(SMgrRelation src, SMgrRelation dst,
/* If we got a cancel signal during the copy of the data, quit */
CHECK_FOR_INTERRUPTS();
! smgrread(src, forkNum, blkno, buf);
if (!PageIsVerified(page, blkno))
ereport(ERROR,
--- 9119,9125 ----
/* If we got a cancel signal during the copy of the data, quit */
CHECK_FOR_INTERRUPTS();
! smgrread(src, forkNum, blkno, buf, (char *) BAS_BULKREAD);
if (!PageIsVerified(page, blkno))
ereport(ERROR,
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 41,46 ****
--- 41,47 ----
#include "pg_trace.h"
#include "pgstat.h"
#include "postmaster/bgwriter.h"
+ #include "storage/buf.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
***************
*** 451,457 **** ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
! smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
if (track_io_timing)
{
--- 452,458 ----
if (track_io_timing)
INSTR_TIME_SET_CURRENT(io_start);
! smgrread(smgr, forkNum, blockNum, (char *) bufBlock, (char *) strategy);
if (track_io_timing)
{
*** a/src/backend/storage/file/fd.c
--- b/src/backend/storage/file/fd.c
***************
*** 73,80 ****
--- 73,82 ----
#include "catalog/pg_tablespace.h"
#include "common/relpath.h"
#include "pgstat.h"
+ #include "storage/buf.h"
#include "storage/fd.h"
#include "storage/ipc.h"
+ #include "storage/bufmgr.h"
#include "utils/guc.h"
#include "utils/resowner_private.h"
***************
*** 123,129 **** int max_files_per_process = 1000;
* setting this variable, and so need not be tested separately.
*/
int max_safe_fds = 32; /* default if not changed */
!
/* Debugging.... */
--- 125,131 ----
* setting this variable, and so need not be tested separately.
*/
int max_safe_fds = 32; /* default if not changed */
! bool enable_kernel_readahead = true ;
/* Debugging.... */
***************
*** 383,388 **** pg_flush_data(int fd, off_t offset, off_t amount)
--- 385,405 ----
return 0;
}
+ /*
+ * pg_fadvise --- advise OS that the cache will need or not
+ *
+ * Not all platforms have posix_fadvise. If it does not support posix_fadvise,
+ * we do nothing about here.
+ */
+ int
+ pg_fadvise(int fd, off_t offset, off_t amount, int advise)
+ {
+ #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) && defined(POSIX_FADV_RANDOM) && defined(POSIX_FADV_SEQUENTIAL)
+ return posix_fadvise(fd, offset, amount, advise);
+ #else
+ return 0;
+ #endif
+ }
/*
* fsync_fname -- fsync a file or directory, handling errors properly
***************
*** 1142,1147 **** OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
--- 1159,1195 ----
}
/*
+ * Controling OS file cache using posix_fadvise()
+ */
+ int
+ FileCacheAdvise(File file, off_t offset, off_t amount, int advise)
+ {
+ return pg_fadvise(VfdCache[file].fd, offset, amount, advise);
+ }
+
+ /*
+ * Select OS readahead strategy using buffer hint. If we select POSIX_FADV_SEQUENTIAL,
+ * readahead parameter becomes the maximum and can read more faster. On the other hand,
+ * if we select POSIX_FADV_RANDOM, readahead wasn't executed at all and file cache
+ * replace algorithm will be more smart. Because it can calculate correct number of accesses
+ * which are hot data.
+ */
+ int
+ BufferHintIOAdvise(File file, char *offset, off_t amount, char *strategy)
+ {
+ if(enable_kernel_readahead)
+ return FileCacheAdvise(file, (off_t) offset, amount, POSIX_FADV_NORMAL);
+
+ /* readahead optimization */
+ if(strategy != NULL)
+ /* use maximum readahead setting in kernel, we can read more faster */
+ return FileCacheAdvise(file, (off_t) offset, amount, POSIX_FADV_SEQUENTIAL);
+ else
+ /* don't use readahead in kernel, so we can more effectively use OS file cache */
+ return FileCacheAdvise(file, (off_t) offset, amount, POSIX_FADV_RANDOM);
+ }
+
+ /*
* close a file when done with it
*/
void
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
***************
*** 162,168 **** static List *pendingUnlinks = NIL;
static CycleCtr mdsync_cycle_ctr = 0;
static CycleCtr mdckpt_cycle_ctr = 0;
-
typedef enum /* behavior for mdopen & _mdfd_getseg */
{
EXTENSION_FAIL, /* ereport if segment not present */
--- 162,167 ----
***************
*** 653,659 **** mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
*/
void
mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! char *buffer)
{
off_t seekpos;
int nbytes;
--- 652,658 ----
*/
void
mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! char *buffer, char *strategy)
{
off_t seekpos;
int nbytes;
***************
*** 677,682 **** mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
--- 676,683 ----
errmsg("could not seek to block %u in file \"%s\": %m",
blocknum, FilePathName(v->mdfd_vfd))));
+ /* Control buffered IO in OS by using posix_fadvise() */
+ BufferHintIOAdvise(v->mdfd_vfd, buffer, BLCKSZ, strategy);
nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ);
TRACE_POSTGRESQL_SMGR_MD_READ_DONE(forknum, blocknum,
*** a/src/backend/storage/smgr/smgr.c
--- b/src/backend/storage/smgr/smgr.c
***************
*** 50,56 **** typedef struct f_smgr
void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
! BlockNumber blocknum, char *buffer);
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
--- 50,56 ----
void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
! BlockNumber blocknum, char *buffer, char *strategy);
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
***************
*** 588,596 **** smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
*/
void
smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! char *buffer)
{
! (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
}
/*
--- 588,596 ----
*/
void
smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! char *buffer, char *strategy)
{
! (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer, strategy);
}
/*
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
***************
*** 762,767 **** static struct config_bool ConfigureNamesBool[] =
--- 762,776 ----
NULL, NULL, NULL
},
{
+ {"enable_kernel_readahead", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("On is optimize readahead by kernel, off is optimized by postgres."),
+ NULL
+ },
+ &enable_kernel_readahead,
+ true,
+ NULL, NULL, NULL
+ },
+ {
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
gettext_noop("This algorithm attempts to do planning without "
*** a/src/backend/utils/misc/postgresql.conf.sample
--- b/src/backend/utils/misc/postgresql.conf.sample
***************
*** 135,140 ****
--- 135,142 ----
#temp_file_limit = -1 # limits per-session temp file space
# in kB, or -1 for no limit
+ #enable_kernel_readahead = on # on is optimized by OS,
+ # off is optimized by postgres
# - Kernel Resource Usage -
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 44,55 **** typedef enum
--- 44,58 ----
/* in globals.c ... this duplicates miscadmin.h */
extern PGDLLIMPORT int NBuffers;
+
+
/* in bufmgr.c */
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
extern bool track_io_timing;
extern int target_prefetch_pages;
+ extern bool enable_kernel_readahead;
/* in buf_init.c */
extern PGDLLIMPORT char *BufferBlocks;
*** a/src/include/storage/fd.h
--- b/src/include/storage/fd.h
***************
*** 68,73 **** extern int max_safe_fds;
--- 68,74 ----
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
+ extern int FileCacheAdvise(File file, off_t offset, off_t amount, int advise);
extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
***************
*** 75,80 **** extern int FileSync(File file);
--- 76,82 ----
extern off_t FileSeek(File file, off_t offset, int whence);
extern int FileTruncate(File file, off_t offset);
extern char *FilePathName(File file);
+ extern int BufferHintIOAdvise(File file, char *offset, off_t amount, char *strategy);
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
extern FILE *AllocateFile(const char *name, const char *mode);
***************
*** 113,118 **** extern int pg_fsync_no_writethrough(int fd);
--- 115,121 ----
extern int pg_fsync_writethrough(int fd);
extern int pg_fdatasync(int fd);
extern int pg_flush_data(int fd, off_t offset, off_t amount);
+ extern int pg_fadvise(int fd, off_t offset, off_t amount, int advise);
extern void fsync_fname(char *fname, bool isdir);
/* Filename components for OpenTemporaryFile */
*** a/src/include/storage/smgr.h
--- b/src/include/storage/smgr.h
***************
*** 92,98 **** extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
! BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
--- 92,98 ----
extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
! BlockNumber blocknum, char *buffer, char *strategy);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
***************
*** 118,124 **** extern void mdextend(SMgrRelation reln, ForkNumber forknum,
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! char *buffer);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
--- 118,124 ----
extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
! char *buffer, char *strategy);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
*** a/src/test/regress/expected/rangefuncs.out
--- b/src/test/regress/expected/rangefuncs.out
***************
*** 1,18 ****
SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
! name | setting
! ----------------------+---------
! enable_bitmapscan | on
! enable_hashagg | on
! enable_hashjoin | on
! enable_indexonlyscan | on
! enable_indexscan | on
! enable_material | on
! enable_mergejoin | on
! enable_nestloop | on
! enable_seqscan | on
! enable_sort | on
! enable_tidscan | on
! (11 rows)
CREATE TABLE foo2(fooid int, f2 int);
INSERT INTO foo2 VALUES(1, 11);
--- 1,19 ----
SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
! name | setting
! -------------------------+---------
! enable_bitmapscan | on
! enable_hashagg | on
! enable_hashjoin | on
! enable_indexonlyscan | on
! enable_indexscan | on
! enable_kernel_readahead | on
! enable_material | on
! enable_mergejoin | on
! enable_nestloop | on
! enable_seqscan | on
! enable_sort | on
! enable_tidscan | on
! (12 rows)
CREATE TABLE foo2(fooid int, f2 int);
INSERT INTO foo2 VALUES(1, 11);
s=1000-try1.pngimage/png; name="s=1000-try1.png"Download
�PNG
IHDR � ��*� )PLTE��� ���� � ��� � ���@ �� ��� �@����@ ��� �` �� `��`� � @��0`��` @@@@� ��`�``�`� � � ` ���@��`��`� `��� � �` �``` @@ @�`� `�``����@ � ���������� ���` ����`��� ��`�@ �@@�����`����� ������� � ��������`` � �� �� �������� � � �� � �� � �@ �@��`��`��� �� ��@��@��`��p����� ������T&�s 9IDATx������*Eq���LT���e�=�&�D�Nm�*m!�B!�B!�DL���<��Exu��D��^P ��XO��}K�J L
�V����>A%�X���&���>8A���<�p�[+�z�dT��� �<�?��\�(�(�(�(�b����=��;�O��f}�mf����o���9D{�����
2�C hG-��"��"@���c���
�� ����>�������5��������mfK�o?�m��&K?xi����^��H�E�h��M��5���������0������T�vv!�����O6�0�"y/H�xk��"�<J����q)��h�]���?*�fw!�������M�n�TR��{��)�>�I�;&����^��KtFQ��T��������7j����%�a��\�N�9���V�&���<�KpF�j�'i�j�9�6z;/�>�A�[5@T�+;�{�N1�(]��S�������{h�2��8S�Om���d�]�W>���k[�����a�}@����A����������Y���dBMN�J)�F������l��Mj�LV�ouk���]'�����+���m�6�a���7|ji�fS%������+����~�An���/n"T�}������O�on]�Om}"��_K��D�r�.�7��L�M.��wH`k�
�n���"R ��f�(�\�Gd�f �� �
0��k4@� ���{(��)�((���� �H�hD��� �7p���;�� �|?�q n~��@��t�e�\
�� ��y�����
���8�������� �IP�\ �= ��=�>