diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b0e4c41..e9a96cf 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -651,7 +651,7 @@ static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg); static void WriteControlFile(void); -static void ReadControlFile(void); +static void ReadControlFile(ControlFileData *controlFile); static char *str_time(pg_time_t tnow); static bool CheckForStandbyTrigger(void); @@ -4652,7 +4652,7 @@ WriteControlFile(void) } static void -ReadControlFile(void) +ReadControlFile(ControlFileData *controlFile) { pg_crc32 crc; int fd; @@ -4669,7 +4669,7 @@ ReadControlFile(void) errmsg("could not open control file \"%s\": %m", XLOG_CONTROL_FILE))); - if (read(fd, ControlFile, sizeof(ControlFileData)) != sizeof(ControlFileData)) + if (read(fd, controlFile, sizeof(ControlFileData)) != sizeof(ControlFileData)) ereport(PANIC, (errcode_for_file_access(), errmsg("could not read from control file: %m"))); @@ -4683,31 +4683,31 @@ ReadControlFile(void) * enlightening than complaining about wrong CRC. */ - if (ControlFile->pg_control_version != PG_CONTROL_VERSION && ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0) + if (controlFile->pg_control_version != PG_CONTROL_VERSION && controlFile->pg_control_version % 65536 == 0 && controlFile->pg_control_version / 65536 != 0) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x)," " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).", - ControlFile->pg_control_version, ControlFile->pg_control_version, + controlFile->pg_control_version, controlFile->pg_control_version, PG_CONTROL_VERSION, PG_CONTROL_VERSION), errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb."))); - if (ControlFile->pg_control_version != PG_CONTROL_VERSION) + if (controlFile->pg_control_version != PG_CONTROL_VERSION) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d," " but the server was compiled with PG_CONTROL_VERSION %d.", - ControlFile->pg_control_version, PG_CONTROL_VERSION), + controlFile->pg_control_version, PG_CONTROL_VERSION), errhint("It looks like you need to initdb."))); /* Now check the CRC. */ INIT_CRC32(crc); COMP_CRC32(crc, - (char *) ControlFile, + (char *) controlFile, offsetof(ControlFileData, crc)); FIN_CRC32(crc); - if (!EQ_CRC32(crc, ControlFile->crc)) + if (!EQ_CRC32(crc, controlFile->crc)) ereport(FATAL, (errmsg("incorrect checksum in control file"))); @@ -4716,84 +4716,84 @@ ReadControlFile(void) * compatible with the backend executable, we want to abort before we can * possibly do any damage. */ - if (ControlFile->catalog_version_no != CATALOG_VERSION_NO) + if (controlFile->catalog_version_no != CATALOG_VERSION_NO) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with CATALOG_VERSION_NO %d," " but the server was compiled with CATALOG_VERSION_NO %d.", - ControlFile->catalog_version_no, CATALOG_VERSION_NO), + controlFile->catalog_version_no, CATALOG_VERSION_NO), errhint("It looks like you need to initdb."))); - if (ControlFile->maxAlign != MAXIMUM_ALIGNOF) + if (controlFile->maxAlign != MAXIMUM_ALIGNOF) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with MAXALIGN %d," " but the server was compiled with MAXALIGN %d.", - ControlFile->maxAlign, MAXIMUM_ALIGNOF), + controlFile->maxAlign, MAXIMUM_ALIGNOF), errhint("It looks like you need to initdb."))); - if (ControlFile->floatFormat != FLOATFORMAT_VALUE) + if (controlFile->floatFormat != FLOATFORMAT_VALUE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster appears to use a different floating-point number format than the server executable."), errhint("It looks like you need to initdb."))); - if (ControlFile->blcksz != BLCKSZ) + if (controlFile->blcksz != BLCKSZ) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with BLCKSZ %d," " but the server was compiled with BLCKSZ %d.", - ControlFile->blcksz, BLCKSZ), + controlFile->blcksz, BLCKSZ), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->relseg_size != RELSEG_SIZE) + if (controlFile->relseg_size != RELSEG_SIZE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with RELSEG_SIZE %d," " but the server was compiled with RELSEG_SIZE %d.", - ControlFile->relseg_size, RELSEG_SIZE), + controlFile->relseg_size, RELSEG_SIZE), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->xlog_blcksz != XLOG_BLCKSZ) + if (controlFile->xlog_blcksz != XLOG_BLCKSZ) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with XLOG_BLCKSZ %d," " but the server was compiled with XLOG_BLCKSZ %d.", - ControlFile->xlog_blcksz, XLOG_BLCKSZ), + controlFile->xlog_blcksz, XLOG_BLCKSZ), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->xlog_seg_size != XLOG_SEG_SIZE) + if (controlFile->xlog_seg_size != XLOG_SEG_SIZE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with XLOG_SEG_SIZE %d," " but the server was compiled with XLOG_SEG_SIZE %d.", - ControlFile->xlog_seg_size, XLOG_SEG_SIZE), + controlFile->xlog_seg_size, XLOG_SEG_SIZE), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->nameDataLen != NAMEDATALEN) + if (controlFile->nameDataLen != NAMEDATALEN) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with NAMEDATALEN %d," " but the server was compiled with NAMEDATALEN %d.", - ControlFile->nameDataLen, NAMEDATALEN), + controlFile->nameDataLen, NAMEDATALEN), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->indexMaxKeys != INDEX_MAX_KEYS) + if (controlFile->indexMaxKeys != INDEX_MAX_KEYS) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with INDEX_MAX_KEYS %d," " but the server was compiled with INDEX_MAX_KEYS %d.", - ControlFile->indexMaxKeys, INDEX_MAX_KEYS), + controlFile->indexMaxKeys, INDEX_MAX_KEYS), errhint("It looks like you need to recompile or initdb."))); - if (ControlFile->toast_max_chunk_size != TOAST_MAX_CHUNK_SIZE) + if (controlFile->toast_max_chunk_size != TOAST_MAX_CHUNK_SIZE) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with TOAST_MAX_CHUNK_SIZE %d," " but the server was compiled with TOAST_MAX_CHUNK_SIZE %d.", - ControlFile->toast_max_chunk_size, (int) TOAST_MAX_CHUNK_SIZE), + controlFile->toast_max_chunk_size, (int) TOAST_MAX_CHUNK_SIZE), errhint("It looks like you need to recompile or initdb."))); #ifdef HAVE_INT64_TIMESTAMP - if (ControlFile->enableIntTimes != true) + if (controlFile->enableIntTimes != true) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized without HAVE_INT64_TIMESTAMP" " but the server was compiled with HAVE_INT64_TIMESTAMP."), errhint("It looks like you need to recompile or initdb."))); #else - if (ControlFile->enableIntTimes != false) + if (controlFile->enableIntTimes != false) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with HAVE_INT64_TIMESTAMP" @@ -4802,14 +4802,14 @@ ReadControlFile(void) #endif #ifdef USE_FLOAT4_BYVAL - if (ControlFile->float4ByVal != true) + if (controlFile->float4ByVal != true) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized without USE_FLOAT4_BYVAL" " but the server was compiled with USE_FLOAT4_BYVAL."), errhint("It looks like you need to recompile or initdb."))); #else - if (ControlFile->float4ByVal != false) + if (controlFile->float4ByVal != false) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with USE_FLOAT4_BYVAL" @@ -4818,14 +4818,14 @@ ReadControlFile(void) #endif #ifdef USE_FLOAT8_BYVAL - if (ControlFile->float8ByVal != true) + if (controlFile->float8ByVal != true) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized without USE_FLOAT8_BYVAL" " but the server was compiled with USE_FLOAT8_BYVAL."), errhint("It looks like you need to recompile or initdb."))); #else - if (ControlFile->float8ByVal != false) + if (controlFile->float8ByVal != false) ereport(FATAL, (errmsg("database files are incompatible with server"), errdetail("The database cluster was initialized with USE_FLOAT8_BYVAL" @@ -4835,6 +4835,17 @@ ReadControlFile(void) } void +GetControlFile(ControlFileData *controlFile) +{ + if (ControlFile == NULL) + { + ReadControlFile(controlFile); + } else { + memcpy(controlFile, ControlFile, sizeof(ControlFileData)); + } +} + +void UpdateControlFile(void) { int fd; @@ -5041,7 +5052,7 @@ XLOGShmemInit(void) * reasons why). */ if (!IsBootstrapProcessingMode()) - ReadControlFile(); + ReadControlFile(ControlFile); } /* @@ -5991,7 +6002,7 @@ StartupXLOG(void) * Note: in most control paths, *ControlFile is already valid and we need * not do ReadControlFile() here, but might as well do it to be sure. */ - ReadControlFile(); + ReadControlFile(ControlFile); if (ControlFile->state < DB_SHUTDOWNED || ControlFile->state > DB_IN_PRODUCTION || diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index dadb49d..1f68941 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -127,6 +127,16 @@ InitBufferPool(void) /* Init other shared buffer-management stuff */ StrategyInitialize(!foundDescs); + + if (EnableBufferCacheHibernation) + { + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTERS, + (char *)BufferDescriptors, sizeof(BufferDesc), NBuffers); + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS, + (char *)BufferBlocks, BLCKSZ, NBuffers); + + ResumeBufferCacheHibernation(); + } } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index f96685d..10e0789 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -31,8 +31,10 @@ #include "postgres.h" #include +#include #include +#include "access/xlog.h" #include "catalog/catalog.h" #include "executor/instrument.h" #include "miscadmin.h" @@ -61,6 +63,11 @@ #define BUF_WRITTEN 0x01 #define BUF_REUSABLE 0x02 +/* enable this to debug buffer cache hibernation. */ +#if 0 +#define DEBUG_BUFFER_CACHE_HIBERNATION +#endif + /* GUC variables */ bool zero_damaged_pages = false; @@ -765,6 +772,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION + elog(DEBUG5, + "alloc [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d", + buf->buf_id, buf->flags, buf->usage_count, buf->refcount, + buf->wait_backend_pid, buf->freeNext, + newHash, newTag.rnode.spcNode, + newTag.rnode.dbNode, newTag.rnode.relNode, + newTag.forkNum, newTag.blockNum); +#endif + return buf; } @@ -800,6 +817,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * the old content is no longer relevant. (The usage_count starts out at * 1 so that the buffer can survive one clock-sweep pass.) */ +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION + elog(DEBUG5, + "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d", + buf->buf_id, buf->flags, buf->usage_count, buf->refcount, + buf->wait_backend_pid, buf->freeNext, + oldHash, oldTag.rnode.spcNode, + oldTag.rnode.dbNode, oldTag.rnode.relNode, + oldTag.forkNum, oldTag.blockNum); +#endif + buf->tag = newTag; buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT); if (relpersistence == RELPERSISTENCE_PERMANENT) @@ -2772,3 +2799,506 @@ local_buffer_write_error_callback(void *arg) pfree(path); } } + +/* ---------------------------------------------------------------- + * buffer cache hibernation support stuff + * + * Suspend/resume buffer cache data structure using hibernation files + * at shutdown/startup. + * ---------------------------------------------------------------- + */ + +bool EnableBufferCacheHibernation = false; + +#define BUFFER_CACHE_HIBERNATION_FILE_STRATEGY "global/pg_buffer_cache_hibernation_strategy" +#define BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS "global/pg_buffer_cache_hibernation_descriptors" +#define BUFFER_CACHE_HIBERNATION_FILE_BLOCKS "global/pg_buffer_cache_hibernation_blocks" +#define BUFFER_CACHE_HIBERNATION_FILE_CRC32 "global/pg_buffer_cache_hibernation_crc32" + +static struct +{ + char *hibernation_file; + char *data_ptr; + Size record_length; + Size num_records; + pg_crc32 crc; +} BufferCacheHibernationData[] = +{ + /* BufferStrategyControl */ + { + BUFFER_CACHE_HIBERNATION_FILE_STRATEGY, + NULL, 0, 0, 0 + }, + + /* BufferDescriptors */ + { + BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS, + NULL, 0, 0, 0 + }, + + /* BufferBlocks */ + { + BUFFER_CACHE_HIBERNATION_FILE_BLOCKS, + NULL, 0, 0, 0 + }, + + /* End-of-list marker */ + { + NULL, + NULL, 0, 0, 0 + }, +}; + +/* + * AtProcExit_BufferCacheHibernation: + * store buffer cache into hibernation files at shutdown. + */ +static void +AtProcExit_BufferCacheHibernation(int code, Datum arg) +{ + ControlFileData controlFile; + BufferHibernationFileType id; + int i; + int fd; + + if (EnableBufferCacheHibernation == false) + { + return; + } + + /* + * get control file to check the system state validation. + */ + GetControlFile(&controlFile); + if (controlFile.state != DB_SHUTDOWNED) + { + elog(WARNING, + "database system was not shut down normally, " + "aborting buffer cache hibernation"); + return; + } + + /* + * suspend buffer cache data structure into hibernation files. + */ + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + Size record_length; + Size num_records; + char *ptr; + pg_crc32 crc; + + if (BufferCacheHibernationData[id].data_ptr == NULL || + BufferCacheHibernationData[id].record_length == 0 || + BufferCacheHibernationData[id].num_records == 0) + { + return; + } + + elog(NOTICE, + "buffer cache hibernate into %s", + BufferCacheHibernationData[id].hibernation_file); + + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file, + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) + { + elog(WARNING, + "could not open %s", + BufferCacheHibernationData[id].hibernation_file); + goto cleanup; + } + + record_length = BufferCacheHibernationData[id].record_length; + num_records = BufferCacheHibernationData[id].num_records; + + INIT_CRC32(crc); + for (i = 0; i < num_records; i++) + { + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length); + if (write(fd, (void *)ptr, record_length) != record_length) + { + elog(WARNING, + "could not write %s", + BufferCacheHibernationData[id].hibernation_file); + goto cleanup; + } + + COMP_CRC32(crc, ptr, record_length); + } + + FIN_CRC32(crc); + close(fd); + + BufferCacheHibernationData[id].crc = crc; + + elog(DEBUG5, + "%s crc: %x", + BufferCacheHibernationData[id].hibernation_file, + crc); + } + + /* + * write computed crc values for validation at resuming. + */ + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32, + O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) + { + elog(WARNING, + "could not open %s", + BUFFER_CACHE_HIBERNATION_FILE_CRC32); + goto cleanup; + } + + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + pg_crc32 crc; + + crc = BufferCacheHibernationData[id].crc; + if (write(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32)) + { + elog(WARNING, + "could not write %s", + BUFFER_CACHE_HIBERNATION_FILE_CRC32); + goto cleanup; + } + } + close(fd); + + return; + +cleanup: + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + unlink(BufferCacheHibernationData[id].hibernation_file); + } + + return; +} + +/* + * ResisterBufferCacheHibernation: + * register buffer cache data structure info. + */ +void +ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size record_length, Size num_records) +{ + if (EnableBufferCacheHibernation == false) + { + return; + } + + if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY && + id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTERS && + id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS) + { + return; + } + + BufferCacheHibernationData[id].data_ptr = ptr; + BufferCacheHibernationData[id].record_length = record_length; + BufferCacheHibernationData[id].num_records = num_records; +} + +/* + * ResumeBufferCacheHibernation: + * resume buffer cache from hibernation file at startup. + */ +void +ResumeBufferCacheHibernation(void) +{ + ControlFileData controlFile; + BufferHibernationFileType id; + int i; + int fd; + Size buf_size; + char *buf_common; + + if (EnableBufferCacheHibernation == false) + { + return; + } + + /* + * AtProcExit_BufferCacheHibernation to be called at shutdown. + */ + on_shmem_exit(AtProcExit_BufferCacheHibernation, 0); + + + /* + * get control file to check the system state and + * hibernation file validation. + */ + GetControlFile(&controlFile); + if (controlFile.state != DB_SHUTDOWNED) + { + elog(WARNING, + "database system was not shut down normally, " + "aborting buffer cache hibernation"); + return; + } + + /* + * read crc values which was computed at hibernation files creation. + */ + fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32, + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) + { + elog(WARNING, + "could not open %s", + BUFFER_CACHE_HIBERNATION_FILE_CRC32); + return; + } + + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + pg_crc32 crc; + + if (read(fd, (void *)&crc, sizeof(pg_crc32)) != sizeof(pg_crc32)) + { + elog(WARNING, + "could not read %s", + BUFFER_CACHE_HIBERNATION_FILE_CRC32); + close(fd); + return; + } + BufferCacheHibernationData[id].crc = crc; + } + + close(fd); + + /* + * allocate buffer to read the contents of hibernation file for validation. + */ + buf_size = 0; + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + if (buf_size < BufferCacheHibernationData[id].record_length) + { + buf_size = BufferCacheHibernationData[id].record_length; + } + } + + buf_common = malloc(buf_size); + Assert(buf_common != NULL); + + /* + * check if all hibernation files are valid. + */ + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + Size record_length; + Size num_records; + struct stat sb; + pg_crc32 crc; + + if (BufferCacheHibernationData[id].data_ptr == NULL || + BufferCacheHibernationData[id].record_length == 0 || + BufferCacheHibernationData[id].num_records == 0) + { + free(buf_common); + return; + } + + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file, + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) + { + free(buf_common); + return; + } + + if (fstat(fd, &sb) < 0) + { + elog(WARNING, + "could not get stats of buffer cache hibernation file: %s", + BufferCacheHibernationData[id].hibernation_file); + close(fd); + free(buf_common); + return; + } + + record_length = BufferCacheHibernationData[id].record_length; + num_records = BufferCacheHibernationData[id].num_records; + + if (sb.st_size != (record_length * num_records)) + { + elog(WARNING, + "size mismatch on buffer cache hibernation file: %s", + BufferCacheHibernationData[id].hibernation_file); + close(fd); + free(buf_common); + return; + } + + if ((pg_time_t)sb.st_mtime < controlFile.time) + { + elog(WARNING, + "hibernation file is older than control file: %s", + BufferCacheHibernationData[id].hibernation_file); + close(fd); + free(buf_common); + return; + } + + INIT_CRC32(crc); + for (i = 0; i < num_records; i++) + { + if (read(fd, (void *)buf_common, record_length) != record_length) + { + elog(WARNING, + "could not read buffer cache hibernation file: %s", + BufferCacheHibernationData[id].hibernation_file); + close(fd); + free(buf_common); + return; + } + + COMP_CRC32(crc, buf_common, record_length); + + /* + * buffer descriptors validation. + */ + if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTERS) + { + BufferDesc *buf; + BufFlags abnormal_flags; + + abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS | BM_IO_ERROR | + BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER); + + buf = (BufferDesc *)buf_common; + + if (buf->flags & abnormal_flags) + { + elog(WARNING, + "abnormal flags in buffer descriptors: %d", + buf->flags); + close(fd); + free(buf_common); + return; + } + + if (buf->usage_count > BM_MAX_USAGE_COUNT) + { + elog(WARNING, + "invalid usage count in buffer descriptors: %d", + buf->usage_count); + close(fd); + free(buf_common); + return; + } + + if (buf->buf_id < 0 || buf->buf_id >= num_records) + { + elog(WARNING, + "invalid buffer id in buffer descriptors: %d", + buf->buf_id); + close(fd); + free(buf_common); + return; + } + } + } + + FIN_CRC32(crc); + close(fd); + + elog(DEBUG5, + "%s crc: %x", + BufferCacheHibernationData[id].hibernation_file, + crc); + + if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc)) + { + elog(WARNING, + "crc mismatch on buffer cache hibernation file: %s", + BufferCacheHibernationData[id].hibernation_file); + close(fd); + free(buf_common); + return; + } + } + + free(buf_common); + + /* + * resume buffer cache data structure from hibernation files. + */ + for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; id++) + { + int fd; + Size record_length; + Size num_records; + char *ptr; + + record_length = BufferCacheHibernationData[id].record_length; + num_records = BufferCacheHibernationData[id].num_records; + + elog(NOTICE, + "buffer cache resume from %s(%d * %d)", + BufferCacheHibernationData[id].hibernation_file, + record_length, num_records); + + fd = BasicOpenFile(BufferCacheHibernationData[id].hibernation_file, + O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR); + if (fd < 0) + { + return; + } + + for (i = 0; i < num_records; i++) + { + ptr = BufferCacheHibernationData[id].data_ptr + (i * record_length); + read(fd, (void *)ptr, record_length); + } + + close(fd); + } + + /* + * setup lookup hashtable based on buffer descriptors. + */ + for (i = 0; i < NBuffers; i++) + { + BufferDesc *buf; + BufferTag newTag; + uint32 newHash; + int buf_id; + + buf = &BufferDescriptors[i]; + if (buf->tag.rnode.spcNode == InvalidOid && + buf->tag.rnode.dbNode == InvalidOid && + buf->tag.rnode.relNode == InvalidOid) + { + continue; + } + + INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, buf->tag.blockNum); + newHash = BufTableHashCode(&newTag); + buf_id = BufTableInsert(&newTag, newHash, buf->buf_id); + if (buf_id != -1) + { + /* the entry exists already, return it to the freelist. */ + buf->refcount = 0; + buf->flags = 0; + InvalidateBuffer(buf); + continue; + } + + /* clear wait_backend_pid because the process was terminated already. */ + buf->wait_backend_pid = 0; + +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION + elog(DEBUG5, + "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d", + buf->buf_id, buf->flags, buf->usage_count, buf->refcount, + buf->wait_backend_pid, buf->freeNext, + newHash, newTag.rnode.spcNode, + newTag.rnode.dbNode, newTag.rnode.relNode, + newTag.forkNum, newTag.blockNum); +#endif + } +} diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index bf9903b..8a9f772 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -347,6 +347,12 @@ StrategyInitialize(bool init) } else Assert(!init); + + if (EnableBufferCacheHibernation) + { + ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY, + (char *)StrategyControl, sizeof(BufferStrategyControl), 1); + } } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 738e215..ed2adbf 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1429,6 +1429,16 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"enable_buffer_cache_hibernation", PGC_POSTMASTER, UNGROUPED, + gettext_noop("Enables buffer cache hibernation."), + gettext_noop("Suspend/resume buffer cache data structure using hibernation files " + "at shutdown/startup.") + }, + &EnableBufferCacheHibernation, + false, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index b8a1582..1fe5cfe 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -118,6 +118,10 @@ #work_mem = 1MB # min 64kB #maintenance_work_mem = 16MB # min 1MB #max_stack_depth = 2MB # min 100kB +#enable_buffer_cache_hibernation = off # "on" allows buffer cache hibernation + # support (suspend/resume buffer cache + # data structure using hibernation files + # at shutdown/startup) # - Kernel Resource Usage - diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 7056fd6..b2f7cf5 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -13,6 +13,7 @@ #include "access/rmgr.h" #include "access/xlogdefs.h" +#include "catalog/pg_control.h" #include "lib/stringinfo.h" #include "storage/buf.h" #include "utils/pg_crc.h" @@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void); extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream); extern XLogRecPtr GetXLogReplayRecPtr(void); +extern void GetControlFile(ControlFileData *controlFile); extern void UpdateControlFile(void); extern uint64 GetSystemIdentifier(void); extern Size XLOGShmemSize(void); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index b8fc87e..0fbb49f 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -211,6 +211,20 @@ extern void BgBufferSync(void); extern void AtProcExit_LocalBuffers(void); +/* buffer cache hibernation support stuff */ +extern bool EnableBufferCacheHibernation; + +typedef enum BufferHibernationFileType +{ + BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY, + BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTERS, + BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS +} BufferHibernationFileType; + +extern void ResisterBufferCacheHibernation(BufferHibernationFileType id, + char *ptr, Size record_length, Size num_records); +extern void ResumeBufferCacheHibernation(void); + /* in freelist.c */ extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); extern void FreeAccessStrategy(BufferAccessStrategy strategy); diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index 51d561b..cc6cd38 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -1,17 +1,18 @@ SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%'; - name | setting --------------------+--------- - enable_bitmapscan | on - enable_hashagg | on - enable_hashjoin | on - enable_indexscan | on - enable_material | on - enable_mergejoin | on - enable_nestloop | on - enable_seqscan | on - enable_sort | on - enable_tidscan | on -(10 rows) + name | setting +---------------------------------+--------- + enable_bitmapscan | on + enable_buffer_cache_hibernation | off + enable_hashagg | on + enable_hashjoin | on + enable_indexscan | on + enable_material | on + enable_mergejoin | on + enable_nestloop | on + enable_seqscan | on + enable_sort | on + enable_tidscan | on +(11 rows) CREATE TABLE foo2(fooid int, f2 int); INSERT INTO foo2 VALUES(1, 11);